Replies: 4 comments 7 replies
-
Hi @good-demo, |
Beta Was this translation helpful? Give feedback.
-
Hi @xiexinch,
norm_cfg = dict(type='SyncBN', requires_grad=True)
# Normalisation layer used inside the transformer backbone.
backbone_norm_cfg = dict(type='LN', requires_grad=True)

# Segmentor definition: a SwinTransformer backbone feeding a UPerHead decode
# head, plus an auxiliary FCNHead that reads backbone output index 2.
model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        type='SwinTransformer',
        pretrain_img_size=224,
        embed_dims=48,
        patch_size=4,
        window_size=6,
        mlp_ratio=4,
        depths=[2, 2, 6, 2],
        num_heads=[4, 8, 16, 32],
        strides=(4, 2, 2, 2),
        out_indices=(0, 1, 2, 3),
        qkv_bias=True,
        qk_scale=None,
        patch_norm=True,
        drop_rate=0.0,
        attn_drop_rate=0.0,
        drop_path_rate=0.3,
        use_abs_pos_embed=False,
        act_cfg=dict(type='GELU'),
        norm_cfg=backbone_norm_cfg,
        # Backbone weights are initialised from a local checkpoint file.
        init_cfg=dict(
            type='Pretrained',
            checkpoint='/home/data1/yxd/mmsegmentation/simmim/new.pth',
        ),
    ),
    decode_head=dict(
        type='UPerHead',
        # One entry per backbone output: embed_dims (48) doubled per stage.
        in_channels=[48, 96, 192, 384],
        in_index=[0, 1, 2, 3],
        pool_scales=(1, 2, 3, 6),
        channels=512,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(type='DiceLoss', loss_weight=1.0),
    ),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=192,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        # The auxiliary loss is weighted lower (0.4) than the main loss (1.0).
        loss_decode=dict(type='DiceLoss', loss_weight=0.4),
    ),
    train_cfg=dict(),
    test_cfg=dict(mode='whole'),
)
# Dataset class name and root directory shared by all three splits.
dataset_type = 'PupilDataset'
data_root = 'data/pupil/PupilData'

# Per-channel normalisation statistics.
# NOTE(review): these look like statistics of images scaled to [0, 1]; the
# Normalize transform is usually given values on the 0-255 pixel scale --
# confirm which scale the loaded images are on.
img_norm_cfg = dict(
    mean=[0.2602, 0.2407, 0.2321], std=[0.337, 0.3328, 0.3297], to_rgb=True)

# Size of the random training crop (and of the padding target).
crop_size = (48, 48)

# Training-time augmentation: random rescale, crop, flip and colour jitter,
# then normalise, pad to the crop size and pack image + label.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=False),
    dict(type='Resize', img_scale=(192, 192), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]

# Evaluation-time pipeline: a single scale, no flipping.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(192, 192),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ],
    ),
]

# Dataloader settings. The val and test splits both read the validation
# folders and share the evaluation pipeline.
data = dict(
    samples_per_gpu=202,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/training',
        ann_dir='annotations/training',
        pipeline=train_pipeline,
    ),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline,
    ),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline,
    ),
)
# Text log written every 50 iterations.
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook', by_epoch=False),
    ],
)

# Distributed backend and logging verbosity.
dist_params = dict(backend='nccl')
log_level = 'INFO'

# No checkpoint is loaded or resumed at start-up.
load_from = None
resume_from = None

# Training-only workflow.
workflow = [('train', 1)]
cudnn_benchmark = True
# AdamW with weight decay switched off (decay_mult=0.0) for parameters whose
# names match the position-embedding, relative-position-bias and norm keys.
optimizer = dict(
    type='AdamW',
    lr=6e-05,
    betas=(0.9, 0.999),
    weight_decay=0.01,
    paramwise_cfg=dict(
        custom_keys=dict(
            absolute_pos_embed=dict(decay_mult=0.0),
            relative_position_bias_table=dict(decay_mult=0.0),
            norm=dict(decay_mult=0.0),
        ),
    ),
)
optimizer_config = dict()

# Polynomial decay (power 1.0) with a linear warm-up.
# NOTE(review): warmup_iters (1500) is larger than the runner's max_iters
# (1300) below, so the run ends before the warm-up phase completes --
# confirm which of the two values is intended.
lr_config = dict(
    policy='poly',
    warmup='linear',
    warmup_iters=1500,
    warmup_ratio=1e-06,
    power=1.0,
    min_lr=0.0,
    by_epoch=False,
)

# Iteration-based training loop with a checkpoint every 200 iterations.
runner = dict(type='IterBasedRunner', max_iters=1300)
checkpoint_config = dict(by_epoch=False, interval=200)

# NOTE(review): interval=2 is presumably counted in iterations under the
# iteration-based runner, i.e. an evaluation every 2 iterations -- confirm.
evaluation = dict(interval=2, metric='mIoU', pre_eval=True)

# NOTE(review): this path differs from the checkpoint path given in the
# backbone's init_cfg ('.../yxd/...' vs '.../test/...') -- confirm.
checkpoint_file = '/home/data1/test/mmsegmentation/simmim/new.pth'
work_dir = './work_dirs/pupil'
gpu_ids = range(0, 8)
auto_resume = False
About the dataset file:
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import mmcv
import numpy as np
from PIL import Image
from .builder import DATASETS
from .custom import CustomDataset
@DATASETS.register_module()
class PupilDataset(CustomDataset):
    """Two-class pupil segmentation dataset.

    Both ``img_suffix`` and ``seg_map_suffix`` are fixed to '.png'.
    ``reduce_zero_label`` is fixed to False, so the label ids in the
    annotation maps are used unchanged, and ``ignore_index`` is fixed to 10.
    """

    # NOTE(review): id 0 is 'pupil' and id 1 is 'bg' here; confirm that the
    # annotation PNGs use the same id order.
    CLASSES = ('pupil', 'bg')

    PALETTE = [[255, 255, 255], [0, 0, 0]]

    def __init__(self, **kwargs):
        super(PupilDataset, self).__init__(
            img_suffix='.png',
            seg_map_suffix='.png',
            reduce_zero_label=False,
            ignore_index=10,
            **kwargs)

    def results2img(self, results, imgfile_prefix, to_label_id, indices=None):
        """Write the segmentation results to png images.

        Args:
            results (list[ndarray]): Testing results of the dataset.
            imgfile_prefix (str): Output directory; it is created if it
                does not exist. Each result is saved as
                "<imgfile_prefix>/<image basename>.png".
            to_label_id (bool): Accepted for interface compatibility; it is
                not used by this method.
            indices (list[int], optional): Indices of input results, if not
                set, all the indices of the dataset will be used.
                Default: None.

        Returns:
            list[str]: Paths of the png files that were written.
        """
        if indices is None:
            indices = list(range(len(self)))

        mmcv.mkdir_or_exist(imgfile_prefix)
        result_files = []
        for result, idx in zip(results, indices):
            filename = self.img_infos[idx]['filename']
            basename = osp.splitext(osp.basename(filename))[0]
            png_filename = osp.join(imgfile_prefix, f'{basename}.png')

            # This dataset fixes reduce_zero_label=False, so predicted ids
            # already match the annotation ids. The "+ 1" shift used by
            # datasets that set reduce_zero_label=True must not be applied
            # here, otherwise every saved label would be off by one.
            output = Image.fromarray(result.astype(np.uint8))
            output.save(png_filename)
            result_files.append(png_filename)
        return result_files

    def format_results(self,
                       results,
                       imgfile_prefix,
                       to_label_id=True,
                       indices=None):
        """Format the results into a directory of png files.

        Args:
            results (list): Testing results of the dataset.
            imgfile_prefix (str): Directory the png files are written to.
            to_label_id (bool): Forwarded to ``results2img``.
                Default: True.
            indices (list[int], optional): Indices of input results, if not
                set, all the indices of the dataset will be used.
                Default: None.

        Returns:
            list[str]: Paths of the png files that were written.
        """
        if indices is None:
            indices = list(range(len(self)))

        assert isinstance(results, list), 'results must be a list.'
        assert isinstance(indices, list), 'indices must be a list.'

        result_files = self.results2img(results, imgfile_prefix, to_label_id,
                                        indices)
        return result_files
Beta Was this translation helpful? Give feedback.
-
Hi @good-demo, |
Beta Was this translation helpful? Give feedback.
-
Does anyone have a good idea for solving this problem? |
Beta Was this translation helpful? Give feedback.
-
This is my setting.
But the result is as follows:
Is there a problem with my setting?
Beta Was this translation helpful? Give feedback.
All reactions