Skip to content

Commit

Permalink
Fix vit calib data bug
Browse files (browse the repository at this point in the history)
  • Loading branch information
gushiqiao committed Oct 17, 2024
1 parent fada39c commit 15535df
Showing 1 changed file with 5 additions and 1 deletion.
6 changes: 5 additions & 1 deletion llmc/data/dataset/base_dataset.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -295,7 +295,11 @@ def get_calib_dataset(self):
elif self.calib_dataset_type == 'img_txt':
calib_samples = self.img_txt_group_samples_wo_mask(samples)
logger.info(f'len(calib_samples) : {len(calib_samples)}')
- return calib_samples
+ if self.padding:
+ padding_mask = [calib_sample['attention_mask'] for calib_sample in calib_samples] # noqa
+ else:
+ padding_mask = None
+ return calib_samples, padding_mask

def general_preproc(self, calib_dataset, tokenizer, n_samples, seq_len):
dataset = calib_dataset.shuffle(seed=self.seed)
Expand Down

0 comments on commit 15535df

Please sign in to comment.