Spaces:
Running
on
Zero
Running
on
Zero
class FolderData(Dataset):
    """Dataset of images found under a folder, optionally paired with captions.

    Every item is a dict holding ``"image"`` (the transformed image), ``"txt"``
    (its caption) and, when ``return_paths`` is set, ``"path"``.
    """

    def __init__(
        self,
        root_dir,
        caption_file=None,
        image_transforms=None,
        ext="jpg",
        default_caption="",
        postprocess=None,
        return_paths=False,
    ) -> None:
        """Create a dataset from a folder of images.

        Args:
            root_dir: Folder that is searched recursively for images. Caption
                keys are also resolved relative to it.
            caption_file: Optional ``.json`` or ``.jsonl`` file. When given, the
                dataset length and order follow its entries instead of the
                files found on disk.
            image_transforms: Transform specification handed to
                ``make_tranforms``. ``None`` (the default) is passed on as a
                fresh empty list so no list object is shared between calls.
            ext: Image extension, or a list/tuple/ListConfig of extensions.
                A leading dot is tolerated.
            default_caption: Caption used when there is no caption file or
                when an entry's caption is ``None``.
            postprocess: Optional callable applied to every item dict. A
                ``DictConfig`` is instantiated with ``instantiate_from_config``.
            return_paths: When true, items also carry the image path under
                ``"path"``.

        Raises:
            ValueError: If ``caption_file`` is neither ``.json`` nor ``.jsonl``.
        """
        self.root_dir = Path(root_dir)
        self.default_caption = default_caption
        self.return_paths = return_paths
        if isinstance(postprocess, DictConfig):
            postprocess = instantiate_from_config(postprocess)
        self.postprocess = postprocess

        if caption_file is not None:
            self.captions = self._load_captions(caption_file)
            # Materialise the key order once; rebuilding the list on every
            # __getitem__ made each lookup O(len(captions)).
            self._caption_keys = list(self.captions)
        else:
            self.captions = None
            self._caption_keys = None

        # The caption file suffix is kept in its own variable inside
        # _load_captions, so `ext` still means the image extension(s) here.
        if not isinstance(ext, (tuple, list, ListConfig)):
            ext = [ext]

        # Only used for indexing if there is no caption file
        self.paths = []
        for e in ext:
            pattern = f"*.{str(e).lstrip('.')}"
            self.paths.extend(sorted(self.root_dir.rglob(pattern)))

        if image_transforms is None:
            image_transforms = []
        self.tform = make_tranforms(image_transforms)

    @staticmethod
    def _load_captions(caption_file):
        """Read a ``.json`` or ``.jsonl`` caption file into a ``{file_name: caption}`` mapping."""
        with open(caption_file, "rt", encoding="utf-8") as f:
            suffix = Path(caption_file).suffix.lower()
            if suffix == ".json":
                return json.load(f)
            if suffix == ".jsonl":
                # One JSON object per line; blank lines are skipped.
                records = [json.loads(line) for line in f if line.strip()]
                return {r["file_name"]: r["text"].strip("\n") for r in records}
            raise ValueError(f"Unrecognised format: {suffix}")

    def __len__(self):
        """Return the number of caption entries, or of image files when there are no captions."""
        if self.captions is not None:
            return len(self.captions)
        return len(self.paths)

    def __getitem__(self, index):
        """Load, transform and caption the item at ``index``."""
        data = {}
        if self.captions is not None:
            chosen = self._caption_keys[index]
            caption = self.captions.get(chosen)
            if caption is None:
                caption = self.default_caption
            filename = self.root_dir / chosen
        else:
            caption = self.default_caption
            filename = self.paths[index]

        if self.return_paths:
            data["path"] = str(filename)

        # process_im performs the RGB conversion, so the pixel data is read
        # inside the block and the file is closed on exit.
        with Image.open(filename) as raw:
            data["image"] = self.process_im(raw)
        data["txt"] = caption

        if self.postprocess is not None:
            data = self.postprocess(data)
        return data

    def process_im(self, im):
        """Convert ``im`` to RGB and apply the configured transform."""
        im = im.convert("RGB")
        return self.tform(im)
| import random | |
class TransformDataset():
    """Wrap a dataset and apply one randomly chosen resize/crop to each image.

    The name of the chosen transform, preceded by ``extra_label``, is appended
    to the item's caption.
    """

    def __init__(self, ds, extra_label="sksbspic", size=768, presize=1024):
        """Set up the wrapper.

        Args:
            ds: Wrapped dataset. Its items must be dicts with ``'image'`` and
                ``'txt'`` entries.
            extra_label: Token inserted into the caption before the transform name.
            size: Target size given to the three candidate transforms.
            presize: Size every image is resized to before a candidate
                transform is applied.
        """
        self.ds = ds
        self.extra_label = extra_label
        # Built once here rather than on every __getitem__ call.
        self._presize = transforms.Resize(presize)
        self.transforms = {
            "align": transforms.Resize(size),
            "centerzoom": transforms.CenterCrop(size),
            "randzoom": transforms.RandomCrop(size),
        }

    def __getitem__(self, index):
        """Return item ``index`` of the wrapped dataset with a random transform applied.

        The dict returned by the wrapped dataset is updated in place.
        """
        data = self.ds[index]
        im = data['image']
        # Move axis 2 to the front for the transforms (presumably the image is
        # channel-last and the transforms want channel-first - TODO confirm).
        im = im.permute(2, 0, 1)
        # In case data is smaller than expected
        im = self._presize(im)

        tform_name = random.choice(list(self.transforms))
        im = self.transforms[tform_name](im)

        # Restore the original axis order.
        im = im.permute(1, 2, 0)

        data['image'] = im
        data['txt'] = data['txt'] + f" {self.extra_label} {tform_name}"
        return data

    def __len__(self):
        """Return the length of the wrapped dataset."""
        return len(self.ds)
def hf_dataset(
    name,
    image_transforms=None,
    image_column="image",
    text_column="text",
    split='train',
    image_key='image',
    caption_key='txt',
    ):
    """Make huggingface dataset with appropriate list of transforms applied

    Args:
        name: Dataset name passed to ``load_dataset``.
        image_transforms: Transform specification handed to ``make_tranforms``.
            ``None`` (the default) is passed on as a fresh empty list so no
            list object is shared between calls.
        image_column: Column of the loaded dataset that holds the images.
        text_column: Column of the loaded dataset that holds the captions.
        split: Split passed to ``load_dataset``.
        image_key: Key under which transformed images are returned.
        caption_key: Key under which captions are returned.

    Returns:
        The loaded dataset with ``pre_process`` registered through
        ``set_transform``; processed batches contain only ``image_key`` and
        ``caption_key``.

    Raises:
        AssertionError: If ``image_column`` or ``text_column`` is not a column
            of the loaded dataset.
    """
    if image_transforms is None:
        image_transforms = []

    ds = load_dataset(name, split=split)
    tform = make_tranforms(image_transforms)

    # Kept as asserts so callers still see the same exception type.
    assert image_column in ds.column_names, f"Didn't find column {image_column} in {ds.column_names}"
    assert text_column in ds.column_names, f"Didn't find column {text_column} in {ds.column_names}"

    def pre_process(examples):
        """Transform every image of a batch and pass the captions through."""
        return {
            image_key: [tform(im) for im in examples[image_column]],
            caption_key: examples[text_column],
        }

    ds.set_transform(pre_process)
    return ds
class TextOnly(Dataset):
    """Dataset that yields captions paired with constant dummy images."""

    def __init__(self, captions, output_size, image_key="image", caption_key="txt", n_gpus=1):
        """Returns only captions with dummy images

        Args:
            captions: Sequence of captions, or a ``Path`` to a text file with
                one caption per line.
            output_size: Height and width of the dummy image.
            image_key: Key under which the dummy image is returned.
            caption_key: Key under which the caption is returned.
            n_gpus: When greater than 1, every caption is repeated this many
                times in a row.
        """
        self.output_size = output_size
        self.image_key = image_key
        self.caption_key = caption_key
        if isinstance(captions, Path):
            self.captions = self._load_caption_file(captions)
        else:
            self.captions = captions

        if n_gpus > 1:
            # hack to make sure that all the captions appear on each gpu
            self.captions = [
                caption for caption in self.captions for _ in range(n_gpus)
            ]

    def __len__(self):
        """Return the number of captions (after any per-GPU repetition)."""
        return len(self.captions)

    def __getitem__(self, index):
        """Return the caption at ``index`` with an all -1 dummy image.

        The image has shape ``(output_size, output_size, 3)``. This holds the
        same values as the former ``zeros * 2 - 1`` rearranged to ``h w c``,
        but is built directly as a contiguous tensor.
        """
        dummy_im = torch.full((self.output_size, self.output_size, 3), -1.0)
        return {self.image_key: dummy_im, self.caption_key: self.captions[index]}

    def _load_caption_file(self, filename):
        """Read ``filename`` and return its lines without newline characters."""
        with open(filename, 'rt') as f:
            return [line.strip('\n') for line in f]
| import random | |
| import json | |
class IdRetreivalDataset(FolderData):
    """FolderData variant that pairs every image with a retrieved match.

    Items gain a ``"match"`` entry: the item's own image concatenated with the
    matched image along the last dimension.
    """

    def __init__(self, ret_file, *args, **kwargs):
        """Build the folder dataset and load the retrieval table.

        Args:
            ret_file: JSON file mapping an image file name to a list of
                candidate match file names.
            *args, **kwargs: Forwarded unchanged to ``FolderData``.
        """
        super().__init__(*args, **kwargs)
        with open(ret_file, "rt", encoding="utf-8") as f:
            self.ret = json.load(f)

    def __getitem__(self, index):
        """Return the base item extended with its ``"match"`` tensor.

        Raises:
            KeyError: If the image's file name has no entry in the retrieval table.
        """
        data = super().__getitem__(index)

        # NOTE(review): the key comes from self.paths, so this presumably
        # expects the dataset to run without a caption file - confirm.
        key = self.paths[index].name
        matches = self.ret[key]
        # With no candidates the image is matched with itself.
        retreived = random.choice(matches) if matches else key

        # process_im performs the RGB conversion, so the pixel data is read
        # inside the block and the file is closed on exit.
        with Image.open(self.root_dir / retreived) as raw:
            im = self.process_im(raw)

        data["match"] = torch.cat((data["image"], im), dim=-1)
        return data