Spaces:

LittleFrog
/

IntrinsicAnything

Running on Zero

IntrinsicAnything / models /ldm /data /legacy.py

burningdust

Initial commit

d72c37e over 1 year ago

6.26 kB


	class FolderData(Dataset):
	    """Dataset of images found under a folder, with optional captions.

	    Each item is a dict with the keys ``"image"`` (the output of the
	    transform pipeline), ``"txt"`` (the caption) and, when
	    ``return_paths`` is true, ``"path"`` (the image path as a string).

	    Two modes exist:

	    * With ``caption_file``: items are enumerated in the order of the
	      caption mapping's keys, and each key is resolved relative to
	      ``root_dir``.
	    * Without ``caption_file``: items are the files found recursively
	      under ``root_dir`` whose names end in one of ``ext``; every item
	      gets ``default_caption``.
	    """

	    def __init__(
	        self,
	        root_dir,
	        caption_file=None,
	        image_transforms=None,
	        ext="jpg",
	        default_caption="",
	        postprocess=None,
	        return_paths=False,
	    ) -> None:
	        """Create a dataset from a folder of images.

	        If you pass in a root directory it will be searched for images
	        ending in ext (ext can be a list).

	        Args:
	            root_dir: Directory that contains the images.
	            caption_file: Optional ``.json`` or ``.jsonl`` file with captions
	                (see ``_load_captions`` for the accepted layouts).
	            image_transforms: Transform specification forwarded to
	                ``make_tranforms``. ``None`` (the default) means an empty list.
	            ext: File extension, or a tuple/list/ListConfig of extensions,
	                without the leading dot.
	            default_caption: Caption used when no caption is available.
	            postprocess: Optional callable applied to every item dict, or a
	                ``DictConfig`` that is instantiated into one.
	            return_paths: Whether to add the image path to every item.

	        Raises:
	            ValueError: If ``caption_file`` has an unsupported suffix.
	        """
	        self.root_dir = Path(root_dir)
	        self.default_caption = default_caption
	        self.return_paths = return_paths
	        if isinstance(postprocess, DictConfig):
	            postprocess = instantiate_from_config(postprocess)
	        self.postprocess = postprocess

	        if caption_file is not None:
	            self.captions = self._load_captions(caption_file)
	            # Materialise the key order once so that __getitem__ does not
	            # have to rebuild the whole key list for every single item.
	            self._caption_keys = list(self.captions)
	        else:
	            self.captions = None
	            self._caption_keys = None

	        # ``ext`` is deliberately left untouched by the caption handling
	        # above; the caption file's own suffix must not leak into the glob.
	        if not isinstance(ext, (tuple, list, ListConfig)):
	            ext = [ext]

	        # Only used if there is no caption file
	        self.paths = []
	        for e in ext:
	            self.paths.extend(sorted(self.root_dir.rglob(f"*.{e}")))

	        # A fresh list per instance: make_tranforms is defined elsewhere and
	        # may modify the list it receives, which must not leak across
	        # instances through a shared default argument.
	        if image_transforms is None:
	            image_transforms = []
	        self.tform = make_tranforms(image_transforms)

	    @staticmethod
	    def _load_captions(caption_file):
	        """Read a caption file and return its content.

	        ``.json`` files are returned exactly as parsed (the rest of the class
	        uses the result as a mapping from file name to caption).  ``.jsonl``
	        files hold one JSON object per line with the keys ``"file_name"`` and
	        ``"text"``; newlines are stripped from both ends of the text and
	        blank lines are skipped.

	        Raises:
	            ValueError: If the suffix is neither ``.json`` nor ``.jsonl``.
	        """
	        suffix = Path(caption_file).suffix.lower()
	        with open(caption_file, "rt") as f:
	            if suffix == ".json":
	                return json.load(f)
	            if suffix == ".jsonl":
	                records = [json.loads(line) for line in f if line.strip()]
	                return {r["file_name"]: r["text"].strip("\n") for r in records}
	        raise ValueError(f"Unrecognised format: {suffix}")

	    def __len__(self):
	        """Return the number of captions if captions were loaded, else the number of files."""
	        if self.captions is not None:
	            return len(self.captions)
	        return len(self.paths)

	    def __getitem__(self, index):
	        """Load, transform and caption the item at ``index``."""
	        data = {}
	        if self.captions is not None:
	            chosen = self._caption_keys[index]
	            caption = self.captions.get(chosen)
	            # A caption stored as null falls back to the default caption.
	            if caption is None:
	                caption = self.default_caption
	            filename = self.root_dir / chosen
	        else:
	            caption = self.default_caption
	            filename = self.paths[index]

	        if self.return_paths:
	            data["path"] = str(filename)

	        # process_im converts to RGB, which reads the pixel data, so the
	        # file can be closed as soon as it returns.
	        with Image.open(filename) as im:
	            data["image"] = self.process_im(im)
	        data["txt"] = caption

	        if self.postprocess is not None:
	            data = self.postprocess(data)

	        return data

	    def process_im(self, im):
	        """Convert ``im`` to RGB and run it through the transform pipeline."""
	        im = im.convert("RGB")
	        return self.tform(im)
	import random

	class TransformDataset:
	    """Wrap a dataset, apply one random spatial transform per item and tag the caption.

	    Every item of the wrapped dataset must be a dict with an ``"image"``
	    tensor and a ``"txt"`` string.  The image is first resized, then one of
	    the named transforms in ``self.transforms`` is picked at random and
	    applied; the caption gets ``" {extra_label} {transform name}"`` appended.
	    """

	    def __init__(self, ds, extra_label="sksbspic", size=768, upsample_size=1024):
	        """
	        Args:
	            ds: Wrapped dataset; must support indexing and ``len``.
	            extra_label: Token appended to every caption before the
	                transform name.
	            size: Size passed to the resize / centre-crop / random-crop
	                transforms.
	            upsample_size: Size of the resize applied before the random
	                transform.
	        """
	        self.ds = ds
	        self.extra_label = extra_label
	        self.transforms = {
	            "align": transforms.Resize(size),
	            "centerzoom": transforms.CenterCrop(size),
	            "randzoom": transforms.RandomCrop(size),
	        }
	        # Built once here instead of once per item.
	        # In case data is smaller than expected
	        self.upsample = transforms.Resize(upsample_size)

	    def __getitem__(self, index):
	        """Return item ``index`` of the wrapped dataset with a random transform applied."""
	        data = self.ds[index]

	        # Move the last axis to the front for the transforms and back again
	        # afterwards (presumably H x W x C <-> C x H x W; confirm against the
	        # wrapped dataset).
	        im = data['image'].permute(2, 0, 1)
	        im = self.upsample(im)

	        tform_name = random.choice(list(self.transforms))
	        im = self.transforms[tform_name](im)

	        data['image'] = im.permute(1, 2, 0)
	        data['txt'] = data['txt'] + f" {self.extra_label} {tform_name}"

	        return data

	    def __len__(self):
	        """Return the length of the wrapped dataset."""
	        return len(self.ds)

	def hf_dataset(
	    name,
	    image_transforms=None,
	    image_column="image",
	    text_column="text",
	    split='train',
	    image_key='image',
	    caption_key='txt',
	    ):
	    """Make huggingface dataset with appropriate list of transforms applied

	    Args:
	        name: Dataset name passed to ``load_dataset``.
	        image_transforms: Transform specification forwarded to
	            ``make_tranforms``. ``None`` (the default) means an empty list.
	        image_column: Column of the loaded dataset that holds the images.
	        text_column: Column of the loaded dataset that holds the captions.
	        split: Split passed to ``load_dataset``.
	        image_key: Key under which transformed images are returned.
	        caption_key: Key under which captions are returned.

	    Returns:
	        The loaded dataset with a transform installed that yields only
	        ``image_key`` and ``caption_key``.

	    Raises:
	        AssertionError: If ``image_column`` or ``text_column`` is not a
	            column of the loaded dataset.
	    """
	    ds = load_dataset(name, split=split)
	    # A fresh list per call: make_tranforms is defined elsewhere and may
	    # modify the list it receives, which must not leak across calls through
	    # a shared default argument.
	    tform = make_tranforms([] if image_transforms is None else image_transforms)

	    # Raised explicitly (instead of via ``assert``) so the check survives
	    # ``python -O``; the exception type and message are unchanged.
	    for column in (image_column, text_column):
	        if column not in ds.column_names:
	            raise AssertionError(f"Didn't find column {column} in {ds.column_names}")

	    def pre_process(examples):
	        """Transform a batch of examples into the two output keys."""
	        processed = {}
	        processed[image_key] = [tform(im) for im in examples[image_column]]
	        processed[caption_key] = examples[text_column]
	        return processed

	    ds.set_transform(pre_process)
	    return ds

	class TextOnly(Dataset):
	    """Returns only captions with dummy images"""

	    def __init__(self, captions, output_size, image_key="image", caption_key="txt", n_gpus=1):
	        """
	        Args:
	            captions: Either a sequence of captions or a ``pathlib.Path`` to
	                a text file with one caption per line.
	            output_size: Height and width of the dummy image.
	            image_key: Key under which the dummy image is returned.
	            caption_key: Key under which the caption is returned.
	            n_gpus: When greater than one, every caption is repeated this
	                many times in a row.
	        """
	        self.output_size = output_size
	        self.image_key = image_key
	        self.caption_key = caption_key
	        if isinstance(captions, Path):
	            self.captions = self._load_caption_file(captions)
	        else:
	            self.captions = captions

	        if n_gpus > 1:
	            # hack to make sure that all the captions appear on each gpu
	            self.captions = [c for c in self.captions for _ in range(n_gpus)]

	    def __len__(self):
	        """Return the number of captions (after any per-GPU repetition)."""
	        return len(self.captions)

	    def __getitem__(self, index):
	        """Return caption ``index`` together with a dummy image."""
	        # An all-zero image rescaled by ``x * 2 - 1`` is -1 everywhere, so
	        # build that constant tensor directly in (height, width, channel)
	        # layout.
	        dummy_im = torch.full((self.output_size, self.output_size, 3), -1.0)
	        return {self.image_key: dummy_im, self.caption_key: self.captions[index]}

	    def _load_caption_file(self, filename):
	        """Read one caption per line from ``filename``, stripping newlines."""
	        with open(filename, 'rt') as f:
	            return [line.strip('\n') for line in f]



	import random
	import json
	class IdRetreivalDataset(FolderData):
	    """FolderData that adds a retrieved companion image to every item.

	    ``ret_file`` is a JSON file that is indexed by image file name and
	    yields a sequence of file names (relative to ``root_dir``) of matching
	    images.  Each item gets an extra ``"match"`` entry: the item's own image
	    concatenated along the last axis with one randomly chosen match, or
	    with the image itself when the item has no matches.
	    """

	    def __init__(self, ret_file, *args, **kwargs):
	        """
	        Args:
	            ret_file: Path to the JSON retrieval file.
	            *args: Positional arguments forwarded to ``FolderData``.
	            **kwargs: Keyword arguments forwarded to ``FolderData``.
	        """
	        super().__init__(*args, **kwargs)
	        with open(ret_file, "rt") as f:
	            self.ret = json.load(f)

	    def __getitem__(self, index):
	        """Return the base item for ``index`` plus its ``"match"`` tensor."""
	        data = super().__getitem__(index)
	        # NOTE(review): the lookup key comes from self.paths, so this
	        # presumably expects the dataset to be used without a caption file
	        # (otherwise ``index`` enumerates captions, not paths) -- confirm.
	        key = self.paths[index].name
	        matches = self.ret[key]
	        retreived = random.choice(matches) if matches else key

	        # process_im converts to RGB, which reads the pixel data, so the
	        # file can be closed as soon as it returns.
	        with Image.open(self.root_dir / retreived) as im:
	            match = self.process_im(im)
	        data["match"] = torch.cat((data["image"], match), dim=-1)
	        return data