Qwen-Image-Edit-Angles

Running on Zero

Qwen-Image-Edit-Angles / qwenimage / datamodels.py

Elea Zhong

update demo

6d99887 12 days ago

3.79 kB

	import enum
	from pathlib import Path
	from typing import Any, Literal

	import torch
	from diffusers.image_processor import PipelineImageInput
	from pydantic import BaseModel, ConfigDict, Field

	from qwenimage.types import DataRange
	from wandml.foundation.datamodels import FluxInputs
	from wandml.trainers.datamodels import ExperimentTrainerParameters


	class QwenInputs(BaseModel):
	    """Validated container of optional image-generation inputs.

	    Every field has a default, so an instance can be created with no
	    arguments.

	    NOTE(review): the field names look like the keyword arguments of a
	    diffusers-style Qwen image pipeline call -- confirm against the
	    pipeline that consumes this model.
	    """

	    image: PipelineImageInput | None = None
	    prompt: str | list[str] | None = None
	    height: int | None = None
	    width: int | None = None
	    negative_prompt: str | list[str] | None = None
	    true_cfg_scale: float = 1.0
	    num_inference_steps: int = 50
	    generator: torch.Generator | list[torch.Generator] | None = None
	    max_sequence_length: int = 512
	    vae_image_override: int | None = None
	    latent_size_override: int | None = None

	    # Fields such as ``generator`` are annotated with classes pydantic has
	    # no built-in schema for (torch.Generator); pydantic only accepts those
	    # when arbitrary types are allowed.
	    model_config = ConfigDict(
	        arbitrary_types_allowed=True,
	        # extra="allow",
	    )

	class TrainingType(str, enum.Enum):
	    """String-valued enumeration of the available training modes.

	    Members also behave as ``str`` (the class mixes in ``str``), so a member
	    compares equal to its raw value, e.g. ``TrainingType.NAIVE == "naive"``.
	    """

	    IM2IM = "im2im"
	    NAIVE = "naive"
	    REGRESSION = "regression"

	    @property
	    def is_style(self) -> bool:
	        """Return True for the NAIVE and IM2IM members, False otherwise."""
	        return self in (TrainingType.NAIVE, TrainingType.IM2IM)

	class QuantOptions(str, enum.Enum):
	    """String-valued enumeration of quantization option identifiers.

	    NOTE(review): the member names suggest int8 / int4 weight-only and
	    fp8 row-wise schemes -- confirm against the code that consumes them.
	    """

	    INT8WO = "int8wo"
	    INT4WO = "int4wo"
	    FP8ROW = "fp8row"

	LossTermSpecType = int\|float\|dict[str,int\|float]\|None

	class QwenLossTerms(BaseModel):
	    """Per-term loss settings plus a few related scalar hyperparameters.

	    Each ``LossTermSpecType`` field accepts a number, a ``str`` -> number
	    mapping, or ``None``.  Only ``mse`` defaults to 1.0; every other term
	    defaults to 0.0.

	    NOTE(review): presumably each value is the weight of that loss term,
	    with 0.0 meaning "disabled" -- confirm against the trainer.
	    """

	    mse: LossTermSpecType = 1.0
	    triplet: LossTermSpecType = 0.0
	    negative_mse: LossTermSpecType = 0.0
	    distribution_matching: LossTermSpecType = 0.0
	    pixel_triplet: LossTermSpecType = 0.0
	    pixel_lpips: LossTermSpecType = 0.0
	    pixel_mse: LossTermSpecType = 0.0
	    pixel_distribution_matching: LossTermSpecType = 0.0
	    adversarial: LossTermSpecType = 0.0
	    teacher: LossTermSpecType = 0.0

	    triplet_margin: float = 0.0
	    triplet_min_abs_diff: float = 0.0
	    teacher_steps: int = 4

	    @property
	    def pixel_terms(self) -> tuple[str, ...]:
	        """Return the names of the four ``pixel_*`` loss fields of this model.

	        The result is a fixed tuple of field names (not a boolean and not
	        the field values), in the order lpips, mse, triplet,
	        distribution_matching.
	        """
	        return (
	            "pixel_lpips",
	            "pixel_mse",
	            "pixel_triplet",
	            "pixel_distribution_matching",
	        )

	class QwenConfig(ExperimentTrainerParameters):
	    """Experiment configuration that extends ``ExperimentTrainerParameters``.

	    Adds the fields declared below; every one of them has a default, so
	    this subclass introduces no new required arguments.

	    NOTE(review): ``run_name`` and ``output_dir`` (used by
	    ``add_suffix_to_names``) are not declared here -- presumably they are
	    ``str`` fields inherited from ``ExperimentTrainerParameters``; confirm.
	    """

	    load_multi_view_lora: bool = False
	    train_max_sequence_length: int = 512
	    train_dist: str = "linear"  # "logit-normal"
	    train_shift: bool = True
	    inference_dist: str = "linear"
	    inference_shift: bool = True
	    static_mu: float | None = None
	    loss_weight_dist: str | None = None  # "scaled_clipped_gaussian", "logit-normal"

	    vae_image_size: int = 1024 * 1024
	    offload_text_encoder: bool = True
	    quantize_text_encoder: bool = False
	    quantize_transformer: bool = False
	    vae_tiling: bool = False

	    # default_factory gives each config its own QwenLossTerms instance.
	    train_loss_terms: QwenLossTerms = Field(default_factory=QwenLossTerms)
	    validation_loss_terms: QwenLossTerms = Field(default_factory=QwenLossTerms)

	    training_type: TrainingType | None = None
	    train_range: DataRange | None = None
	    val_range: DataRange | None = None
	    test_range: DataRange | None = None

	    style_title: str | None = None
	    style_base_dir: str | None = None
	    style_csv_path: str | None = None
	    style_data_dir: str | None = None
	    style_ref_dir: str | None = None
	    style_val_with: str = "train"
	    naive_static_prompt: str | None = None

	    regression_data_dir: str | Path | None = None
	    regression_gen_steps: int = 50
	    editing_data_dir: str | Path | None = None
	    editing_total_per: int = 1
	    regression_base_pipe_steps: int = 8

	    name_suffix: dict[str, Any] | None = None

	    def add_suffix_to_names(self) -> None:
	        """Append the ``name_suffix`` entries to ``run_name`` and ``output_dir``.

	        Each ``key: value`` pair contributes ``_<key>_<str(value)>``, in the
	        dict's iteration order.  Does nothing when ``name_suffix`` is
	        ``None``.  Both attributes are modified in place; nothing is
	        returned.
	        """
	        if self.name_suffix is None:
	            return
	        suffix_sum = "".join(
	            "_" + suf_name + "_" + str(suf_val)
	            for suf_name, suf_val in self.name_suffix.items()
	        )
	        self.run_name += suffix_sum
	        # Drop one trailing "/" first so the suffix extends the last path
	        # component rather than being appended after the separator.
	        self.output_dir = self.output_dir.removesuffix("/") + suffix_sum