ImageEdit

Runtime error

App Files Files Community

ImageEdit / app.py

sabannna

Update app.py

526f456 verified 5 months ago

raw

history blame contribute delete

20.3 kB

	import os
	# Configure the CUDA caching allocator before torch is imported (mitigation
	# for ZeroGPU memory fragmentation). setdefault keeps any value that is
	# already present in the environment.
	os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True,max_split_size_mb:128")

	import gradio as gr
	import numpy as np
	import spaces
	import torch
	import random
	import gc
	from PIL import Image
	from typing import Iterable
	from gradio.themes import Soft
	from gradio.themes.utils import colors, fonts, sizes
	import uuid
	from datetime import datetime
	from huggingface_hub import HfApi

	# --- SETTINGS ---
	# Hugging Face dataset repositories: uploaded input images go to
	# INPUT_DATASET_ID, generated results go to OUTPUT_DATASET_ID.
	INPUT_DATASET_ID = "tyndreus/image-edit-logs"
	OUTPUT_DATASET_ID = "tyndreus/output"
	# ----------------

	# Register a custom "steel_blue" palette on gradio's colors module so that it
	# can be used as a theme hue further down in this file.
	colors.steel_blue = colors.Color(
	    **{
	        "name": "steel_blue",
	        "c50": "#EBF3F8",
	        "c100": "#D3E5F0",
	        "c200": "#A8CCE1",
	        "c300": "#7DB3D2",
	        "c400": "#529AC3",
	        "c500": "#4682B4",
	        "c600": "#3E72A0",
	        "c700": "#36638C",
	        "c800": "#2E5378",
	        "c900": "#264364",
	        "c950": "#1E3450",
	    }
	)

	class SteelBlueTheme(Soft):
	    """Soft-derived Gradio theme with gray as primary and steel blue as secondary hue.

	    All constructor arguments are keyword-only and are forwarded unchanged to
	    ``Soft.__init__``; afterwards a fixed set of style variables is overridden
	    through ``set()``.
	    """

	    def __init__(
	        self,
	        *,
	        primary_hue: colors.Color | str = colors.gray,
	        secondary_hue: colors.Color | str = colors.steel_blue,
	        neutral_hue: colors.Color | str = colors.slate,
	        text_size: sizes.Size | str = sizes.text_lg,
	        font: fonts.Font | str | Iterable[fonts.Font | str] = (
	            fonts.GoogleFont("Outfit"), "Arial", "sans-serif",
	        ),
	        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
	            fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
	        ),
	    ):
	        super().__init__(
	            primary_hue=primary_hue,
	            secondary_hue=secondary_hue,
	            neutral_hue=neutral_hue,
	            text_size=text_size,
	            font=font,
	            font_mono=font_mono,
	        )
	        # A leading "*" makes a value reference another theme variable
	        # (e.g. "*primary_200"). Without it the bare name is emitted as-is,
	        # which is not a valid CSS color, so every gradient stop carries
	        # the prefix.
	        super().set(
	            background_fill_primary="*primary_50",
	            background_fill_primary_dark="*primary_900",
	            body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
	            body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
	            button_primary_text_color="white",
	            button_primary_text_color_hover="white",
	            button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
	            button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
	            button_primary_background_fill_dark="linear-gradient(90deg, *secondary_600, *secondary_800)",
	            button_primary_background_fill_hover_dark="linear-gradient(90deg, *secondary_500, *secondary_500)",
	            button_secondary_text_color="black",
	            button_secondary_text_color_hover="white",
	            button_secondary_background_fill="linear-gradient(90deg, *primary_300, *primary_300)",
	            button_secondary_background_fill_hover="linear-gradient(90deg, *primary_400, *primary_400)",
	            button_secondary_background_fill_dark="linear-gradient(90deg, *primary_500, *primary_600)",
	            button_secondary_background_fill_hover_dark="linear-gradient(90deg, *primary_500, *primary_500)",
	            slider_color="*secondary_500",
	            slider_color_dark="*secondary_600",
	            block_title_text_weight="600",
	            block_border_width="3px",
	            block_shadow="*shadow_drop_lg",
	            button_primary_shadow="*shadow_drop_lg",
	            button_large_padding="11px",
	            color_accent_soft="*primary_100",
	            block_label_background_fill="*primary_200",
	        )

	# Theme instance with all defaults; handed to gr.Blocks when the UI is built.
	steel_blue_theme = SteelBlueTheme()

	from diffusers import FlowMatchEulerDiscreteScheduler
	from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
	from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
	from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3

	# Model precision and target device (GPU when one is visible, otherwise CPU).
	dtype = torch.bfloat16
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Edit pipeline from the base repo, with its transformer swapped for the
	# "Rapid-AIO" checkpoint; the whole pipeline is then moved to `device`.
	pipe = QwenImageEditPlusPipeline.from_pretrained(
	    "Qwen/Qwen-Image-Edit-2509",
	    torch_dtype=dtype,
	    transformer=QwenImageTransformer2DModel.from_pretrained(
	        "linoyts/Qwen-Image-Edit-Rapid-AIO",
	        subfolder="transformer",
	        torch_dtype=dtype,
	        device_map=("cuda" if device == "cuda" else None),
	    ),
	)
	pipe = pipe.to(device)

	# Register every LoRA under the adapter name that set_adapter() activates.
	# Each row is (hub repo, weight file, adapter name); load order is preserved.
	for _lora_repo, _lora_file, _lora_name in (
	    (
	        "autoweeb/Qwen-Image-Edit-2509-Photo-to-Anime",
	        "Qwen-Image-Edit-2509-Photo-to-Anime_000001000.safetensors",
	        "anime",
	    ),
	    (
	        "dx8152/Qwen-Edit-2509-Multiple-angles",
	        "镜头转换.safetensors",
	        "multiple-angles",
	    ),
	    (
	        "dx8152/Qwen-Image-Edit-2509-Light_restoration",
	        "移除光影.safetensors",
	        "light-restoration",
	    ),
	    (
	        "dx8152/Qwen-Image-Edit-2509-Relight",
	        "Qwen-Edit-Relight.safetensors",
	        "relight",
	    ),
	    (
	        "dx8152/Qwen-Edit-2509-Multi-Angle-Lighting",
	        "多角度灯光-251116.safetensors",
	        "multi-angle-lighting",
	    ),
	    (
	        "tlennon-ie/qwen-edit-skin",
	        "qwen-edit-skin_1.1_000002750.safetensors",
	        "edit-skin",
	    ),
	    (
	        "lovis93/next-scene-qwen-image-lora-2509",
	        "next-scene_lora-v2-3000.safetensors",
	        "next-scene",
	    ),
	    (
	        "vafipas663/Qwen-Edit-2509-Upscale-LoRA",
	        "qwen-edit-enhance_64-v3_000001000.safetensors",
	        "upscale-image",
	    ),
	):
	    pipe.load_lora_weights(
	        _lora_repo,
	        weight_name=_lora_file,
	        adapter_name=_lora_name,
	    )

	# Install the FA3 attention processor on the transformer.
	pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())

	# Memory savers (best effort: they only take effect if the pipeline supports
	# them). A failure is reported instead of silently swallowed so that a
	# missing optimisation is visible in the logs; startup still continues.
	# NOTE(review): attention slicing may interact with the custom attention
	# processor set above on some diffusers versions - confirm.
	try:
	    pipe.enable_vae_slicing()
	except Exception as exc:
	    print(f"VAE slicing not enabled: {exc}")
	try:
	    pipe.enable_attention_slicing("auto")
	except Exception as exc:
	    print(f"Attention slicing not enabled: {exc}")

	# Upper bound for seeds (largest signed 32-bit integer); used by the seed
	# slider and by random seed generation.
	MAX_SEED = np.iinfo(np.int32).max

	def _round8(x: int) -> int:
	x = int(x)
	return max(8, (x // 8) * 8)

	def fit_long_side(image: Image.Image, long_side: int):
	    """Return ``(width, height)`` for *image* scaled so its longer edge is *long_side*.

	    The target edge and both resulting dimensions are passed through
	    ``_round8``; the shorter edge follows the source aspect ratio.
	    """
	    src_w, src_h = image.size
	    target = _round8(long_side)
	    if src_w < src_h:
	        # Portrait: the height is the long edge.
	        out_w, out_h = int(target * (src_w / src_h)), target
	    else:
	        # Landscape or square: the width is the long edge.
	        out_w, out_h = target, int(target * (src_h / src_w))
	    return _round8(out_w), _round8(out_h)

	# --- HUB upload ---
	def upload_image_to_hub(image, dataset_id, folder_prefix="images"):
	    """Save *image* as a PNG and upload it to a Hugging Face dataset repository.

	    The upload is best effort: when ``HF_TOKEN`` is not set, "Fail" is printed
	    and nothing is uploaded; any exception raised while saving or uploading is
	    caught and printed, never propagated.

	    Args:
	        image: Object with a ``save(path)`` method (a PIL image in this app).
	        dataset_id: Target dataset repository id.
	        folder_prefix: Used both as the folder inside the repository and as
	            the file-name prefix.

	    Returns:
	        None.
	    """
	    import tempfile  # local import: keeps this block self-contained

	    try:
	        hf_token = os.environ.get("HF_TOKEN")
	        if not hf_token:
	            print("Fail")
	            return

	        api = HfApi(token=hf_token)

	        # Timestamp plus a short random suffix keeps file names unique.
	        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	        unique_id = str(uuid.uuid4())[:8]
	        filename = f"{folder_prefix}_{timestamp}_{unique_id}.png"

	        # The temporary directory is removed even when saving or uploading
	        # raises, so failed uploads no longer leave files behind; it also
	        # avoids hard-coding "/tmp".
	        with tempfile.TemporaryDirectory() as tmp_dir:
	            temp_path = os.path.join(tmp_dir, filename)
	            image.save(temp_path)

	            api.upload_file(
	                path_or_fileobj=temp_path,
	                path_in_repo=f"{folder_prefix}/{filename}",
	                repo_id=dataset_id,
	                repo_type="dataset",
	            )

	        print("Success")

	    except Exception as e:
	        # Logging uploads must never break image generation.
	        print(f"Yükleme hatası ({dataset_id}): {e}")
	# -----------------

	# ===== Size logic =====
	# Labels offered by the "Image Size Preset" dropdown; apply_size_controls()
	# compares against these exact strings.
	SIZE_PRESETS = [
	    "Smart Auto (closest base + scale)",
	    "Auto (fit long side to 1024)",
	    "1024 x 1024 (Square)",
	    "1024 x 768 (Landscape)",
	    "768 x 1024 (Portrait)",
	    "512 x 512 (Fast)",
	    "Custom (use sliders)",
	]

	# Labels for the "Smart Scale" dropdown ("Auto" or a multiplier such as "1.25x").
	SCALE_CHOICES = ["Auto", "0.5x", "0.75x", "1.0x", "1.25x", "1.5x"]
	# Candidate base long-side lengths; smart_auto_size() picks the one closest to the input.
	SMART_BASE_LONG_SIDES = [512, 768, 1024, 1280, 1536]
	# Multipliers evaluated by smart_auto_size() when the scale is "Auto".
	SMART_SCALE_CANDIDATES = [0.5, 0.75, 1.0, 1.25, 1.5]
	# Options for the "Smart Max Long Side" dropdown.
	SMART_MAX_CHOICES = [768, 1024, 1280, 1536]
	SMART_MAX_LONG_SIDE_DEFAULT = 1024 # conservative (safe-side) default

	def parse_scale(scale_choice: str):
	    """Convert a scale label such as ``"1.25x"`` to a float; ``"Auto"`` yields None."""
	    if scale_choice != "Auto":
	        number_text = scale_choice.replace("x", "").strip()
	        return float(number_text)
	    return None

	def smart_auto_size(image: Image.Image, scale_choice: str, smart_max_long: int):
	    """Choose an output size from the closest base long side and a scale factor.

	    Args:
	        image: Source image, or None.
	        scale_choice: ``"Auto"`` or a multiplier label such as ``"0.75x"``.
	        smart_max_long: Upper limit for the resulting long side (converted
	            with ``int()``).

	    Returns:
	        ``(width, height, info)`` where *info* is a human-readable description
	        of the decision. Without an image the result is
	        ``(1024, 1024, "No image")``.
	    """
	    if image is None:
	        return 1024, 1024, "No image"

	    rgb = image.convert("RGB")
	    orig_w, orig_h = rgb.size
	    orig_long = max(orig_w, orig_h)

	    def _base_rank(candidate):
	        # Nearest to the source first; ties prefer a base that does not
	        # exceed the source, then the smaller base.
	        not_above = 0 if candidate <= orig_long else 1
	        return (abs(candidate - orig_long), not_above, candidate)

	    base = min(SMART_BASE_LONG_SIDES, key=_base_rank)

	    requested = parse_scale(scale_choice)
	    smart_max_long = int(smart_max_long)

	    # Explicit multiplier: clamp to [256, 2048], then to the user's maximum.
	    if requested is not None:
	        long_px = min(max(256, min(int(base * requested), 2048)), smart_max_long)
	        w, h = fit_long_side(rgb, long_px)
	        info = f"Smart(base={base}, scale={requested}x, max={smart_max_long}) -> {w}x{h} (orig {orig_w}x{orig_h})"
	        return w, h, info

	    # "Auto": score every admissible multiplier; exceeding the source long
	    # side costs an extra 2.5x of the overshoot.
	    scored = []
	    for factor in SMART_SCALE_CANDIDATES:
	        long_px = int(base * factor)
	        if not 256 <= long_px <= smart_max_long:
	            continue
	        overshoot = long_px - orig_long
	        penalty = overshoot * 2.5 if overshoot > 0 else 0
	        scored.append((abs(overshoot) + penalty, factor, long_px))

	    if not scored:
	        # No multiplier was admissible: fall back to the clamped base itself.
	        long_px = min(max(256, base), smart_max_long)
	        w, h = fit_long_side(rgb, long_px)
	        info = f"Smart(base={base}, scale=Fallback, max={smart_max_long}) -> {w}x{h} (orig {orig_w}x{orig_h})"
	        return w, h, info

	    # min() returns the first entry with the lowest cost, i.e. the earliest
	    # multiplier wins ties.
	    _, best_factor, best_long = min(scored, key=lambda entry: entry[0])
	    w, h = fit_long_side(rgb, best_long)
	    info = f"Smart(base={base}, scale={best_factor}x Auto, max={smart_max_long}) -> {w}x{h} (orig {orig_w}x{orig_h})"
	    return w, h, info

	def apply_size_controls(preset, image, scale_choice, smart_max_long, cur_w, cur_h):
	    """Resolve the size preset into ``(width, height, info)``.

	    Args:
	        preset: One of the preset labels; any unrecognised label is treated
	            as the custom (slider) mode.
	        image: Current input image, or None.
	        scale_choice: Scale label forwarded to ``smart_auto_size``.
	        smart_max_long: Long-side limit (converted with ``int()``).
	        cur_w: Current width slider value, used in custom mode.
	        cur_h: Current height slider value, used in custom mode.
	    """
	    smart_max_long = int(smart_max_long)

	    if preset == "Smart Auto (closest base + scale)":
	        smart_w, smart_h, smart_info = smart_auto_size(image, scale_choice, smart_max_long)
	        return smart_w, smart_h, smart_info

	    if preset == "Auto (fit long side to 1024)":
	        if image is None:
	            return 1024, 1024, "Auto long side 1024 (no image)"
	        auto_w, auto_h = fit_long_side(image.convert("RGB"), 1024)
	        return auto_w, auto_h, f"Auto long side 1024 -> {auto_w}x{auto_h}"

	    # Fixed-size presets: (label, width, height, info text).
	    fixed_presets = (
	        ("1024 x 1024 (Square)", 1024, 1024, "Fixed 1024x1024"),
	        ("1024 x 768 (Landscape)", 1024, 768, "Fixed 1024x768"),
	        ("768 x 1024 (Portrait)", 768, 1024, "Fixed 768x1024"),
	        ("512 x 512 (Fast)", 512, 512, "Fixed 512x512"),
	    )
	    for label, fixed_w, fixed_h, note in fixed_presets:
	        if preset == label:
	            return fixed_w, fixed_h, note

	    # Anything else: keep the slider values, floored to multiples of 8.
	    custom_w = _round8(cur_w)
	    custom_h = _round8(cur_h)
	    return custom_w, custom_h, f"Custom -> {custom_w}x{custom_h}"

	# ===== LoRA =====
	def set_adapter(lora_adapter: str):
	    """Activate the LoRA adapter that belongs to the chosen editing style.

	    The adapter is enabled on the global ``pipe`` with weight 1.0. A label
	    that is not in the table leaves the pipeline untouched.
	    """
	    # UI label -> adapter name registered when the LoRA weights were loaded.
	    adapter_by_label = {
	        "Photo-to-Anime": "anime",
	        "Multiple-Angles": "multiple-angles",
	        "Light-Restoration": "light-restoration",
	        "Relight": "relight",
	        "Multi-Angle-Lighting": "multi-angle-lighting",
	        "Edit-Skin": "edit-skin",
	        "Next-Scene": "next-scene",
	        "Upscale-Image": "upscale-image",
	    }
	    adapter_name = adapter_by_label.get(lora_adapter)
	    if adapter_name is None:
	        return
	    pipe.set_adapters([adapter_name], adapter_weights=[1.0])

	# ===== Prompt swap =====
	def swap_prompt_sets(p1, p2, p3, p4, p5, p6):
	    """Exchange the main prompt trio (1-3) with the alternate trio (4-6)."""
	    main_set = (p1, p2, p3)
	    alt_set = (p4, p5, p6)
	    return alt_set + main_set

	# ===== Inference (6 images) =====
	@spaces.GPU(duration=120)
	def infer_6pack(
	    input_image,
	    prompt1,
	    prompt2,
	    prompt3,
	    lora_adapter,
	    size_preset,
	    scale_choice,
	    smart_max_long,
	    width,
	    height,
	    seed,
	    randomize_seed,
	    guidance_scale,
	    steps,
	    progress=gr.Progress(track_tqdm=True),
	):
	    """Generate two variations for each of the three prompts (six images).

	    Args:
	        input_image: Image to edit; must not be None.
	        prompt1, prompt2, prompt3: Edit prompts, each rendered twice.
	        lora_adapter: Editing-style label passed to ``set_adapter``.
	        size_preset, scale_choice, smart_max_long: Received from the UI but
	            not used here; the size comes from ``width`` and ``height``.
	        width, height: Output size; floored to multiples of 8.
	        seed: Base seed, used when ``randomize_seed`` is false
	            (image *i* gets ``seed + i``).
	        randomize_seed: Draw six independent random seeds instead.
	        guidance_scale: Passed as ``true_cfg_scale``; a negative prompt is
	            only supplied when it is greater than 1.0.
	        steps: Number of inference steps.
	        progress: Gradio progress tracker.

	    Returns:
	        ``(outputs, seeds_text)``: a list of ``(image, caption)`` pairs and a
	        newline-separated listing of the seeds that were used.

	    Raises:
	        gr.Error: If no input image was provided.
	    """
	    if input_image is None:
	        raise gr.Error("Please upload an image to edit.")

	    upload_image_to_hub(input_image, INPUT_DATASET_ID, folder_prefix="inputs")

	    set_adapter(lora_adapter)

	    width = _round8(width)
	    height = _round8(height)

	    prompts = [prompt1, prompt2, prompt3]
	    variants_per_prompt = 2
	    total_images = len(prompts) * variants_per_prompt

	    # Seeds: 2 per prompt => 6.
	    if randomize_seed:
	        seeds = [random.randint(0, MAX_SEED) for _ in range(total_images)]
	    else:
	        base = int(seed)
	        # Wrap inside the inclusive range [0, MAX_SEED]; a modulus of
	        # MAX_SEED alone would turn a base seed of MAX_SEED into 0.
	        seeds = [(base + i) % (MAX_SEED + 1) for i in range(total_images)]

	    # When true_cfg_scale <= 1, do not pass a negative prompt (avoids a
	    # warning and wasted work).
	    guidance_scale = float(guidance_scale)
	    negative_prompt = None
	    if guidance_scale > 1.0:
	        negative_prompt = (
	            "worst quality, low quality, bad anatomy, bad hands, text, error, missing fingers, "
	            "extra digit, fewer digits, cropped, jpeg artifacts, signature, watermark, username, blurry"
	        )

	    original_image = input_image.convert("RGB")

	    if torch.cuda.is_available():
	        torch.cuda.empty_cache()
	    gc.collect()

	    outputs = []
	    for p_i, p in enumerate(prompts):
	        for v in range(variants_per_prompt):
	            s = seeds[p_i * variants_per_prompt + v]

	            generator = torch.Generator(device=device).manual_seed(int(s))

	            call_kwargs = dict(
	                image=original_image,
	                prompt=p,
	                height=int(height),
	                width=int(width),
	                num_inference_steps=int(steps),
	                generator=generator,
	                true_cfg_scale=guidance_scale,
	            )
	            if negative_prompt is not None:
	                call_kwargs["negative_prompt"] = negative_prompt

	            result = pipe(**call_kwargs).images[0]

	            upload_image_to_hub(result, OUTPUT_DATASET_ID, folder_prefix="generated")

	            caption = f"prompt{p_i+1} var{v+1} | seed={s} | {width}x{height}"
	            outputs.append((result, caption))

	            # Lower memory pressure between consecutive generations.
	            del generator
	            if torch.cuda.is_available():
	                torch.cuda.empty_cache()
	            gc.collect()

	    seeds_text = "\n".join(f"{i+1}: {s}" for i, s in enumerate(seeds))
	    return outputs, seeds_text

	# Custom CSS handed to gr.Blocks: centers the main column, caps its width,
	# and enlarges the title heading.
	css = """
	#col-container {
	margin: 0 auto;
	max-width: 960px;
	}
	#main-title h1 {font-size: 2.1em !important;}
	"""

	# Build the UI and wire its events.
	# NOTE(review): the nesting of the layout containers below was reconstructed
	# from a listing without indentation - confirm against the deployed layout.
	with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
	    with gr.Column(elem_id="col-container"):
	        gr.Markdown("# RAINBO PRO 3D IMAGE EDIT", elem_id="main-title")
	        gr.Markdown("Test) adapters for the [Qwen-Image-Edit](https://huggingface.co/Qwen/Qwen-Image-Edit-2509) model.")

	        with gr.Row(equal_height=True):
	            # Left column: input image, size controls, prompts, run button.
	            with gr.Column():
	                input_image = gr.Image(label="Upload Image", type="pil", height=290)

	                with gr.Row():
	                    size_preset = gr.Dropdown(
	                        label="Image Size Preset",
	                        choices=SIZE_PRESETS,
	                        value="Smart Auto (closest base + scale)",
	                    )
	                    scale_choice = gr.Dropdown(
	                        label="Smart Scale",
	                        choices=SCALE_CHOICES,
	                        value="Auto",
	                    )

	                # Choices are strings; the handlers convert the value with int().
	                smart_max_long = gr.Dropdown(
	                    label="Smart Max Long Side (Safe default 1024)",
	                    choices=[str(x) for x in SMART_MAX_CHOICES],
	                    value=str(SMART_MAX_LONG_SIDE_DEFAULT),
	                )

	                with gr.Row():
	                    width = gr.Slider(label="Width", minimum=256, maximum=2048, step=8, value=1024)
	                    height = gr.Slider(label="Height", minimum=256, maximum=2048, step=8, value=1024)

	                size_info = gr.Textbox(label="Size Decision Info", lines=2)

	                # ---- main prompts (1-3) ----
	                prompt1 = gr.Text(
	                    label="Prompt 1",
	                    value="move camera to below floor, make this girl to another standing pose, dynamic camera angle from below",
	                )
	                prompt2 = gr.Text(
	                    label="Prompt 2",
	                    value="make this girl to another sitting pose",
	                )
	                prompt3 = gr.Text(
	                    label="Prompt 3",
	                    value="make this girl to another standing pose with hand sign",
	                )

	                # ---- swap buttons ----
	                with gr.Row():
	                    swap_left = gr.Button("◀", variant="secondary")
	                    swap_right = gr.Button("▶", variant="secondary")

	                # ---- alt prompts (4-6) ----
	                with gr.Accordion("Alt Prompts (4-6)", open=False):
	                    prompt4 = gr.Text(
	                        label="Prompt 4",
	                        value="camera zoom in to her face, cute face with smiling, aesthetics image film,",
	                    )
	                    prompt5 = gr.Text(
	                        label="Prompt 5",
	                        value="camera zoom out and she split legs, cute posing",
	                    )
	                    prompt6 = gr.Text(
	                        label="Prompt 6",
	                        value="camera move to up, she look at another, and sitting,",
	                    )

	                run_button = gr.Button("Generate 6 Images (3 prompts x 2 seeds)", variant="primary")

	            # Right column: results, style selection, advanced settings.
	            with gr.Column():
	                output_gallery = gr.Gallery(
	                    label="Outputs (3 x 2 = 6)",
	                    columns=3,
	                    rows=2,
	                    height=380,
	                    preview=True,
	                )

	                # These labels must match the ones handled in set_adapter().
	                lora_adapter = gr.Dropdown(
	                    label="Choose Editing Style",
	                    choices=[
	                        "Photo-to-Anime",
	                        "Multiple-Angles",
	                        "Light-Restoration",
	                        "Multi-Angle-Lighting",
	                        "Upscale-Image",
	                        "Relight",
	                        "Next-Scene",
	                        "Edit-Skin",
	                    ],
	                    value="Next-Scene",
	                )

	                with gr.Accordion("Advanced Settings", open=False, visible=True):
	                    seed = gr.Slider(label="Base Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
	                    randomize_seed = gr.Checkbox(label="Randomize Seeds (6 images)", value=True)
	                    guidance_scale = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
	                    steps = gr.Slider(label="Inference Steps", minimum=1, maximum=50, step=1, value=6) # default 6

	                seeds_box = gr.Textbox(label="Used Seeds (1..6)", lines=6)

	    # Size UI update: recompute width/height/info whenever the preset, the
	    # scale, the max long side, or the uploaded image changes.
	    def _size_update(preset, img, scale, mx, w, h):
	        return apply_size_controls(preset, img, scale, mx, w, h)

	    for evt in (size_preset.change, scale_choice.change, smart_max_long.change, input_image.change):
	        evt(
	            fn=_size_update,
	            inputs=[size_preset, input_image, scale_choice, smart_max_long, width, height],
	            outputs=[width, height, size_info],
	        )

	    # Left/right buttons: both swap prompt1-3 <-> prompt4-6.
	    for btn in (swap_left, swap_right):
	        btn.click(
	            fn=swap_prompt_sets,
	            inputs=[prompt1, prompt2, prompt3, prompt4, prompt5, prompt6],
	            outputs=[prompt1, prompt2, prompt3, prompt4, prompt5, prompt6],
	        )

	    # Generate: the input order must match the parameter order of infer_6pack().
	    run_button.click(
	        fn=infer_6pack,
	        inputs=[
	            input_image,
	            prompt1, prompt2, prompt3,
	            lora_adapter,
	            size_preset, scale_choice, smart_max_long,
	            width, height,
	            seed, randomize_seed, guidance_scale, steps,
	        ],
	        outputs=[output_gallery, seeds_box],
	    )

	# Script entry point: enable the request queue (max_size=30) and launch the app.
	if __name__ == "__main__":
	    demo.queue(max_size=30).launch(mcp_server=True, ssr_mode=False, show_error=True)