Spaces:

lenML
/

Z-Image-Turbo

Runtime error

App Files Files Community

Z-Image-Turbo / app.py

lenML

Update app.py

babc3bc verified 14 days ago

raw

history blame contribute delete

12.5 kB

	import torch
	import spaces
	import gradio as gr
	import time
	import re
	import random
	import os
	from diffusers import DiffusionPipeline, FlowMatchEulerDiscreteScheduler
	import warnings

	# Suppress every warning category for the whole process.
	warnings.filterwarnings("ignore")

	# ==================== 1. Resolution presets ====================
	# Aspect-ratio tags, in the order the presets are listed for every base size.
	_ASPECT_RATIOS = (
	    "9:16", "1:1", "9:7", "7:9", "4:3", "3:4",
	    "3:2", "2:3", "16:9", "21:9", "9:21",
	)

	# (width, height) pairs per resolution base, aligned index-by-index with
	# _ASPECT_RATIOS.
	_SIZES_BY_BASE = {
	    "1024": (
	        (720, 1280), (1024, 1024), (1152, 896), (896, 1152),
	        (1152, 864), (864, 1152), (1248, 832), (832, 1248),
	        (1280, 720), (1344, 576), (576, 1344),
	    ),
	    "1280": (
	        (864, 1536), (1280, 1280), (1440, 1120), (1120, 1440),
	        (1472, 1104), (1104, 1472), (1536, 1024), (1024, 1536),
	        (1536, 864), (1680, 720), (720, 1680),
	    ),
	    "1536": (
	        (1152, 2048), (1536, 1536), (1728, 1344), (1344, 1728),
	        (1728, 1296), (1296, 1728), (1872, 1248), (1248, 1872),
	        (2048, 1152), (2016, 864), (864, 2016),
	    ),
	}

	# Maps a resolution base ("1024" / "1280" / "1536") to the list of
	# "WIDTHxHEIGHT (ratio)" labels offered for that base.
	RES_CHOICES = {
	    base: [
	        f"{width}x{height} ({ratio})"
	        for (width, height), ratio in zip(sizes, _ASPECT_RATIOS)
	    ]
	    for base, sizes in _SIZES_BY_BASE.items()
	}

	def get_resolution(resolution_str):
	    """Extract (width, height) from a resolution label, rounded down to multiples of 8.

	    Accepts labels such as "720x1280 (9:16)" as well as spaced variants like
	    "720 x 1280" or "720 × 1280". The separator may be "x", "X" or "×", with
	    optional whitespace on either side.

	    Args:
	        resolution_str: The label to parse; may be None or empty.

	    Returns:
	        A (width, height) tuple of ints, each floored to a multiple of 8.
	        Falls back to (1024, 1024) when the input is empty or contains no
	        "WIDTHxHEIGHT" pattern.
	    """
	    if not resolution_str:
	        return 1024, 1024
	    # Whitespace around the separator must be optional: the preset labels in
	    # this file are written without spaces ("720x1280"), which a mandatory
	    # "\s" on each side would never match.
	    match = re.search(r"(\d+)\s*[×xX]\s*(\d+)", resolution_str)
	    if match is None:
	        return 1024, 1024
	    width, height = (int(group) for group in match.groups())
	    return width - width % 8, height - height % 8

	# ==================== 2. Model loading and core optimisations ====================
	print("🚀 Loading Z-Image-Turbo pipeline...")

	# Original author's note: trust_remote_code must be True so that Z-Image's
	# custom Pipeline and Transformer classes are loaded; otherwise
	# set_attention_backend cannot be called.
	pipe = DiffusionPipeline.from_pretrained(
	    "Tongyi-MAI/Z-Image-Turbo",
	    trust_remote_code=True,
	    use_safetensors=True,
	    low_cpu_mem_usage=True,
	    torch_dtype=torch.bfloat16,
	)

	# Rebuild the scheduler as a FlowMatchEulerDiscreteScheduler with shift=3.0,
	# reusing the loaded scheduler's config minus the "algorithm_type" key.
	# Failure here is non-fatal: the pipeline keeps whatever scheduler it loaded.
	try:
	    scheduler_config = {
	        key: value
	        for key, value in dict(pipe.scheduler.config).items()
	        if key != "algorithm_type"
	    }
	    pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config, shift=3.0)
	    print("✅ Scheduler optimized with shift=3.0")
	except Exception as exc:
	    print(f"⚠️ Scheduler config warning: {exc}")

	# Move the pipeline to the GPU.
	pipe.to("cuda")


	print("Enabling torch.compile optimizations...")
	# Inductor tuning flags, applied in the same order as before.
	_inductor_cfg = torch._inductor.config
	_inductor_cfg.conv_1x1_as_mm = True
	_inductor_cfg.coordinate_descent_tuning = True
	_inductor_cfg.epilogue_fusion = False
	_inductor_cfg.coordinate_descent_check_all_directions = True
	_inductor_cfg.max_autotune_gemm = True
	_inductor_cfg.max_autotune_gemm_backends = "TRITON,ATEN"
	_inductor_cfg.triton.cudagraphs = False
	# Try the attention backends in priority order and enable the first that works.
	def enable_best_attention_backend(pipeline):
	    """Enable the first usable attention backend on ``pipeline.transformer``.

	    Candidate backend names are tried in priority order via
	    ``pipeline.transformer.set_attention_backend(name)``; the first call that
	    does not raise wins. If the transformer has no such method, or every
	    candidate raises, ``pipeline.enable_xformers_memory_efficient_attention()``
	    is attempted as a best-effort fallback.

	    Args:
	        pipeline: Object exposing ``transformer`` and, optionally,
	            ``enable_xformers_memory_efficient_attention``.

	    Returns:
	        The name of the backend that was enabled through
	        ``set_attention_backend``, or ``None`` if none could be set
	        (regardless of whether the xFormers fallback succeeded).
	    """
	    backends = [
	        # ===== S tier: current best =====
	        "flash_varlen",  # FA v2 varlen: stable + high performance
	        "_flash_3_varlen_hub",  # FA v3 varlen (hub): very strong on SM90
	        "_flash_varlen_3",  # FA v3 varlen (local)
	        "_flash_3",  # FA v3, non-varlen
	        "flash",  # FA v2, non-varlen

	        # ===== A tier: acceptable / high-performance alternatives =====
	        "flash_varlen_hub",
	        "flash_hub",
	        "xformers",  # mature, but slightly slower than FA
	        "_native_flash",

	        # ===== B tier: framework-native / compatibility first =====
	        "native",
	        "_native_efficient",
	        "_native_cudnn",

	        # ===== C tier: backend-specific / limited scenarios =====
	        "flex",
	        "_native_xla",
	        "_native_npu",
	        "aiter",

	        # ===== D tier: Sage / experimental quantized implementations =====
	        "sage",
	        "sage_hub",
	        "sage_varlen",
	        "_sage_qk_int8_pv_fp16_cuda",
	        "_sage_qk_int8_pv_fp16_triton",
	        "_sage_qk_int8_pv_fp8_cuda",
	        "_sage_qk_int8_pv_fp8_cuda_sm90",

	        # ===== Fallback =====
	        "_native_math",
	    ]

	    # Look the method up once instead of failing with AttributeError on
	    # every candidate when the transformer does not provide it.
	    transformer = getattr(pipeline, "transformer", None)
	    set_backend = getattr(transformer, "set_attention_backend", None)

	    if callable(set_backend):
	        last_error = None
	        for backend in backends:
	            try:
	                set_backend(backend)
	            except Exception as exc:
	                # A failing candidate is expected here; remember the reason
	                # and move on to the next one.
	                last_error = exc
	                continue
	            print(f"✅ Attention backend set to: {backend}")
	            return backend
	        print(
	            "⚠️ Warning: none of the "
	            f"{len(backends)} attention backends could be enabled "
	            f"(last error: {last_error})"
	        )
	    else:
	        print("⚠️ Warning: Transformer model does not support 'set_attention_backend'. Custom code might not be loaded.")

	    # Nothing was set through set_attention_backend: try standard xFormers.
	    try:
	        pipeline.enable_xformers_memory_efficient_attention()
	        print("✅ Standard xFormers enabled as fallback")
	    except Exception as exc:
	        # Best-effort only: report why, but never fail start-up over it.
	        print(f"⚠️ xFormers fallback not enabled: {exc}")
	    return None

	# Apply the attention backend selection to the loaded pipeline.
	enable_best_attention_backend(pipe)

	# VAE memory optimisation. Best-effort: a failure is reported but must not
	# abort start-up. Only Exception is caught so that KeyboardInterrupt and
	# SystemExit still propagate.
	try:
	    pipe.vae.enable_slicing()
	except Exception as exc:
	    print(f"⚠️ VAE slicing not enabled: {exc}")

	# Transformer compilation is currently disabled; kept for reference.
	# print("Compiling transformer...")
	# pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune-no-cudagraphs", fullgraph=False)

	# ==================== 3. Generation logic ====================
	@spaces.GPU
	def generate_image(
	    prompt,
	    resolution_choice,
	    use_custom_res,
	    custom_width,
	    custom_height,
	    num_inference_steps,
	    seed,
	    randomize_seed,
	    negative_prompt,
	    gallery_history,
	    progress=gr.Progress(track_tqdm=True),
	):
	    """Generate one image and prepend it to the gallery history.

	    Args:
	        prompt: Text prompt; must contain at least 2 non-whitespace-trimmed
	            characters.
	        resolution_choice: Preset label parsed by ``get_resolution``; used
	            when ``use_custom_res`` is falsy.
	        use_custom_res: When truthy, ``custom_width`` / ``custom_height`` are
	            used instead of the preset.
	        custom_width: Custom width; floored to a multiple of 8.
	        custom_height: Custom height; floored to a multiple of 8.
	        num_inference_steps: Number of sampling steps (converted to int).
	        seed: Seed to use when ``randomize_seed`` is falsy. ``None`` is
	            treated like a request for a random seed.
	        randomize_seed: When truthy, a fresh seed in [0, 2**32 - 1] is drawn.
	        negative_prompt: Optional negative prompt; blank values become None.
	        gallery_history: Existing gallery items, or None.
	        progress: Gradio progress tracker (tqdm tracking enabled).

	    Returns:
	        A ``(gallery_history, seed)`` tuple: the history list with the new
	        ``(image, caption)`` entry at index 0, and the seed actually used.

	    Raises:
	        gr.Error: If the prompt is missing/too short, or if anything fails
	            during generation (message prefixed with "生成错误: ").
	    """
	    gallery_history = list(gallery_history) if gallery_history else []

	    # Validate outside the try block so the user sees this message as-is
	    # instead of having it re-wrapped as a generation error.
	    if not prompt or len(prompt.strip()) < 2:
	        raise gr.Error("请输入提示词 (Prompt)")

	    try:
	        prompt = prompt.strip()
	        neg_prompt = negative_prompt.strip() if negative_prompt else None

	        if use_custom_res:
	            width = int(custom_width) - int(custom_width) % 8
	            height = int(custom_height) - int(custom_height) % 8
	        else:
	            width, height = get_resolution(resolution_choice)

	        # An empty seed field arrives as None; fall back to a random seed
	        # rather than crashing in int(None).
	        if randomize_seed or seed is None:
	            seed = random.randint(0, 2**32 - 1)
	        seed = int(seed)
	        steps = int(num_inference_steps)

	        start_time = time.time()
	        generator = torch.Generator("cuda").manual_seed(seed)

	        # Release cached GPU memory before the run.
	        torch.cuda.empty_cache()

	        # torch.autocast("cuda", ...) replaces the deprecated
	        # torch.cuda.amp.autocast(...).
	        # NOTE(review): guidance_scale is fixed at 0.0, so the negative
	        # prompt presumably has no effect — confirm against the pipeline.
	        with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
	            image = pipe(
	                prompt=prompt,
	                height=height,
	                width=width,
	                num_inference_steps=steps,
	                guidance_scale=0.0,
	                generator=generator,
	                negative_prompt=neg_prompt,
	                max_sequence_length=512,
	            ).images[0]

	        gen_time = time.time() - start_time

	        # Caption for the history entry. Plain "|" separators: the previous
	        # "\|" was an invalid escape that leaked backslashes into the label.
	        info_label = f"{width}x{height} | Steps: {steps} | Seed: {seed} | {gen_time:.2f}s"
	        gallery_history.insert(0, (image, info_label))

	        return gallery_history, seed

	    except gr.Error:
	        # Already user-facing; do not wrap it a second time.
	        raise
	    except Exception as e:
	        raise gr.Error(f"生成错误: {str(e)}") from e

	# ==================== 4. UI styles ====================
	# Custom stylesheet handed to gr.Blocks(css=...) below. It:
	#   - imports the Inter web font from Google Fonts and applies it globally,
	#   - styles the page header (.header-container / .header-title / .header-subtitle),
	#   - styles the generate button (.primary-btn, including its hover state),
	#   - styles the bordered panels (.panel-container, with a .dark variant).
	css = """
	@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;800&display=swap');
	body, .gradio-container { font-family: 'Inter', sans-serif !important; }

	.header-container { text-align: center; margin-bottom: 20px; }
	.header-title {
	font-size: 2.5rem; font-weight: 800; margin: 0;
	background: linear-gradient(135deg, #f59e0b, #ea580c);
	-webkit-background-clip: text; -webkit-text-fill-color: transparent;
	}
	.header-subtitle { font-size: 1rem; color: #6b7280; font-weight: 500; }

	.primary-btn {
	background: linear-gradient(90deg, #f59e0b 0%, #d97706 100%) !important;
	border: none !important;
	color: white !important;
	font-weight: 600 !important;
	font-size: 1.1rem !important;
	box-shadow: 0 4px 6px -1px rgba(245, 158, 11, 0.2) !important;
	}
	.primary-btn:hover { transform: translateY(-2px); box-shadow: 0 10px 15px -3px rgba(245, 158, 11, 0.3) !important; }

	.panel-container {
	background: #ffffff; border: 1px solid #e5e7eb; border-radius: 12px; padding: 15px;
	}
	.dark .panel-container { background: #1f2937; border-color: #374151; }
	"""

	# ==================== 5. Gradio interface ====================
	# Builds the Blocks app (`demo`): a control column (prompt, resolution,
	# settings) and a gallery column, followed by the event wiring. The app is
	# launched only when this file is run as a script.
	with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange"), css=css, title="Z-Image-Turbo") as demo:

	# Page header; styled by the .header-* rules in `css`.
	gr.HTML("""
	<div class="header-container">
	<h1 class="header-title">⚡ Z-Image-Turbo</h1>
	<p class="header-subtitle">Optimized Backend • 8 Steps • Gallery History</p>
	</div>
	""")

	with gr.Row():
	# --- Control panel ---
	with gr.Column(scale=4, min_width=320):
	# Prompt inputs and the generate button.
	with gr.Group(elem_classes="panel-container"):
	prompt = gr.Textbox(
	label="Prompt",
	placeholder="Enter your prompt here...",
	lines=3
	)
	negative_prompt = gr.Textbox(
	label="Negative Prompt",
	placeholder="Low quality, blurry...",
	lines=1
	)
	generate_btn = gr.Button("🚀 Generate", elem_classes="primary-btn")

	# Resolution selection: a base size plus a ratio preset from RES_CHOICES.
	with gr.Group(elem_classes="panel-container"):
	gr.Markdown("### 📐 Resolution")
	res_category = gr.Radio(
	choices=["1024", "1280", "1536"],
	value="1024",
	label="Resolution Base",
	container=False
	)
	resolution_dropdown = gr.Dropdown(
	choices=RES_CHOICES["1024"],
	value=RES_CHOICES["1024"][0],
	label="Select Ratio",
	show_label=False
	)

	# Optional custom size; the slider row starts hidden and is toggled by
	# the use_custom_res.change handler below.
	with gr.Accordion("Custom Size", open=False):
	use_custom_res = gr.Checkbox(label="Enable Custom", value=False)
	with gr.Row(visible=False) as custom_res_row:
	width_slider = gr.Slider(512, 1536, value=1024, step=64, label="W")
	height_slider = gr.Slider(512, 1536, value=1024, step=64, label="H")

	# Sampling settings: step count and seed handling.
	with gr.Accordion("⚙️ Settings", open=False):
	with gr.Group(elem_classes="panel-container"):
	steps_slider = gr.Slider(4, 20, value=8, step=1, label="Steps")
	with gr.Row():
	random_seed = gr.Checkbox(label="Random Seed", value=True)
	# Starts hidden because "Random Seed" starts checked; toggled by the
	# random_seed.change handler below.
	seed_input = gr.Number(label="Seed", value=42, visible=False, precision=0)

	# --- Gallery ---
	with gr.Column(scale=6, min_width=500):
	output_gallery = gr.Gallery(
	label="History",
	value=[],
	columns=[2],
	rows=[2],
	object_fit="contain",
	height="auto",
	show_share_button=True,
	show_download_button=True,
	interactive=False
	)
	with gr.Row():
	last_seed_display = gr.Textbox(label="Last Seed", interactive=False, scale=3)
	clear_btn = gr.Button("🗑️ Clear", scale=1, variant="secondary")

	# Interaction logic
	# Swap the ratio dropdown's choices to those of the selected base and
	# select the first entry.
	def update_resolution_list(category):
	return gr.Dropdown(choices=RES_CHOICES[category], value=RES_CHOICES[category][0])

	res_category.change(update_resolution_list, inputs=res_category, outputs=resolution_dropdown)

	# Enabling custom size shows the slider row and makes the preset dropdown
	# non-interactive; disabling it does the opposite.
	use_custom_res.change(
	lambda x: (gr.Row(visible=x), gr.Dropdown(interactive=not x)),
	inputs=use_custom_res, outputs=[custom_res_row, resolution_dropdown]
	)

	# The manual seed field is visible only while "Random Seed" is unchecked.
	random_seed.change(lambda x: gr.Number(visible=not x), inputs=random_seed, outputs=seed_input)

	# The gallery is passed in as an input as well as an output so that
	# generate_image can add the new image to the existing history.
	generate_btn.click(
	fn=generate_image,
	inputs=[prompt, resolution_dropdown, use_custom_res, width_slider, height_slider, steps_slider, seed_input, random_seed, negative_prompt, output_gallery],
	outputs=[output_gallery, last_seed_display]
	)

	# Reset the gallery to empty and blank the last-seed display.
	clear_btn.click(lambda: ([], ""), outputs=[output_gallery, last_seed_display])

	# Launch the app only when executed directly.
	if __name__ == "__main__":
	demo.launch()