import random

import numpy as np
import torch
from PIL import Image
import gradio as gr
import spaces

from qwenimage.debug import ctimed, ftimed
from qwenimage.experiments.experiments_qwen import ExperimentRegistry
from qwenimage.prompt import build_camera_prompt
# --- Model Loading ---
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"main cuda: {torch.cuda.is_available()=}")
exp = ExperimentRegistry.get("qwen_lightning_fa3_aot_int8_fuse_downsize512")()
exp.load()
@spaces.GPU(duration=1500)
def optim_pipe():
    # On ZeroGPU Spaces, CUDA is only reachable inside @spaces.GPU functions,
    # so the expensive optimization pass runs in this one-off GPU call.
    print(f"func cuda: {torch.cuda.is_available()=}")
    exp.optimize()

optim_pipe()
MAX_SEED = np.iinfo(np.int32).max
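# build_camera_prompt (from qwenimage.prompt) is assumed to translate the
# slider/checkbox values into a natural-language camera instruction for the
# multiple-angles LoRA; the only behavior this file relies on is the sentinel
# string "no camera movement" when all controls sit at their neutral values.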
@spaces.GPU
def infer_camera_edit(
    image,
    rotate_deg,
    move_forward,
    vertical_tilt,
    wideangle,
    seed,
    randomize_seed,
    true_guidance_scale,
    num_inference_steps,
    height,
    width,
    prev_output=None,
    progress=gr.Progress(track_tqdm=True),
):
    with ctimed("pre pipe"):
        prompt = build_camera_prompt(rotate_deg, move_forward, vertical_tilt, wideangle)
        print(f"Generated Prompt: {prompt}")
        if randomize_seed:
            seed = random.randint(0, MAX_SEED)
        generator = torch.Generator(device=device).manual_seed(seed)
        # Choose the input image (prefer a fresh upload, else the last output).
        pil_images = []
        if image is not None:
            if isinstance(image, Image.Image):
                pil_images.append(image.convert("RGB"))
            elif hasattr(image, "name"):
                pil_images.append(Image.open(image.name).convert("RGB"))
        elif prev_output:
            pil_images.append(prev_output.convert("RGB"))
        if len(pil_images) == 0:
            raise gr.Error("Please upload an image first.")
        print(f"{len(pil_images)=}")
        if prompt == "no camera movement":
            return image, seed, prompt
    result = exp.run_once(
        image=pil_images,
        prompt=prompt,
        height=height if height != 0 else None,
        width=width if width != 0 else None,
        num_inference_steps=num_inference_steps,
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=1,
    )
    return result, seed, prompt
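# Illustrative direct call, outside the UI (assumes the pipeline above is
# loaded and a GPU is available; height/width of 0 defer sizing to run_once,
# and "photo.jpg" is a hypothetical path):
#   out, used_seed, prompt = infer_camera_edit(
#       Image.open("photo.jpg"), rotate_deg=45, move_forward=0, vertical_tilt=0,
#       wideangle=False, seed=0, randomize_seed=True, true_guidance_scale=1.0,
#       num_inference_steps=2, height=0, width=0,
#   )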
# --- UI ---
css = '''#col-container { max-width: 800px; margin: 0 auto; }
.dark .progress-text{color: white !important}
#examples{max-width: 800px; margin: 0 auto; }'''
def reset_all():
    # Zero the camera controls and raise the is_reset flag so the live-update
    # handler skips inference until end_reset lowers the flag again.
    return [0, 0, 0, False, True]

def end_reset():
    return False
def update_dimensions_on_upload(image):
    if image is None:
        return 1024, 1024
    original_width, original_height = image.size
    if original_width > original_height:
        new_width = 1024
        aspect_ratio = original_height / original_width
        new_height = int(new_width * aspect_ratio)
    else:
        new_height = 1024
        aspect_ratio = original_width / original_height
        new_width = int(new_height * aspect_ratio)
    # Ensure dimensions are multiples of 8
    new_width = (new_width // 8) * 8
    new_height = (new_height // 8) * 8
    return new_width, new_height
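# Worked example (illustrative): a 1920x1080 upload is landscape, so
# new_width = 1024 and aspect_ratio = 1080/1920 = 0.5625, giving
# new_height = int(1024 * 0.5625) = 576; both values are already multiples
# of 8, so the sliders become (1024, 576).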
with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("## 🎬 Qwen Image Edit — Camera Angle Control")
        gr.Markdown("""
Qwen Image Edit 2509 for Camera Control ✨
Using [dx8152's Qwen-Edit-2509-Multiple-angles LoRA](https://huggingface.co/dx8152/Qwen-Edit-2509-Multiple-angles) and [Phr00t/Qwen-Image-Edit-Rapid-AIO](https://huggingface.co/Phr00t/Qwen-Image-Edit-Rapid-AIO/tree/main) for 4-step inference 💨
""")
        with gr.Row():
            with gr.Column():
                image = gr.Image(label="Input Image", type="pil")
                # Hidden state: the previous output, fed back in so edits can
                # chain; type="pil" so infer_camera_edit can call .convert().
                prev_output = gr.Image(value=None, visible=False, type="pil")
                is_reset = gr.Checkbox(value=False, visible=False)
                with gr.Tab("Camera Controls"):
                    rotate_deg = gr.Slider(label="Rotate Right-Left (degrees °)", minimum=-90, maximum=90, step=45, value=0)
                    move_forward = gr.Slider(label="Move Forward → Close-Up", minimum=0, maximum=10, step=5, value=0)
                    vertical_tilt = gr.Slider(label="Vertical Angle (Bird ↔ Worm)", minimum=-1, maximum=1, step=1, value=0)
                    wideangle = gr.Checkbox(label="Wide-Angle Lens", value=False)
                with gr.Row():
                    reset_btn = gr.Button("Reset")
                    run_btn = gr.Button("Generate", variant="primary")
                with gr.Accordion("Advanced Settings", open=False):
                    seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                    randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                    true_guidance_scale = gr.Slider(label="True Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
                    num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=40, step=1, value=2)
                    height = gr.Slider(label="Height", minimum=256, maximum=2048, step=8, value=1024)
                    width = gr.Slider(label="Width", minimum=256, maximum=2048, step=8, value=1024)
            with gr.Column():
                result = gr.Image(label="Output Image", interactive=False, type="pil")
                prompt_preview = gr.Textbox(label="Processed Prompt", interactive=False)
    inputs = [
        image, rotate_deg, move_forward,
        vertical_tilt, wideangle,
        seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width, prev_output,
    ]
    outputs = [result, seed, prompt_preview]
    # Reset behavior: reset_all raises the is_reset flag while it zeroes the
    # controls, and end_reset lowers it afterwards, so value changes caused by
    # the reset do not look like user edits to the live-update handler below.
    reset_btn.click(
        fn=reset_all,
        inputs=None,
        outputs=[rotate_deg, move_forward, vertical_tilt, wideangle, is_reset],
        queue=False,
    ).then(fn=end_reset, inputs=None, outputs=[is_reset], queue=False)

    run_event = run_btn.click(
        fn=infer_camera_edit,
        inputs=inputs,
        outputs=outputs,
    )

    # Image upload triggers dimension update and control reset
    image.upload(
        fn=update_dimensions_on_upload,
        inputs=[image],
        outputs=[width, height],
    ).then(
        fn=reset_all,
        inputs=None,
        outputs=[rotate_deg, move_forward, vertical_tilt, wideangle, is_reset],
        queue=False,
    ).then(
        fn=end_reset,
        inputs=None,
        outputs=[is_reset],
        queue=False,
    )
    # Live updates: sliders re-run inference on release, the checkbox on input.
    @ftimed
    def maybe_infer(is_reset, progress=gr.Progress(track_tqdm=True), *args):
        # Gradio inserts the Progress tracker at this parameter's positional
        # slot, so *args receives exactly control_inputs_with_flag minus the
        # leading is_reset flag.
        if is_reset:
            # A reset is in flight: leave all three outputs unchanged.
            return gr.update(), gr.update(), gr.update()
        else:
            return infer_camera_edit(*args)

    control_inputs = [
        image, rotate_deg, move_forward,
        vertical_tilt, wideangle,
        seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width, prev_output,
    ]
    control_inputs_with_flag = [is_reset] + control_inputs

    for control in [rotate_deg, move_forward, vertical_tilt]:
        control.release(fn=maybe_infer, inputs=control_inputs_with_flag, outputs=outputs)
    wideangle.input(fn=maybe_infer, inputs=control_inputs_with_flag, outputs=outputs)

    # Remember the latest result so follow-up edits can chain from it.
    run_event.then(lambda img, *_: img, inputs=[result], outputs=[prev_output])

demo.launch()
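# On a Hugging Face Space the bare demo.launch() is sufficient; for local
# testing, demo.launch(share=True) (a standard Gradio option, not used here)
# would additionally expose a temporary public URL.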