import random

import numpy as np
import torch
from PIL import Image
import gradio as gr
import spaces

from qwenimage.debug import ctimed, ftimed
from qwenimage.experiments.experiments_qwen import ExperimentRegistry
from qwenimage.prompt import build_camera_prompt
# --- Model Loading ---
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"main cuda: {torch.cuda.is_available()=}")
exp = ExperimentRegistry.get("qwen_lightning_fa3_aot_int8_fuse_downsize512")()
exp.load()
@spaces.GPU(duration=1500)
def optim_pipe():
    # On ZeroGPU Spaces, CUDA is only reachable inside @spaces.GPU functions,
    # so the expensive optimization pass runs in this one-off GPU call.
    print(f"func cuda: {torch.cuda.is_available()=}")
    exp.optimize()

optim_pipe()
MAX_SEED = np.iinfo(np.int32).max
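# build_camera_prompt (from qwenimage.prompt) is assumed to translate the
# slider/checkbox values into a natural-language camera instruction for the
# multiple-angles LoRA; the only behavior this file relies on is the sentinel
# string "no camera movement" when all controls sit at their neutral values.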
@spaces.GPU
def infer_camera_edit(
    image,
    rotate_deg,
    move_forward,
    vertical_tilt,
    wideangle,
    seed,
    randomize_seed,
    true_guidance_scale,
    num_inference_steps,
    height,
    width,
    prev_output=None,
    progress=gr.Progress(track_tqdm=True),
):
    with ctimed("pre pipe"):
        prompt = build_camera_prompt(rotate_deg, move_forward, vertical_tilt, wideangle)
        print(f"Generated Prompt: {prompt}")
        if randomize_seed:
            seed = random.randint(0, MAX_SEED)
        generator = torch.Generator(device=device).manual_seed(seed)
        # Choose the input image (prefer a fresh upload, else the last output).
        pil_images = []
        if image is not None:
            if isinstance(image, Image.Image):
                pil_images.append(image.convert("RGB"))
            elif hasattr(image, "name"):
                pil_images.append(Image.open(image.name).convert("RGB"))
        elif prev_output:
            pil_images.append(prev_output.convert("RGB"))
        if len(pil_images) == 0:
            raise gr.Error("Please upload an image first.")
        print(f"{len(pil_images)=}")
        if prompt == "no camera movement":
            return image, seed, prompt
    result = exp.run_once(
        image=pil_images,
        prompt=prompt,
        height=height if height != 0 else None,
        width=width if width != 0 else None,
        num_inference_steps=num_inference_steps,
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=1,
    )
    return result, seed, prompt
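# Illustrative direct call, outside the UI (assumes the pipeline above is
# loaded and a GPU is available; height/width of 0 defer sizing to run_once,
# and "photo.jpg" is a hypothetical path):
#   out, used_seed, prompt = infer_camera_edit(
#       Image.open("photo.jpg"), rotate_deg=45, move_forward=0, vertical_tilt=0,
#       wideangle=False, seed=0, randomize_seed=True, true_guidance_scale=1.0,
#       num_inference_steps=2, height=0, width=0,
#   )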
# --- UI ---
css = '''#col-container { max-width: 800px; margin: 0 auto; }
.dark .progress-text{color: white !important}
#examples{max-width: 800px; margin: 0 auto; }'''
def reset_all():
    # Zero the camera controls and raise the is_reset flag so the live-update
    # handler skips inference until end_reset lowers the flag again.
    return [0, 0, 0, False, True]

def end_reset():
    return False
def update_dimensions_on_upload(image):
    if image is None:
        return 1024, 1024
    original_width, original_height = image.size
    if original_width > original_height:
        new_width = 1024
        aspect_ratio = original_height / original_width
        new_height = int(new_width * aspect_ratio)
    else:
        new_height = 1024
        aspect_ratio = original_width / original_height
        new_width = int(new_height * aspect_ratio)
    # Ensure dimensions are multiples of 8
    new_width = (new_width // 8) * 8
    new_height = (new_height // 8) * 8
    return new_width, new_height
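# Worked example (illustrative): a 1920x1080 upload is landscape, so
# new_width = 1024 and aspect_ratio = 1080/1920 = 0.5625, giving
# new_height = int(1024 * 0.5625) = 576; both values are already multiples
# of 8, so the sliders become (1024, 576).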
with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("## 🎬 Qwen Image Edit — Camera Angle Control")
        gr.Markdown("""
Qwen Image Edit 2509 for Camera Control ✨
Using [dx8152's Qwen-Edit-2509-Multiple-angles LoRA](https://huggingface.co/dx8152/Qwen-Edit-2509-Multiple-angles) and [Phr00t/Qwen-Image-Edit-Rapid-AIO](https://huggingface.co/Phr00t/Qwen-Image-Edit-Rapid-AIO/tree/main) for 4-step inference 💨
""")
        with gr.Row():
            with gr.Column():
                image = gr.Image(label="Input Image", type="pil")
                # Hidden state: the previous output, fed back in so edits can
                # chain; type="pil" so infer_camera_edit can call .convert().
                prev_output = gr.Image(value=None, visible=False, type="pil")
                is_reset = gr.Checkbox(value=False, visible=False)
                with gr.Tab("Camera Controls"):
                    rotate_deg = gr.Slider(label="Rotate Right-Left (degrees °)", minimum=-90, maximum=90, step=45, value=0)
                    move_forward = gr.Slider(label="Move Forward → Close-Up", minimum=0, maximum=10, step=5, value=0)
                    vertical_tilt = gr.Slider(label="Vertical Angle (Bird ↔ Worm)", minimum=-1, maximum=1, step=1, value=0)
                    wideangle = gr.Checkbox(label="Wide-Angle Lens", value=False)
                with gr.Row():
                    reset_btn = gr.Button("Reset")
                    run_btn = gr.Button("Generate", variant="primary")
                with gr.Accordion("Advanced Settings", open=False):
                    seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                    randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                    true_guidance_scale = gr.Slider(label="True Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
                    num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=40, step=1, value=2)
                    height = gr.Slider(label="Height", minimum=256, maximum=2048, step=8, value=1024)
                    width = gr.Slider(label="Width", minimum=256, maximum=2048, step=8, value=1024)
            with gr.Column():
                result = gr.Image(label="Output Image", interactive=False, type="pil")
                prompt_preview = gr.Textbox(label="Processed Prompt", interactive=False)
    inputs = [
        image, rotate_deg, move_forward,
        vertical_tilt, wideangle,
        seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width, prev_output,
    ]
    outputs = [result, seed, prompt_preview]
    # Reset behavior: reset_all raises the is_reset flag while it zeroes the
    # controls, and end_reset lowers it afterwards, so value changes caused by
    # the reset do not look like user edits to the live-update handler below.
    reset_btn.click(
        fn=reset_all,
        inputs=None,
        outputs=[rotate_deg, move_forward, vertical_tilt, wideangle, is_reset],
        queue=False,
    ).then(fn=end_reset, inputs=None, outputs=[is_reset], queue=False)

    run_event = run_btn.click(
        fn=infer_camera_edit,
        inputs=inputs,
        outputs=outputs,
    )

    # Image upload triggers dimension update and control reset
    image.upload(
        fn=update_dimensions_on_upload,
        inputs=[image],
        outputs=[width, height],
    ).then(
        fn=reset_all,
        inputs=None,
        outputs=[rotate_deg, move_forward, vertical_tilt, wideangle, is_reset],
        queue=False,
    ).then(
        fn=end_reset,
        inputs=None,
        outputs=[is_reset],
        queue=False,
    )
    # Live updates: sliders re-run inference on release, the checkbox on input.
    @ftimed
    def maybe_infer(is_reset, progress=gr.Progress(track_tqdm=True), *args):
        # Gradio inserts the Progress tracker at this parameter's positional
        # slot, so *args receives exactly control_inputs_with_flag minus the
        # leading is_reset flag.
        if is_reset:
            # A reset is in flight: leave all three outputs unchanged.
            return gr.update(), gr.update(), gr.update()
        else:
            return infer_camera_edit(*args)

    control_inputs = [
        image, rotate_deg, move_forward,
        vertical_tilt, wideangle,
        seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width, prev_output,
    ]
    control_inputs_with_flag = [is_reset] + control_inputs

    for control in [rotate_deg, move_forward, vertical_tilt]:
        control.release(fn=maybe_infer, inputs=control_inputs_with_flag, outputs=outputs)
    wideangle.input(fn=maybe_infer, inputs=control_inputs_with_flag, outputs=outputs)

    # Remember the latest result so follow-up edits can chain from it.
    run_event.then(lambda img, *_: img, inputs=[result], outputs=[prev_output])

demo.launch()
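# On a Hugging Face Space the bare demo.launch() is sufficient; for local
# testing, demo.launch(share=True) (a standard Gradio option, not used here)
# would additionally expose a temporary public URL.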