wan2-2-14b-fast-t2i

Running on Zero

App Files Files Community

linoyts HF Staff committed on Aug 6, 2025

Commit

6687fea

verified ·

1 Parent(s): a0e2bdf

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -20

app.py CHANGED Viewed

@@ -24,12 +24,9 @@ LANDSCAPE_HEIGHT = 1024
 MAX_SEED = np.iinfo(np.int32).max
 FIXED_FPS = 16
-MIN_FRAMES_MODEL = 8
 MAX_FRAMES_MODEL = 81
-MIN_DURATION = round(MIN_FRAMES_MODEL/FIXED_FPS,1)
-MAX_DURATION = round(MAX_FRAMES_MODEL/FIXED_FPS,1)
 vae = AutoencoderKLWan.from_pretrained("Wan-AI/Wan2.2-T2V-A14B-Diffusers", subfolder="vae", torch_dtype=torch.float32)
 pipe = WanPipeline.from_pretrained(MODEL_ID,
     transformer=WanTransformer3DModel.from_pretrained('linoyts/Wan2.2-T2V-A14B-Diffusers-BF16',
@@ -60,7 +57,7 @@ optimize_pipeline_(pipe,
 )
-default_prompt_i2v = "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage."
 default_negative_prompt = "色调艳丽, 过曝, 静态, 细节模糊不清, 字幕, 风格, 作品, 画作, 画面, 静止, 整体发灰, 最差质量, 低质量, JPEG压缩残留, 丑陋的, 残缺的, 多余的手指, 画得不好的手部, 画得不好的脸部, 畸形的, 毁容的, 形态畸形的肢体, 手指融合, 静止不动的画面, 杂乱的背景, 三条腿, 背景人很多, 倒着走"
@@ -88,14 +85,13 @@ def generate_image(
     progress=gr.Progress(track_tqdm=True),
 ):
     """
-    Generate a video from an input image using the Wan 2.2 14B I2V model with Phantom LoRA.
-    This function takes an input image and generates a video animation based on the provided
-    prompt and parameters. It uses an FP8 qunatized Wan 2.2 14B Image-to-Video model in with Phantom LoRA
-    for fast generation in 6-8 steps.
     Args:
-        prompt (str): Text prompt describing the desired animation or motion.
         negative_prompt (str, optional): Negative prompt to avoid unwanted elements.
             Defaults to default_negative_prompt (contains unwanted visual artifacts).
         guidance_scale (float, optional): Controls adherence to the prompt. Higher values = more adherence.
@@ -112,18 +108,15 @@ def generate_image(
     Returns:
         tuple: A tuple containing:
-            - video_path (str): Path to the generated video file (.mp4)
             - current_seed (int): The seed used for generation (useful when randomize_seed=True)
     Raises:
         gr.Error: If input_image is None (no image uploaded).
     Note:
-        - The function automatically resizes the input image to the target dimensions
-        - Frame count is calculated as duration_seconds * FIXED_FPS (24)
-        - Output dimensions are adjusted to be multiples of MOD_VALUE (32)
         - The function uses GPU acceleration via the @spaces.GPU decorator
-        - Generation time varies based on steps and duration (see get_duration function)
     """
@@ -145,13 +138,11 @@ def generate_image(
     return out_img, current_seed
 with gr.Blocks() as demo:
-    gr.Markdown("# Wan 2.2 T2I (14B)")
-    #gr.Markdown("run Wan 2.2 in just 6-8 steps, with [FusionX Phantom LoRA by DeeJayT](https://huggingface.co/vrgamedevgirl84/Wan14BT2VFusioniX/tree/main/FusionX_LoRa), compatible with 🧨 diffusers")
     with gr.Row():
         with gr.Column():
-            prompt_input = gr.Textbox(label="Prompt", value=default_prompt_i2v)
-            #duration_seconds_input = gr.Slider(minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=MAX_DURATION, label="Duration (seconds)", info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps.")
             with gr.Accordion("Advanced Settings", open=False):
                 negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
                 seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)

 MAX_SEED = np.iinfo(np.int32).max
 FIXED_FPS = 16
+MIN_FRAMES_MODEL = 1
 MAX_FRAMES_MODEL = 81
 vae = AutoencoderKLWan.from_pretrained("Wan-AI/Wan2.2-T2V-A14B-Diffusers", subfolder="vae", torch_dtype=torch.float32)
 pipe = WanPipeline.from_pretrained(MODEL_ID,
     transformer=WanTransformer3DModel.from_pretrained('linoyts/Wan2.2-T2V-A14B-Diffusers-BF16',
 )
+default_prompt_t2v = "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage."
 default_negative_prompt = "色调艳丽, 过曝, 静态, 细节模糊不清, 字幕, 风格, 作品, 画作, 画面, 静止, 整体发灰, 最差质量, 低质量, JPEG压缩残留, 丑陋的, 残缺的, 多余的手指, 画得不好的手部, 画得不好的脸部, 畸形的, 毁容的, 形态畸形的肢体, 手指融合, 静止不动的画面, 杂乱的背景, 三条腿, 背景人很多, 倒着走"
     progress=gr.Progress(track_tqdm=True),
 ):
     """
+    Generate an image from a text prompt using the Wan 2.2 14B T2V model.
+    This function takes an input prompt and generates an image based on the provided
+    prompt and parameters. It uses an FP8 quantized Wan 2.2 14B Text-to-Video model.
     Args:
+        prompt (str): Text prompt describing the desired image.
         negative_prompt (str, optional): Negative prompt to avoid unwanted elements.
             Defaults to default_negative_prompt (contains unwanted visual artifacts).
         guidance_scale (float, optional): Controls adherence to the prompt. Higher values = more adherence.
     Returns:
         tuple: A tuple containing:
+            - image_path (str): Path to the generated image
             - current_seed (int): The seed used for generation (useful when randomize_seed=True)
     Raises:
         gr.Error: If input_image is None (no image uploaded).
     Note:
         - The function uses GPU acceleration via the @spaces.GPU decorator
+        - Generation time varies based on steps
     """
     return out_img, current_seed
 with gr.Blocks() as demo:
+    gr.Markdown("# Wan 2.2 (14B) Image")
+    gr.Markdown("generate high quality images with Wan 2.2 14B")
     with gr.Row():
         with gr.Column():
+            prompt_input = gr.Textbox(label="Prompt", value=default_prompt_t2v)
             with gr.Accordion("Advanced Settings", open=False):
                 negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
                 seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)