linoyts HF Staff commited on
Commit
6687fea
·
verified ·
1 Parent(s): a0e2bdf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -20
app.py CHANGED
@@ -24,12 +24,9 @@ LANDSCAPE_HEIGHT = 1024
24
  MAX_SEED = np.iinfo(np.int32).max
25
 
26
  FIXED_FPS = 16
27
- MIN_FRAMES_MODEL = 8
28
  MAX_FRAMES_MODEL = 81
29
 
30
- MIN_DURATION = round(MIN_FRAMES_MODEL/FIXED_FPS,1)
31
- MAX_DURATION = round(MAX_FRAMES_MODEL/FIXED_FPS,1)
32
-
33
  vae = AutoencoderKLWan.from_pretrained("Wan-AI/Wan2.2-T2V-A14B-Diffusers", subfolder="vae", torch_dtype=torch.float32)
34
  pipe = WanPipeline.from_pretrained(MODEL_ID,
35
  transformer=WanTransformer3DModel.from_pretrained('linoyts/Wan2.2-T2V-A14B-Diffusers-BF16',
@@ -60,7 +57,7 @@ optimize_pipeline_(pipe,
60
  )
61
 
62
 
63
- default_prompt_i2v = "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage."
64
  default_negative_prompt = "色调艳丽, 过曝, 静态, 细节模糊不清, 字幕, 风格, 作品, 画作, 画面, 静止, 整体发灰, 最差质量, 低质量, JPEG压缩残留, 丑陋的, 残缺的, 多余的手指, 画得不好的手部, 画得不好的脸部, 畸形的, 毁容的, 形态畸形的肢体, 手指融合, 静止不动的画面, 杂乱的背景, 三条腿, 背景人很多, 倒着走"
65
 
66
 
@@ -88,14 +85,13 @@ def generate_image(
88
  progress=gr.Progress(track_tqdm=True),
89
  ):
90
  """
91
- Generate a video from an input image using the Wan 2.2 14B I2V model with Phantom LoRA.
92
 
93
- This function takes an input image and generates a video animation based on the provided
94
- prompt and parameters. It uses an FP8 qunatized Wan 2.2 14B Image-to-Video model in with Phantom LoRA
95
- for fast generation in 6-8 steps.
96
 
97
  Args:
98
- prompt (str): Text prompt describing the desired animation or motion.
99
  negative_prompt (str, optional): Negative prompt to avoid unwanted elements.
100
  Defaults to default_negative_prompt (contains unwanted visual artifacts).
101
  guidance_scale (float, optional): Controls adherence to the prompt. Higher values = more adherence.
@@ -112,18 +108,15 @@ def generate_image(
112
 
113
  Returns:
114
  tuple: A tuple containing:
115
- - video_path (str): Path to the generated video file (.mp4)
116
  - current_seed (int): The seed used for generation (useful when randomize_seed=True)
117
 
118
  Raises:
119
  gr.Error: If input_image is None (no image uploaded).
120
 
121
  Note:
122
- - The function automatically resizes the input image to the target dimensions
123
- - Frame count is calculated as duration_seconds * FIXED_FPS (24)
124
- - Output dimensions are adjusted to be multiples of MOD_VALUE (32)
125
  - The function uses GPU acceleration via the @spaces.GPU decorator
126
- - Generation time varies based on steps and duration (see get_duration function)
127
  """
128
 
129
 
@@ -145,13 +138,11 @@ def generate_image(
145
  return out_img, current_seed
146
 
147
  with gr.Blocks() as demo:
148
- gr.Markdown("# Wan 2.2 T2I (14B)")
149
- #gr.Markdown("run Wan 2.2 in just 6-8 steps, with [FusionX Phantom LoRA by DeeJayT](https://huggingface.co/vrgamedevgirl84/Wan14BT2VFusioniX/tree/main/FusionX_LoRa), compatible with 🧨 diffusers")
150
  with gr.Row():
151
  with gr.Column():
152
- prompt_input = gr.Textbox(label="Prompt", value=default_prompt_i2v)
153
- #duration_seconds_input = gr.Slider(minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=MAX_DURATION, label="Duration (seconds)", info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps.")
154
-
155
  with gr.Accordion("Advanced Settings", open=False):
156
  negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
157
  seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
 
24
  MAX_SEED = np.iinfo(np.int32).max
25
 
26
  FIXED_FPS = 16
27
+ MIN_FRAMES_MODEL = 1
28
  MAX_FRAMES_MODEL = 81
29
 
 
 
 
30
  vae = AutoencoderKLWan.from_pretrained("Wan-AI/Wan2.2-T2V-A14B-Diffusers", subfolder="vae", torch_dtype=torch.float32)
31
  pipe = WanPipeline.from_pretrained(MODEL_ID,
32
  transformer=WanTransformer3DModel.from_pretrained('linoyts/Wan2.2-T2V-A14B-Diffusers-BF16',
 
57
  )
58
 
59
 
60
+ default_prompt_t2v = "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage."
61
  default_negative_prompt = "色调艳丽, 过曝, 静态, 细节模糊不清, 字幕, 风格, 作品, 画作, 画面, 静止, 整体发灰, 最差质量, 低质量, JPEG压缩残留, 丑陋的, 残缺的, 多余的手指, 画得不好的手部, 画得不好的脸部, 畸形的, 毁容的, 形态畸形的肢体, 手指融合, 静止不动的画面, 杂乱的背景, 三条腿, 背景人很多, 倒着走"
62
 
63
 
 
85
  progress=gr.Progress(track_tqdm=True),
86
  ):
87
  """
88
+ Generate an image from a text prompt using the Wan 2.2 14B T2V model.
89
 
90
+ This function takes an input prompt and generates an image based on the provided
91
+ prompt and parameters. It uses an FP8 quantized Wan 2.2 14B Text-to-Video model.
 
92
 
93
  Args:
94
+ prompt (str): Text prompt describing the desired image.
95
  negative_prompt (str, optional): Negative prompt to avoid unwanted elements.
96
  Defaults to default_negative_prompt (contains unwanted visual artifacts).
97
  guidance_scale (float, optional): Controls adherence to the prompt. Higher values = more adherence.
 
108
 
109
  Returns:
110
  tuple: A tuple containing:
111
+ - image_path (str): Path to the generated image
112
  - current_seed (int): The seed used for generation (useful when randomize_seed=True)
113
 
114
  Raises:
115
  gr.Error: If input_image is None (no image uploaded).
116
 
117
  Note:
 
 
 
118
  - The function uses GPU acceleration via the @spaces.GPU decorator
119
+ - Generation time varies based on steps
120
  """
121
 
122
 
 
138
  return out_img, current_seed
139
 
140
  with gr.Blocks() as demo:
141
+ gr.Markdown("# Wan 2.2 (14B) Image")
142
+ gr.Markdown("generate high quality images with Wan 2.2 14B")
143
  with gr.Row():
144
  with gr.Column():
145
+ prompt_input = gr.Textbox(label="Prompt", value=default_prompt_t2v)
 
 
146
  with gr.Accordion("Advanced Settings", open=False):
147
  negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
148
  seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)