Spaces:
Sleeping
Sleeping
Add CPU, GPU warm up process when app load first time
Browse files
app.py
CHANGED
|
@@ -189,6 +189,114 @@ print("\n" + "=" * 60)
|
|
| 189 |
print("All models loaded successfully!")
|
| 190 |
print("=" * 60 + "\n")
|
| 191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
|
| 193 |
# Image preprocessing function
|
| 194 |
def preprocess_image(image):
|
|
@@ -628,9 +736,19 @@ with image_blocks as demo:
|
|
| 628 |
api_name='tryon'
|
| 629 |
)
|
| 630 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 631 |
|
| 632 |
print("β Gradio interface components created")
|
| 633 |
print("β Event handlers configured")
|
|
|
|
| 634 |
|
| 635 |
print("\n" + "=" * 60)
|
| 636 |
print("Gradio Application Interface Created Successfully!")
|
|
|
|
| 189 |
print("All models loaded successfully!")
|
| 190 |
print("=" * 60 + "\n")
|
| 191 |
|
| 192 |
+
# Warm-up: initialize the models ahead of time to reduce first-inference
# latency (JIT compilation, CUDA kernel loading, etc. are done up front).
# The banner is emitted as rule / title / rule, one item per line.
print("=" * 60, "Warming up models (CPU)...", "=" * 60, sep="\n")
|
| 197 |
+
|
| 198 |
+
def warmup_models_cpu():
    """Run the CPU-side warm-up steps once at application start.

    Two throwaway operations are executed so their first-call cost is paid
    before a real request arrives: a tokenizer + text-encoder forward pass on
    a dummy prompt, and the image-to-tensor transform on a blank image.

    Returns:
        bool: True when both steps ran; False if any step raised. The error
        is printed rather than propagated, so start-up continues either way.
    """
    try:
        # Step 1: text embedding warm-up (tokenizer + text encoder).
        print("[CPU Warm-up 1/2] Text Encoder warm-up...")
        warmup_text = "a photo of clothing"
        encoded = tokenizer_one(
            warmup_text,
            padding="max_length",
            max_length=tokenizer_one.model_max_length,
            truncation=True,
            return_tensors="pt",
        )
        # Only the forward pass needs gradient tracking disabled.
        with torch.no_grad():
            text_encoder_one(encoded.input_ids, output_hidden_states=True)
        print("β Text Encoder warmed up")

        # Step 2: tensor transform warm-up on a blank white image.
        print("[CPU Warm-up 2/2] Tensor transform warm-up...")
        blank_image = Image.new('RGB', (768, 1024), color='white')
        tensor_transfrom(blank_image)
        print("β Tensor transform warmed up")
    except Exception as err:
        # Best-effort: a failed warm-up must not prevent the app from starting.
        print(f"β CPU Warm-up partially completed: {err}")
        return False
    return True
|
| 226 |
+
|
| 227 |
+
# Run the CPU warm-up now and report how it went.
warmup_success = warmup_models_cpu()
print(
    "\nβ CPU warm-up completed successfully"
    if warmup_success
    else "\nβ CPU warm-up completed with warnings"
)
print("=" * 60 + "\n")
|
| 234 |
+
|
| 235 |
+
# GPU warm-up (run automatically when the app is loaded).
# Intended to pay first-call costs (CUDA kernel loading and, presumably, a
# torch.compile() compilation configured elsewhere in this file) up front.

# Set as soon as a warm-up has been attempted in this process. The page-load
# event that calls warmup_gpu() fires for every browser session, so without
# this flag each visitor would re-run the warm-up and occupy the GPU.
_gpu_warmup_attempted = False


def _release_cuda_cache():
    """Best-effort release of cached CUDA memory; never raises."""
    try:
        torch.cuda.empty_cache()
    except Exception as e:
        # A broken CUDA context must not turn a warm-up into a crash.
        print(f" CUDA cache cleanup skipped: {e}")


@spaces.GPU
def _run_gpu_warmup():
    """Execute the three GPU warm-up passes (text encoder, VAE, UNet).

    Returns:
        str: A status message. Failures are caught, printed and reported in
        the returned message instead of being raised.
    """
    try:
        device = "cuda"
        print("=" * 60)
        print("GPU Warm-up: Triggering torch.compile() first compilation...")
        print("=" * 60)

        # Move the models to the GPU.
        pipe.to(device)
        pipe.unet_encoder.to(device)

        # torch.autocast(device_type=...) replaces the deprecated
        # torch.cuda.amp.autocast() context manager.
        with torch.no_grad(), torch.autocast(device_type=device):
            # 1. Dummy prompt embedding (text encoder GPU warm-up).
            #    Only the positive prompt embedding is used below.
            print("[GPU Warm-up 1/3] Text Encoder GPU warm-up...")
            prompt_embeds, _, _, _ = pipe.encode_prompt(
                "a photo of white t-shirt",
                num_images_per_prompt=1,
                do_classifier_free_guidance=True,
                negative_prompt="low quality",
            )
            print("β Text Encoder GPU warmed up")

            # 2. VAE encode of a dummy image (VAE GPU warm-up).
            print("[GPU Warm-up 2/3] VAE GPU warm-up...")
            dummy_img = torch.randn(
                1, 3, 1024, 768, device=device, dtype=torch.float16
            )
            pipe.vae.encode(dummy_img)
            print("β VAE GPU warmed up")

            # 3. Single UNet forward pass (UNet + torch.compile warm-up).
            # NOTE(review): no added_cond_kwargs are passed and the latent has
            # 4 channels; if pipe.unet is an SDXL-style / inpainting UNet this
            # call may raise and be reported as a skipped warm-up. Confirm
            # against the arguments used by the real inference call.
            print("[GPU Warm-up 3/3] UNet GPU warm-up (torch.compile trigger)...")
            dummy_latent = torch.randn(
                1, 4, 128, 96, device=device, dtype=torch.float16
            )
            dummy_timestep = torch.tensor([999], device=device)
            pipe.unet(
                dummy_latent,
                dummy_timestep,
                encoder_hidden_states=prompt_embeds.to(device, torch.float16),
            )
            print("β UNet GPU warmed up (torch.compile triggered)")

        print("\n" + "=" * 60)
        print("β GPU Warm-up completed! torch.compile() compilation done.")
        print(" All subsequent requests will be faster.")
        print("=" * 60 + "\n")

        return "GPU Warm-up completed successfully!"
    except Exception as e:
        # Best-effort: the app must stay usable when warm-up cannot run.
        print(f"\nβ GPU Warm-up failed: {e}")
        print(" First user request will trigger compilation instead.")
        return f"GPU Warm-up skipped: {e}"
    finally:
        # Free cached GPU memory on both the success and the failure path.
        _release_cuda_cache()


def warmup_gpu():
    """Warm up the GPU models at most once per process.

    Used as a page-load handler; it takes no arguments and returns a status
    string for a (hidden) status component. Only the first call performs the
    warm-up; later calls return immediately. The flag is set before the
    warm-up starts, so a failed attempt is not retried on every page load.
    Two page loads arriving at the same instant could both start a warm-up;
    that is harmless and is not guarded against.

    Returns:
        str: The warm-up status message, or a notice that it already ran.
    """
    global _gpu_warmup_attempted
    if _gpu_warmup_attempted:
        return "GPU Warm-up already performed; skipping."
    _gpu_warmup_attempted = True
    return _run_gpu_warmup()
|
| 299 |
+
|
| 300 |
|
| 301 |
# Image preprocessing function
|
| 302 |
def preprocess_image(image):
|
|
|
|
| 736 |
api_name='tryon'
|
| 737 |
)
|
| 738 |
|
| 739 |
+
# Holds the GPU warm-up status message (hidden component)
|
| 740 |
+
warmup_status = gr.Textbox(visible=False)
|
| 741 |
+
|
| 742 |
+
# Automatically run the GPU warm-up when the app loads (first torch.compile compilation)
|
| 743 |
+
demo.load(
|
| 744 |
+
fn=warmup_gpu,
|
| 745 |
+
inputs=None,
|
| 746 |
+
outputs=warmup_status,
|
| 747 |
+
)
|
| 748 |
|
| 749 |
print("β Gradio interface components created")
|
| 750 |
print("β Event handlers configured")
|
| 751 |
+
print("β GPU warm-up scheduled on app load")
|
| 752 |
|
| 753 |
print("\n" + "=" * 60)
|
| 754 |
print("Gradio Application Interface Created Successfully!")
|