"""Generate an image from a text prompt with a three-stage diffusion cascade.

Stages I and II are DeepFloyd IF checkpoints; stage III is the Stable
Diffusion x4 upscaler.  The result of every stage is written to the current
working directory as ``if_stage_I.png``, ``if_stage_II.png`` and
``if_stage_III.png``.
"""

from diffusers import DiffusionPipeline
from diffusers.utils import pt_to_pil
import torch

# --- Load stage 1 ---------------------------------------------------------
# NOTE(review): enable_xformers_memory_efficient_attention() presumably needs
# the optional `xformers` package to be installed -- confirm for the target
# environment before running.
stage_1 = DiffusionPipeline.from_pretrained(
    "DeepFloyd/IF-I-M-v1.0", variant="fp16", torch_dtype=torch.float16
)
stage_1.enable_xformers_memory_efficient_attention()
stage_1.enable_model_cpu_offload()

# --- Load stage 2 ---------------------------------------------------------
# Loaded without a text encoder: below, stage 2 is only ever called with the
# prompt embeddings that stage 1 computed.
stage_2 = DiffusionPipeline.from_pretrained(
    "DeepFloyd/IF-II-M-v1.0", text_encoder=None, variant="fp16", torch_dtype=torch.float16
)
stage_2.enable_xformers_memory_efficient_attention()
stage_2.enable_model_cpu_offload()

# --- Load stage 3 ---------------------------------------------------------
# Stage 3 reuses stage 1's feature extractor, safety checker and watermarker
# instead of loading its own.
safety_modules = {
    "feature_extractor": stage_1.feature_extractor,
    "safety_checker": stage_1.safety_checker,
    "watermarker": stage_1.watermarker,
}
stage_3 = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-x4-upscaler", **safety_modules, torch_dtype=torch.float16
)
stage_3.enable_xformers_memory_efficient_attention()
stage_3.enable_model_cpu_offload()

prompt = 'a photo of a kangaroo wearing an orange hoodie and blue sunglasses standing in front of the eiffel tower holding a sign that says "very deep learning"'

# --- Text embeddings ------------------------------------------------------
# Encode the prompt once; the same embeddings feed both stage 1 and stage 2.
prompt_embeds, negative_embeds = stage_1.encode_prompt(prompt)

# A single generator seeded with 0 is shared by all three stages.
generator = torch.manual_seed(0)

# --- Run stage 1 ----------------------------------------------------------
# output_type="pt" keeps the result in a form that can be handed to the next
# stage; pt_to_pil() converts it only for saving to disk.
image = stage_1(
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=negative_embeds,
    generator=generator,
    output_type="pt",
).images
pt_to_pil(image)[0].save("./if_stage_I.png")

# --- Run stage 2 ----------------------------------------------------------
# Takes the stage 1 output as `image` together with the same embeddings.
image = stage_2(
    image=image,
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=negative_embeds,
    generator=generator,
    output_type="pt",
).images
pt_to_pil(image)[0].save("./if_stage_II.png")

# --- Run stage 3 ----------------------------------------------------------
# Stage 3 receives the raw prompt string rather than precomputed embeddings.
# No output_type is passed here, and the first returned image is saved
# directly without going through pt_to_pil().
image = stage_3(prompt=prompt, image=image, generator=generator, noise_level=100).images
image[0].save("./if_stage_III.png")