import os
import subprocess
import sys
import io
import gradio as gr
import numpy as np
import random
import spaces
import torch
from diffusers import Flux2Pipeline, Flux2Transformer2DModel
from diffusers import BitsAndBytesConfig as DiffBitsAndBytesConfig
from optimization import optimize_pipeline_
import requests
from PIL import Image
import json
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1024
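
# The Mistral text encoder runs in a separate Space
# (multimodalart/mistral-text-encoder), so this app only has to keep the
# 32B transformer in GPU memory; the pipeline below is loaded with
# text_encoder=None and fed the precomputed embeddings returned here.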
def remote_text_encoder(prompts):
    """Encode `prompts` on the remote text-encoder Space and return the embeddings."""
    from gradio_client import Client

    client = Client("multimodalart/mistral-text-encoder")
    result = client.predict(
        prompt=prompts,
        api_name="/encode_text"
    )
    # The endpoint returns a path to a serialized embeddings tensor
    prompt_embeds = torch.load(result[0])
    return prompt_embeds

# Load model
repo_id = "black-forest-labs/FLUX.2-dev"
dit = Flux2Transformer2DModel.from_pretrained(
    repo_id,
    subfolder="transformer",
    torch_dtype=torch.bfloat16
)
pipe = Flux2Pipeline.from_pretrained(
    repo_id,
    text_encoder=None,
    transformer=dit,
    torch_dtype=torch.bfloat16
)
pipe.to("cuda")
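
# Use the Flash Attention 3 kernel distributed via the Hugging Face Hub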
pipe.transformer.set_attention_backend("_flash_3_hub")
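
# Warm-up pass: run the pipeline once on dummy inputs (a blank image, a
# one-word prompt, a single step) so later calls reuse the optimized graph.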
optimize_pipeline_(
    pipe,
    image=[Image.new("RGB", (1024, 1024))],
    prompt_embeds=remote_text_encoder("prompt").to("cuda"),
    guidance_scale=2.5,
    width=1024,
    height=1024,
    num_inference_steps=1
)
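
# Estimate the GPU time slot to request (in seconds): each denoising step
# gets slower as more conditioning images are attached.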
def get_duration(prompt, input_images=None, seed=42, randomize_seed=False, width=1024, height=1024, num_inference_steps=50, guidance_scale=2.5, progress=gr.Progress(track_tqdm=True)):
    num_images = 0 if input_images is None else len(input_images)
    step_duration = 1 + 0.7 * num_images
    return max(65, num_inference_steps * step_duration + 10)

# Request a ZeroGPU slot sized by get_duration for each call
@spaces.GPU(duration=get_duration)
def infer(prompt, input_images=None, seed=42, randomize_seed=False, width=1024, height=1024, num_inference_steps=50, guidance_scale=2.5, progress=gr.Progress(track_tqdm=True)):
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # Get prompt embeddings from remote text encoder
    progress(0.1, desc="Encoding prompt...")
    prompt_embeds = remote_text_encoder(prompt).to("cuda")

    # Prepare image list (convert None or empty gallery to None)
    image_list = None
    if input_images is not None and len(input_images) > 0:
        image_list = []
        for item in input_images:
            # Gallery items are (image, caption) tuples; keep the image only
            image_list.append(item[0])

    # Generate image
    progress(0.3, desc="Generating image...")
    generator = torch.Generator(device=device).manual_seed(seed)
    image = pipe(
        prompt_embeds=prompt_embeds,
        image=image_list,
        width=width,
        height=height,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        generator=generator,
    ).images[0]

    return image, seed

examples = [
    ["Make a flower vase on the living room table. The vase's color is a gradient starting at #02eb3c and ending at #edfa3c. The flowers in the vase are #ff0088"],
    ["A photorealistic infographic showing the full structure of the Berlin TV Tower (Fernsehturm) from its base on the ground to the tip of the antenna: a full vertical view of the whole structure including the concrete shaft, the metal sphere, and the antenna spire. A slight low-angle perspective looking up at the iconic sphere, perfectly centered on a clean white background. Left label with a thin horizontal connector line: the text '368m' in very large bold dark gray numerals (#2D3748) positioned exactly at the antenna tip, with 'TOTAL HEIGHT' in small capitals underneath. The text '207m' in very large bold type with 'TELECAF\u00c9' in small capitals underneath, its connector line touching the sphere exactly at window height. Right label with a horizontal connector line touching the very top of the sphere: the text '32m' in very large bold dark gray numerals with 'SPHERE DIAMETER' in small capitals underneath. Three evenly spaced bottom sections: left, the large text '986' in very bold dark gray with 'STEPS' in capitals underneath; center, 'BERLIN TV TOWER' in bold capitals with 'FERNSEHTURM' in a lighter weight underneath; right, 'INAUGURATED' in bold capitals with 'OCTOBER 3, 1969' underneath. All typography in a modern grotesque font (like Inter or Helvetica), color #2D3748, in a clean, minimal technical-diagram style. The horizontal connector lines are thin, precise, clearly visible, and touch the tower structure exactly at the corresponding measurement points. A professional architectural elevation aesthetic with a dramatic low-angle perspective that conveys the grandeur of the height, a poster-grade infographic design with a perfect visual hierarchy."],
    ["A close-up photo of a cozy young capybara flashing a peace sign on a golden banana in the rain"],
    ["A kawaii die-cut sticker of a chubby orange cat with big sparkling eyes, a happy smile with one paw raised in greeting, and a small pink heart-shaped nose. The design should have bold black outlines and soft rounded lines with gentle gradient shading on pink cheeks."],
]

examples_images = [
    # ["Replace the top of the person from image 1 with the one from image 2", ["person1.webp", "woman2.webp"]],
    ["The person from image 1 is petting the cat from image 2, with the bird from image 3 above them", ["woman1.webp", "cat_window.webp", "bird.webp"]]
]
| css=""" | |
| #col-container { | |
| margin: 0 auto; | |
| max-width: 620px; | |
| } | |
| """ | |

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("""# FLUX.2 [dev]
FLUX.2 [dev] is a 32B-parameter rectified flow model that can generate, edit, and combine images from text instructions [[model](https://huggingface.co/black-forest-labs/FLUX.2-dev)], [[blog](https://bfl.ai/blog/flux-2)]
        """)
        with gr.Accordion("Input images (optional)", open=False):
            input_images = gr.Gallery(
                label="Input images",
                type="pil",
                columns=3,
                rows=1,
            )
        prompt = gr.Text(
            label="Prompt",
            show_label=False,
            lines=10,
            max_lines=15,
            placeholder="Enter your prompt",
            container=False,
        )
        run_button = gr.Button("Run")
        result = gr.Image(label="Result", show_label=False)
        with gr.Accordion("Advanced settings", open=False):
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1024,
                )
                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1024,
                )
            with gr.Row():
                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
                    maximum=100,
                    step=1,
                    value=30,
                )
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0.0,
                    maximum=10.0,
                    step=0.1,
                    value=4,
                )
        gr.Examples(
            examples=examples,
            fn=infer,
            inputs=[prompt],
            outputs=[result, seed],
            cache_examples=True,
            cache_mode="lazy"
        )
        gr.Examples(
            examples=examples_images,
            fn=infer,
            inputs=[prompt, input_images],
            outputs=[result, seed],
            cache_examples=True,
            cache_mode="lazy"
        )
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[prompt, input_images, seed, randomize_seed, width, height, num_inference_steps, guidance_scale],
        outputs=[result, seed]
    )

demo.launch()