"""Gradio demo: restyle an uploaded photo with a GGUF-quantized FLUX.2 klein
pipeline and show it next to the original in a labelled comparison image."""

import random
import threading
import traceback

import gradio as gr
import torch
from diffusers import Flux2KleinPipeline, Flux2Transformer2DModel, GGUFQuantizationConfig
from PIL import Image, ImageDraw, ImageFont, ImageOps

MODEL_ID = "black-forest-labs/FLUX.2-klein-4B"
GGUF_BASE_URL = "https://huggingface.co/unsloth/FLUX.2-klein-4B-GGUF/resolve/main"
MAX_SEED = 2_147_483_647
# Upper bound for either side of the working image, per device type.
GPU_MAX_GENERATION_EDGE = 1024
CPU_MAX_GENERATION_EDGE = 512
MIN_GENERATION_EDGE = 256
# Working dimensions are snapped to a multiple of this value.
SIZE_STEP = 32

# Lazily built pipeline shared by all requests; see get_pipeline().
PIPELINE = None
PIPELINE_LOCK = threading.Lock()

BSOD_PROMPT = (
    "Transform the reference photo into a BSOD-inspired scene. "
    "Keep the main subject recognizable and preserve the overall composition. "
    "Use blue-screen-of-death aesthetics, computer hardware, machines, robots, "
    "cybernetic details, metallic structures, monitor glow, motherboard patterns, "
    "industrial sci-fi atmosphere, neon blue diagnostics, clean high detail."
)

CSS = """
.app-shell {
    max-width: 1080px;
    margin: 0 auto;
}
.hero {
    padding: 8px 0 20px;
}
.hero h1 {
    margin-bottom: 8px;
}
"""


def _device() -> str:
    """Return "cuda" when a CUDA device is available, otherwise "cpu"."""
    return "cuda" if torch.cuda.is_available() else "cpu"


def _dtype() -> torch.dtype:
    """Return bfloat16 when CUDA is available, float32 otherwise."""
    return torch.bfloat16 if torch.cuda.is_available() else torch.float32


def _gguf_url() -> str:
    """Return the URL of the GGUF checkpoint: Q4_K_M with CUDA, Q2_K without."""
    filename = (
        "flux-2-klein-4b-Q4_K_M.gguf"
        if torch.cuda.is_available()
        else "flux-2-klein-4b-Q2_K.gguf"
    )
    return f"{GGUF_BASE_URL}/{filename}"


def _max_generation_edge() -> int:
    """Return the largest allowed working edge for the current device type."""
    return GPU_MAX_GENERATION_EDGE if torch.cuda.is_available() else CPU_MAX_GENERATION_EDGE


def get_pipeline() -> Flux2KleinPipeline:
    """Return the shared pipeline, building it on the first call.

    The module-level ``PIPELINE`` is checked once without the lock and again
    after acquiring ``PIPELINE_LOCK``, so callers that arrive while another
    thread is still loading wait for it instead of loading a second copy.
    """
    global PIPELINE
    if PIPELINE is not None:
        return PIPELINE

    with PIPELINE_LOCK:
        # Another thread may have finished loading while we waited on the lock.
        if PIPELINE is not None:
            return PIPELINE

        quantization_config = GGUFQuantizationConfig(compute_dtype=_dtype())
        transformer = Flux2Transformer2DModel.from_single_file(
            _gguf_url(),
            config=MODEL_ID,
            subfolder="transformer",
            quantization_config=quantization_config,
            torch_dtype=_dtype(),
        )
        pipe = Flux2KleinPipeline.from_pretrained(
            MODEL_ID,
            transformer=transformer,
            torch_dtype=_dtype(),
        )
        pipe.vae.enable_slicing()
        pipe.vae.enable_tiling()
        if _device() == "cuda":
            pipe.enable_model_cpu_offload()
        else:
            pipe.enable_attention_slicing()
            pipe.to("cpu")
        pipe.set_progress_bar_config(disable=True)

        # Publish only after the pipeline is fully configured.
        PIPELINE = pipe
        return PIPELINE


def _round_to_step(value: int, step: int = SIZE_STEP) -> int:
    """Round ``value`` to the nearest multiple of ``step``, never below ``step``."""
    return max(step, int(round(value / step) * step))


def _generation_size(image: Image.Image) -> tuple[int, int]:
    """Compute the (width, height) at which ``image`` is fed to the model.

    The image is scaled down (never up) so its longest edge fits the device
    limit, each side is raised to at least ``MIN_GENERATION_EDGE``, snapped to
    a multiple of ``SIZE_STEP`` and finally clamped to
    ``[MIN_GENERATION_EDGE, max edge]``.

    NOTE(review): the minimum/maximum clamps are applied to each side
    independently, so very small or very elongated images do not keep their
    aspect ratio.
    """
    width, height = image.size
    longest_edge = max(width, height)
    max_generation_edge = _max_generation_edge()
    # Guard against a zero-sized image; min(1.0, ...) prevents upscaling.
    scale = min(1.0, max_generation_edge / longest_edge) if longest_edge else 1.0

    resized_width = max(MIN_GENERATION_EDGE, int(width * scale))
    resized_height = max(MIN_GENERATION_EDGE, int(height * scale))

    gen_width = _round_to_step(resized_width)
    gen_height = _round_to_step(resized_height)

    # Rounding can push a side past the limits again, so clamp once more.
    gen_width = max(MIN_GENERATION_EDGE, min(max_generation_edge, gen_width))
    gen_height = max(MIN_GENERATION_EDGE, min(max_generation_edge, gen_height))
    return gen_width, gen_height


def _resize_for_model(image: Image.Image, width: int, height: int) -> Image.Image:
    """Return ``image`` resampled to exactly ``width`` x ``height`` with LANCZOS."""
    return image.resize((width, height), Image.Resampling.LANCZOS)


def _label_font() -> ImageFont.ImageFont | ImageFont.FreeTypeFont:
    """Return a 36 px TrueType font if one can be loaded, else Pillow's default."""
    for font_name in ("DejaVuSans-Bold.ttf", "Arial.ttf"):
        try:
            return ImageFont.truetype(font_name, 36)
        except OSError:
            # Font not found on this system; try the next candidate.
            continue
    return ImageFont.load_default()


def _compose_comparison(original: Image.Image, bsod: Image.Image) -> Image.Image:
    """Build a side-by-side canvas: ``original`` on the left, ``bsod`` on the right.

    Each image is pasted at its own size onto a rounded background panel and
    labelled "original" / "bsod" in a header strip above the panels. Both
    panels share the height of the taller image; the shorter image is centred
    vertically inside its panel.

    NOTE(review): no rescaling happens here, so images of very different
    resolutions produce panels of very different widths.
    """
    pad = 28
    gap = 24
    header_height = 74
    bg_color = (10, 16, 30)
    panel_color = (18, 30, 54)
    text_color = (223, 236, 255)

    left_w, left_h = original.size
    right_w, right_h = bsod.size
    panel_height = max(left_h, right_h)
    total_width = pad * 2 + left_w + right_w + gap
    total_height = pad * 2 + header_height + panel_height

    canvas = Image.new("RGB", (total_width, total_height), bg_color)
    draw = ImageDraw.Draw(canvas)
    font = _label_font()

    # Panel boxes are (x0, y0, x1, y1) and sit directly below the header strip.
    left_panel = (
        pad,
        pad + header_height,
        pad + left_w,
        pad + header_height + panel_height,
    )
    right_panel = (
        pad + left_w + gap,
        pad + header_height,
        pad + left_w + gap + right_w,
        pad + header_height + panel_height,
    )
    draw.rounded_rectangle(left_panel, radius=20, fill=panel_color)
    draw.rounded_rectangle(right_panel, radius=20, fill=panel_color)

    left_text_x = pad + 16
    right_text_x = pad + left_w + gap + 16
    text_y = pad + 18
    draw.text((left_text_x, text_y), "original", fill=text_color, font=font)
    draw.text((right_text_x, text_y), "bsod", fill=text_color, font=font)

    # Centre each image vertically within the shared panel height.
    left_y = pad + header_height + (panel_height - left_h) // 2
    right_y = pad + header_height + (panel_height - right_h) // 2
    canvas.paste(original, (pad, left_y))
    canvas.paste(bsod, (pad + left_w + gap, right_y))
    return canvas


def infer(
    input_image: Image.Image,
    extra_prompt: str,
    seed: int,
    randomize_seed: bool,
    num_inference_steps: int,
    guidance_scale: float,
    progress=gr.Progress(track_tqdm=True),
) -> tuple[Image.Image, Image.Image, int]:
    """Run one generation for the Gradio click handler.

    Args:
        input_image: Uploaded photo; ``None`` when nothing was uploaded.
        extra_prompt: Optional text appended to ``BSOD_PROMPT``.
        seed: Seed to use when ``randomize_seed`` is false.
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        num_inference_steps: Number of denoising steps passed to the pipeline.
        guidance_scale: Guidance scale passed to the pipeline.
        progress: Gradio progress tracker injected by the framework.

    Returns:
        ``(comparison, result, seed)``: the side-by-side canvas, the generated
        image alone, and the integer seed that was actually used.

    Raises:
        gr.Error: If no image was uploaded, or wrapping any exception raised
            during preprocessing or generation (the traceback is printed first).
    """
    if input_image is None:
        raise gr.Error("Upload a source image first.")

    try:
        # Apply the EXIF orientation so the model sees the photo upright.
        original = ImageOps.exif_transpose(input_image).convert("RGB")
        width, height = _generation_size(original)
        conditioning = _resize_for_model(original, width, height)

        if randomize_seed:
            seed = random.randint(0, MAX_SEED)
        # Normalise once so the generator and the value echoed back to the UI
        # are the same integer.
        seed = int(seed)

        prompt = BSOD_PROMPT
        if extra_prompt and extra_prompt.strip():
            prompt = f"{prompt} Extra instructions: {extra_prompt.strip()}"

        pipe = get_pipeline()
        generator = torch.Generator(device=_device()).manual_seed(seed)

        result = pipe(
            prompt=prompt,
            image=conditioning,
            width=width,
            height=height,
            guidance_scale=guidance_scale,
            num_inference_steps=int(num_inference_steps),
            generator=generator,
        ).images[0]

        # The comparison uses the upright original at upload resolution, not
        # the resized conditioning image.
        comparison = _compose_comparison(original, result)
        return comparison, result, seed
    except Exception as exc:
        # Top-level UI boundary: log the full traceback, then surface a short
        # message to the user.
        print(traceback.format_exc(), flush=True)
        raise gr.Error(f"{type(exc).__name__}: {exc}") from exc


with gr.Blocks(css=CSS) as demo:
    with gr.Column(elem_classes=["app-shell"]):
        with gr.Column(elem_classes=["hero"]):
            gr.Markdown(
                """
                # Make It BSOD

                Upload a normal photo and get a side-by-side comparison: the left panel stays untouched, the right panel is regenerated in a BSOD, computers, robots, and industrial sci-fi style.

                On free CPU hardware, generation uses a lighter quant and smaller working size, so higher step counts can be slow.
                """
            )

        with gr.Row():
            input_image = gr.Image(
                label="Original photo",
                type="pil",
                image_mode="RGB",
            )
            comparison_image = gr.Image(
                label="Comparison",
                type="pil",
            )

        with gr.Row():
            extra_prompt = gr.Textbox(
                label="Extra style instructions",
                placeholder="Optional: chrome limbs, server room, broken CRTs, robot swarm...",
                lines=2,
            )
            stylized_image = gr.Image(
                label="BSOD only",
                type="pil",
            )

        with gr.Accordion("Generation settings", open=False):
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
            num_inference_steps = gr.Slider(
                label="Steps",
                minimum=1,
                maximum=50,
                step=1,
                value=4,
            )
            guidance_scale = gr.Slider(
                label="Guidance scale",
                minimum=1.0,
                maximum=10.0,
                step=0.1,
                value=4.0,
            )

        run_button = gr.Button("Make it BSOD", variant="primary")

        # Each example fills only the extra-instructions textbox.
        gr.Examples(
            examples=[
                ["cold blue datacenter, mechanical arms, diagnostic overlays"],
                ["retro windows crash screen, motherboard textures, chrome robot face"],
                ["factory machines, server racks, terminal glow, cybernetic details"],
            ],
            inputs=[extra_prompt],
        )

    # The seed slider is also an output so a randomized seed is shown to the user.
    run_button.click(
        fn=infer,
        inputs=[
            input_image,
            extra_prompt,
            seed,
            randomize_seed,
            num_inference_steps,
            guidance_scale,
        ],
        outputs=[comparison_image, stylized_image, seed],
    )


if __name__ == "__main__":
    demo.launch()