Make-it-bsod / app.py
Levaser's picture
Fix diffusers import for runtime startup
f2e2937 verified
import random
import threading
import traceback
import gradio as gr
import torch
from diffusers import Flux2KleinPipeline, Flux2Transformer2DModel, GGUFQuantizationConfig
from PIL import Image, ImageDraw, ImageFont, ImageOps
# Hugging Face repo id: used as the config source for the GGUF transformer and
# as the base repo for the rest of the pipeline (see get_pipeline).
MODEL_ID = "black-forest-labs/FLUX.2-klein-4B"
# Base URL that _gguf_url() appends a quantized checkpoint filename to.
GGUF_BASE_URL = "https://huggingface.co/unsloth/FLUX.2-klein-4B-GGUF/resolve/main"
# Upper bound for both the seed slider and randomly drawn seeds (2**31 - 1).
MAX_SEED = 2_147_483_647
# Largest allowed generation edge, in pixels, with and without CUDA.
GPU_MAX_GENERATION_EDGE = 1024
CPU_MAX_GENERATION_EDGE = 512
# Smallest allowed generation edge, in pixels.
MIN_GENERATION_EDGE = 256
# Generation width/height are snapped to a multiple of this value.
SIZE_STEP = 32
# Lazily built pipeline shared by all requests; PIPELINE_LOCK guards its creation.
PIPELINE = None
PIPELINE_LOCK = threading.Lock()
# Base prompt sent with every request; infer() appends optional user instructions.
BSOD_PROMPT = (
    "Transform the reference photo into a BSOD-inspired scene. "
    "Keep the main subject recognizable and preserve the overall composition. "
    "Use blue-screen-of-death aesthetics, computer hardware, machines, robots, "
    "cybernetic details, metallic structures, monitor glow, motherboard patterns, "
    "industrial sci-fi atmosphere, neon blue diagnostics, clean high detail."
)
# Stylesheet passed to gr.Blocks; the class names match the elem_classes used
# by the outer columns of the UI ("app-shell" and "hero").
CSS = """
.app-shell {
max-width: 1080px;
margin: 0 auto;
}
.hero {
padding: 8px 0 20px;
}
.hero h1 {
margin-bottom: 8px;
}
"""
def _device() -> str:
return "cuda" if torch.cuda.is_available() else "cpu"
def _dtype() -> torch.dtype:
return torch.bfloat16 if torch.cuda.is_available() else torch.float32
def _gguf_url() -> str:
    """Return the URL of the GGUF checkpoint to load.

    The Q4_K_M file is chosen when CUDA is available and the Q2_K file
    otherwise; the filename is appended to GGUF_BASE_URL.
    """
    if torch.cuda.is_available():
        checkpoint = "flux-2-klein-4b-Q4_K_M.gguf"
    else:
        checkpoint = "flux-2-klein-4b-Q2_K.gguf"
    return f"{GGUF_BASE_URL}/{checkpoint}"
def _max_generation_edge() -> int:
    """Return the largest generation edge allowed on the current hardware."""
    if torch.cuda.is_available():
        return GPU_MAX_GENERATION_EDGE
    return CPU_MAX_GENERATION_EDGE
def get_pipeline() -> Flux2KleinPipeline:
    """Return the shared pipeline, building it on the first call.

    The module-level PIPELINE is checked once without the lock and once more
    after acquiring PIPELINE_LOCK, so the pipeline is only built when no other
    caller has stored one in the meantime.

    Returns:
        The cached Flux2KleinPipeline instance.
    """
    global PIPELINE
    if PIPELINE is not None:
        return PIPELINE

    with PIPELINE_LOCK:
        # Re-check under the lock: another caller may have finished building.
        if PIPELINE is None:
            on_gpu = torch.cuda.is_available()
            load_dtype = _dtype()

            # The transformer comes from a quantized GGUF file; its config is
            # read from the "transformer" subfolder of MODEL_ID.
            gguf_config = GGUFQuantizationConfig(compute_dtype=load_dtype)
            gguf_transformer = Flux2Transformer2DModel.from_single_file(
                _gguf_url(),
                config=MODEL_ID,
                subfolder="transformer",
                quantization_config=gguf_config,
                torch_dtype=load_dtype,
            )
            built = Flux2KleinPipeline.from_pretrained(
                MODEL_ID,
                transformer=gguf_transformer,
                torch_dtype=load_dtype,
            )

            built.vae.enable_slicing()
            built.vae.enable_tiling()
            if on_gpu:
                built.enable_model_cpu_offload()
            else:
                built.enable_attention_slicing()
                built.to("cpu")
            built.set_progress_bar_config(disable=True)

            PIPELINE = built

    return PIPELINE
def _round_to_step(value: int, step: int = SIZE_STEP) -> int:
    """Round *value* to the nearest multiple of *step*, never below one step.

    Args:
        value: Length to snap.
        step: Grid spacing; defaults to SIZE_STEP.

    Returns:
        The nearest multiple of *step* (as decided by the built-in ``round``),
        or *step* itself when that multiple would be smaller.
    """
    snapped = int(round(value / step) * step)
    return snapped if snapped > step else step
def _generation_size(image: Image.Image) -> tuple[int, int]:
    """Compute the (width, height) to generate at for *image*.

    The image is only ever scaled down (the factor is capped at 1.0) so that
    its longest edge fits within the current hardware limit. Each side is then
    floored at MIN_GENERATION_EDGE, snapped with _round_to_step, and clamped to
    the range [MIN_GENERATION_EDGE, hardware limit]. Because the two sides are
    clamped independently, the result does not necessarily keep the source
    aspect ratio.

    Args:
        image: Source image; only its ``size`` is read.

    Returns:
        A ``(width, height)`` tuple in pixels.
    """
    src_width, src_height = image.size
    longest = max(src_width, src_height)
    edge_cap = _max_generation_edge()

    # A zero-sized image would divide by zero; leave it unscaled instead.
    shrink = min(1.0, edge_cap / longest) if longest else 1.0

    def _fit(side: int) -> int:
        floored = max(MIN_GENERATION_EDGE, int(side * shrink))
        snapped = _round_to_step(floored)
        return max(MIN_GENERATION_EDGE, min(edge_cap, snapped))

    return _fit(src_width), _fit(src_height)
def _resize_for_model(image: Image.Image, width: int, height: int) -> Image.Image:
    """Return *image* resized to exactly ``width`` x ``height`` with LANCZOS resampling."""
    target_size = (width, height)
    return image.resize(target_size, resample=Image.Resampling.LANCZOS)
def _label_font() -> ImageFont.ImageFont | ImageFont.FreeTypeFont:
    """Return the font used for the panel captions.

    Tries each candidate TrueType font at size 36 in order and returns the
    first one that loads; if every candidate raises OSError, Pillow's default
    font is returned instead.
    """
    candidates = ("DejaVuSans-Bold.ttf", "Arial.ttf")
    for candidate in candidates:
        try:
            loaded = ImageFont.truetype(candidate, 36)
        except OSError:
            # This candidate could not be loaded; try the next one.
            continue
        return loaded
    return ImageFont.load_default()
def _compose_comparison(original: Image.Image, bsod: Image.Image) -> Image.Image:
    """Render *original* and *bsod* side by side on one captioned canvas.

    The canvas has an outer margin, a header strip holding the captions
    "original" and "bsod", and two rounded panels separated by a gutter. Both
    panels are as tall as the taller image; each image is pasted at its
    panel's left edge and centred vertically within it.

    Args:
        original: Image shown in the left panel.
        bsod: Image shown in the right panel.

    Returns:
        A new RGB image containing both panels.
    """
    margin, gutter, header = 28, 24, 74
    background = (10, 16, 30)
    panel_fill = (18, 30, 54)
    caption_fill = (223, 236, 255)

    orig_w, orig_h = original.size
    styled_w, styled_h = bsod.size
    tallest = max(orig_h, styled_h)

    # Shared geometry: both panels start below the header strip.
    panels_top = margin + header
    panels_bottom = panels_top + tallest
    left_x = margin
    right_x = margin + orig_w + gutter

    canvas_size = (
        margin * 2 + orig_w + styled_w + gutter,
        margin * 2 + header + tallest,
    )
    canvas = Image.new("RGB", canvas_size, background)
    painter = ImageDraw.Draw(canvas)
    caption_font = _label_font()

    # Panel backgrounds first, so captions and images are drawn on top.
    for x0, panel_w in ((left_x, orig_w), (right_x, styled_w)):
        painter.rounded_rectangle(
            (x0, panels_top, x0 + panel_w, panels_bottom),
            radius=20,
            fill=panel_fill,
        )

    caption_y = margin + 18
    for x0, caption in ((left_x, "original"), (right_x, "bsod")):
        painter.text((x0 + 16, caption_y), caption, fill=caption_fill, font=caption_font)

    # Centre each image vertically inside the shared panel height.
    for x0, picture, picture_h in (
        (left_x, original, orig_h),
        (right_x, bsod, styled_h),
    ):
        canvas.paste(picture, (x0, panels_top + (tallest - picture_h) // 2))

    return canvas
def infer(
    input_image: Image.Image,
    extra_prompt: str,
    seed: int,
    randomize_seed: bool,
    num_inference_steps: int,
    guidance_scale: float,
    progress=gr.Progress(track_tqdm=True),
):
    """Stylize *input_image* and build the side-by-side comparison.

    Args:
        input_image: Uploaded photo, or None when nothing was uploaded.
        extra_prompt: Optional user text appended to BSOD_PROMPT; blank or
            whitespace-only text is ignored.
        seed: Seed to use when *randomize_seed* is false.
        randomize_seed: When true, a fresh seed in [0, MAX_SEED] is drawn and
            *seed* is ignored.
        num_inference_steps: Number of denoising steps (converted to int).
        guidance_scale: Guidance scale forwarded to the pipeline.
        progress: Gradio progress tracker; it is not referenced in the body and
            is only declared so Gradio can supply it.

    Returns:
        ``(comparison, result, seed)``: the composed comparison image, the
        generated image on its own, and the integer seed that was used.

    Raises:
        gr.Error: If no image was provided, or if any step of preprocessing or
            generation fails (the original exception is chained as the cause).
    """
    if input_image is None:
        raise gr.Error("Upload a source image first.")
    try:
        # Apply the EXIF orientation before measuring, so width/height match
        # what the user sees.
        original = ImageOps.exif_transpose(input_image).convert("RGB")
        width, height = _generation_size(original)
        conditioning = _resize_for_model(original, width, height)

        # Normalise the seed to an int once, so the value fed to the generator
        # and the value returned to the UI are the same object.
        seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

        prompt = BSOD_PROMPT
        extra = (extra_prompt or "").strip()
        if extra:
            prompt = f"{prompt} Extra instructions: {extra}"

        pipe = get_pipeline()
        # Use the shared device helper rather than repeating the CUDA check.
        generator = torch.Generator(device=_device()).manual_seed(seed)
        result = pipe(
            prompt=prompt,
            image=conditioning,
            width=width,
            height=height,
            guidance_scale=guidance_scale,
            num_inference_steps=int(num_inference_steps),
            generator=generator,
        ).images[0]

        # NOTE(review): the left panel uses the full-resolution upload while
        # the generated image was requested at (width, height), so the two
        # panels can differ in size - confirm this is intended.
        comparison = _compose_comparison(original, result)
        return comparison, result, seed
    except Exception as exc:
        # Top-level UI boundary: log the full traceback for the server logs
        # and surface a short message to the user.
        print(traceback.format_exc(), flush=True)
        raise gr.Error(f"{type(exc).__name__}: {exc}") from exc
# Build the Gradio interface. The resulting Blocks object is bound to `demo`,
# which the __main__ guard below launches.
with gr.Blocks(css=CSS) as demo:
    # Outer wrapper; the "app-shell" class is styled by the CSS constant.
    with gr.Column(elem_classes=["app-shell"]):
        # Title and short description shown at the top of the page.
        with gr.Column(elem_classes=["hero"]):
            gr.Markdown(
                """
# Make It BSOD
Upload a normal photo and get a side-by-side comparison:
the left panel stays untouched, the right panel is regenerated
in a BSOD, computers, robots, and industrial sci-fi style.
On free CPU hardware, generation uses a lighter quant and smaller
working size, so higher step counts can be slow.
"""
            )
        # Source upload next to the composed comparison output.
        with gr.Row():
            input_image = gr.Image(
                label="Original photo",
                type="pil",
                image_mode="RGB",
            )
            comparison_image = gr.Image(
                label="Comparison",
                type="pil",
            )
        # Optional extra prompt text next to the stylized-only output.
        with gr.Row():
            extra_prompt = gr.Textbox(
                label="Extra style instructions",
                placeholder="Optional: chrome limbs, server room, broken CRTs, robot swarm...",
                lines=2,
            )
            stylized_image = gr.Image(
                label="BSOD only",
                type="pil",
            )
        # Sampling controls, collapsed by default.
        with gr.Accordion("Generation settings", open=False):
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            # When checked, infer() ignores the slider value and draws a seed.
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
            num_inference_steps = gr.Slider(
                label="Steps",
                minimum=1,
                maximum=50,
                step=1,
                value=4,
            )
            guidance_scale = gr.Slider(
                label="Guidance scale",
                minimum=1.0,
                maximum=10.0,
                step=0.1,
                value=4.0,
            )
        run_button = gr.Button("Make it BSOD", variant="primary")
        # Each example supplies only the extra-prompt textbox.
        gr.Examples(
            examples=[
                ["cold blue datacenter, mechanical arms, diagnostic overlays"],
                ["retro windows crash screen, motherboard textures, chrome robot face"],
                ["factory machines, server racks, terminal glow, cybernetic details"],
            ],
            inputs=[extra_prompt],
        )
    # Wire the button to infer(). The inputs are listed in the order of
    # infer()'s parameters, and the third output writes the seed that was
    # used back into the seed slider.
    run_button.click(
        fn=infer,
        inputs=[
            input_image,
            extra_prompt,
            seed,
            randomize_seed,
            num_inference_steps,
            guidance_scale,
        ],
        outputs=[comparison_image, stylized_image, seed],
    )
# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()