Spaces:

Levaser
/

Make-it-bsod

Running

App Files Files Community

Make-it-bsod / app.py

Levaser

Fix diffusers import for runtime startup

f2e2937 verified about 2 months ago

raw

history blame contribute delete

9.5 kB

	import random
	import threading
	import traceback

	import gradio as gr
	import torch
	from diffusers import Flux2KleinPipeline, Flux2Transformer2DModel, GGUFQuantizationConfig
	from PIL import Image, ImageDraw, ImageFont, ImageOps


	# Hugging Face repo id used both for the pipeline weights and as the transformer config source.
	MODEL_ID = "black-forest-labs/FLUX.2-klein-4B"
	# Base URL that a GGUF checkpoint filename is appended to in _gguf_url().
	GGUF_BASE_URL = "https://huggingface.co/unsloth/FLUX.2-klein-4B-GGUF/resolve/main"
	# Largest seed value (2**31 - 1); bounds both the seed slider and random seeds.
	MAX_SEED = 2_147_483_647
	# Longest generation edge allowed when CUDA is available.
	GPU_MAX_GENERATION_EDGE = 1024
	# Longest generation edge allowed when running without CUDA.
	CPU_MAX_GENERATION_EDGE = 512
	# Smallest generation edge; each side is raised to at least this value.
	MIN_GENERATION_EDGE = 256
	# Generation sizes are rounded to a multiple of this value.
	SIZE_STEP = 32

	# Lazily created pipeline instance shared across requests, and the lock
	# that get_pipeline() holds while building it.
	PIPELINE = None
	PIPELINE_LOCK = threading.Lock()

	# Base instruction prompt used for every generation; infer() may append
	# user-supplied extra instructions to it.
	BSOD_PROMPT = (
	"Transform the reference photo into a BSOD-inspired scene. "
	"Keep the main subject recognizable and preserve the overall composition. "
	"Use blue-screen-of-death aesthetics, computer hardware, machines, robots, "
	"cybernetic details, metallic structures, monitor glow, motherboard patterns, "
	"industrial sci-fi atmosphere, neon blue diagnostics, clean high detail."
	)

	# Custom stylesheet passed to gr.Blocks(css=...); targets the "app-shell"
	# and "hero" element classes used by the layout.
	CSS = """
	.app-shell {
	max-width: 1080px;
	margin: 0 auto;
	}
	.hero {
	padding: 8px 0 20px;
	}
	.hero h1 {
	margin-bottom: 8px;
	}
	"""


	def _device() -> str:
	    """Return ``"cuda"`` when torch reports a CUDA device, otherwise ``"cpu"``."""
	    if torch.cuda.is_available():
	        return "cuda"
	    return "cpu"


	def _dtype() -> torch.dtype:
	    """Return the tensor dtype to load models with.

	    ``torch.bfloat16`` is used when CUDA is available; without CUDA the
	    function returns ``torch.float32``.
	    """
	    if not torch.cuda.is_available():
	        return torch.float32
	    return torch.bfloat16


	def _gguf_url() -> str:
	    """Return the download URL of the GGUF transformer checkpoint.

	    The Q4_K_M file is selected when CUDA is available and the Q2_K file
	    otherwise; the filename is joined onto ``GGUF_BASE_URL``.
	    """
	    if torch.cuda.is_available():
	        checkpoint = "flux-2-klein-4b-Q4_K_M.gguf"
	    else:
	        checkpoint = "flux-2-klein-4b-Q2_K.gguf"
	    return f"{GGUF_BASE_URL}/{checkpoint}"


	def _max_generation_edge() -> int:
	    """Return the longest generation edge allowed on the current hardware.

	    Uses ``GPU_MAX_GENERATION_EDGE`` when CUDA is available and
	    ``CPU_MAX_GENERATION_EDGE`` otherwise.
	    """
	    if torch.cuda.is_available():
	        return GPU_MAX_GENERATION_EDGE
	    return CPU_MAX_GENERATION_EDGE


	def get_pipeline() -> Flux2KleinPipeline:
	    """Return the shared pipeline, building it on the first call.

	    The module-level ``PIPELINE`` is checked once without the lock and
	    again while holding ``PIPELINE_LOCK``, so only one caller performs the
	    build. The build loads the GGUF-quantized transformer from
	    ``_gguf_url()`` (with ``MODEL_ID``'s ``transformer`` subfolder as its
	    config), creates the pipeline around it, enables VAE slicing and
	    tiling, and disables the pipeline's progress bar.

	    Returns:
	        The ready-to-use pipeline instance stored in ``PIPELINE``.
	    """
	    global PIPELINE

	    if PIPELINE is None:
	        with PIPELINE_LOCK:
	            # Re-check under the lock: another caller may have finished
	            # the build while this one was waiting.
	            if PIPELINE is None:
	                dtype = _dtype()

	                gguf_config = GGUFQuantizationConfig(compute_dtype=dtype)
	                quantized_transformer = Flux2Transformer2DModel.from_single_file(
	                    _gguf_url(),
	                    config=MODEL_ID,
	                    subfolder="transformer",
	                    quantization_config=gguf_config,
	                    torch_dtype=dtype,
	                )

	                built = Flux2KleinPipeline.from_pretrained(
	                    MODEL_ID,
	                    transformer=quantized_transformer,
	                    torch_dtype=dtype,
	                )
	                vae = built.vae
	                vae.enable_slicing()
	                vae.enable_tiling()

	                if torch.cuda.is_available():
	                    built.enable_model_cpu_offload()
	                else:
	                    built.enable_attention_slicing()
	                    built.to("cpu")

	                built.set_progress_bar_config(disable=True)
	                # Publish only after configuration is complete.
	                PIPELINE = built

	    return PIPELINE


	def _round_to_step(value: int, step: int = SIZE_STEP) -> int:
	    """Round *value* to the nearest multiple of *step*, never below *step*.

	    Args:
	        value: The number to snap.
	        step: The grid spacing; defaults to ``SIZE_STEP``.

	    Returns:
	        The nearest multiple of ``step`` (using Python's ``round``), or
	        ``step`` itself when that multiple would be smaller.
	    """
	    snapped = int(round(value / step) * step)
	    return snapped if snapped > step else step


	def _generation_size(image: Image.Image) -> tuple[int, int]:
	    """Compute the ``(width, height)`` to generate at for *image*.

	    The image is scaled down (never up) so its longest edge fits the
	    hardware limit from ``_max_generation_edge()``. Each side is then
	    raised to at least ``MIN_GENERATION_EDGE``, rounded with
	    ``_round_to_step`` and clamped to the range
	    ``[MIN_GENERATION_EDGE, limit]``. Because each side is clamped
	    independently, the aspect ratio is not necessarily preserved.

	    Args:
	        image: The source image; only its ``size`` is read.

	    Returns:
	        The generation width and height.
	    """
	    src_w, src_h = image.size
	    longest = max(src_w, src_h)
	    edge_cap = _max_generation_edge()
	    # A zero-sized image keeps a factor of 1.0 so no division by zero occurs.
	    factor = min(1.0, edge_cap / longest) if longest else 1.0

	    def _fit(side: int) -> int:
	        # Scale, enforce the minimum, snap to the step grid, then clamp.
	        scaled = max(MIN_GENERATION_EDGE, int(side * factor))
	        snapped = _round_to_step(scaled)
	        return max(MIN_GENERATION_EDGE, min(edge_cap, snapped))

	    return _fit(src_w), _fit(src_h)


	def _resize_for_model(image: Image.Image, width: int, height: int) -> Image.Image:
	    """Return *image* resized to exactly ``width`` x ``height`` with Lanczos resampling."""
	    target_size = (width, height)
	    resample_filter = Image.Resampling.LANCZOS
	    return image.resize(target_size, resample_filter)


	def _label_font() -> ImageFont.ImageFont | ImageFont.FreeTypeFont:
	    """Return the font used for the comparison panel labels.

	    Each preferred TrueType face is tried in order at size 36 and the
	    first one that loads is returned.

	    Returns:
	        A ``FreeTypeFont`` when one of the named faces can be loaded,
	        otherwise the font from ``ImageFont.load_default()``.
	    """
	    for font_name in ("DejaVuSans-Bold.ttf", "Arial.ttf"):
	        try:
	            return ImageFont.truetype(font_name, 36)
	        except OSError:
	            # This face could not be loaded; try the next candidate.
	            continue
	    return ImageFont.load_default()


	def _compose_comparison(original: Image.Image, bsod: Image.Image) -> Image.Image:
	    """Build a side-by-side comparison image with labelled panels.

	    A new RGB canvas is created with a header strip holding the labels
	    "original" and "bsod". Below it are two rounded panels, as tall as
	    the taller image, with each image pasted vertically centred in its
	    panel.

	    Args:
	        original: Image pasted into the left panel.
	        bsod: Image pasted into the right panel.

	    Returns:
	        The composed canvas.
	    """
	    margin, gutter, header = 28, 24, 74
	    background = (10, 16, 30)
	    panel_fill = (18, 30, 54)
	    label_fill = (223, 236, 255)

	    (orig_w, orig_h), (bsod_w, bsod_h) = original.size, bsod.size
	    tallest = max(orig_h, bsod_h)

	    sheet_size = (
	        margin * 2 + orig_w + bsod_w + gutter,
	        margin * 2 + header + tallest,
	    )
	    sheet = Image.new("RGB", sheet_size, background)
	    pen = ImageDraw.Draw(sheet)
	    label_font = _label_font()

	    # Geometry shared by the panels, the labels and the pasted images.
	    panels_top = margin + header
	    panels_bottom = panels_top + tallest
	    left_x = margin
	    right_x = margin + orig_w + gutter

	    for x0, panel_w in ((left_x, orig_w), (right_x, bsod_w)):
	        pen.rounded_rectangle(
	            (x0, panels_top, x0 + panel_w, panels_bottom),
	            radius=20,
	            fill=panel_fill,
	        )

	    label_y = margin + 18
	    for x0, caption in ((left_x, "original"), (right_x, "bsod")):
	        pen.text((x0 + 16, label_y), caption, fill=label_fill, font=label_font)

	    # Centre each picture vertically inside its panel.
	    for x0, picture, picture_h in ((left_x, original, orig_h), (right_x, bsod, bsod_h)):
	        sheet.paste(picture, (x0, panels_top + (tallest - picture_h) // 2))

	    return sheet


	def infer(
	    input_image: Image.Image,
	    extra_prompt: str,
	    seed: int,
	    randomize_seed: bool,
	    num_inference_steps: int,
	    guidance_scale: float,
	    progress=gr.Progress(track_tqdm=True),
	):
	    """Generate the stylized image and the comparison sheet for one upload.

	    Args:
	        input_image: The uploaded photo; ``None`` is rejected.
	        extra_prompt: Optional extra instructions appended to ``BSOD_PROMPT``
	            when non-blank.
	        seed: Seed for the generator; replaced when ``randomize_seed`` is set.
	        randomize_seed: When true, a random seed in ``[0, MAX_SEED]`` is drawn.
	        num_inference_steps: Step count, converted to ``int`` for the pipeline.
	        guidance_scale: Guidance scale passed through to the pipeline.
	        progress: Gradio progress tracker; not referenced in the body.

	    Returns:
	        A tuple ``(comparison image, stylized image, seed used)``.

	    Raises:
	        gr.Error: When no image was supplied, or wrapping any exception
	            raised during processing (the traceback is printed first).
	    """
	    if input_image is None:
	        raise gr.Error("Upload a source image first.")

	    try:
	        # Apply the EXIF orientation before measuring or resizing.
	        source = ImageOps.exif_transpose(input_image).convert("RGB")
	        gen_w, gen_h = _generation_size(source)
	        model_input = _resize_for_model(source, gen_w, gen_h)

	        if randomize_seed:
	            seed = random.randint(0, MAX_SEED)

	        extra = extra_prompt.strip() if extra_prompt else ""
	        prompt = f"{BSOD_PROMPT} Extra instructions: {extra}" if extra else BSOD_PROMPT

	        pipe = get_pipeline()
	        rng = torch.Generator(device=_device())
	        rng.manual_seed(int(seed))

	        output = pipe(
	            prompt=prompt,
	            image=model_input,
	            width=gen_w,
	            height=gen_h,
	            guidance_scale=guidance_scale,
	            num_inference_steps=int(num_inference_steps),
	            generator=rng,
	        )
	        stylized = output.images[0]

	        # The comparison uses the full-size source, not the resized model input.
	        sheet = _compose_comparison(source, stylized)
	        return sheet, stylized, seed
	    except Exception as exc:
	        # Log the full traceback, then surface a short message in the UI.
	        print(traceback.format_exc(), flush=True)
	        raise gr.Error(f"{type(exc).__name__}: {exc}") from exc


	# Gradio UI definition, bound to the module-level name `demo`.
	# NOTE(review): indentation appears flattened in this copy, so the exact
	# nesting of the `with` blocks below cannot be confirmed from here —
	# verify against the upstream file before editing the layout.
	with gr.Blocks(css=CSS) as demo:
	with gr.Column(elem_classes=["app-shell"]):
	with gr.Column(elem_classes=["hero"]):
	# Introductory text describing what the app does.
	gr.Markdown(
	"""
	# Make It BSOD
	Upload a normal photo and get a side-by-side comparison:
	the left panel stays untouched, the right panel is regenerated
	in a BSOD, computers, robots, and industrial sci-fi style.

	On free CPU hardware, generation uses a lighter quant and smaller
	working size, so higher step counts can be slow.
	"""
	)

	# Uploaded photo (delivered as an RGB PIL image) and the comparison output.
	with gr.Row():
	input_image = gr.Image(
	label="Original photo",
	type="pil",
	image_mode="RGB",
	)
	comparison_image = gr.Image(
	label="Comparison",
	type="pil",
	)

	# Optional extra prompt text and the stylized-only output.
	with gr.Row():
	extra_prompt = gr.Textbox(
	label="Extra style instructions",
	placeholder="Optional: chrome limbs, server room, broken CRTs, robot swarm...",
	lines=2,
	)
	stylized_image = gr.Image(
	label="BSOD only",
	type="pil",
	)

	# Sampling controls; the accordion is created with open=False.
	with gr.Accordion("Generation settings", open=False):
	# Seed slider spans 0..MAX_SEED; it is also an output of the click
	# handler, so it receives the seed that infer() returns.
	seed = gr.Slider(
	label="Seed",
	minimum=0,
	maximum=MAX_SEED,
	step=1,
	value=0,
	)
	randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
	num_inference_steps = gr.Slider(
	label="Steps",
	minimum=1,
	maximum=50,
	step=1,
	value=4,
	)
	guidance_scale = gr.Slider(
	label="Guidance scale",
	minimum=1.0,
	maximum=10.0,
	step=0.1,
	value=4.0,
	)

	run_button = gr.Button("Make it BSOD", variant="primary")

	# Example values wired to the extra-prompt textbox.
	gr.Examples(
	examples=[
	["cold blue datacenter, mechanical arms, diagnostic overlays"],
	["retro windows crash screen, motherboard textures, chrome robot face"],
	["factory machines, server racks, terminal glow, cybernetic details"],
	],
	inputs=[extra_prompt],
	)

	# Clicking the button calls infer(); the input order matches infer()'s
	# parameters, and its three return values map to the comparison image,
	# the stylized image and the seed slider.
	run_button.click(
	fn=infer,
	inputs=[
	input_image,
	extra_prompt,
	seed,
	randomize_seed,
	num_inference_steps,
	guidance_scale,
	],
	outputs=[comparison_image, stylized_image, seed],
	)


	# Start the Gradio app only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	demo.launch()