"""Gradio app: face detection (anime or human) -> face crop -> background
removal -> CLIP image embedding.

Two tabs: "Full Pipeline" runs detection + crop + background removal before
embedding; "Embedding Only" embeds the raw upload directly.
"""

import pathlib
import urllib.request

import cv2
import gradio as gr
import numpy as np
from PIL import Image
from rembg import remove
from sentence_transformers import SentenceTransformer

print("Loading models...")
print("Libraries loaded")

# Input size expected by the CLIP ViT-B/32 image encoder.
CLIP_INPUT_SIZE = (224, 224)

# Load CLIP model used to embed processed images.
image_model = SentenceTransformer("clip-ViT-B-32")
print("CLIP loaded")


def load_anime_model():
    """Download (once) and load nagadomi's lbpcascade anime-face detector.

    Returns:
        cv2.CascadeClassifier: detector loaded from the cached XML file.
    """
    url = (
        "https://raw.githubusercontent.com/nagadomi/"
        "lbpcascade_animeface/master/lbpcascade_animeface.xml"
    )
    path = pathlib.Path("lbpcascade_animeface.xml")
    if not path.exists():
        print("Downloading anime face model...")
        urllib.request.urlretrieve(url, path.as_posix())
    return cv2.CascadeClassifier(path.as_posix())


def load_human_model():
    """Load OpenCV's bundled frontal-face Haar cascade.

    Returns:
        cv2.CascadeClassifier: the default frontal-face detector.
    """
    path = pathlib.Path(cv2.data.haarcascades) / "haarcascade_frontalface_default.xml"
    return cv2.CascadeClassifier(path.as_posix())


anime_detector = load_anime_model()
human_detector = load_human_model()
print("Anime + Human detectors loaded")


def get_image_embedding(image):
    """Encode a PIL image with CLIP.

    Args:
        image: PIL image to embed.

    Returns:
        dict: ``{"embedding": [...]}`` with the embedding as a plain list
        (JSON-serializable for the Gradio JSON output component).
    """
    emb = image_model.encode(image)
    return {"embedding": emb.tolist()}


def _remove_bg_and_resize(pil_image):
    """Strip the background with rembg and resize to the CLIP input size."""
    return remove(pil_image).resize(CLIP_INPUT_SIZE)


def process_image(input_image, mode):
    """Crop to the detected face, remove the background, resize for CLIP.

    Args:
        input_image: PIL image from Gradio, or ``None`` if nothing uploaded.
        mode: ``"Anime"`` to use the anime cascade, anything else uses the
            human cascade.

    Returns:
        tuple[str, Image | None]: a status message and the processed image.
        Status is ``"Success ✅"`` on success. If no face is found, the whole
        image is background-removed instead of a crop.
    """
    # Guard: Gradio passes None when the user submits without an image;
    # np.array(None) would otherwise blow up downstream.
    if input_image is None:
        return "No image provided ❌", None

    # Force 3-channel RGB first — PNG uploads may carry an alpha channel,
    # which would break the RGB2BGR conversion below.
    img = cv2.cvtColor(np.array(input_image.convert("RGB")), cv2.COLOR_RGB2BGR)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    detector = anime_detector if mode == "Anime" else human_detector
    faces = detector.detectMultiScale(
        gray, scaleFactor=1.1, minNeighbors=5, minSize=(24, 24)
    )

    if len(faces) == 0:
        # No face found: fall back to whole-image background removal.
        print("direct to background removal")
        pil_image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        return "Success ✅", _remove_bg_and_resize(pil_image)

    # Pick the largest detection by area (the first hit is arbitrary when
    # multiple faces are present).
    x, y, w, h = max(faces, key=lambda f: f[2] * f[3])
    height, width, _ = img.shape

    # Expand the face box: extra headroom on top, margin on sides/bottom,
    # clamped to the image bounds.
    top_expand = 0.5
    side_expand = 0.3
    bottom_expand = 0.2
    x1 = int(max(0, x - w * side_expand))
    x2 = int(min(width, x + w + w * side_expand))
    y1 = int(max(0, y - h * top_expand))
    y2 = int(min(height, y + h + h * bottom_expand))

    cropped = img[y1:y2, x1:x2]
    pil_image = Image.fromarray(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB))
    return "Success ✅", _remove_bg_and_resize(pil_image)


# ---------------- Gradio UI ----------------
with gr.Blocks() as demo:
    with gr.Tab("Full Pipeline"):
        mode_selector = gr.Dropdown(
            choices=["Anime", "Human"], value="Anime", label="Detection Mode"
        )
        img_input = gr.Image(type="pil")
        embedding_output = gr.JSON()
        run_btn = gr.Button("Run Pipeline")

        def run_pipeline(img, mode):
            """Detect + crop + remove background, then embed with CLIP."""
            status_msg, processed_img = process_image(img, mode)
            if status_msg != "Success ✅":
                return {"embedding": None}
            return get_image_embedding(processed_img)

        run_btn.click(
            run_pipeline,
            inputs=[img_input, mode_selector],
            outputs=[embedding_output],
        )

    with gr.Tab("Embedding Only"):
        img_input2 = gr.Image(type="pil")
        embedding_output2 = gr.JSON()
        run_btn2 = gr.Button("Get Embedding")

        def get_embedding_only(img):
            """Embed the raw uploaded image without any preprocessing."""
            # Guard against an empty submission.
            if img is None:
                return {"embedding": None}
            return get_image_embedding(img)

        run_btn2.click(
            get_embedding_only, inputs=img_input2, outputs=embedding_output2
        )

print("Launching demo...")
demo.queue(max_size=15).launch()