Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import torch | |
| from PIL import Image | |
| from diffusers import AutoPipelineForText2Image, DDIMScheduler | |
| import numpy as np | |
| from torchvision import transforms | |
| import spaces | |
# Build the SDXL text-to-image pipeline in half precision.
pipeline = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
)

# Replace the default scheduler with DDIM, reusing the existing scheduler config.
pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)

# Load two IP-Adapters: a general ("plus") adapter and a face ("plus-face")
# adapter. Both checkpoints are the ViT-H variants, so the matching image
# encoder has to be taken from "models/image_encoder" instead of the default
# "image_encoder" folder inside `subfolder`; otherwise the encoder and the
# adapter weights do not match.
pipeline.load_ip_adapter(
    "h94/IP-Adapter",
    subfolder="sdxl_models",
    weight_name=[
        "ip-adapter-plus_sdxl_vit-h.safetensors",
        "ip-adapter-plus-face_sdxl_vit-h.safetensors",
    ],
    image_encoder_folder="models/image_encoder",
)

# One scale per adapter, in the same order as `weight_name` above.
pipeline.set_ip_adapter_scale([0.7, 0.5])

# Size (width, height) that the face and style images are resized to.
desired_size = (1024, 1024)
@spaces.GPU  # request a GPU for the duration of the call on ZeroGPU Spaces
def transform_image(face_image):
    """Generate a "soyjak" rendition of the given face image.

    The face image and a fixed local style image are each fed to one of the
    two IP-Adapters loaded on the module-level ``pipeline`` (style first,
    face second, matching the order the adapters were loaded in).

    Args:
        face_image: The uploaded picture, either a ``PIL.Image.Image`` or a
            ``numpy.ndarray`` (as delivered by ``gr.Image``).

    Returns:
        The generated picture as a ``PIL.Image.Image``.

    Raises:
        ValueError: If ``face_image`` is neither a PIL image nor a NumPy array.
    """
    # Validate and normalise the input before any GPU work is started.
    if isinstance(face_image, Image.Image):
        face = face_image
    elif isinstance(face_image, np.ndarray):
        face = Image.fromarray(face_image)
    else:
        raise ValueError("Unsupported image format")

    # Any non-RGB mode (grayscale, RGBA, palette, ...) is converted to RGB,
    # not only single-channel 'L' images.
    if face.mode != "RGB":
        face = face.convert("RGB")
    face = face.resize(desired_size, Image.LANCZOS)

    # Load the style reference from the local path; the context manager
    # closes the file handle once the resized copy has been produced.
    style_image_path = "examples/soyjak2.jpg"  # Ensure this path is correct
    with Image.open(style_image_path) as style_file:
        style = style_file.convert("RGB").resize(desired_size, Image.LANCZOS)

    # Move the pipeline to the GPU only for the duration of the request.
    pipeline.to("cuda")
    try:
        # Fixed seed so repeated runs on the same input are reproducible.
        generator = torch.Generator(device="cuda").manual_seed(0)
        # Pass PIL images (one per adapter) so the pipeline's own feature
        # extractor handles resizing and normalisation for the image encoder.
        result = pipeline(
            prompt="soyjak",
            ip_adapter_image=[style, face],
            negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality",
            num_inference_steps=30,
            generator=generator,
        ).images[0]
    finally:
        # Always move the pipeline back to CPU, even when inference fails,
        # so GPU resources are released.
        pipeline.to("cpu")

    # `.images[0]` is already a PIL image, so Gradio can display it directly.
    return result
# Gradio UI: a single image upload wired to `transform_image`, with the
# generated picture shown as the only output.
face_input = gr.Image(label="Upload your face image")
soyjak_output = gr.Image(label="Your Soyjak")

demo = gr.Interface(
    fn=transform_image,
    inputs=face_input,
    outputs=soyjak_output,
    title="InstaSoyjak - turn anyone into a Soyjak",
    description="All you need to do is upload an image. Please use responsibly.",
)

# Cap the request queue at 20 pending jobs, then start serving the app.
demo.queue(max_size=20).launch()