import gradio as gr
import os
from openai import OpenAI
import base64
from PIL import Image
import io
# OpenRouter API key, read from the environment instead of hard-coding a
# secret in source (set OPENROUTER_API_KEY in the Space's settings or your shell)
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
# Available models
MODELS = {
    "Mistral Small": "mistralai/mistral-small-3.2-24b-instruct:free",
    "Kimi Vision": "moonshotai/kimi-vl-a3b-thinking:free",
    "Gemini Pro": "google/gemini-2.5-pro-exp-03-25",
    "Qwen VL": "qwen/qwen2.5-vl-32b-instruct:free",
    "Mistral 3.1": "mistralai/mistral-small-3.1-24b-instruct:free",
    "Gemma": "google/gemma-3-4b-it:free",
    "Llama 3.2 Vision": "meta-llama/llama-3.2-11b-vision-instruct:free",
}
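# Note: the ":free" slugs above are OpenRouter routing IDs; free-tier
# availability changes over time, so check https://openrouter.ai/models if a
# request fails with a model-not-found error.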
def image_to_base64(image):
    """Convert an image (file path or PIL Image) to a base64-encoded JPEG."""
    # If image is a file path
    if isinstance(image, str):
        with open(image, "rb") as img_file:
            return base64.b64encode(img_file.read()).decode()
    # If image is already a PIL Image: JPEG has no alpha channel, so convert
    # RGBA/palette images to RGB first to avoid a save error
    if image.mode != "RGB":
        image = image.convert("RGB")
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode()
def analyze_image(image, prompt, model_name):
    """Analyze an image using the selected OpenRouter model."""
    # Gradio passes None when no image has been uploaded
    if image is None:
        return "Error: please upload an image first."
    try:
        # Initialize OpenAI client with OpenRouter base URL
        client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=OPENROUTER_API_KEY,
        )
        # Convert image to base64
        img_base64 = image_to_base64(image)
        # Create the completion request
        completion = client.chat.completions.create(
            extra_headers={
                "HTTP-Referer": "https://gradio-openrouter-interface.com",
                "X-Title": "Gradio OpenRouter Interface",
            },
            model=MODELS[model_name],
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{img_base64}"},
                        },
                    ],
                }
            ],
        )
        # Return the model's response
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error: {str(e)}"
# Fetch the example image before the interface is built, since gr.Examples
# references the file path at construction time
os.makedirs("examples", exist_ok=True)

def download_example_image():
    """Download the sample image for gr.Examples if it is missing."""
    if not os.path.exists("examples/nature.jpg"):
        import requests  # lazy import; make sure requests is in requirements.txt
        # URL of the example image
        image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
        # Download and save the image
        response = requests.get(image_url)
        if response.status_code == 200:
            # Open the image from the response content and save it
            img = Image.open(io.BytesIO(response.content))
            img.save("examples/nature.jpg")
            print("Example image downloaded successfully!")
        else:
            print(f"Failed to download image. Status code: {response.status_code}")

download_example_image()

# Create the Gradio interface
with gr.Blocks(title="OpenRouter AI Vision Interface", css="style.css") as demo:
    gr.Markdown(
        """
        # OpenRouter AI Vision Interface

        Upload an image and ask a question about it. The AI will analyze the image and respond.

        *Powered by the OpenRouter API with multiple vision-language models*
        """
    )
    with gr.Row():
        with gr.Column():
            # Input components with custom styling
            with gr.Group():
                image_input = gr.Image(type="pil", label="Upload Image")
                prompt_input = gr.Textbox(
                    label="Your Question",
                    placeholder="What is in this image?",
                    value="What is in this image?",
                )
                model_dropdown = gr.Dropdown(
                    choices=list(MODELS.keys()),
                    value="Mistral Small",
                    label="Select AI Model",
                    info="Choose from different vision-language models",
                )
                submit_button = gr.Button("Analyze Image", variant="primary")
        with gr.Column():
            # Output component with custom styling
            with gr.Group():
                output_text = gr.Textbox(label="AI Response", lines=12)
            gr.Markdown(
                """
                ### Available Models
                - **Mistral Small**: Powerful vision-language model from Mistral AI
                - **Kimi Vision**: Specialized vision model from Moonshot AI
                - **Gemini Pro**: Google's advanced multimodal model
                - **Qwen VL**: Alibaba's vision-language model
                - **Mistral 3.1**: Earlier version of Mistral's vision model
                - **Gemma**: Google's lightweight vision model
                - **Llama 3.2 Vision**: Meta's vision-enabled large language model
                """
            )

    # Set up the submit action
    submit_button.click(
        fn=analyze_image,
        inputs=[image_input, prompt_input, model_dropdown],
        outputs=output_text,
    )

    # Add examples
    gr.Examples(
        examples=[
            ["examples/nature.jpg", "What is in this image?", "Mistral Small"],
            ["examples/nature.jpg", "Describe this scene in detail", "Kimi Vision"],
        ],
        inputs=[image_input, prompt_input, model_dropdown],
    )
# Hugging Face Spaces executes app.py as __main__, so no separate branch is
# needed; pass share=True to demo.launch() locally for a temporary public link
if __name__ == "__main__":
    demo.launch()
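# Assumed dependencies for this Space's requirements.txt (versions unpinned):
#   gradio
#   openai
#   pillow
#   requests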