# Multi_AI_Agent / app.py
# Source: Hugging Face Space by SHIKARICHACHA (revision 03b567f)
import gradio as gr
import os
from openai import OpenAI
import base64
from PIL import Image
import io
# OpenRouter API key.
# SECURITY: a key was previously hard-coded here. A secret committed to a
# public repo is compromised and must be rotated; read it from the
# environment (e.g. a Space secret) instead of embedding it in source.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
# Available models: maps the human-readable display name (shown in the
# Gradio dropdown) to the OpenRouter model identifier sent with each
# chat-completions request. IDs ending in ":free" select OpenRouter's
# free tier for that model.
MODELS = {
    "Mistral Small": "mistralai/mistral-small-3.2-24b-instruct:free",
    "Kimi Vision": "moonshotai/kimi-vl-a3b-thinking:free",
    "Gemini Pro": "google/gemini-2.5-pro-exp-03-25",
    "Qwen VL": "qwen/qwen2.5-vl-32b-instruct:free",
    "Mistral 3.1": "mistralai/mistral-small-3.1-24b-instruct:free",
    "Gemma": "google/gemma-3-4b-it:free",
    "Llama 3.2 Vision": "meta-llama/llama-3.2-11b-vision-instruct:free",
}
def image_to_base64(image):
    """Convert an image to a base64-encoded string.

    Args:
        image: Either a filesystem path (str) to an image file, whose raw
            bytes are encoded as-is, or a PIL ``Image``, which is
            re-encoded as JPEG before encoding.

    Returns:
        str: The base64-encoded image data (no ``data:`` URI prefix).
    """
    # File path: encode the file's raw bytes without re-encoding.
    if isinstance(image, str):
        with open(image, "rb") as img_file:
            return base64.b64encode(img_file.read()).decode()
    # PIL image: JPEG cannot store an alpha channel or a palette, so
    # normalize to RGB first — otherwise .save() raises for RGBA/P-mode
    # images (e.g. PNG uploads with transparency).
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode()
def analyze_image(image, prompt, model_name):
    """Send *image* and *prompt* to the selected OpenRouter model.

    Args:
        image: The uploaded image (PIL image or file path).
        prompt: The user's question about the image.
        model_name: Display name of the model; must be a key of ``MODELS``.

    Returns:
        str: The model's text reply, or an ``"Error: ..."`` string if the
        request fails for any reason.
    """
    try:
        # OpenRouter speaks the OpenAI API, so reuse the OpenAI client
        # pointed at OpenRouter's endpoint.
        client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=OPENROUTER_API_KEY,
        )

        # Build the multimodal user message: the question text followed by
        # the image embedded as a base64 data URI.
        data_uri = f"data:image/jpeg;base64,{image_to_base64(image)}"
        user_message = {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": data_uri}},
            ],
        }

        completion = client.chat.completions.create(
            extra_headers={
                "HTTP-Referer": "https://gradio-openrouter-interface.com",
                "X-Title": "Gradio OpenRouter Interface",
            },
            model=MODELS[model_name],
            messages=[user_message],
        )

        # First choice carries the model's reply text.
        return completion.choices[0].message.content
    except Exception as e:
        # Surface any failure to the UI as a message instead of raising.
        return f"Error: {str(e)}"
# Create the Gradio interface
with gr.Blocks(title="OpenRouter AI Vision Interface", css="style.css") as demo:
    # Page header / usage instructions.
    # (The original header emoji was mojibake — "πŸ”", a cp1252 mis-decode
    # of UTF-8 — repaired here to the intended magnifying glass.)
    gr.Markdown(
        """
        # 🔍 OpenRouter AI Vision Interface
        Upload an image and ask a question about it. The AI will analyze the image and respond.
        *Powered by OpenRouter API with multiple vision-language models*
        """
    )
    with gr.Row():
        with gr.Column():
            # Input components with custom styling
            with gr.Group():
                image_input = gr.Image(type="pil", label="Upload Image")
                prompt_input = gr.Textbox(label="Your Question", placeholder="What is in this image?", value="What is in this image?")
                model_dropdown = gr.Dropdown(
                    choices=list(MODELS.keys()),
                    value="Mistral Small",
                    label="Select AI Model",
                    info="Choose from different vision-language models"
                )
                submit_button = gr.Button("Analyze Image", variant="primary")
        with gr.Column():
            # Output component with custom styling
            with gr.Group():
                output_text = gr.Textbox(label="AI Response", lines=12)
                gr.Markdown(
                    """
                    ### Available Models
                    - **Mistral Small**: Powerful vision-language model from Mistral AI
                    - **Kimi Vision**: Specialized vision model from Moonshot AI
                    - **Gemini Pro**: Google's advanced multimodal model
                    - **Qwen VL**: Alibaba's vision-language model
                    - **Mistral 3.1**: Earlier version of Mistral's vision model
                    - **Gemma**: Google's lightweight vision model
                    - **Llama 3.2 Vision**: Meta's vision-enabled large language model
                    """
                )
    # Wire the button: run analyze_image on (image, prompt, model) and show
    # the reply in the output textbox.
    submit_button.click(
        fn=analyze_image,
        inputs=[image_input, prompt_input, model_dropdown],
        outputs=output_text
    )
    # Clickable example rows that pre-fill the inputs.
    gr.Examples(
        examples=[
            ["examples/nature.jpg", "What is in this image?", "Mistral Small"],
            ["examples/nature.jpg", "Describe this scene in detail", "Kimi Vision"],
        ],
        inputs=[image_input, prompt_input, model_dropdown],
    )
# Create the examples directory if it doesn't exist, so the Gradio example
# rows have somewhere to point.
os.makedirs("examples", exist_ok=True)


def download_example_image():
    """Download the example image into ``examples/`` if it is missing.

    Best-effort: any network failure is printed and the app still launches
    (the example rows will simply reference a missing file).
    """
    if os.path.exists("examples/nature.jpg"):
        return
    # Imported lazily: only needed on the first run, when the file is absent.
    import requests

    # URL of the example image.
    image_url = (
        "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/"
        "Gfp-wisconsin-madison-the-nature-boardwalk.jpg/"
        "2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
    )
    try:
        # A timeout keeps the app from hanging at startup on a dead network.
        response = requests.get(image_url, timeout=30)
    except requests.RequestException as e:
        print(f"Failed to download image: {e}")
        return
    if response.status_code == 200:
        # Round-trip through PIL (top-level imports) to guarantee a valid
        # JPEG on disk regardless of what the server returned.
        img = Image.open(io.BytesIO(response.content))
        img.save("examples/nature.jpg")
        print("Example image downloaded successfully!")
    else:
        print(f"Failed to download image. Status code: {response.status_code}")


# Download example image before launching the app.
download_example_image()
# For Hugging Face Spaces compatibility
if __name__ == "__main__":
    # Run directly as a script: launch with a public share link.
    demo.launch(share=True)
else:
    # Imported by another process (e.g. the Spaces runtime): launch without
    # a share link or the API docs page.
    # NOTE(review): binding launch()'s return value to `app` looks intended
    # as the ASGI entry point for Spaces — confirm the hosting runtime
    # actually picks this up, since launch() normally blocks/serves itself.
    app = demo.launch(share=False, show_api=False)