import streamlit as st
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForVision2Seq

st.set_page_config(page_title="Qwen3-VL-8B Inference", layout="wide")


@st.cache_resource
def load_model(model_id):
    """Load the model and processor once, in float16, and cache them across reruns."""
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForVision2Seq.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )
    return model, processor


def main():
    st.title("🖼️ Qwen3-VL-8B Vision-Language Model")
    st.markdown("Upload an image and ask questions about it or provide instructions!")

    # Model configuration
    model_id = "reverseforward/qwenmeasurement"  # Replace with your model ID

    try:
        model, processor = load_model(model_id)
    except Exception as e:
        st.error(f"Error loading model: {e}")
        st.info("Make sure your model ID is correct and you have internet access to the Hugging Face Hub")
        return

    # Create two columns for layout
    col1, col2 = st.columns([1, 1])

    with col1:
        st.subheader("📤 Upload Image")
        uploaded_file = st.file_uploader("Choose an image", type=["jpg", "jpeg", "png", "webp"])

        if uploaded_file is not None:
            image = Image.open(uploaded_file)
            st.image(image, use_container_width=True, caption="Uploaded Image")
        else:
            image = None
            st.info("Please upload an image to continue")

    with col2:
        st.subheader("💬 Input Text")
        text_input = st.text_area(
            "Ask a question or provide instructions about the image:",
            placeholder="e.g., What objects are in this image? Describe them in detail.",
            height=150
        )

    st.divider()

    # Generate response
    if st.button("🚀 Generate Response", type="primary"):
        if image is None:
            st.warning("Please upload an image first!")
        elif not text_input.strip():
            st.warning("Please enter a text prompt!")
        else:
            with st.spinner("Processing... This may take a moment"):
                try:
                    # Build a chat-style message holding the image and the text prompt
                    messages = [
                        {
                            "role": "user",
                            "content": [
                                {"type": "image", "image": image},
                                {"type": "text", "text": text_input}
                            ]
                        }
                    ]

                    # Render the chat template, then tokenize text and image together
                    text = processor.apply_chat_template(
                        messages,
                        tokenize=False,
                        add_generation_prompt=True
                    )
                    inputs = processor(
                        text=text,
                        images=[image],
                        return_tensors="pt",
                        padding=True
                    )

                    # Move tensors to the model device; cast floating-point tensors
                    # (e.g. pixel_values) to float16, leave integer token ids as-is
                    inputs = {
                        k: v.to(model.device).to(torch.float16)
                        if v.dtype in (torch.float32, torch.float64)
                        else v.to(model.device)
                        for k, v in inputs.items()
                    }

                    # Generate (do_sample=True so temperature/top_p actually take effect)
                    with torch.no_grad():
                        output_ids = model.generate(
                            **inputs,
                            max_new_tokens=1024,
                            do_sample=True,
                            temperature=0.7,
                            top_p=0.95
                        )

                    # Decode only the newly generated tokens, skipping the prompt
                    response = processor.decode(
                        output_ids[0][inputs["input_ids"].shape[1]:],
                        skip_special_tokens=True
                    )

                    st.success("✅ Generation complete!")
                    st.subheader("📝 Response")
                    st.write(response)

                except Exception as e:
                    st.error(f"Error during generation: {e}")
                    st.info("Check your model configuration and GPU memory")


if __name__ == "__main__":
    main()
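
# Usage note (a sketch; the filename "app.py" is an assumption, not given above):
# launch the app with Streamlit's CLI:
#
#   streamlit run app.py
#
# The first run downloads the checkpoint from the Hugging Face Hub, so network
# access and a GPU with enough memory for the float16 weights are assumed.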