import streamlit as st
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForVision2Seq

st.set_page_config(page_title="Qwen3-VL-8B Inference", layout="wide")


@st.cache_resource
def load_model(model_id):
    """Load the model and processor once, in float16, and cache them across reruns."""
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForVision2Seq.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )
    return model, processor


def main():
    st.title("🖼️ Qwen3-VL-8B Vision-Language Model")
    st.markdown("Upload an image and ask questions about it or provide instructions!")

    # Model configuration
    model_id = "reverseforward/qwenmeasurement"  # Replace with your model ID

    try:
        model, processor = load_model(model_id)
    except Exception as e:
        st.error(f"Error loading model: {e}")
        st.info("Make sure your model ID is correct and you have internet access to the Hugging Face Hub")
        return

    # Create two columns for layout
    col1, col2 = st.columns([1, 1])

    with col1:
        st.subheader("📤 Upload Image")
        uploaded_file = st.file_uploader("Choose an image", type=["jpg", "jpeg", "png", "webp"])

        if uploaded_file is not None:
            image = Image.open(uploaded_file)
            st.image(image, use_container_width=True, caption="Uploaded Image")
        else:
            image = None
            st.info("Please upload an image to continue")

    with col2:
        st.subheader("💬 Input Text")
        text_input = st.text_area(
            "Ask a question or provide instructions about the image:",
            placeholder="e.g., What objects are in this image? Describe them in detail.",
            height=150
        )

    st.divider()

    # Generate response
    if st.button("🚀 Generate Response", type="primary"):
        if image is None:
            st.warning("Please upload an image first!")
        elif not text_input.strip():
            st.warning("Please enter a text prompt!")
        else:
            with st.spinner("Processing... This may take a moment"):
                try:
                    # Build a chat-style message holding the image and the text prompt
                    messages = [
                        {
                            "role": "user",
                            "content": [
                                {"type": "image", "image": image},
                                {"type": "text", "text": text_input}
                            ]
                        }
                    ]

                    # Render the chat template, then tokenize text and image together
                    text = processor.apply_chat_template(
                        messages,
                        tokenize=False,
                        add_generation_prompt=True
                    )
                    inputs = processor(
                        text=text,
                        images=[image],
                        return_tensors="pt",
                        padding=True
                    )

                    # Move tensors to the model device; cast floating-point tensors
                    # (e.g. pixel_values) to float16, leave integer token ids as-is
                    inputs = {
                        k: v.to(model.device).to(torch.float16)
                        if v.dtype in (torch.float32, torch.float64)
                        else v.to(model.device)
                        for k, v in inputs.items()
                    }

                    # Generate (do_sample=True so temperature/top_p actually take effect)
                    with torch.no_grad():
                        output_ids = model.generate(
                            **inputs,
                            max_new_tokens=1024,
                            do_sample=True,
                            temperature=0.7,
                            top_p=0.95
                        )

                    # Decode only the newly generated tokens, skipping the prompt
                    response = processor.decode(
                        output_ids[0][inputs["input_ids"].shape[1]:],
                        skip_special_tokens=True
                    )

                    st.success("✅ Generation complete!")
                    st.subheader("📝 Response")
                    st.write(response)

                except Exception as e:
                    st.error(f"Error during generation: {e}")
                    st.info("Check your model configuration and GPU memory")


if __name__ == "__main__":
    main()
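
# Usage note (a sketch; the filename "app.py" is an assumption, not given above):
# launch the app with Streamlit's CLI:
#
#   streamlit run app.py
#
# The first run downloads the checkpoint from the Hugging Face Hub, so network
# access and a GPU with enough memory for the float16 weights are assumed.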