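"""Streamlit demo: image Q&A with a Qwen3-VL-8B vision-language model.

Launch with:

    streamlit run app.py
"""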
import streamlit as st
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForVision2Seq

st.set_page_config(page_title="Qwen3-VL-8B Inference", layout="wide")
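
# st.cache_resource caches the model and processor across Streamlit reruns,
# so the weights are downloaded and moved to the GPU only once per process.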
@st.cache_resource
def load_model(model_id):
    """Load model and processor with float16 optimization"""
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForVision2Seq.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )
    return model, processor


def main():
    st.title("🖼️ Qwen3-VL-8B Vision-Language Model")
    st.markdown("Upload an image and ask questions about it or provide instructions!")

    # Model configuration
    model_id = "reverseforward/qwenmeasurement"  # Replace with your model ID

    try:
        model, processor = load_model(model_id)
    except Exception as e:
        st.error(f"Error loading model: {e}")
        st.info("Make sure the model ID is correct and that you can reach the Hugging Face Hub")
        return

    # Create two columns for layout
    col1, col2 = st.columns([1, 1])

    with col1:
        st.subheader("📤 Upload Image")
        uploaded_file = st.file_uploader("Choose an image", type=["jpg", "jpeg", "png", "webp"])
        if uploaded_file is not None:
            # Convert to RGB so PNG/WebP images with an alpha channel don't break preprocessing
            image = Image.open(uploaded_file).convert("RGB")
            st.image(image, use_container_width=True, caption="Uploaded Image")
        else:
            image = None
            st.info("Please upload an image to continue")

    with col2:
        st.subheader("💬 Input Text")
        text_input = st.text_area(
            "Ask a question or provide instructions about the image:",
            placeholder="e.g., What objects are in this image? Describe them in detail.",
            height=150,
        )

    st.divider()

    # Generate response
    if st.button("🚀 Generate Response", type="primary"):
        if image is None:
            st.warning("Please upload an image first!")
        elif not text_input.strip():
            st.warning("Please enter a text prompt!")
        else:
            with st.spinner("Processing... This may take a moment"):
                try:
                    # Prepare inputs
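                    # Chat format: a single user turn carrying a typed image
                    # part and a typed text part, as the processor expects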
                    messages = [
                        {
                            "role": "user",
                            "content": [
                                {"type": "image", "image": image},
                                {"type": "text", "text": text_input},
                            ],
                        }
                    ]
                    # Render the chat messages into the model's prompt template,
                    # appending the assistant turn marker for generation
                    text = processor.apply_chat_template(
                        messages,
                        tokenize=False,
                        add_generation_prompt=True,
                    )
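                    # Tokenize the prompt and preprocess the image into tensors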
                    inputs = processor(
                        text=text,
                        images=[image],
                        return_tensors="pt",
                        padding=True,
                    )
                    # Move tensors to the model's device; cast floating-point
                    # tensors (e.g. pixel_values) to float16 to match the weights
                    inputs = {
                        k: v.to(model.device, dtype=torch.float16)
                        if v.dtype in (torch.float32, torch.float64)
                        else v.to(model.device)
                        for k, v in inputs.items()
                    }
                    # Generate
                    with torch.no_grad():
                        output_ids = model.generate(
                            **inputs,
                            max_new_tokens=1024,
                            do_sample=True,  # required; temperature/top_p are ignored under greedy decoding
                            temperature=0.7,
                            top_p=0.95,
                        )
                    # Decode only the newly generated tokens, skipping the prompt
                    response = processor.decode(
                        output_ids[0][inputs["input_ids"].shape[1]:],
                        skip_special_tokens=True,
                    )
                    st.success("✅ Generation complete!")
                    st.subheader("📝 Response")
                    st.write(response)
                except Exception as e:
                    st.error(f"Error during generation: {e}")
                    st.info("Check your model configuration and GPU memory")


if __name__ == "__main__":
    main()