Spaces:

ManarAli
/

Qwen

Runtime error

App Files Files Community

Qwen / app.py

ManarAli

Update app.py

2ee66d7 verified 7 months ago

raw

history blame contribute delete

3.38 kB

	from transformers import AutoProcessor, AutoModelForVision2Seq
	from qwen_vl_utils import process_vision_info
	import gradio as gr
	from PIL import Image
	import torch

	# Single source of truth for the checkpoint: the model weights and the
	# processor must be loaded from the same repository, so the id is named once.
	MODEL_ID = "Qwen/Qwen2.5-VL-32B-Instruct"

	# Load the Qwen2.5-VL 32B Instruct checkpoint in float16.
	# device_map="auto" leaves device placement of the weights to the loader.
	model2 = AutoModelForVision2Seq.from_pretrained(
	    MODEL_ID,
	    dtype=torch.float16,
	    device_map="auto",
	)

	# Processor for the same checkpoint; play_game uses it to render the chat
	# template, build the model inputs and decode the generated token ids.
	processor = AutoProcessor.from_pretrained(MODEL_ID)

	# Instruction text (in German) that play_game prepends to the user's
	# description before sending it to the model. It states that the image shows
	# three colour grids, that the following utterance describes exactly one of
	# them, and asks whether the left, middle or right grid is meant.
	# NOTE(review): the line breaks inside the literal are part of the prompt.
	GAME_RULES = """In diesem Bild sehen Sie drei Farbraster. In der folgenden Äußerung beschreibt der Sprecher genau eines der Gitter.
	Bitte geben Sie mir an, ob er sich auf das
	linke, mittlere oder rechte Farbraster bezieht.
	"""

	# Dropdown label -> image file for the nine example images:
	# "Bild 1" -> "example1.jpg" through "Bild 9" -> "example9.jpg".
	# The file names are relative paths; nothing is opened here.
	IMAGE_OPTIONS = {
	    f"Bild {index}": f"example{index}.jpg" for index in range(1, 10)
	}

	# Function to run model
	def play_game(selected_image_label, user_prompt):
	    """Ask the model which colour grid the user's description refers to.

	    Args:
	        selected_image_label: Key of ``IMAGE_OPTIONS`` chosen in the dropdown.
	        user_prompt: The speaker's description. ``None`` or an empty string
	            is treated as no description, so only the game rules are sent.

	    Returns:
	        The decoded text of the newly generated tokens (prompt tokens are
	        cut off before decoding).

	    Raises:
	        gr.Error: If ``selected_image_label`` is not a key of
	            ``IMAGE_OPTIONS`` (for example when it is ``None``).
	    """
	    image_path = IMAGE_OPTIONS.get(selected_image_label)
	    if image_path is None:
	        # Show a readable message in the UI instead of a bare KeyError.
	        raise gr.Error("Bitte wählen Sie ein Bild aus.")

	    # Image.open is lazy and keeps the file open. copy() reads the pixel
	    # data into memory, so the handle can be released when the with-block
	    # ends instead of leaking one open file per request.
	    with Image.open(image_path) as image_file:
	        selected_image = image_file.copy()

	    # One user turn: the image, then the game rules plus the description.
	    messages = [
	        {
	            "role": "user",
	            "content": [
	                {"type": "image", "image": selected_image},
	                {"type": "text", "text": GAME_RULES + "\n" + (user_prompt or "")},
	            ],
	        }
	    ]

	    # Render the chat template to a prompt string and collect the vision
	    # inputs with Qwen's utility function.
	    text = processor.apply_chat_template(
	        messages, tokenize=False, add_generation_prompt=True
	    )
	    image_inputs, video_inputs = process_vision_info(messages)

	    inputs = processor(
	        text=[text],
	        images=image_inputs,
	        videos=video_inputs,
	        padding=True,
	        return_tensors="pt",
	    ).to(model2.device)

	    # Run generation without autograd bookkeeping.
	    with torch.inference_mode():
	        generated_ids = model2.generate(**inputs, max_new_tokens=512)

	    # Each output sequence starts with its input ids; slice them off so only
	    # the model's answer is decoded.
	    generated_ids_trimmed = [
	        out_ids[len(in_ids):]
	        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
	    ]
	    output_text = processor.batch_decode(
	        generated_ids_trimmed,
	        skip_special_tokens=True,
	        clean_up_tokenization_spaces=False,
	    )[0]

	    return output_text

	# Gradio App
	# Label of the image that is preselected and displayed on first load.
	DEFAULT_IMAGE_LABEL = "Bild 2"

	with gr.Blocks() as demo:
	    with gr.Column():
	        image_selector = gr.Dropdown(
	            choices=list(IMAGE_OPTIONS),
	            value=DEFAULT_IMAGE_LABEL,
	            label="Wählen Sie ein Bild",
	        )
	        # Pass the file path instead of an opened PIL image: Image.open
	        # would leave a file handle open that nothing ever closes.
	        image_display = gr.Image(
	            value=IMAGE_OPTIONS[DEFAULT_IMAGE_LABEL],
	            label="Bild",
	            interactive=False,
	            type="pil",
	        )
	        prompt_input = gr.Textbox(
	            value="Beschreibung",
	            label="Ihre Beschreibung",
	        )
	        output_text = gr.Textbox(label="Antwort des Modells")
	        play_button = gr.Button("Spiel starten")

	    def update_image(selected_label):
	        """Return the image path for ``selected_label``.

	        Returns ``None`` when the label is not in ``IMAGE_OPTIONS`` (for
	        example a cleared selection) instead of raising ``KeyError``.
	        """
	        return IMAGE_OPTIONS.get(selected_label)

	    # When user changes selection, update image
	    image_selector.change(
	        fn=update_image,
	        inputs=[image_selector],
	        outputs=image_display,
	    )

	    # When user clicks play, send inputs to model
	    play_button.click(
	        fn=play_game,
	        inputs=[image_selector, prompt_input],
	        outputs=output_text,
	    )

	# Start the web server for the app.
	demo.launch()