| from transformers import AutoProcessor, AutoModelForVision2Seq |
| from qwen_vl_utils import process_vision_info |
| import gradio as gr |
| from PIL import Image |
| import torch |
|
|
| |
# Checkpoint identifier shared by the model weights and the processor so the
# two can never drift apart.
MODEL_ID = "Qwen/Qwen2.5-VL-32B-Instruct"

# Load the vision-to-sequence model once at import time, requesting float16
# weights and automatic device placement.
model2 = AutoModelForVision2Seq.from_pretrained(
    MODEL_ID,
    dtype=torch.float16,
    device_map="auto",
)

# Processor of the same checkpoint; used to build chat prompts, prepare the
# model inputs and decode generated ids.
processor = AutoProcessor.from_pretrained(MODEL_ID)
|
|
| |
# Fixed German instruction that is placed in front of the user's description
# in every request sent to the model.
GAME_RULES = (
    "In diesem Bild sehen Sie drei Farbraster. In der folgenden Äußerung "
    "beschreibt der Sprecher genau eines der Gitter.\n"
    "Bitte geben Sie mir an, ob er sich auf das\n"
    "linke, mittlere oder rechte Farbraster bezieht.\n"
)
|
|
| |
# Maps the dropdown labels "Bild 1" .. "Bild 9" to the image files
# "example1.jpg" .. "example9.jpg", in ascending order.
IMAGE_OPTIONS = {
    f"Bild {number}": f"example{number}.jpg" for number in range(1, 10)
}
|
|
| |
def play_game(selected_image_label, user_prompt):
    """Ask the model which colour grid the speaker's description refers to.

    Builds a single-turn chat message containing the selected image and the
    text ``GAME_RULES`` followed by the user's description, runs generation
    and returns the decoded reply.

    Args:
        selected_image_label: Key into ``IMAGE_OPTIONS`` naming the image.
        user_prompt: The speaker's description. ``None`` or an empty string
            is treated as empty text, so only ``GAME_RULES`` is sent.

    Returns:
        The decoded model reply as a string, or a short German hint when
        ``selected_image_label`` is not a key of ``IMAGE_OPTIONS``.
    """
    # A cleared or unknown selection would otherwise surface as a KeyError.
    image_path = IMAGE_OPTIONS.get(selected_image_label)
    if image_path is None:
        return "Bitte wählen Sie ein Bild aus."

    # Copy the pixel data while the file is open so the file handle is
    # released as soon as the block exits instead of lingering.
    with Image.open(image_path) as image_file:
        selected_image = image_file.copy()

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": selected_image},
                {"type": "text", "text": GAME_RULES + "\n" + (user_prompt or "")},
            ],
        }
    ]

    # Render the chat template as plain text and collect the vision inputs
    # referenced by the messages.
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)

    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    ).to(model2.device)

    with torch.inference_mode():
        generated_ids = model2.generate(**inputs, max_new_tokens=512)
        # Drop the leading prompt-length slice of every output sequence so
        # that only the newly generated ids are decoded.
        generated_ids_trimmed = [
            out_ids[len(in_ids):]
            for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]
        # The batch holds exactly one request, so take the first decoded text.
        output_text = processor.batch_decode(
            generated_ids_trimmed,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )[0]

    return output_text
|
|
| |
def update_image(selected_label):
    """Open and return the image registered under *selected_label*."""
    return Image.open(IMAGE_OPTIONS[selected_label])


# Label of the image that is preselected when the page is first built.
initial_label = "Bild 2"

with gr.Blocks() as demo:
    with gr.Column():
        image_selector = gr.Dropdown(
            label="Wählen Sie ein Bild",
            choices=list(IMAGE_OPTIONS),
            value=initial_label,
        )
        image_display = gr.Image(
            label="Bild",
            value=Image.open(IMAGE_OPTIONS[initial_label]),
            type="pil",
            interactive=False,
        )
        prompt_input = gr.Textbox(label="Ihre Beschreibung", value="Beschreibung")
        output_text = gr.Textbox(label="Antwort des Modells")
        play_button = gr.Button("Spiel starten")

    # Swap the preview whenever a different image is chosen in the dropdown.
    image_selector.change(update_image, [image_selector], image_display)

    # Send the chosen image label and the typed description to play_game and
    # show its return value in the answer box.
    play_button.click(play_game, [image_selector, prompt_input], output_text)

demo.launch()
|
|