"""Gradio demo for OneScorer (Q-Align): image quality, aesthetics and video quality.

The script loads the ``q-future/one-align`` checkpoint once at import time,
wraps it in three scorers (IQA / IAA / VQA) and serves a small Gradio
interface that returns the probability of each rating level together with
the expected score in the range [1, 5].
"""
import argparse
import datetime
import json
import os
import time

import gradio as gr
import requests
import torch
from PIL import Image

from q_align.model.builder import load_pretrained_model
from q_align.conversation import (default_conversation, conv_templates,
                                  SeparatorStyle)
from q_align.constants import LOGDIR
from q_align.utils import (build_logger, server_error_msg,
                           violates_moderation, moderation_msg)
from q_align.evaluate.scorer import QAlignScorer, QAlignAestheticScorer, QAlignVideoScorer


def load_video(video_file):
    """Sample roughly one frame per second from a video file.

    Args:
        video_file: Path of the video to decode (passed to ``decord.VideoReader``).

    Returns:
        A list of ``PIL.Image`` frames. At least one frame is returned, even
        for clips shorter than one second.

    Raises:
        ValueError: If the video contains no frames.
    """
    # Imported lazily so decord is only required when a video is actually scored.
    from decord import VideoReader

    vr = VideoReader(video_file)
    total_frames = len(vr)
    if total_frames == 0:
        raise ValueError(f"Video has no decodable frames: {video_file!r}")

    # Get video frame rate
    fps = vr.get_avg_fps()
    # A sub-second clip (or a container reporting fps <= 0) would otherwise
    # yield zero samples; always keep at least the first frame.
    num_samples = max(1, int(total_frames / fps)) if fps > 0 else 1
    # Calculate frame indices for 1fps, clamped to the last valid frame.
    frame_indices = [min(int(fps * i), total_frames - 1) for i in range(num_samples)]
    frames = vr.get_batch(frame_indices).asnumpy()
    return [Image.fromarray(frame) for frame in frames]


pretrained = "q-future/one-align"

# Auto-select the device: free HF Spaces have no GPU, so fall back to CPU
# (very slow, but it runs).
device = "cuda:0" if torch.cuda.is_available() else "cpu"
print(f"[OneScorer fork] device = {device}", flush=True)

tokenizer, model, image_processor, _ = load_pretrained_model(
    pretrained, None, "mplug_owl2", device=device
)

# All three scorers share the same underlying model, tokenizer and processor.
iqa_scorer = QAlignScorer(tokenizer=tokenizer, model=model, image_processor=image_processor)
iaa_scorer = QAlignAestheticScorer(tokenizer=tokenizer, model=model, image_processor=image_processor)
vqa_scorer = QAlignVideoScorer(tokenizer=tokenizer, model=model, image_processor=image_processor)

scorers = {
    "Image Aesthetics (IAA)": iaa_scorer,
    "Image Quality (IQA)": iqa_scorer,
    "Video Quality (VQA)": vqa_scorer,
}

# Rating levels and their numeric values, in the same order.
LEVELS = ["excellent (5)", "good (4)", "fair (3)", "poor (2)", "bad (1)"]
scores = [5, 4, 3, 2, 1]


def image_classifier(input_img, input_vid, scorer_type):
    """Score an uploaded image or video with the selected scorer.

    Args:
        input_img: The uploaded image, or ``None``.
        input_vid: Path of the uploaded video, or ``None``. Takes precedence
            over ``input_img`` when both are given.
        scorer_type: A key of ``scorers``; ``None`` selects
            ``"Image Quality (IQA)"``.

    Returns:
        A ``(prob_dict, score)`` tuple: the probability of every entry in
        ``LEVELS`` and the probability-weighted score.

    Raises:
        gr.Error: If neither an image nor a video was uploaded.
    """
    if scorer_type is None:
        scorer_type = "Image Quality (IQA)"
    this_scorer = scorers[scorer_type]

    if input_vid is not None:
        input_ = load_video(input_vid)
    elif input_img is not None:
        input_ = [input_img]
    else:
        # Previously this path fell through and crashed on an unbound local.
        raise gr.Error("Please upload an image or a video before submitting.")

    if "Video" in scorer_type:
        # The video scorer receives a list of videos, each a list of frames.
        input_ = [input_]

    probs = this_scorer(input_).mean(0).tolist()
    prob_dict = dict(zip(LEVELS, probs))
    score = sum(prob * level_score for level_score, prob in zip(scores, probs))
    return prob_dict, score


title_markdown = ("""

If you like the OneScorer, please give us a star ✨ on [GitHub] for latest update.

""")

input_img = gr.Image(type='pil', label="Upload an Image")
input_vid = gr.Video(
    label="Upload a Video (will IGNORE the image if a video is uploaded)",
    sources=["upload"],
)
radio = gr.Radio(list(scorers), label="Task", info="Which Scorer will you need?")

labels = gr.Label(label="Probabilities of rating levels:")
number = gr.Number(label="Output score:", info="Range in [1,5]. Higher is better.", precision=4)

# Fork changes:
# 1) Removed examples (the upstream repo's LFS images are not pulled on HF,
#    which made caching the examples fail when the Interface started).
# 2) cache_examples=False (as a safeguard).
# 3) device changed to auto (compatible with CPU Spaces; inference is very slow).
demo = gr.Interface(
    fn=image_classifier,
    inputs=[input_img, input_vid, radio],
    outputs=[labels, number],
    description=title_markdown,
    cache_examples=False,
    article=(
        "This is a fork of [teowu/OneScorer](https://huggingface.co/spaces/teowu/OneScorer) "
        "for personal experimentation. Removed broken example assets so the Space can boot. "
        "Backed by paper: *Q-Align: Teaching LMMs for Visual Scoring via Discrete Text-Defined Levels* (ICML 2024). "
        "**Note**: this Space runs on free CPU; expect very slow (minutes) inference for a single image."
    ),
)
demo.launch(show_error=True, show_api=False)