gry2004's picture
init: CPU fork of OneScorer (no LFS, no examples)
4226d4b
import gradio as gr
import argparse
import datetime
import json
import os
import time
import gradio as gr
import requests
from PIL import Image
from q_align.model.builder import load_pretrained_model
from q_align.conversation import (default_conversation, conv_templates,
SeparatorStyle)
from q_align.constants import LOGDIR
from q_align.utils import (build_logger, server_error_msg,
violates_moderation, moderation_msg)
from q_align.evaluate.scorer import QAlignScorer, QAlignAestheticScorer, QAlignVideoScorer
import gradio as gr
def load_video(video_file):
    """Decode a video and sample about one frame per second as PIL images.

    Args:
        video_file: Video source, passed unchanged to ``decord.VideoReader``.

    Returns:
        A list of ``PIL.Image.Image`` frames, one per whole second of video.
        When the clip is shorter than one second, or the reported frame rate
        is not positive, the list holds only the first frame so it is never
        empty.

    Raises:
        ValueError: If the reader reports zero frames.
    """
    # Imported at call time, so decord is only needed once a video is processed.
    from decord import VideoReader

    vr = VideoReader(video_file)
    total_frames = len(vr)
    if total_frames == 0:
        raise ValueError("Video contains no frames.")

    # Average frame rate, used to pick one frame index per elapsed second.
    fps = vr.get_avg_fps()
    if fps > 0:
        whole_seconds = int(total_frames / fps)
        frame_indices = [int(fps * second) for second in range(whole_seconds)]
    else:
        # A non-positive frame rate would otherwise divide by zero.
        frame_indices = []

    # Sub-second clips produce no indices above; fall back to the first frame
    # so downstream scoring never receives an empty frame list.
    if not frame_indices:
        frame_indices = [0]

    frames = vr.get_batch(frame_indices).asnumpy()
    return [Image.fromarray(frame) for frame in frames]
# Model identifier handed to load_pretrained_model below.
pretrained = "q-future/one-align"

# Automatic device selection: free HF Spaces have no GPU, so fall back to CPU
# (very slow, but it runs).
import torch

if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
print(f"[OneScorer fork] device = {device}", flush=True)

tokenizer, model, image_processor, _ = load_pretrained_model(
    pretrained,
    None,
    "mplug_owl2",
    device=device,
)

# All three scorers are built from the same tokenizer / model / image processor.
_shared_scorer_kwargs = {
    "tokenizer": tokenizer,
    "model": model,
    "image_processor": image_processor,
}
iqa_scorer = QAlignScorer(**_shared_scorer_kwargs)
iaa_scorer = QAlignAestheticScorer(**_shared_scorer_kwargs)
vqa_scorer = QAlignVideoScorer(**_shared_scorer_kwargs)

# Task name (as shown in the UI) -> scorer instance.
scorers = {
    "Image Aesthetics (IAA)": iaa_scorer,
    "Image Quality (IQA)": iqa_scorer,
    "Video Quality (VQA)": vqa_scorer,
}

# Rating level labels, paired position-by-position with the numeric values in
# `scores` (5 down to 1).
LEVELS = [
    "excellent (5)",
    "good (4)",
    "fair (3)",
    "poor (2)",
    "bad (1)",
]
scores = list(range(5, 0, -1))
def image_classifier(input_img, input_vid, scorer_type):
    """Score an uploaded image or video with the selected scorer.

    Args:
        input_img: Image to score, or ``None``. Ignored when a video is given.
        input_vid: Video to score (decoded via ``load_video``), or ``None``.
        scorer_type: Key into ``scorers``. ``None`` falls back to
            ``"Image Quality (IQA)"``.

    Returns:
        A ``(prob_dict, score)`` tuple. ``prob_dict`` maps each entry of
        ``LEVELS`` to the scorer output averaged over axis 0, and ``score``
        is the sum of those probabilities weighted by ``scores``.

    Raises:
        gr.Error: If neither an image nor a video was provided.
    """
    if scorer_type is None:
        scorer_type = "Image Quality (IQA)"
    this_scorer = scorers[scorer_type]

    # A video takes precedence over an image when both are supplied.
    if input_vid is not None:
        input_ = load_video(input_vid)
    elif input_img is not None:
        input_ = [input_img]
    else:
        # Previously this fell through and crashed with UnboundLocalError.
        raise gr.Error("Please upload an image or a video before submitting.")

    if "Video" in scorer_type:
        # NOTE(review): presumably the video scorer expects a list of frame
        # lists (one per video) -- confirm against QAlignVideoScorer.
        input_ = [input_]

    probs = this_scorer(input_).mean(0).tolist()
    prob_dict = dict(zip(LEVELS, probs))
    score = sum(prob * level_score for level_score, prob in zip(scores, probs))
    return prob_dict, score
# HTML header passed to gr.Interface as `description`: banner image, a
# "star us on GitHub" prompt, and a row of project badge links.
title_markdown = ("""
<div style="width: 100%; text-align: center; margin:auto;">
<img style="width: 100%" src="https://raw.githubusercontent.com/Q-Future/Q-Align/main/fig/onescorer.png">
</div>
<h4 align="center"> If you like the OneScorer, please give us a star ✨ on <a href='https://github.com/Q-Future/Q-Align'>[GitHub]</a> for latest update. </h4>
<h5 align="center">
<div style="display:flex; gap: 0.25rem;" align="center">
<a href='https://q-align.github.io'><img src='https://img.shields.io/badge/Homepage-green'></a>
<a href='https://github.com/Q-Future/Q-Align'><img src='https://img.shields.io/badge/Github-Code-blue'></a>
<a href="https://Q-Future.github.io/Q-Align/fig/Q_Align_v0_1_preview.pdf"><img src="https://img.shields.io/badge/Technical-Report-red"></a>
<a href='https://github.com/Q-Future/Q-Align/stargazers'><img src='https://img.shields.io/github/stars/Q-Future/Q-Align.svg?style=social'></a>
</div>
</h5>
""")
# Input components for the demo. The image component is created once; the
# original built it twice and discarded the first instance.
input_img = gr.Image(type='pil', label="Upload an Image")
input_vid = gr.Video(
    # Label typo fixed: "INGORE" -> "IGNORE".
    label="Upload a Video (will IGNORE the image if a video is uploaded)",
    sources=["upload"],
)
radio = gr.Radio(
    ["Image Aesthetics (IAA)", "Image Quality (IQA)", "Video Quality (VQA)"],
    label="Task",
    info="Which Scorer will you need?",
)

# Output components: per-level probabilities and the numeric score.
labels = gr.Label(label="Probabilities of rating levels:")
number = gr.Number(label="Output score:", info="Range in [1,5]. Higher is better.", precision=4)
# Fork changes:
# 1) Removed examples (the upstream repo's LFS images are not pulled on HF,
#    which made Interface fail while caching examples at startup).
# 2) cache_examples=False (as a safeguard).
# 3) device is selected automatically (works on CPU Spaces; inference is very slow).

# Footer text shown through the Interface `article` argument.
_article_markdown = (
    "This is a fork of [teowu/OneScorer](https://huggingface.co/spaces/teowu/OneScorer) "
    "for personal experimentation. Removed broken example assets so the Space can boot. "
    "Backed by paper: *Q-Align: Teaching LMMs for Visual Scoring via Discrete Text-Defined Levels* (ICML 2024). "
    "**Note**: this Space runs on free CPU; expect very slow (minutes) inference for a single image."
)

demo = gr.Interface(
    fn=image_classifier,
    inputs=[input_img, input_vid, radio],
    outputs=[labels, number],
    description=title_markdown,
    article=_article_markdown,
    cache_examples=False,
)

# Start the app with error display enabled and the API page hidden.
demo.launch(show_api=False, show_error=True)