GEditBench v2: A Human-Aligned Benchmark for General Image Editing
Paper • 2603.28547 • Published • 26
# Fetch the GEditBench v2 benchmark code and enter the checkout.
repo_url="https://github.com/ZhangqiJiang07/GEditBench_v2.git"
git clone "$repo_url"
cd GEditBench_v2
# Merge the LoRA adapter weights into the base model to produce the
# standalone PVC-Judge checkpoint (requires torch, peft, transformers).
python ./scripts/merge_lora.py \
  --base-model-path /path/to/Qwen3/VL/8B/Instruct \
  --lora-weights-path /path/to/LoRA/Weights \
  --model-save-dir /path/to/save/PVC/Judge/model
# Serve the merged PVC-Judge model through vLLM's OpenAI-compatible API server.
# Listens on all interfaces at port 25930; single-GPU tensor parallelism.
python -m vllm.entrypoints.openai.api_server \
  --model /path/to/save/PVC/Judge/model \
  --served-model-name PVC-Judge \
  --tensor-parallel-size 1 \
  --mm-encoder-tp-mode data \
  --limit-mm-per-prompt.video 0 \
  --host 0.0.0.0 \
  --port 25930 \
  --dtype bfloat16 \
  --gpu-memory-utilization 0.80 \
  --max-num-seqs 32 \
  --max-model-len 48000 \
  --distributed-executor-backend mp
Use the auto pipeline for inference. See our repo for detailed usage!
# Set up an environment for local judge inference.
# Option A: conda — creates the 'pvc_judge' env from the pinned spec.
conda env create -f environments/pvc_judge.yml
conda activate pvc_judge
# Option B: plain virtualenv with the locked requirements file.
python3.12 -m venv .venvs/pvc_judge
source .venvs/pvc_judge/bin/activate
python -m pip install -r environments/requirements/pvc_judge.lock.txt
# Run the local evaluation script with the 'vc_reward' task.
bash ./scripts/local_eval.sh vc_reward
Base model
Qwen/Qwen3-VL-8B-Instruct