File size: 19,748 Bytes
b0c0df0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 |
LICENSE README.md pyproject.toml setup.py lmms_eval/__init__.py lmms_eval/__main__.py lmms_eval/evaluator.py lmms_eval/evaluator_utils.py lmms_eval/logging_utils.py lmms_eval/protocol.py lmms_eval/utils.py lmms_eval.egg-info/PKG-INFO lmms_eval.egg-info/SOURCES.txt lmms_eval.egg-info/dependency_links.txt lmms_eval.egg-info/entry_points.txt lmms_eval.egg-info/requires.txt lmms_eval.egg-info/top_level.txt lmms_eval/api/__init__.py lmms_eval/api/filter.py lmms_eval/api/group.py lmms_eval/api/instance.py lmms_eval/api/metrics.py lmms_eval/api/model.py lmms_eval/api/registry.py lmms_eval/api/samplers.py lmms_eval/api/task.py lmms_eval/caching/__init__.py lmms_eval/caching/cache.py lmms_eval/filters/__init__.py lmms_eval/filters/decontamination.py lmms_eval/filters/extraction.py lmms_eval/filters/selection.py lmms_eval/filters/transformation.py lmms_eval/llm_judge/__init__.py lmms_eval/llm_judge/base.py lmms_eval/llm_judge/factory.py lmms_eval/llm_judge/prompt.py lmms_eval/llm_judge/protocol.py lmms_eval/llm_judge/utils.py lmms_eval/llm_judge/launcher/__init__.py lmms_eval/llm_judge/launcher/base.py lmms_eval/llm_judge/launcher/sglang.py lmms_eval/llm_judge/providers/__init__.py lmms_eval/llm_judge/providers/async_azure_openai.py lmms_eval/llm_judge/providers/async_openai.py lmms_eval/llm_judge/providers/azure_openai.py lmms_eval/llm_judge/providers/dummy.py lmms_eval/llm_judge/providers/openai.py lmms_eval/loggers/__init__.py lmms_eval/loggers/evaluation_tracker.py lmms_eval/loggers/utils.py lmms_eval/loggers/wandb_logger.py lmms_eval/mcp/__init__.py lmms_eval/mcp/client.py lmms_eval/models/__init__.py lmms_eval/models/chat/async_openai.py lmms_eval/models/chat/huggingface.py lmms_eval/models/chat/llava_hf.py lmms_eval/models/chat/longvila.py lmms_eval/models/chat/openai_compatible.py lmms_eval/models/chat/qwen2_5_vl.py lmms_eval/models/chat/sglang.py lmms_eval/models/chat/thyme.py lmms_eval/models/chat/vllm.py lmms_eval/models/chat/vllm_generate.py lmms_eval/models/model_utils/__init__.py lmms_eval/models/model_utils/audio_processing.py lmms_eval/models/model_utils/gen_metrics.py lmms_eval/models/model_utils/load_video.py lmms_eval/models/model_utils/reasoning_model_utils.py lmms_eval/models/model_utils/qwen/qwen_generate_utils.py lmms_eval/models/model_utils/thyme/sandbox.py lmms_eval/models/model_utils/thyme/utils.py lmms_eval/models/simple/aero.py lmms_eval/models/simple/aria.py lmms_eval/models/simple/auroracap.py lmms_eval/models/simple/batch_gpt4.py lmms_eval/models/simple/cambrian.py lmms_eval/models/simple/claude.py lmms_eval/models/simple/cogvlm2.py lmms_eval/models/simple/egogpt.py lmms_eval/models/simple/from_log.py lmms_eval/models/simple/fuyu.py lmms_eval/models/simple/gemini_api.py lmms_eval/models/simple/gemma3.py lmms_eval/models/simple/gpt4o_audio.py lmms_eval/models/simple/gpt4v.py lmms_eval/models/simple/idefics2.py lmms_eval/models/simple/instructblip.py lmms_eval/models/simple/internvideo2.py lmms_eval/models/simple/internvideo2_5.py lmms_eval/models/simple/internvl.py lmms_eval/models/simple/internvl2.py lmms_eval/models/simple/llama_vid.py lmms_eval/models/simple/llama_vision.py lmms_eval/models/simple/llava.py lmms_eval/models/simple/llava_hf.py lmms_eval/models/simple/llava_onevision.py lmms_eval/models/simple/llava_onevision1_5.py lmms_eval/models/simple/llava_onevision_moviechat.py lmms_eval/models/simple/llava_sglang.py lmms_eval/models/simple/llava_vid.py lmms_eval/models/simple/longva.py lmms_eval/models/simple/mantis.py lmms_eval/models/simple/minicpm_v.py lmms_eval/models/simple/minimonkey.py lmms_eval/models/simple/moviechat.py lmms_eval/models/simple/mplug_owl_video.py lmms_eval/models/simple/ola.py lmms_eval/models/simple/openai_compatible.py lmms_eval/models/simple/oryx.py lmms_eval/models/simple/phi3v.py lmms_eval/models/simple/phi4_multimodal.py lmms_eval/models/simple/plm.py lmms_eval/models/simple/qwen2_5_omni.py lmms_eval/models/simple/qwen2_5_vl.py lmms_eval/models/simple/qwen2_5_vl_interleave.py lmms_eval/models/simple/qwen2_audio.py lmms_eval/models/simple/qwen2_vl.py lmms_eval/models/simple/qwen_vl.py lmms_eval/models/simple/qwen_vl_api.py lmms_eval/models/simple/reka.py lmms_eval/models/simple/ross.py lmms_eval/models/simple/slime.py lmms_eval/models/simple/srt_api.py lmms_eval/models/simple/tinyllava.py lmms_eval/models/simple/video_chatgpt.py lmms_eval/models/simple/video_llava.py lmms_eval/models/simple/videochat2.py lmms_eval/models/simple/videochat_flash.py lmms_eval/models/simple/videollama3.py lmms_eval/models/simple/vila.py lmms_eval/models/simple/vita.py lmms_eval/models/simple/vllm.py lmms_eval/models/simple/vora.py lmms_eval/models/simple/whisper.py lmms_eval/models/simple/whisper_vllm.py lmms_eval/models/simple/xcomposer2_4KHD.py lmms_eval/models/simple/xcomposer2d5.py lmms_eval/tasks/__init__.py lmms_eval/tasks/VisualPuzzles/utils.py lmms_eval/tasks/_task_utils/file_utils.py lmms_eval/tasks/_task_utils/gpt_eval_utils.py lmms_eval/tasks/_task_utils/math_verify_utils.py lmms_eval/tasks/_task_utils/video_loader.py lmms_eval/tasks/_task_utils/vqa_eval_metric.py lmms_eval/tasks/activitynetqa/utils.py lmms_eval/tasks/ai2d/upload_ai2d.py lmms_eval/tasks/ai2d/utils.py lmms_eval/tasks/aime/utils.py lmms_eval/tasks/air_bench/utils.py lmms_eval/tasks/alpaca_audio/utils.py lmms_eval/tasks/av_odyssey/utils.py lmms_eval/tasks/camerabench_vqa/utils.py lmms_eval/tasks/capability/prompt.py lmms_eval/tasks/capability/utils.py lmms_eval/tasks/charades_sta/eval_tvg.py lmms_eval/tasks/charades_sta/utils.py lmms_eval/tasks/chartqa/upload_chartqa.py lmms_eval/tasks/chartqa/utils.py lmms_eval/tasks/charxiv/constant.py lmms_eval/tasks/charxiv/descriptive_utils.py lmms_eval/tasks/charxiv/reasoning_utils.py lmms_eval/tasks/charxiv/utils.py lmms_eval/tasks/cinepile/utils.py lmms_eval/tasks/clotho_aqa/utils.py lmms_eval/tasks/cmmmu/utils.py lmms_eval/tasks/coco_cap/utils.py lmms_eval/tasks/common_voice_15/utils.py lmms_eval/tasks/conbench/utils.py lmms_eval/tasks/covost2/utils.py lmms_eval/tasks/csbench/utils.py lmms_eval/tasks/cuva/utils.py lmms_eval/tasks/cvrr/utils.py lmms_eval/tasks/detailcaps/utils.py lmms_eval/tasks/docvqa/utils.py lmms_eval/tasks/dtcbench/utils.py lmms_eval/tasks/egoplan/utils.py lmms_eval/tasks/egoschema/utils.py lmms_eval/tasks/egothink/utils.py lmms_eval/tasks/emma/utils.py lmms_eval/tasks/ferret/utils.py lmms_eval/tasks/fleurs/utils.py lmms_eval/tasks/flickr30k/utils.py lmms_eval/tasks/funqa/utils.py lmms_eval/tasks/gigaspeech/utils.py lmms_eval/tasks/gigaspeech/whisper_normalizer/basic.py lmms_eval/tasks/gigaspeech/whisper_normalizer/english.py lmms_eval/tasks/gpqa/cot_n_shot/_generate_configs.py lmms_eval/tasks/gpqa/cot_n_shot/utils.py lmms_eval/tasks/gpqa/cot_zeroshot/_generate_configs.py lmms_eval/tasks/gpqa/cot_zeroshot/utils.py lmms_eval/tasks/gpqa/generative/_generate_configs.py lmms_eval/tasks/gpqa/generative/utils.py lmms_eval/tasks/gpqa/n_shot/_generate_configs.py lmms_eval/tasks/gpqa/n_shot/utils.py lmms_eval/tasks/gpqa/openai/utils.py lmms_eval/tasks/gpqa/zeroshot/_generate_configs.py lmms_eval/tasks/gpqa/zeroshot/utils.py lmms_eval/tasks/gqa/utils.py lmms_eval/tasks/gqa_ru/utils.py lmms_eval/tasks/hallusion_bench/evaluate_hb.py lmms_eval/tasks/hallusion_bench/utils.py lmms_eval/tasks/hellaswag/utils.py lmms_eval/tasks/hrbench/hrbench_evals.py lmms_eval/tasks/hrbench/utils.py lmms_eval/tasks/iconqa/utils.py lmms_eval/tasks/ifeval/instructions.py lmms_eval/tasks/ifeval/instructions_registry.py lmms_eval/tasks/ifeval/instructions_util.py lmms_eval/tasks/ifeval/utils.py lmms_eval/tasks/ii_bench/utils.py lmms_eval/tasks/illusionvqa/utils.py lmms_eval/tasks/infovqa/utils.py lmms_eval/tasks/internal_eval/d170_cn_utils.py lmms_eval/tasks/internal_eval/d170_en_utils.py lmms_eval/tasks/internal_eval/dc100_en_utils.py lmms_eval/tasks/internal_eval/dc200_cn_utils.py lmms_eval/tasks/internal_eval/utils.py lmms_eval/tasks/jmmmu/utils.py lmms_eval/tasks/k12/utils.py lmms_eval/tasks/lemonade/utils.py lmms_eval/tasks/librispeech/cn_tn.py lmms_eval/tasks/librispeech/utils.py lmms_eval/tasks/librispeech/whisper_normalizer/basic.py lmms_eval/tasks/librispeech/whisper_normalizer/english.py lmms_eval/tasks/live_bench/utils.py lmms_eval/tasks/live_bench/utils_v2.py lmms_eval/tasks/livexiv_tqa/utils.py lmms_eval/tasks/livexiv_vqa/utils.py lmms_eval/tasks/llava-bench-coco/utils.py lmms_eval/tasks/llava-in-the-wild/utils.py lmms_eval/tasks/llava-in-the-wild/utils_ko.py lmms_eval/tasks/llava_interleave_bench/utils.py lmms_eval/tasks/llava_wilder/utils.py lmms_eval/tasks/longtimescope/utils.py lmms_eval/tasks/longvideobench/utils.py lmms_eval/tasks/lsdbench/utils.py lmms_eval/tasks/lvbench/utils.py lmms_eval/tasks/mathverse/mathverse_evals.py lmms_eval/tasks/mathverse/utils.py lmms_eval/tasks/mathvision/eval_utils.py lmms_eval/tasks/mathvision/utils.py lmms_eval/tasks/mathvista/mathvista_evals.py lmms_eval/tasks/mathvista/utils.py lmms_eval/tasks/medqa/utils.py lmms_eval/tasks/megabench/evaluator.py lmms_eval/tasks/megabench/image_video_utils.py lmms_eval/tasks/megabench/utils.py lmms_eval/tasks/megabench/breakdown/analysis_utils.py lmms_eval/tasks/megabench/breakdown/derive_breakdown_results.py lmms_eval/tasks/megabench/metrics/__init__.py lmms_eval/tasks/megabench/metrics/aggregation_type.py lmms_eval/tasks/megabench/metrics/metric_type.py lmms_eval/tasks/megabench/metrics/response_parse_type.py lmms_eval/tasks/megabench/metrics/aggregation/mean_agg.py lmms_eval/tasks/megabench/metrics/aggregation/min_agg.py lmms_eval/tasks/megabench/metrics/aggregation/unsupported_agg.py lmms_eval/tasks/megabench/metrics/parsing/answer_str_parse.py lmms_eval/tasks/megabench/metrics/parsing/dummy_parse.py lmms_eval/tasks/megabench/metrics/parsing/json_parse.py lmms_eval/tasks/megabench/metrics/parsing/common/parsers.py lmms_eval/tasks/megabench/metrics/parsing/common/utils.py lmms_eval/tasks/megabench/metrics/scoring/ascii_art_vlm_judge.py lmms_eval/tasks/megabench/metrics/scoring/chess_jaccard.py lmms_eval/tasks/megabench/metrics/scoring/constrained_generation.py lmms_eval/tasks/megabench/metrics/scoring/coordinate_sequence_match.py lmms_eval/tasks/megabench/metrics/scoring/dict_equality.py lmms_eval/tasks/megabench/metrics/scoring/dict_exact_match_agg_recall.py lmms_eval/tasks/megabench/metrics/scoring/dict_jaccard_agg_jaccard.py lmms_eval/tasks/megabench/metrics/scoring/dict_nbbox_iou_tuple_agg_jaccard.py lmms_eval/tasks/megabench/metrics/scoring/dict_set_equality_agg_jaccard.py lmms_eval/tasks/megabench/metrics/scoring/exact_str_match.py lmms_eval/tasks/megabench/metrics/scoring/exact_str_match_case_insensitive.py lmms_eval/tasks/megabench/metrics/scoring/general_numerical_match.py lmms_eval/tasks/megabench/metrics/scoring/geo_proximity.py lmms_eval/tasks/megabench/metrics/scoring/gleu.py lmms_eval/tasks/megabench/metrics/scoring/jaccard.py lmms_eval/tasks/megabench/metrics/scoring/latex_expr_equality.py lmms_eval/tasks/megabench/metrics/scoring/longest_common_list_prefix_ratio.py lmms_eval/tasks/megabench/metrics/scoring/mse.py lmms_eval/tasks/megabench/metrics/scoring/multi_ref_phrase.py lmms_eval/tasks/megabench/metrics/scoring/nbbox_iou.py lmms_eval/tasks/megabench/metrics/scoring/near_str_match.py lmms_eval/tasks/megabench/metrics/scoring/nli_entailment.py lmms_eval/tasks/megabench/metrics/scoring/normalized_similarity_damerau_levenshtein.py lmms_eval/tasks/megabench/metrics/scoring/number_rel_diff_ratio.py lmms_eval/tasks/megabench/metrics/scoring/positive_int_match.py lmms_eval/tasks/megabench/metrics/scoring/program_judge.py lmms_eval/tasks/megabench/metrics/scoring/sacrebleu_bleu.py lmms_eval/tasks/megabench/metrics/scoring/sequence_equality.py lmms_eval/tasks/megabench/metrics/scoring/set_equality.py lmms_eval/tasks/megabench/metrics/scoring/set_precision.py lmms_eval/tasks/megabench/metrics/scoring/simple_str_match.py lmms_eval/tasks/megabench/metrics/scoring/symbolic_planning.py lmms_eval/tasks/megabench/metrics/scoring/unsupported_scoring.py lmms_eval/tasks/megabench/metrics/scoring/vlm_as_judge.py lmms_eval/tasks/megabench/metrics/scoring/xml_nbbox_iou.py lmms_eval/tasks/megabench/metrics/scoring/xml_norm_point_distance.py lmms_eval/tasks/megabench/metrics/scoring/xml_norm_point_in_bbox.py lmms_eval/tasks/megabench/metrics/scoring/common/conversions.py lmms_eval/tasks/megabench/metrics/scoring/common/metrics.py lmms_eval/tasks/megabench/metrics/scoring/common/transformations.py lmms_eval/tasks/mia_bench/utils.py lmms_eval/tasks/mirb/utils.py lmms_eval/tasks/mix_evals/audio2text/utils.py lmms_eval/tasks/mix_evals/image2text/utils.py lmms_eval/tasks/mix_evals/video2text/utils.py lmms_eval/tasks/mlvu/utils.py lmms_eval/tasks/mmau/utils.py lmms_eval/tasks/mmbench/cc_utils.py lmms_eval/tasks/mmbench/cn_utils.py lmms_eval/tasks/mmbench/en_utils.py lmms_eval/tasks/mmbench/ko_utils.py lmms_eval/tasks/mmbench/mmbench_evals.py lmms_eval/tasks/mmbench/ru_utils.py lmms_eval/tasks/mme/utils.py lmms_eval/tasks/mme_cot/utils.py lmms_eval/tasks/mme_realworld/utils.py lmms_eval/tasks/mmlu/_generate_configs.py lmms_eval/tasks/mmlu/flan_cot_zeroshot/utils.py lmms_eval/tasks/mmlu/flan_n_shot/generative/utils.py lmms_eval/tasks/mmlu_pro/utils.py lmms_eval/tasks/mmmu/utils.py lmms_eval/tasks/mmmu/utils_group_img.py lmms_eval/tasks/mmmu_pro/utils.py lmms_eval/tasks/mmrefine/mmrefine_evals.py lmms_eval/tasks/mmrefine/prompts.py lmms_eval/tasks/mmrefine/utils.py lmms_eval/tasks/mmsearch/constants.py lmms_eval/tasks/mmsearch/get_final_scores.py lmms_eval/tasks/mmsearch/lmms_eval_utils.py lmms_eval/tasks/mmsearch/prompts/prompt.py lmms_eval/tasks/mmsearch/prompts/prompt_w_imagesearch.py lmms_eval/tasks/mmsearch/retrieve_content/retriever.py lmms_eval/tasks/mmsearch/retrieve_content/tokenization/__init__.py lmms_eval/tasks/mmsearch/retrieve_content/tokenization/tokenizers.py lmms_eval/tasks/mmsearch/retrieve_content/tokenization/utils.py lmms_eval/tasks/mmsearch/score/f1_score.py lmms_eval/tasks/mmsearch/score/req_score.py lmms_eval/tasks/mmsearch/score/result_summary.py lmms_eval/tasks/mmsearch/utils/image_utils.py lmms_eval/tasks/mmsearch/utils/lmms_eval_utils.py lmms_eval/tasks/mmsearch/utils/prompt_utils.py lmms_eval/tasks/mmsearch/utils/utils.py lmms_eval/tasks/mmsearch/utils/web_content_utils.py lmms_eval/tasks/mmsi_bench/utils.py lmms_eval/tasks/mmstar/ko_utils.py lmms_eval/tasks/mmstar/utils.py lmms_eval/tasks/mmt/utils.py lmms_eval/tasks/mmupd/mmupd_evals.py lmms_eval/tasks/mmupd/utils.py lmms_eval/tasks/mmvet/utils.py lmms_eval/tasks/mmvetv2/utils.py lmms_eval/tasks/mmvu/utils.py lmms_eval/tasks/mmworld/utils.py lmms_eval/tasks/moviechat/utils.py lmms_eval/tasks/muchomusic/utils.py lmms_eval/tasks/muirbench/utils.py lmms_eval/tasks/multidocvqa/utils.py lmms_eval/tasks/multilingual-llava-bench-in-the-wild/utils.py lmms_eval/tasks/multimodal_rewardbench/utils.py lmms_eval/tasks/mvbench/utils.py lmms_eval/tasks/naturalbench/utils.py lmms_eval/tasks/nextqa/utils.py lmms_eval/tasks/nocaps/utils.py lmms_eval/tasks/ocrbench/upload_ocrbench.py lmms_eval/tasks/ocrbench/utils.py lmms_eval/tasks/ocrbench_v2/IoUscore_metric.py lmms_eval/tasks/ocrbench_v2/TEDS_metric.py lmms_eval/tasks/ocrbench_v2/__init__.py lmms_eval/tasks/ocrbench_v2/page_ocr_metric.py lmms_eval/tasks/ocrbench_v2/parallel.py lmms_eval/tasks/ocrbench_v2/spotting_metric.py lmms_eval/tasks/ocrbench_v2/upload_ocrbench_v2.py lmms_eval/tasks/ocrbench_v2/utils.py lmms_eval/tasks/ocrbench_v2/vqa_metric.py lmms_eval/tasks/ocrbench_v2/spotting_eval/__init__.py lmms_eval/tasks/ocrbench_v2/spotting_eval/rrc_evaluation_funcs_1_1.py lmms_eval/tasks/ocrbench_v2/spotting_eval/script.py lmms_eval/tasks/ok_vqa/_generate_config.py lmms_eval/tasks/ok_vqa/utils.py lmms_eval/tasks/olympiadbench/cn_utils.py lmms_eval/tasks/olympiadbench/en_utils.py lmms_eval/tasks/olympiadbench/olympiadbench_evals.py lmms_eval/tasks/olympiadbench/testmini_utils.py lmms_eval/tasks/olympiadbench_mimo/en_utils.py lmms_eval/tasks/olympiadbench_mimo/olympiadbench_evals.py lmms_eval/tasks/olympiadbench_mimo/utils.py lmms_eval/tasks/olympiadbench_mimo/zh_utils.py lmms_eval/tasks/omni_bench/utils.py lmms_eval/tasks/open_asr/utils.py lmms_eval/tasks/openai_math/utils.py lmms_eval/tasks/openhermes/utils.py lmms_eval/tasks/people_speech/utils.py lmms_eval/tasks/perceptiontest/test/utils.py lmms_eval/tasks/perceptiontest/val/utils.py lmms_eval/tasks/phyx/phyx_evals.py lmms_eval/tasks/phyx/utils.py lmms_eval/tasks/plm_videobench/eval_utils.py lmms_eval/tasks/plm_videobench/fgqa/fgqa_utils.py lmms_eval/tasks/plm_videobench/rcap/rcap_utils.py lmms_eval/tasks/plm_videobench/rdcap/rdcap_utils.py lmms_eval/tasks/plm_videobench/rtloc/rtloc_utils.py lmms_eval/tasks/plm_videobench/sgqa/sgqa_utils.py lmms_eval/tasks/pope/utils.py lmms_eval/tasks/qbench/utils.py lmms_eval/tasks/realworldqa/utils.py lmms_eval/tasks/refcoco/_generate_config.py lmms_eval/tasks/refcoco/utils.py lmms_eval/tasks/refcoco/utils_rec.py lmms_eval/tasks/refcoco+/_generate_config.py lmms_eval/tasks/refcoco+/utils.py lmms_eval/tasks/refcoco+/utils_rec.py lmms_eval/tasks/refcocog/_generate_config.py lmms_eval/tasks/refcocog/utils.py lmms_eval/tasks/refcocog/utils_rec.py lmms_eval/tasks/salbench/utils.py lmms_eval/tasks/scibench/utils.py lmms_eval/tasks/scienceqa/utils.py lmms_eval/tasks/screenspot/utils.py lmms_eval/tasks/screenspot/utils_rec.py lmms_eval/tasks/seedbench/ko_utils.py lmms_eval/tasks/seedbench/utils.py lmms_eval/tasks/seedbench_2/utils.py lmms_eval/tasks/seedbench_2_plus/utils.py lmms_eval/tasks/step2_audio_paralinguistic/utils.py lmms_eval/tasks/stvqa/utils.py lmms_eval/tasks/super_gpqa/utils.py lmms_eval/tasks/synthdog/donut_evaluator.py lmms_eval/tasks/synthdog/utils.py lmms_eval/tasks/tedlium/utils.py lmms_eval/tasks/tempcompass/utils.py lmms_eval/tasks/temporalbench/utils.py lmms_eval/tasks/textcaps/utils.py lmms_eval/tasks/textvqa/utils.py lmms_eval/tasks/threedod/utils.py lmms_eval/tasks/timescope/utils.py lmms_eval/tasks/tomato/utils.py lmms_eval/tasks/vatex/utils.py lmms_eval/tasks/vcr_wiki/utils.py lmms_eval/tasks/vdc/utils.py lmms_eval/tasks/vibe_eval/utils.py lmms_eval/tasks/video-tt/gpt_utils.py lmms_eval/tasks/video-tt/utils.py lmms_eval/tasks/video_detail_description/utils.py lmms_eval/tasks/videochatgpt/utils.py lmms_eval/tasks/videoevalpro/utils.py lmms_eval/tasks/videomathqa/cot_postprocess.py lmms_eval/tasks/videomathqa/cot_step_evaluation.py lmms_eval/tasks/videomathqa/utils.py lmms_eval/tasks/videomme/utils.py lmms_eval/tasks/videommmu/utils.py lmms_eval/tasks/vinoground/utils.py lmms_eval/tasks/visualwebbench/prompts.py lmms_eval/tasks/visualwebbench/utils.py lmms_eval/tasks/vitatecs/utils.py lmms_eval/tasks/vizwiz_vqa/_generate_config.py lmms_eval/tasks/vizwiz_vqa/utils.py lmms_eval/tasks/vl_rewardbench/utils.py lmms_eval/tasks/vlmsareblind/__init__.py lmms_eval/tasks/vlmsareblind/utils.py lmms_eval/tasks/vmcbench/utils.py lmms_eval/tasks/vocalsound/utils.py lmms_eval/tasks/voicebench/utils.py lmms_eval/tasks/voicebench/instruction_following_eval/__init__.py lmms_eval/tasks/voicebench/instruction_following_eval/instructions.py lmms_eval/tasks/voicebench/instruction_following_eval/instructions_registry.py lmms_eval/tasks/voicebench/instruction_following_eval/instructions_util.py lmms_eval/tasks/vqav2/utils.py lmms_eval/tasks/vsibench/utils.py lmms_eval/tasks/vstar_bench/__init__.py lmms_eval/tasks/vstar_bench/utils.py lmms_eval/tasks/wavcaps/utils.py lmms_eval/tasks/websrc/utils.py lmms_eval/tasks/wenet_speech/utils.py lmms_eval/tasks/wild_vision_bench/utils.py lmms_eval/tasks/worldqa/utils.py lmms_eval/tasks/worldqa/worldqa_mc_evaluator.py lmms_eval/tasks/worldsense/utils.py lmms_eval/tasks/xlrs/mcq_utils.py lmms_eval/tasks/youcook2/utils.py |