| from dataclasses import dataclass, make_dataclass, field |
| from enum import Enum |
| from typing import TypeVar |
|
|
| import pandas as pd |
|
|
# Type variable for "some Enum subclass"; lets the helper below return the
# same enum type it was given.
_E = TypeVar("_E", bound=Enum)


def _enum_from_str(enum_cls: type[_E], value: str, default: _E) -> _E:
    """Return the member of *enum_cls* whose ``value.name`` equals *value*.

    Members are scanned in definition order and the first match wins.
    When no member matches, *default* is returned unchanged.
    """
    matches = (candidate for candidate in enum_cls if candidate.value.name == value)
    return next(matches, default)
|
|
def fields(raw_class):
    """Collect the non-dunder attribute values stored on *raw_class*.

    Walks ``raw_class.__dict__`` in insertion order and keeps the value of
    every entry whose name neither starts nor ends with a double underscore.
    In this module it is called with both a class and an instance.
    """
    kept = []
    for attr_name, attr_value in raw_class.__dict__.items():
        if attr_name.startswith("__") or attr_name.endswith("__"):
            continue
        kept.append(attr_value)
    return kept
|
|
|
|
@dataclass
class Task:
    """Description of one benchmark entry referenced by the ``Tasks`` enum."""

    # Benchmark identifier string (e.g. "arc:challenge").
    benchmark: str
    # Metric key string (e.g. "acc,none").
    metric: str
    # Column header text used for this benchmark.
    col_name: str
|
|
class Tasks(Enum):
    """Benchmarks shown on the leaderboard, in definition order.

    Each member's value is a ``Task``; the member name is reused as the
    attribute name of the matching leaderboard column.
    """

    arc = Task(benchmark="arc:challenge", metric="acc,none", col_name="ARC-c")
    arc_easy = Task(benchmark="arc:easy", metric="acc,none", col_name="ARC-e")
    boolq = Task(benchmark="boolq", metric="acc,none", col_name="Boolq")
    hellaswag = Task(benchmark="hellaswag", metric="acc,none", col_name="HellaSwag")
    lambada_openai = Task(benchmark="lambada:openai", metric="acc,none", col_name="Lambada")
    mmlu = Task(benchmark="mmlu", metric="acc,none", col_name="MMLU")
    openbookqa = Task(benchmark="openbookqa", metric="acc,none", col_name="Openbookqa")
    piqa = Task(benchmark="piqa", metric="acc,none", col_name="Piqa")
    truthfulqa_mc = Task(benchmark="truthfulqa:mc1", metric="acc,none", col_name="Truthfulqa")
    winogrande = Task(benchmark="winogrande", metric="acc,none", col_name="Winogrande")
| |
|
|
| |
| |
| |
@dataclass
class ColumnContent:
    """Metadata describing a single table column.

    The three leading fields are required; the boolean flags after them
    all default to ``False``.
    """

    # Header text of the column.
    name: str
    # Column datatype label (this module uses "str", "number", "bool", "markdown").
    type: str
    # Whether the column is part of the default selection.
    displayed_by_default: bool
    # Optional flags.
    hidden: bool = False
    never_hidden: bool = False
    dummy: bool = False
|
|
def _static_column(attr_name, *content_args, **content_kwargs):
    """Build one ``[name, type, field]`` spec for ``make_dataclass``.

    The ``ColumnContent`` is created lazily by the field's default factory,
    so each dataclass instance receives its own fresh object.
    """
    return [
        attr_name,
        ColumnContent,
        field(default_factory=lambda: ColumnContent(*content_args, **content_kwargs)),
    ]


# Field specs for the leaderboard dataclass, in declaration order:
# identity columns first, then the average, one column per benchmark,
# and finally the model metadata columns.
auto_eval_column_list = [
    _static_column("model_type_symbol", "T", "str", False, hidden=True),
    _static_column("model", "Model", "markdown", True, never_hidden=True),
    _static_column("average", "Average", "number", True),
]

for task in Tasks:
    # ``t=task`` pins the current member; a plain closure would see only
    # the last member once the loop has finished.
    auto_eval_column_list.append(
        [
            task.name,
            ColumnContent,
            field(default_factory=lambda t=task: ColumnContent(t.value.col_name, "number", True)),
        ]
    )

auto_eval_column_list.extend(
    [
        _static_column("params", "#Params (B)", "number", True),
        _static_column("model_size", "#Size (G)", "number", True),
        _static_column("dummy", "model_name_for_query", "str", False, dummy=True),
        _static_column("model_type", "Type", "str", False, hidden=True),
        _static_column("architecture", "Architecture", "str", False),
        _static_column("weight_type", "Weight type", "str", False, hidden=True),
        _static_column("quant_type", "Quant type", "str", False),
        _static_column("precision", "Precision", "str", False),
        _static_column("weight_dtype", "Weight dtype", "str", False),
        _static_column("compute_dtype", "Compute dtype", "str", False),
        _static_column("merged", "Merged", "bool", False, hidden=True),
        _static_column("license", "Hub License", "str", False),
        # NOTE(review): this header text looks mis-encoded; kept byte-for-byte
        # because it is a runtime string - confirm the intended glyph.
        _static_column("likes", "Hub β€οΈ", "number", False),
        _static_column("still_on_hub", "Available on the hub", "bool", False, hidden=True),
        _static_column("revision", "Model sha", "str", False, hidden=False),
        _static_column("flagged", "Flagged", "bool", False, hidden=True),
        _static_column("moe", "MoE", "bool", False, hidden=True),
        _static_column("double_quant", "Double Quant", "bool", False),
        # NOTE(review): typed "bool" although the name suggests a numeric
        # value - confirm against the data before changing.
        _static_column("group_size", "Group Size", "bool", False),
    ]
)
| |
| |
# Final column order: the first two specs stay in front, the pinned columns
# follow in their original relative order, and everything else is sorted
# alphabetically by attribute name.
_PINNED_AFTER_MODEL = {"model_size", "params"}
_pinned = [spec for spec in auto_eval_column_list[2:] if spec[0] in _PINNED_AFTER_MODEL]
_rest = [spec for spec in auto_eval_column_list[2:] if spec[0] not in _PINNED_AFTER_MODEL]
sorted_columns = sorted(_rest, key=lambda spec: spec[0])
sorted_auto_eval_column_list = [
    *auto_eval_column_list[:2],
    *_pinned,
    *sorted_columns,
]

# Frozen dataclass with one ``ColumnContent`` field per spec, plus a shared
# default-constructed instance.
AutoEvalColumn = make_dataclass(
    "AutoEvalColumn",
    sorted_auto_eval_column_list,
    frozen=True,
)
auto_eval_cols = AutoEvalColumn()
|
|
|
|
@dataclass(frozen=True)
class EvalQueueColumn:
    """Columns of the evaluation queue table.

    The attributes are unannotated, so ``@dataclass`` does not treat them as
    fields; they remain plain class attributes and ``fields(EvalQueueColumn)``
    reads them from the class ``__dict__`` in definition order.
    """

    model = ColumnContent(name="model", type="markdown", displayed_by_default=True)
    revision = ColumnContent(name="revision", type="str", displayed_by_default=True)
    private = ColumnContent(name="private", type="bool", displayed_by_default=True)
    precision = ColumnContent(name="precision", type="str", displayed_by_default=True)
    weight_type = ColumnContent(name="weight_type", type="str", displayed_by_default=False)
    status = ColumnContent(name="status", type="str", displayed_by_default=True)
    eta = ColumnContent(name="eta", type="str", displayed_by_default=True)
    submitted_by = ColumnContent(name="submitted_by", type="str", displayed_by_default=True)
    submitted_time = ColumnContent(name="submitted_time", type="str", displayed_by_default=True)


# Shared instance of the evaluation queue column holder.
eval_queue_cols = EvalQueueColumn()
|
|
|
|
@dataclass(frozen=True)
class QuantQueueColumn:
    """Columns of the quantization queue table.

    The attributes are unannotated, so ``@dataclass`` does not treat them as
    fields; they remain plain class attributes and ``fields(QuantQueueColumn)``
    reads them from the class ``__dict__`` in definition order.
    """

    model = ColumnContent(name="model", type="markdown", displayed_by_default=True)
    revision = ColumnContent(name="revision", type="str", displayed_by_default=True)
    private = ColumnContent(name="private", type="bool", displayed_by_default=True)
    quant_scheme = ColumnContent(name="quant_scheme", type="str", displayed_by_default=True)
    input_dtype = ColumnContent(name="input_dtype", type="str", displayed_by_default=True)
    status = ColumnContent(name="status", type="str", displayed_by_default=True)
    eta = ColumnContent(name="eta", type="str", displayed_by_default=True)
    submitted_by = ColumnContent(name="submitted_by", type="str", displayed_by_default=True)
    submitted_time = ColumnContent(name="submitted_time", type="str", displayed_by_default=True)
|
|
|
|
@dataclass
class ModelDetails:
    """Display information attached to an enum member in this module."""

    # Human-readable name; ``_enum_from_str`` matches against this value.
    name: str
    # Optional short marker shown in front of the name by ``to_str``.
    symbol: str = ""


class ModelType(Enum):
    """Model training category, with conversion to and from display text."""

    # NOTE(review): the symbol literals below look mis-encoded; they are kept
    # byte-for-byte because they are runtime strings - confirm the glyphs.
    PT = ModelDetails(name="pretrained", symbol="π’")
    CPT = ModelDetails(name="continuously pretrained", symbol="π©")
    FT = ModelDetails(name="fine-tuned on domain-specific datasets", symbol="π·")
    chat = ModelDetails(name="chat models (RLHF, DPO, IFT, ...)", symbol="π΅")
    merges = ModelDetails(name="base merges and moerges", symbol="π")
    Unknown = ModelDetails(name="", symbol="?")

    def to_str(self, separator=" "):
        """Return ``symbol + separator + name`` for this member."""
        return f"{self.value.symbol}{separator}{self.value.name}"

    @staticmethod
    def from_str(type):
        """Map free-form text to a ``ModelType`` by substring matching.

        The checks run in a fixed order and the first hit wins; text that
        matches nothing yields ``ModelType.Unknown``.  The parameter name
        shadows the ``type`` builtin but is kept so keyword callers still work.
        """
        if "fine-tuned" in type or "π·" in type:
            return ModelType.FT
        # Must run before the plain "pretrained" test, which would otherwise
        # swallow this category.  The correct spelling (as used by
        # ``ModelType.CPT``) is accepted alongside the historical misspelling
        # so previously stored strings keep resolving.
        if (
            "continuously pretrained" in type
            or "continously pretrained" in type
            or "π©" in type
        ):
            return ModelType.CPT
        if "pretrained" in type or "π’" in type or "quantization" in type:
            return ModelType.PT
        chat_markers = ("instruction-tuned", "RL-tuned", "chat", "π¦", "β", "π΅")
        if any(marker in type for marker in chat_markers):
            return ModelType.chat
        if "merge" in type or "π" in type:
            return ModelType.merges
        return ModelType.Unknown
|
|
class WeightType(Enum):
    """Kinds of weight type; each value carries only a display name."""

    Adapter = ModelDetails(name="Adapter")
    Original = ModelDetails(name="Original")
    Delta = ModelDetails(name="Delta")
| |
|
|
class QuantType(Enum):
    """Quantization type, each with a display name and a symbol."""

    # NOTE(review): the symbol literals look mis-encoded; they are kept
    # byte-for-byte because they are runtime strings - confirm the glyphs.
    gptq = ModelDetails("GPTQ", "π’")
    aqlm = ModelDetails("AQLM", "β")
    awq = ModelDetails("AWQ", "π©")
    llama_cpp = ModelDetails("llama.cpp", "π·")
    bnb = ModelDetails("bitsandbytes", "π΅")
    autoround = ModelDetails("AutoRound", "π")
    Unknown = ModelDetails("?", "?")
    QuantType_None = ModelDetails("None", "β")

    def to_str(self, separator=" "):
        """Return ``symbol + separator + name`` for this member."""
        details = self.value
        return f"{details.symbol}{separator}{details.name}"

    @staticmethod
    def from_str(quant_dtype):
        """Look a member up by its display name; unmatched text gives ``Unknown``."""
        return _enum_from_str(
            enum_cls=QuantType,
            value=quant_dtype,
            default=QuantType.Unknown,
        )
|
|
|
|
|
|
class WeightDtype(Enum):
    """Weight dtype choices, including an "All" entry and an unknown marker."""

    all = ModelDetails(name="All")
    int2 = ModelDetails(name="int2")
    int3 = ModelDetails(name="int3")
    int4 = ModelDetails(name="int4")
    int8 = ModelDetails(name="int8")
    nf4 = ModelDetails(name="nf4")
    fp4 = ModelDetails(name="fp4")
    mxfp4 = ModelDetails(name="mxfp4")
    nvfp4 = ModelDetails(name="nvfp4")
    f16 = ModelDetails(name="float16")
    bf16 = ModelDetails(name="bfloat16")
    f32 = ModelDetails(name="float32")
    Unknown = ModelDetails(name="?")

    @staticmethod
    def from_str(weight_dtype):
        """Look a member up by its display name; unmatched text gives ``Unknown``."""
        return _enum_from_str(
            enum_cls=WeightDtype,
            value=weight_dtype,
            default=WeightDtype.Unknown,
        )
|
|
class ComputeDtype(Enum):
    """Compute dtype choices, including an "All" entry and an unknown marker."""

    all = ModelDetails(name="All")
    fp16 = ModelDetails(name="float16")
    bf16 = ModelDetails(name="bfloat16")
    int8 = ModelDetails(name="int8")
    fp32 = ModelDetails(name="float32")
    Unknown = ModelDetails(name="?")

    @staticmethod
    def from_str(compute_dtype):
        """Look a member up by its display name; unmatched text gives ``Unknown``."""
        return _enum_from_str(
            enum_cls=ComputeDtype,
            value=compute_dtype,
            default=ComputeDtype.Unknown,
        )
| |
| |
class GroupDtype(Enum):
    """Group-size choices; the display names are the sizes written as strings."""

    group_1 = ModelDetails(name="-1")
    group_1024 = ModelDetails(name="1024")
    group_256 = ModelDetails(name="256")
    group_128 = ModelDetails(name="128")
    group_64 = ModelDetails(name="64")
    group_32 = ModelDetails(name="32")
    group_all = ModelDetails(name="All")

    @staticmethod
    def from_str(group_dtype):
        """Look a member up by its display name; unmatched text gives ``group_all``."""
        return _enum_from_str(
            enum_cls=GroupDtype,
            value=group_dtype,
            default=GroupDtype.group_all,
        )
|
|
class Precision(Enum):
    """Bit-width precision labels plus an unknown marker."""

    qt_2bit = ModelDetails(name="2bit")
    qt_3bit = ModelDetails(name="3bit")
    qt_4bit = ModelDetails(name="4bit")
    qt_8bit = ModelDetails(name="8bit")
    qt_16bit = ModelDetails(name="16bit")
    qt_32bit = ModelDetails(name="32bit")
    Unknown = ModelDetails(name="?")

    @staticmethod
    def from_str(precision):
        """Look a member up by its display name; unmatched text gives ``Unknown``."""
        return _enum_from_str(
            enum_cls=Precision,
            value=precision,
            default=Precision.Unknown,
        )
|
|
|
|
|
|
|
|
| |
def _names_and_types(holder):
    """Return ``(names, types)`` lists for the column contents held by *holder*."""
    contents = fields(holder)
    return [content.name for content in contents], [content.type for content in contents]


# Header names and datatype labels for the leaderboard and both queue tables.
COLS, TYPES = _names_and_types(auto_eval_cols)
EVAL_COLS, EVAL_TYPES = _names_and_types(EvalQueueColumn)
QUANT_COLS, QUANT_TYPES = _names_and_types(QuantQueueColumn)

# Queue columns left out of the display lists below.
_QUEUE_UI_HIDDEN = {"submitted_by", "private", "eta", "revision"}


def _without_hidden(names, kinds):
    """Drop every (name, type) pair whose name is in ``_QUEUE_UI_HIDDEN``."""
    kept = [(name, kind) for name, kind in zip(names, kinds) if name not in _QUEUE_UI_HIDDEN]
    return [name for name, _ in kept], [kind for _, kind in kept]


QUANT_DISPLAY_COLS, QUANT_DISPLAY_TYPES = _without_hidden(QUANT_COLS, QUANT_TYPES)
EVAL_DISPLAY_COLS, EVAL_DISPLAY_TYPES = _without_hidden(EVAL_COLS, EVAL_TYPES)
|
|
| |
| |
# Display labels for queue columns whose header differs from the column name.
_QUEUE_HEADER_LABELS = {
    "submitted_time": "submitted time",
}


def _queue_headers(cols: list[str]) -> list[str]:
    """Translate column names into header labels.

    A name with an entry in ``_QUEUE_HEADER_LABELS`` is replaced by that
    label; any other name is passed through unchanged.
    """
    label_for = _QUEUE_HEADER_LABELS.get
    return [label_for(col, col) for col in cols]
|
|
|
|
# Header labels for the displayed queue columns, in QUANT then EVAL order.
QUANT_DISPLAY_HEADERS, EVAL_DISPLAY_HEADERS = (
    _queue_headers(visible_cols)
    for visible_cols in (QUANT_DISPLAY_COLS, EVAL_DISPLAY_COLS)
)

# Column headers of every benchmark, in ``Tasks`` definition order.
BENCHMARK_COLS = [member.value.col_name for member in Tasks]
|
|
# Label -> right-closed interval ``(low, high]``; consecutive buckets share
# their boundary value.
NUMERIC_INTERVALS = {
    label: pd.Interval(low, high, closed="right")
    for label, low, high in (
        ("?", -1, 0),
        ("~1.5", 0, 2),
        ("~3", 2, 4),
        ("~7", 4, 9),
        ("~13", 9, 20),
        ("~35", 20, 45),
        ("~60", 45, 70),
        ("70+", 70, 10000),
    )
}
|
|
# Label -> right-closed interval ``(low, high]``; consecutive buckets share
# their boundary value.
# NOTE(review): the last label reads ">72" while its interval starts at 64
# and stops at 200 - confirm both are intended.
NUMERIC_MODELSIZE = {
    label: pd.Interval(low, high, closed="right")
    for label, low, high in (
        ("?", -1, 0),
        ("~4", 0, 4),
        ("~8", 4, 8),
        ("~16", 8, 16),
        ("~36", 16, 36),
        ("~48", 36, 48),
        ("~64", 48, 64),
        (">72", 64, 200),
    )
}