Add accuracy data
#4
by
JonnaMat - opened
- app.js +97 -0
- config.json +7 -3
- data/acc-Cosmos-Reason2.csv +5 -0
- index.html +3 -0
- style.css +37 -1
app.js
CHANGED
|
@@ -59,6 +59,38 @@ async function loadFamilyData(familyKey) {
|
|
| 59 |
// Current family's loaded data
|
| 60 |
let DATA = [];
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
// ─── Config shortcuts ────────────────────────────────────────────────────────
|
| 63 |
|
| 64 |
const MODEL_COL = config.model_column;
|
|
@@ -705,6 +737,70 @@ function buildExperimentSetup() {
|
|
| 705 |
});
|
| 706 |
}
|
| 707 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 708 |
// ─── Render ──────────────────────────────────────────────────────────────────
|
| 709 |
|
| 710 |
function render() {
|
|
@@ -729,6 +825,7 @@ function render() {
|
|
| 729 |
(config.metrics.length <= 1 || !chartsShown) ? "none" : "";
|
| 730 |
}
|
| 731 |
buildTables(filtered, chartsShown);
|
|
|
|
| 732 |
buildExperimentSetup();
|
| 733 |
}
|
| 734 |
|
|
|
|
| 59 |
// Current family's loaded data
|
| 60 |
let DATA = [];
|
| 61 |
|
| 62 |
+
// ─── Accuracy data cache ─────────────────────────────────────────────────────
|
| 63 |
+
|
| 64 |
+
// Parsed accuracy tables keyed by file path. Created without a prototype so
// that inherited names (e.g. "constructor") are never mistaken for cache hits.
const accDataCache = Object.create(null);

// Requests that have been started but not yet settled, keyed by file path.
// Lets overlapping callers share one fetch instead of each starting their own.
const accDataInFlight = new Map();

/**
 * Split one CSV line into raw field strings.
 *
 * Supports double-quoted fields (which may contain commas) and `""` as an
 * escaped quote inside a quoted field. Lines without quotes are split on
 * commas exactly as a plain `split(",")` would.
 *
 * @param {string} line - A single CSV line without its line terminator.
 * @returns {string[]} The fields, untrimmed.
 */
function splitCsvLine(line) {
  const fields = [];
  let field = "";
  let inQuotes = false;
  for (let i = 0; i < line.length; i += 1) {
    const ch = line[i];
    if (inQuotes) {
      if (ch !== '"') {
        field += ch;
      } else if (line[i + 1] === '"') {
        // Doubled quote inside a quoted field is a literal quote.
        field += '"';
        i += 1;
      } else {
        inQuotes = false;
      }
    } else if (ch === '"') {
      inQuotes = true;
    } else if (ch === ",") {
      fields.push(field);
      field = "";
    } else {
      field += ch;
    }
  }
  fields.push(field);
  return fields;
}

/**
 * Parse CSV text into a header list and an array of row objects.
 *
 * Header names and cell values are trimmed; blank lines are ignored; cells
 * missing from a short row become empty strings. All values stay strings.
 *
 * @param {string} text - Raw CSV text.
 * @returns {{headers: string[], rows: Object<string, string>[]} | null}
 *   The parsed table, or null when the text has no non-blank line.
 */
function parseAccuracyCsv(text) {
  const lines = text
    .replace(/\r/g, "")
    .split("\n")
    .filter((line) => line.trim() !== "");
  if (lines.length === 0) return null;

  const headers = splitCsvLine(lines[0]).map((header) => header.trim());
  const rows = lines.slice(1).map((line) => {
    const cells = splitCsvLine(line);
    return Object.fromEntries(
      headers.map((header, i) => [header, (cells[i] ?? "").trim()]),
    );
  });
  return { headers, rows };
}

/**
 * Fetch and parse one accuracy CSV. Never rejects: every failure is logged
 * and reported as null so callers can treat the table as optional.
 *
 * @param {string} filePath - URL or relative path of the CSV file.
 * @returns {Promise<{headers: string[], rows: Object<string, string>[]} | null>}
 */
async function fetchAccuracyCsv(filePath) {
  try {
    const response = await fetch(filePath);
    if (!response.ok) {
      console.warn(`Accuracy data request for ${filePath} failed with HTTP ${response.status}`);
      return null;
    }
    const text = await response.text();
    if (!text) return null;
    return parseAccuracyCsv(text);
  } catch (err) {
    console.warn(`Failed to load accuracy data from ${filePath}:`, err);
    return null;
  }
}

/**
 * Load an accuracy CSV, caching successful results per file path.
 *
 * Failed loads are not cached, so a later call retries. Concurrent calls for
 * the same path share a single network request.
 *
 * @param {string} filePath - URL or relative path of the CSV; falsy yields null.
 * @returns {Promise<{headers: string[], rows: Object<string, string>[]} | null>}
 *   The parsed table, or null when the path is falsy, the request fails, or
 *   the file is empty.
 */
async function loadAccuracyData(filePath) {
  if (!filePath) return null;
  if (accDataCache[filePath]) return accDataCache[filePath];

  const pending = accDataInFlight.get(filePath);
  if (pending) return pending;

  const request = fetchAccuracyCsv(filePath);
  accDataInFlight.set(filePath, request);
  try {
    const result = await request;
    if (result) accDataCache[filePath] = result;
    return result;
  } finally {
    accDataInFlight.delete(filePath);
  }
}
|
| 93 |
+
|
| 94 |
// ─── Config shortcuts ────────────────────────────────────────────────────────
|
| 95 |
|
| 96 |
const MODEL_COL = config.model_column;
|
|
|
|
| 737 |
});
|
| 738 |
}
|
| 739 |
|
| 740 |
+
// ─── Accuracy Table ──────────────────────────────────────────────────────────
|
| 741 |
+
|
| 742 |
+
// Sequence number of the most recently started accuracy-table build. A build
// that finds a newer number after its await knows it has been superseded.
let accuracyRenderSeq = 0;

/**
 * Escape a value for safe interpolation into HTML text or a double-quoted
 * attribute.
 *
 * @param {*} value - Value to escape; coerced to a string.
 * @returns {string} The escaped string.
 */
function escapeAccuracyHtml(value) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  return String(value).replace(/[&<>"']/g, (ch) => entities[ch]);
}

/**
 * Render the accuracy table for the active model family into
 * `#accuracy-section`.
 *
 * The section is emptied first and stays empty when the family has no
 * `accuracy_file`, the file cannot be loaded, or no row matches the active
 * model set. The first CSV column is treated as the model name; all other
 * columns are metrics, and the highest numeric value in each metric column is
 * emphasised.
 *
 * Safe to call repeatedly without awaiting: if another call starts while this
 * one is waiting for data, this one exits without touching the DOM again.
 *
 * @returns {Promise<void>}
 */
async function buildAccuracyTable() {
  accuracyRenderSeq += 1;
  const renderSeq = accuracyRenderSeq;

  const section = document.getElementById("accuracy-section");
  if (!section) return;
  section.innerHTML = "";

  const familyCfg = config.model_families?.[activeFamilyKey()] || {};
  const accFile = familyCfg.accuracy_file;
  if (!accFile) return;

  const accData = await loadAccuracyData(accFile);
  // A newer build started while we were waiting; let it own the section.
  if (renderSeq !== accuracyRenderSeq) return;
  if (!accData || !accData.rows.length) return;

  const [modelCol, ...metricCols] = accData.headers;

  // Keep only rows whose model is in the currently active model set.
  const activeModels = getActiveModelSet();
  const rows = accData.rows.filter((r) => activeModels.has(r[modelCol]));
  if (!rows.length) return;

  // Best value per metric column (higher is better for accuracy).
  const best = {};
  metricCols.forEach((col) => {
    const vals = rows
      .map((r) => Number.parseFloat(r[col]))
      .filter((v) => !Number.isNaN(v));
    if (vals.length) best[col] = Math.max(...vals);
  });

  // Fixed model column.
  const modelRowsHtml = rows
    .map((r) => {
      const model = r[modelCol];
      const modelColor = MODEL_COLORS[model]?.border || "#888";
      const href = escapeAccuracyHtml(`${LINK_PREFIX}${model}`);
      const label = escapeAccuracyHtml(model);
      return `<tr><td class="model-cell"><span class="model-dot" style="background:${modelColor}"></span><a href="${href}" target="_blank" rel="noopener" style="color:${modelColor}">${label}</a></td></tr>`;
    })
    .join("");
  const fixedHtml = `<table><thead><tr><th>MODEL</th></tr></thead><tbody>${modelRowsHtml}</tbody></table>`;

  // Scrollable metric columns.
  const headHtml = metricCols
    .map((h) => `<th class="metric-cell">${escapeAccuracyHtml(h)}</th>`)
    .join("");
  const metricRowsHtml = rows
    .map((r) => {
      const cells = metricCols
        .map((col) => {
          const val = Number.parseFloat(r[col]);
          const isNumeric = !Number.isNaN(val);
          // Non-numeric cells show their raw text, or a dash when empty.
          const display = isNumeric ? val.toFixed(2) : escapeAccuracyHtml(r[col] || "—");
          const isBest = isNumeric && val === best[col];
          const content = isBest
            ? `<strong style="color: white; opacity: 0.7">${display}</strong>`
            : display;
          return `<td class="metric-cell">${content}</td>`;
        })
        .join("");
      return `<tr>${cells}</tr>`;
    })
    .join("");
  const scrollHtml = `<table><thead><tr>${headHtml}</tr></thead><tbody>${metricRowsHtml}</tbody></table>`;

  const accTitle = escapeAccuracyHtml(
    familyCfg.accuracy_title || config.accuracy_title || "Accuracy",
  );
  const accUrl = familyCfg.accuracy_url;
  const titleHtml = accUrl
    ? `<h3><a href="${escapeAccuracyHtml(accUrl)}" target="_blank" rel="noopener" class="acc-title-link">${accTitle}</a></h3>`
    : `<h3>${accTitle}</h3>`;

  const card = document.createElement("div");
  card.className = "table-card";
  card.innerHTML = `${titleHtml}<div class="table-split"><div class="table-split-fixed">${fixedHtml}</div><div class="table-split-scroll">${scrollHtml}</div></div>`;

  // Clear again right before appending so the section never holds two cards.
  section.innerHTML = "";
  section.appendChild(card);
}
|
| 803 |
+
|
| 804 |
// ─── Render ──────────────────────────────────────────────────────────────────
|
| 805 |
|
| 806 |
function render() {
|
|
|
|
| 825 |
(config.metrics.length <= 1 || !chartsShown) ? "none" : "";
|
| 826 |
}
|
| 827 |
buildTables(filtered, chartsShown);
|
| 828 |
+
buildAccuracyTable();
|
| 829 |
buildExperimentSetup();
|
| 830 |
}
|
| 831 |
|
config.json
CHANGED
|
@@ -138,7 +138,10 @@
|
|
| 138 |
"agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.14.0 tegra, 256 tokens generated, 10 warm-up runs, averaged over 25 runs.",
|
| 139 |
"orin_nano": "Measurement setup: NVIDIA AI IoT vLLM 0.14.0 tegra, 256 tokens generated, 10 warm-up runs, averaged over 25 runs."
|
| 140 |
},
|
| 141 |
-
"default_device": "orin_nano"
|
|
|
|
|
|
|
|
|
|
| 142 |
},
|
| 143 |
"Qwen3.5": {
|
| 144 |
"data_file": "data/Qwen3.5.csv",
|
|
@@ -153,5 +156,6 @@
|
|
| 153 |
},
|
| 154 |
"default_device": "agx_orin"
|
| 155 |
}
|
| 156 |
-
}
|
| 157 |
-
|
|
|
|
|
|
| 138 |
"agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.14.0 tegra, 256 tokens generated, 10 warm-up runs, averaged over 25 runs.",
|
| 139 |
"orin_nano": "Measurement setup: NVIDIA AI IoT vLLM 0.14.0 tegra, 256 tokens generated, 10 warm-up runs, averaged over 25 runs."
|
| 140 |
},
|
| 141 |
+
"default_device": "orin_nano",
|
| 142 |
+
"accuracy_file": "data/acc-Cosmos-Reason2.csv",
|
| 143 |
+
"accuracy_title": "Physical AI Bench (PAI-Bench)",
|
| 144 |
+
"accuracy_url": "https://huggingface.co/spaces/shi-labs/physical-ai-bench-leaderboard"
|
| 145 |
},
|
| 146 |
"Qwen3.5": {
|
| 147 |
"data_file": "data/Qwen3.5.csv",
|
|
|
|
| 156 |
},
|
| 157 |
"default_device": "agx_orin"
|
| 158 |
}
|
| 159 |
+
},
|
| 160 |
+
"accuracy_title": "Accuracy"
|
| 161 |
+
}
|
data/acc-Cosmos-Reason2.csv
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Model,Overall,Embodied Reasoning,Common Sense,AV,Physical World,Time,Space,Agibot,HoloAssist,RoboFail,RoboVQA,BridgeData V2
|
| 2 |
+
nvidia/Cosmos-Reason2-2B,50.60,53.93,47.19,44.00,46.90,45.30,55.00,34.00,60.00,49.00,90.91,42.00
|
| 3 |
+
embedl/Cosmos-Reason2-2B-NVFP4A16,49.84,50.16,49.50,44.00,45.13,52.01,52.50,28.00,58.00,51.00,84.55,32.00
|
| 4 |
+
embedl/Cosmos-Reason2-2B-W4A16,48.68,50.49,46.85,36.00,47.79,44.30,53.75,36.00,61.00,42.00,80.91,44.00
|
| 5 |
+
embedl/Cosmos-Reason2-2B-W4A16-Edge2,50.58,53.61,47.52,45.00,44.25,48.66,52.50,32.00,59.00,54.00,85.45,43.00
|
index.html
CHANGED
|
@@ -28,6 +28,9 @@
|
|
| 28 |
<p class="hero-sub" id="hero-sub">Compare throughput and latency across devices and model variants.</p>
|
| 29 |
</header>
|
| 30 |
|
|
|
|
|
|
|
|
|
|
| 31 |
<!-- Filters -->
|
| 32 |
<section class="filters-bar" id="filters-bar"></section>
|
| 33 |
|
|
|
|
| 28 |
<p class="hero-sub" id="hero-sub">Compare throughput and latency across devices and model variants.</p>
|
| 29 |
</header>
|
| 30 |
|
| 31 |
+
<!-- Accuracy Table -->
|
| 32 |
+
<section id="accuracy-section"></section>
|
| 33 |
+
|
| 34 |
<!-- Filters -->
|
| 35 |
<section class="filters-bar" id="filters-bar"></section>
|
| 36 |
|
style.css
CHANGED
|
@@ -384,7 +384,43 @@ tbody tr.row-group-break td {
|
|
| 384 |
border-top: 2px solid var(--border);
|
| 385 |
}
|
| 386 |
|
| 387 |
-
/* ──
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 388 |
.legend-section {
|
| 389 |
margin: 2rem 0;
|
| 390 |
padding-top: 1.5rem;
|
|
|
|
| 384 |
border-top: 2px solid var(--border);
|
| 385 |
}
|
| 386 |
|
| 387 |
+
/* ── Sticky first column in scrollable tables ─────────── */
|
| 388 |
+
.table-split {
|
| 389 |
+
display: flex;
|
| 390 |
+
}
|
| 391 |
+
|
| 392 |
+
.table-split-fixed table,
|
| 393 |
+
.table-split-scroll table {
|
| 394 |
+
width: 100%;
|
| 395 |
+
}
|
| 396 |
+
|
| 397 |
+
.table-split-fixed {
|
| 398 |
+
flex-shrink: 0;
|
| 399 |
+
}
|
| 400 |
+
|
| 401 |
+
.table-split-scroll {
|
| 402 |
+
overflow-x: auto;
|
| 403 |
+
flex: 1;
|
| 404 |
+
min-width: 0;
|
| 405 |
+
scrollbar-color: var(--text-dim) var(--border);
|
| 406 |
+
scrollbar-width: thin;
|
| 407 |
+
}
|
| 408 |
+
#accuracy-section:empty {
|
| 409 |
+
margin-bottom: 0;
|
| 410 |
+
}
|
| 411 |
+
#accuracy-section {
|
| 412 |
+
margin-bottom: 2.5rem;
|
| 413 |
+
}
|
| 414 |
+
|
| 415 |
+
.acc-title-link {
|
| 416 |
+
color: inherit;
|
| 417 |
+
text-decoration: none;
|
| 418 |
+
}
|
| 419 |
+
.acc-title-link:hover {
|
| 420 |
+
text-decoration: underline;
|
| 421 |
+
color: var(--accent);
|
| 422 |
+
}
|
| 423 |
+
|
| 424 |
.legend-section {
|
| 425 |
margin: 2rem 0;
|
| 426 |
padding-top: 1.5rem;
|