Spaces:
Running
Running
Commit
·
7d08e8b
1
Parent(s):
0e48ab9
test new loading if it would avoid failure
Browse files
main.py
CHANGED
|
@@ -223,7 +223,7 @@ def make_leaderboard(lb: LBContainer) -> Leaderboard:
|
|
| 223 |
return Leaderboard(
|
| 224 |
elem_id=f"lb_for_{lb.name}",
|
| 225 |
value=df_leaderboard,
|
| 226 |
-
|
| 227 |
select_columns=SelectColumns(
|
| 228 |
default_selection=list(df_leaderboard.columns),
|
| 229 |
cant_deselect=["Type", "Model"],
|
|
@@ -421,41 +421,36 @@ def main():
|
|
| 421 |
gr.Markdown("## π TabArena Leaderboards")
|
| 422 |
lb_matrix = LBMatrix()
|
| 423 |
|
| 424 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 425 |
with gr.Tabs(elem_classes="tab-buttons") as t_impute_tabs:
|
| 426 |
for impute_id, impute_t in enumerate(lb_matrix.imputation):
|
| 427 |
impute_t_name = lb_matrix.get_name_for_lb("imputation", impute_t)
|
| 428 |
-
|
| 429 |
-
with gr.TabItem(
|
| 430 |
-
impute_t_name,
|
| 431 |
-
elem_id="llm-benchmark-tab-table",
|
| 432 |
-
id=impute_id,
|
| 433 |
-
) as t_impute:
|
| 434 |
-
# Splits
|
| 435 |
with gr.Tabs(elem_classes="tab-buttons"):
|
| 436 |
for splits_id, splits_t in enumerate(lb_matrix.splits):
|
| 437 |
splits_t_label = lb_matrix.get_name_for_lb(
|
| 438 |
"splits", splits_t
|
| 439 |
)
|
| 440 |
-
|
| 441 |
with gr.TabItem(
|
| 442 |
splits_t_label,
|
| 443 |
-
elem_id="llm-benchmark-tab-table",
|
| 444 |
id=f"{impute_id}_{splits_id}",
|
| 445 |
) as t_splits:
|
| 446 |
-
# Tasks
|
| 447 |
with gr.Tabs(elem_classes="tab-buttons"):
|
| 448 |
for tasks_id, tasks_t in enumerate(lb_matrix.tasks):
|
| 449 |
tasks_t_name = lb_matrix.get_name_for_lb(
|
| 450 |
"tasks", tasks_t
|
| 451 |
)
|
| 452 |
-
|
| 453 |
with gr.TabItem(
|
| 454 |
tasks_t_name,
|
| 455 |
-
elem_id="llm-benchmark-tab-table",
|
| 456 |
id=f"{impute_id}_{splits_id}_{tasks_id}",
|
| 457 |
) as t_tasks:
|
| 458 |
-
# Datasets
|
| 459 |
with gr.Tabs(elem_classes="tab-buttons"):
|
| 460 |
for (
|
| 461 |
datasets_id,
|
|
@@ -467,14 +462,11 @@ def main():
|
|
| 467 |
)
|
| 468 |
)
|
| 469 |
|
| 470 |
-
# Capture the final dataset tab object
|
| 471 |
with gr.TabItem(
|
| 472 |
datasets_t_name,
|
| 473 |
-
elem_id="llm-benchmark-tab-table",
|
| 474 |
id=f"{impute_id}_{splits_id}_{tasks_id}_{datasets_id}",
|
| 475 |
) as t_dataset:
|
| 476 |
-
#
|
| 477 |
-
# Check if this is the absolute first tab
|
| 478 |
is_absolute_first = (
|
| 479 |
impute_id == 0
|
| 480 |
and splits_id == 0
|
|
@@ -482,77 +474,59 @@ def main():
|
|
| 482 |
and datasets_id == 0
|
| 483 |
)
|
| 484 |
|
| 485 |
-
if
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
splits_t,
|
| 490 |
-
tasks_t,
|
| 491 |
-
datasets_t,
|
| 492 |
-
lb_matrix,
|
| 493 |
-
)
|
| 494 |
-
else:
|
| 495 |
-
# Lazy Loading
|
| 496 |
-
load_trigger = gr.State(
|
| 497 |
-
False
|
| 498 |
-
)
|
| 499 |
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
|
|
|
|
|
|
|
|
|
| 503 |
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
load_trigger,
|
| 509 |
)
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
if datasets_id == 0:
|
| 515 |
-
t_tasks.select(
|
| 516 |
-
activate_trigger,
|
| 517 |
-
None,
|
| 518 |
-
load_trigger,
|
| 519 |
-
)
|
| 520 |
-
|
| 521 |
-
# Continue up the chain:
|
| 522 |
-
# If this Task is also a default, clicking Split should trigger it.
|
| 523 |
-
if tasks_id == 0:
|
| 524 |
-
t_splits.select(
|
| 525 |
-
activate_trigger,
|
| 526 |
-
None,
|
| 527 |
load_trigger,
|
| 528 |
)
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
None,
|
| 535 |
load_trigger,
|
| 536 |
)
|
|
|
|
| 537 |
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
@gr.render(
|
| 548 |
-
inputs=load_trigger
|
| 549 |
-
)
|
| 550 |
-
def lazy_load(triggered):
|
| 551 |
-
if triggered:
|
| 552 |
-
renderer()
|
| 553 |
-
else:
|
| 554 |
-
gr.Markdown("<i>Loading results...</i>")
|
| 555 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 556 |
|
| 557 |
with gr.Row(), gr.Accordion("π Version History", open=False):
|
| 558 |
gr.Markdown(VERSION_HISTORY_BUTTON_TEXT, elem_classes="markdown-text")
|
|
|
|
| 223 |
return Leaderboard(
|
| 224 |
elem_id=f"lb_for_{lb.name}",
|
| 225 |
value=df_leaderboard,
|
| 226 |
+
datatype=datatypes,
|
| 227 |
select_columns=SelectColumns(
|
| 228 |
default_selection=list(df_leaderboard.columns),
|
| 229 |
cant_deselect=["Type", "Model"],
|
|
|
|
| 421 |
gr.Markdown("## π TabArena Leaderboards")
|
| 422 |
lb_matrix = LBMatrix()
|
| 423 |
|
| 424 |
+
# LIST TO STORE DEFERRED EVENT BINDINGS
|
| 425 |
+
# Tuples of (ParentComponent, ChildTriggerState)
|
| 426 |
+
deferred_bindings = []
|
| 427 |
+
|
| 428 |
+
# --- TABS LOGIC ---
|
| 429 |
with gr.Tabs(elem_classes="tab-buttons") as t_impute_tabs:
|
| 430 |
for impute_id, impute_t in enumerate(lb_matrix.imputation):
|
| 431 |
impute_t_name = lb_matrix.get_name_for_lb("imputation", impute_t)
|
| 432 |
+
|
| 433 |
+
with gr.TabItem(impute_t_name, id=impute_id) as t_impute:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 434 |
with gr.Tabs(elem_classes="tab-buttons"):
|
| 435 |
for splits_id, splits_t in enumerate(lb_matrix.splits):
|
| 436 |
splits_t_label = lb_matrix.get_name_for_lb(
|
| 437 |
"splits", splits_t
|
| 438 |
)
|
| 439 |
+
|
| 440 |
with gr.TabItem(
|
| 441 |
splits_t_label,
|
|
|
|
| 442 |
id=f"{impute_id}_{splits_id}",
|
| 443 |
) as t_splits:
|
|
|
|
| 444 |
with gr.Tabs(elem_classes="tab-buttons"):
|
| 445 |
for tasks_id, tasks_t in enumerate(lb_matrix.tasks):
|
| 446 |
tasks_t_name = lb_matrix.get_name_for_lb(
|
| 447 |
"tasks", tasks_t
|
| 448 |
)
|
| 449 |
+
|
| 450 |
with gr.TabItem(
|
| 451 |
tasks_t_name,
|
|
|
|
| 452 |
id=f"{impute_id}_{splits_id}_{tasks_id}",
|
| 453 |
) as t_tasks:
|
|
|
|
| 454 |
with gr.Tabs(elem_classes="tab-buttons"):
|
| 455 |
for (
|
| 456 |
datasets_id,
|
|
|
|
| 462 |
)
|
| 463 |
)
|
| 464 |
|
|
|
|
| 465 |
with gr.TabItem(
|
| 466 |
datasets_t_name,
|
|
|
|
| 467 |
id=f"{impute_id}_{splits_id}_{tasks_id}_{datasets_id}",
|
| 468 |
) as t_dataset:
|
| 469 |
+
# 1. Determine if Absolute First (Load immediately)
|
|
|
|
| 470 |
is_absolute_first = (
|
| 471 |
impute_id == 0
|
| 472 |
and splits_id == 0
|
|
|
|
| 474 |
and datasets_id == 0
|
| 475 |
)
|
| 476 |
|
| 477 |
+
# 2. State Trigger (True if first, False otherwise)
|
| 478 |
+
load_trigger = gr.State(
|
| 479 |
+
value=is_absolute_first
|
| 480 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 481 |
|
| 482 |
+
# 3. Self-Click Trigger
|
| 483 |
+
t_dataset.select(
|
| 484 |
+
lambda: True,
|
| 485 |
+
None,
|
| 486 |
+
load_trigger,
|
| 487 |
+
)
|
| 488 |
|
| 489 |
+
# 4. Store Parent->Child Triggers for LATER application (avoids recursion error)
|
| 490 |
+
if datasets_id == 0:
|
| 491 |
+
deferred_bindings.append(
|
| 492 |
+
(t_tasks, load_trigger)
|
|
|
|
| 493 |
)
|
| 494 |
+
if tasks_id == 0:
|
| 495 |
+
deferred_bindings.append(
|
| 496 |
+
(
|
| 497 |
+
t_splits,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 498 |
load_trigger,
|
| 499 |
)
|
| 500 |
+
)
|
| 501 |
+
if splits_id == 0:
|
| 502 |
+
deferred_bindings.append(
|
| 503 |
+
(
|
| 504 |
+
t_impute,
|
|
|
|
| 505 |
load_trigger,
|
| 506 |
)
|
| 507 |
+
)
|
| 508 |
|
| 509 |
+
# 5. Render Logic
|
| 510 |
+
renderer = partial(
|
| 511 |
+
render_details,
|
| 512 |
+
imputation=impute_t,
|
| 513 |
+
splits=splits_t,
|
| 514 |
+
tasks=tasks_t,
|
| 515 |
+
datasets=datasets_t,
|
| 516 |
+
lb_matrix=lb_matrix,
|
| 517 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 518 |
|
| 519 |
+
@gr.render(inputs=load_trigger)
|
| 520 |
+
def lazy_load(triggered):
|
| 521 |
+
if triggered:
|
| 522 |
+
renderer()
|
| 523 |
+
else:
|
| 524 |
+
gr.Markdown("Loading...")
|
| 525 |
+
|
| 526 |
+
# --- APPLY DEFERRED BINDINGS ---
|
| 527 |
+
# Now that the block dictionary is stable, we can safely add cross-component events
|
| 528 |
+
for parent_tab, child_trigger in deferred_bindings:
|
| 529 |
+
parent_tab.select(lambda: True, None, child_trigger)
|
| 530 |
|
| 531 |
with gr.Row(), gr.Accordion("π Version History", open=False):
|
| 532 |
gr.Markdown(VERSION_HISTORY_BUTTON_TEXT, elem_classes="markdown-text")
|