Update app.py
app.py CHANGED
@@ -1,6 +1,7 @@
 import os
 import json
 import gradio as gr
+import spaces
 from huggingface_hub import InferenceClient
 from smolagents import CodeAgent, InferenceClientModel, tool
 
@@ -77,23 +78,7 @@ def classify_and_score(
     target_dok: str,
     agg: str = "max"
 ) -> dict:
-    """Classify a question against Bloom’s and DOK targets and return guidance.
-
-    Args:
-        question: The question text to evaluate for cognitive demand.
-        target_bloom: Target Bloom’s level or range. Accepts exact (e.g., "Analyze")
-            or plus form (e.g., "Apply+") meaning that level or higher.
-        target_dok: Target DOK level or range. Accepts exact (e.g., "DOK3")
-            or span (e.g., "DOK2-DOK3").
-        agg: Aggregation method over phrase similarities within a level
-            (choices: "mean", "max", "topk_mean").
-
-    Returns:
-        A dictionary with:
-            ok: True if both Bloom’s and DOK match the targets.
-            measured: Dict with best levels and per-level scores for Bloom’s and DOK.
-            feedback: Brief guidance describing how to adjust the question to hit targets.
-    """
+    """Classify a question against Bloom’s and DOK targets and return guidance."""
     res = classify_levels_phrases(
         question,
         BLOOMS_PHRASES,
@@ -173,15 +158,16 @@ def classify_and_score(
         "feedback": " ".join(feedback_parts) if feedback_parts else "On target.",
     }
 
-# ------------------------ Backend selection +
-_LOCAL_MODEL_CACHE = {
-    "model": None,
-    "model_id": None,
-}
+# ------------------------ Backend selection + GPU-wrapped local loader ------------------------
+_LOCAL_MODEL_CACHE = {"model": None, "model_id": None}
 
-
-
-
+@spaces.GPU(duration=30)  # request GPU only when loading/using local model
+def get_local_model_gpu(model_id: str):
+    """
+    Load and cache a local Transformers model for smolagents on GPU.
+    Decorated so Spaces knows this task needs a GPU.
+    """
+    # Import here to keep Hosted mode lightweight.
     try:
        from smolagents import TransformersModel  # provided by smolagents
     except Exception as e:
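
The `@spaces.GPU(duration=30)` decorator added above is the ZeroGPU pattern from the `spaces` package: the Space holds no GPU at rest, and a GPU is attached only while a decorated function runs. A minimal sketch of the pattern (the probe function below is illustrative, not part of this commit):

import spaces
import torch

@spaces.GPU(duration=30)  # seconds of GPU time requested per call
def gpu_probe() -> str:
    # CUDA is only guaranteed to be usable inside the decorated call
    return f"cuda available: {torch.cuda.is_available()}"
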
@@ -196,16 +182,14 @@ def _get_local_model(model_id: str):
     ):
         return _LOCAL_MODEL_CACHE["model"]
 
-    # Instantiate and cache
     local_model = TransformersModel(
         model_id=model_id,
-        device_map="auto"
+        device_map="auto"  # lets accelerate pick the best device(s)
     )
     _LOCAL_MODEL_CACHE["model"] = local_model
     _LOCAL_MODEL_CACHE["model_id"] = model_id
     return local_model
 
-# ------------------------ Agent setup with timeout ------------------------
 def make_agent(
     backend_choice: str,  # "Hosted API" | "Local GPU"
     hf_token: str,
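
Only the tail of the cache check is visible in this hunk (the `):` context line followed by the early return). A plausible reconstruction of the guard, assuming nothing beyond the two cache keys shown above:

if (
    _LOCAL_MODEL_CACHE["model"] is not None
    and _LOCAL_MODEL_CACHE["model_id"] == model_id
):
    return _LOCAL_MODEL_CACHE["model"]

The effect is that repeated calls with the same model_id reuse the already-loaded weights, while a new model_id falls through to a fresh `TransformersModel` load.
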
@@ -215,7 +199,8 @@ def make_agent(
     max_tokens: int
 ):
     if backend_choice == "Local GPU":
-        model = _get_local_model(model_id)
+        # This call is GPU-annotated; Spaces will allocate a GPU for it.
+        model = get_local_model_gpu(model_id)
     else:
         client = InferenceClient(
             model=model_id,
@@ -225,7 +210,6 @@
     model = InferenceClientModel(client=client)
 
     agent = CodeAgent(model=model, tools=[classify_and_score])
-    # Not used by agent core; helpful for debugging
     agent._ui_params = {"temperature": temperature, "max_tokens": max_tokens}
     return agent
 
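
The hosted branch's `InferenceClient(...)` call is cut off by the hunk boundary; only `model=model_id,` is visible. A sketch of a typical construction consistent with the UI's token and timeout fields (the `token` and `timeout` kwargs are assumptions, not read from this diff):

from huggingface_hub import InferenceClient
from smolagents import InferenceClientModel

client = InferenceClient(
    model=model_id,
    token=hf_token or None,  # anonymous access if no token is supplied
    timeout=timeout_s,       # hosted-only request timeout from the UI slider
)
model = InferenceClientModel(client=client)
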
@@ -251,7 +235,6 @@ If you output JSON, ensure it is valid JSON (no trailing commas, use double quot
 
 # ------------------------ Utility: robust JSON extractor ------------------
 def extract_top_level_json(s: str) -> str:
-    """Extract the first top-level JSON object by tracking braces."""
     start = s.find("{")
     if start == -1:
         return ""
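
The removed docstring said the function tracks braces; the body itself sits outside the hunk. A brace-counting extractor consistent with the visible lines might look like this (a sketch, not the file's exact body; note it does not skip braces inside JSON string literals):

def extract_top_level_json(s: str) -> str:
    start = s.find("{")
    if start == -1:
        return ""
    depth = 0
    for i in range(start, len(s)):
        if s[i] == "{":
            depth += 1
        elif s[i] == "}":
            depth -= 1
            if depth == 0:
                return s[start:i + 1]  # first balanced top-level object
    return ""
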
@@ -296,7 +279,6 @@ def run_pipeline(
             max_tokens=int(max_tokens),
         )
     except Exception as e:
-        # Surface backend/model setup errors directly
         err = f"ERROR initializing backend '{backend_choice}': {e}"
         return "", err
 
@@ -324,18 +306,6 @@ def run_pipeline(
 
     return final_json, result_text
 
-# ------------------------ Optional Spaces warmup --------------------------
-# If you deploy on HF Spaces and want to pre-allocate GPU for Local mode,
-# you can try to warm up the model at startup by setting:
-# BACKEND_WARMUP=1 and BACKEND_WARMUP_MODEL=<model id>
-if (os.getenv("SYSTEM") == "spaces") and os.getenv("BACKEND_WARMUP") == "1":
-    try:
-        wm = os.getenv("BACKEND_WARMUP_MODEL", "swiss-ai/Apertus-70B-Instruct-2509")
-        _get_local_model(wm)
-        print(f"[Warmup] Local GPU model loaded: {wm}")
-    except Exception as e:
-        print(f"[Warmup] Skipped or failed: {e}")
-
 # ------------------------ Gradio UI --------------------------------------
 with gr.Blocks() as demo:
     gr.Markdown("# Agent + Tool: Generate Questions to Target Difficulty")
@@ -358,7 +328,7 @@ with gr.Blocks() as demo:
     )
     model_id = gr.Textbox(
         value="swiss-ai/Apertus-70B-Instruct-2509",
-        label="Model ID (repo or local
+        label="Model ID (repo id for Hosted, or local repo for GPU)"
     )
     timeout = gr.Slider(5, 120, value=30, step=1, label="Timeout (s, Hosted API only)")
 
@@ -393,7 +363,6 @@ with gr.Blocks() as demo:
     temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature")
     max_tokens = gr.Slider(64, 1024, value=300, step=16, label="Max Tokens")
 
-    # Helpful hint text depending on backend
    backend_tips = gr.Markdown(
         "*Hosted API:* uses Hugging Face Inference endpoints. Provide a token if needed.\n\n"
         "*Local GPU:* loads the model into the Space with `TransformersModel (device_map='auto')`. "
@@ -405,13 +374,11 @@ with gr.Blocks() as demo:
     final_json = gr.Code(label="Final Candidate (JSON if detected)", language="json")
     transcript = gr.Textbox(label="Agent Transcript", lines=18)
 
-    # Dynamically show/hide token & timeout based on backend
     def _toggle_backend_fields(choice):
-        # Show token + timeout only for Hosted API
         return (
-            gr.update(visible=(choice == "Hosted API")),
-            gr.update(visible=True),
-            gr.update(visible=(choice == "Hosted API"))
+            gr.update(visible=(choice == "Hosted API")),  # hf_token
+            gr.update(visible=True),                      # model_id always visible
+            gr.update(visible=(choice == "Hosted API"))   # timeout slider
         )
 
     backend_choice.change(
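
The `backend_choice.change(...)` call itself is truncated by the hunk boundary. Given the three `gr.update` values and their new inline comments, the wiring presumably targets the token, model, and timeout widgets; a sketch using Gradio's standard event API (the exact argument list is an assumption):

backend_choice.change(
    _toggle_backend_fields,
    inputs=backend_choice,
    outputs=[hf_token, model_id, timeout],  # order matches the three gr.update values
)
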