Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -420,71 +420,125 @@ def smart_distractors(correct: str, phrase_pool: List[str], sentence: str, k: in
|
|
| 420 |
if len(out) < k:
|
| 421 |
out.extend(legacy_distractors(correct, phrase_pool, k=k-len(out)))
|
| 422 |
return out[:k]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 423 |
|
| 424 |
# ====== (4-أ) مُولِّد أسئلة "فراغ" ======
|
| 425 |
def make_mcqs(text:str, n:int=6, difficulty: str = "متوسط")->List[MCQ]:
|
| 426 |
all_sents = split_sents(text)
|
| 427 |
sents = pick_clean_sentences(all_sents, difficulty)
|
| 428 |
if not sents:
|
| 429 |
-
|
|
|
|
| 430 |
|
| 431 |
-
keyphrases = yake_keywords(text, k=
|
| 432 |
keyphrases = [kp for kp in keyphrases if safe_keyword(kp) and 2 <= len(kp) <= 40]
|
| 433 |
|
| 434 |
-
# ربط العبارة بجملة مناسبة (
|
| 435 |
-
sent_for={}
|
| 436 |
for s in sents:
|
| 437 |
for kp in keyphrases:
|
| 438 |
-
if kp in sent_for:
|
| 439 |
-
|
| 440 |
-
if
|
| 441 |
-
sent_for[kp]=s
|
| 442 |
-
if len(sent_for)>=n*
|
| 443 |
break
|
| 444 |
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
freq = [w for w,_ in sorted(((t, text.count(t)) for t in tokens), key=lambda x:-x[1])]
|
| 448 |
-
keyphrases = [w for w in freq if safe_keyword(w)][:150]
|
| 449 |
-
for s in sents:
|
| 450 |
-
for kp in keyphrases:
|
| 451 |
-
if kp in sent_for: continue
|
| 452 |
-
hits = re2.findall(rf"(?<!\p{{L}}){re2.escape(kp)}(?!\p{{L}})", s)
|
| 453 |
-
if len(hits) == 1: sent_for[kp]=s
|
| 454 |
-
if len(sent_for)>=n*2:
|
| 455 |
-
break
|
| 456 |
-
|
| 457 |
-
if not sent_for:
|
| 458 |
-
raise RuntimeError("تعذّر توليد أسئلة من هذا النص.")
|
| 459 |
-
|
| 460 |
-
# أولوية للعبارات الأطول (أعلميّة أعلى)
|
| 461 |
-
items=[]; used_sents=set(); used_keys=set()
|
| 462 |
-
for kp in sorted(sent_for.keys(), key=lambda x: (-len(x), x)):
|
| 463 |
-
if len(items)>=n: break
|
| 464 |
-
s=sent_for[kp]
|
| 465 |
-
if s in used_sents or kp in used_keys: continue
|
| 466 |
|
| 467 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 468 |
|
|
|
|
| 469 |
pool = [x for x in keyphrases if x != kp]
|
| 470 |
ch = smart_distractors(kp, pool, s, k=3, all_sentences=all_sents, difficulty=difficulty) + [kp]
|
| 471 |
|
| 472 |
-
#
|
| 473 |
-
|
| 474 |
for c in ch:
|
| 475 |
c = c.strip()
|
| 476 |
-
if not c or c in seen:
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
while len(
|
| 480 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 481 |
|
| 482 |
-
items.append(MCQ(id=str(uuid.uuid4())[:8], question=q, choices=
|
| 483 |
-
|
| 484 |
|
| 485 |
if not items:
|
| 486 |
raise RuntimeError("تعذّر توليد أسئلة.")
|
| 487 |
-
return items
|
| 488 |
|
| 489 |
# ====== (4-ب) مُولِّد أسئلة "فهم مباشر" (mT5) ======
|
| 490 |
_MT5 = {"tok": None, "model": None, "ok": False}
|
|
@@ -557,18 +611,36 @@ def gen_one_comp_q(sentence: str, tok, model, max_new_tokens=128) -> Optional[MC
|
|
| 557 |
def make_comp_mcqs(text: str, n: int = 6, difficulty: str = "متوسط") -> List[MCQ]:
|
| 558 |
tok, model, ok = get_mt5()
|
| 559 |
if not ok:
|
|
|
|
| 560 |
return make_mcqs(text, n, difficulty=difficulty)
|
| 561 |
|
| 562 |
sents_all = split_sents(text)
|
| 563 |
sents = pick_clean_sentences(sents_all, difficulty)
|
|
|
|
|
|
|
| 564 |
if not sents:
|
| 565 |
return make_mcqs(text, n, difficulty=difficulty)
|
| 566 |
|
| 567 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 568 |
|
| 569 |
items: List[MCQ] = []
|
| 570 |
tried = 0
|
| 571 |
-
for s in
|
| 572 |
if len(items) >= n: break
|
| 573 |
mcq = gen_one_comp_q(s, tok, model)
|
| 574 |
tried += 1
|
|
@@ -583,15 +655,15 @@ def make_comp_mcqs(text: str, n: int = 6, difficulty: str = "متوسط") -> Lis
|
|
| 583 |
seen.add(c); clean.append(c)
|
| 584 |
clean = (clean + ["…","…","…","…"])[:4]
|
| 585 |
ai = mcq.answer_index if isinstance(mcq.answer_index,int) and 0<=mcq.answer_index<4 else 0
|
| 586 |
-
|
| 587 |
items.append(MCQ(id=str(uuid.uuid4())[:8], question=q, choices=clean, answer_index=ai))
|
| 588 |
-
if tried >= n *
|
| 589 |
break
|
| 590 |
|
| 591 |
if not items:
|
| 592 |
return make_mcqs(text, n, difficulty=difficulty)
|
| 593 |
return items[:n]
|
| 594 |
|
|
|
|
| 595 |
# ------------------ تحويل إلى سجلات العرض ------------------
|
| 596 |
def clean_option_text(t: str) -> str:
|
| 597 |
t = (t or "").strip()
|
|
@@ -656,16 +728,25 @@ def build_quiz(text_area, file_path, n, model_id, zoom, mode, difficulty):
|
|
| 656 |
raw = text_area if text_area else file_to_text(file_path, model_id=model_id, zoom=float(zoom))[0]
|
| 657 |
cleaned = postprocess(raw)
|
| 658 |
|
|
|
|
| 659 |
try:
|
| 660 |
if mode == "فهم مباشر":
|
| 661 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 662 |
else:
|
| 663 |
items = make_mcqs(cleaned, n=int(n), difficulty=difficulty)
|
| 664 |
except Exception:
|
| 665 |
items = make_mcqs(cleaned, n=int(n), difficulty=difficulty)
|
|
|
|
| 666 |
|
| 667 |
recs = to_records(items)
|
| 668 |
-
|
|
|
|
|
|
|
| 669 |
|
| 670 |
# ------------------ CSS ------------------
|
| 671 |
CSS = """
|
|
|
|
| 420 |
if len(out) < k:
|
| 421 |
out.extend(legacy_distractors(correct, phrase_pool, k=k-len(out)))
|
| 422 |
return out[:k]
|
| 423 |
+
def best_keyword_in_sentence(sentence: str, global_text: str) -> Optional[str]:
    """Pick a fill-in-the-blank target from within ``sentence`` itself.

    YAKE is run on the sentence alone. A candidate phrase is kept only if it
    passes ``good_kw`` and ``safe_keyword``, is 2-40 characters long after
    whitespace normalisation, and occurs in the sentence with no letter
    directly before or after it. The kept phrase with the highest
    ``len(phrase) + 0.5 * global_text.count(phrase)`` wins; on a tie the
    phrase YAKE listed first wins.

    When no phrase is kept, the longest run of letters in the sentence that
    passes ``good_kw`` and ``safe_keyword`` is returned instead.

    Args:
        sentence: The sentence the blank will be cut from.
        global_text: Full source text, used only to count occurrences.

    Returns:
        The chosen keyword, or ``None`` when nothing suitable is found.
    """
    try:
        extractor = yake.KeywordExtractor(lan='ar', n=3, top=20)
        extracted = extractor.extract_keywords(sentence)
    except Exception:
        # Best effort: any YAKE failure simply leaves us with the fallback.
        extracted = []

    scored = []
    for phrase, _ in extracted:
        phrase = re2.sub(r"\s+", " ", phrase.strip())
        acceptable = (
            phrase
            and good_kw(phrase)
            and safe_keyword(phrase)
            and 2 <= len(phrase) <= 40
            # The phrase must literally appear in the sentence as a whole unit.
            and re2.search(rf"(?<!\p{{L}}){re2.escape(phrase)}(?!\p{{L}})", sentence)
        )
        if not acceptable:
            continue
        # Length dominates; global frequency is a light tie-breaking weight.
        scored.append((phrase, len(phrase) + 0.5 * global_text.count(phrase)))

    if scored:
        return max(scored, key=lambda item: item[1])[0]

    # Simpler fallback: the longest acceptable single word.
    words = [
        token
        for token in re2.findall(r"\p{L}+", sentence)
        if good_kw(token) and safe_keyword(token)
    ]
    return max(words, key=len) if words else None
|
| 453 |
|
| 454 |
# ====== (4-أ) مُولِّد أسئلة "فراغ" ======
|
| 455 |
def _blank_pattern(keyword: str):
    """Compile a pattern matching ``keyword`` only when no letter touches it.

    NOTE(review): relies on ``re2`` supporting ``\\p{L}`` and look-behind,
    as the rest of this file already does.
    """
    return re2.compile(rf"(?<!\p{{L}}){re2.escape(keyword)}(?!\p{{L}})")


def _blank_choices(answer: str, distractors: List[str]):
    """Build exactly four shuffled choices and locate the correct one.

    Distractors are stripped, de-duplicated and never allowed to equal the
    answer; missing slots are padded with "…". The answer is stripped the
    same way as the distractors, so its index is always found after the
    shuffle instead of falling back to a fixed position.

    Returns:
        A ``(choices, answer_index)`` pair.
    """
    answer = answer.strip()
    seen = {answer}
    picked: List[str] = []
    for option in distractors:
        option = option.strip()
        if not option or option in seen:
            continue
        seen.add(option)
        picked.append(option)
    picked = picked[:3]
    picked.extend(["…"] * (3 - len(picked)))

    choices = picked + [answer]
    random.shuffle(choices)
    return choices, choices.index(answer)


def make_mcqs(text:str, n:int=6, difficulty: str = "متوسط")->List[MCQ]:
    """Generate up to ``n`` fill-in-the-blank multiple-choice questions.

    Two passes are made:

    1. Keyphrases extracted from the whole text are matched to the first
       sentence that contains them; longer keyphrases are used first.
    2. If that yields fewer than ``n`` questions, each sentence is asked
       for its own best keyword via ``best_keyword_in_sentence``.

    Args:
        text: Source text to build questions from.
        n: Maximum number of questions to return.
        difficulty: Difficulty label forwarded to sentence selection and
            distractor generation.

    Returns:
        At most ``n`` ``MCQ`` items, each with four choices.

    Raises:
        RuntimeError: If no question could be generated.
    """
    all_sents = split_sents(text)
    sents = pick_clean_sentences(all_sents, difficulty)
    if not sents:
        # Not enough "clean" sentences: fall back to every available one.
        sents = all_sents[:]

    keyphrases = yake_keywords(text, k=240)
    keyphrases = [kp for kp in keyphrases if safe_keyword(kp) and 2 <= len(kp) <= 40]
    # Compile each boundary pattern once instead of per (sentence, keyword).
    patterns = {kp: _blank_pattern(kp) for kp in keyphrases}

    # Map each keyphrase to the first sentence that contains it (the phrase
    # may occur more than once in that sentence).
    sent_for = {}
    for s in sents:
        for kp in keyphrases:
            if kp not in sent_for and patterns[kp].search(s):
                sent_for[kp] = s
        if len(sent_for) >= n * 4:  # keep more candidates than needed
            break

    items: List[MCQ] = []
    used_pairs = set()  # (sentence, keyword)

    def add_question(sentence: str, keyword: str, pool: List[str]) -> None:
        """Append one blank question unless the pair was used or has no match."""
        pair = (sentence, keyword)
        if pair in used_pairs:
            return
        pattern = patterns.get(keyword) or _blank_pattern(keyword)
        question, replaced = pattern.subn("_____", sentence, count=1)
        if not replaced:
            # The keyword does not occur verbatim in the sentence.
            return
        distractors = smart_distractors(
            keyword, pool, sentence, k=3, all_sentences=all_sents, difficulty=difficulty
        )
        choices, answer_index = _blank_choices(keyword, distractors)
        items.append(MCQ(id=str(uuid.uuid4())[:8], question=question,
                         choices=choices, answer_index=answer_index))
        used_pairs.add(pair)

    # (a) Consume the keyphrase/sentence matches first, longest phrase first.
    for kp in sorted(sent_for, key=lambda x: (-len(x), x)):
        if len(items) >= n:
            break
        add_question(sent_for[kp], kp, [x for x in keyphrases if x != kp])

    # (b) Still short: pick a target from within each sentence itself.
    for s in sents:
        if len(items) >= n:
            break
        kp = best_keyword_in_sentence(s, text)
        if not kp:
            continue
        pool = [x for x in keyphrases if x != kp] or keyphrases[:]
        add_question(s, kp, pool)

    if not items:
        raise RuntimeError("تعذّر توليد أسئلة.")
    return items[:n]
|
| 542 |
|
| 543 |
# ====== (4-ب) مُولِّد أسئلة "فهم مباشر" (mT5) ======
|
| 544 |
_MT5 = {"tok": None, "model": None, "ok": False}
|
|
|
|
| 611 |
def make_comp_mcqs(text: str, n: int = 6, difficulty: str = "متوسط") -> List[MCQ]:
|
| 612 |
tok, model, ok = get_mt5()
|
| 613 |
if not ok:
|
| 614 |
+
# عدم توفر mT5 → ارجعي لأسئلة الفراغ
|
| 615 |
return make_mcqs(text, n, difficulty=difficulty)
|
| 616 |
|
| 617 |
sents_all = split_sents(text)
|
| 618 |
sents = pick_clean_sentences(sents_all, difficulty)
|
| 619 |
+
if not sents:
|
| 620 |
+
sents = sents_all[:]
|
| 621 |
if not sents:
|
| 622 |
return make_mcqs(text, n, difficulty=difficulty)
|
| 623 |
|
| 624 |
+
# جرّبي أولًا على جمل مفردة، ثم على “مقاطع” (دمج 2–3 جمل) إذا لزم
|
| 625 |
+
def make_chunks(sents, max_len=260):
|
| 626 |
+
chunks = []
|
| 627 |
+
i = 0
|
| 628 |
+
while i < len(sents):
|
| 629 |
+
cur = sents[i]
|
| 630 |
+
j = i + 1
|
| 631 |
+
while j < len(sents) and len(cur) + 1 + len(sents[j]) <= max_len:
|
| 632 |
+
cur = cur + " " + sents[j]
|
| 633 |
+
j += 1
|
| 634 |
+
chunks.append(cur)
|
| 635 |
+
i = j
|
| 636 |
+
return chunks
|
| 637 |
+
|
| 638 |
+
candidates = sents[:] + make_chunks(sents, max_len=220)
|
| 639 |
+
random.shuffle(candidates)
|
| 640 |
|
| 641 |
items: List[MCQ] = []
|
| 642 |
tried = 0
|
| 643 |
+
for s in candidates:
|
| 644 |
if len(items) >= n: break
|
| 645 |
mcq = gen_one_comp_q(s, tok, model)
|
| 646 |
tried += 1
|
|
|
|
| 655 |
seen.add(c); clean.append(c)
|
| 656 |
clean = (clean + ["…","…","…","…"])[:4]
|
| 657 |
ai = mcq.answer_index if isinstance(mcq.answer_index,int) and 0<=mcq.answer_index<4 else 0
|
|
|
|
| 658 |
items.append(MCQ(id=str(uuid.uuid4())[:8], question=q, choices=clean, answer_index=ai))
|
| 659 |
+
if tried >= n * 12:
|
| 660 |
break
|
| 661 |
|
| 662 |
if not items:
|
| 663 |
return make_mcqs(text, n, difficulty=difficulty)
|
| 664 |
return items[:n]
|
| 665 |
|
| 666 |
+
|
| 667 |
# ------------------ تحويل إلى سجلات العرض ------------------
|
| 668 |
def clean_option_text(t: str) -> str:
|
| 669 |
t = (t or "").strip()
|
|
|
|
| 728 |
raw = text_area if text_area else file_to_text(file_path, model_id=model_id, zoom=float(zoom))[0]
|
| 729 |
cleaned = postprocess(raw)
|
| 730 |
|
| 731 |
+
used_mode = mode
|
| 732 |
try:
|
| 733 |
if mode == "فهم مباشر":
|
| 734 |
+
tok, model, ok = get_mt5()
|
| 735 |
+
if ok:
|
| 736 |
+
items = make_comp_mcqs(cleaned, n=int(n), difficulty=difficulty)
|
| 737 |
+
else:
|
| 738 |
+
items = make_mcqs(cleaned, n=int(n), difficulty=difficulty)
|
| 739 |
+
used_mode = "فراغ (fallback)"
|
| 740 |
else:
|
| 741 |
items = make_mcqs(cleaned, n=int(n), difficulty=difficulty)
|
| 742 |
except Exception:
|
| 743 |
items = make_mcqs(cleaned, n=int(n), difficulty=difficulty)
|
| 744 |
+
used_mode = "فراغ (fallback)"
|
| 745 |
|
| 746 |
recs = to_records(items)
|
| 747 |
+
warn = f"نمط مُستخدَم: **{used_mode}** — عدد الأسئلة: {len(items)}"
|
| 748 |
+
return render_quiz_html(recs), gr.update(visible=False), gr.update(visible=True), warn
|
| 749 |
+
|
| 750 |
|
| 751 |
# ------------------ CSS ------------------
|
| 752 |
CSS = """
|