Leen172 committed on
Commit
3ca8fa1
·
verified ·
1 Parent(s): 4988947

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -47
app.py CHANGED
@@ -420,71 +420,125 @@ def smart_distractors(correct: str, phrase_pool: List[str], sentence: str, k: in
420
  if len(out) < k:
421
  out.extend(legacy_distractors(correct, phrase_pool, k=k-len(out)))
422
  return out[:k]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
423
 
424
  # ====== (4-أ) مُولِّد أسئلة "فراغ" ======
425
  def make_mcqs(text:str, n:int=6, difficulty: str = "متوسط")->List[MCQ]:
426
  all_sents = split_sents(text)
427
  sents = pick_clean_sentences(all_sents, difficulty)
428
  if not sents:
429
- raise ValueError("النص قصير أو غير صالح.")
 
430
 
431
- keyphrases = yake_keywords(text, k=200)
432
  keyphrases = [kp for kp in keyphrases if safe_keyword(kp) and 2 <= len(kp) <= 40]
433
 
434
- # ربط العبارة بجملة مناسبة (ظهور وحيد)
435
- sent_for={}
436
  for s in sents:
437
  for kp in keyphrases:
438
- if kp in sent_for: continue
439
- hits = re2.findall(rf"(?<!\p{{L}}){re2.escape(kp)}(?!\p{{L}})", s)
440
- if len(hits) == 1:
441
- sent_for[kp]=s
442
- if len(sent_for)>=n*3:
443
  break
444
 
445
- if not sent_for:
446
- tokens = [t for t in re2.findall(r"[\p{L}\p{N}_]+", text) if good_kw(t)]
447
- freq = [w for w,_ in sorted(((t, text.count(t)) for t in tokens), key=lambda x:-x[1])]
448
- keyphrases = [w for w in freq if safe_keyword(w)][:150]
449
- for s in sents:
450
- for kp in keyphrases:
451
- if kp in sent_for: continue
452
- hits = re2.findall(rf"(?<!\p{{L}}){re2.escape(kp)}(?!\p{{L}})", s)
453
- if len(hits) == 1: sent_for[kp]=s
454
- if len(sent_for)>=n*2:
455
- break
456
-
457
- if not sent_for:
458
- raise RuntimeError("تعذّر توليد أسئلة من هذا النص.")
459
-
460
- # أولوية للعبارات الأطول (أعلميّة أعلى)
461
- items=[]; used_sents=set(); used_keys=set()
462
- for kp in sorted(sent_for.keys(), key=lambda x: (-len(x), x)):
463
- if len(items)>=n: break
464
- s=sent_for[kp]
465
- if s in used_sents or kp in used_keys: continue
466
 
467
- q=re2.sub(rf"(?<!\p{{L}}){re2.escape(kp)}(?!\p{{L}})", "_____", s, count=1)
 
 
 
 
 
 
468
 
 
469
  pool = [x for x in keyphrases if x != kp]
470
  ch = smart_distractors(kp, pool, s, k=3, all_sentences=all_sents, difficulty=difficulty) + [kp]
471
 
472
- # تنظيف ومنع تكرار وضمان أربع خيارات
473
- clean_choices=[]; seen=set()
474
  for c in ch:
475
  c = c.strip()
476
- if not c or c in seen: continue
477
- seen.add(c); clean_choices.append(c)
478
- ch = clean_choices[:4]
479
- while len(ch)<4: ch.append("…")
480
- random.shuffle(ch); ans=ch.index(kp) if kp in ch else 3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
481
 
482
- items.append(MCQ(id=str(uuid.uuid4())[:8], question=q, choices=ch, answer_index=ans))
483
- used_sents.add(s); used_keys.add(kp)
484
 
485
  if not items:
486
  raise RuntimeError("تعذّر توليد أسئلة.")
487
- return items
488
 
489
  # ====== (4-ب) مُولِّد أسئلة "فهم مباشر" (mT5) ======
490
  _MT5 = {"tok": None, "model": None, "ok": False}
@@ -557,18 +611,36 @@ def gen_one_comp_q(sentence: str, tok, model, max_new_tokens=128) -> Optional[MC
557
  def make_comp_mcqs(text: str, n: int = 6, difficulty: str = "متوسط") -> List[MCQ]:
558
  tok, model, ok = get_mt5()
559
  if not ok:
 
560
  return make_mcqs(text, n, difficulty=difficulty)
561
 
562
  sents_all = split_sents(text)
563
  sents = pick_clean_sentences(sents_all, difficulty)
 
 
564
  if not sents:
565
  return make_mcqs(text, n, difficulty=difficulty)
566
 
567
- random.shuffle(sents)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
568
 
569
  items: List[MCQ] = []
570
  tried = 0
571
- for s in sents:
572
  if len(items) >= n: break
573
  mcq = gen_one_comp_q(s, tok, model)
574
  tried += 1
@@ -583,15 +655,15 @@ def make_comp_mcqs(text: str, n: int = 6, difficulty: str = "متوسط") -> Lis
583
  seen.add(c); clean.append(c)
584
  clean = (clean + ["…","…","…","…"])[:4]
585
  ai = mcq.answer_index if isinstance(mcq.answer_index,int) and 0<=mcq.answer_index<4 else 0
586
-
587
  items.append(MCQ(id=str(uuid.uuid4())[:8], question=q, choices=clean, answer_index=ai))
588
- if tried >= n * 7:
589
  break
590
 
591
  if not items:
592
  return make_mcqs(text, n, difficulty=difficulty)
593
  return items[:n]
594
 
 
595
  # ------------------ تحويل إلى سجلات العرض ------------------
596
  def clean_option_text(t: str) -> str:
597
  t = (t or "").strip()
@@ -656,16 +728,25 @@ def build_quiz(text_area, file_path, n, model_id, zoom, mode, difficulty):
656
  raw = text_area if text_area else file_to_text(file_path, model_id=model_id, zoom=float(zoom))[0]
657
  cleaned = postprocess(raw)
658
 
 
659
  try:
660
  if mode == "فهم مباشر":
661
- items = make_comp_mcqs(cleaned, n=int(n), difficulty=difficulty)
 
 
 
 
 
662
  else:
663
  items = make_mcqs(cleaned, n=int(n), difficulty=difficulty)
664
  except Exception:
665
  items = make_mcqs(cleaned, n=int(n), difficulty=difficulty)
 
666
 
667
  recs = to_records(items)
668
- return render_quiz_html(recs), gr.update(visible=False), gr.update(visible=True), ""
 
 
669
 
670
  # ------------------ CSS ------------------
671
  CSS = """
 
420
  if len(out) < k:
421
  out.extend(legacy_distractors(correct, phrase_pool, k=k-len(out)))
422
  return out[:k]
def best_keyword_in_sentence(sentence: str, global_text: str) -> Optional[str]:
    """Choose a fill-in-the-blank target taken from ``sentence`` itself.

    YAKE (language ``ar``, up to 3-grams, top 20) is run on the sentence
    alone. Each returned phrase is whitespace-normalised and kept only if it
    passes ``good_kw`` and ``safe_keyword``, is 2-40 characters long, and
    occurs in the sentence with no letter directly before or after it.
    Surviving phrases are scored as
    ``len(phrase) + 0.5 * global_text.count(phrase)``; the highest score wins
    and the earliest phrase wins a tie.

    When no phrase survives, the longest run of letters in the sentence that
    passes both filters is returned instead (earliest one on ties).

    Args:
        sentence: The sentence the blank will be cut from.
        global_text: The full text, used only for the frequency weight.

    Returns:
        The selected keyword, or ``None`` when nothing usable was found.
    """
    try:
        extractor = yake.KeywordExtractor(lan='ar', n=3, top=20)
        scored_phrases = extractor.extract_keywords(sentence)
    except Exception:
        # Best effort: a YAKE failure simply means "no phrase candidates".
        scored_phrases = []

    ranked = []
    for phrase, _ in scored_phrases:
        phrase = re2.sub(r"\s+", " ", phrase.strip())
        usable = (
            bool(phrase)
            and good_kw(phrase)
            and safe_keyword(phrase)
            and 2 <= len(phrase) <= 40
            # The phrase must literally appear in the sentence as its own term.
            and re2.search(rf"(?<!\p{{L}}){re2.escape(phrase)}(?!\p{{L}})", sentence)
        )
        if not usable:
            continue
        # Longer phrases score higher, with a light bonus for global frequency.
        ranked.append((phrase, len(phrase) + 0.5 * global_text.count(phrase)))

    if ranked:
        # max() keeps the first of equally scored entries, like a stable sort.
        return max(ranked, key=lambda entry: entry[1])[0]

    # Simpler fallback: the longest acceptable single word.
    words = [
        word
        for word in re2.findall(r"\p{L}+", sentence)
        if good_kw(word) and safe_keyword(word)
    ]
    return max(words, key=len, default=None)
453
 
454
  # ====== (4-أ) مُولِّد أسئلة "فراغ" ======
def _build_blank_mcq(sentence: str, keyword: str, pool: List[str],
                     all_sents: List[str], difficulty: str) -> Optional[MCQ]:
    """Turn one (sentence, keyword) pair into a four-option blank question.

    Args:
        sentence: Sentence in which the first standalone occurrence of
            ``keyword`` is replaced by ``_____``.
        keyword: The correct answer.
        pool: Phrases handed to ``smart_distractors`` as distractor material.
        all_sents: Every sentence of the text, forwarded to
            ``smart_distractors``.
        difficulty: Difficulty label, forwarded to ``smart_distractors``.

    Returns:
        The question, or ``None`` when the keyword is blank or does not occur
        in the sentence with no letter directly before or after it.
    """
    answer = keyword.strip()
    if not answer:
        return None

    pattern = rf"(?<!\p{{L}}){re2.escape(keyword)}(?!\p{{L}})"
    if not re2.search(pattern, sentence):
        return None
    question = re2.sub(pattern, "_____", sentence, count=1)

    distractors = smart_distractors(
        keyword, pool, sentence, k=3, all_sentences=all_sents, difficulty=difficulty
    )

    # Seed the list with the (stripped) correct answer so that neither
    # de-duplication nor truncation to four options can ever drop it, and so
    # that it can always be located again after shuffling.
    choices = [answer]
    seen = {answer}
    for option in distractors:
        option = option.strip()
        if option and option not in seen:
            seen.add(option)
            choices.append(option)
    choices = choices[:4]
    # Pad with placeholders when fewer than three distinct distractors exist.
    choices.extend("…" for _ in range(4 - len(choices)))

    random.shuffle(choices)
    return MCQ(
        id=str(uuid.uuid4())[:8],
        question=question,
        choices=choices,
        answer_index=choices.index(answer),
    )


# ====== (4-a) "fill in the blank" question generator ======
def make_mcqs(text:str, n:int=6, difficulty: str = "متوسط")->List[MCQ]:
    """Generate up to ``n`` fill-in-the-blank multiple-choice questions.

    Two phases are used:

    1. Global YAKE keyphrases are matched to the first sentence that contains
       them as a standalone term; the longest keyphrases are used first.
    2. If that yields fewer than ``n`` questions, each sentence is visited in
       order and a keyword is picked from the sentence itself via
       ``best_keyword_in_sentence``.

    Args:
        text: Source text.
        n: Maximum number of questions to return.
        difficulty: Difficulty label passed to ``pick_clean_sentences`` and
            ``smart_distractors``.

    Returns:
        At most ``n`` questions, each with exactly four choices.

    Raises:
        RuntimeError: If no question at all could be built.
    """
    all_sents = split_sents(text)
    # If no sentence passes the cleanliness filter, fall back to all of them.
    sents = pick_clean_sentences(all_sents, difficulty) or all_sents[:]

    keyphrases = [
        kp for kp in yake_keywords(text, k=240)
        if safe_keyword(kp) and 2 <= len(kp) <= 40
    ]
    # Build each keyphrase's pattern once instead of once per sentence.
    patterns = {kp: rf"(?<!\p{{L}}){re2.escape(kp)}(?!\p{{L}})" for kp in keyphrases}

    # Map each keyphrase to the first sentence containing it (the phrase may
    # occur more than once in that sentence).
    sent_for = {}
    for s in sents:
        for kp in keyphrases:
            if kp not in sent_for and re2.search(patterns[kp], s):
                sent_for[kp] = s
        if len(sent_for) >= n * 4:  # keep more candidates than strictly needed
            break

    items: List[MCQ] = []
    used_pairs = set()  # (sentence, keyword) pairs already turned into questions

    # Phase 1: consume the keyphrase matches, longest keyphrase first.
    for kp in sorted(sent_for, key=lambda x: (-len(x), x)):
        if len(items) >= n:
            break
        s = sent_for[kp]
        pool = [x for x in keyphrases if x != kp]
        mcq = _build_blank_mcq(s, kp, pool, all_sents, difficulty)
        if mcq is None:
            continue
        items.append(mcq)
        used_pairs.add((s, kp))

    # Phase 2: still short -> pick a target from inside each sentence.
    for s in sents:
        if len(items) >= n:
            break
        kp = best_keyword_in_sentence(s, text)
        if not kp or (s, kp) in used_pairs:
            continue
        pool = [x for x in keyphrases if x != kp] or keyphrases[:]
        mcq = _build_blank_mcq(s, kp, pool, all_sents, difficulty)
        if mcq is None:
            continue
        items.append(mcq)
        used_pairs.add((s, kp))

    if not items:
        raise RuntimeError("تعذّر توليد أسئلة.")
    return items[:n]
542
 
543
  # ====== (4-ب) مُولِّد أسئلة "فهم مباشر" (mT5) ======
544
  _MT5 = {"tok": None, "model": None, "ok": False}
 
611
  def make_comp_mcqs(text: str, n: int = 6, difficulty: str = "متوسط") -> List[MCQ]:
612
  tok, model, ok = get_mt5()
613
  if not ok:
614
+ # عدم توفر mT5 → ارجعي لأسئلة الفراغ
615
  return make_mcqs(text, n, difficulty=difficulty)
616
 
617
  sents_all = split_sents(text)
618
  sents = pick_clean_sentences(sents_all, difficulty)
619
+ if not sents:
620
+ sents = sents_all[:]
621
  if not sents:
622
  return make_mcqs(text, n, difficulty=difficulty)
623
 
624
+ # جرّبي أولًا على جمل مفردة، ثم على “مقاطع” (دمج 2–3 جمل) إذا لزم
def make_chunks(sents, max_len=260):
    """Greedily pack consecutive sentences into space-joined chunks.

    A sentence is appended to the chunk being built as long as the joined
    length (chunk + one space + sentence) does not exceed ``max_len``;
    otherwise the chunk is closed and the sentence starts a new one. A
    sentence that is longer than ``max_len`` on its own still becomes a
    chunk by itself.

    Args:
        sents: Sentences, in document order.
        max_len: Maximum length in characters of a merged chunk.

    Returns:
        The list of chunks, in document order.
    """
    chunks = []
    pending = None  # chunk under construction; None until the first sentence
    for sentence in sents:
        if pending is not None and len(pending) + 1 + len(sentence) <= max_len:
            pending = pending + " " + sentence
            continue
        if pending is not None:
            chunks.append(pending)
        pending = sentence
    if pending is not None:
        chunks.append(pending)
    return chunks
637
+
638
+ candidates = sents[:] + make_chunks(sents, max_len=220)
639
+ random.shuffle(candidates)
640
 
641
  items: List[MCQ] = []
642
  tried = 0
643
+ for s in candidates:
644
  if len(items) >= n: break
645
  mcq = gen_one_comp_q(s, tok, model)
646
  tried += 1
 
655
  seen.add(c); clean.append(c)
656
  clean = (clean + ["…","…","…","…"])[:4]
657
  ai = mcq.answer_index if isinstance(mcq.answer_index,int) and 0<=mcq.answer_index<4 else 0
 
658
  items.append(MCQ(id=str(uuid.uuid4())[:8], question=q, choices=clean, answer_index=ai))
659
+ if tried >= n * 12:
660
  break
661
 
662
  if not items:
663
  return make_mcqs(text, n, difficulty=difficulty)
664
  return items[:n]
665
 
666
+
667
  # ------------------ تحويل إلى سجلات العرض ------------------
668
  def clean_option_text(t: str) -> str:
669
  t = (t or "").strip()
 
728
  raw = text_area if text_area else file_to_text(file_path, model_id=model_id, zoom=float(zoom))[0]
729
  cleaned = postprocess(raw)
730
 
731
+ used_mode = mode
732
  try:
733
  if mode == "فهم مباشر":
734
+ tok, model, ok = get_mt5()
735
+ if ok:
736
+ items = make_comp_mcqs(cleaned, n=int(n), difficulty=difficulty)
737
+ else:
738
+ items = make_mcqs(cleaned, n=int(n), difficulty=difficulty)
739
+ used_mode = "فراغ (fallback)"
740
  else:
741
  items = make_mcqs(cleaned, n=int(n), difficulty=difficulty)
742
  except Exception:
743
  items = make_mcqs(cleaned, n=int(n), difficulty=difficulty)
744
+ used_mode = "فراغ (fallback)"
745
 
746
  recs = to_records(items)
747
+ warn = f"نمط مُستخدَم: **{used_mode}** — عدد الأسئلة: {len(items)}"
748
+ return render_quiz_html(recs), gr.update(visible=False), gr.update(visible=True), warn
749
+
750
 
751
  # ------------------ CSS ------------------
752
  CSS = """