Spaces:

Leen172
/

Question_generator

Sleeping

App Files Files Community

Leen172 committed on Nov 2

Commit

d3a2eea

verified ·

1 Parent(s): 17a1e17

Update app.py

Browse files

Files changed (1) hide show

app.py +169 -100

app.py CHANGED Viewed

@@ -1,10 +1,11 @@
 # -*- coding: utf-8 -*-
 # صفحتان ثابتتان + Submit لكل سؤال يعمل فعليًا + منع تغيّر أبعاد صفحة الإدخال
 import os, json, uuid, random, unicodedata
 from dataclasses import dataclass
 from pathlib import Path
-from typing import List, Tuple
 from PIL import Image
 from pypdf import PdfReader
@@ -18,6 +19,7 @@ random.seed(42)
 DEFAULT_NUM_QUESTIONS = 6
 DEFAULT_TROCR_MODEL = "microsoft/trocr-base-printed"
 DEFAULT_TROCR_ZOOM   = 2.6
 # ------------------ OCR (تحميل كسول) ------------------
 _OCR = {}
@@ -105,7 +107,7 @@ def postprocess(raw:str)->str:
     t = re2.sub(r"\[\d+\]", " ", t)
     return norm_ar(t)
-# ------------------ توليد أسئلة (تحسينات كبيرة داخليًا فقط) ------------------
 SENT_SPLIT = re2.compile(r"(?<=[\.!؟\?])\s+")
 AR_STOP = set("""في على من إلى عن مع لدى ذلك هذه هذا الذين التي الذي أو أم إن أن كان تكون كانوا كانت كنت ثم قد لقد ربما بل لكن إلا سوى حتى حيث كما لما ما لماذا متى أين كيف أي هناك هنا هؤلاء أولئك نحن هو هي هم هن أنت أنتم أنتن""".split())
@@ -132,9 +134,9 @@ def yake_keywords(t: str, k: int = 160) -> List[str]:
             pairs = []
         for w, _ in pairs:
             w = re2.sub(r"\s+", " ", w.strip())
-            if not w or w in seen:
                 continue
-            if re2.match(r"^[\p{P}\p{S}\d_]+$", w):
                 continue
             if 2 <= len(w) <= 40:
                 phrases.append(w)
@@ -144,10 +146,9 @@ def yake_keywords(t: str, k: int = 160) -> List[str]:
 def good_kw(kw:str)->bool:
     return kw and len(kw)>=2 and kw not in AR_STOP and not re2.match(r"^[\p{P}\p{S}\d_]+$", kw)
-# ====== تحسينات "الذكاء": POS/NER اختياري مع fallback ======
 _HAS_CAMEL = False
 try:
-    from camel_tools.tokenizers.word import simple_word_tokenize
     from camel_tools.morphology.analyzer import Analyzer
     from camel_tools.ner import NERecognizer
     _HAS_CAMEL = True
@@ -168,9 +169,8 @@ def ar_pos(word: str) -> str:
     try:
         ana = _AN.analyze(word)
         if not ana: return "X"
-        pos_candidates = [a.get('pos','X') for a in ana]
-        # خذ الأكثر تكرارًا
         from collections import Counter
         return Counter(pos_candidates).most_common(1)[0][0] if pos_candidates else "X"
     except Exception:
         return "X"
@@ -205,7 +205,7 @@ def get_embedder():
             from sentence_transformers import SentenceTransformer
             _EMB = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
         except Exception:
-            _EMB = False  # تشير لتعطّل التحميل
     return _EMB
 def nearest_terms(target: str, pool: List[str], k: int = 12) -> List[Tuple[str, float]]:
@@ -218,7 +218,7 @@ def nearest_terms(target: str, pool: List[str], k: int = 12) -> List[Tuple[str,
     vecs = emb.encode([target] + cand, normalize_embeddings=True)
     t, C = vecs[0], vecs[1:]
     import numpy as np
-    sims = (C @ t)  # cosine لأن المتجهات مُطبّعة
     idx = np.argsort(-sims)[:k]
     return [(cand[i], float(sims[i])) for i in idx]
@@ -254,7 +254,6 @@ def mlm_distractors(sentence_with_blank: str, correct: str, k: int = 8) -> List[
         return []
 def legacy_distractors(correct:str, pool:List[str], k:int=3)->List[str]:
-    # النسخة القديمة كنسخة احتياط
     L=len(correct.strip()); cand=[]
     for w in pool:
         w=w.strip()
@@ -280,7 +279,7 @@ def get_cross_encoder():
 def pos_compatible(a: str, b: str) -> bool:
     pa, pb = ar_pos(a), ar_pos(b)
-    if "X" in (pa, pb):
         return True
     return pa == pb
@@ -301,32 +300,19 @@ def rank_by_ce(sentence_with_blank: str, candidates: List[str]) -> List[str]:
 def smart_distractors(correct: str, phrase_pool: List[str], sentence: str, k: int = 3) -> List[str]:
     base = []
-    # 1) جيران دلاليين
-    neigh = nearest_terms(correct, phrase_pool, k=20)
-    base.extend([w for w,_ in neigh])
-    # 2) FILL-MASK بديل
-    mlm = mlm_distractors(sentence.replace(correct, "_____"), correct, k=15)
-    for w in mlm:
-        if w not in base:
-            base.append(w)
-    # 3) فلترة POS/NER وطول وتشابه/تطبيع
     clean = []
     for w in base:
         w = w.strip()
-        if not w or w == correct:
-            continue
-        if is_named_entity(w):
-            continue
-        if not pos_compatible(w, correct):
-            continue
-        if not length_close(w, correct):
-            continue
-        if norm_ar(w) == norm_ar(correct):
-            continue
         clean.append(w)
-    # 4) ترتيب Cross-Encoder اختياري
     clean = rank_by_ce(sentence.replace(correct, "_____"), clean)[:max(k*2, k)]
-    # 5) إزالة المتشابه جداً مع الجواب
     try:
         emb = get_embedder()
         if emb and clean:
@@ -342,94 +328,170 @@ def smart_distractors(correct: str, phrase_pool: List[str], sentence: str, k: in
     out = clean[:k]
     while len(out) < k:
         extra = [w for w in phrase_pool if w not in out and w != correct and length_close(w, correct)]
-        if not extra:
-            break
-        out.extend(extra[:(k-len(out))])
-        break
     if len(out) < k:
         out.extend(legacy_distractors(correct, phrase_pool, k=k-len(out)))
     return out[:k]
-# ====== (4) مُولِّد أسئلة جديد بمحافظته على نفس الواجهة تمامًا ======
 def make_mcqs(text:str, n:int=6)->List[MCQ]:
     sents=split_sents(text)
-    if not sents:
         raise ValueError("النص قصير أو غير صالح.")
-    # عبارات مفتاحية 1–3 كلمات + فلترة أذكى
     keyphrases = yake_keywords(text, k=160)
     keyphrases = [kp for kp in keyphrases if safe_keyword(kp) and 2 <= len(kp) <= 40]
-    # ربط العبارة بجملة مناسبة (نظيفة، ظهور وحيد للعبارة)
     sent_for={}
     for s in sents:
-        if not is_clean_sentence(s):
-            continue
         for kp in keyphrases:
-            if kp in sent_for:
-                continue
             hits = re2.findall(rf"(?<!\p{{L}}){re2.escape(kp)}(?!\p{{L}})", s)
             if len(hits) == 1:
                 sent_for[kp]=s
-        if len(sent_for)>=n*3:
-            break
     if not sent_for:
-        # fallback: لو ما لقينا مطابقات جيدة، نرجع للمفردات من النص
         tokens = [t for t in re2.findall(r"[\p{L}\p{N}_]+", text) if good_kw(t)]
         freq = [w for w,_ in sorted(((t, text.count(t)) for t in tokens), key=lambda x:-x[1])]
         keyphrases = [w for w in freq if safe_keyword(w)][:120]
         for s in sents:
-            if not is_clean_sentence(s):
-                continue
             for kp in keyphrases:
-                if kp in sent_for:
-                    continue
                 hits = re2.findall(rf"(?<!\p{{L}}){re2.escape(kp)}(?!\p{{L}})", s)
-                if len(hits) == 1:
-                    sent_for[kp]=s
-            if len(sent_for)>=n*2:
-                break
     if not sent_for:
         raise RuntimeError("تعذّر توليد أسئلة من هذا النص.")
-    # نعطي أولوية للعبارات الأطول (أكثر إعلامية)
     items=[]; used_sents=set(); used_keys=set()
     for kp in sorted(sent_for.keys(), key=lambda x: (-len(x), x)):
         if len(items)>=n: break
         s=sent_for[kp]
-        if s in used_sents or kp in used_keys:
-            continue
-        # ابنِ سؤال الفراغ
         q=re2.sub(rf"(?<!\p{{L}}){re2.escape(kp)}(?!\p{{L}})", "_____", s, count=1)
-        # مشتتات أذكى (مع رجوع تلقائي لو النماذج مش متاحة)
         pool = [x for x in keyphrases if x != kp]
         ch = smart_distractors(kp, pool, s, k=3) + [kp]
-        # تنظيف سريع وخلوّ من التكرار
-        clean_choices=[]
-        seen=set()
         for c in ch:
             c = c.strip()
-            if not c: continue
-            if c in seen: continue
-            seen.add(c)
-            clean_choices.append(c)
         ch = clean_choices[:4]
-        # تأكيد وجود 4 خيارات
-        while len(ch)<4:
-            ch.append("…")
         random.shuffle(ch); ans=ch.index(kp) if kp in ch else 3
         items.append(MCQ(id=str(uuid.uuid4())[:8], question=q, choices=ch, answer_index=ans))
         used_sents.add(s); used_keys.add(kp)
-    if not items:
         raise RuntimeError("تعذّر توليد أسئلة.")
     return items
 def clean_option_text(t: str) -> str:
     t = (t or "").strip()
     t = re2.sub(AR_DIAC, "", t)
@@ -454,12 +516,12 @@ def to_records(items:List[MCQ])->List[dict]:
 # ------------------ صفحة الأسئلة (HTML فقط) ------------------
 def render_quiz_html(records: List[dict]) -> str:
-    parts = []
     for i, rec in enumerate(records, start=1):
-        qid = rec["id"]
         qtxt = rec["question"]
-        cor = next((o["id"] for o in rec["options"] if o["is_correct"]), "")
-        opts_html = []
         for o in rec["options"]:
             lid, txt = o["id"], o["text"]
             opts_html.append(f"""
@@ -485,9 +547,8 @@ def render_quiz_html(records: List[dict]) -> str:
         """)
     return f"""<div id="quiz" class="quiz-wrap">{''.join(parts)}</div>"""
 # ------------------ توليد الامتحان وتبديل الصفحات ------------------
-def build_quiz(text_area, file_path, n, model_id, zoom):
     text_area = (text_area or "").strip()
     if not text_area and not file_path:
         return "", gr.update(visible=True), gr.update(visible=False), "🛈 الصق نصًا أو ارفع ملفًا أولًا."
@@ -496,7 +557,17 @@ def build_quiz(text_area, file_path, n, model_id, zoom):
     else:
         raw, _ = file_to_text(file_path, model_id=model_id, zoom=float(zoom))
     cleaned = postprocess(raw)
-    items   = make_mcqs(cleaned, n=int(n))
     recs    = to_records(items)
     return render_quiz_html(recs), gr.update(visible=False), gr.update(visible=True), ""
@@ -510,7 +581,7 @@ body{direction:rtl; font-family:system-ui,'Cairo','IBM Plex Arabic',sans-serif;
 .gradio-container{max-width:980px;margin:0 auto;padding:12px 12px 40px;}
 h2.top{color:#eaeaf2;margin:6px 0 16px}
-/* صفحة الإدخال ثابتة الارتفاع ولا تتغير بعد الرفع */
 .input-panel{background:var(--panel);border:1px solid var(--border);border-radius:14px;padding:16px;
   box-shadow:0 16px 38px rgba(0,0,0,.35); min-height:360px; display:flex; flex-direction:column; gap:12px;}
 .small{opacity:.9;color:#d9dee8}
@@ -547,13 +618,12 @@ textarea{min-height:120px}
 }
 .q-actions .q-submit:disabled{opacity:.5;cursor:not-allowed}
 .q-note{color:#ffd1d6}
-.q-note.warn{color:#ffd1d6}
 """
-# ------------------ JS: ربط Submit بعد الرندر (مع تحسين إبراز الصحيحة) ------------------
 ATTACH_LISTENERS_JS = """
 () => {
-  // اربط مرة واحدة فقط
   if (window.__q_submit_bound_multi2) { return 'already'; }
   window.__q_submit_bound_multi2 = true;
@@ -576,23 +646,20 @@ ATTACH_LISTENERS_JS = """
     const chosenLabel = chosen.closest('.opt');
-    // حالة صحيحة: لوّن أخضر وأقفل السؤال كاملاً + إبراز الكلمة الصحيحة داخل الجملة
     if (chosen.value === correct) {
       chosenLabel.classList.add('ok');
       if (badge){ badge.hidden=false; badge.className='q-badge ok'; badge.textContent='Correct!'; }
-      // أقفل هذا السؤال فقط بعد الصح
       card.querySelectorAll('input[type="radio"]').forEach(i => i.disabled = true);
       e.target.disabled = true;
       if (note) note.textContent = '';
-      // إبراز الجواب الصحيح ضمن الجملة الحالية دون تغيير البنية
       const qNode = card.querySelector('.q-text');
       if (qNode){
-        const full = qNode.textContent || '';
         const correctText = [...card.querySelectorAll('.opt')].find(o =>
           o.querySelector('input').value === correct
         )?.querySelector('.opt-text')?.textContent || '';
-        if (full && correctText){
           const highlighted = full.replace('_____', `<mark style="background:#2dd4bf22;border:1px solid #2dd4bf55;border-radius:6px;padding:0 4px">${correctText}</mark>`);
           qNode.innerHTML = highlighted;
         }
@@ -600,7 +667,6 @@ ATTACH_LISTENERS_JS = """
       return;
     }
-    // حالة خاطئة: لوّن أحمر فقط، ولا تعطل أي شيء — ليقدر يجرّب خيار آخر
     chosenLabel.classList.add('err');
     if (badge){ badge.hidden=false; badge.className='q-badge err'; badge.textContent='Incorrect.'; }
     if (note) note.textContent = '';
@@ -610,7 +676,7 @@ ATTACH_LISTENERS_JS = """
 }
 """
-# ------------------ واجهة Gradio (بدون تغيير بنية الواجهات) ------------------
 with gr.Blocks(title="Question Generator", css=CSS) as demo:
     gr.Markdown("<h2 class='top'>Question Generator</h2>")
@@ -622,6 +688,9 @@ with gr.Blocks(title="Question Generator", css=CSS) as demo:
         file_comp = gr.File(label="أو ارفع ملف (PDF / TXT)", file_count="single",
                             file_types=[".pdf",".txt"], type="filepath", elem_classes=["upload-like"])
         num_q = gr.Slider(4, 20, value=DEFAULT_NUM_QUESTIONS, step=1, label="عدد الأسئلة")
         with gr.Accordion("خيارات PDF المصوّر (اختياري)", open=False):
             trocr_model = gr.Dropdown(
                 choices=[
@@ -645,7 +714,7 @@ with gr.Blocks(title="Question Generator", css=CSS) as demo:
     # بناء الامتحان + تبديل الصفحات + ربط الـJS
     btn_build.click(
         build_quiz,
-        inputs=[text_area, file_comp, num_q, trocr_model, trocr_zoom],
         outputs=[quiz_html, page1, page2, warn]
     ).then(
         None, inputs=None, outputs=[js_wired], js=ATTACH_LISTENERS_JS

 # -*- coding: utf-8 -*-
 # صفحتان ثابتتان + Submit لكل سؤال يعمل فعليًا + منع تغيّر أبعاد صفحة الإدخال
+# + طور اختياري لأسئلة فهم مباشر باستخدام mT5 (تحميل كسول + fallback)
 import os, json, uuid, random, unicodedata
 from dataclasses import dataclass
 from pathlib import Path
+from typing import List, Tuple, Optional
 from PIL import Image
 from pypdf import PdfReader
 DEFAULT_NUM_QUESTIONS = 6
 DEFAULT_TROCR_MODEL = "microsoft/trocr-base-printed"
 DEFAULT_TROCR_ZOOM   = 2.6
+QUESTION_MODES = ["فراغ", "فهم مباشر"]  # جديد
 # ------------------ OCR (تحميل كسول) ------------------
 _OCR = {}
     t = re2.sub(r"\[\d+\]", " ", t)
     return norm_ar(t)
+# ------------------ بنية السؤال ------------------
 SENT_SPLIT = re2.compile(r"(?<=[\.!؟\?])\s+")
 AR_STOP = set("""في على من إلى عن مع لدى ذلك هذه هذا الذين التي الذي أو أم إن أن كان تكون كانوا كانت كنت ثم قد لقد ربما بل لكن إلا سوى حتى حيث كما لما ما لماذا متى أين كيف أي هناك هنا هؤلاء أولئك نحن هو هي هم هن أنت أنتم أنتن""".split())
             pairs = []
         for w, _ in pairs:
             w = re2.sub(r"\s+", " ", w.strip())
+            if not w or w in seen:
                 continue
+            if re2.match(r"^[\p{P}\p{S}\d_]+$", w):
                 continue
             if 2 <= len(w) <= 40:
                 phrases.append(w)
 def good_kw(kw:str)->bool:
     return kw and len(kw)>=2 and kw not in AR_STOP and not re2.match(r"^[\p{P}\p{S}\d_]+$", kw)
+# ====== تحسينات الذكاء: POS/NER اختياري مع fallback ======
 _HAS_CAMEL = False
 try:
     from camel_tools.morphology.analyzer import Analyzer
     from camel_tools.ner import NERecognizer
     _HAS_CAMEL = True
     try:
         ana = _AN.analyze(word)
         if not ana: return "X"
         from collections import Counter
+        pos_candidates = [a.get('pos','X') for a in ana]
         return Counter(pos_candidates).most_common(1)[0][0] if pos_candidates else "X"
     except Exception:
         return "X"
             from sentence_transformers import SentenceTransformer
             _EMB = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
         except Exception:
+            _EMB = False
     return _EMB
 def nearest_terms(target: str, pool: List[str], k: int = 12) -> List[Tuple[str, float]]:
     vecs = emb.encode([target] + cand, normalize_embeddings=True)
     t, C = vecs[0], vecs[1:]
     import numpy as np
+    sims = (C @ t)
     idx = np.argsort(-sims)[:k]
     return [(cand[i], float(sims[i])) for i in idx]
         return []
 def legacy_distractors(correct:str, pool:List[str], k:int=3)->List[str]:
     L=len(correct.strip()); cand=[]
     for w in pool:
         w=w.strip()
 def pos_compatible(a: str, b: str) -> bool:
     pa, pb = ar_pos(a), ar_pos(b)
+    if "X" in (pa, pb):
         return True
     return pa == pb
 def smart_distractors(correct: str, phrase_pool: List[str], sentence: str, k: int = 3) -> List[str]:
     base = []
+    base.extend([w for w,_ in nearest_terms(correct, phrase_pool, k=20)])
+    for w in mlm_distractors(sentence.replace(correct, "_____"), correct, k=15):
+        if w not in base: base.append(w)
     clean = []
     for w in base:
         w = w.strip()
+        if not w or w == correct: continue
+        if is_named_entity(w): continue
+        if not pos_compatible(w, correct): continue
+        if not length_close(w, correct): continue
+        if norm_ar(w) == norm_ar(correct): continue
         clean.append(w)
     clean = rank_by_ce(sentence.replace(correct, "_____"), clean)[:max(k*2, k)]
     try:
         emb = get_embedder()
         if emb and clean:
     out = clean[:k]
     while len(out) < k:
         extra = [w for w in phrase_pool if w not in out and w != correct and length_close(w, correct)]
+        if not extra: break
+        out.extend(extra[:(k-len(out))]); break
     if len(out) < k:
         out.extend(legacy_distractors(correct, phrase_pool, k=k-len(out)))
     return out[:k]
+# ====== (4-أ) مُولِّد أسئلة "فراغ" (القائم) ======
 def make_mcqs(text:str, n:int=6)->List[MCQ]:
     sents=split_sents(text)
+    if not sents:
         raise ValueError("النص قصير أو غير صالح.")
     keyphrases = yake_keywords(text, k=160)
     keyphrases = [kp for kp in keyphrases if safe_keyword(kp) and 2 <= len(kp) <= 40]
     sent_for={}
     for s in sents:
+        if not is_clean_sentence(s): continue
         for kp in keyphrases:
+            if kp in sent_for: continue
             hits = re2.findall(rf"(?<!\p{{L}}){re2.escape(kp)}(?!\p{{L}})", s)
             if len(hits) == 1:
                 sent_for[kp]=s
+        if len(sent_for)>=n*3: break
     if not sent_for:
         tokens = [t for t in re2.findall(r"[\p{L}\p{N}_]+", text) if good_kw(t)]
         freq = [w for w,_ in sorted(((t, text.count(t)) for t in tokens), key=lambda x:-x[1])]
         keyphrases = [w for w in freq if safe_keyword(w)][:120]
         for s in sents:
+            if not is_clean_sentence(s): continue
             for kp in keyphrases:
+                if kp in sent_for: continue
                 hits = re2.findall(rf"(?<!\p{{L}}){re2.escape(kp)}(?!\p{{L}})", s)
+                if len(hits) == 1: sent_for[kp]=s
+            if len(sent_for)>=n*2: break
     if not sent_for:
         raise RuntimeError("تعذّر توليد أسئلة من هذا النص.")
     items=[]; used_sents=set(); used_keys=set()
     for kp in sorted(sent_for.keys(), key=lambda x: (-len(x), x)):
         if len(items)>=n: break
         s=sent_for[kp]
+        if s in used_sents or kp in used_keys: continue
         q=re2.sub(rf"(?<!\p{{L}}){re2.escape(kp)}(?!\p{{L}})", "_____", s, count=1)
         pool = [x for x in keyphrases if x != kp]
         ch = smart_distractors(kp, pool, s, k=3) + [kp]
+        clean_choices=[]; seen=set()
         for c in ch:
             c = c.strip()
+            if not c or c in seen: continue
+            seen.add(c); clean_choices.append(c)
         ch = clean_choices[:4]
+        while len(ch)<4: ch.append("…")
         random.shuffle(ch); ans=ch.index(kp) if kp in ch else 3
         items.append(MCQ(id=str(uuid.uuid4())[:8], question=q, choices=ch, answer_index=ans))
         used_sents.add(s); used_keys.add(kp)
+    if not items:
         raise RuntimeError("تعذّر توليد أسئلة.")
     return items
+# ====== (4-ب) مُولِّد أسئلة "فهم مباشر" (توليدي mT5) ======
+_MT5 = {"tok": None, "model": None, "ok": False}
+def get_mt5():
+    if _MT5["tok"] is not None or _MT5["model"] is not None or _MT5["ok"]:
+        return _MT5["tok"], _MT5["model"], _MT5["ok"]
+    try:
+        from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+        _MT5["tok"] = AutoTokenizer.from_pretrained("google/mt5-small")
+        _MT5["model"] = AutoModelForSeq2SeqLM.from_pretrained("google/mt5-small")
+        _MT5["ok"] = True
+    except Exception:
+        _MT5["tok"] = None; _MT5["model"] = None; _MT5["ok"] = False
+    return _MT5["tok"], _MT5["model"], _MT5["ok"]
+def parse_json_block(s: str) -> Optional[dict]:
+    # حاول التقاط أول كائن JSON صالح
+    try:
+        # إن وُجد JSON مباشر
+        return json.loads(s)
+    except Exception:
+        pass
+    # التقط أقواس {} الأولى والأخيرة
+    m = re2.search(r"\{.*\}", s, flags=re2.DOTALL)
+    if m:
+        try:
+            return json.loads(m.group(0))
+        except Exception:
+            return None
+    return None
+def comp_prompt(sentence: str) -> str:
+    # تعليمات قصيرة ومحددة مع تنسيق JSON
+    return (
+        "أنت منشئ أسئلة متعددة الخيارات باللغة العربية.\n"
+        "من الجملة التالية، أنشئ سؤال فهم مباشر واحدًا مع أربع خيارات وإشارة للجواب الصحيح.\n"
+        "أعد فقط JSON بهذا الشكل:\n"
+        "{"
+        "\"question\": \"...\",\n"
+        "\"choices\": [\"...\",\"...\",\"...\",\"...\"],\n"
+        "\"answer_index\": 0\n"
+        "}\n\n"
+        f"الجملة: {sentence}"
+    )
+def gen_one_comp_q(sentence: str, tok, model, max_new_tokens=128) -> Optional[MCQ]:
+    try:
+        import torch
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        model = model.to(device)
+        inp = tok(comp_prompt(sentence), return_tensors="pt").to(device)
+        out = model.generate(
+            **inp,
+            max_new_tokens=max_new_tokens,
+            do_sample=True,
+            temperature=0.8,
+            top_p=0.9,
+            num_return_sequences=1,
+            eos_token_id=tok.eos_token_id
+        )
+        text = tok.decode(out[0], skip_special_tokens=True)
+        data = parse_json_block(text) or {}
+        q = str(data.get("question","")).strip()
+        choices = data.get("choices", [])
+        ai = data.get("answer_index", 0)
+        if not q or not isinstance(choices, list) or len(choices) < 4:
+            return None
+        choices = [str(c).strip() for c in choices][:4]
+        ai = ai if isinstance(ai, int) and 0 <= ai < 4 else 0
+        return MCQ(id=str(uuid.uuid4())[:8], question=q, choices=choices, answer_index=ai)
+    except Exception:
+        return None
+def make_comp_mcqs(text: str, n: int = 6) -> List[MCQ]:
+    tok, model, ok = get_mt5()
+    if not ok:
+        # لو ما توفر mT5 نرجع للفراغ
+        return make_mcqs(text, n)
+    sents = [s for s in split_sents(text) if is_clean_sentence(s)]
+    if not sents:
+        return make_mcqs(text, n)
+    random.shuffle(sents)
+    items: List[MCQ] = []
+    tried = 0
+    for s in sents:
+        if len(items) >= n: break
+        mcq = gen_one_comp_q(s, tok, model)
+        tried += 1
+        if mcq:
+            # تنظيف بسيط للخيار والنص
+            mcq.question = re2.sub(r"\s+", " ", mcq.question).strip()
+            mcq.choices = [re2.sub(r"\s+", " ", c).strip() or "…" for c in mcq.choices]
+            items.append(mcq)
+        if tried >= n * 6:  # سقف محاولات معقول
+            break
+    if not items:
+        # fallback احتياطي
+        return make_mcqs(text, n)
+    # توحيد البنية (A..D) بنفس الشكل
+    normed=[]
+    for it in items[:n]:
+        # القص إلى 4 خيارات وتأمين الفهارس
+        ch = (it.choices + ["…","…","…","…"])[:4]
+        ai = it.answer_index if 0 <= it.answer_index < 4 else 0
+        normed.append(MCQ(id=it.id, question=it.question, choices=ch, answer_index=ai))
+    return normed
+# ------------------ تحويل إلى سجلات العرض ------------------
 def clean_option_text(t: str) -> str:
     t = (t or "").strip()
     t = re2.sub(AR_DIAC, "", t)
 # ------------------ صفحة الأسئلة (HTML فقط) ------------------
 def render_quiz_html(records: List[dict]) -> str:
+    parts=[]
     for i, rec in enumerate(records, start=1):
+        qid  = rec["id"]
         qtxt = rec["question"]
+        cor  = next((o["id"] for o in rec["options"] if o["is_correct"]), "")
+        opts_html=[]
         for o in rec["options"]:
             lid, txt = o["id"], o["text"]
             opts_html.append(f"""
         """)
     return f"""<div id="quiz" class="quiz-wrap">{''.join(parts)}</div>"""
 # ------------------ توليد الامتحان وتبديل الصفحات ------------------
+def build_quiz(text_area, file_path, n, model_id, zoom, mode):
     text_area = (text_area or "").strip()
     if not text_area and not file_path:
         return "", gr.update(visible=True), gr.update(visible=False), "🛈 الصق نصًا أو ارفع ملفًا أولًا."
     else:
         raw, _ = file_to_text(file_path, model_id=model_id, zoom=float(zoom))
     cleaned = postprocess(raw)
+    # اختيار الطور
+    try:
+        if mode == "فهم مباشر":
+            items = make_comp_mcqs(cleaned, n=int(n))
+        else:
+            items = make_mcqs(cleaned, n=int(n))
+    except Exception as e:
+        # fallback النهائي
+        items = make_mcqs(cleaned, n=int(n))
     recs    = to_records(items)
     return render_quiz_html(recs), gr.update(visible=False), gr.update(visible=True), ""
 .gradio-container{max-width:980px;margin:0 auto;padding:12px 12px 40px;}
 h2.top{color:#eaeaf2;margin:6px 0 16px}
+/* صفحة الإدخال ثابتة الارتفاع ولا تتغير أبعادها */
 .input-panel{background:var(--panel);border:1px solid var(--border);border-radius:14px;padding:16px;
   box-shadow:0 16px 38px rgba(0,0,0,.35); min-height:360px; display:flex; flex-direction:column; gap:12px;}
 .small{opacity:.9;color:#d9dee8}
 }
 .q-actions .q-submit:disabled{opacity:.5;cursor:not-allowed}
 .q-note{color:#ffd1d6}
+.q-note.warn{color:#ffd1d6}
 """
+# ------------------ JS: ربط Submit بعد الرندر (كما هو مع إبراز الصح) ------------------
 ATTACH_LISTENERS_JS = """
 () => {
   if (window.__q_submit_bound_multi2) { return 'already'; }
   window.__q_submit_bound_multi2 = true;
     const chosenLabel = chosen.closest('.opt');
     if (chosen.value === correct) {
       chosenLabel.classList.add('ok');
       if (badge){ badge.hidden=false; badge.className='q-badge ok'; badge.textContent='Correct!'; }
       card.querySelectorAll('input[type="radio"]').forEach(i => i.disabled = true);
       e.target.disabled = true;
       if (note) note.textContent = '';
       const qNode = card.querySelector('.q-text');
       if (qNode){
+        const full = qNode.textContent || qNode.innerText || '';
         const correctText = [...card.querySelectorAll('.opt')].find(o =>
           o.querySelector('input').value === correct
         )?.querySelector('.opt-text')?.textContent || '';
+        if (full && correctText && full.includes('_____')){
           const highlighted = full.replace('_____', `<mark style="background:#2dd4bf22;border:1px solid #2dd4bf55;border-radius:6px;padding:0 4px">${correctText}</mark>`);
           qNode.innerHTML = highlighted;
         }
       return;
     }
     chosenLabel.classList.add('err');
     if (badge){ badge.hidden=false; badge.className='q-badge err'; badge.textContent='Incorrect.'; }
     if (note) note.textContent = '';
 }
 """
+# ------------------ واجهة Gradio (نفس الصفحتين + اختيار نوع السؤال) ------------------
 with gr.Blocks(title="Question Generator", css=CSS) as demo:
     gr.Markdown("<h2 class='top'>Question Generator</h2>")
         file_comp = gr.File(label="أو ارفع ملف (PDF / TXT)", file_count="single",
                             file_types=[".pdf",".txt"], type="filepath", elem_classes=["upload-like"])
         num_q = gr.Slider(4, 20, value=DEFAULT_NUM_QUESTIONS, step=1, label="عدد الأسئلة")
+        # جديد: اختيار نوع السؤال دون تغيير بنية الصفحة
+        mode_radio = gr.Radio(choices=QUESTION_MODES, value="فراغ", label="نوع السؤال")
         with gr.Accordion("خيارات PDF المصوّر (اختياري)", open=False):
             trocr_model = gr.Dropdown(
                 choices=[
     # بناء الامتحان + تبديل الصفحات + ربط الـJS
     btn_build.click(
         build_quiz,
+        inputs=[text_area, file_comp, num_q, trocr_model, trocr_zoom, mode_radio],
         outputs=[quiz_html, page1, page2, warn]
     ).then(
         None, inputs=None, outputs=[js_wired], js=ATTACH_LISTENERS_JS