Spaces:

Leen172
/

Question_generator

Sleeping

App Files Files Community

Leen172 committed on Oct 29

Commit

a5f2a6f

verified ·

1 Parent(s): 254fb45

Update app.py

Browse files

Files changed (1) hide show

app.py +282 -129

app.py CHANGED Viewed

@@ -27,7 +27,7 @@ from tqdm import tqdm
 random.seed(42)
 DEFAULT_LANG = "ar"
 DEFAULT_NUM_QUESTIONS = 8
-DEFAULT_TROCR_MODEL = "microsoft/trocr-base-printed"  # أسرع من large
 DEFAULT_TROCR_ZOOM = 2.8
 # كاش بسيط للـ OCR pipeline (تحميل كسول)
@@ -42,7 +42,7 @@ def _get_ocr_pipeline(model_id: str):
     return _OCR_PIPE[model_id]
 # =========================
-# 2) استخراج النص من PDF
 # =========================
 def extract_text_with_pypdf(pdf_path: str) -> str:
     reader = PdfReader(pdf_path)
@@ -88,7 +88,8 @@ def is_extraction_good(text: str, min_chars: int = 250, min_alpha_ratio: float =
     return ratio >= min_alpha_ratio
 def save_text(text: str, out_path: str) -> None:
-    os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)
     with open(out_path, "w", encoding="utf-8") as f:
         f.write(text)
@@ -103,7 +104,6 @@ def pdf_to_txt(pdf_path: str, out_txt_path: str = None,
         method = "embedded (pypdf)"
     else:
         if not ocr_model:
-            # وضع تجريبي بلا OCR
             final_text = embedded_text
             method = "embedded (pypdf: weak)"
         else:
@@ -125,12 +125,9 @@ def strip_page_headers(text: str) -> str:
     lines = text.splitlines()
     out = []
     for ln in lines:
-        if re2.match(r"^\s*--- \[Page \d+\] ---\s*$", ln):
-            continue
-        if re2.match(r"^\s*(Page\s*\d+|صفحة\s*\d+)\s*$", ln):
-            continue
-        if re2.match(r"^\s*[-–—_*]{3,}\s*$", ln):
-            continue
         out.append(ln)
     return "\n".join(out)
@@ -143,8 +140,8 @@ def normalize_arabic(text: str) -> str:
     text = re2.sub(r"[يى]", "ي", text)
     text = re2.sub(r"\s+", " ", text)
     # إزالة التكرار الزائد للحروف (مثل جذرياا -> جذريا)
-    text = re2.sub(r'(\p{L})\1{2,}', r'\1', text)  # أكثر من مرتين
-    text = re2.sub(r'(\p{L})\1', r'\1', text)      # التكرار المتبقي
     return text.strip()
 def arabic_ocr_fixes(text: str) -> str:
@@ -193,14 +190,10 @@ def top_keywords_yake(text: str, max_k: int = 120, lan: str = 'ar') -> List[str]
     seen, out = set(), []
     for k in candidates:
         kk = k.strip()
-        if not kk or kk in seen:
-            continue
-        if lan == "ar" and kk in AR_STOP:
-            continue
-        if len(kk) < 3:
-            continue
-        if re2.match(r"^[\p{P}\p{S}]+$", kk):
-            continue
         seen.add(kk)
         out.append(kk)
     return out
@@ -231,26 +224,18 @@ def build_distractors(correct: str, pool: List[str], k: int = 3) -> List[str]:
     target_len = len(correct.strip())
     cand = []
     for w in pool:
-        if not w:
-            continue
         w2 = w.strip()
-        if w2 == correct.strip():
-            continue
-        if len(w2) < 3 or w2 in AR_STOP:
-            continue
-        if re2.match(r"^[\p{P}\p{S}\d_]+$", w2):
-            continue
-        # تقارب طولي
         if abs(len(w2) - target_len) <= 3:
             cand.append(w2)
     random.shuffle(cand)
     out = []
     for w in cand:
         out.append(w)
-        if len(out) == k:
-            break
     fillers = ["—", "— —", "—-"]
     while len(out) < k:
         out.append(random.choice(fillers))
@@ -260,7 +245,6 @@ def make_mcqs_from_text(text: str, n: int = 8, lang: str = 'ar') -> List[MCQ]:
     sentences = split_sentences(text)
     if not sentences:
         raise ValueError("النص قصير جدًا أو غير صالح لتوليد أسئلة.")
     keywords = top_keywords_yake(text, max_k=160, lan=lang)
     if not keywords:
         toks = re2.findall(r"[\p{L}\p{N}_]+", text)
@@ -269,27 +253,20 @@ def make_mcqs_from_text(text: str, n: int = 8, lang: str = 'ar') -> List[MCQ]:
         for t in toks:
             freq[t] = freq.get(t, 0) + 1
         keywords = [w for w, c in sorted(freq.items(), key=lambda x: -x[1])][:80]
     sent_for_kw = {}
     for s in sentences:
         for kw in keywords:
-            if not _is_good_kw(kw):
-                continue
             if re2.search(rf"(?<!\p{{L}}){re2.escape(kw)}(?!\p{{L}})", s) and kw not in sent_for_kw:
                 sent_for_kw[kw] = s
     items: List[MCQ] = []
     used_sents = set()
     pool_iter = [kw for kw in keywords if kw in sent_for_kw]
     for kw in pool_iter:
-        if len(items) >= n:
-            break
-        if not _is_good_kw(kw):
-            continue
         s = sent_for_kw[kw]
-        if s in used_sents:
-            continue
         blanked = re2.sub(rf"(?<!\p{{L}}){re2.escape(kw)}(?!\p{{L}})", "_____", s, count=1)
         correct = kw
         distractors = build_distractors(correct, [x for x in keywords if x != kw], k=3)
@@ -297,15 +274,8 @@ def make_mcqs_from_text(text: str, n: int = 8, lang: str = 'ar') -> List[MCQ]:
         random.shuffle(choices)
         ans_idx = choices.index(correct)
         exp = f"مقتبس من الجملة: {s[:220]}" + ("..." if len(s) > 220 else "")
-        items.append(MCQ(
-            id=str(uuid.uuid4())[:8],
-            question=blanked,
-            choices=choices,
-            answer_index=ans_idx,
-            explanation=exp
-        ))
         used_sents.add(s)
     if not items:
         raise RuntimeError("تعذر توليد أسئلة من النص. جرّب نصاً أطول أو مختلفاً.")
     return items
@@ -317,67 +287,196 @@ AR_PUNCT = "،؛؟"
 EN_PUNCT = ",;?"
 def normalize_punct(s: str) -> str:
-    if not s:
-        return ""
     s = s.replace(",", "،").replace(";", "؛").replace("?", "؟")
     return s.strip().strip(AR_PUNCT + EN_PUNCT).strip()
 def is_bad_choice(txt: str) -> bool:
-    if not txt:
-        return True
     txt = txt.strip()
     BAD_NOISE = {"وهنا","اليه","الي","ليبق","لان","لانها","لانّه","ذلك","هذا","هذه"}
-    if txt in BAD_NOISE:
-        return True
-    if len(txt) > 18 and " " not in txt:
-        return True
-    if len(txt) < 2:
-        return True
-    if txt in AR_STOP:
-        return True
-    if re2.match(r"^[\p{P}\p{S}]+$", txt):
-        return True
     return False
 def build_json_records(items: List[MCQ], lang: str, source_pdf: str, method: str, num_questions: int):
     json_data = []
     letters = ["A", "B", "C", "D"]
     for it in items:
-        opts = []
-        seen = set()
         for idx, lbl in enumerate(letters):
             raw = it.choices[idx] if idx < len(it.choices) else ""
             txt = normalize_punct(raw)
-            if is_bad_choice(txt):
-                txt = "—"
-            if txt in seen:
-                txt += " "
             seen.add(txt)
-            opts.append({
-                "id": lbl,
-                "text": txt,
-                "is_correct": (it.answer_index == idx)
-            })
         q_clean = normalize_punct(it.question)
         exp_clean = normalize_punct(it.explanation)
         record = {
-            "id": it.id,
-            "question": q_clean,
-            "options": opts,
-            "explanation": exp_clean,
-            "meta": {
-                "lang": lang,
-                "normalized": True,
-                "source_pdf": source_pdf,
-                "extraction_method": method,
-                "num_questions": int(num_questions),
-            }
         }
         json_data.append(record)
     return json_data
 # =========================
-# 7) الدالة الرئيسية (دعم PDF و TXT)
 # =========================
 def process_pdf(pdf_file_path,
                 num_questions=DEFAULT_NUM_QUESTIONS,
@@ -389,16 +488,13 @@ def process_pdf(pdf_file_path,
         if not pdf_file_path:
             return {}, None, "يرجى رفع ملف PDF/TXT أولاً."
-        # pdf_file_path قد يكون str أو NamedString -> خذه كمسار
         src_path = str(pdf_file_path)
         name_guess = getattr(pdf_file_path, "name", "") if hasattr(pdf_file_path, "name") else ""
         filename = Path(name_guess).name or Path(src_path).name or "input"
         workdir = tempfile.mkdtemp(prefix="mcq_")
-        # تأكد من الامتداد
         ext = Path(filename).suffix.lower()
         if ext not in [".pdf", ".txt"]:
-            # حاول تخمين نوعه، افتراض PDF
             ext = ".pdf"
         if not Path(filename).suffix:
             filename += ext
@@ -407,7 +503,7 @@ def process_pdf(pdf_file_path,
         shutil.copy(src_path, local_path)
         logs.append(f"تم نسخ الملف إلى: {local_path}")
-        # 1) استخراج النص بحسب النوع
         if ext == ".txt":
             with open(local_path, "r", encoding="utf-8", errors="ignore") as f:
                 raw_text = f.read()
@@ -448,45 +544,102 @@ def process_pdf(pdf_file_path,
         return {}, None, "\n".join(logs)
 # =========================
-# 8) واجهة Gradio (v5)
 # =========================
 import gradio as gr
-with gr.Blocks(title="PDF/TXT → MCQ JSON (Arabic YAKE / TrOCR)") as demo:
-    gr.Markdown("## تحويل PDF/TXT إلى أسئلة اختيار من متعدد وإرجاع JSON جاهز للواجهة")
-    with gr.Row():
-        inp_pdf = gr.File(
-            label="ارفع PDF أو TXT",
-            file_count="single",
-            file_types=[".pdf", ".txt"],
-            type="filepath",  # يُعيد مسار الملف
-        )
-        with gr.Column():
-            num_q = gr.Slider(4, 20, value=DEFAULT_NUM_QUESTIONS, step=1, label="عدد الأسئلة")
-            trocr_zoom = gr.Slider(2.0, 3.5, value=DEFAULT_TROCR_ZOOM, step=0.1, label="دقة تحويل PDF لصور (Zoom)")
-            trocr_model = gr.Dropdown(
-                choices=[
-                    "microsoft/trocr-base-printed",
-                    "microsoft/trocr-large-printed",
-                    "microsoft/trocr-base-handwritten",
-                    "microsoft/trocr-large-handwritten",
-                ],
-                value=DEFAULT_TROCR_MODEL,
-                label="موديل TrOCR (للـ PDF المصوّر)"
             )
-    btn = gr.Button("تشغيل المعالجة", variant="primary")
-    out_json = gr.JSON(label="النتيجة (JSON)")
-    out_file = gr.File(label="تحميل ملف JSON")
-    out_log = gr.Textbox(label="Logs", lines=10)
-    btn.click(
-        fn=process_pdf,
-        inputs=[inp_pdf, num_q, gr.State(DEFAULT_LANG), trocr_model, trocr_zoom],
-        outputs=[out_json, out_file, out_log]
-    )
-# ملاحظة: Spaces تتعرف تلقائياً على المتغير "demo".
 if __name__ == "__main__":
     demo.queue().launch()

 random.seed(42)
 DEFAULT_LANG = "ar"
 DEFAULT_NUM_QUESTIONS = 8
+DEFAULT_TROCR_MODEL = "microsoft/trocr-base-printed"
 DEFAULT_TROCR_ZOOM = 2.8
 # كاش بسيط للـ OCR pipeline (تحميل كسول)
     return _OCR_PIPE[model_id]
 # =========================
+# 2) استخراج النص من PDF/TXT
 # =========================
 def extract_text_with_pypdf(pdf_path: str) -> str:
     reader = PdfReader(pdf_path)
     return ratio >= min_alpha_ratio
 def save_text(text: str, out_path: str) -> None:
+    os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True
+    )
     with open(out_path, "w", encoding="utf-8") as f:
         f.write(text)
         method = "embedded (pypdf)"
     else:
         if not ocr_model:
             final_text = embedded_text
             method = "embedded (pypdf: weak)"
         else:
     lines = text.splitlines()
     out = []
     for ln in lines:
+        if re2.match(r"^\s*--- \[Page \d+\] ---\s*$", ln): continue
+        if re2.match(r"^\s*(Page\s*\d+|صفحة\s*\d+)\s*$", ln): continue
+        if re2.match(r"^\s*[-–—_*]{3,}\s*$", ln): continue
         out.append(ln)
     return "\n".join(out)
     text = re2.sub(r"[يى]", "ي", text)
     text = re2.sub(r"\s+", " ", text)
     # إزالة التكرار الزائد للحروف (مثل جذرياا -> جذريا)
+    text = re2.sub(r'(\p{L})\1{2,}', r'\1', text)
+    text = re2.sub(r'(\p{L})\1', r'\1', text)
     return text.strip()
 def arabic_ocr_fixes(text: str) -> str:
     seen, out = set(), []
     for k in candidates:
         kk = k.strip()
+        if not kk or kk in seen: continue
+        if lan == "ar" and kk in AR_STOP: continue
+        if len(kk) < 3: continue
+        if re2.match(r"^[\p{P}\p{S}]+$", kk): continue
         seen.add(kk)
         out.append(kk)
     return out
     target_len = len(correct.strip())
     cand = []
     for w in pool:
+        if not w: continue
         w2 = w.strip()
+        if w2 == correct.strip(): continue
+        if len(w2) < 3 or w2 in AR_STOP: continue
+        if re2.match(r"^[\p{P}\p{S}\d_]+$", w2): continue
         if abs(len(w2) - target_len) <= 3:
             cand.append(w2)
     random.shuffle(cand)
     out = []
     for w in cand:
         out.append(w)
+        if len(out) == k: break
     fillers = ["—", "— —", "—-"]
     while len(out) < k:
         out.append(random.choice(fillers))
     sentences = split_sentences(text)
     if not sentences:
         raise ValueError("النص قصير جدًا أو غير صالح لتوليد أسئلة.")
     keywords = top_keywords_yake(text, max_k=160, lan=lang)
     if not keywords:
         toks = re2.findall(r"[\p{L}\p{N}_]+", text)
         for t in toks:
             freq[t] = freq.get(t, 0) + 1
         keywords = [w for w, c in sorted(freq.items(), key=lambda x: -x[1])][:80]
     sent_for_kw = {}
     for s in sentences:
         for kw in keywords:
+            if not _is_good_kw(kw): continue
             if re2.search(rf"(?<!\p{{L}}){re2.escape(kw)}(?!\p{{L}})", s) and kw not in sent_for_kw:
                 sent_for_kw[kw] = s
     items: List[MCQ] = []
     used_sents = set()
     pool_iter = [kw for kw in keywords if kw in sent_for_kw]
     for kw in pool_iter:
+        if len(items) >= n: break
+        if not _is_good_kw(kw): continue
         s = sent_for_kw[kw]
+        if s in used_sents: continue
         blanked = re2.sub(rf"(?<!\p{{L}}){re2.escape(kw)}(?!\p{{L}})", "_____", s, count=1)
         correct = kw
         distractors = build_distractors(correct, [x for x in keywords if x != kw], k=3)
         random.shuffle(choices)
         ans_idx = choices.index(correct)
         exp = f"مقتبس من الجملة: {s[:220]}" + ("..." if len(s) > 220 else "")
+        items.append(MCQ(id=str(uuid.uuid4())[:8], question=blanked, choices=choices, answer_index=ans_idx, explanation=exp))
         used_sents.add(s)
     if not items:
         raise RuntimeError("تعذر توليد أسئلة من النص. جرّب نصاً أطول أو مختلفاً.")
     return items
 EN_PUNCT = ",;?"
 def normalize_punct(s: str) -> str:
+    if not s: return ""
     s = s.replace(",", "،").replace(";", "؛").replace("?", "؟")
     return s.strip().strip(AR_PUNCT + EN_PUNCT).strip()
 def is_bad_choice(txt: str) -> bool:
+    if not txt: return True
     txt = txt.strip()
     BAD_NOISE = {"وهنا","اليه","الي","ليبق","لان","لانها","لانّه","ذلك","هذا","هذه"}
+    if txt in BAD_NOISE: return True
+    if len(txt) > 18 and " " not in txt: return True
+    if len(txt) < 2: return True
+    if txt in AR_STOP: return True
+    if re2.match(r"^[\p{P}\p{S}]+$", txt): return True
     return False
 def build_json_records(items: List[MCQ], lang: str, source_pdf: str, method: str, num_questions: int):
     json_data = []
     letters = ["A", "B", "C", "D"]
     for it in items:
+        opts, seen = [], set()
         for idx, lbl in enumerate(letters):
             raw = it.choices[idx] if idx < len(it.choices) else ""
             txt = normalize_punct(raw)
+            if is_bad_choice(txt): txt = "—"
+            if txt in seen: txt += " "
             seen.add(txt)
+            opts.append({"id": lbl, "text": txt, "is_correct": (it.answer_index == idx)})
         q_clean = normalize_punct(it.question)
         exp_clean = normalize_punct(it.explanation)
         record = {
+            "id": it.id, "question": q_clean, "options": opts, "explanation": exp_clean,
+            "meta": {"lang": lang, "normalized": True, "source_pdf": source_pdf, "extraction_method": method, "num_questions": int(num_questions)}
         }
         json_data.append(record)
     return json_data
 # =========================
+# 7) دوال تبويب "حلّ الاختبار"
+# =========================
+def _format_question(rec):
+    q = rec.get("question","").strip()
+    return f"### السؤال:\n{q}"
+def _radio_choices(rec):
+    # يعيد قائمة نصوص مثل "A) ...", "B) ..."
+    letters = ["A","B","C","D"]
+    out = []
+    for opt in rec.get("options", []):
+        lid, text = opt.get("id",""), opt.get("text","")
+        out.append(f"{lid}) {text}")
+    # إذا ناقص خيارات، كمّل لمواءمة المكوّن
+    while len(out) < 4:
+        out.append(f"{letters[len(out)]}) —")
+    return out
+def _correct_letter(rec):
+    for opt in rec.get("options", []):
+        if opt.get("is_correct"):
+            return opt.get("id","")
+    return ""
+def _explanation(rec):
+    return rec.get("explanation","")
def init_quiz_state(records):
    """Create a fresh quiz-session state dict around *records*.

    The records object is stored as-is (not copied) and the session starts
    at the first question with nothing answered or revealed.
    """
    state = {"records": records, "idx": 0}
    state["answers"] = {}        # question id -> chosen letter ("A"/"B"/"C"/"D")
    state["revealed"] = set()    # ids of questions whose answer has been shown
    state["finished"] = False
    state["csv_path"] = None
    return state
def render_current(rec, user_choice=None, revealed=False):
    """Compute the display pieces for one question record.

    Args:
        rec: Question record dict.
        user_choice: Letter the user picked for this question, if any.
        revealed: Whether the answer for this question has been revealed.

    Returns:
        A tuple ``(question_markdown, choice_labels, explanation, feedback)``.
        The explanation is empty until the answer is revealed. The feedback
        is the picked letter before reveal, and after reveal either a
        right/wrong verdict or, when nothing was picked, just the correct
        letter.
    """
    q_md = _format_question(rec)
    choices = _radio_choices(rec)
    exp = _explanation(rec) if revealed else ""
    feedback = ""
    if revealed:
        correct = _correct_letter(rec)
        if not user_choice:
            # Same message on_reveal produces for an unanswered question, so a
            # re-render after reveal does not blank the feedback.
            feedback = f"الصحيح: {correct}"
        elif user_choice == correct:
            feedback = "✅ إجابة صحيحة"
        else:
            feedback = f"❌ إجابة خاطئة — الصحيح: {correct}"
    elif user_choice:
        feedback = f"تم اختيار: {user_choice}"
    return q_md, choices, exp, feedback
def on_start_quiz(json_records):
    """Start (or restart) a quiz from either a state dict or a raw record list.

    The Start button passes the shared quiz state, which is the dict built
    by ``init_quiz_state``, not a list. Such a dict is accepted as-is and
    rewound to the first question; previously it was rejected, which wiped
    the loaded quiz. A plain non-empty list of records is still supported
    and gets a fresh state.

    Args:
        json_records: A quiz state dict with a non-empty ``records`` list,
            or a non-empty list of question records.

    Returns:
        ``(state, message)``; ``state`` is None when no usable questions
        were supplied.
    """
    started_msg = "تم بدء الاختبار. بالتوفيق!"
    if isinstance(json_records, dict):
        records = json_records.get("records")
        if isinstance(records, list) and records:
            # Already-initialised state: keep answers, jump to question 1.
            json_records["idx"] = 0
            return json_records, started_msg
    if not json_records or not isinstance(json_records, list):
        return None, "لم يتم العثور على أسئلة صالحة."
    return init_quiz_state(json_records), started_msg
def on_load_json_file(file_path):
    """Load question records from a JSON file into a new quiz state.

    The file must contain a non-empty JSON list of objects. It is decoded
    as ``utf-8-sig`` so a leading BOM is tolerated. Records lacking an
    ``id`` receive a positional one (``q1``, ``q2``, ...) because the quiz
    handlers key answers by ``rec["id"]``.

    Args:
        file_path: Path of the uploaded JSON file (any object whose
            ``str()`` is the path); falsy when nothing was selected.

    Returns:
        ``(state, message)``; ``state`` is None when nothing was selected
        or the file could not be read, parsed or validated.
    """
    if not file_path:
        return None, "لم يتم اختيار ملف."
    try:
        with open(str(file_path), "r", encoding="utf-8-sig") as f:
            data = json.load(f)
        if not isinstance(data, list):
            raise ValueError("صيغة JSON غير صحيحة (يجب أن تكون قائمة).")
        if not data:
            raise ValueError("الملف لا يحتوي على أي أسئلة.")
        for position, rec in enumerate(data, start=1):
            if not isinstance(rec, dict):
                raise ValueError(f"العنصر رقم {position} ليس سؤالاً صالحاً.")
            rec.setdefault("id", f"q{position}")
    except (OSError, ValueError) as e:
        # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        return None, f"خطأ في قراءة JSON: {e}"
    return init_quiz_state(data), "تم تحميل ملف JSON بنجاح. اضغط بدء الاختبار."
def on_show_question(state):
    """Return the display pieces for the question the state points at.

    Args:
        state: Quiz state dict, or a falsy value when no quiz is loaded.

    Returns:
        ``(question_markdown, choice_labels, explanation, feedback,
        position)`` where ``position`` is formatted as ``"i / total"``.
        All-empty values are returned when there is no state or the state
        holds no records.
    """
    if not state or not state.get("records"):
        return "", [], "", "", ""
    recs = state["records"]
    # Clamp so a stale or out-of-range index can never raise IndexError.
    idx = min(max(state["idx"], 0), len(recs) - 1)
    rec = recs[idx]
    q_md, choices, exp, feedback = render_current(
        rec,
        user_choice=state["answers"].get(rec["id"]),
        revealed=(rec["id"] in state["revealed"])
    )
    pos = f"{idx+1} / {len(recs)}"
    return q_md, choices, exp, feedback, pos
def on_select_choice(state, choice_label):
    """Record the user's pick for the current question and build feedback.

    *choice_label* has the form ``"A) text"``; the part before the first
    ``")"`` is stored as the chosen letter. If the question's answer was
    already revealed, the feedback is a right/wrong verdict; otherwise it
    just echoes the chosen letter. With no state or no label, the state is
    returned unchanged with empty feedback.
    """
    if not (state and choice_label):
        return state, ""
    current = state["records"][state["idx"]]
    question_id = current["id"]
    letter, _, _ = choice_label.partition(")")
    letter = letter.strip()
    state["answers"][question_id] = letter
    if question_id not in state["revealed"]:
        return state, f"تم اختيار: {letter}"
    # Answer already revealed: regenerate the verdict for the new pick.
    expected = _correct_letter(current)
    if letter == expected:
        return state, "✅ إجابة صحيحة"
    return state, f"❌ إجابة خاطئة — الصحيح: {expected}"
def on_prev(state):
    """Step the state's index back by one, stopping at the first question.

    A falsy state is returned untouched.
    """
    if state:
        previous = state["idx"] - 1
        state["idx"] = previous if previous > 0 else 0
    return state
def on_next(state):
    """Advance the state's index by one, stopping at the last question.

    A falsy state is returned untouched. With an empty record list the
    index stays at 0 instead of becoming -1.
    """
    if not state:
        return state
    last_index = max(0, len(state["records"]) - 1)
    state["idx"] = min(last_index, state["idx"] + 1)
    return state
def on_reveal(state):
    """Mark the current question as revealed and build the feedback text.

    The feedback is a success message when the stored answer equals the
    correct letter, a failure message naming the correct letter when a
    different answer was stored, and just the correct letter when nothing
    was answered. A falsy state is returned unchanged with empty feedback.
    """
    if not state:
        return state, ""
    current = state["records"][state["idx"]]
    state["revealed"].add(current["id"])
    chosen = state["answers"].get(current["id"])
    expected = _correct_letter(current)
    if chosen == expected:
        verdict = "✅ إجابة صحيحة"
    elif chosen:
        verdict = f"❌ إجابة خاطئة — الصحيح: {expected}"
    else:
        verdict = f"الصحيح: {expected}"
    return state, verdict
def on_finish(state):
    """Score the quiz, export a per-question CSV and mark the state finished.

    Every record is classified as skipped (no stored answer), correct or
    wrong. One CSV row per question is written to ``results.csv`` inside a
    fresh temporary directory, and the path is stored on the state.

    Returns:
        ``(state, score_text, csv_path)``; for a falsy state this is
        ``(state, "", None)``.
    """
    if not state:
        return state, "", None
    records = state["records"]
    n_right = n_wrong = n_skipped = 0
    table = []
    for rec in records:
        chosen = state["answers"].get(rec["id"])
        expected = _correct_letter(rec)
        answered_right = (chosen == expected) if chosen else False
        if chosen is None:
            n_skipped += 1
        elif answered_right:
            n_right += 1
        else:
            n_wrong += 1
        # One CSV row: question text, the four option texts, then the outcome.
        text_by_letter = {opt["id"]: opt["text"] for opt in rec.get("options", [])}
        row = {"question": rec.get("question","")}
        for letter in ("A", "B", "C", "D"):
            row[letter] = text_by_letter.get(letter, "")
        row["user_choice"] = chosen or ""
        row["correct"] = expected
        row["is_correct"] = bool(answered_right)
        table.append(row)
    n_total = len(records)
    score = f"النتيجة: {n_right}/{n_total} (صحيح: {n_right}، خطأ: {n_wrong}، متروك: {n_skipped})"
    frame = pd.DataFrame(table)
    out_dir = tempfile.mkdtemp(prefix="quiz_")
    csv_path = os.path.join(out_dir, "results.csv")
    frame.to_csv(csv_path, index=False, encoding="utf-8-sig")
    state["finished"] = True
    state["csv_path"] = csv_path
    return state, score, csv_path
def on_reset():
    """Return cleared values for the eight reset outputs.

    The tuple is: no state, five empty strings, no file, and a confirmation
    message.
    """
    blank = ""
    return (None, blank, blank, blank, blank, blank, None, "تمت إعادة الضبط.")
+# =========================
+# 8) التبويب الأول: توليد الأسئلة (PDF/TXT → JSON)
 # =========================
 def process_pdf(pdf_file_path,
                 num_questions=DEFAULT_NUM_QUESTIONS,
         if not pdf_file_path:
             return {}, None, "يرجى رفع ملف PDF/TXT أولاً."
         src_path = str(pdf_file_path)
         name_guess = getattr(pdf_file_path, "name", "") if hasattr(pdf_file_path, "name") else ""
         filename = Path(name_guess).name or Path(src_path).name or "input"
         workdir = tempfile.mkdtemp(prefix="mcq_")
         ext = Path(filename).suffix.lower()
         if ext not in [".pdf", ".txt"]:
             ext = ".pdf"
         if not Path(filename).suffix:
             filename += ext
         shutil.copy(src_path, local_path)
         logs.append(f"تم نسخ الملف إلى: {local_path}")
+        # 1) استخراج النص
         if ext == ".txt":
             with open(local_path, "r", encoding="utf-8", errors="ignore") as f:
                 raw_text = f.read()
         return {}, None, "\n".join(logs)
 # =========================
+# 9) واجهة Gradio (تبويبان)
 # =========================
 import gradio as gr
with gr.Blocks(title="PDF/TXT → MCQ + Quiz", css="""
body { direction: rtl; font-family: system-ui, 'Cairo', 'IBM Plex Arabic', sans-serif; }
label, .gr-markdown { text-align: right; }
""") as demo:
    gr.Markdown("## مولّد أسئلة + واجهة اختبار تفاعلي")
    # State shared by both tabs; holds the dict built by init_quiz_state(...).
    quiz_state = gr.State(value=None)
    toast = gr.Markdown("")
    with gr.Tabs():
        # --- Tab 1: question generation (PDF/TXT -> JSON) ---
        with gr.TabItem("توليد الأسئلة (PDF/TXT → JSON)"):
            with gr.Row():
                inp_pdf = gr.File(label="ارفع PDF أو TXT", file_count="single", file_types=[".pdf",".txt"], type="filepath")
                with gr.Column():
                    num_q = gr.Slider(4, 20, value=DEFAULT_NUM_QUESTIONS, step=1, label="عدد الأسئلة")
                    trocr_zoom = gr.Slider(2.0, 3.5, value=DEFAULT_TROCR_ZOOM, step=0.1, label="دقة تحويل PDF لصور (Zoom)")
                    trocr_model = gr.Dropdown(
                        choices=[
                            "microsoft/trocr-base-printed",
                            "microsoft/trocr-large-printed",
                            "microsoft/trocr-base-handwritten",
                            "microsoft/trocr-large-handwritten",
                        ],
                        value=DEFAULT_TROCR_MODEL, label="موديل TrOCR (للـ PDF المصوّر)"
                    )
            btn_gen = gr.Button("تشغيل المعالجة", variant="primary")
            out_json = gr.JSON(label="النتيجة (JSON)")
            out_file = gr.File(label="تحميل mcqs.json")
            out_log = gr.Textbox(label="Logs", lines=10)
            btn_send_to_quiz = gr.Button("إرسال الأسئلة إلى تبويب الاختبار")
            btn_gen.click(
                fn=process_pdf,
                inputs=[inp_pdf, num_q, gr.State(DEFAULT_LANG), trocr_model, trocr_zoom],
                outputs=[out_json, out_file, out_log]
            )

            # Hand the generated records straight to the quiz tab.
            def _send_to_quiz(records):
                if not records:
                    return None, "لا يوجد أسئلة لإرسالها."
                # The message names the quiz tab's actual start button
                # (the earlier text referred to a button that does not exist).
                return init_quiz_state(records), "تم إرسال الأسئلة إلى تبويب الاختبار. افتحه واضغط 'بدء الاختبار'."
            btn_send_to_quiz.click(_send_to_quiz, inputs=[out_json], outputs=[quiz_state, toast])
        # --- Tab 2: taking the quiz ---
        with gr.TabItem("حلّ الاختبار (Quiz)"):
            gr.Markdown("### 1) حمّل JSON للأسئلة أو استخدم زر الإرسال من التبويب الأول")
            json_file = gr.File(label="أو ارفع ملف JSON", file_types=[".json"], type="filepath")
            btn_load_json = gr.Button("تحميل ملف JSON")
            btn_start = gr.Button("بدء الاختبار", variant="primary")
            gr.Markdown("### 2) حل السؤال الحالي")
            q_md = gr.Markdown("")
            choices = gr.Radio(choices=[], label="اختر الإجابة")
            exp_md = gr.Markdown("")
            feedback = gr.Markdown("")
            progress = gr.Label("")
            with gr.Row():
                btn_prev = gr.Button("السابق")
                btn_next = gr.Button("التالي")
                btn_reveal = gr.Button("إظهار الإجابة")
            with gr.Row():
                btn_finish = gr.Button("إنهاء الاختبار", variant="stop")
                btn_reset = gr.Button("إعادة ضبط")
            score_md = gr.Markdown("")
            results_csv = gr.File(label="تحميل نتائج CSV")

            render_outputs = [q_md, choices, exp_md, feedback, progress]

            def _show_and_render(state):
                """Render the current question and refresh the radio's options.

                A plain list returned to a Radio output is treated as its
                value, not its choices, so the option list has to be sent via
                gr.update(choices=...). The previously stored answer, if any,
                is re-selected.
                """
                q_text, option_labels, explanation, fb, position = on_show_question(state)
                selected = None
                if state and option_labels:
                    rec = state["records"][state["idx"]]
                    letter = state["answers"].get(rec["id"])
                    if letter:
                        selected = next(
                            (lbl for lbl in option_labels if lbl.split(")")[0].strip() == letter),
                            None,
                        )
                return q_text, gr.update(choices=option_labels, value=selected), explanation, fb, position

            # Wire the buttons to their handlers.
            btn_load_json.click(on_load_json_file, inputs=[json_file], outputs=[quiz_state, toast])
            # Start: update the state first, then render. Two independent click
            # handlers would race on the shared state.
            btn_start.click(on_start_quiz, inputs=[quiz_state], outputs=[quiz_state, toast]).then(
                _show_and_render, inputs=[quiz_state], outputs=render_outputs
            )
            # Re-render after every navigation or reveal.
            btn_prev.click(on_prev, inputs=[quiz_state], outputs=[quiz_state]).then(
                _show_and_render, inputs=[quiz_state], outputs=render_outputs
            )
            btn_next.click(on_next, inputs=[quiz_state], outputs=[quiz_state]).then(
                _show_and_render, inputs=[quiz_state], outputs=render_outputs
            )
            btn_reveal.click(on_reveal, inputs=[quiz_state], outputs=[quiz_state, feedback]).then(
                _show_and_render, inputs=[quiz_state], outputs=render_outputs
            )

            # Answer selection. `.input` fires only on user interaction, whereas
            # `.change` would also fire when _show_and_render updates the radio
            # and would blank the feedback it has just rendered.
            def _on_choice(state, choice):
                return on_select_choice(state, choice)
            choices.input(_on_choice, inputs=[quiz_state, choices], outputs=[quiz_state, feedback])

            # Finish: score the quiz and offer the CSV.
            btn_finish.click(on_finish, inputs=[quiz_state], outputs=[quiz_state, score_md, results_csv])

            # Reset: clear the outputs, then drop the stale radio options too.
            btn_reset.click(
                on_reset,
                outputs=[quiz_state, q_md, exp_md, feedback, progress, score_md, results_csv, toast],
            ).then(lambda: gr.update(choices=[], value=None), outputs=[choices])

# Spaces picks up the "demo" variable automatically.
if __name__ == "__main__":
    demo.queue().launch()