Spaces:

Leen172
/

Question_generator

Running

App Files Files Community

Leen172 committed on Oct 31

Commit

ffb63f9

verified ·

1 Parent(s): a5f2a6f

Update app.py

Browse files

Files changed (1) hide show

app.py +145 -276

app.py CHANGED Viewed

@@ -13,13 +13,11 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import List, Tuple
-import pandas as pd
 from PIL import Image
 from pypdf import PdfReader
 import fitz  # PyMuPDF
 import regex as re2
 import yake
-from tqdm import tqdm
 # =========================
 # إعدادات عامة
@@ -66,12 +64,11 @@ def pdf_pages_to_images(pdf_path: str, zoom: float = 2.5) -> List[Image.Image]:
     doc.close()
     return imgs
-def extract_text_with_ocr(pdf_path: str, model_id: str, zoom: float = 2.5, disable_tqdm: bool = True) -> str:
     ocr = _get_ocr_pipeline(model_id)
     images = pdf_pages_to_images(pdf_path, zoom=zoom)
     page_texts = []
-    pbar = tqdm(images, desc="TrOCR OCR", unit="p", disable=disable_tqdm)
-    for idx, img in enumerate(pbar):
         try:
             out = ocr(img)
             txt = out[0]["generated_text"].strip() if out and "generated_text" in out[0] else ""
@@ -87,36 +84,19 @@ def is_extraction_good(text: str, min_chars: int = 250, min_alpha_ratio: float =
     ratio = alnum / max(1, len(text))
     return ratio >= min_alpha_ratio
-def save_text(text: str, out_path: str) -> None:
-    os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True
-    )
-    with open(out_path, "w", encoding="utf-8") as f:
-        f.write(text)
-def pdf_to_txt(pdf_path: str, out_txt_path: str = None,
-               ocr_model: str = DEFAULT_TROCR_MODEL,
-               ocr_zoom: float = DEFAULT_TROCR_ZOOM) -> Tuple[str, str, str]:
     assert os.path.isfile(pdf_path), f"File not found: {pdf_path}"
     embedded_text = extract_text_with_pypdf(pdf_path)
     if is_extraction_good(embedded_text):
-        final_text = embedded_text
-        method = "embedded (pypdf)"
-    else:
-        if not ocr_model:
-            final_text = embedded_text
-            method = "embedded (pypdf: weak)"
-        else:
-            final_text = extract_text_with_ocr(pdf_path, model_id=ocr_model, zoom=ocr_zoom)
-            method = "OCR (Hugging Face TrOCR)"
-    if out_txt_path is None:
-        base, _ = os.path.splitext(pdf_path)
-        out_txt_path = base + ".txt"
-    header = f"[[ Extraction method: {method} ]]\n\n"
-    save_text(header + final_text, out_txt_path)
-    return final_text, out_txt_path, method
 # =========================
 # 3) تطبيع/تصحيح عربي
@@ -139,7 +119,7 @@ def normalize_arabic(text: str) -> str:
     text = re2.sub(r"[إأآا]", "ا", text)
     text = re2.sub(r"[يى]", "ي", text)
     text = re2.sub(r"\s+", " ", text)
-    # إزالة التكرار الزائد للحروف (مثل جذرياا -> جذريا)
     text = re2.sub(r'(\p{L})\1{2,}', r'\1', text)
     text = re2.sub(r'(\p{L})\1', r'\1', text)
     return text.strip()
@@ -281,7 +261,7 @@ def make_mcqs_from_text(text: str, n: int = 8, lang: str = 'ar') -> List[MCQ]:
     return items
 # =========================
-# 6) بناء JSON للإخراج
 # =========================
 AR_PUNCT = "،؛؟"
 EN_PUNCT = ",;?"
@@ -291,127 +271,82 @@ def normalize_punct(s: str) -> str:
     s = s.replace(",", "،").replace(";", "؛").replace("?", "؟")
     return s.strip().strip(AR_PUNCT + EN_PUNCT).strip()
-def is_bad_choice(txt: str) -> bool:
-    if not txt: return True
-    txt = txt.strip()
-    BAD_NOISE = {"وهنا","اليه","الي","ليبق","لان","لانها","لانّه","ذلك","هذا","هذه"}
-    if txt in BAD_NOISE: return True
-    if len(txt) > 18 and " " not in txt: return True
-    if len(txt) < 2: return True
-    if txt in AR_STOP: return True
-    if re2.match(r"^[\p{P}\p{S}]+$", txt): return True
-    return False
-def build_json_records(items: List[MCQ], lang: str, source_pdf: str, method: str, num_questions: int):
     json_data = []
     letters = ["A", "B", "C", "D"]
     for it in items:
-        opts, seen = [], set()
         for idx, lbl in enumerate(letters):
             raw = it.choices[idx] if idx < len(it.choices) else ""
-            txt = normalize_punct(raw)
-            if is_bad_choice(txt): txt = "—"
-            if txt in seen: txt += " "
-            seen.add(txt)
             opts.append({"id": lbl, "text": txt, "is_correct": (it.answer_index == idx)})
         q_clean = normalize_punct(it.question)
         exp_clean = normalize_punct(it.explanation)
         record = {
-            "id": it.id, "question": q_clean, "options": opts, "explanation": exp_clean,
-            "meta": {"lang": lang, "normalized": True, "source_pdf": source_pdf, "extraction_method": method, "num_questions": int(num_questions)}
         }
         json_data.append(record)
     return json_data
 # =========================
-# 7) دوال تبويب "حلّ الاختبار"
 # =========================
 def _format_question(rec):
     q = rec.get("question","").strip()
     return f"### السؤال:\n{q}"
 def _radio_choices(rec):
-    # يعيد قائمة نصوص مثل "A) ...", "B) ..."
-    letters = ["A","B","C","D"]
     out = []
     for opt in rec.get("options", []):
         lid, text = opt.get("id",""), opt.get("text","")
         out.append(f"{lid}) {text}")
-    # إذا ناقص خيارات، كمّل لمواءمة المكوّن
     while len(out) < 4:
         out.append(f"{letters[len(out)]}) —")
     return out
 def _correct_letter(rec):
     for opt in rec.get("options", []):
-        if opt.get("is_correct"):
-            return opt.get("id","")
     return ""
-def _explanation(rec):
-    return rec.get("explanation","")
 def init_quiz_state(records):
-    # ترتيب عشوائي اختياري هنا (يمكن إبقاء كما هو)
-    # random.shuffle(records)
-    return {
-        "records": records,
-        "idx": 0,
-        "answers": {},      # id السؤال -> "A"/"B"/"C"/"D"
-        "revealed": set(),  # ids تم إظهار حلّها
-        "finished": False,
-        "csv_path": None
-    }
 def render_current(rec, user_choice=None, revealed=False):
     q_md = _format_question(rec)
     choices = _radio_choices(rec)
     exp = _explanation(rec) if revealed else ""
-    progress = ""
     correct = _correct_letter(rec)
-    feedback = ""
-    if user_choice:
-        if revealed:
-            feedback = "✅ إجابة صحيحة" if user_choice == correct else f"❌ إجابة خاطئة — الصحيح: {correct}"
-        else:
-            feedback = f"تم اختيار: {user_choice}"
     return q_md, choices, exp, feedback
-def on_start_quiz(json_records):
-    if not json_records or not isinstance(json_records, list):
-        return None, "لم يتم العثور على أسئلة صالحة."
-    return init_quiz_state(json_records), "تم بدء الاختبار. بالتوفيق!"
-def on_load_json_file(file_path):
-    if not file_path: return None, "لم يتم اختيار ملف."
-    try:
-        with open(str(file_path), "r", encoding="utf-8") as f:
-            data = json.load(f)
-        if not isinstance(data, list): raise ValueError("صيغة JSON غير صحيحة (يجب أن تكون قائمة).")
-        return init_quiz_state(data), "تم تحميل ملف JSON بنجاح. اضغط بدء الاختبار."
-    except Exception as e:
-        return None, f"خطأ في قراءة JSON: {e}"
 def on_show_question(state):
     if not state: return "", [], "", "",""
     recs, idx = state["records"], state["idx"]
     rec = recs[idx]
-    q_md, choices, exp, feedback = render_current(
-        rec,
-        user_choice=state["answers"].get(rec["id"]),
-        revealed=(rec["id"] in state["revealed"])
-    )
     pos = f"{idx+1} / {len(recs)}"
     return q_md, choices, exp, feedback, pos
 def on_select_choice(state, choice_label):
     if not state or not choice_label: return state, ""
     rec = state["records"][state["idx"]]
-    # choice_label على شكل "A) نص"
     chosen_letter = choice_label.split(")")[0].strip()
     state["answers"][rec["id"]] = chosen_letter
     if rec["id"] in state["revealed"]:
-        # أعِد توليد الفيدباك
         correct = _correct_letter(rec)
         fb = "✅ إجابة صحيحة" if chosen_letter == correct else f"❌ إجابة خاطئة — الصحيح: {correct}"
     else:
@@ -438,207 +373,141 @@ def on_reveal(state):
     return state, fb
 def on_finish(state):
-    if not state: return state, "", None
     recs = state["records"]
     correct_count, wrong_count, skipped = 0,0,0
-    rows = []
     for rec in recs:
         qid = rec["id"]
         user = state["answers"].get(qid)
         correct = _correct_letter(rec)
-        is_correct = (user == correct) if user else False
         if user is None: skipped += 1
-        elif is_correct: correct_count += 1
         else: wrong_count += 1
-        # صف للـ CSV
-        # جمع النصوص للخيارات
-        opts = {opt["id"]: opt["text"] for opt in rec.get("options", [])}
-        rows.append({
-            "question": rec.get("question",""),
-            "A": opts.get("A",""), "B": opts.get("B",""),
-            "C": opts.get("C",""), "D": opts.get("D",""),
-            "user_choice": user or "",
-            "correct": correct,
-            "is_correct": bool(is_correct)
-        })
     total = len(recs)
     score = f"النتيجة: {correct_count}/{total} (صحيح: {correct_count}، خطأ: {wrong_count}، متروك: {skipped})"
-    # CSV
-    df = pd.DataFrame(rows)
-    workdir = tempfile.mkdtemp(prefix="quiz_")
-    csv_path = os.path.join(workdir, "results.csv")
-    df.to_csv(csv_path, index=False, encoding="utf-8-sig")
     state["finished"] = True
-    state["csv_path"] = csv_path
-    return state, score, csv_path
 def on_reset():
-    return None, "", "", "", "", "", None, "تمت إعادة الضبط."
 # =========================
-# 8) التبويب الأول: توليد الأسئلة (PDF/TXT → JSON)
 # =========================
-def process_pdf(pdf_file_path,
-                num_questions=DEFAULT_NUM_QUESTIONS,
-                lang=DEFAULT_LANG,
-                trocr_model=DEFAULT_TROCR_MODEL,
-                trocr_zoom=DEFAULT_TROCR_ZOOM):
-    logs = []
-    try:
-        if not pdf_file_path:
-            return {}, None, "يرجى رفع ملف PDF/TXT أولاً."
-        src_path = str(pdf_file_path)
-        name_guess = getattr(pdf_file_path, "name", "") if hasattr(pdf_file_path, "name") else ""
-        filename = Path(name_guess).name or Path(src_path).name or "input"
-        workdir = tempfile.mkdtemp(prefix="mcq_")
-        ext = Path(filename).suffix.lower()
-        if ext not in [".pdf", ".txt"]:
-            ext = ".pdf"
-        if not Path(filename).suffix:
-            filename += ext
-        local_path = os.path.join(workdir, filename)
-        shutil.copy(src_path, local_path)
-        logs.append(f"تم نسخ الملف إلى: {local_path}")
-        # 1) استخراج النص
-        if ext == ".txt":
-            with open(local_path, "r", encoding="utf-8", errors="ignore") as f:
-                raw_text = f.read()
-            method = "plain text (no PDF)"
-        else:
-            raw_text, out_txt_path, method = pdf_to_txt(
-                pdf_path=local_path,
-                ocr_model=trocr_model,
-                ocr_zoom=float(trocr_zoom)
-            )
-        logs.append(f"طريقة الاستخراج: {method}")
-        # 2) تنظيف/تطبيع
-        cleaned_text = postprocess_text(raw_text, lang=lang)
-        save_text(cleaned_text, os.path.join(workdir, "cleaned.txt"))
-        logs.append("تم تنظيف/تطبيع النص.")
-        # 3) توليد أسئلة
-        items = make_mcqs_from_text(cleaned_text, n=int(num_questions), lang=lang)
-        logs.append(f"تم توليد {len(items)} سؤالاً.")
-        # 4) بناء JSON
-        json_records = build_json_records(
-            items, lang=lang, source_pdf=Path(filename).name, method=method, num_questions=num_questions
-        )
-        json_str = json.dumps(json_records, ensure_ascii=False, indent=2)
-        # 5) حفظ ملف JSON للتنزيل
-        json_path = os.path.join(workdir, "mcqs.json")
-        with open(json_path, "w", encoding="utf-8") as fj:
-            fj.write(json_str)
-        logs.append("تم إنشاء ملف mcqs.json.")
-        return json_records, json_path, "\n".join(logs)
-    except Exception as e:
-        logs.append(f"خطأ: {e}")
-        return {}, None, "\n".join(logs)
 # =========================
-# 9) واجهة Gradio (تبويبان)
 # =========================
 import gradio as gr
-with gr.Blocks(title="PDF/TXT → MCQ + Quiz", css="""
 body { direction: rtl; font-family: system-ui, 'Cairo', 'IBM Plex Arabic', sans-serif; }
-label, .gr-markdown { text-align: right; }
-""") as demo:
-    gr.Markdown("## مولّد أسئلة + واجهة اختبار تفاعلي")
-    # حالة مشتركة بين التبويبين
-    quiz_state = gr.State(value=None)  # سيحمل dict من init_quiz_state(...)
     toast = gr.Markdown("")
-    with gr.Tabs():
-        # --- تبويب 1: توليد الأسئلة ---
-        with gr.TabItem("توليد الأسئلة (PDF/TXT → JSON)"):
-            with gr.Row():
-                inp_pdf = gr.File(label="ارفع PDF أو TXT", file_count="single", file_types=[".pdf",".txt"], type="filepath")
-                with gr.Column():
-                    num_q = gr.Slider(4, 20, value=DEFAULT_NUM_QUESTIONS, step=1, label="عدد الأسئلة")
-                    trocr_zoom = gr.Slider(2.0, 3.5, value=DEFAULT_TROCR_ZOOM, step=0.1, label="دقة تحويل PDF لصور (Zoom)")
-                    trocr_model = gr.Dropdown(
-                        choices=[
-                            "microsoft/trocr-base-printed",
-                            "microsoft/trocr-large-printed",
-                            "microsoft/trocr-base-handwritten",
-                            "microsoft/trocr-large-handwritten",
-                        ],
-                        value=DEFAULT_TROCR_MODEL, label="موديل TrOCR (للـ PDF المصوّر)"
-                    )
-            btn_gen = gr.Button("تشغيل المعالجة", variant="primary")
-            out_json = gr.JSON(label="النتيجة (JSON)")
-            out_file = gr.File(label="تحميل mcqs.json")
-            out_log = gr.Textbox(label="Logs", lines=10)
-            btn_send_to_quiz = gr.Button("إرسال الأسئلة إلى تبويب الاختبار")
-            btn_gen.click(
-                fn=process_pdf,
-                inputs=[inp_pdf, num_q, gr.State(DEFAULT_LANG), trocr_model, trocr_zoom],
-                outputs=[out_json, out_file, out_log]
-            )
-            # إرسال الناتج مباشرة إلى التبويب الثاني
-            def _send_to_quiz(records):
-                if not records: return None, "لا يوجد أسئلة لإرسالها."
-                return init_quiz_state(records), "تم إرسال الأسئلة إلى تبويب الاختبار. افتحه واضغط 'إظهار السؤال'."
-            btn_send_to_quiz.click(_send_to_quiz, inputs=[out_json], outputs=[quiz_state, toast])
-        # --- تبويب 2: حلّ الاختبار ---
-        with gr.TabItem("حلّ الاختبار (Quiz)"):
-            gr.Markdown("### 1) حمّل JSON للأسئلة أو استخدم زر الإرسال من التبويب الأول")
-            json_file = gr.File(label="أو ارفع ملف JSON", file_types=[".json"], type="filepath")
-            btn_load_json = gr.Button("تحميل ملف JSON")
-            btn_start = gr.Button("بدء الاختبار", variant="primary")
-            gr.Markdown("### 2) حل السؤال الحالي")
-            q_md = gr.Markdown("")
-            choices = gr.Radio(choices=[], label="اختر الإجابة")
-            exp_md = gr.Markdown("")
-            feedback = gr.Markdown("")
-            progress = gr.Label("")
-            with gr.Row():
-                btn_prev = gr.Button("السابق")
-                btn_next = gr.Button("التالي")
-                btn_reveal = gr.Button("إظهار الإجابة")
-            with gr.Row():
-                btn_finish = gr.Button("إنهاء الاختبار", variant="stop")
-                btn_reset = gr.Button("إعادة ضبط")
-            score_md = gr.Markdown("")
-            results_csv = gr.File(label="تحميل نتائج CSV")
-            # ربط الأزرار بالدوال
-            btn_load_json.click(on_load_json_file, inputs=[json_file], outputs=[quiz_state, toast])
-            btn_start.click(on_start_quiz, inputs=[quiz_state], outputs=[quiz_state, toast])
-            # عرض السؤال الحالي
-            def _show_and_render(state):
-                return on_show_question(state)
-            # عند البدء أو التنقل أو الإظهار نعيد رندر
-            btn_start.click(_show_and_render, inputs=[quiz_state], outputs=[q_md, choices, exp_md, feedback, progress])
-            btn_prev.click(on_prev, inputs=[quiz_state], outputs=[quiz_state]).then(_show_and_render, inputs=[quiz_state], outputs=[q_md, choices, exp_md, feedback, progress])
-            btn_next.click(on_next, inputs=[quiz_state], outputs=[quiz_state]).then(_show_and_render, inputs=[quiz_state], outputs=[q_md, choices, exp_md, feedback, progress])
-            btn_reveal.click(on_reveal, inputs=[quiz_state], outputs=[quiz_state, feedback]).then(_show_and_render, inputs=[quiz_state], outputs=[q_md, choices, exp_md, feedback, progress])
-            # اختيار الإجابة
-            def _on_choice(state, choice):
-                return on_select_choice(state, choice)
-            choices.change(_on_choice, inputs=[quiz_state, choices], outputs=[quiz_state, feedback])
-            # إنهاء
-            btn_finish.click(on_finish, inputs=[quiz_state], outputs=[quiz_state, score_md, results_csv])
-            # إعادة ضبط
-            btn_reset.click(on_reset, outputs=[quiz_state, q_md, exp_md, feedback, progress, score_md, results_csv, toast])
 # Spaces تتعرف على demo تلقائيًا
 if __name__ == "__main__":

 from pathlib import Path
 from typing import List, Tuple
 from PIL import Image
 from pypdf import PdfReader
 import fitz  # PyMuPDF
 import regex as re2
 import yake
 # =========================
 # إعدادات عامة
     doc.close()
     return imgs
+def extract_text_with_ocr(pdf_path: str, model_id: str, zoom: float = 2.5) -> str:
     ocr = _get_ocr_pipeline(model_id)
     images = pdf_pages_to_images(pdf_path, zoom=zoom)
     page_texts = []
+    for idx, img in enumerate(images):
         try:
             out = ocr(img)
             txt = out[0]["generated_text"].strip() if out and "generated_text" in out[0] else ""
     ratio = alnum / max(1, len(text))
     return ratio >= min_alpha_ratio
def pdf_to_text(pdf_path: str,
                ocr_model: str = DEFAULT_TROCR_MODEL,
                ocr_zoom: float = DEFAULT_TROCR_ZOOM) -> Tuple[str, str]:
    """Extract text from a PDF and report how it was obtained; writes no files.

    The text returned by ``extract_text_with_pypdf`` is used when
    ``is_extraction_good`` accepts it. Otherwise the function falls back to
    ``extract_text_with_ocr``, unless ``ocr_model`` is falsy, in which case
    the weak embedded text is returned as-is.

    Args:
        pdf_path: Path of the PDF file to read.
        ocr_model: Model identifier forwarded to ``extract_text_with_ocr``;
            a falsy value disables the OCR fallback.
        ocr_zoom: Zoom factor forwarded to ``extract_text_with_ocr``.

    Returns:
        A ``(text, method)`` tuple where ``method`` is a short label naming
        the extraction path that produced ``text``.

    Raises:
        FileNotFoundError: If ``pdf_path`` is not an existing file.
    """
    if not os.path.isfile(pdf_path):
        # Raised explicitly (not via ``assert``) so the check is not
        # stripped when Python runs with ``-O``.
        raise FileNotFoundError(f"File not found: {pdf_path}")

    embedded_text = extract_text_with_pypdf(pdf_path)
    if is_extraction_good(embedded_text):
        return embedded_text, "embedded (pypdf)"
    if not ocr_model:
        return embedded_text, "embedded (pypdf: weak)"

    ocr_text = extract_text_with_ocr(pdf_path, model_id=ocr_model, zoom=ocr_zoom)
    return ocr_text, "OCR (Hugging Face TrOCR)"
 # =========================
 # 3) تطبيع/تصحيح عربي
     text = re2.sub(r"[إأآا]", "ا", text)
     text = re2.sub(r"[يى]", "ي", text)
     text = re2.sub(r"\s+", " ", text)
+    # إزالة تكرار الحروف
     text = re2.sub(r'(\p{L})\1{2,}', r'\1', text)
     text = re2.sub(r'(\p{L})\1', r'\1', text)
     return text.strip()
     return items
 # =========================
+# 6) تحويل عناصر الأسئلة إلى سجلات لواجهة الحلّ
 # =========================
 AR_PUNCT = "،؛؟"
 EN_PUNCT = ",;?"
     s = s.replace(",", "،").replace(";", "؛").replace("?", "؟")
     return s.strip().strip(AR_PUNCT + EN_PUNCT).strip()
def build_quiz_records(items: List[MCQ], lang: str, source_name: str, method: str, num_questions: int):
    """Convert generated MCQ items into plain dict records for the quiz UI.

    Every record carries exactly four options labelled A-D. A missing
    choice, or one that ``normalize_punct`` reduces to an empty string, is
    shown as an em dash. The option whose position equals
    ``answer_index`` is flagged ``is_correct``.

    Args:
        items: MCQ objects exposing ``id``, ``question``, ``choices``,
            ``answer_index`` and ``explanation``.
        lang: Language code stored in each record's ``meta``.
        source_name: Name of the source file, stored in ``meta``.
        method: Extraction-method label, stored in ``meta``.
        num_questions: Requested question count, stored in ``meta`` as int.

    Returns:
        A list with one record dict per item, in input order.
    """
    option_ids = ("A", "B", "C", "D")
    records = []
    for mcq in items:
        options = [
            {
                "id": label,
                "text": normalize_punct(mcq.choices[pos] if pos < len(mcq.choices) else "") or "—",
                "is_correct": (mcq.answer_index == pos),
            }
            for pos, label in enumerate(option_ids)
        ]
        records.append({
            "id": mcq.id,
            "question": normalize_punct(mcq.question),
            "options": options,
            "explanation": normalize_punct(mcq.explanation),
            "meta": {
                "lang": lang,
                "source": source_name,
                "extraction_method": method,
                "num_questions": int(num_questions),
            },
        })
    return records
 # =========================
+# 7) منطق الاختبار (State + Handlers)
 # =========================
 def _format_question(rec):
     q = rec.get("question","").strip()
     return f"### السؤال:\n{q}"
 def _radio_choices(rec):
     out = []
     for opt in rec.get("options", []):
         lid, text = opt.get("id",""), opt.get("text","")
         out.append(f"{lid}) {text}")
     while len(out) < 4:
+        letters = ["A","B","C","D"]
         out.append(f"{letters[len(out)]}) —")
     return out
 def _correct_letter(rec):
     for opt in rec.get("options", []):
+        if opt.get("is_correct"): return opt.get("id","")
     return ""
+def _explanation(rec): return rec.get("explanation","")
 def init_quiz_state(records):
     """Create a fresh quiz state for the given records.

     The state starts at the first question, with no recorded answers, no
     revealed questions, and the quiz not yet finished. ``records`` is
     stored by reference, not copied.
     """
     state = dict(
         records=records,
         idx=0,
         answers={},
         revealed=set(),
         finished=False,
     )
     return state
 def render_current(rec, user_choice=None, revealed=False):
     """Compute the display values for one question.

     Args:
         rec: The question record to render.
         user_choice: Letter the user picked for this question, if any.
         revealed: Whether the answer for this question has been revealed.

     Returns:
         ``(question_markdown, choice_labels, explanation, feedback)``.
         The explanation is empty until the answer is revealed. Feedback is
         empty without a choice, echoes the choice before the reveal, and
         states right/wrong (naming the correct letter) after it.
     """
     correct = _correct_letter(rec)
     if not user_choice:
         feedback = ""
     elif not revealed:
         feedback = f"تم اختيار: {user_choice}"
     elif user_choice == correct:
         feedback = "✅ إجابة صحيحة"
     else:
         feedback = f"❌ إجابة خاطئة — الصحيح: {correct}"
     exp = _explanation(rec) if revealed else ""
     return _format_question(rec), _radio_choices(rec), exp, feedback
 def on_show_question(state):
     """Produce the UI values for the question at ``state["idx"]``.

     Returns a 5-tuple ``(question_markdown, radio_update, explanation,
     feedback, position)``. The second element is a ``gr.update(...)``
     rather than a bare list: a plain list returned to a Radio output is
     treated as its *value*, so the option labels would never reach the
     component. The update also restores the user's earlier selection for
     this question, if there is one.

     When there is no state, or the state holds no records, blank values
     and an empty radio are returned instead of indexing into the records.
     """
     # Imported here so the function does not depend on where the
     # module-level gradio import sits relative to this definition.
     import gradio as gr

     blank = ("", gr.update(choices=[], value=None), "", "", "")
     if not state:
         return blank
     recs, idx = state["records"], state["idx"]
     if not recs:
         # A state with zero questions has nothing to render; indexing
         # recs[idx] would raise IndexError.
         return blank

     rec = recs[idx]
     picked = state["answers"].get(rec["id"])
     q_md, choices, exp, feedback = render_current(
         rec,
         user_choice=picked,
         revealed=(rec["id"] in state["revealed"]),
     )
     # Labels look like "A) text"; match on the letter before ")".
     selected = next(
         (label for label in choices
          if picked and label.split(")")[0].strip() == picked),
         None,
     )
     pos = f"{idx+1} / {len(recs)}"
     return q_md, gr.update(choices=choices, value=selected), exp, feedback, pos
 def on_select_choice(state, choice_label):
     if not state or not choice_label: return state, ""
     rec = state["records"][state["idx"]]
     chosen_letter = choice_label.split(")")[0].strip()
     state["answers"][rec["id"]] = chosen_letter
     if rec["id"] in state["revealed"]:
         correct = _correct_letter(rec)
         fb = "✅ إجابة صحيحة" if chosen_letter == correct else f"❌ إجابة خاطئة — الصحيح: {correct}"
     else:
     return state, fb
 def on_finish(state):
     """Score the quiz and mark the state as finished.

     Each record is counted as skipped (no stored answer), correct (stored
     letter equals the record's correct letter) or wrong (anything else).

     Returns:
         ``(state, score_text)``. With a falsy state the state is returned
         unchanged together with an empty string.
     """
     if not state:
         return state, ""

     recs = state["records"]
     answers = state["answers"]
     n_right = n_wrong = n_skipped = 0
     for rec in recs:
         picked = answers.get(rec["id"])
         expected = _correct_letter(rec)
         if picked is None:
             n_skipped += 1
         elif picked == expected:
             n_right += 1
         else:
             n_wrong += 1

     score = f"النتيجة: {n_right}/{len(recs)} (صحيح: {n_right}، خطأ: {n_wrong}، متروك: {n_skipped})"
     state["finished"] = True
     return state, score
 def on_reset():
     """Return the values that clear the quiz UI.

     The 7-tuple is: a ``None`` state, five empty strings for the display
     components, and a confirmation message.
     """
     cleared = ("",) * 5
     return (None, *cleared, "تمت إعادة الضبط.")
 # =========================
+# 8) معالجة الملف وبناء الأسئلة (بدون أي ملفات ناتجة)
 # =========================
def process_input_file(uploaded_path,
                       num_questions=DEFAULT_NUM_QUESTIONS,
                       lang=DEFAULT_LANG,
                       trocr_model=DEFAULT_TROCR_MODEL,
                       trocr_zoom=DEFAULT_TROCR_ZOOM):
    """Turn an uploaded PDF/TXT file into a ready-to-use quiz state.

    Pipeline: read the text (directly for ``.txt``, via ``pdf_to_text`` for
    ``.pdf``), clean it with ``postprocess_text``, generate questions with
    ``make_mcqs_from_text`` and convert them with ``build_quiz_records``.
    No output files are written.

    Args:
        uploaded_path: Path of the uploaded file; falsy when nothing was
            uploaded.
        num_questions: Number of questions to request.
        lang: Language code passed to the cleaning and generation steps.
        trocr_model: OCR model identifier forwarded to ``pdf_to_text``.
        trocr_zoom: Zoom value forwarded to ``pdf_to_text`` as a float.

    Returns:
        ``(state, message)``. ``state`` is the dict from
        ``init_quiz_state`` on success and ``None`` when no file was given,
        the extension is unsupported, or no questions could be generated.
    """
    if not uploaded_path:
        return None, "يرجى رفع ملف PDF/TXT أولاً."

    src_path = str(uploaded_path)
    filename = Path(src_path).name or "input"
    ext = Path(filename).suffix.lower()
    if ext not in (".pdf", ".txt"):
        return None, "الرجاء رفع PDF أو TXT فقط."

    # Read the raw text.
    if ext == ".txt":
        with open(src_path, "r", encoding="utf-8", errors="ignore") as f:
            raw_text = f.read()
        method = "plain text (no PDF)"
    else:
        raw_text, method = pdf_to_text(src_path, ocr_model=trocr_model, ocr_zoom=float(trocr_zoom))

    cleaned_text = postprocess_text(raw_text, lang=lang)
    items = make_mcqs_from_text(cleaned_text, n=int(num_questions), lang=lang)
    records = build_quiz_records(items, lang=lang, source_name=filename, method=method, num_questions=num_questions)
    if not records:
        # A state with zero records cannot be rendered (the first question
        # would be indexed out of range), so report the failure instead.
        return None, "تعذّر توليد أسئلة من هذا الملف. جرّب ملفاً يحتوي على نص أطول."
    return init_quiz_state(records), f"تم توليد {len(records)} سؤالاً. بالتوفيق!"
 # =========================
+# 9) واجهة Gradio (تبويب واحد)
 # =========================
 import gradio as gr
# Page-level CSS: right-to-left layout, a centred container and a card look
# for the question block.
THEME_CSS = """
body { direction: rtl; font-family: system-ui, 'Cairo', 'IBM Plex Arabic', sans-serif; }
label, .gr-markdown, .gr-button { text-align: right; }
.gradio-container { max-width: 880px; margin: auto; }
.card { background: #fff; border-radius: 1rem; padding: 1rem 1.2rem; box-shadow: 0 10px 25px rgba(0,0,0,0.06); }
.small { opacity: .85; font-size: .9rem; }
.progress { text-align: left; opacity:.75 }
"""

with gr.Blocks(title="اختبار من ملف (PDF/TXT)", css=THEME_CSS) as demo:
    gr.Markdown("## ✨ صانع اختبار من ملف PDF/TXT — واجهة واحدة بسيطة")
    gr.Markdown("ارفع ملفك، حدّد عدد الأسئلة، واضغط **ابدأ**. ثمّ أجب وتحقق من الإجابة.")

    # Per-session quiz state (the dict built by init_quiz_state) and a status line.
    quiz_state = gr.State(value=None)
    toast = gr.Markdown("")

    # --- Inputs ---
    with gr.Row():
        inp_file = gr.File(
            label="ارفع ملف PDF أو TXT",
            file_count="single",
            file_types=[".pdf", ".txt"],
            type="filepath",
        )
        num_q = gr.Slider(4, 20, value=DEFAULT_NUM_QUESTIONS, step=1, label="عدد الأسئلة")

    with gr.Accordion("خيارات متقدمة (للـ PDF المصوّر)", open=False):
        trocr_zoom = gr.Slider(
            2.0, 3.5, value=DEFAULT_TROCR_ZOOM, step=0.1,
            label="Zoom لتحويل الصفحات لصورة (OCR)",
        )
        trocr_model = gr.Dropdown(
            choices=[
                "microsoft/trocr-base-printed",
                "microsoft/trocr-large-printed",
                "microsoft/trocr-base-handwritten",
                "microsoft/trocr-large-handwritten",
            ],
            value=DEFAULT_TROCR_MODEL,
            label="نموذج TrOCR",
        )

    btn_start = gr.Button("ابدأ توليد الاختبار", variant="primary")

    # --- Quiz area ---
    with gr.Group():
        with gr.Row():
            progress = gr.Label("", elem_classes=["progress"])
        q_md = gr.Markdown("", elem_classes=["card"])
        choices = gr.Radio(choices=[], label="اختر الإجابة", interactive=True)
        feedback = gr.Markdown("")
        exp_md = gr.Markdown("")
        with gr.Row():
            btn_prev = gr.Button("السابق")
            btn_next = gr.Button("التالي")
            btn_reveal = gr.Button("إظهار الإجابة")
            btn_finish = gr.Button("إنهاء الاختبار", variant="stop")
            btn_reset = gr.Button("إعادة ضبط")
        score_md = gr.Markdown("")

    # Components refreshed every time the current question is re-rendered.
    render_targets = [q_md, choices, exp_md, feedback, progress]

    def _then_render(event):
        """Chain a re-render of the current question after ``event``."""
        return event.then(on_show_question, inputs=[quiz_state], outputs=render_targets)

    # Build the quiz from the uploaded file, then show the first question.
    _then_render(
        btn_start.click(
            process_input_file,
            inputs=[inp_file, num_q, gr.State(DEFAULT_LANG), trocr_model, trocr_zoom],
            outputs=[quiz_state, toast],
        )
    )

    # Navigation and reveal: update the state, then re-render.
    _then_render(btn_prev.click(on_prev, inputs=[quiz_state], outputs=[quiz_state]))
    _then_render(btn_next.click(on_next, inputs=[quiz_state], outputs=[quiz_state]))
    _then_render(btn_reveal.click(on_reveal, inputs=[quiz_state], outputs=[quiz_state, feedback]))

    # Record the selected answer.
    choices.change(on_select_choice, inputs=[quiz_state, choices], outputs=[quiz_state, feedback])

    # Finish and show the score.
    btn_finish.click(on_finish, inputs=[quiz_state], outputs=[quiz_state, score_md])

    # Reset everything.
    btn_reset.click(on_reset, outputs=[quiz_state, q_md, choices, exp_md, feedback, score_md, toast])
 # Spaces تتعرف على demo تلقائيًا
 if __name__ == "__main__":