Leen172 committed on
Commit
ffb63f9
·
verified ·
1 Parent(s): a5f2a6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +145 -276
app.py CHANGED
@@ -13,13 +13,11 @@ from dataclasses import dataclass
13
  from pathlib import Path
14
  from typing import List, Tuple
15
 
16
- import pandas as pd
17
  from PIL import Image
18
  from pypdf import PdfReader
19
  import fitz # PyMuPDF
20
  import regex as re2
21
  import yake
22
- from tqdm import tqdm
23
 
24
  # =========================
25
  # إعدادات عامة
@@ -66,12 +64,11 @@ def pdf_pages_to_images(pdf_path: str, zoom: float = 2.5) -> List[Image.Image]:
66
  doc.close()
67
  return imgs
68
 
69
- def extract_text_with_ocr(pdf_path: str, model_id: str, zoom: float = 2.5, disable_tqdm: bool = True) -> str:
70
  ocr = _get_ocr_pipeline(model_id)
71
  images = pdf_pages_to_images(pdf_path, zoom=zoom)
72
  page_texts = []
73
- pbar = tqdm(images, desc="TrOCR OCR", unit="p", disable=disable_tqdm)
74
- for idx, img in enumerate(pbar):
75
  try:
76
  out = ocr(img)
77
  txt = out[0]["generated_text"].strip() if out and "generated_text" in out[0] else ""
@@ -87,36 +84,19 @@ def is_extraction_good(text: str, min_chars: int = 250, min_alpha_ratio: float =
87
  ratio = alnum / max(1, len(text))
88
  return ratio >= min_alpha_ratio
89
 
90
- def save_text(text: str, out_path: str) -> None:
91
- os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True
92
- )
93
- with open(out_path, "w", encoding="utf-8") as f:
94
- f.write(text)
95
-
96
- def pdf_to_txt(pdf_path: str, out_txt_path: str = None,
97
- ocr_model: str = DEFAULT_TROCR_MODEL,
98
- ocr_zoom: float = DEFAULT_TROCR_ZOOM) -> Tuple[str, str, str]:
99
  assert os.path.isfile(pdf_path), f"File not found: {pdf_path}"
100
-
101
  embedded_text = extract_text_with_pypdf(pdf_path)
102
  if is_extraction_good(embedded_text):
103
- final_text = embedded_text
104
- method = "embedded (pypdf)"
105
- else:
106
- if not ocr_model:
107
- final_text = embedded_text
108
- method = "embedded (pypdf: weak)"
109
- else:
110
- final_text = extract_text_with_ocr(pdf_path, model_id=ocr_model, zoom=ocr_zoom)
111
- method = "OCR (Hugging Face TrOCR)"
112
-
113
- if out_txt_path is None:
114
- base, _ = os.path.splitext(pdf_path)
115
- out_txt_path = base + ".txt"
116
-
117
- header = f"[[ Extraction method: {method} ]]\n\n"
118
- save_text(header + final_text, out_txt_path)
119
- return final_text, out_txt_path, method
120
 
121
  # =========================
122
  # 3) تطبيع/تصحيح عربي
@@ -139,7 +119,7 @@ def normalize_arabic(text: str) -> str:
139
  text = re2.sub(r"[إأآا]", "ا", text)
140
  text = re2.sub(r"[يى]", "ي", text)
141
  text = re2.sub(r"\s+", " ", text)
142
- # إزالة التكرار الزائد للحروف (مثل جذرياا -> جذريا)
143
  text = re2.sub(r'(\p{L})\1{2,}', r'\1', text)
144
  text = re2.sub(r'(\p{L})\1', r'\1', text)
145
  return text.strip()
@@ -281,7 +261,7 @@ def make_mcqs_from_text(text: str, n: int = 8, lang: str = 'ar') -> List[MCQ]:
281
  return items
282
 
283
  # =========================
284
- # 6) بناء JSON للإخراج
285
  # =========================
286
  AR_PUNCT = "،؛؟"
287
  EN_PUNCT = ",;?"
@@ -291,127 +271,82 @@ def normalize_punct(s: str) -> str:
291
  s = s.replace(",", "،").replace(";", "؛").replace("?", "؟")
292
  return s.strip().strip(AR_PUNCT + EN_PUNCT).strip()
293
 
294
- def is_bad_choice(txt: str) -> bool:
295
- if not txt: return True
296
- txt = txt.strip()
297
- BAD_NOISE = {"وهنا","اليه","الي","ليبق","لان","لانها","لانّه","ذلك","هذا","هذه"}
298
- if txt in BAD_NOISE: return True
299
- if len(txt) > 18 and " " not in txt: return True
300
- if len(txt) < 2: return True
301
- if txt in AR_STOP: return True
302
- if re2.match(r"^[\p{P}\p{S}]+$", txt): return True
303
- return False
304
-
305
- def build_json_records(items: List[MCQ], lang: str, source_pdf: str, method: str, num_questions: int):
306
  json_data = []
307
  letters = ["A", "B", "C", "D"]
308
  for it in items:
309
- opts, seen = [], set()
310
  for idx, lbl in enumerate(letters):
311
  raw = it.choices[idx] if idx < len(it.choices) else ""
312
- txt = normalize_punct(raw)
313
- if is_bad_choice(txt): txt = "—"
314
- if txt in seen: txt += " "
315
- seen.add(txt)
316
  opts.append({"id": lbl, "text": txt, "is_correct": (it.answer_index == idx)})
317
  q_clean = normalize_punct(it.question)
318
  exp_clean = normalize_punct(it.explanation)
319
  record = {
320
- "id": it.id, "question": q_clean, "options": opts, "explanation": exp_clean,
321
- "meta": {"lang": lang, "normalized": True, "source_pdf": source_pdf, "extraction_method": method, "num_questions": int(num_questions)}
 
 
 
322
  }
323
  json_data.append(record)
324
  return json_data
325
 
326
  # =========================
327
- # 7) دوال تبويب "حلّ الاختبار"
328
  # =========================
329
  def _format_question(rec):
330
  q = rec.get("question","").strip()
331
  return f"### السؤال:\n{q}"
332
 
333
  def _radio_choices(rec):
334
- # يعيد قائمة نصوص مثل "A) ...", "B) ..."
335
- letters = ["A","B","C","D"]
336
  out = []
337
  for opt in rec.get("options", []):
338
  lid, text = opt.get("id",""), opt.get("text","")
339
  out.append(f"{lid}) {text}")
340
- # إذا ناقص خيارات، كمّل لمواءمة المكوّن
341
  while len(out) < 4:
 
342
  out.append(f"{letters[len(out)]}) —")
343
  return out
344
 
345
  def _correct_letter(rec):
346
  for opt in rec.get("options", []):
347
- if opt.get("is_correct"):
348
- return opt.get("id","")
349
  return ""
350
 
351
- def _explanation(rec):
352
- return rec.get("explanation","")
353
 
354
  def init_quiz_state(records):
355
- # ترتيب عشوائي اختياري هنا (يمكن إبقاء كما هو)
356
- # random.shuffle(records)
357
- return {
358
- "records": records,
359
- "idx": 0,
360
- "answers": {}, # id السؤال -> "A"/"B"/"C"/"D"
361
- "revealed": set(), # ids تم إظهار حلّها
362
- "finished": False,
363
- "csv_path": None
364
- }
365
 
366
  def render_current(rec, user_choice=None, revealed=False):
367
  q_md = _format_question(rec)
368
  choices = _radio_choices(rec)
369
  exp = _explanation(rec) if revealed else ""
370
- progress = ""
371
  correct = _correct_letter(rec)
372
- feedback = ""
373
- if user_choice:
374
- if revealed:
375
- feedback = "✅ إجابة صحيحة" if user_choice == correct else f" إجابة خاطئة — الصحيح: {correct}"
376
- else:
377
- feedback = f"تم اختيار: {user_choice}"
378
  return q_md, choices, exp, feedback
379
 
380
- def on_start_quiz(json_records):
381
- if not json_records or not isinstance(json_records, list):
382
- return None, "لم يتم العثور على أسئلة صالحة."
383
- return init_quiz_state(json_records), "تم بدء الاختبار. بالتوفيق!"
384
-
385
- def on_load_json_file(file_path):
386
- if not file_path: return None, "لم يتم اختيار ملف."
387
- try:
388
- with open(str(file_path), "r", encoding="utf-8") as f:
389
- data = json.load(f)
390
- if not isinstance(data, list): raise ValueError("صيغة JSON غير صحيحة (يجب أن تكون قائمة).")
391
- return init_quiz_state(data), "تم تحميل ملف JSON بنجاح. اضغط بدء الاختبار."
392
- except Exception as e:
393
- return None, f"خطأ في قراءة JSON: {e}"
394
-
395
  def on_show_question(state):
396
  if not state: return "", [], "", "",""
397
  recs, idx = state["records"], state["idx"]
398
  rec = recs[idx]
399
- q_md, choices, exp, feedback = render_current(
400
- rec,
401
- user_choice=state["answers"].get(rec["id"]),
402
- revealed=(rec["id"] in state["revealed"])
403
- )
404
  pos = f"{idx+1} / {len(recs)}"
405
  return q_md, choices, exp, feedback, pos
406
 
407
  def on_select_choice(state, choice_label):
408
  if not state or not choice_label: return state, ""
409
  rec = state["records"][state["idx"]]
410
- # choice_label على شكل "A) نص"
411
  chosen_letter = choice_label.split(")")[0].strip()
412
  state["answers"][rec["id"]] = chosen_letter
413
  if rec["id"] in state["revealed"]:
414
- # أعِد توليد الفيدباك
415
  correct = _correct_letter(rec)
416
  fb = "✅ إجابة صحيحة" if chosen_letter == correct else f"❌ إجابة خاطئة — الصحيح: {correct}"
417
  else:
@@ -438,207 +373,141 @@ def on_reveal(state):
438
  return state, fb
439
 
440
  def on_finish(state):
441
- if not state: return state, "", None
442
  recs = state["records"]
443
  correct_count, wrong_count, skipped = 0,0,0
444
- rows = []
445
  for rec in recs:
446
  qid = rec["id"]
447
  user = state["answers"].get(qid)
448
  correct = _correct_letter(rec)
449
- is_correct = (user == correct) if user else False
450
  if user is None: skipped += 1
451
- elif is_correct: correct_count += 1
452
  else: wrong_count += 1
453
- # صف للـ CSV
454
- # جمع النصوص للخيارات
455
- opts = {opt["id"]: opt["text"] for opt in rec.get("options", [])}
456
- rows.append({
457
- "question": rec.get("question",""),
458
- "A": opts.get("A",""), "B": opts.get("B",""),
459
- "C": opts.get("C",""), "D": opts.get("D",""),
460
- "user_choice": user or "",
461
- "correct": correct,
462
- "is_correct": bool(is_correct)
463
- })
464
  total = len(recs)
465
  score = f"النتيجة: {correct_count}/{total} (صحيح: {correct_count}، خطأ: {wrong_count}، متروك: {skipped})"
466
- # CSV
467
- df = pd.DataFrame(rows)
468
- workdir = tempfile.mkdtemp(prefix="quiz_")
469
- csv_path = os.path.join(workdir, "results.csv")
470
- df.to_csv(csv_path, index=False, encoding="utf-8-sig")
471
  state["finished"] = True
472
- state["csv_path"] = csv_path
473
- return state, score, csv_path
474
 
475
  def on_reset():
476
- return None, "", "", "", "", "", None, "تمت إعادة الضبط."
477
 
478
  # =========================
479
- # 8) التبويب الأول: توليد الأسئلة (PDF/TXT JSON)
480
  # =========================
481
- def process_pdf(pdf_file_path,
482
- num_questions=DEFAULT_NUM_QUESTIONS,
483
- lang=DEFAULT_LANG,
484
- trocr_model=DEFAULT_TROCR_MODEL,
485
- trocr_zoom=DEFAULT_TROCR_ZOOM):
486
- logs = []
487
- try:
488
- if not pdf_file_path:
489
- return {}, None, "يرجى رفع ملف PDF/TXT أولاً."
490
-
491
- src_path = str(pdf_file_path)
492
- name_guess = getattr(pdf_file_path, "name", "") if hasattr(pdf_file_path, "name") else ""
493
- filename = Path(name_guess).name or Path(src_path).name or "input"
494
- workdir = tempfile.mkdtemp(prefix="mcq_")
495
-
496
- ext = Path(filename).suffix.lower()
497
- if ext not in [".pdf", ".txt"]:
498
- ext = ".pdf"
499
- if not Path(filename).suffix:
500
- filename += ext
501
-
502
- local_path = os.path.join(workdir, filename)
503
- shutil.copy(src_path, local_path)
504
- logs.append(f"تم نسخ الملف إلى: {local_path}")
505
-
506
- # 1) استخراج النص
507
- if ext == ".txt":
508
- with open(local_path, "r", encoding="utf-8", errors="ignore") as f:
509
- raw_text = f.read()
510
- method = "plain text (no PDF)"
511
- else:
512
- raw_text, out_txt_path, method = pdf_to_txt(
513
- pdf_path=local_path,
514
- ocr_model=trocr_model,
515
- ocr_zoom=float(trocr_zoom)
516
- )
517
- logs.append(f"طريقة الاستخراج: {method}")
518
-
519
- # 2) تنظيف/تطبيع
520
- cleaned_text = postprocess_text(raw_text, lang=lang)
521
- save_text(cleaned_text, os.path.join(workdir, "cleaned.txt"))
522
- logs.append("تم تنظيف/تطبيع النص.")
523
-
524
- # 3) توليد أسئلة
525
- items = make_mcqs_from_text(cleaned_text, n=int(num_questions), lang=lang)
526
- logs.append(f"تم توليد {len(items)} سؤالاً.")
527
-
528
- # 4) بناء JSON
529
- json_records = build_json_records(
530
- items, lang=lang, source_pdf=Path(filename).name, method=method, num_questions=num_questions
531
- )
532
- json_str = json.dumps(json_records, ensure_ascii=False, indent=2)
533
-
534
- # 5) حفظ ملف JSON للتنزيل
535
- json_path = os.path.join(workdir, "mcqs.json")
536
- with open(json_path, "w", encoding="utf-8") as fj:
537
- fj.write(json_str)
538
- logs.append("تم إنشاء ملف mcqs.json.")
539
-
540
- return json_records, json_path, "\n".join(logs)
541
 
542
- except Exception as e:
543
- logs.append(f"خطأ: {e}")
544
- return {}, None, "\n".join(logs)
 
545
 
546
  # =========================
547
- # 9) واجهة Gradio (تبويبان)
548
  # =========================
549
  import gradio as gr
550
 
551
- with gr.Blocks(title="PDF/TXT → MCQ + Quiz", css="""
552
  body { direction: rtl; font-family: system-ui, 'Cairo', 'IBM Plex Arabic', sans-serif; }
553
- label, .gr-markdown { text-align: right; }
554
- """) as demo:
555
- gr.Markdown("## مولّد أسئلة + واجهة اختبار تفاعلي")
556
-
557
- # حالة مشتركة بين التبويبين
558
- quiz_state = gr.State(value=None) # سيحمل dict من init_quiz_state(...)
 
 
 
 
 
 
559
  toast = gr.Markdown("")
560
 
561
- with gr.Tabs():
562
- # --- تبويب 1: توليد الأسئلة ---
563
- with gr.TabItem("توليد الأسئلة (PDF/TXT JSON)"):
564
- with gr.Row():
565
- inp_pdf = gr.File(label="ارفع PDF أو TXT", file_count="single", file_types=[".pdf",".txt"], type="filepath")
566
- with gr.Column():
567
- num_q = gr.Slider(4, 20, value=DEFAULT_NUM_QUESTIONS, step=1, label="عدد الأسئلة")
568
- trocr_zoom = gr.Slider(2.0, 3.5, value=DEFAULT_TROCR_ZOOM, step=0.1, label="دقة تحويل PDF لصور (Zoom)")
569
- trocr_model = gr.Dropdown(
570
- choices=[
571
- "microsoft/trocr-base-printed",
572
- "microsoft/trocr-large-printed",
573
- "microsoft/trocr-base-handwritten",
574
- "microsoft/trocr-large-handwritten",
575
- ],
576
- value=DEFAULT_TROCR_MODEL, label="موديل TrOCR (للـ PDF المصوّر)"
577
- )
578
- btn_gen = gr.Button("تشغيل المعالجة", variant="primary")
579
- out_json = gr.JSON(label="النتيجة (JSON)")
580
- out_file = gr.File(label="تحميل mcqs.json")
581
- out_log = gr.Textbox(label="Logs", lines=10)
582
- btn_send_to_quiz = gr.Button("إرسال الأسئلة إلى تبويب الاختبار")
583
-
584
- btn_gen.click(
585
- fn=process_pdf,
586
- inputs=[inp_pdf, num_q, gr.State(DEFAULT_LANG), trocr_model, trocr_zoom],
587
- outputs=[out_json, out_file, out_log]
588
- )
589
-
590
- # إرسال الناتج مباشرة إلى التبويب الثاني
591
- def _send_to_quiz(records):
592
- if not records: return None, "لا يوجد أسئلة لإرسالها."
593
- return init_quiz_state(records), "تم إرسال الأسئلة إلى تبويب الاختبار. افتحه واضغط 'إظهار السؤال'."
594
- btn_send_to_quiz.click(_send_to_quiz, inputs=[out_json], outputs=[quiz_state, toast])
595
-
596
- # --- تبويب 2: حلّ الاختبار ---
597
- with gr.TabItem("حلّ الاختبار (Quiz)"):
598
- gr.Markdown("### 1) حمّل JSON للأسئلة أو استخدم زر الإرسال من التبويب الأول")
599
- json_file = gr.File(label="أو ارفع ملف JSON", file_types=[".json"], type="filepath")
600
- btn_load_json = gr.Button("تحميل ملف JSON")
601
- btn_start = gr.Button("بدء الاختبار", variant="primary")
602
-
603
- gr.Markdown("### 2) حل السؤال الحالي")
604
- q_md = gr.Markdown("")
605
- choices = gr.Radio(choices=[], label="اختر الإجابة")
606
- exp_md = gr.Markdown("")
607
- feedback = gr.Markdown("")
608
- progress = gr.Label("")
609
-
610
- with gr.Row():
611
- btn_prev = gr.Button("السابق")
612
- btn_next = gr.Button("التالي")
613
- btn_reveal = gr.Button("إظهار الإجابة")
614
- with gr.Row():
615
- btn_finish = gr.Button("إنهاء الاختبار", variant="stop")
616
- btn_reset = gr.Button("إعادة ضبط")
617
-
618
- score_md = gr.Markdown("")
619
- results_csv = gr.File(label="تحميل نتائج CSV")
620
-
621
- # ربط الأزرار بالدوال
622
- btn_load_json.click(on_load_json_file, inputs=[json_file], outputs=[quiz_state, toast])
623
- btn_start.click(on_start_quiz, inputs=[quiz_state], outputs=[quiz_state, toast])
624
- # عرض السؤال الحالي
625
- def _show_and_render(state):
626
- return on_show_question(state)
627
- # عند البدء أو التنقل أو الإظهار نعيد رندر
628
- btn_start.click(_show_and_render, inputs=[quiz_state], outputs=[q_md, choices, exp_md, feedback, progress])
629
- btn_prev.click(on_prev, inputs=[quiz_state], outputs=[quiz_state]).then(_show_and_render, inputs=[quiz_state], outputs=[q_md, choices, exp_md, feedback, progress])
630
- btn_next.click(on_next, inputs=[quiz_state], outputs=[quiz_state]).then(_show_and_render, inputs=[quiz_state], outputs=[q_md, choices, exp_md, feedback, progress])
631
- btn_reveal.click(on_reveal, inputs=[quiz_state], outputs=[quiz_state, feedback]).then(_show_and_render, inputs=[quiz_state], outputs=[q_md, choices, exp_md, feedback, progress])
632
-
633
- # اختيار الإجابة
634
- def _on_choice(state, choice):
635
- return on_select_choice(state, choice)
636
- choices.change(_on_choice, inputs=[quiz_state, choices], outputs=[quiz_state, feedback])
637
-
638
- # إنهاء
639
- btn_finish.click(on_finish, inputs=[quiz_state], outputs=[quiz_state, score_md, results_csv])
640
- # إعادة ضبط
641
- btn_reset.click(on_reset, outputs=[quiz_state, q_md, exp_md, feedback, progress, score_md, results_csv, toast])
642
 
643
  # Spaces تتعرف على demo تلقائيًا
644
  if __name__ == "__main__":
 
13
  from pathlib import Path
14
  from typing import List, Tuple
15
 
 
16
  from PIL import Image
17
  from pypdf import PdfReader
18
  import fitz # PyMuPDF
19
  import regex as re2
20
  import yake
 
21
 
22
  # =========================
23
  # إعدادات عامة
 
64
  doc.close()
65
  return imgs
66
 
67
+ def extract_text_with_ocr(pdf_path: str, model_id: str, zoom: float = 2.5) -> str:
68
  ocr = _get_ocr_pipeline(model_id)
69
  images = pdf_pages_to_images(pdf_path, zoom=zoom)
70
  page_texts = []
71
+ for idx, img in enumerate(images):
 
72
  try:
73
  out = ocr(img)
74
  txt = out[0]["generated_text"].strip() if out and "generated_text" in out[0] else ""
 
84
  ratio = alnum / max(1, len(text))
85
  return ratio >= min_alpha_ratio
86
 
87
def pdf_to_text(pdf_path: str,
                ocr_model: str = DEFAULT_TROCR_MODEL,
                ocr_zoom: float = DEFAULT_TROCR_ZOOM) -> Tuple[str, str]:
    """Extract the text of a PDF without writing any files.

    The embedded text layer is tried first; OCR is used only when that
    layer fails the ``is_extraction_good`` check and an OCR model is given.

    Args:
        pdf_path: Path of the PDF to read.
        ocr_model: Model id handed to ``extract_text_with_ocr``; a falsy
            value disables the OCR fallback.
        ocr_zoom: Zoom factor handed to ``extract_text_with_ocr``.

    Returns:
        A ``(text, method)`` tuple where ``method`` names the extraction
        path that produced ``text``.

    Raises:
        FileNotFoundError: If ``pdf_path`` is not an existing file.
    """
    # An explicit raise instead of `assert`: assertions are removed when
    # Python runs with -O, which would let a bad path reach the PDF reader.
    if not os.path.isfile(pdf_path):
        raise FileNotFoundError(f"File not found: {pdf_path}")

    embedded_text = extract_text_with_pypdf(pdf_path)
    if is_extraction_good(embedded_text):
        return embedded_text, "embedded (pypdf)"
    if not ocr_model:
        # No OCR model available: return the weak embedded text as-is.
        return embedded_text, "embedded (pypdf: weak)"

    ocr_text = extract_text_with_ocr(pdf_path, model_id=ocr_model, zoom=ocr_zoom)
    return ocr_text, "OCR (Hugging Face TrOCR)"
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
  # =========================
102
  # 3) تطبيع/تصحيح عربي
 
119
  text = re2.sub(r"[إأآا]", "ا", text)
120
  text = re2.sub(r"[يى]", "ي", text)
121
  text = re2.sub(r"\s+", " ", text)
122
+ # إزالة تكرار الحروف
123
  text = re2.sub(r'(\p{L})\1{2,}', r'\1', text)
124
  text = re2.sub(r'(\p{L})\1', r'\1', text)
125
  return text.strip()
 
261
  return items
262
 
263
  # =========================
264
+ # 6) تحويل عناصر الأسئلة إلى سجلات لواجهة الحلّ
265
  # =========================
266
  AR_PUNCT = "،؛؟"
267
  EN_PUNCT = ",;?"
 
271
  s = s.replace(",", "،").replace(";", "؛").replace("?", "؟")
272
  return s.strip().strip(AR_PUNCT + EN_PUNCT).strip()
273
 
274
def build_quiz_records(items: List[MCQ], lang: str, source_name: str, method: str, num_questions: int):
    """Convert generated MCQ items into plain-dict records for the quiz UI.

    Each record always carries exactly four options labelled A-D. A choice
    that is missing, or that is empty after ``normalize_punct``, is shown
    as an em dash.

    Args:
        items: Question objects; this function reads their ``id``,
            ``question``, ``choices``, ``answer_index`` and ``explanation``.
        lang: Language code stored in the record metadata.
        source_name: Name of the uploaded file, stored in the metadata.
        method: Extraction method label, stored in the metadata.
        num_questions: Requested question count, stored as an int.

    Returns:
        A list with one record dict per item.
    """
    letters = ["A", "B", "C", "D"]
    meta_questions = int(num_questions)
    records = []
    for item in items:
        options = []
        for pos, label in enumerate(letters):
            raw = item.choices[pos] if pos < len(item.choices) else ""
            options.append({
                "id": label,
                "text": normalize_punct(raw) or "—",
                "is_correct": (item.answer_index == pos),
            })
        records.append({
            "id": item.id,
            "question": normalize_punct(item.question),
            "options": options,
            "explanation": normalize_punct(item.explanation),
            "meta": {
                "lang": lang,
                "source": source_name,
                "extraction_method": method,
                "num_questions": meta_questions,
            },
        })
    return records
294
 
295
  # =========================
296
+ # 7) منطق الاختبار (State + Handlers)
297
  # =========================
298
  def _format_question(rec):
299
  q = rec.get("question","").strip()
300
  return f"### السؤال:\n{q}"
301
 
302
  def _radio_choices(rec):
 
 
303
  out = []
304
  for opt in rec.get("options", []):
305
  lid, text = opt.get("id",""), opt.get("text","")
306
  out.append(f"{lid}) {text}")
 
307
  while len(out) < 4:
308
+ letters = ["A","B","C","D"]
309
  out.append(f"{letters[len(out)]}) —")
310
  return out
311
 
312
  def _correct_letter(rec):
313
  for opt in rec.get("options", []):
314
+ if opt.get("is_correct"): return opt.get("id","")
 
315
  return ""
316
 
317
+ def _explanation(rec): return rec.get("explanation","")
 
318
 
319
def init_quiz_state(records):
    """Build a fresh quiz state dict around ``records``.

    Keys:
        records: The list passed in (stored by reference, not copied).
        idx: Index of the question currently shown; starts at 0.
        answers: Maps a question id to the chosen letter.
        revealed: Set of question ids whose answer has been revealed.
        finished: Whether the quiz has been ended.
    """
    return {
        "records": records,
        "idx": 0,
        "answers": {},
        "revealed": set(),
        "finished": False,
    }
 
 
 
 
 
 
 
 
 
321
 
322
def render_current(rec, user_choice=None, revealed=False):
    """Compute the display values for one question.

    Args:
        rec: The question record to show.
        user_choice: Letter the user picked for this question, if any.
        revealed: Whether the answer of this question has been revealed.

    Returns:
        ``(question_markdown, choice_labels, explanation, feedback)``.
        The explanation is empty until the question is revealed. Feedback
        is empty with no choice, echoes the choice before reveal, and
        states right/wrong after reveal.
    """
    question_md = _format_question(rec)
    labels = _radio_choices(rec)
    explanation = _explanation(rec) if revealed else ""

    if not user_choice:
        feedback = ""
    elif revealed:
        correct = _correct_letter(rec)
        feedback = "✅ إجابة صحيحة" if user_choice == correct else f"❌ إجابة خاطئة — الصحيح: {correct}"
    else:
        feedback = f"تم اختيار: {user_choice}"
    return question_md, labels, explanation, feedback
334
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
def on_show_question(state):
    """Return the display values for the question at ``state["idx"]``.

    Returns:
        ``(question_markdown, choice_labels, explanation, feedback,
        position)`` where position looks like ``"2 / 8"``. All-empty
        values are returned when there is no state or it has no records.
    """
    # A state dict with an empty record list is still truthy, so the
    # records must be checked too or recs[idx] raises IndexError.
    if not state or not state.get("records"):
        return "", [], "", "", ""

    recs, idx = state["records"], state["idx"]
    rec = recs[idx]
    q_md, choices, exp, feedback = render_current(
        rec,
        user_choice=state["answers"].get(rec["id"]),
        revealed=(rec["id"] in state["revealed"]),
    )
    pos = f"{idx+1} / {len(recs)}"
    return q_md, choices, exp, feedback, pos
343
 
344
  def on_select_choice(state, choice_label):
345
  if not state or not choice_label: return state, ""
346
  rec = state["records"][state["idx"]]
 
347
  chosen_letter = choice_label.split(")")[0].strip()
348
  state["answers"][rec["id"]] = chosen_letter
349
  if rec["id"] in state["revealed"]:
 
350
  correct = _correct_letter(rec)
351
  fb = "✅ إجابة صحيحة" if chosen_letter == correct else f"❌ إجابة خاطئة — الصحيح: {correct}"
352
  else:
 
373
  return state, fb
374
 
375
def on_finish(state):
    """Score the quiz and mark the state as finished.

    A question with no stored answer counts as skipped; otherwise it is
    correct when the stored letter equals ``_correct_letter(rec)`` and
    wrong when it does not.

    Returns:
        ``(state, score_text)``. With a falsy state the state is returned
        unchanged together with an empty score string.
    """
    if not state:
        return state, ""

    recs = state["records"]
    correct_count = wrong_count = skipped = 0
    for rec in recs:
        user = state["answers"].get(rec["id"])
        if user is None:
            skipped += 1
        elif user == _correct_letter(rec):
            correct_count += 1
        else:
            wrong_count += 1

    total = len(recs)
    score = f"النتيجة: {correct_count}/{total} (صحيح: {correct_count}، خطأ: {wrong_count}، متروك: {skipped})"
    state["finished"] = True
    return state, score
 
390
 
391
def on_reset():
    """Return the cleared values used to reset the quiz.

    The tuple has seven entries: a ``None`` state, five empty strings, and
    a confirmation message. The reset button must list exactly seven
    output components, in matching order.
    """
    cleared_state = None
    blanks = ("",) * 5
    return (cleared_state, *blanks, "تمت إعادة الضبط.")
393
 
394
  # =========================
395
+ # 8) معالجة الملف وبناء الأسئلة (بدون أي ملفات ناتجة)
396
  # =========================
397
def process_input_file(uploaded_path,
                       num_questions=DEFAULT_NUM_QUESTIONS,
                       lang=DEFAULT_LANG,
                       trocr_model=DEFAULT_TROCR_MODEL,
                       trocr_zoom=DEFAULT_TROCR_ZOOM):
    """Turn an uploaded PDF/TXT file into a ready-to-play quiz state.

    No output files are written; everything stays in memory.

    Args:
        uploaded_path: Path of the uploaded file (anything ``str()`` can
            turn into a path); falsy when nothing was uploaded.
        num_questions: Number of questions to request from the generator.
        lang: Language code passed to text clean-up and generation.
        trocr_model: OCR model id forwarded to ``pdf_to_text``.
        trocr_zoom: OCR zoom factor forwarded to ``pdf_to_text``.

    Returns:
        ``(state, message)``. ``state`` is a dict from ``init_quiz_state``
        on success and ``None`` on any failure; ``message`` is the
        user-facing status text.
    """
    if not uploaded_path:
        return None, "يرجى رفع ملف PDF/TXT أولاً."

    src_path = str(uploaded_path)
    filename = Path(src_path).name or "input"
    ext = Path(filename).suffix.lower()
    if ext not in (".pdf", ".txt"):
        return None, "الرجاء رفع PDF أو TXT فقط."

    # This is the UI boundary: report any extraction/generation failure
    # as a status message rather than letting the handler crash.
    try:
        if ext == ".txt":
            with open(src_path, "r", encoding="utf-8", errors="ignore") as f:
                raw_text = f.read()
            method = "plain text (no PDF)"
        else:
            raw_text, method = pdf_to_text(src_path, ocr_model=trocr_model, ocr_zoom=float(trocr_zoom))

        cleaned_text = postprocess_text(raw_text, lang=lang)
        items = make_mcqs_from_text(cleaned_text, n=int(num_questions), lang=lang)
        records = build_quiz_records(items, lang=lang, source_name=filename, method=method, num_questions=num_questions)
    except Exception as e:
        return None, f"خطأ: {e}"

    # An empty quiz has no question to display; return no state so the
    # renderer that runs next shows blanks instead of indexing records[0].
    if not records:
        return None, "لم يتم العثور على أسئلة صالحة."
    return init_quiz_state(records), f"تم توليد {len(records)} سؤالاً. بالتوفيق!"
422
 
423
  # =========================
424
+ # 9) واجهة Gradio (تبويب واحد)
425
  # =========================
426
  import gradio as gr
427
 
428
THEME_CSS = """
body { direction: rtl; font-family: system-ui, 'Cairo', 'IBM Plex Arabic', sans-serif; }
label, .gr-markdown, .gr-button { text-align: right; }
.gradio-container { max-width: 880px; margin: auto; }
.card { background: #fff; border-radius: 1rem; padding: 1rem 1.2rem; box-shadow: 0 10px 25px rgba(0,0,0,0.06); }
.small { opacity: .85; font-size: .9rem; }
.progress { text-align: left; opacity:.75 }
"""

# Single-page UI: upload a file, generate the quiz, then answer it.
# NOTE(review): the nesting of widgets inside Row/Accordion/Group below was
# reconstructed from a copy that had lost its indentation — confirm layout.
with gr.Blocks(title="اختبار من ملف (PDF/TXT)", css=THEME_CSS) as demo:
    gr.Markdown("## ✨ صانع اختبار من ملف PDF/TXT — واجهة واحدة بسيطة")
    gr.Markdown("ارفع ملفك، حدّد عدد الأسئلة، واضغط **ابدأ**. ثمّ أجب وتحقق من الإجابة.")

    quiz_state = gr.State(value=None)
    toast = gr.Markdown("")

    with gr.Row():
        inp_file = gr.File(label="ارفع ملف PDF أو TXT", file_count="single", file_types=[".pdf", ".txt"], type="filepath")
        num_q = gr.Slider(4, 20, value=DEFAULT_NUM_QUESTIONS, step=1, label="عدد الأسئلة")
    with gr.Accordion("خيارات متقدمة (للـ PDF المصوّر)", open=False):
        trocr_zoom = gr.Slider(2.0, 3.5, value=DEFAULT_TROCR_ZOOM, step=0.1, label="Zoom لتحويل الصفحات لصورة (OCR)")
        trocr_model = gr.Dropdown(
            choices=[
                "microsoft/trocr-base-printed",
                "microsoft/trocr-large-printed",
                "microsoft/trocr-base-handwritten",
                "microsoft/trocr-large-handwritten",
            ],
            value=DEFAULT_TROCR_MODEL, label="نموذج TrOCR"
        )

    btn_start = gr.Button("ابدأ توليد الاختبار", variant="primary")

    with gr.Group():
        with gr.Row():
            progress = gr.Label("", elem_classes=["progress"])
        q_md = gr.Markdown("", elem_classes=["card"])
        choices = gr.Radio(choices=[], label="اختر الإجابة", interactive=True)
        feedback = gr.Markdown("")
        exp_md = gr.Markdown("")
        with gr.Row():
            btn_prev = gr.Button("السابق")
            btn_next = gr.Button("التالي")
            btn_reveal = gr.Button("إظهار الإجابة")
            btn_finish = gr.Button("إنهاء الاختبار", variant="stop")
            btn_reset = gr.Button("إعادة ضبط")

    score_md = gr.Markdown("")

    render_outputs = [q_md, choices, exp_md, feedback, progress]

    def _render(state):
        """Render the current question, sending the option list as Radio choices.

        A bare list returned to a Radio output is taken as its value, not
        its choices, so the options are wrapped in ``gr.update``. The
        previously picked option (if any) is re-selected so a stale
        selection from another question is not left in the widget.
        """
        if not state or not state.get("records"):
            return "", gr.update(choices=[], value=None), "", "", ""
        q_text, options, exp_text, fb_text, pos = on_show_question(state)
        rec = state["records"][state["idx"]]
        picked = state["answers"].get(rec["id"])
        selected = next((o for o in options if o.split(")")[0].strip() == picked), None)
        return q_text, gr.update(choices=options, value=selected), exp_text, fb_text, pos

    def _reset():
        """Clear every quiz widget, including the options and progress label."""
        state, q_text, _unused, exp_text, fb_text, score_text, msg = on_reset()
        return state, q_text, gr.update(choices=[], value=None), exp_text, fb_text, "", score_text, msg

    # Process the file and build the questions, then show the first one.
    btn_start.click(
        process_input_file,
        inputs=[inp_file, num_q, gr.State(DEFAULT_LANG), trocr_model, trocr_zoom],
        outputs=[quiz_state, toast]
    ).then(_render, inputs=[quiz_state], outputs=render_outputs)

    # Navigation and reveal: update the state first, then re-render.
    btn_prev.click(on_prev, inputs=[quiz_state], outputs=[quiz_state]).then(
        _render, inputs=[quiz_state], outputs=render_outputs
    )
    btn_next.click(on_next, inputs=[quiz_state], outputs=[quiz_state]).then(
        _render, inputs=[quiz_state], outputs=render_outputs
    )
    btn_reveal.click(on_reveal, inputs=[quiz_state], outputs=[quiz_state, feedback]).then(
        _render, inputs=[quiz_state], outputs=render_outputs
    )

    # Record the chosen answer.
    choices.change(on_select_choice, inputs=[quiz_state, choices], outputs=[quiz_state, feedback])

    # Finish and show the score.
    btn_finish.click(on_finish, inputs=[quiz_state], outputs=[quiz_state, score_md])

    # Reset everything.
    btn_reset.click(
        _reset,
        outputs=[quiz_state, q_md, choices, exp_md, feedback, progress, score_md, toast]
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
 
512
  # Spaces تتعرف على demo تلقائيًا
513
  if __name__ == "__main__":