Leen172 committed on
Commit
d01ce1f
·
verified ·
1 Parent(s): 35b1e43

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +217 -190
app.py CHANGED
@@ -1,6 +1,5 @@
1
  # -*- coding: utf-8 -*-
2
- # app.py ثيم داكن ثابت + كل الأسئلة دفعة واحدة + Submit لعرض النتيجة والأخطاء
3
-
4
  import os, json, uuid, random, unicodedata
5
  from dataclasses import dataclass
6
  from pathlib import Path
@@ -13,14 +12,13 @@ import regex as re2
13
  import yake
14
  import gradio as gr
15
 
16
- # ---------- إعدادات عامة ----------
17
  random.seed(42)
18
- DEFAULT_LANG = "ar"
19
  DEFAULT_NUM_QUESTIONS = 6
20
  DEFAULT_TROCR_MODEL = "microsoft/trocr-base-printed"
21
  DEFAULT_TROCR_ZOOM = 2.6
22
 
23
- # ---------- OCR (تحميل كسول) ----------
24
  _OCR = {}
25
  def get_ocr(model_id: str):
26
  from transformers import pipeline
@@ -30,22 +28,25 @@ def get_ocr(model_id: str):
30
  _OCR[model_id] = pipeline("image-to-text", model=model_id, device=dev)
31
  return _OCR[model_id]
32
 
33
- # ---------- PDF/TXT → نص ----------
34
  def extract_text_with_pypdf(path: str) -> str:
35
  reader = PdfReader(path)
36
- out = []
37
  for p in reader.pages:
38
- try: t = p.extract_text() or ""
39
- except Exception: t = ""
40
- out.append(t)
41
- return "\n".join(out).strip()
 
 
42
 
43
  def pdf_to_images(path: str, zoom: float=2.5) -> List[Image.Image]:
44
- doc = fitz.open(path); M = fitz.Matrix(zoom, zoom)
 
45
  imgs = []
46
  for pg in doc:
47
  pix = pg.get_pixmap(matrix=M, alpha=False)
48
- imgs.append(Image.frombytes("RGB",(pix.width,pix.height),pix.samples))
49
  doc.close()
50
  return imgs
51
 
@@ -55,7 +56,7 @@ def extract_text_with_ocr(path: str, model_id: str, zoom: float) -> str:
55
  for i, img in enumerate(pdf_to_images(path, zoom=zoom), start=1):
56
  try:
57
  out = ocr(img)
58
- txt = out[0].get("generated_text","").strip() if out else ""
59
  except Exception:
60
  txt = ""
61
  parts.append(f"--- [Page {i}] ---\n{txt}")
@@ -64,20 +65,21 @@ def extract_text_with_ocr(path: str, model_id: str, zoom: float) -> str:
64
  def is_good(t: str, min_chars=250, min_alpha=0.15) -> bool:
65
  if len(t) < min_chars: return False
66
  alnum = sum(ch.isalnum() for ch in t)
67
- return (alnum/max(1,len(t))) >= min_alpha
68
 
69
- def file_to_text(path: str, model_id=DEFAULT_TROCR_MODEL, zoom=DEFAULT_TROCR_ZOOM) -> Tuple[str,str]:
70
  ext = Path(path).suffix.lower()
71
  if ext == ".txt":
72
- with open(path,"r",encoding="utf-8",errors="ignore") as f: return f.read(), "plain text"
 
73
  raw = extract_text_with_pypdf(path)
74
  if is_good(raw): return raw, "embedded (pypdf)"
75
  return extract_text_with_ocr(path, model_id, zoom), "OCR (TrOCR)"
76
 
77
- # ---------- تنظيف عربي ----------
78
  AR_DIAC = r"[ًٌٍَُِّْ]"
79
- def strip_headers(t:str)->str:
80
- out=[]
81
  for ln in t.splitlines():
82
  if re2.match(r"^\s*--- \[Page \d+\] ---\s*$", ln): continue
83
  if re2.match(r"^\s*(Page\s*\d+|صفحة\s*\d+)\s*$", ln): continue
@@ -85,246 +87,234 @@ def strip_headers(t:str)->str:
85
  out.append(ln)
86
  return "\n".join(out)
87
 
88
- def norm_ar(t:str)->str:
89
  t = unicodedata.normalize("NFKC", t)
90
  t = re2.sub(r"[ـ]", "", t)
91
  t = re2.sub(AR_DIAC, "", t)
92
  t = re2.sub(r"[إأآا]", "ا", t)
93
  t = re2.sub(r"[يى]", "ي", t)
94
- t = re2.sub(r"\س+", " ", t) if False else re2.sub(r"\s+", " ", t)
95
  t = re2.sub(r'(\p{L})\1{2,}', r'\1', t)
96
  t = re2.sub(r'(\p{L})\1', r'\1', t)
97
  return t.strip()
98
 
99
- def postprocess(raw:str)->str:
100
- t = strip_headers(raw).replace("\r","\n")
101
  t = re2.sub(r"\n{3,}", "\n\n", t)
102
  t = re2.sub(r"\d+\s*[\[\(][^\]\)]*[\]\)]", " ", t)
103
  t = re2.sub(r"\[\d+\]", " ", t)
104
  return norm_ar(t)
105
 
106
- # ---------- YAKE + تقسيم ----------
107
  SENT_SPLIT = re2.compile(r"(?<=[\.!؟\?])\s+")
108
  AR_STOP = set("""في على من إلى عن مع لدى ذلك هذه هذا الذين التي الذي أو أم إن أن كان تكون كانوا كانت كنت ثم قد لقد ربما بل لكن إلا سوى حتى حيث كما لما ما لماذا متى أين كيف أي هناك هنا هؤلاء أولئك نحن هو هي هم هن أنت أنتم أنتن""".split())
109
 
110
- def split_sents(t:str)->List[str]:
111
- s=[x.strip() for x in SENT_SPLIT.split(t) if x.strip()]
112
- return [x for x in s if len(x)>=25]
113
-
114
- def yake_keywords(t:str, k:int=160)->List[str]:
115
- ex = yake.KeywordExtractor(lan='ar', n=1, top=k)
116
- cands = [w for w,_ in ex.extract_keywords(t)]
117
- out=[]; seen=set()
118
- for k in cands:
119
- k=k.strip()
120
- if not k or k in seen or k in AR_STOP: continue
121
- if len(k)<3 or re2.match(r"^[\p{P}\p{S}]+$",k): continue
122
- seen.add(k); out.append(k)
123
- return out
124
-
125
- # ---------- مولّد MCQ ----------
126
  @dataclass
127
  class MCQ:
128
  id: str
129
  question: str
130
  choices: List[str]
131
  answer_index: int
132
- explanation: str
133
 
134
- def good_kw(kw:str)->bool:
135
- return kw and len(kw)>=3 and kw not in AR_STOP and not re2.match(r"^[\p{P}\p{S}\d_]+$", kw)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
137
- def distractors(correct:str, pool:List[str], k:int=3)->List[str]:
138
- L=len(correct.strip()); cand=[]
139
  for w in pool:
140
- w=w.strip()
141
- if not w or w==correct or w in AR_STOP: continue
142
  if re2.match(r"^[\p{P}\p{S}\d_]+$", w): continue
143
- if abs(len(w)-L)<=3: cand.append(w)
144
  random.shuffle(cand)
145
- out=cand[:k]
146
- while len(out)<k: out.append("—")
147
  return out
148
 
149
- def make_mcqs(text:str, n:int=6)->List[MCQ]:
150
- sents=split_sents(text)
151
  if not sents: raise ValueError("النص قصير أو غير صالح.")
152
- kws=yake_keywords(text) or [w for w,_ in sorted(((t, text.count(t)) for t in re2.findall(r"[\p{L}\p{N}_]+",text)), key=lambda x:-x[1])][:80]
153
- sent_for={}
154
  for s in sents:
155
  for kw in kws:
156
  if good_kw(kw) and re2.search(rf"(?<!\p{{L}}){re2.escape(kw)}(?!\p{{L}})", s) and kw not in sent_for:
157
- sent_for[kw]=s
158
- items=[]; used=set()
159
  for kw in [k for k in kws if k in sent_for]:
160
- if len(items)>=n: break
161
- s=sent_for[kw]
162
  if s in used: continue
163
- q=re2.sub(rf"(?<!\p{{L}}){re2.escape(kw)}(?!\p{{L}})", "_____", s, count=1)
164
- ch=distractors(kw, [x for x in kws if x!=kw], 3)+[kw]
165
- random.shuffle(ch); ans=ch.index(kw)
166
- exp=f"مقتبس من الجملة: {s[:220]}" + ("..." if len(s)>220 else "")
167
- items.append(MCQ(id=str(uuid.uuid4())[:8], question=q, choices=ch, answer_index=ans, explanation=exp))
168
  used.add(s)
169
- if not items: raise RuntimeError("تعذّر توليد أسئلة. جرّب نصاً أطول.")
170
  return items
171
 
172
- # ---------- تحويل للسجلات ----------
173
- def to_records(items:List[MCQ], source:str, method:str, n:int)->List[dict]:
174
- recs=[]
175
  for it in items:
176
- opts=[]
177
- for i,lbl in enumerate(["A","B","C","D"]):
178
- txt=(it.choices[i] if i<len(it.choices) else "—").strip()
179
- txt=txt.replace(",", "،").replace("?", "؟").replace(";", "؛")
180
- opts.append({"id":lbl,"text":txt or "—","is_correct":(i==it.answer_index)})
181
- recs.append({
182
- "id": it.id,
183
- "question": it.question.strip(),
184
- "options": opts,
185
- "explanation": it.explanation.strip(),
186
- "meta": {"source": source, "extraction_method": method, "num_questions": int(n)}
187
- })
188
  return recs
189
 
190
- # ---------- منطق الحالة ----------
191
- def correct_letter(rec):
192
- for o in rec["options"]:
193
- if o["is_correct"]: return o["id"]
194
- return ""
195
-
196
- def init_state(records):
197
- return {"records": records, "finished": False}
198
-
199
- # ---------- HTML للواجهة/الاختبار ----------
200
  def render_quiz_html(records: List[dict]) -> str:
201
  parts = []
202
  for i, rec in enumerate(records, start=1):
203
- qid = rec["id"]
204
- qtxt = rec["question"]
205
- opts = rec["options"]
206
  opts_html = []
207
  for o in opts:
208
- lid = o["id"]; txt = o["text"]
209
  opts_html.append(f"""
210
  <label class="opt">
211
- <input type="radio" name="q_{qid}" value="{lid}" />
212
  <span class="opt-letter">{lid}</span>
213
  <span class="opt-text">{txt}</span>
214
  </label>
215
  """)
216
  parts.append(f"""
217
  <div class="q-card" data-qid="{qid}">
218
- <div class="q-title">السؤال {i}:</div>
 
 
 
219
  <div class="q-text">{qtxt}</div>
220
  <div class="opts">{''.join(opts_html)}</div>
221
  </div>
222
  """)
223
- return f"""<div id="quiz" class="quiz-wrap">{''.join(parts)}</div>"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
 
225
- # ---------- معالجة الإدخال ----------
226
  def build_quiz(text_area, file_path, n, model_id, zoom):
227
- text_area = (text_area or "").strip()
228
- if not text_area and not file_path:
229
- return None, "", "🛈 أدخل نصًا أو ارفع ملفًا أولًا."
230
- if text_area:
231
- src_name = "pasted_text.txt"
232
- raw, method = text_area, "user text"
233
  else:
234
  raw, method = file_to_text(file_path, model_id=model_id, zoom=float(zoom))
235
- src_name = Path(file_path).name
236
  cleaned = postprocess(raw)
237
  items = make_mcqs(cleaned, n=int(n))
238
- records = to_records(items, source=src_name, method=method, n=n)
239
- state = init_state(records)
240
- html = render_quiz_html(records)
241
- return state, html, f"تم توليد {len(records)} سؤالًا."
242
 
243
- # ---------- تصحيح الإجابات ----------
244
  def grade(state, answers_json):
245
  try:
246
  user_map = json.loads(answers_json or "{}")
247
  except Exception:
248
- user_map = {}
249
  recs = state["records"] if state else []
250
  total = len(recs)
 
 
 
 
 
251
  correct = 0
252
- wrong_details = []
253
  for rec in recs:
254
  qid = rec["id"]
255
- chosen = (user_map.get(qid) or "").strip()
256
- cor = correct_letter(rec)
257
- if not chosen:
258
- wrong_details.append((rec, chosen, cor, "لم يتم اختيار إجابة"))
259
- elif chosen == cor:
260
- correct += 1
261
- else:
262
- wrong_details.append((rec, chosen, cor, ""))
263
-
264
- score_md = f"### نتيجتك: **{correct} / {total}**"
265
- if wrong_details:
266
- md = ["### الإجابات الخاطئة:"]
267
- for rec, chosen, cor, note in wrong_details:
268
- opts = {o["id"]: o["text"] for o in rec["options"]}
269
- md.append(
270
- f"- **السؤال:** {rec['question']}\n"
271
- f" - إجابتك: **{chosen or '—'}** — {opts.get(chosen,'')}\n"
272
- f" - الصحيحة: **{cor}** — {opts.get(cor,'')}\n"
273
- f" - الشرح: {rec['explanation']}\n"
274
- + (f" - ملاحظة: {note}\n" if note else "")
275
- )
276
- mistakes_md = "\n".join(md)
277
- else:
278
- mistakes_md = "### ممتاز! جميع الإجابات صحيحة ✅"
279
-
280
- return score_md, mistakes_md
281
-
282
- # ---------- الثيم (CSS داكن ثابت) ----------
283
  CSS = """
284
  :root{
285
- --bg:#0f0f0f; --panel:#1a1a1a; --card:#1b1b1b; --muted:#9aa0a6;
286
- --text:#f5efe6; --accent:#ff7d2d; --accent2:#ff9a55; --border:#2a2a2a;
287
  }
288
  body{direction:rtl; font-family:system-ui,'Cairo','IBM Plex Arabic',sans-serif; background:var(--bg);}
289
- .gradio-container{max-width:980px;margin:0 auto;padding:8px 8px 40px;}
290
- .top-title{color:#e9ded6;margin:8px 0 16px 0}
291
- .panel{background:var(--panel);border:1px solid var(--border);border-radius:16px;padding:16px;box-shadow:0 20px 45px rgba(0,0,0,.4)}
292
- .small{opacity:.85;color:#ddd}
293
-
294
- .button-primary > button{background:linear-gradient(180deg,var(--accent2),var(--accent));border:none;color:#161616;font-weight:700;}
295
- .button-primary > button:hover{filter:brightness(.95)}
296
- .upload-like{border:2px dashed #ff9a5555;background:#141414;border-radius:12px;padding:10px;color:#ddd}
297
-
298
- /* ترتيب عمودي ثابت */
299
- .input-stack > div {margin-bottom:12px}
300
-
301
- /* حقل النص أصغر وثابت */
302
- textarea{min-height:140px}
303
-
304
- /* بطاقة السؤال */
305
- .q-card{background:var(--card);border:1px solid var(--border);border-radius:16px;padding:16px;margin:14px 0}
306
- .q-title{color:#f0e6dc;font-weight:700;margin-bottom:8px}
307
- .q-text{color:var(--text);font-size:1.08rem;line-height:1.8;margin-bottom:10px}
 
 
308
  .opts{display:flex;flex-direction:column;gap:8px}
309
- .opt{display:flex;gap:10px;align-items:center;background:#191919;border:1px solid #282828;border-radius:12px;padding:10px}
310
- .opt input{accent-color:var(--accent)}
311
- .opt-letter{display:inline-flex;width:28px;height:28px;border-radius:8px;background:#222;align-items:center;justify-content:center;font-weight:800;color:#f1f1f1}
312
- .opt-text{color:#eaeaea}
313
- .result-card{background:#121212;border:1px solid #2a2a2a;border-radius:16px;padding:16px;margin-top:18px}
314
  """
315
 
316
- # ---------- واجهة Gradio ----------
317
  with gr.Blocks(title="Question Generator", css=CSS) as demo:
318
- gr.Markdown("<h2 class='top-title'>Question Generator</h2>")
319
 
320
- # لوحة إدخال عمودية ثابتة (لا تتقلب)
321
- with gr.Group(elem_classes=["panel","input-stack"]):
322
- gr.Markdown("**أدخل نصًا أو ارفع ملفًا، حدّد عدد الأسئلة ثم اضغط توليد.**", elem_classes=["small"])
323
- text_area = gr.Textbox(lines=6, placeholder="ألصق هنا مقطع نصي...", label="أدخل نصًا أو ارفع ملفًا")
324
  num_q = gr.Slider(4, 20, value=DEFAULT_NUM_QUESTIONS, step=1, label="عدد الأسئلة")
325
- file_comp = gr.File(label="اختر ملف PDF أو TXT", file_count="single",
326
  file_types=[".pdf",".txt"], type="filepath", elem_classes=["upload-like"])
327
- with gr.Accordion("خيارات متقدمة (لـ PDF المصوّر)", open=False):
328
  trocr_model = gr.Dropdown(
329
  choices=[
330
  "microsoft/trocr-base-printed",
@@ -338,41 +328,78 @@ with gr.Blocks(title="Question Generator", css=CSS) as demo:
338
  btn_build = gr.Button("توليد الأسئلة", elem_classes=["button-primary"])
339
  toast = gr.Markdown("", elem_classes=["small"])
340
 
341
- # حالة عامة + مكان عرض الاختبار + إرساله
342
  state = gr.State(None)
343
- quiz_html = gr.HTML("") # كل الأسئلة ستُعرض هنا دفعة واحدة
344
  btn_submit = gr.Button("إنهاء وإرسال الإجابات", elem_classes=["button-primary"])
345
  answers_box = gr.Textbox(visible=False)
346
  score_md = gr.Markdown("")
347
- mistakes_md = gr.Markdown("")
348
 
349
- # توليد الأسئلة
350
  btn_build.click(
351
  build_quiz,
352
  inputs=[text_area, file_comp, num_q, trocr_model, trocr_zoom],
353
  outputs=[state, quiz_html, toast]
354
  )
355
 
356
- # JS لالتقاط الإجابات + سكرول للأسئلة بعد التوليد
357
  js_collect = """
358
  function () {
359
- const quiz = document.getElementById('quiz');
360
- if (quiz) { quiz.scrollIntoView({behavior:'smooth', block:'start'}); }
361
- const data = {};
362
- document.querySelectorAll('.q-card').forEach(card => {
363
- const qid = card.getAttribute('data-qid');
364
- const checked = card.querySelector('input[type="radio"]:checked');
365
- data[qid] = checked ? checked.value : null;
 
 
366
  });
367
- return JSON.stringify(data);
 
 
 
 
 
 
 
 
 
 
368
  }
369
  """
370
 
371
- # Submit: نجمع الإجابات بالـJS ثم نقيّمها
 
372
  btn_submit.click(
373
- None, inputs=None, outputs=[answers_box], js=js_collect
 
 
 
374
  ).then(
375
- grade, inputs=[state, answers_box], outputs=[score_md, mistakes_md]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
  )
377
 
378
  if __name__ == "__main__":
 
1
  # -*- coding: utf-8 -*-
2
+ # واجهة حديثة ثابتة: كل الأسئلة دفعة واحدة + منع الإرسال قبل الإجابة على الجميع
 
3
  import os, json, uuid, random, unicodedata
4
  from dataclasses import dataclass
5
  from pathlib import Path
 
12
  import yake
13
  import gradio as gr
14
 
15
+ # ------------------ إعدادات عامة ------------------
16
  random.seed(42)
 
17
  DEFAULT_NUM_QUESTIONS = 6
18
  DEFAULT_TROCR_MODEL = "microsoft/trocr-base-printed"
19
  DEFAULT_TROCR_ZOOM = 2.6
20
 
21
+ # ------------------ OCR (تحميل كسول) ------------------
22
  _OCR = {}
23
  def get_ocr(model_id: str):
24
  from transformers import pipeline
 
28
  _OCR[model_id] = pipeline("image-to-text", model=model_id, device=dev)
29
  return _OCR[model_id]
30
 
31
+ # ------------------ PDF/TXT → نص ------------------
32
def extract_text_with_pypdf(path: str) -> str:
    """Return the embedded text of every page of the PDF at *path*.

    Pages are read in document order through ``PdfReader``. A page whose
    extraction raises, or yields nothing, contributes an empty string. The
    page texts are joined with newlines and the result is stripped.
    """
    def _page_text(page) -> str:
        # Best effort: one unreadable page must not abort the whole document.
        try:
            return page.extract_text() or ""
        except Exception:
            return ""

    return "\n".join(_page_text(page) for page in PdfReader(path).pages).strip()
42
 
43
def pdf_to_images(path: str, zoom: float=2.5) -> List[Image.Image]:
    """Render every page of the PDF at *path* to an RGB PIL image.

    Args:
        path: Location of the PDF file, opened with ``fitz.open``.
        zoom: Scale factor applied on both axes through ``fitz.Matrix``.

    Returns:
        One ``Image`` per page, in document order.
    """
    doc = fitz.open(path)
    try:
        matrix = fitz.Matrix(zoom, zoom)
        images = []
        for page in doc:
            pix = page.get_pixmap(matrix=matrix, alpha=False)
            images.append(Image.frombytes("RGB", (pix.width, pix.height), pix.samples))
        return images
    finally:
        # Close the document even when rendering a page fails, so the file
        # handle is not leaked.
        doc.close()
52
 
 
56
  for i, img in enumerate(pdf_to_images(path, zoom=zoom), start=1):
57
  try:
58
  out = ocr(img)
59
+ txt = out[0].get("generated_text", "").strip() if out else ""
60
  except Exception:
61
  txt = ""
62
  parts.append(f"--- [Page {i}] ---\n{txt}")
 
65
def is_good(t: str, min_chars=250, min_alpha=0.15) -> bool:
    """Tell whether *t* looks like usable extracted text.

    The text passes when it has at least *min_chars* characters and the
    share of alphanumeric characters among all characters is at least
    *min_alpha*.
    """
    length = len(t)
    if length < min_chars:
        return False
    alnum_count = len([ch for ch in t if ch.isalnum()])
    # max(1, ...) guards the division when min_chars allows an empty string.
    return alnum_count / max(1, length) >= min_alpha
69
 
70
def file_to_text(path: str, model_id=DEFAULT_TROCR_MODEL, zoom=DEFAULT_TROCR_ZOOM) -> Tuple[str, str]:
    """Read *path* and return ``(text, extraction_method_label)``.

    ``.txt`` files are read as UTF-8, ignoring undecodable bytes. Any other
    file is treated as a PDF: the embedded text is used when ``is_good``
    accepts it, otherwise the pages are run through OCR with *model_id* at
    the given *zoom*.
    """
    if Path(path).suffix.lower() == ".txt":
        with open(path, "r", encoding="utf-8", errors="ignore") as handle:
            content = handle.read()
        return content, "plain text"

    embedded = extract_text_with_pypdf(path)
    if not is_good(embedded):
        # Embedded text is missing or too noisy: fall back to OCR.
        return extract_text_with_ocr(path, model_id, zoom), "OCR (TrOCR)"
    return embedded, "embedded (pypdf)"
78
 
79
+ # ------------------ تنظيف عربي مبسّط ------------------
80
  AR_DIAC = r"[ًٌٍَُِّْ]"
81
+ def strip_headers(t: str) -> str:
82
+ out = []
83
  for ln in t.splitlines():
84
  if re2.match(r"^\s*--- \[Page \d+\] ---\s*$", ln): continue
85
  if re2.match(r"^\s*(Page\s*\d+|صفحة\s*\d+)\s*$", ln): continue
 
87
  out.append(ln)
88
  return "\n".join(out)
89
 
90
def norm_ar(t: str) -> str:
    """Normalize Arabic text with a fixed, ordered sequence of rewrites.

    After NFKC normalization the rewrites below are applied one after the
    other (order matters), and the result is stripped.
    """
    t = unicodedata.normalize("NFKC", t)
    rewrites = (
        (r"[ـ]", ""),                    # drop tatweel (elongation mark)
        (AR_DIAC, ""),                   # drop the diacritics listed in AR_DIAC
        (r"[إأآا]", "ا"),                # unify alef variants
        (r"[يى]", "ي"),                  # unify yeh / alef maqsura
        (r"\s+", " "),                   # collapse whitespace runs (newlines included)
        (r'(\p{L})\1{2,}', r'\1'),       # a letter repeated 3+ times -> once
        # NOTE(review): this also collapses legitimately doubled letters.
        (r'(\p{L})\1', r'\1'),           # a letter repeated twice -> once
    )
    for pattern, replacement in rewrites:
        t = re2.sub(pattern, replacement, t)
    return t.strip()
100
 
101
def postprocess(raw: str) -> str:
    """Clean raw extracted text and return its normalized form.

    Header lines are removed with ``strip_headers``, carriage returns become
    newlines, three or more consecutive newlines shrink to two, and
    citation-like fragments are blanked out before ``norm_ar`` is applied.
    """
    text = strip_headers(raw).replace("\r", "\n")
    cleanups = (
        (r"\n{3,}", "\n\n"),
        # digits followed by a bracketed or parenthesized group
        (r"\d+\s*[\[\(][^\]\)]*[\]\)]", " "),
        # bare numeric markers such as [12]
        (r"\[\d+\]", " "),
    )
    for pattern, replacement in cleanups:
        text = re2.sub(pattern, replacement, text)
    return norm_ar(text)
107
 
108
+ # ------------------ توليد أسئلة ------------------
109
  SENT_SPLIT = re2.compile(r"(?<=[\.!؟\?])\s+")
110
  AR_STOP = set("""في على من إلى عن مع لدى ذلك هذه هذا الذين التي الذي أو أم إن أن كان تكون كانوا كانت كنت ثم قد لقد ربما بل لكن إلا سوى حتى حيث كما لما ما لماذا متى أين كيف أي هناك هنا هؤلاء أولئك نحن هو هي هم هن أنت أنتم أنتن""".split())
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
@dataclass
class MCQ:
    """A single generated multiple-choice (fill-in-the-blank) question."""

    # Short identifier; make_mcqs fills it with the first 8 characters of a uuid4.
    id: str
    # Source sentence with the keyword replaced by "_____".
    question: str
    # Answer options shown to the user (distractors plus the keyword, shuffled).
    choices: List[str]
    # Position of the correct option inside ``choices``.
    answer_index: int
 
118
 
119
def split_sents(t: str) -> List[str]:
    """Split *t* on ``SENT_SPLIT`` and keep the usable sentences.

    Each piece is stripped; pieces shorter than 25 characters (empty ones
    included) are dropped.
    """
    sentences = []
    for piece in SENT_SPLIT.split(t):
        piece = piece.strip()
        if len(piece) >= 25:
            sentences.append(piece)
    return sentences
122
+
123
def yake_keywords(t: str, k: int = 160) -> List[str]:
    """Extract up to *k* single-word keyword candidates from *t* with YAKE.

    Candidates are stripped and then filtered: empty strings, repeats, stop
    words from ``AR_STOP``, words shorter than three characters and tokens
    made only of punctuation or symbols are discarded. The extractor's order
    is preserved.
    """
    extractor = yake.KeywordExtractor(lan='ar', n=1, top=k)
    keywords: List[str] = []
    seen = set()
    # extract_keywords yields pairs; only the first element (the keyword) is used.
    for candidate, _score in extractor.extract_keywords(t):
        word = candidate.strip()
        if not word or word in seen or word in AR_STOP:
            continue
        if len(word) < 3 or re2.match(r"^[\p{P}\p{S}]+$", word):
            continue
        seen.add(word)
        keywords.append(word)
    return keywords
133
+
134
def good_kw(kw: str) -> bool:
    """Tell whether *kw* is usable as the blanked-out answer of a question.

    A keyword qualifies when it is non-empty, at least three characters
    long, not in ``AR_STOP``, and not made solely of punctuation, symbols,
    digits or underscores.

    Always returns a real ``bool``. Previously the ``and`` chain leaked the
    falsy operand itself (``""`` or ``None``), contradicting the annotation.
    """
    if not kw or len(kw) < 3 or kw in AR_STOP:
        return False
    return re2.match(r"^[\p{P}\p{S}\d_]+$", kw) is None
136
 
137
def distractors(correct: str, pool: List[str], k: int = 3) -> List[str]:
    """Pick up to *k* wrong options for *correct* from *pool*.

    A pool word is eligible when, after stripping, it is non-empty, differs
    from the stripped correct answer, is not a stop word, is not made only
    of punctuation/symbols/digits/underscores, and its length is within
    three characters of the correct answer's length. Eligible words are
    shuffled with the module-level ``random`` state and the first *k* are
    returned; missing slots are padded with "—".

    Args:
        correct: The right answer.
        pool: Candidate words; may contain duplicates.
        k: Number of options to return.

    Returns:
        A list of exactly *k* strings (empty when ``k <= 0``).
    """
    # Compare against the stripped answer: the length check already uses it,
    # and pool words are stripped before comparison.
    target = correct.strip()
    target_len = len(target)
    candidates = []
    seen = set()
    for word in pool:
        word = word.strip()
        if not word or word == target or word in seen or word in AR_STOP:
            continue
        if re2.match(r"^[\p{P}\p{S}\d_]+$", word):
            continue
        if abs(len(word) - target_len) <= 3:
            # Track accepted words so a repeated pool entry cannot produce
            # two identical options.
            seen.add(word)
            candidates.append(word)
    random.shuffle(candidates)
    chosen = candidates[:k]
    chosen.extend("—" for _ in range(k - len(chosen)))
    return chosen
148
 
149
def make_mcqs(text: str, n: int = 6) -> List[MCQ]:
    """Build up to *n* fill-in-the-blank questions from *text*.

    Each question is a sentence of *text* in which one keyword is replaced
    by "_____"; the keyword plus three distractors form the shuffled
    choices. A sentence is used for at most one question.

    Args:
        text: Cleaned source text.
        n: Maximum number of questions to produce.

    Returns:
        The generated ``MCQ`` items, in keyword order.

    Raises:
        ValueError: If *text* yields no usable sentence.
        RuntimeError: If no question could be generated.
    """
    from collections import Counter

    sents = split_sents(text)
    if not sents:
        raise ValueError("النص قصير أو غير صالح.")

    kws = yake_keywords(text)
    if not kws:
        # Fallback: the 80 most frequent distinct tokens. Counting tokens once
        # replaces a text.count() per token, which was quadratic, counted
        # substrings, and left duplicates in the list.
        tokens = re2.findall(r"[\p{L}\p{N}_]+", text)
        kws = [tok for tok, _ in Counter(tokens).most_common(80)]

    # One compiled whole-word pattern per usable keyword, reused both for
    # locating the sentence and for blanking the keyword.
    patterns = {
        kw: re2.compile(rf"(?<!\p{{L}}){re2.escape(kw)}(?!\p{{L}})")
        for kw in kws
        if good_kw(kw)
    }

    # Map each keyword to the first sentence that contains it.
    sent_for = {}
    for s in sents:
        for kw, pattern in patterns.items():
            if kw not in sent_for and pattern.search(s):
                sent_for[kw] = s

    items, used = [], set()
    for kw in kws:
        if len(items) >= n:
            break
        s = sent_for.get(kw)
        if s is None or s in used:
            continue
        q = patterns[kw].sub("_____", s, count=1)
        ch = distractors(kw, [x for x in kws if x != kw], 3) + [kw]
        random.shuffle(ch)
        ans = ch.index(kw)
        items.append(MCQ(id=str(uuid.uuid4())[:8], question=q, choices=ch, answer_index=ans))
        used.add(s)

    if not items:
        raise RuntimeError("تعذّر توليد أسئلة.")
    return items
170
 
171
+ # ------------------ تحويل إلى سجلات ------------------
172
def to_records(items: List[MCQ]) -> List[dict]:
    """Convert ``MCQ`` items into plain dict records for rendering and grading.

    Every record has ``id``, ``question`` and ``options``. ``options``
    always holds four entries labelled A-D; a missing or blank choice is
    shown as "—". Latin comma, question mark and semicolon in option text
    are replaced by their Arabic counterparts.
    """
    labels = ("A", "B", "C", "D")
    arabic_punct = str.maketrans({",": "،", "?": "؟", ";": "؛"})
    records = []
    for item in items:
        options = []
        for idx, label in enumerate(labels):
            raw = item.choices[idx] if idx < len(item.choices) else "—"
            shown = raw.strip().translate(arabic_punct)
            options.append({
                "id": label,
                "text": shown or "—",
                "is_correct": idx == item.answer_index,
            })
        records.append({"id": item.id, "question": item.question.strip(), "options": options})
    return records
182
 
183
+ # ------------------ HTML للامتحان (كل الأسئلة دفعة واحدة) ------------------
 
 
 
 
 
 
 
 
 
184
def render_quiz_html(records: List[dict]) -> str:
    """Render all questions at once as one HTML fragment.

    Each record becomes a ``.q-card`` holding a header (title plus a hidden
    result badge with id ``b_<qid>``), the question text and one radio input
    per option. A progress pill above the cards shows how many questions are
    answered, followed by a small script that keeps the counter up to date.

    Question and option text comes from user-supplied text/PDF content, so
    every interpolated value is HTML-escaped before insertion.

    Args:
        records: Dicts with ``id``, ``question`` and ``options`` (each option
            having ``id`` and ``text``).

    Returns:
        The HTML fragment as a string.
    """
    from html import escape

    cards = []
    for number, rec in enumerate(records, start=1):
        qid = escape(str(rec["id"]), quote=True)
        qtxt = escape(str(rec["question"]))
        option_rows = []
        for opt in rec["options"]:
            lid = escape(str(opt["id"]), quote=True)
            txt = escape(str(opt["text"]))
            option_rows.append(f"""
            <label class="opt">
              <input type="radio" name="q_{qid}" value="{lid}">
              <span class="opt-letter">{lid}</span>
              <span class="opt-text">{txt}</span>
            </label>
            """)
        cards.append(f"""
        <div class="q-card" data-qid="{qid}">
          <div class="q-header">
            <div class="q-title">السؤال {number}</div>
            <div class="q-badge" id="b_{qid}" hidden></div>
          </div>
          <div class="q-text">{qtxt}</div>
          <div class="opts">{''.join(option_rows)}</div>
        </div>
        """)

    # Simple progress counter shown above the question cards.
    quiz = f"""
    <div id="quiz" class="quiz-wrap">
      <div class="progress-pill"><span id="ans_count">0</span>/<span id="total">{len(records)}</span> تمّت الإجابة</div>
      {''.join(cards)}
    </div>
    """
    # NOTE(review): whether a <script> placed inside a gr.HTML value actually
    # executes depends on how Gradio injects the markup - confirm in the
    # browser; if it does not run, the counter stays at 0.
    counter_script = """
    <script>
      // Refresh the counter whenever an answer changes.
      const updateCounter = () => {
        const cards = document.querySelectorAll('.q-card');
        let filled = 0;
        cards.forEach(c => {
          if (c.querySelector('input[type="radio"]:checked')) filled += 1;
        });
        const el = document.getElementById('ans_count');
        if (el) el.textContent = String(filled);
      };
      document.querySelectorAll('.q-card input[type="radio"]').forEach(i => i.addEventListener('change', updateCounter));
      updateCounter();
    </script>
    """
    return quiz + counter_script
230
 
231
+ # ------------------ بناء الامتحان ------------------
232
def build_quiz(text_area, file_path, n, model_id, zoom):
    """Generate a quiz from pasted text or an uploaded file.

    Pasted text takes precedence over the file. The raw text is cleaned with
    ``postprocess``, turned into questions with ``make_mcqs`` and converted
    to records.

    Args:
        text_area: Text pasted by the user (may be empty or None).
        file_path: Path of the uploaded PDF/TXT file (may be empty or None).
        n: Requested number of questions.
        model_id: OCR model identifier, used only for the file path.
        zoom: Page render zoom for OCR, used only for the file path.

    Returns:
        ``(state, quiz_html, status_message)``. When there is no input, or
        no question can be generated, ``state`` is None and ``quiz_html`` is
        empty so the status message explains what happened.
    """
    txt = (text_area or "").strip()
    if not txt and not file_path:
        return None, "", "🛈 أدخل نصًا أو ارفع ملفًا."
    if txt:
        raw, method = txt, "user text"
    else:
        raw, method = file_to_text(file_path, model_id=model_id, zoom=float(zoom))
    cleaned = postprocess(raw)
    count = int(n)
    try:
        items = make_mcqs(cleaned, n=count)
    except (ValueError, RuntimeError) as err:
        # make_mcqs signals "text too short" / "nothing generated" with
        # user-facing messages; show them in the status area instead of
        # letting the click handler fail.
        return None, "", f"⚠️ {err}"
    recs = to_records(items)
    state = {"records": recs, "method": method}
    return state, render_quiz_html(recs), f"تم توليد {len(recs)} سؤالًا. أجب عن جميعها ثم اضغط إرسال."
 
245
 
246
+ # ------------------ التصحيح (يعيد الدرجة + خريطة الصحة) ------------------
247
def grade(state, answers_json):
    """Grade the submitted answers against the quiz stored in *state*.

    Args:
        state: Dict with a ``records`` list as produced by ``build_quiz``,
            or None when no quiz exists.
        answers_json: JSON object mapping question id to the chosen option
            letter (empty string or None is treated as ``{}``).

    Returns:
        ``(score_markdown, correctness_json)``. ``correctness_json`` maps
        each question id to true/false. It is ``"{}"`` when the payload
        cannot be read or at least one question is unanswered; in those
        cases the first element carries the explanatory message.
    """
    unreadable = ("حدث خطأ في قراءة الإجابات.", "{}")
    try:
        user_map = json.loads(answers_json or "{}")
    except (TypeError, ValueError):
        return unreadable
    # Valid JSON that is not an object (list, number, string) has no .get();
    # treat it as an unreadable payload instead of crashing below.
    if not isinstance(user_map, dict):
        return unreadable

    recs = state["records"] if state else []
    total = len(recs)

    # Every question must have an answer before anything is graded.
    if any(not user_map.get(rec["id"]) for rec in recs):
        return "⚠️ يجب الإجابة على جميع الأسئلة قبل الإرسال.", "{}"

    correctness = {}
    correct = 0
    for rec in recs:
        qid = rec["id"]
        right_letter = next((o["id"] for o in rec["options"] if o["is_correct"]), "")
        is_right = user_map.get(qid) == right_letter
        correctness[qid] = is_right
        if is_right:
            correct += 1
    score_text = f"### نتيجتك: **{correct} / {total}**"
    return score_text, json.dumps(correctness, ensure_ascii=False)
269
+
270
+ # =======================================================
271
+ # واجهة
272
+ # =======================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  CSS = """
274
  :root{
275
+ --bg:#0e0e11; --panel:#15161a; --card:#1a1b20; --muted:#a7b0be;
276
+ --text:#f6f7fb; --accent:#6ee7b7; --accent2:#34d399; --danger:#ef4444; --border:#262833;
277
  }
278
  body{direction:rtl; font-family:system-ui,'Cairo','IBM Plex Arabic',sans-serif; background:var(--bg);}
279
+ .gradio-container{max-width:1000px;margin:0 auto;padding:12px 12px 40px;}
280
+ h2.top{color:#eaeaf2;margin:4px 0 16px}
281
+ .panel{background:var(--panel);border:1px solid var(--border);border-radius:14px;padding:14px;box-shadow:0 16px 38px rgba(0,0,0,.35)}
282
+ .small{opacity:.9;color:#d9dee8}
283
+
284
+ .button-primary>button{background:linear-gradient(180deg,var(--accent),var(--accent2));border:none;color:#0b0d10;font-weight:800}
285
+ .button-primary>button:hover{filter:brightness(.95)}
286
+ .upload-like{border:2px dashed #3b3f52;background:#121318;border-radius:12px;padding:10px;color:#cfd5e3}
287
+
288
+ textarea{min-height:120px}
289
+
290
+ /* الامتحان */
291
+ .progress-pill{display:inline-block;background:#0f1116;border:1px solid #2a2d3a;border-radius:999px;padding:6px 12px;color:#cfd5e3;margin:10px 0}
292
+ .q-card{background:var(--card);border:1px solid var(--border);border-radius:14px;padding:14px;margin:12px 0}
293
+ .q-header{display:flex;gap:10px;align-items:center;justify-content:space-between;margin-bottom:6px}
294
+ .q-title{color:#eaeaf2;font-weight:800}
295
+ .q-badge{padding:8px 12px;border-radius:10px;font-weight:700}
296
+ .q-badge.ok{background:#083a2a;color:#b6f4db;border:1px solid #145b44}
297
+ .q-badge.err{background:#3a0d14;color:#ffd1d6;border:1px solid #6a1e2b}
298
+
299
+ .q-text{color:var(--text);font-size:1.06rem;line-height:1.8;margin:8px 0 12px}
300
  .opts{display:flex;flex-direction:column;gap:8px}
301
+ .opt{display:flex;gap:10px;align-items:center;background:#14161c;border:1px solid #2a2d3a;border-radius:12px;padding:10px}
302
+ .opt input{accent-color:var(--accent2)}
303
+ .opt-letter{display:inline-flex;width:28px;height:28px;border-radius:8px;background:#0f1116;border:1px solid #2a2d3a;align-items:center;justify-content:center;font-weight:800;color:#dfe6f7}
304
+ .opt-text{color:#eaeaf2}
 
305
  """
306
 
 
307
  with gr.Blocks(title="Question Generator", css=CSS) as demo:
308
+ gr.Markdown("<h2 class='top'>Question Generator</h2>")
309
 
310
+ with gr.Group(elem_classes=["panel"]):
311
+ gr.Markdown("**أدخل نصًا أو ارفع ملفًا، حدّد عدد الأسئلة، ثم اضغط توليد.**\
312
+ <br>يجب الإجابة على <u>جميع</u> الأسئلة قبل الإرسال.", elem_classes=["small"])
313
+ text_area = gr.Textbox(lines=6, placeholder="ألصق هنا مقطع نصي...", label="أدخل نصًا")
314
  num_q = gr.Slider(4, 20, value=DEFAULT_NUM_QUESTIONS, step=1, label="عدد الأسئلة")
315
+ file_comp = gr.File(label="أو اختر ملف PDF/TXT", file_count="single",
316
  file_types=[".pdf",".txt"], type="filepath", elem_classes=["upload-like"])
317
+ with gr.Accordion("خيارات متقدمة (PDF مصوّر)", open=False):
318
  trocr_model = gr.Dropdown(
319
  choices=[
320
  "microsoft/trocr-base-printed",
 
328
  btn_build = gr.Button("توليد الأسئلة", elem_classes=["button-primary"])
329
  toast = gr.Markdown("", elem_classes=["small"])
330
 
 
331
  state = gr.State(None)
332
+ quiz_html = gr.HTML("") # مكان عرض جميع الأسئلة دفعة واحدة
333
  btn_submit = gr.Button("إنهاء وإرسال الإجابات", elem_classes=["button-primary"])
334
  answers_box = gr.Textbox(visible=False)
335
  score_md = gr.Markdown("")
336
+ correctness_box = gr.Textbox(visible=False) # نستقبل فيها خريطة الصحة لتلوين الواجهة
337
 
338
+ # توليد الامتحان
339
  btn_build.click(
340
  build_quiz,
341
  inputs=[text_area, file_comp, num_q, trocr_model, trocr_zoom],
342
  outputs=[state, quiz_html, toast]
343
  )
344
 
345
+ # JS: جمع الإجابات + المنع إن كان هناك سؤال غير مُجاب
346
  js_collect = """
347
  function () {
348
+ // عدّاد وإجابات
349
+ const cards = Array.from(document.querySelectorAll('.q-card'));
350
+ const map = {};
351
+ let missing = 0;
352
+ cards.forEach(c => {
353
+ const qid = c.getAttribute('data-qid');
354
+ const chosen = c.querySelector('input[type="radio"]:checked');
355
+ if (!chosen) { missing += 1; }
356
+ map[qid] = chosen ? chosen.value : null;
357
  });
358
+ if (missing > 0) {
359
+ // أظهر رسالة منع بسيطة قرب أعلى الاختبار
360
+ let pill = document.querySelector('.progress-pill');
361
+ if (pill) {
362
+ pill.style.borderColor = '#6a1e2b';
363
+ pill.style.color = '#ffd1d6';
364
+ pill.textContent = `لا يمكن الإرسال: ${missing} سؤال/أسئلة بدون إجابة`;
365
+ }
366
+ return ["", ""]; // لا نرسل شيئًا للتصحيح
367
+ }
368
+ return [JSON.stringify(map), "go"]; // go = سمح بالإرسال
369
  }
370
  """
371
 
372
+ # عند الضغط Submit:
373
+ # 1) اجمع الإجابات (JS). إذا لم يجب على الجميع، لن نرسل للتصحيح.
374
  btn_submit.click(
375
+ None, inputs=None, outputs=[answers_box, correctness_box], js=js_collect
376
+ ).then(
377
+ # 2) صحّح فقط إذا وُجدت إجابات (answers_box غير فارغ)
378
+ grade, inputs=[state, answers_box], outputs=[score_md, correctness_box]
379
  ).then(
380
+ # 3) لون الواجهة بالصح/الخطأ (Correct!/Incorrect.) بدون تعليل
381
+ None, inputs=[correctness_box], outputs=None,
382
+ js="""
383
+ (correctness_json) => {
384
+ if (!correctness_json) return;
385
+ let okmap = {};
386
+ try { okmap = JSON.parse(correctness_json); } catch(e){ return; }
387
+ Object.entries(okmap).forEach(([qid, ok]) => {
388
+ const badge = document.getElementById('b_'+qid);
389
+ if (!badge) return;
390
+ badge.hidden = false;
391
+ if (ok) {
392
+ badge.classList.remove('err'); badge.classList.add('ok');
393
+ badge.textContent = 'Correct!';
394
+ } else {
395
+ badge.classList.remove('ok'); badge.classList.add('err');
396
+ badge.textContent = 'Incorrect.';
397
+ }
398
+ });
399
+ const el = document.querySelector('.progress-pill');
400
+ if (el) { el.style.borderColor = '#2a2d3a'; el.style.color = '#cfd5e3'; }
401
+ }
402
+ """
403
  )
404
 
405
  if __name__ == "__main__":