Leen172 commited on
Commit
09c0b08
·
verified ·
1 Parent(s): 49a5e3d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +141 -86
app.py CHANGED
@@ -1,7 +1,7 @@
1
  # -*- coding: utf-8 -*-
2
  # صفحتان ثابتتان + Submit لكل سؤال يعمل فعليًا + منع تغيّر أبعاد صفحة الإدخال
3
 
4
- import os, json, uuid, random, unicodedata
5
  from dataclasses import dataclass
6
  from pathlib import Path
7
  from typing import List, Tuple, Optional
@@ -93,7 +93,7 @@ def norm_ar(t:str)->str:
93
  t = re2.sub(AR_DIAC, "", t)
94
  t = re2.sub(r"[إأآا]", "ا", t)
95
  t = re2.sub(r"[يى]", "ي", t)
96
- t = re2.sub(r"\s+", " ", t)
97
  t = re2.sub(r'(\p{L})\1{2,}', r'\1', t)
98
  t = re2.sub(r'(\p{L})\1', r'\1', t)
99
  return t.strip()
@@ -138,7 +138,6 @@ def yake_keyphrases(t: str, top_k: int = 180) -> List[str]:
138
  continue
139
  if 2 <= len(w) <= 42:
140
  phrases.append(w); seen.add(w)
141
- # إزالة العبارات التي هي جزء من أطول
142
  phrases_sorted = sorted(phrases, key=lambda x: (-len(x), x))
143
  kept=[]
144
  for p in phrases_sorted:
@@ -191,7 +190,6 @@ def mlm_fill(sentence_with_blank: str, correct: str, k: int = 20) -> List[str]:
191
  tok = o["token_str"].strip()
192
  if tok and tok != correct and len(tok) >= 2 and not re2.match(r"^[\p{P}\p{S}\d_]+$", tok):
193
  cands.append(tok)
194
- # فريد مع الحفاظ على الترتيب
195
  seen=set(); uniq=[]
196
  for w in cands:
197
  if w not in seen:
@@ -269,102 +267,158 @@ def sentence_score(s: str) -> float:
269
  bonus = 0.2 if ("،" in s or ":" in s) else 0.0
270
  return base + bonus + penalties
271
 
272
- # --- (H) ترتيب المرشّحات بالانسجام مع الجملة ---
273
- def rank_by_sentence_coherence(sentence_with_blank: str, correct: str, candidates: List[str], topk: int=3) -> List[str]:
274
- emb = get_embedder()
275
- if not emb or not candidates:
276
- return candidates[:topk]
277
- filled = [sentence_with_blank.replace("_____", c) for c in candidates]
278
- ref = sentence_with_blank.replace("_____", correct)
279
- vecs = embed_texts([ref] + filled)
280
- if vecs is None:
281
- return candidates[:topk]
282
- import numpy as np
283
- ref_vec = vecs[0]
284
- cand_vecs = vecs[1:]
285
- sims = cand_vecs @ ref_vec
286
- order = list(reversed(sorted(range(len(candidates)), key=lambda i: sims[i])))
287
- ranked = [candidates[i] for i in order]
288
- return ranked[:topk]
289
 
290
- # --- (I) حصاد مصطلحات احتياطية عالية التكرار من النص كله ---
291
- def harvest_backup_terms(text: str, limit: int = 400) -> List[str]:
292
- toks = re2.findall(r"[\p{L}][\p{L}\p{N}_\-]{1,}", text)
293
- stats = {}
294
- for t in toks:
295
- tt = norm_ar(t)
296
- if not good_kw(tt):
297
- continue
298
- stats[tt] = stats.get(tt, 0) + 1
299
- top = [w for w,_ in sorted(stats.items(), key=lambda kv: -kv[1])]
300
- return top[:limit]
301
-
302
- # ================== (NEW) موازنة الطول والتطويل ==================
303
-
304
- # كاش صغير لعبارة الصحيحة لاستخدامها أثناء التطويل
305
  ref_phrase_cache = {}
306
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  def word_len(s: str) -> int:
308
  return len([w for w in re2.split(r"\s+", s.strip()) if w])
309
 
310
- def within_ratio(cand: str, target_len: int, tol: float = 0.2) -> bool:
311
  L = word_len(cand)
312
  return (target_len*(1-tol) <= L <= target_len*(1+tol))
313
 
314
- # قوالب عربية عامة للتطويل عند غياب الـMLM أو فشل توقع مناسب
315
- GENERIC_PREFIXES = ["تقنيات", "مجال", "أنظمة", "تطبيقات", "مفاهيم", "ممارسات", "نماذج", "آليات"]
316
- GENERIC_SUFFIXES = ["الذكية", "التعليمية", "الحديثة", "المتقدمة", "المبتكرة", "الرقمية"]
317
-
318
  def shape_phrase_like(ref: str, cand: str) -> str:
319
- """مواءمة التعريف/التنكير لتقارب الشكل العام."""
320
  return with_same_definiteness(ref, cand)
321
 
322
  def try_mlm_expand(cand: str, sentence_with_blank: str, target_len: int) -> Optional[str]:
323
- """توسيع المشتّت عبر MLM بإضافة كلمة قبل/بعد ليقترب الطول من الصحيحة."""
324
  masker = get_masker()
325
  if not masker:
326
  return None
327
- trials = []
328
- trials.append(sentence_with_blank.replace("_____", f"{masker.tokenizer.mask_token} {cand}"))
329
- trials.append(sentence_with_blank.replace("_____", f"{cand} {masker.tokenizer.mask_token}"))
330
-
331
- for masked_sent in trials:
332
  try:
333
- outs = masker(masked_sent, top_k=8)
334
  except Exception:
335
  continue
336
  for o in outs:
337
  tok = o["token_str"].strip()
338
- if not tok or re2.match(r"^[\p{P}\p{S}\d_]+$", tok):
339
  continue
340
- if masked_sent.strip().startswith(masker.tokenizer.mask_token):
 
 
341
  phrase = f"{tok} {cand}"
342
  else:
 
 
343
  phrase = f"{cand} {tok}"
344
- # تجنب التطابق مع الصحيحة بعد التطبيع
345
- if within_ratio(phrase, target_len) and norm_ar(phrase) != norm_ar(ref_phrase_cache.get("correct","")):
346
  return phrase
347
  return None
348
 
349
  def fallback_expand(cand: str, target_len: int) -> str:
350
- """تطويل بسيط بقوالب عامة إذا فشل الـMLM."""
351
- for p in GENERIC_PREFIXES:
352
  phrase = f"{p} {cand}"
353
- if within_ratio(phrase, target_len):
354
- return phrase
355
- for sfx in GENERIC_SUFFIXES:
356
  phrase = f"{cand} {sfx}"
357
- if within_ratio(phrase, target_len):
358
- return phrase
359
- phrase = f"{random.choice(GENERIC_PREFIXES)} {cand} {random.choice(GENERIC_SUFFIXES)}"
360
- return phrase
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
 
362
  # --- (J) مشتّتات ذكية تضمن دائمًا ≥3 خيارات فعلية + موازنة الطول ---
363
  def smart_distractors(correct: str, phrase_pool: List[str], sentence_with_blank: str, backup_terms: List[str], k: int = 3) -> List[str]:
364
  target = correct.strip()
365
- ref_phrase_cache["correct"] = target # لتجنّب مطابقة بعد التطويل
366
 
367
- # 1) مصادر متعددة
368
  neigh = nearest_terms(target, phrase_pool, k=48)
369
  mlm = mlm_fill(sentence_with_blank, target, k=24)
370
 
@@ -379,7 +433,6 @@ def smart_distractors(correct: str, phrase_pool: List[str], sentence_with_blank:
379
  if w not in seen:
380
  seen.add(w); raw_pool.append(w)
381
 
382
- # 2) إن لم يكفِ، أضف من backup_terms (من النص كله)
383
  for w in backup_terms:
384
  if len(raw_pool) >= max(60, k*10): break
385
  if not w or norm_ar(w) == norm_ar(target):
@@ -389,7 +442,6 @@ def smart_distractors(correct: str, phrase_pool: List[str], sentence_with_blank:
389
  if w not in seen:
390
  seen.add(w); raw_pool.append(w)
391
 
392
- # 3) فلترة POS إن توفّر
393
  filtered = []
394
  for w in raw_pool:
395
  if same_pos(target, w):
@@ -399,39 +451,35 @@ def smart_distractors(correct: str, phrase_pool: List[str], sentence_with_blank:
399
  if not filtered:
400
  filtered = raw_pool[:max(24, k*6)]
401
 
402
- # 4) موازنة الطول (أساسي): اجعل المشتّت قريب طولًا من الصحيحة
403
  target_words = word_len(target)
404
  shaped = []
405
  for w in filtered:
406
- cand = shape_phrase_like(target, w) # مواءمة "الـ"
407
- if within_ratio(cand, target_words, tol=0.2):
408
- shaped.append(cand)
409
  continue
410
- # جرّب توسيع بالـMLM
411
  expanded = try_mlm_expand(cand, sentence_with_blank, target_words)
412
- if expanded and within_ratio(expanded, target_words, tol=0.2):
413
- shaped.append(expanded)
414
  continue
415
- # fallback بقوالب عامة
416
  fb = fallback_expand(cand, target_words)
417
- shaped.append(fb)
 
418
 
419
- # إزالة أي تطويل خرج متطابقًا مع الصحيحة بعد التطبيع
420
  shaped = [s for s in shaped if norm_ar(s) != norm_ar(target)]
421
 
422
- # 5) ترتيب بالانسجام
423
- ranked = rank_by_sentence_coherence(sentence_with_blank, target, shaped, topk=max(k, 12))
 
424
 
425
- # 6) خذ أفضل k؛ وإن لم يكفِ، أكمل من shaped ثم filtered ثم raw_pool ثم backup_terms
426
  out = []
427
  for src in [ranked, shaped, filtered, raw_pool, backup_terms]:
428
  for w in src:
429
  if len(out) >= k: break
430
- if w and norm_ar(w) != norm_ar(target) and w not in out:
431
  out.append(w)
432
  if len(out) >= k: break
433
 
434
- # ضمان العدد بدون أي placeholders
435
  if len(out) < k:
436
  while len(out) < k and ranked:
437
  out.append(ranked[len(out) % len(ranked)])
@@ -440,6 +488,9 @@ def smart_distractors(correct: str, phrase_pool: List[str], sentence_with_blank:
440
 
441
  # ------------------ مُولِّد الأسئلة ------------------
442
  def make_mcqs(text:str, n:int=6)->List[MCQ]:
 
 
 
443
  sents = split_sents(text)
444
  if not sents:
445
  raise ValueError("النص قصير أو غير صالح.")
@@ -450,7 +501,6 @@ def make_mcqs(text:str, n:int=6)->List[MCQ]:
450
  freq = [w for w,_ in sorted(((t, text.count(t)) for t in tokens), key=lambda x:-x[1])]
451
  keyphrases = freq[:160]
452
 
453
- # مصطلحات احتياطية عامة من النص لضمان تعبئة المشتتات دائمًا
454
  backup_terms = harvest_backup_terms(text, limit=400)
455
 
456
  kp2best_sent = {}
@@ -479,12 +529,18 @@ def make_mcqs(text:str, n:int=6)->List[MCQ]:
479
 
480
  q = re2.sub(rf"(?<!\p{{L}}){re2.escape(kp)}(?!\p{{L}})", "_____", s, count=1)
481
 
482
- # مشتّتات ذكية تضمن دائمًا ≥3 ومتوازنة الطول
483
  pool = [x for x in keyphrases if x != kp]
484
  distracts = smart_distractors(kp, pool, q, backup_terms, k=3)
485
 
486
  ch = distracts + [kp]
487
- random.shuffle(ch)
 
 
 
 
 
 
 
488
  ans = ch.index(kp)
489
 
490
  items.append(MCQ(id=str(uuid.uuid4())[:8], question=q, choices=ch, answer_index=ans))
@@ -501,7 +557,6 @@ def to_records(items:List[MCQ])->List[dict]:
501
  for i,lbl in enumerate(["A","B","C","D"]):
502
  txt=(it.choices[i] if i<len(it.choices) else "—").strip()
503
  txt=txt.replace(",", "،").replace("?", "؟").replace(";", "؛")
504
- # منع أي Placeholder يظهر للمستخدم
505
  if txt == "—" or not txt:
506
  txt = "خيار"
507
  opts.append({"id":lbl,"text":txt or "خيار","is_correct":(i==it.answer_index)})
@@ -587,7 +642,7 @@ textarea{min-height:120px}
587
  .q-badge.ok{background:#0f2f22;color:#b6f4db;border:1px solid #145b44}
588
  .q-badge.err{background:#3a0d14;color:#ffd1d6;border:1px solid #6a1e2b}
589
 
590
- .q-text{color:var(--text);font-size:1.06rem;line-height:1.8;margin:8px 0 12px}
591
  .opts{display:flex;flex-direction:column;gap:8px}
592
  .opt{display:flex;gap:10px;align-items:center;background:#14161c;border:1px solid #2a2d3a;border-radius:12px;padding:10px;transition:background .15s,border-color .15s}
593
  .opt input{accent-color:var(--accent2)}
 
1
  # -*- coding: utf-8 -*-
2
  # صفحتان ثابتتان + Submit لكل سؤال يعمل فعليًا + منع تغيّر أبعاد صفحة الإدخال
3
 
4
+ import os, json, uuid, random, unicodedata, difflib
5
  from dataclasses import dataclass
6
  from pathlib import Path
7
  from typing import List, Tuple, Optional
 
93
  t = re2.sub(AR_DIAC, "", t)
94
  t = re2.sub(r"[إأآا]", "ا", t)
95
  t = re2.sub(r"[يى]", "ي", t)
96
+ t = re2.sub(r"\s+", " ", t)
97
  t = re2.sub(r'(\p{L})\1{2,}', r'\1', t)
98
  t = re2.sub(r'(\p{L})\1', r'\1', t)
99
  return t.strip()
 
138
  continue
139
  if 2 <= len(w) <= 42:
140
  phrases.append(w); seen.add(w)
 
141
  phrases_sorted = sorted(phrases, key=lambda x: (-len(x), x))
142
  kept=[]
143
  for p in phrases_sorted:
 
190
  tok = o["token_str"].strip()
191
  if tok and tok != correct and len(tok) >= 2 and not re2.match(r"^[\p{P}\p{S}\d_]+$", tok):
192
  cands.append(tok)
 
193
  seen=set(); uniq=[]
194
  for w in cands:
195
  if w not in seen:
 
267
  bonus = 0.2 if ("،" in s or ":" in s) else 0.0
268
  return base + bonus + penalties
269
 
270
+ # ================== (NEW) جودة المشتِّتات والتطويل ==================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
 
272
+ # كاش نص كامل لتحسين تقييم الجودة
273
+ global_full_text_cache = ""
274
+ # كاش عبارة صحيحة لتجنّب التطابق بعد التطويل
 
 
 
 
 
 
 
 
 
 
 
 
275
  ref_phrase_cache = {}
276
 
277
+ ADJ_WHITELIST = {"التعليمية","الذكية","الرقمية","الافتراضية","التكيفية","الحديثة","المتقدمة"}
278
+ NOUN_PREFIXES = {"مجال","تقنيات","أنظمة","مفاهيم","نماذج","ممارسات","آليات","تطبيقات"}
279
+
280
+ def is_arabic_word(w:str)->bool:
281
+ return bool(re2.match(r"^[\p{Arabic}]+$", w))
282
+
283
+ def clean_spaces(s:str)->str:
284
+ s = re2.sub(r"\s+", " ", s).strip()
285
+ s = re2.sub(r"\bال\s+ال\b", "ال", s)
286
+ return s
287
+
288
+ def bad_token(w:str)->bool:
289
+ return (not is_arabic_word(w)) or (len(w) < 2 or len(w) > 18)
290
+
291
+ def looks_weird(phrase:str)->bool:
292
+ toks = [t for t in re2.split(r"\s+", phrase.strip()) if t]
293
+ if len(toks) == 0: return True
294
+ if any(bad_token(t) for t in toks): return True
295
+ for i in range(1, len(toks)):
296
+ if toks[i] == toks[i-1]:
297
+ return True
298
+ if len(set(toks)) <= len(toks) - 1:
299
+ if any(toks.count(t) > 1 for t in toks):
300
+ return True
301
+ pos = [phrase_pos(t) or "" for t in toks]
302
+ streak = 0
303
+ for p in pos:
304
+ if p.startswith("ADJ"):
305
+ streak += 1
306
+ if streak > 2: return True
307
+ else:
308
+ streak = 0
309
+ return False
310
+
311
+ def quality_score(phrase:str, sentence:str, full_text:str)->float:
312
+ phrase = clean_spaces(phrase)
313
+ if looks_weird(phrase):
314
+ return 0.0
315
+ hits = sum(1 for t in set(phrase.split()) if t in full_text)
316
+ toks = phrase.split()
317
+ pos0 = phrase_pos(toks[0]) or ""
318
+ pos1 = phrase_pos(toks[1]) if len(toks)>1 else ""
319
+ nominal_bonus = 0.2 if (pos0.startswith("N") and (not pos1 or pos1.startswith("ADJ"))) else 0.0
320
+ return min(1.0, 0.3 + 0.1*hits + nominal_bonus)
321
+
322
  def word_len(s: str) -> int:
323
  return len([w for w in re2.split(r"\s+", s.strip()) if w])
324
 
325
+ def within_ratio(cand: str, target_len: int, tol: float = 0.15) -> bool:
326
  L = word_len(cand)
327
  return (target_len*(1-tol) <= L <= target_len*(1+tol))
328
 
 
 
 
 
329
  def shape_phrase_like(ref: str, cand: str) -> str:
 
330
  return with_same_definiteness(ref, cand)
331
 
332
  def try_mlm_expand(cand: str, sentence_with_blank: str, target_len: int) -> Optional[str]:
 
333
  masker = get_masker()
334
  if not masker:
335
  return None
336
+ trials = [
337
+ sentence_with_blank.replace("_____", f"{masker.tokenizer.mask_token} {cand}"),
338
+ sentence_with_blank.replace("_____", f"{cand} {masker.tokenizer.mask_token}")
339
+ ]
340
+ for masked in trials:
341
  try:
342
+ outs = masker(masked, top_k=12)
343
  except Exception:
344
  continue
345
  for o in outs:
346
  tok = o["token_str"].strip()
347
+ if not is_arabic_word(tok):
348
  continue
349
+ if masked.startswith(masker.tokenizer.mask_token):
350
+ if tok not in NOUN_PREFIXES:
351
+ continue
352
  phrase = f"{tok} {cand}"
353
  else:
354
+ if tok not in ADJ_WHITELIST:
355
+ continue
356
  phrase = f"{cand} {tok}"
357
+ phrase = clean_spaces(phrase)
358
+ if within_ratio(phrase, target_len, tol=0.15) and norm_ar(phrase) != norm_ar(ref_phrase_cache.get("correct","")) and not looks_weird(phrase):
359
  return phrase
360
  return None
361
 
362
  def fallback_expand(cand: str, target_len: int) -> str:
363
+ for p in NOUN_PREFIXES:
 
364
  phrase = f"{p} {cand}"
365
+ if within_ratio(phrase, target_len, tol=0.15):
366
+ return clean_spaces(phrase)
367
+ for sfx in ADJ_WHITELIST:
368
  phrase = f"{cand} {sfx}"
369
+ if within_ratio(phrase, target_len, tol=0.15):
370
+ return clean_spaces(phrase)
371
+ candidates = [f"{p} {cand}" for p in NOUN_PREFIXES] + [f"{cand} {sfx}" for sfx in ADJ_WHITELIST]
372
+ candidates = sorted(candidates, key=lambda ph: abs(word_len(ph) - target_len))
373
+ return clean_spaces(candidates[0])
374
+
375
+ # --- (H*) ترتيب المرشّحات بالانسجام + الجودة + منع التشابه ---
376
+ def rank_by_sentence_coherence(sentence_with_blank: str, correct: str, candidates: List[str], topk: int=3, full_text: str="") -> List[str]:
377
+ emb = get_embedder()
378
+ if not candidates:
379
+ return []
380
+ coherence = {}
381
+ if emb:
382
+ filled = [sentence_with_blank.replace("_____", c) for c in candidates]
383
+ ref = sentence_with_blank.replace("_____", correct)
384
+ vecs = embed_texts([ref] + filled)
385
+ if vecs is not None:
386
+ import numpy as np
387
+ ref_vec = vecs[0]; cand_vecs = vecs[1:]
388
+ sims = cand_vecs @ ref_vec
389
+ for i, c in enumerate(candidates):
390
+ coherence[c] = float(sims[i])
391
+ qscore = {c: quality_score(c, sentence_with_blank, full_text) for c in candidates}
392
+ def final_score(c):
393
+ coh = coherence.get(c, 0.0)
394
+ return 0.7*coh + 0.3*qscore.get(c, 0.0)
395
+ ranked = sorted(candidates, key=lambda c: final_score(c), reverse=True)
396
+
397
+ kept = []
398
+ for c in ranked:
399
+ if all(difflib.SequenceMatcher(None, c, x).ratio() < 0.90 for x in kept):
400
+ kept.append(c)
401
+ if len(kept) >= topk:
402
+ break
403
+ return kept[:topk]
404
+
405
+ # --- (I) حصاد مصطلحات احتياطية عالية التكرار من النص كله ---
406
+ def harvest_backup_terms(text: str, limit: int = 400) -> List[str]:
407
+ toks = re2.findall(r"[\p{L}][\p{L}\p{N}_\-]{1,}", text)
408
+ stats = {}
409
+ for t in toks:
410
+ tt = norm_ar(t)
411
+ if not good_kw(tt):
412
+ continue
413
+ stats[tt] = stats.get(tt, 0) + 1
414
+ top = [w for w,_ in sorted(stats.items(), key=lambda kv: -kv[1])]
415
+ return top[:limit]
416
 
417
  # --- (J) مشتّتات ذكية تضمن دائمًا ≥3 خيارات فعلية + موازنة الطول ---
418
  def smart_distractors(correct: str, phrase_pool: List[str], sentence_with_blank: str, backup_terms: List[str], k: int = 3) -> List[str]:
419
  target = correct.strip()
420
+ ref_phrase_cache["correct"] = target
421
 
 
422
  neigh = nearest_terms(target, phrase_pool, k=48)
423
  mlm = mlm_fill(sentence_with_blank, target, k=24)
424
 
 
433
  if w not in seen:
434
  seen.add(w); raw_pool.append(w)
435
 
 
436
  for w in backup_terms:
437
  if len(raw_pool) >= max(60, k*10): break
438
  if not w or norm_ar(w) == norm_ar(target):
 
442
  if w not in seen:
443
  seen.add(w); raw_pool.append(w)
444
 
 
445
  filtered = []
446
  for w in raw_pool:
447
  if same_pos(target, w):
 
451
  if not filtered:
452
  filtered = raw_pool[:max(24, k*6)]
453
 
 
454
  target_words = word_len(target)
455
  shaped = []
456
  for w in filtered:
457
+ cand = shape_phrase_like(target, w)
458
+ if within_ratio(cand, target_words, tol=0.15) and not looks_weird(cand):
459
+ shaped.append(clean_spaces(cand))
460
  continue
 
461
  expanded = try_mlm_expand(cand, sentence_with_blank, target_words)
462
+ if expanded and within_ratio(expanded, target_words, tol=0.15) and not looks_weird(expanded):
463
+ shaped.append(clean_spaces(expanded))
464
  continue
 
465
  fb = fallback_expand(cand, target_words)
466
+ if not looks_weird(fb):
467
+ shaped.append(clean_spaces(fb))
468
 
 
469
  shaped = [s for s in shaped if norm_ar(s) != norm_ar(target)]
470
 
471
+ ranked = rank_by_sentence_coherence(
472
+ sentence_with_blank, target, shaped, topk=max(k, 12), full_text=global_full_text_cache
473
+ )
474
 
 
475
  out = []
476
  for src in [ranked, shaped, filtered, raw_pool, backup_terms]:
477
  for w in src:
478
  if len(out) >= k: break
479
+ if w and norm_ar(w) != norm_ar(target) and w not in out and not looks_weird(w):
480
  out.append(w)
481
  if len(out) >= k: break
482
 
 
483
  if len(out) < k:
484
  while len(out) < k and ranked:
485
  out.append(ranked[len(out) % len(ranked)])
 
488
 
489
  # ------------------ مُولِّد الأسئلة ------------------
490
  def make_mcqs(text:str, n:int=6)->List[MCQ]:
491
+ global global_full_text_cache
492
+ global_full_text_cache = text
493
+
494
  sents = split_sents(text)
495
  if not sents:
496
  raise ValueError("النص قصير أو غير صالح.")
 
501
  freq = [w for w,_ in sorted(((t, text.count(t)) for t in tokens), key=lambda x:-x[1])]
502
  keyphrases = freq[:160]
503
 
 
504
  backup_terms = harvest_backup_terms(text, limit=400)
505
 
506
  kp2best_sent = {}
 
529
 
530
  q = re2.sub(rf"(?<!\p{{L}}){re2.escape(kp)}(?!\p{{L}})", "_____", s, count=1)
531
 
 
532
  pool = [x for x in keyphrases if x != kp]
533
  distracts = smart_distractors(kp, pool, q, backup_terms, k=3)
534
 
535
  ch = distracts + [kp]
536
+
537
+ # ترتيب غير عشوائي: تدوير حتمي لموضع الصحيحة
538
+ # 1) ضع الصحيحة مؤقتًا في النهاية
539
+ ch_sorted = sorted(ch, key=lambda c: c != kp)
540
+ # 2) تدوير بناءً على رقم السؤال (طول القائمة الحالية) وهاش العبارة
541
+ rot = (len(items) + (hash(kp) & 3)) % 4
542
+ ch = ch_sorted[-rot:] + ch_sorted[:-rot]
543
+
544
  ans = ch.index(kp)
545
 
546
  items.append(MCQ(id=str(uuid.uuid4())[:8], question=q, choices=ch, answer_index=ans))
 
557
  for i,lbl in enumerate(["A","B","C","D"]):
558
  txt=(it.choices[i] if i<len(it.choices) else "—").strip()
559
  txt=txt.replace(",", "،").replace("?", "؟").replace(";", "؛")
 
560
  if txt == "—" or not txt:
561
  txt = "خيار"
562
  opts.append({"id":lbl,"text":txt or "خيار","is_correct":(i==it.answer_index)})
 
642
  .q-badge.ok{background:#0f2f22;color:#b6f4db;border:1px solid #145b44}
643
  .q-badge.err{background:#3a0d14;color:#ffd1d6;border:1px solid #6a1e2b}
644
 
645
+ .q-text{color:var(--text);font-size:1.06rem;line-height:1.8;margin:8px 0 12px}
646
  .opts{display:flex;flex-direction:column;gap:8px}
647
  .opt{display:flex;gap:10px;align-items:center;background:#14161c;border:1px solid #2a2d3a;border-radius:12px;padding:10px;transition:background .15s,border-color .15s}
648
  .opt input{accent-color:var(--accent2)}