Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
-
# واجهة
|
|
|
|
| 3 |
import os, json, uuid, random, unicodedata
|
| 4 |
from dataclasses import dataclass
|
| 5 |
from pathlib import Path
|
|
@@ -31,22 +32,19 @@ def get_ocr(model_id: str):
|
|
| 31 |
# ------------------ PDF/TXT → نص ------------------
|
| 32 |
def extract_text_with_pypdf(path: str) -> str:
|
| 33 |
reader = PdfReader(path)
|
| 34 |
-
|
| 35 |
for p in reader.pages:
|
| 36 |
-
try:
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
chunks.append(t)
|
| 41 |
-
return "\n".join(chunks).strip()
|
| 42 |
|
| 43 |
def pdf_to_images(path: str, zoom: float=2.5) -> List[Image.Image]:
|
| 44 |
-
doc = fitz.open(path)
|
| 45 |
-
M = fitz.Matrix(zoom, zoom)
|
| 46 |
imgs = []
|
| 47 |
for pg in doc:
|
| 48 |
pix = pg.get_pixmap(matrix=M, alpha=False)
|
| 49 |
-
imgs.append(Image.frombytes("RGB",
|
| 50 |
doc.close()
|
| 51 |
return imgs
|
| 52 |
|
|
@@ -56,7 +54,7 @@ def extract_text_with_ocr(path: str, model_id: str, zoom: float) -> str:
|
|
| 56 |
for i, img in enumerate(pdf_to_images(path, zoom=zoom), start=1):
|
| 57 |
try:
|
| 58 |
out = ocr(img)
|
| 59 |
-
txt = out[0].get("generated_text",
|
| 60 |
except Exception:
|
| 61 |
txt = ""
|
| 62 |
parts.append(f"--- [Page {i}] ---\n{txt}")
|
|
@@ -65,21 +63,20 @@ def extract_text_with_ocr(path: str, model_id: str, zoom: float) -> str:
|
|
| 65 |
def is_good(t: str, min_chars=250, min_alpha=0.15) -> bool:
|
| 66 |
if len(t) < min_chars: return False
|
| 67 |
alnum = sum(ch.isalnum() for ch in t)
|
| 68 |
-
return (alnum
|
| 69 |
|
| 70 |
-
def file_to_text(path: str, model_id=DEFAULT_TROCR_MODEL, zoom=DEFAULT_TROCR_ZOOM) -> Tuple[str,
|
| 71 |
ext = Path(path).suffix.lower()
|
| 72 |
if ext == ".txt":
|
| 73 |
-
with open(path,
|
| 74 |
-
return f.read(), "plain text"
|
| 75 |
raw = extract_text_with_pypdf(path)
|
| 76 |
if is_good(raw): return raw, "embedded (pypdf)"
|
| 77 |
return extract_text_with_ocr(path, model_id, zoom), "OCR (TrOCR)"
|
| 78 |
|
| 79 |
-
# ------------------ تنظيف عربي
|
| 80 |
AR_DIAC = r"[ًٌٍَُِّْ]"
|
| 81 |
-
def strip_headers(t:
|
| 82 |
-
out
|
| 83 |
for ln in t.splitlines():
|
| 84 |
if re2.match(r"^\s*--- \[Page \d+\] ---\s*$", ln): continue
|
| 85 |
if re2.match(r"^\s*(Page\s*\d+|صفحة\s*\d+)\s*$", ln): continue
|
|
@@ -87,7 +84,7 @@ def strip_headers(t: str) -> str:
|
|
| 87 |
out.append(ln)
|
| 88 |
return "\n".join(out)
|
| 89 |
|
| 90 |
-
def norm_ar(t:
|
| 91 |
t = unicodedata.normalize("NFKC", t)
|
| 92 |
t = re2.sub(r"[ـ]", "", t)
|
| 93 |
t = re2.sub(AR_DIAC, "", t)
|
|
@@ -98,8 +95,8 @@ def norm_ar(t: str) -> str:
|
|
| 98 |
t = re2.sub(r'(\p{L})\1', r'\1', t)
|
| 99 |
return t.strip()
|
| 100 |
|
| 101 |
-
def postprocess(raw:
|
| 102 |
-
t = strip_headers(raw).replace("\r",
|
| 103 |
t = re2.sub(r"\n{3,}", "\n\n", t)
|
| 104 |
t = re2.sub(r"\d+\s*[\[\(][^\]\)]*[\]\)]", " ", t)
|
| 105 |
t = re2.sub(r"\[\d+\]", " ", t)
|
|
@@ -116,168 +113,171 @@ class MCQ:
|
|
| 116 |
choices: List[str]
|
| 117 |
answer_index: int
|
| 118 |
|
| 119 |
-
def split_sents(t:
|
| 120 |
-
s
|
| 121 |
-
return [x for x in s if len(x)
|
| 122 |
|
| 123 |
-
def yake_keywords(t:
|
| 124 |
ex = yake.KeywordExtractor(lan='ar', n=1, top=k)
|
| 125 |
-
cands = [w for w,
|
| 126 |
-
out
|
| 127 |
for k in cands:
|
| 128 |
-
k
|
| 129 |
if not k or k in seen or k in AR_STOP: continue
|
| 130 |
-
if len(k)
|
| 131 |
seen.add(k); out.append(k)
|
| 132 |
return out
|
| 133 |
|
| 134 |
-
def good_kw(kw:
|
| 135 |
-
return kw and len(kw)
|
| 136 |
|
| 137 |
-
def distractors(correct:
|
| 138 |
-
L
|
| 139 |
for w in pool:
|
| 140 |
-
w
|
| 141 |
-
if not w or w
|
| 142 |
if re2.match(r"^[\p{P}\p{S}\d_]+$", w): continue
|
| 143 |
-
if abs(len(w)
|
| 144 |
random.shuffle(cand)
|
| 145 |
-
out
|
| 146 |
-
while len(out)
|
| 147 |
return out
|
| 148 |
|
| 149 |
-
def make_mcqs(text:
|
| 150 |
-
sents
|
| 151 |
if not sents: raise ValueError("النص قصير أو غير صالح.")
|
| 152 |
-
kws
|
| 153 |
-
sent_for
|
| 154 |
for s in sents:
|
| 155 |
for kw in kws:
|
| 156 |
if good_kw(kw) and re2.search(rf"(?<!\p{{L}}){re2.escape(kw)}(?!\p{{L}})", s) and kw not in sent_for:
|
| 157 |
-
sent_for[kw]
|
| 158 |
-
items
|
| 159 |
for kw in [k for k in kws if k in sent_for]:
|
| 160 |
-
if len(items)
|
| 161 |
-
s
|
| 162 |
if s in used: continue
|
| 163 |
-
q
|
| 164 |
-
ch
|
| 165 |
-
random.shuffle(ch); ans
|
| 166 |
items.append(MCQ(id=str(uuid.uuid4())[:8], question=q, choices=ch, answer_index=ans))
|
| 167 |
used.add(s)
|
| 168 |
if not items: raise RuntimeError("تعذّر توليد أسئلة.")
|
| 169 |
return items
|
| 170 |
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
recs = []
|
| 174 |
for it in items:
|
| 175 |
-
opts
|
| 176 |
-
for i,
|
| 177 |
-
txt
|
| 178 |
-
txt
|
| 179 |
-
opts.append({"id":
|
| 180 |
-
recs.append({"id":
|
| 181 |
return recs
|
| 182 |
|
| 183 |
-
# ------------------ HTML
|
| 184 |
def render_quiz_html(records: List[dict]) -> str:
|
| 185 |
-
parts
|
| 186 |
for i, rec in enumerate(records, start=1):
|
| 187 |
-
qid = rec["id"]
|
| 188 |
-
|
| 189 |
-
for o in
|
|
|
|
|
|
|
| 190 |
lid, txt = o["id"], o["text"]
|
| 191 |
opts_html.append(f"""
|
| 192 |
-
<label class="opt">
|
| 193 |
<input type="radio" name="q_{qid}" value="{lid}">
|
| 194 |
<span class="opt-letter">{lid}</span>
|
| 195 |
<span class="opt-text">{txt}</span>
|
| 196 |
</label>
|
| 197 |
""")
|
| 198 |
parts.append(f"""
|
| 199 |
-
<div class="q-card" data-qid="{qid}">
|
| 200 |
<div class="q-header">
|
| 201 |
<div class="q-title">السؤال {i}</div>
|
| 202 |
<div class="q-badge" id="b_{qid}" hidden></div>
|
| 203 |
</div>
|
| 204 |
<div class="q-text">{qtxt}</div>
|
| 205 |
<div class="opts">{''.join(opts_html)}</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
</div>
|
| 207 |
""")
|
| 208 |
-
# عدّاد تقدّم بسيط أعلى مجموعة الأسئلة
|
| 209 |
html = f"""
|
| 210 |
<div id="quiz" class="quiz-wrap">
|
| 211 |
-
<div class="
|
| 212 |
{''.join(parts)}
|
| 213 |
</div>
|
| 214 |
<script>
|
| 215 |
-
//
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
}});
|
| 222 |
-
|
| 223 |
-
if (el) el.textContent = String(filled);
|
| 224 |
-
}};
|
| 225 |
-
document.querySelectorAll('.q-card input[type="radio"]').forEach(i => i.addEventListener('change', updateCounter));
|
| 226 |
-
updateCounter();
|
| 227 |
</script>
|
| 228 |
"""
|
| 229 |
return html
|
| 230 |
|
| 231 |
# ------------------ بناء الامتحان ------------------
|
| 232 |
def build_quiz(text_area, file_path, n, model_id, zoom):
|
| 233 |
-
|
| 234 |
-
if not
|
| 235 |
-
return
|
| 236 |
-
if
|
| 237 |
-
raw
|
| 238 |
else:
|
| 239 |
-
raw,
|
| 240 |
cleaned = postprocess(raw)
|
| 241 |
items = make_mcqs(cleaned, n=int(n))
|
| 242 |
recs = to_records(items)
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
# ------------------ التصحيح (يعيد الدرجة + خريطة الصحة) ------------------
|
| 247 |
-
def grade(state, answers_json):
    """Score submitted answers against the stored quiz records.

    Args:
        state: Mapping holding the quiz under the ``"records"`` key, or a
            falsy value when no quiz has been built (treated as zero records).
        answers_json: JSON object text mapping question id to the chosen
            option id. Empty or ``None`` is read as ``"{}"``.

    Returns:
        A ``(score_markdown, correctness_json)`` pair. ``correctness_json``
        maps each question id to ``true``/``false``. It is ``"{}"`` when the
        payload is unreadable or when any question is unanswered.
    """
    try:
        user_map = json.loads(answers_json or "{}")
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError; TypeError covers non-text input.
        return "حدث خطأ في قراءة الإجابات.", "{}"
    if not isinstance(user_map, dict):
        # Valid JSON that is not an object (list, null, number) has no .get();
        # report it as unreadable instead of raising AttributeError.
        return "حدث خطأ في قراءة الإجابات.", "{}"

    recs = state["records"] if state else []

    # Refuse to grade until every question has a non-empty answer.
    if any(not user_map.get(rec["id"]) for rec in recs):
        return "⚠️ يجب الإجابة على جميع الأسئلة قبل الإرسال.", "{}"

    correctness = {}
    for rec in recs:
        # Id of the first option flagged correct; "" when none is flagged.
        expected = next((o["id"] for o in rec["options"] if o["is_correct"]), "")
        correctness[rec["id"]] = user_map.get(rec["id"]) == expected

    correct = sum(1 for ok in correctness.values() if ok)
    score_text = f"### نتيجتك: **{correct} / {len(recs)}**"
    return score_text, json.dumps(correctness, ensure_ascii=False)
|
| 269 |
-
|
| 270 |
-
# =======================================================
|
| 271 |
-
# واجهة
|
| 272 |
-
# =======================================================
|
| 273 |
CSS = """
|
| 274 |
:root{
|
| 275 |
--bg:#0e0e11; --panel:#15161a; --card:#1a1b20; --muted:#a7b0be;
|
| 276 |
--text:#f6f7fb; --accent:#6ee7b7; --accent2:#34d399; --danger:#ef4444; --border:#262833;
|
| 277 |
}
|
| 278 |
body{direction:rtl; font-family:system-ui,'Cairo','IBM Plex Arabic',sans-serif; background:var(--bg);}
|
| 279 |
-
.gradio-container{max-width:
|
| 280 |
-
h2.top{color:#eaeaf2;margin:
|
| 281 |
.panel{background:var(--panel);border:1px solid var(--border);border-radius:14px;padding:14px;box-shadow:0 16px 38px rgba(0,0,0,.35)}
|
| 282 |
.small{opacity:.9;color:#d9dee8}
|
| 283 |
|
|
@@ -288,7 +288,7 @@ h2.top{color:#eaeaf2;margin:4px 0 16px}
|
|
| 288 |
textarea{min-height:120px}
|
| 289 |
|
| 290 |
/* الامتحان */
|
| 291 |
-
.
|
| 292 |
.q-card{background:var(--card);border:1px solid var(--border);border-radius:14px;padding:14px;margin:12px 0}
|
| 293 |
.q-header{display:flex;gap:10px;align-items:center;justify-content:space-between;margin-bottom:6px}
|
| 294 |
.q-title{color:#eaeaf2;font-weight:800}
|
|
@@ -298,18 +298,31 @@ textarea{min-height:120px}
|
|
| 298 |
|
| 299 |
.q-text{color:var(--text);font-size:1.06rem;line-height:1.8;margin:8px 0 12px}
|
| 300 |
.opts{display:flex;flex-direction:column;gap:8px}
|
| 301 |
-
.opt{display:flex;gap:10px;align-items:center;background:#14161c;border:1px solid #2a2d3a;border-radius:12px;padding:10px}
|
| 302 |
.opt input{accent-color:var(--accent2)}
|
| 303 |
.opt-letter{display:inline-flex;width:28px;height:28px;border-radius:8px;background:#0f1116;border:1px solid #2a2d3a;align-items:center;justify-content:center;font-weight:800;color:#dfe6f7}
|
| 304 |
.opt-text{color:#eaeaf2}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
"""
|
| 306 |
|
|
|
|
| 307 |
with gr.Blocks(title="Question Generator", css=CSS) as demo:
|
| 308 |
gr.Markdown("<h2 class='top'>Question Generator</h2>")
|
| 309 |
|
| 310 |
with gr.Group(elem_classes=["panel"]):
|
| 311 |
-
gr.Markdown("**أدخل نصًا أو ارفع ملفًا، حدّد عدد الأسئلة، ثم اضغط
|
| 312 |
-
<br>يجب الإجابة على <u>جميع</u> الأسئلة قبل الإرسال.", elem_classes=["small"])
|
| 313 |
text_area = gr.Textbox(lines=6, placeholder="ألصق هنا مقطع نصي...", label="أدخل نصًا")
|
| 314 |
num_q = gr.Slider(4, 20, value=DEFAULT_NUM_QUESTIONS, step=1, label="عدد الأسئلة")
|
| 315 |
file_comp = gr.File(label="أو اختر ملف PDF/TXT", file_count="single",
|
|
@@ -328,78 +341,12 @@ with gr.Blocks(title="Question Generator", css=CSS) as demo:
|
|
| 328 |
btn_build = gr.Button("توليد الأسئلة", elem_classes=["button-primary"])
|
| 329 |
toast = gr.Markdown("", elem_classes=["small"])
|
| 330 |
|
| 331 |
-
|
| 332 |
-
quiz_html = gr.HTML("") # مكان عرض جميع الأسئلة دفعة واحدة
|
| 333 |
-
btn_submit = gr.Button("إنهاء وإرسال الإجابات", elem_classes=["button-primary"])
|
| 334 |
-
answers_box = gr.Textbox(visible=False)
|
| 335 |
-
score_md = gr.Markdown("")
|
| 336 |
-
correctness_box = gr.Textbox(visible=False) # نستقبل فيها خريطة الصحة لتلوين الواجهة
|
| 337 |
|
| 338 |
-
# توليد الامتحان
|
| 339 |
btn_build.click(
|
| 340 |
build_quiz,
|
| 341 |
inputs=[text_area, file_comp, num_q, trocr_model, trocr_zoom],
|
| 342 |
-
outputs=[
|
| 343 |
-
)
|
| 344 |
-
|
| 345 |
-
# JS: جمع الإجابات + المنع إن كان هناك سؤال غير مُجاب
|
| 346 |
-
js_collect = """
|
| 347 |
-
function () {
|
| 348 |
-
// عدّاد وإجابات
|
| 349 |
-
const cards = Array.from(document.querySelectorAll('.q-card'));
|
| 350 |
-
const map = {};
|
| 351 |
-
let missing = 0;
|
| 352 |
-
cards.forEach(c => {
|
| 353 |
-
const qid = c.getAttribute('data-qid');
|
| 354 |
-
const chosen = c.querySelector('input[type="radio"]:checked');
|
| 355 |
-
if (!chosen) { missing += 1; }
|
| 356 |
-
map[qid] = chosen ? chosen.value : null;
|
| 357 |
-
});
|
| 358 |
-
if (missing > 0) {
|
| 359 |
-
// أظهر رسالة منع بسيطة قرب أعلى الاختبار
|
| 360 |
-
let pill = document.querySelector('.progress-pill');
|
| 361 |
-
if (pill) {
|
| 362 |
-
pill.style.borderColor = '#6a1e2b';
|
| 363 |
-
pill.style.color = '#ffd1d6';
|
| 364 |
-
pill.textContent = `لا يمكن الإرسال: ${missing} سؤال/أسئلة بدون إجابة`;
|
| 365 |
-
}
|
| 366 |
-
return ["", ""]; // لا نرسل شيئًا للتصحيح
|
| 367 |
-
}
|
| 368 |
-
return [JSON.stringify(map), "go"]; // go = سمح بالإرسال
|
| 369 |
-
}
|
| 370 |
-
"""
|
| 371 |
-
|
| 372 |
-
# عند الضغط Submit:
|
| 373 |
-
# 1) اجمع الإجابات (JS). إذا لم يجب على الجميع، لن نرسل للتصحيح.
|
| 374 |
-
btn_submit.click(
|
| 375 |
-
None, inputs=None, outputs=[answers_box, correctness_box], js=js_collect
|
| 376 |
-
).then(
|
| 377 |
-
# 2) صحّح فقط إذا وُجدت إجابات (answers_box غير فارغ)
|
| 378 |
-
grade, inputs=[state, answers_box], outputs=[score_md, correctness_box]
|
| 379 |
-
).then(
|
| 380 |
-
# 3) لون الواجهة بالصح/الخطأ (Correct!/Incorrect.) بدون تعليل
|
| 381 |
-
None, inputs=[correctness_box], outputs=None,
|
| 382 |
-
js="""
|
| 383 |
-
(correctness_json) => {
|
| 384 |
-
if (!correctness_json) return;
|
| 385 |
-
let okmap = {};
|
| 386 |
-
try { okmap = JSON.parse(correctness_json); } catch(e){ return; }
|
| 387 |
-
Object.entries(okmap).forEach(([qid, ok]) => {
|
| 388 |
-
const badge = document.getElementById('b_'+qid);
|
| 389 |
-
if (!badge) return;
|
| 390 |
-
badge.hidden = false;
|
| 391 |
-
if (ok) {
|
| 392 |
-
badge.classList.remove('err'); badge.classList.add('ok');
|
| 393 |
-
badge.textContent = 'Correct!';
|
| 394 |
-
} else {
|
| 395 |
-
badge.classList.remove('ok'); badge.classList.add('err');
|
| 396 |
-
badge.textContent = 'Incorrect.';
|
| 397 |
-
}
|
| 398 |
-
});
|
| 399 |
-
const el = document.querySelector('.progress-pill');
|
| 400 |
-
if (el) { el.style.borderColor = '#2a2d3a'; el.style.color = '#cfd5e3'; }
|
| 401 |
-
}
|
| 402 |
-
"""
|
| 403 |
)
|
| 404 |
|
| 405 |
if __name__ == "__main__":
|
|
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
+
# واجهة حديثة: سؤال + زر تحقق لكل سؤال، تلوين أخضر/أحمر مباشرة، بدون نتيجة نهائية
|
| 3 |
+
|
| 4 |
import os, json, uuid, random, unicodedata
|
| 5 |
from dataclasses import dataclass
|
| 6 |
from pathlib import Path
|
|
|
|
| 32 |
# ------------------ PDF/TXT → نص ------------------
|
| 33 |
def extract_text_with_pypdf(path: str) -> str:
    """Return the embedded text of every page of the PDF at *path*.

    Page texts are joined with newlines and the result is stripped. A page
    whose extraction raises, or yields nothing, contributes an empty string.
    """

    def _page_text(page) -> str:
        # Best-effort: one unreadable page must not abort the whole document.
        try:
            return page.extract_text() or ""
        except Exception:
            return ""

    pages = PdfReader(path).pages
    return "\n".join(_page_text(page) for page in pages).strip()
|
|
|
|
|
|
|
| 41 |
|
| 42 |
def pdf_to_images(path: str, zoom: float=2.5) -> List[Image.Image]:
    """Render each page of the PDF at *path* to an RGB image.

    Args:
        path: Location of the PDF to open with ``fitz.open``.
        zoom: Scale factor applied on both axes through ``fitz.Matrix``.

    Returns:
        One ``Image`` per page, in document order.
    """
    doc = fitz.open(path)
    try:
        matrix = fitz.Matrix(zoom, zoom)
        imgs = []
        for pg in doc:
            pix = pg.get_pixmap(matrix=matrix, alpha=False)
            imgs.append(Image.frombytes("RGB", (pix.width, pix.height), pix.samples))
        return imgs
    finally:
        # Close the document even when rendering a page fails, so the file
        # handle is not leaked.
        doc.close()
|
| 50 |
|
|
|
|
| 54 |
for i, img in enumerate(pdf_to_images(path, zoom=zoom), start=1):
|
| 55 |
try:
|
| 56 |
out = ocr(img)
|
| 57 |
+
txt = out[0].get("generated_text","").strip() if out else ""
|
| 58 |
except Exception:
|
| 59 |
txt = ""
|
| 60 |
parts.append(f"--- [Page {i}] ---\n{txt}")
|
|
|
|
| 63 |
def is_good(t: str, min_chars=250, min_alpha=0.15) -> bool:
    """Decide whether extracted text *t* looks usable.

    The text is rejected when it is shorter than *min_chars*, or when the
    share of alphanumeric characters is below *min_alpha*.
    """
    total = len(t)
    if total < min_chars:
        return False
    alnum_count = len([ch for ch in t if ch.isalnum()])
    # max(1, ...) keeps the division defined for an empty string.
    ratio = alnum_count / max(1, total)
    return ratio >= min_alpha
|
| 67 |
|
| 68 |
+
def file_to_text(path: str, model_id=DEFAULT_TROCR_MODEL, zoom=DEFAULT_TROCR_ZOOM) -> Tuple[str,str]:
    """Read *path* as text and report which method produced it.

    ``.txt`` files are read as UTF-8 with undecodable bytes ignored. Any other
    file goes through ``extract_text_with_pypdf``; when that text fails
    ``is_good`` the file is passed to ``extract_text_with_ocr`` with
    *model_id* and *zoom*.

    Returns:
        ``(text, method_label)``.
    """
    if Path(path).suffix.lower() == ".txt":
        with open(path, "r", encoding="utf-8", errors="ignore") as fh:
            contents = fh.read()
        return contents, "plain text"

    embedded = extract_text_with_pypdf(path)
    if not is_good(embedded):
        return extract_text_with_ocr(path, model_id, zoom), "OCR (TrOCR)"
    return embedded, "embedded (pypdf)"
|
| 75 |
|
| 76 |
+
# ------------------ تنظيف عربي ------------------
|
| 77 |
AR_DIAC = r"[ًٌٍَُِّْ]"
|
| 78 |
+
def strip_headers(t:str)->str:
|
| 79 |
+
out=[]
|
| 80 |
for ln in t.splitlines():
|
| 81 |
if re2.match(r"^\s*--- \[Page \d+\] ---\s*$", ln): continue
|
| 82 |
if re2.match(r"^\s*(Page\s*\d+|صفحة\s*\d+)\s*$", ln): continue
|
|
|
|
| 84 |
out.append(ln)
|
| 85 |
return "\n".join(out)
|
| 86 |
|
| 87 |
+
def norm_ar(t:str)->str:
|
| 88 |
t = unicodedata.normalize("NFKC", t)
|
| 89 |
t = re2.sub(r"[ـ]", "", t)
|
| 90 |
t = re2.sub(AR_DIAC, "", t)
|
|
|
|
| 95 |
t = re2.sub(r'(\p{L})\1', r'\1', t)
|
| 96 |
return t.strip()
|
| 97 |
|
| 98 |
+
def postprocess(raw:str)->str:
|
| 99 |
+
t = strip_headers(raw).replace("\r","\n")
|
| 100 |
t = re2.sub(r"\n{3,}", "\n\n", t)
|
| 101 |
t = re2.sub(r"\d+\s*[\[\(][^\]\)]*[\]\)]", " ", t)
|
| 102 |
t = re2.sub(r"\[\d+\]", " ", t)
|
|
|
|
| 113 |
choices: List[str]
|
| 114 |
answer_index: int
|
| 115 |
|
| 116 |
+
def split_sents(t:str)->List[str]:
    """Split *t* with ``SENT_SPLIT`` and keep stripped pieces of 25+ characters."""
    kept = []
    for piece in SENT_SPLIT.split(t):
        piece = piece.strip()
        # Empty pieces have length 0, so this single check drops them too.
        if len(piece) >= 25:
            kept.append(piece)
    return kept
|
| 119 |
|
| 120 |
+
def yake_keywords(t: str, k: int = 160) -> List[str]:
    """Extract candidate keywords from *t* with YAKE and filter them.

    Builds ``yake.KeywordExtractor(lan='ar', n=1, top=k)`` and walks the
    ``(keyword, score)`` pairs it returns, ignoring the score.

    A candidate is kept, in extractor order, when after stripping it is:
    non-empty, not seen before, not in ``AR_STOP``, at least 3 characters
    long, and not made only of punctuation/symbol characters.

    Args:
        t: Text to analyse.
        k: Passed to the extractor as ``top``.

    Returns:
        The filtered, de-duplicated keywords.
    """
    ex = yake.KeywordExtractor(lan='ar', n=1, top=k)
    out = []
    seen = set()
    # The loop variable is deliberately not named `k`, so the parameter is
    # never rebound inside the function.
    for cand, _score in ex.extract_keywords(t):
        word = cand.strip()
        if not word or word in seen or word in AR_STOP:
            continue
        if len(word) < 3 or re2.match(r"^[\p{P}\p{S}]+$", word):
            continue
        seen.add(word)
        out.append(word)
    return out
|
| 130 |
|
| 131 |
+
def good_kw(kw: str) -> bool:
    """Tell whether *kw* may be used as a quiz answer.

    A keyword qualifies when it is non-empty, at least 3 characters long, not
    in ``AR_STOP``, and not composed only of punctuation, symbols, digits or
    underscores.

    Always returns a real ``bool``; empty or ``None`` input gives ``False``
    rather than the falsy operand itself.
    """
    if not kw or len(kw) < 3:
        return False
    if kw in AR_STOP:
        return False
    return re2.match(r"^[\p{P}\p{S}\d_]+$", kw) is None
|
| 133 |
|
| 134 |
+
def distractors(correct: str, pool: List[str], k: int = 3) -> List[str]:
    """Pick up to *k* wrong choices for *correct* from *pool*.

    A pool word is eligible when, after stripping, it is non-empty, differs
    from the stripped correct answer, is not in ``AR_STOP``, is not made only
    of punctuation/symbols/digits/underscores, and its length is within 3
    characters of the correct answer's length. Each distinct word is
    considered once, so a repeated pool entry cannot yield two identical
    choices.

    Args:
        correct: The right answer.
        pool: Candidate words to draw from.
        k: Number of distractors to return.

    Returns:
        A list of *k* items: a random selection of eligible words, padded
        with ``"—"`` when fewer than *k* are available.
    """
    # Strip once and use the same value for both the length window and the
    # equality test, so a padded answer cannot reappear as its own distractor.
    target = correct.strip()
    target_len = len(target)

    cand = []
    seen = set()
    for w in pool:
        w = w.strip()
        if not w or w == target or w in seen or w in AR_STOP:
            continue
        if re2.match(r"^[\p{P}\p{S}\d_]+$", w):
            continue
        if abs(len(w) - target_len) <= 3:
            seen.add(w)
            cand.append(w)

    random.shuffle(cand)
    out = cand[:k]
    out.extend("—" for _ in range(k - len(out)))
    return out
|
| 145 |
|
| 146 |
+
def make_mcqs(text: str, n: int = 6) -> List[MCQ]:
    """Build up to *n* fill-in-the-blank questions from *text*.

    Steps:
      1. Split the text with ``split_sents``.
      2. Get keywords from ``yake_keywords``. If that yields nothing, fall
         back to the 80 most frequent distinct word tokens.
      3. Map each usable keyword (``good_kw``) to the first sentence that
         contains it as a whole word.
      4. In keyword order, blank the keyword in its sentence and add three
         ``distractors``. Each sentence is used for at most one question.

    Args:
        text: Cleaned source text.
        n: Maximum number of questions.

    Returns:
        The generated ``MCQ`` items.

    Raises:
        ValueError: No usable sentence was found in *text*.
        RuntimeError: No question could be generated.
    """
    from collections import Counter  # local import keeps the block self-contained

    sents = split_sents(text)
    if not sents:
        raise ValueError("النص قصير أو غير صالح.")

    kws = yake_keywords(text)
    if not kws:
        # Count whole tokens once. The previous form called text.count() per
        # token occurrence, which was quadratic, counted substrings, and left
        # duplicates in the keyword list.
        tokens = re2.findall(r"[\p{L}\p{N}_]+", text)
        kws = [tok for tok, _ in Counter(tokens).most_common(80)]

    # One compiled whole-word pattern per usable keyword: the keyword must not
    # be preceded or followed by another letter.
    patterns = {
        kw: re2.compile(rf"(?<!\p{{L}}){re2.escape(kw)}(?!\p{{L}})")
        for kw in kws
        if good_kw(kw)
    }

    # First sentence in which each keyword occurs.
    sent_for = {}
    for s in sents:
        for kw, pat in patterns.items():
            if kw not in sent_for and pat.search(s):
                sent_for[kw] = s

    items = []
    used = set()
    for kw in kws:
        if kw not in sent_for:
            continue
        if len(items) >= n:
            break
        s = sent_for[kw]
        if s in used:
            continue
        q = patterns[kw].sub("_____", s, count=1)
        ch = distractors(kw, [x for x in kws if x != kw], 3) + [kw]
        random.shuffle(ch)
        ans = ch.index(kw)
        items.append(MCQ(id=str(uuid.uuid4())[:8], question=q, choices=ch, answer_index=ans))
        used.add(s)

    if not items:
        raise RuntimeError("تعذّر توليد أسئلة.")
    return items
|
| 167 |
|
| 168 |
+
def to_records(items:List[MCQ])->List[dict]:
    """Convert MCQ items into plain dict records with four lettered options.

    Each record has ``id``, the stripped ``question`` and ``options``. Every
    option carries ``id`` (``"A"``..``"D"``), ``text`` and ``is_correct``.
    Missing or blank choices are shown as ``"—"``; Latin comma, question mark
    and semicolon are swapped for their Arabic forms.
    """
    letters = ("A", "B", "C", "D")
    arabic_punct = str.maketrans({",": "،", "?": "؟", ";": "؛"})

    def _option(mcq, pos, letter):
        raw = mcq.choices[pos] if pos < len(mcq.choices) else "—"
        shown = raw.strip().translate(arabic_punct)
        return {"id": letter, "text": shown or "—", "is_correct": (pos == mcq.answer_index)}

    return [
        {
            "id": mcq.id,
            "question": mcq.question.strip(),
            "options": [_option(mcq, pos, letter) for pos, letter in enumerate(letters)],
        }
        for mcq in items
    ]
|
| 178 |
|
| 179 |
+
# ------------------ HTML: كل الأسئلة + زر تحقق لكل سؤال ------------------
|
| 180 |
def render_quiz_html(records: List[dict]) -> str:
    """Render quiz records as one HTML fragment with a check button per question.

    Each record becomes a ``.q-card`` holding the question text, one radio
    input per option, a check button and a note span. The card's
    ``data-correct`` attribute carries the id of the first option flagged
    ``is_correct``; the inline script compares the checked radio against it,
    colours the chosen option, fills the badge and disables the card's inputs.

    Question and option text come from user-supplied documents, so every
    interpolated value is HTML-escaped before it enters the markup.

    Args:
        records: Dicts with ``id``, ``question`` and ``options``; each option
            has ``id``, ``text`` and ``is_correct``.

    Returns:
        The HTML fragment, including its ``<script>`` element.
    """
    from html import escape  # local import keeps the block self-contained

    parts = []
    for i, rec in enumerate(records, start=1):
        qid = escape(str(rec["id"]), quote=True)
        qtxt = escape(str(rec["question"]))
        cor = escape(
            str(next((o["id"] for o in rec["options"] if o["is_correct"]), "")),
            quote=True,
        )
        opts_html = []
        for o in rec["options"]:
            lid = escape(str(o["id"]), quote=True)
            txt = escape(str(o["text"]))
            opts_html.append(f"""
            <label class="opt" data-letter="{lid}">
              <input type="radio" name="q_{qid}" value="{lid}">
              <span class="opt-letter">{lid}</span>
              <span class="opt-text">{txt}</span>
            </label>
            """)
        parts.append(f"""
        <div class="q-card" data-qid="{qid}" data-correct="{cor}">
          <div class="q-header">
            <div class="q-title">السؤال {i}</div>
            <div class="q-badge" id="b_{qid}" hidden></div>
          </div>
          <div class="q-text">{qtxt}</div>
          <div class="opts">{''.join(opts_html)}</div>
          <div class="q-actions">
            <button class="q-submit">تحقّق</button>
            <span class="q-note" id="n_{qid}"></span>
          </div>
        </div>
        """)

    # NOTE(review): whether the host page executes a <script> delivered inside
    # this fragment depends on how the HTML component inserts it; confirm
    # against the Gradio version in use.
    # Braces are doubled because the template is an f-string.
    page = f"""
    <div id="quiz" class="quiz-wrap">
      <div class="hint">اختر إجابة لكل سؤال ثم اضغط <b>تحقّق</b>، سيتم تلوين اختيارك أخضر/أحمر ويُقفل السؤال.</div>
      {''.join(parts)}
    </div>
    <script>
      // تفويض حدث للنقر على كل أزرار "تحقّق"
      document.querySelectorAll('.q-card .q-submit').forEach(btn => {{
        btn.addEventListener('click', (e) => {{
          const card = e.target.closest('.q-card');
          const qid = card.getAttribute('data-qid');
          const correct = card.getAttribute('data-correct');
          const note = document.getElementById('n_'+qid);
          const badge = document.getElementById('b_'+qid);
          const chosenInput = card.querySelector('input[type="radio"]:checked');

          if (!chosenInput) {{
            if (note) {{
              note.textContent = 'اختر إجابة أولاً';
              note.className = 'q-note warn';
            }}
            return;
          }}

          // امسح ألوان قديمة
          card.querySelectorAll('.opt').forEach(l => l.classList.remove('ok','err'));

          const chosen = chosenInput.value;
          const chosenLabel = chosenInput.closest('.opt');

          if (chosen === correct) {{
            chosenLabel.classList.add('ok');
            if (badge) {{ badge.hidden = false; badge.className='q-badge ok'; badge.textContent='Correct!'; }}
          }} else {{
            chosenLabel.classList.add('err');
            if (badge) {{ badge.hidden = false; badge.className='q-badge err'; badge.textContent='Incorrect.'; }}
          }}

          // قفل السؤال بعد التحقق
          card.querySelectorAll('input[type="radio"]').forEach(i => i.disabled = true);
          e.target.disabled = true;
          if (note) note.textContent = '';
        }});
      }});
    </script>
    """
    return page
|
| 257 |
|
| 258 |
# ------------------ بناء الامتحان ------------------
|
| 259 |
def build_quiz(text_area, file_path, n, model_id, zoom):
    """Generate the quiz HTML from pasted text or an uploaded file.

    Pasted text takes precedence; the file is read with ``file_to_text`` only
    when the text box is empty. The text is cleaned with ``postprocess``,
    turned into questions by ``make_mcqs`` and rendered by
    ``render_quiz_html``.

    Args:
        text_area: Pasted text, possibly ``None`` or blank.
        file_path: Path of the uploaded file, or a falsy value.
        n: Requested number of questions (converted with ``int``).
        model_id: OCR model id forwarded to ``file_to_text``.
        zoom: Render zoom forwarded to ``file_to_text`` (converted with ``float``).

    Returns:
        ``(quiz_html, status_message)``. ``quiz_html`` is ``""`` when there is
        no input or when question generation fails; the message then explains
        why.
    """
    text_area = (text_area or "").strip()
    if not text_area and not file_path:
        return "", "🛈 أدخل نصًا أو ارفع ملفًا."

    if text_area:
        raw = text_area
    else:
        raw, _ = file_to_text(file_path, model_id=model_id, zoom=float(zoom))

    cleaned = postprocess(raw)
    count = int(n)
    try:
        items = make_mcqs(cleaned, n=count)
    except (ValueError, RuntimeError) as exc:
        # make_mcqs reports unusable text through these exceptions; show the
        # message in the status area, as the empty-input case already does.
        return "", f"⚠️ {exc}"

    recs = to_records(items)
    return render_quiz_html(recs), f"تم توليد {len(recs)} سؤالًا. لكل سؤال زر تحقّق مستقل."
|
| 271 |
+
|
| 272 |
+
# ------------------ الثيم ------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
CSS = """
|
| 274 |
:root{
|
| 275 |
--bg:#0e0e11; --panel:#15161a; --card:#1a1b20; --muted:#a7b0be;
|
| 276 |
--text:#f6f7fb; --accent:#6ee7b7; --accent2:#34d399; --danger:#ef4444; --border:#262833;
|
| 277 |
}
|
| 278 |
body{direction:rtl; font-family:system-ui,'Cairo','IBM Plex Arabic',sans-serif; background:var(--bg);}
|
| 279 |
+
.gradio-container{max-width:980px;margin:0 auto;padding:12px 12px 40px;}
|
| 280 |
+
h2.top{color:#eaeaf2;margin:6px 0 16px}
|
| 281 |
.panel{background:var(--panel);border:1px solid var(--border);border-radius:14px;padding:14px;box-shadow:0 16px 38px rgba(0,0,0,.35)}
|
| 282 |
.small{opacity:.9;color:#d9dee8}
|
| 283 |
|
|
|
|
| 288 |
textarea{min-height:120px}
|
| 289 |
|
| 290 |
/* الامتحان */
|
| 291 |
+
.hint{color:#cfd5e3;background:#0f1116;border:1px solid #2a2d3a;border-radius:10px;padding:10px;margin:10px 0}
|
| 292 |
.q-card{background:var(--card);border:1px solid var(--border);border-radius:14px;padding:14px;margin:12px 0}
|
| 293 |
.q-header{display:flex;gap:10px;align-items:center;justify-content:space-between;margin-bottom:6px}
|
| 294 |
.q-title{color:#eaeaf2;font-weight:800}
|
|
|
|
| 298 |
|
| 299 |
.q-text{color:var(--text);font-size:1.06rem;line-height:1.8;margin:8px 0 12px}
|
| 300 |
.opts{display:flex;flex-direction:column;gap:8px}
|
| 301 |
+
.opt{display:flex;gap:10px;align-items:center;background:#14161c;border:1px solid #2a2d3a;border-radius:12px;padding:10px;transition:background .15s,border-color .15s}
|
| 302 |
.opt input{accent-color:var(--accent2)}
|
| 303 |
.opt-letter{display:inline-flex;width:28px;height:28px;border-radius:8px;background:#0f1116;border:1px solid #2a2d3a;align-items:center;justify-content:center;font-weight:800;color:#dfe6f7}
|
| 304 |
.opt-text{color:#eaeaf2}
|
| 305 |
+
|
| 306 |
+
/* التلوين بعد التحقق */
|
| 307 |
+
.opt.ok{background:#0f2f22;border-color:#1b6a52}
|
| 308 |
+
.opt.err{background:#3a0d14;border-color:#6a1e2b}
|
| 309 |
+
|
| 310 |
+
.q-actions{display:flex;gap:10px;align-items:center;margin-top:10px}
|
| 311 |
+
.q-actions .q-submit{
|
| 312 |
+
background:linear-gradient(180deg,var(--accent),var(--accent2));
|
| 313 |
+
border:none;color:#0b0d10;font-weight:800;border-radius:10px;padding:8px 14px;cursor:pointer;
|
| 314 |
+
}
|
| 315 |
+
.q-actions .q-submit:disabled{opacity:.5;cursor:not-allowed}
|
| 316 |
+
.q-note{color:#ffd1d6}
|
| 317 |
+
.q-note.warn{color:#ffd1d6}
|
| 318 |
"""
|
| 319 |
|
| 320 |
+
# ------------------ واجهة Gradio ------------------
|
| 321 |
with gr.Blocks(title="Question Generator", css=CSS) as demo:
|
| 322 |
gr.Markdown("<h2 class='top'>Question Generator</h2>")
|
| 323 |
|
| 324 |
with gr.Group(elem_classes=["panel"]):
|
| 325 |
+
gr.Markdown("**أدخل نصًا أو ارفع ملفًا، حدّد عدد الأسئلة، ثم اضغط توليد.**", elem_classes=["small"])
|
|
|
|
| 326 |
text_area = gr.Textbox(lines=6, placeholder="ألصق هنا مقطع نصي...", label="أدخل نصًا")
|
| 327 |
num_q = gr.Slider(4, 20, value=DEFAULT_NUM_QUESTIONS, step=1, label="عدد الأسئلة")
|
| 328 |
file_comp = gr.File(label="أو اختر ملف PDF/TXT", file_count="single",
|
|
|
|
| 341 |
btn_build = gr.Button("توليد الأسئلة", elem_classes=["button-primary"])
|
| 342 |
toast = gr.Markdown("", elem_classes=["small"])
|
| 343 |
|
| 344 |
+
quiz_html = gr.HTML("") # أسئلة + زر تحقق لكل سؤال
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
|
|
|
|
| 346 |
btn_build.click(
|
| 347 |
build_quiz,
|
| 348 |
inputs=[text_area, file_comp, num_q, trocr_model, trocr_zoom],
|
| 349 |
+
outputs=[quiz_html, toast]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
)
|
| 351 |
|
| 352 |
if __name__ == "__main__":
|