| |
| |
| |
| |
| |
| |
| 'use strict'; |
|
|
| const STOP = new Set('a an the is are was were be been being of to in on at and or but if then so as with for from by that this these those it its their his her my your our we you they he she i do does can cannot not only any anyone where what who why how when which there here now during has have had will would may might must shall'.split(/\s+/)); |
|
|
| |
| |
| function tokensOf(text) { |
| const words = (text.match(/[A-Za-z][A-Za-z'-]*/g) || []); |
| const toks = new Set(); |
| for (const w of words) { |
| const lw = w.toLowerCase(); |
| if (STOP.has(lw) || lw.length < 3) continue; |
| toks.add(lw); |
| } |
| return toks; |
| } |
|
|
| |
| function buildGraph(facts) { |
| const nodes = facts.map((text, i) => ({ i, text, toks: tokensOf(text) })); |
| |
| const byTok = new Map(); |
| for (const n of nodes) for (const t of n.toks) { if (!byTok.has(t)) byTok.set(t, []); byTok.get(t).push(n.i); } |
| |
| const adj = nodes.map(() => []); |
| for (const [tok, idxs] of byTok) { |
| if (idxs.length < 2) continue; |
| for (let a = 0; a < idxs.length; a++) for (let b = a + 1; b < idxs.length; b++) { |
| adj[idxs[a]].push({ j: idxs[b], via: tok }); |
| adj[idxs[b]].push({ j: idxs[a], via: tok }); |
| } |
| } |
| return { nodes, adj, byTok }; |
| } |
|
|
| |
| |
| |
| |
| function findChains(graph, qTokens, opts = {}) { |
| const maxLen = opts.maxLen || 5, maxChains = opts.maxChains || 8; |
| const { nodes, adj } = graph; |
| |
| const seeds = nodes.map(n => [n.i, [...n.toks].filter(t => qTokens.has(t)).length]) |
| .filter(([, k]) => k > 0).sort((a, b) => b[1] - a[1]).slice(0, 6).map(([i]) => i); |
| const chains = []; |
| for (const seed of seeds) { |
| |
| const stack = [{ path: [seed], used: new Set([seed]), toks: new Set(nodes[seed].toks) }]; |
| while (stack.length) { |
| const cur = stack.pop(); |
| chains.push({ path: cur.path.slice(), score: scoreChain(cur, qTokens, nodes) }); |
| if (cur.path.length >= maxLen) continue; |
| const tail = cur.path[cur.path.length - 1]; |
| |
| const nbrs = adj[tail].filter(e => !cur.used.has(e.j)) |
| .sort((a, b) => newRel(nodes[b.j], cur.toks, qTokens) - newRel(nodes[a.j], cur.toks, qTokens)) |
| .slice(0, 3); |
| for (const e of nbrs) { |
| const used = new Set(cur.used); used.add(e.j); |
| const toks = new Set(cur.toks); for (const t of nodes[e.j].toks) toks.add(t); |
| stack.push({ path: [...cur.path, e.j], used, toks }); |
| } |
| } |
| } |
| |
| const seen = new Set(); const out = []; |
| for (const c of chains.sort((a, b) => b.score - a.score)) { |
| const key = c.path.slice().sort((a, b) => a - b).join(','); |
| if (seen.has(key)) continue; seen.add(key); |
| out.push(c); |
| if (out.length >= maxChains) break; |
| } |
| return out; |
| } |
|
|
| function newRel(node, haveToks, qToks) { |
| let s = 0; |
| for (const t of node.toks) { if (!haveToks.has(t)) { s += 1; if (qToks.has(t)) s += 2; } } |
| return s; |
| } |
| |
| |
| function scoreChain(c, qToks, nodes) { |
| const covered = new Set(); |
| for (const i of c.path) for (const t of nodes[i].toks) if (qToks.has(t)) covered.add(t); |
| const qCover = covered.size / Math.max(1, qToks.size); |
| const lenPenalty = 1 / (1 + Math.max(0, c.path.length - 3) * 0.4); |
| return qCover * 2 + c.path.length * 0.15 * lenPenalty; |
| } |
|
|
| |
| |
| function trimChain(path, qToks, nodes) { |
| const target = new Set(); |
| for (const i of path) for (const t of nodes[i].toks) if (qToks.has(t)) target.add(t); |
| const got = new Set(); const kept = []; |
| for (const i of path) { |
| kept.push(i); |
| for (const t of nodes[i].toks) if (qToks.has(t)) got.add(t); |
| if (got.size >= target.size) break; |
| } |
| return kept; |
| } |
|
|
| |
| |
| |
| const PROPER = /\b([A-Z][a-z]+)\b/g; |
| |
| |
| const PROPER_PHRASE = /\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\b/g; |
| const ART = /^(The|A|An)\s+/; |
| function properPhrases(text) { |
| return (text.match(PROPER_PHRASE) || []).map(s => s.replace(ART, '').trim()).filter(Boolean); |
| } |
| function extractAnswer(path, q, nodes) { |
| const facts = path.map(i => nodes[i].text); |
| const qLow = q.toLowerCase(); |
| const qProper = new Set(properPhrases(q)); |
| const qWords = new Set((q.match(PROPER) || [])); |
| if (/\bwhy\b/.test(qLow)) return facts[0]; |
| if (/\bwhat would|what.*end|how .* end/.test(qLow)) return facts[0]; |
| |
| const found = []; |
| for (const f of facts) for (const m of properPhrases(f)) { |
| if (qProper.has(m)) continue; |
| if (m.split(/\s+/).every(w => qWords.has(w))) continue; |
| if (!found.includes(m)) found.push(m); |
| } |
| if (found.length) return found[found.length - 1]; |
| |
| |
| |
| const last = facts[facts.length - 1]; |
| const qLower = new Set([...qWords].map(w => w.toLowerCase())); |
| const relToks = [...tokensOf(q)].filter(t => !qLower.has(t)); |
| for (const v of relToks) { |
| const m = last.match(new RegExp('\\b' + stem(v) + "\\w*\\s+(.+?)[.!?]?$", 'i')); |
| if (m) return objectPhrase(m[1]); |
| } |
| return last; |
| } |
| |
| function objectPhrase(s) { |
| s = s.trim().replace(/[.!?]+$/, ''); |
| const called = s.match(/\bcalled\s+(.+)$/i); |
| if (called) return called[1].trim(); |
| return s.replace(/^(a|an|the)\s+/i, '').trim(); |
| } |
|
|
| |
| |
| |
| |
| const NEG = /\b(cannot|can't|forbidden|barred|never|must not|may not|not allowed|are not|is not)\b/i; |
| const PROPN = /\b([A-Z][a-z]+)\b/g; |
|
|
| |
| function classesOf(subject, facts) { |
| const out = new Set(); |
| for (const f of facts) { |
| const m = f.match(new RegExp('\\b' + subject + "\\b\\s+is\\s+(?:a|an)\\s+([a-z]+)", 'i')); |
| if (m) out.add(m[1].toLowerCase().replace(/s$/, '')); |
| } |
| return out; |
| } |
| |
| |
| function requiredEntities(goalToks, graph) { |
| const { nodes } = graph; |
| const req = new Set(); |
| const seeds = nodes.filter(n => [...n.toks].some(t => goalToks.has(t))); |
| const gate = /\b(only in|grows only|must pass through|must enter|lies beneath|beneath|sealed behind|behind|requires|reached? through|access to)\b/i; |
| |
| |
| |
| for (const t of goalToks) if (!STOP.has(t) && t.length > 3) req.add(t); |
| let frontier = seeds, seen = new Set(seeds.map(n => n.i)); |
| for (let d = 0; d < 3; d++) { |
| const next = []; |
| for (const n of frontier) { |
| if (gate.test(n.text)) { |
| for (const m of (n.text.match(PROPN) || [])) req.add(m); |
| for (const t of n.toks) if (!STOP.has(t) && t.length > 3) req.add(t); |
| } |
| for (const e of graph.adj[n.i]) if (!seen.has(e.j)) { seen.add(e.j); next.push(nodes[e.j]); } |
| } |
| frontier = next; |
| } |
| return req; |
| } |
| |
| |
| const QWORD = new Set(['Can', 'Does', 'Do', 'Is', 'Are', 'Could', 'Will', 'Would', 'May', 'Should', 'Who', 'What', 'Where', 'Why', 'When', 'Which', 'How', 'The', 'A', 'An']); |
| function answerCan(q, facts, graph) { |
| const subject = (q.match(PROPN) || []).find(w => !QWORD.has(w)) || null; |
| if (!subject) return null; |
| const qToks = tokensOf(q); |
| const goalToks = new Set([...qToks].filter(t => !t.includes(subject.toLowerCase()))); |
| const req = requiredEntities(goalToks, graph); |
| const classes = classesOf(subject, facts); |
| |
| for (const f of facts) { |
| if (!NEG.test(f)) continue; |
| const ents = (f.match(PROPN) || []); |
| const hitsReq = ents.find(e => req.has(e)); |
| if (!hitsReq) continue; |
| const clsHit = [...classes].find(c => new RegExp('\\b' + c + 's?\\b', 'i').test(f)); |
| if (clsHit) { |
| |
| const why = facts.filter(x => |
| new RegExp('\\b' + subject + "\\b\\s+is\\s+(?:a|an)\\s+" + clsHit, 'i').test(x) || x === f |
| ); |
| |
| const gate = facts.find(x => x.includes(hitsReq) && /\b(must pass through|only in|grows only|beneath)\b/i.test(x)); |
| if (gate) why.push(gate); |
| return { yesno: 'NO', subject, blockedBy: hitsReq, cls: clsHit, why }; |
| } |
| } |
| |
| |
| const goalProper = (q.match(PROPN) || []).filter(w => !QWORD.has(w) && w !== subject); |
| for (const f of facts) { |
| if (!new RegExp('^' + subject + '\\b').test(f)) continue; |
| if (!/\b(can|travels?|travel|move|go|freely|may|enter)\b/i.test(f)) continue; |
| if (!goalProper.length || goalProper.some(e => f.includes(e))) return { yesno: 'YES', subject, why: [f] }; |
| } |
| return { yesno: 'UNKNOWN', subject, unknown: true }; |
| } |
|
|
| |
| |
| |
| |
| |
| function answerWho(q, facts, graph) { |
| const qToks = tokensOf(q); |
| const subj = (q.match(PROPN) || []).find(w => !QWORD.has(w)) || null; |
| const goalToks = new Set([...qToks].filter(t => subj ? !t.includes(subj.toLowerCase()) : true)); |
| const req = requiredEntities(goalToks, graph); |
| |
| const barred = facts.filter(f => NEG.test(f) && (f.match(PROPN) || []).some(e => req.has(e))); |
| const gateEnt = barred.length ? (barred[0].match(PROPN) || []).find(e => req.has(e)) : [...req][0]; |
| |
| const people = new Set(); |
| for (const f of facts) { const m = f.match(/^([A-Z][a-z]+)\s+is\b/); if (m && m[1] !== subj && !QWORD.has(m[1])) people.add(m[1]); } |
| |
| for (const f of facts) { const m = f.match(/^([A-Z][a-z]+)\s+can\b/); if (m && m[1] !== subj) people.add(m[1]); } |
| |
| |
| const roleRule = facts.find(f => /\b(\w+)\s+may go where\b.*\b(cannot|can't|can not)\b/i.test(f)); |
| const enabledRole = roleRule ? (roleRule.match(/\b(\w+)\s+may go where/i) || [])[1].toLowerCase().replace(/s$/, '') : null; |
| const reqList = [...req].concat(gateEnt ? [gateEnt] : []); |
| const viable = []; |
| for (const p of people) { |
| const why = []; |
| |
| const expl = facts.find(f => new RegExp('^' + p + '\\b').test(f) && /\b(travel|go|move|enter|freely|clearance|access|reach)\b/i.test(f) && (!reqList.length || reqList.some(e => f.includes(e)))); |
| if (expl) why.push(expl); |
| |
| if (enabledRole && subj) { |
| const roleFact = facts.find(f => new RegExp('^' + p + "\\b.*\\b" + subj + "'s\\s+" + enabledRole, 'i').test(f)); |
| if (roleFact) { why.push(roleFact); why.push(roleRule); } |
| } |
| |
| const notBarredFact = facts.find(f => new RegExp('^' + p + "\\b.*\\bnot a[n]?\\b", 'i').test(f)); |
| const pClasses = classesOf(p, facts); |
| const barredClass = barred.length && [...pClasses].some(c => barred.some(b => new RegExp('\\b' + c + 's?\\b', 'i').test(b))); |
| if (notBarredFact && !barredClass) why.push(notBarredFact); |
| if (why.length) viable.push({ who: p, why: [...new Set(why)] }); |
| } |
| return { gateEnt, viable }; |
| } |
|
|
| |
| const stem = t => t.toLowerCase().slice(0, 5); |
| function factCovers(fact, tok) { return new RegExp('\\b' + stem(tok), 'i').test(fact); } |
|
|
| |
| |
| |
| |
| function answer(q, facts, graph) { |
| const qt = q.trim().toLowerCase(); |
| const qToks = tokensOf(q); |
| |
| |
| |
| const phrases = (q.match(/\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\b/g) || []).map(s => s.trim()) |
| .filter(s => !s.split(/\s+/).every(w => QWORD.has(w))); |
| const subjPhrase = phrases[0] || [...qToks].find(t => !STOP.has(t)) || null; |
| const subjTokSet = new Set((subjPhrase || '').toLowerCase().split(/\s+/).map(stem)); |
| const relToks = [...qToks].filter(t => !subjTokSet.has(stem(t))); |
| const classes = subjPhrase ? classesOf(subjPhrase.split(/\s+/).pop(), facts) : new Set(); |
| const subjForms = subjPhrase ? [...subjPhrase.split(/\s+/), ...classes] : []; |
| |
| const anchorFacts = graph.nodes.filter(n => |
| subjForms.some(s => factCovers(n.text, s)) && relToks.some(t => factCovers(n.text, t))); |
| const hasAnchor = anchorFacts.length > 0; |
|
|
| |
| if (/^(can|does|do|is|are|could|will|would|may|should)\b/.test(qt)) { |
| const a = answerCan(q, facts, graph); |
| if (a && a.yesno === 'NO') return { answer: `NO (${a.subject} is a ${a.cls}; ${a.cls}s barred from ${a.blockedBy})`, why: a.why, unknown: false }; |
| if (a && a.yesno === 'YES') return { answer: 'YES', why: a.why, unknown: false }; |
| return { unknown: true }; |
| } |
| |
| if (/^(who|which)\b/.test(qt) && /\b(can|could|would|may|get|reach|do|go)\b/.test(qt)) { |
| const a = answerWho(q, facts, graph); |
| if (a.viable.length) return { answer: a.viable.map(v => v.who).join(' or '), why: a.viable.flatMap(v => v.why), unknown: false }; |
| return { unknown: true }; |
| } |
| |
| if (/^why\b/.test(qt)) { |
| return hasAnchor ? { answer: anchorFacts[0].text, why: [anchorFacts[0].text], unknown: false } : { unknown: true }; |
| } |
| |
| if (/what would|what .*\bend\b/.test(qt)) { |
| const cf = graph.nodes.find(n => /\b(end|will end|ends)\b/i.test(n.text) && relToks.some(t => factCovers(n.text, t))); |
| return cf ? { answer: cf.text, why: [cf.text], unknown: false } : { unknown: true }; |
| } |
| |
| |
| if (!hasAnchor) return { unknown: true }; |
| const anchor = anchorFacts[0]; |
| |
| |
| const objType = relToks.find(t => /^(city|country|state|town|region|planet|place|person)$/.test(t)); |
| if (objType && !factCovers(anchor.text, objType)) { |
| |
| for (const e of graph.adj[anchor.i]) { |
| const nb = graph.nodes[e.j]; |
| if (factCovers(nb.text, objType)) { |
| return { answer: extractAnswer([anchor.i, nb.i], q, graph.nodes), why: [anchor.text, nb.text], unknown: false }; |
| } |
| } |
| |
| return { unknown: true }; |
| } |
| return { answer: extractAnswer([anchor.i], q, graph.nodes), why: [anchor.text], unknown: false }; |
| } |
|
|
| module.exports = { tokensOf, buildGraph, findChains, trimChain, extractAnswer, classesOf, requiredEntities, answerCan, answerWho, answer, STOP }; |
|
|