// reason.js — bounded REASONING by chain-finding. The composer retrieves the
// right premises but dumps them unordered with no conclusion (reason-test R53).
// Reasoning = find a PATH of facts connected by shared entities, from the
// question's entities toward an answer, ORDERED. Every fact is verbatim corpus,
// so the chain is bounded; the inference is the SELECTION of the path. The
// conclusion is then composed from the path's tail entities (also corpus spans).
'use strict';

// Function words the tokenizer ignores. Kept as one space-separated list so the
// vocabulary is easy to scan and extend; STOP is the lookup set built from it.
const STOP_WORD_LIST = 'a an the is are was were be been being of to in on at and or but if then so as with for from by that this these those it its their his her my your our we you they he she i do does can cannot not only any anyone where what who why how when which there here now during has have had will would may might must shall';
const STOP = new Set(STOP_WORD_LIST.split(/\s+/));

// significant tokens of a fact = content words (proper nouns + nouns/verbs),
// lowercased. These are the LINKS between facts.
/**
 * Extracts the significant tokens of a text: lowercased words of at least three
 * characters that are not stop words. These tokens are what link facts together.
 *
 * @param {string} text - A fact or question.
 * @returns {Set<string>} Distinct tokens, in order of first appearance.
 */
function tokensOf(text) {
  // A word starts with a letter and may continue with letters, apostrophes or hyphens.
  const rawWords = text.match(/[A-Za-z][A-Za-z'-]*/g) || [];
  const significant = rawWords
    .map(word => word.toLowerCase())
    .filter(word => word.length >= 3 && !STOP.has(word));
  return new Set(significant);
}

// build the fact graph: facts sharing a significant token are linked by it.
/**
 * Builds the fact graph: two facts are linked once for every significant token
 * they share.
 *
 * @param {string[]} facts - Corpus facts, one sentence per entry.
 * @returns {{nodes: Array<{i: number, text: string, toks: Set<string>}>,
 *            adj: Array<Array<{j: number, via: string}>>,
 *            byTok: Map<string, number[]>}}
 *   nodes: one per fact; adj[i]: edges leaving fact i, each naming the shared
 *   token; byTok: token -> indices of the facts containing it.
 */
function buildGraph(facts) {
  const nodes = facts.map((text, i) => ({ i, text, toks: tokensOf(text) }));

  // Inverted index: which facts mention each token.
  const byTok = new Map();
  nodes.forEach(node => {
    node.toks.forEach(tok => {
      const holders = byTok.get(tok);
      if (holders) holders.push(node.i);
      else byTok.set(tok, [node.i]);
    });
  });

  // Every unordered pair of facts sharing a token gets an edge in each direction.
  const adj = nodes.map(() => []);
  byTok.forEach((holders, via) => {
    if (holders.length < 2) return;              // a token seen once links nothing
    holders.forEach((left, pos) => {
      for (const right of holders.slice(pos + 1)) {
        adj[left].push({ j: right, via });
        adj[right].push({ j: left, via });
      }
    });
  });

  return { nodes, adj, byTok };
}

// find reasoning chains: depth-first search (explicit stack) from the facts that
// best match the question's entities, expanding along shared-token links and
// preferring neighbors that introduce NEW tokens, query-relevant ones most of
// all (progress toward an answer). Returns ranked chains, each a
// { path, score } object whose path is an ordered list of fact indices.
/**
 * Enumerates candidate reasoning chains through the fact graph.
 *
 * Starting from the facts that share the most tokens with the question, a
 * bounded depth-first search extends each chain along graph edges, at every
 * step keeping only the neighbours that add the most new (ideally
 * query-relevant) tokens. Every prefix visited is recorded as a chain.
 *
 * @param {{nodes: Array<{i: number, toks: Set<string>}>,
 *          adj: Array<Array<{j: number, via: string}>>}} graph - Fact graph.
 * @param {Set<string>} qTokens - Significant tokens of the question.
 * @param {{maxLen?: number, maxChains?: number, maxSeeds?: number, branch?: number}} [opts]
 *   maxLen: longest chain in facts (default 5); maxChains: how many chains to
 *   return (default 8); maxSeeds: how many starting facts to try (default 6);
 *   branch: neighbours explored per step (default 3).
 * @returns {Array<{path: number[], score: number}>} Chains ranked by score,
 *   with at most one chain per distinct set of facts.
 */
function findChains(graph, qTokens, opts = {}) {
  const maxLen = opts.maxLen || 5;
  const maxChains = opts.maxChains || 8;
  const maxSeeds = opts.maxSeeds || 6;
  const branch = opts.branch || 3;
  const { nodes, adj } = graph;

  // seed facts: those sharing the most tokens with the question
  const seeds = nodes
    .map(n => [n.i, [...n.toks].filter(t => qTokens.has(t)).length])
    .filter(([, k]) => k > 0)
    .sort((a, b) => b[1] - a[1])
    .slice(0, maxSeeds)
    .map(([i]) => i);

  const chains = [];
  for (const seed of seeds) {
    // DFS up to maxLen; each stack entry carries its path, the facts already
    // used, and the union of their tokens.
    const stack = [{ path: [seed], used: new Set([seed]), toks: new Set(nodes[seed].toks) }];
    while (stack.length) {
      const cur = stack.pop();
      chains.push({ path: cur.path.slice(), score: scoreChain(cur, qTokens, nodes) });
      if (cur.path.length >= maxLen) continue;
      const tail = cur.path[cur.path.length - 1];

      // adj holds one edge per shared token, so the same neighbour can appear
      // several times. Collapse to distinct facts first; otherwise a neighbour
      // sharing many tokens would occupy every branch slot by itself.
      const candidates = new Set();
      for (const e of adj[tail]) if (!cur.used.has(e.j)) candidates.add(e.j);

      // prefer neighbours that add query-relevant tokens (gain computed once each)
      const nbrs = [...candidates]
        .map(j => ({ j, gain: newRel(nodes[j], cur.toks, qTokens) }))
        .sort((a, b) => b.gain - a.gain)
        .slice(0, branch);

      for (const { j } of nbrs) {
        const used = new Set(cur.used); used.add(j);
        const toks = new Set(cur.toks); for (const t of nodes[j].toks) toks.add(t);
        stack.push({ path: [...cur.path, j], used, toks });
      }
    }
  }

  // dedupe by path-set (order-insensitive), rank by score
  const seen = new Set();
  const out = [];
  chains.sort((a, b) => b.score - a.score);
  for (const c of chains) {
    const key = c.path.slice().sort((a, b) => a - b).join(',');
    if (seen.has(key)) continue;
    seen.add(key);
    out.push(c);
    if (out.length >= maxChains) break;
  }
  return out;
}

/**
 * Scores how much a candidate fact would add to a chain: 1 point for each
 * token the chain does not have yet, 3 points when that token also occurs in
 * the question.
 *
 * @param {{toks: Set<string>}} node - Candidate fact.
 * @param {Set<string>} haveToks - Tokens already covered by the chain.
 * @param {Set<string>} qToks - Question tokens.
 * @returns {number} The gain (0 when the fact adds nothing new).
 */
function newRel(node, haveToks, qToks) {
  return [...node.toks]
    .filter(tok => !haveToks.has(tok))
    .reduce((gain, tok) => gain + (qToks.has(tok) ? 3 : 1), 0);
}
// a chain scores high if it covers many of the query tokens; length earns only
// a small bonus that is damped past 3 facts, so padding with irrelevant facts
// gains little.
/**
 * Scores a chain: twice the fraction of question tokens it covers, plus a
 * small per-fact bonus that is damped once the chain exceeds three facts.
 *
 * @param {{path: number[]}} c - Chain; path holds fact indices.
 * @param {Set<string>} qToks - Question tokens.
 * @param {Array<{toks: Set<string>}>} nodes - Graph nodes indexed by fact index.
 * @returns {number} The score.
 */
function scoreChain(c, qToks, nodes) {
  const hits = new Set(
    c.path.flatMap(idx => [...nodes[idx].toks]).filter(tok => qToks.has(tok))
  );
  const coverage = hits.size / Math.max(1, qToks.size);   // guard against an empty question
  const hops = c.path.length;
  const extraHops = Math.max(0, hops - 3);
  const lenPenalty = 1 / (1 + extraHops * 0.4);           // full bonus up to 3 facts
  return coverage * 2 + hops * 0.15 * lenPenalty;
}

// TRIM a chain to the minimal prefix that still covers all the query tokens the
// full chain reaches — drops the irrelevant tail hops DFS tacked on.
/**
 * Cuts a chain down to its shortest prefix that already covers every question
 * token the full chain covers.
 *
 * @param {number[]} path - Fact indices, in chain order.
 * @param {Set<string>} qToks - Question tokens.
 * @param {Array<{toks: Set<string>}>} nodes - Graph nodes indexed by fact index.
 * @returns {number[]} The kept prefix (a new array; empty for an empty path).
 */
function trimChain(path, qToks, nodes) {
  const hitsOf = idx => [...nodes[idx].toks].filter(tok => qToks.has(tok));
  const reachable = new Set(path.flatMap(idx => hitsOf(idx)));

  const covered = new Set();
  let cut = 0;
  while (cut < path.length) {
    for (const tok of hitsOf(path[cut])) covered.add(tok);
    cut += 1;
    if (covered.size >= reachable.size) break;   // nothing further down adds coverage
  }
  return path.slice(0, cut);
}

// EXTRACT the answer: the entity the chain DERIVES that the question asks for.
// proper-noun questions (where/who/what X) → the new proper noun the chain
// arrives at (not already in the question). why → the leading causal fact.
// A single capitalized word. Global flag: use it through String.prototype.match,
// which returns every occurrence and keeps no state between calls.
const PROPER = /\b([A-Z][a-z]+)\b/g;
// proper-noun PHRASES: consecutive capitalized words = one entity ("Helix
// Academy", "Queen Ilse"), so extraction returns the whole name not the tail.
const PROPER_PHRASE = /\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\b/g;
// A leading article, either in front of the name ("The Helix Academy") or
// standing alone because it merely opened a sentence ("The reactor ...").
const ART = /^(The|A|An)(?:\s+|$)/;

/**
 * Lists the proper-noun phrases of a text, in order of appearance.
 *
 * A leading article is stripped from each phrase. A phrase that consists of
 * nothing but a capitalized article (a sentence-initial "The"/"An" followed by
 * a lowercase word) is dropped rather than reported as a name.
 *
 * @param {string} text - A fact or question.
 * @returns {string[]} Phrases; may contain repeats when a name recurs.
 */
function properPhrases(text) {
  return (text.match(PROPER_PHRASE) || [])
    .map(s => s.replace(ART, '').trim())
    .filter(Boolean);
}
/**
 * Pulls the answer out of a chain of facts.
 *
 * - "why", "what would", "what ... end" and "how ... end" questions return the
 *   chain's first fact verbatim.
 * - Otherwise the answer is the most recently introduced proper-noun phrase
 *   along the chain that the question does not already contain.
 * - With no such phrase, the object following a relation word of the question
 *   in the last fact is returned, and failing that the last fact itself.
 *
 * @param {number[]} path - Fact indices, in chain order.
 * @param {string} q - The question text.
 * @param {Array<{text: string}>} nodes - Graph nodes indexed by fact index.
 * @returns {string} The extracted answer.
 */
function extractAnswer(path, q, nodes) {
  const facts = path.map(i => nodes[i].text);
  const loweredQ = q.toLowerCase();

  // causal / conditional questions: the leading premise is the answer
  const leadsWithPremise =
    /\bwhy\b/.test(loweredQ) || /\bwhat would|what.*end|how .* end/.test(loweredQ);
  if (leadsWithPremise) return facts[0];

  // proper-noun PHRASES along the chain that the question does not mention,
  // either as a whole phrase or word by word
  const askedPhrases = new Set(properPhrases(q));
  const askedWords = new Set(q.match(PROPER) || []);
  const isNew = phrase =>
    !askedPhrases.has(phrase) && !phrase.split(/\s+/).every(w => askedWords.has(w));
  const derived = new Set();                    // keeps first-seen order, no repeats
  facts.forEach(fact => {
    properPhrases(fact).filter(isNew).forEach(phrase => derived.add(phrase));
  });
  if (derived.size > 0) return [...derived].pop();

  // no proper noun → COMMON-NOUN object: the phrase after the relation word in
  // the last fact ("Theralin treats a rare blood disorder" → "rare blood
  // disorder"; "requires a compound called zerinol" → "zerinol").
  const finalFact = facts[facts.length - 1];
  const askedLower = new Set([...askedWords].map(w => w.toLowerCase()));
  const relationToks = [...tokensOf(q)].filter(t => !askedLower.has(t));
  for (const tok of relationToks) {
    const pattern = new RegExp('\\b' + stem(tok) + "\\w*\\s+(.+?)[.!?]?$", 'i');
    const hit = finalFact.match(pattern);
    if (hit) return objectPhrase(hit[1]);
  }
  return finalFact;
}
// clean an object phrase: drop leading article; "a compound called X" → "X"
/**
 * Tidies an object phrase: trailing sentence punctuation is removed, then
 * either the name after "called" is kept ("a compound called X" → "X") or a
 * leading article is dropped.
 *
 * @param {string} s - Raw phrase.
 * @returns {string} The cleaned phrase.
 */
function objectPhrase(s) {
  const phrase = s.trim().replace(/[.!?]+$/, '');
  const named = /\bcalled\s+(.+)$/i.exec(phrase);
  const core = named ? named[1] : phrase.replace(/^(a|an|the)\s+/i, '');
  return core.trim();
}

// ---- yes/no reasoning by CONFLICT detection ----
// A hallucination-free reasoner must be able to say NO with proof. "Can SUBJECT
// reach GOAL?" → find what the goal REQUIRES (chain from the goal object), then
// check if a CONSTRAINT fact forbids SUBJECT's class from a required entity.
// Negation / prohibition cues (case-insensitive). A fact matching NEG is
// treated as a candidate constraint by answerCan and answerWho.
const NEG = /\b(cannot|can't|forbidden|barred|never|must not|may not|not allowed|are not|is not)\b/i;
// A single capitalized word (one capital letter followed by lowercase letters).
// Global flag: this file only uses it through String.prototype.match, which
// returns every occurrence.
const PROPN = /\b([A-Z][a-z]+)\b/g;

// classes SUBJECT belongs to: "Mara is a healer" -> {healer}
/**
 * Collects the classes a subject is said to belong to, from facts of the form
 * "<subject> is a/an <class>" (matched case-insensitively).
 *
 * @param {string} subject - Name to look for; taken literally, so regular
 *   expression metacharacters in it carry no special meaning.
 * @param {string[]} facts - Corpus facts.
 * @returns {Set<string>} Lowercased class words with one trailing "s" removed
 *   ("Mara is a healer" -> {healer}).
 */
function classesOf(subject, facts) {
  // Escape the subject so names such as "J.R" or "C++" neither over-match nor
  // make the RegExp constructor throw.
  const literal = String(subject).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // Built once: the pattern does not depend on the fact being tested.
  const isA = new RegExp('\\b' + literal + "\\b\\s+is\\s+(?:a|an)\\s+([a-z]+)", 'i');
  const out = new Set();
  for (const f of facts) {
    const m = f.match(isA);
    if (m) out.add(m[1].toLowerCase().replace(/s$/, ''));
  }
  return out;
}
// entities the GOAL requires: chain from the goal object, collecting the places/
// things gating it ("grows only in X", "must pass through Y").
/**
 * Collects the entities a goal depends on.
 *
 * Starts from the facts that mention a goal token and walks the graph outward
 * for three rings. Every visited fact that contains a gating phrase ("only in",
 * "must pass through", "requires", ...) contributes its capitalized words and
 * its content tokens. The goal's own content tokens are always included.
 *
 * @param {Set<string>} goalToks - Tokens describing the goal.
 * @param {{nodes: Array<{i: number, text: string, toks: Set<string>}>,
 *          adj: Array<Array<{j: number}>>}} graph - Fact graph.
 * @returns {Set<string>} Required entities: capitalized words as written,
 *   common nouns lowercased.
 */
function requiredEntities(goalToks, graph) {
  const { nodes, adj } = graph;
  const gate = /\b(only in|grows only|must pass through|must enter|lies beneath|beneath|sealed behind|behind|requires|reached? through|access to)\b/i;
  const isContentWord = tok => tok.length > 3 && !STOP.has(tok);

  // gated entities may be PROPER nouns (Korr) OR lowercase common nouns (bridge,
  // reactor), so both are collected; the goal's own content nouns seed the set.
  const req = new Set([...goalToks].filter(isContentWord));

  let frontier = nodes.filter(n => [...n.toks].some(t => goalToks.has(t)));
  const visited = new Set(frontier.map(n => n.i));
  let depth = 0;
  while (depth < 3) {
    const nextRing = [];
    frontier.forEach(node => {
      if (gate.test(node.text)) {
        (node.text.match(PROPN) || []).forEach(name => req.add(name));
        [...node.toks].filter(isContentWord).forEach(tok => req.add(tok));
      }
      adj[node.i].forEach(({ j }) => {
        if (visited.has(j)) return;
        visited.add(j);
        nextRing.push(nodes[j]);
      });
    });
    frontier = nextRing;
    depth += 1;
  }
  return req;
}
// answer a "can SUBJECT ...?" question: NO if a constraint blocks SUBJECT's
// class from a required entity; else YES (if the goal is reachable).
// Capitalized question openers and articles. They are skipped when a subject or
// a person is picked out of the capitalized words of a question or fact
// (answerCan, answerWho, answer).
const QWORD = new Set(['Can', 'Does', 'Do', 'Is', 'Are', 'Could', 'Will', 'Would', 'May', 'Should', 'Who', 'What', 'Where', 'Why', 'When', 'Which', 'How', 'The', 'A', 'An']);
/**
 * Decides a yes/no "can SUBJECT ...?" question.
 *
 * NO when a negated fact names both an entity the goal requires and a class the
 * subject belongs to. YES only when a fact that starts with the subject states
 * an ability and, if the question names other entities, mentions one of them.
 * Anything else is UNKNOWN.
 *
 * @param {string} q - The question text.
 * @param {string[]} facts - Corpus facts.
 * @param {object} graph - Fact graph, handed to requiredEntities.
 * @returns {null
 *   | {yesno: 'NO', subject: string, blockedBy: string, cls: string, why: string[]}
 *   | {yesno: 'YES', subject: string, why: string[]}
 *   | {yesno: 'UNKNOWN', subject: string, unknown: true}}
 *   null when the question contains no subject.
 */
function answerCan(q, facts, graph) {
  const capitalized = q.match(PROPN) || [];
  const subject = capitalized.find(w => !QWORD.has(w)) || null;
  if (!subject) return null;

  // the goal is everything in the question that is not the subject's name
  const subjectLower = subject.toLowerCase();
  const goalToks = new Set([...tokensOf(q)].filter(t => !t.includes(subjectLower)));
  const req = requiredEntities(goalToks, graph);
  const classes = [...classesOf(subject, facts)];

  // constraint: a NEG fact mentioning a required entity + one of SUBJECT's classes
  for (const f of facts) {
    if (!NEG.test(f)) continue;
    const blockedBy = (f.match(PROPN) || []).find(e => req.has(e));
    if (!blockedBy) continue;
    const cls = classes.find(c => new RegExp('\\b' + c + 's?\\b', 'i').test(f));
    if (!cls) continue;

    // proof: SUBJECT is a <class>, <class> is barred from <required entity> ...
    const identity = new RegExp('\\b' + subject + "\\b\\s+is\\s+(?:a|an)\\s+" + cls, 'i');
    const why = facts.filter(x => identity.test(x) || x === f);
    // ... plus a gating fact showing that the goal needs that entity
    const gatingFact = facts.find(x =>
      x.includes(blockedBy) && /\b(must pass through|only in|grows only|beneath)\b/i.test(x));
    if (gatingFact) why.push(gatingFact);
    return { yesno: 'NO', subject, blockedBy, cls, why };
  }

  // no block → YES only with an explicit enabler: a subject-ability fact that
  // mentions a goal entity (proper noun in the question other than subject).
  const goalProper = capitalized.filter(w => !QWORD.has(w) && w !== subject);
  const startsWithSubject = new RegExp('^' + subject + '\\b');
  const ability = /\b(can|travels?|travel|move|go|freely|may|enter)\b/i;
  const enabler = facts.find(f =>
    startsWithSubject.test(f) &&
    ability.test(f) &&
    (goalProper.length === 0 || goalProper.some(e => f.includes(e))));
  if (enabler !== undefined) return { yesno: 'YES', subject, why: [enabler] };

  return { yesno: 'UNKNOWN', subject, unknown: true };   // no block, no enabler → can't know
}

// ---- constraint-satisfaction: "who/which X can REACH the goal?" ----
// the goal requires some gated entity (Korr). Find the people who CAN reach it:
// either an explicit enabler fact ("X travels freely ... Korr", "apprentice may
// go where master cannot") OR a class NOT barred from it. Returns the viable
// people with their justification — bounded (all facts verbatim).
/**
 * Answers "who/which X can reach the goal?" by listing the people for whom at
 * least one justification is found in the facts.
 *
 * @param {string} q - The question text.
 * @param {string[]} facts - Corpus facts.
 * @param {object} graph - Fact graph, handed to requiredEntities.
 * @returns {{gateEnt: (string|undefined), viable: Array<{who: string, why: string[]}>}}
 *   gateEnt: the required entity named by the first barring fact, else the first
 *   required entity (undefined when there is none); viable: each person with
 *   the de-duplicated facts that justify them.
 */
function answerWho(q, facts, graph) {
  const qToks = tokensOf(q);
  // the person the question is asked on behalf of: first capitalized word that
  // is not a question word (null when there is none)
  const subj = (q.match(PROPN) || []).find(w => !QWORD.has(w)) || null;   // Mara
  // goal tokens = question tokens that do not contain the subject's name
  const goalToks = new Set([...qToks].filter(t => subj ? !t.includes(subj.toLowerCase()) : true));
  const req = requiredEntities(goalToks, graph);                          // {Korr, Deeprock,...}
  // the gated location that actually has a barring constraint
  const barred = facts.filter(f => NEG.test(f) && (f.match(PROPN) || []).some(e => req.has(e)));
  const gateEnt = barred.length ? (barred[0].match(PROPN) || []).find(e => req.has(e)) : [...req][0];
  // people = proper nouns with an "X is ..." identity fact, excluding subject
  const people = new Set();
  for (const f of facts) { const m = f.match(/^([A-Z][a-z]+)\s+is\b/); if (m && m[1] !== subj && !QWORD.has(m[1])) people.add(m[1]); }
  // also catch people with an explicit ability fact
  // (unlike the identity rule above, this one does not filter out QWORD entries)
  for (const f of facts) { const m = f.match(/^([A-Z][a-z]+)\s+can\b/); if (m && m[1] !== subj) people.add(m[1]); }
  // GENERAL role-enabler rule: "a <role> may go where their <superior> cannot"
  // (works for apprentice/master, deckhand/officer, anything). Captures the role.
  const roleRule = facts.find(f => /\b(\w+)\s+may go where\b.*\b(cannot|can't|can not)\b/i.test(f));
  // the role is the word right before "may go where", lowercased, minus one trailing "s"
  const enabledRole = roleRule ? (roleRule.match(/\b(\w+)\s+may go where/i) || [])[1].toLowerCase().replace(/s$/, '') : null;
  // entities an explicit enabler fact may mention (gateEnt may repeat an entry of req)
  const reqList = [...req].concat(gateEnt ? [gateEnt] : []);
  const viable = [];
  for (const p of people) {
    const why = [];
    // explicit enabler: "P ... travel/go/enter freely ... <any required entity>"
    const expl = facts.find(f => new RegExp('^' + p + '\\b').test(f) && /\b(travel|go|move|enter|freely|clearance|access|reach)\b/i.test(f) && (!reqList.length || reqList.some(e => f.includes(e))));
    if (expl) why.push(expl);
    // role-enabler: "P is <subj>'s <enabledRole>" + the rule (P may go where subj cannot)
    if (enabledRole && subj) {
      const roleFact = facts.find(f => new RegExp('^' + p + "\\b.*\\b" + subj + "'s\\s+" + enabledRole, 'i').test(f));
      if (roleFact) { why.push(roleFact); why.push(roleRule); }
    }
    // class NOT barred: P explicitly not the barred class ("P is not a/an <class>")
    const notBarredFact = facts.find(f => new RegExp('^' + p + "\\b.*\\bnot a[n]?\\b", 'i').test(f));
    const pClasses = classesOf(p, facts);
    // true when some barring fact names one of P's classes (singular or plural)
    const barredClass = barred.length && [...pClasses].some(c => barred.some(b => new RegExp('\\b' + c + 's?\\b', 'i').test(b)));
    if (notBarredFact && !barredClass) why.push(notBarredFact);
    // a person is viable only with at least one justification
    if (why.length) viable.push({ who: p, why: [...new Set(why)] });
  }
  return { gateEnt, viable };
}

// stem-ish prefix so trained~train, ruled~rule, gather~gathers match
const stem = t => t.toLowerCase().slice(0, 5);
function factCovers(fact, tok) { return new RegExp('\\b' + stem(tok), 'i').test(fact); }

// ---- the honest dispatcher: answer, or admit you can't know ----
// The crown of a bounded reasoner: it can only answer when the asked RELATION is
// attested in a fact CONNECTED to the subject. Otherwise → unknown. No confident
// confabulation: "I can't determine that from what I know."
/**
 * Answers a question from the facts, or reports that it cannot be known.
 *
 * The question is dispatched on its opening word, in this order: yes/no
 * auxiliaries, who/which + ability verb, why, what-would / what-end, and
 * finally every other form, which is answered from the first fact about the
 * subject that also covers a relation word of the question.
 *
 * @param {string} q - The question text.
 * @param {string[]} facts - Corpus facts.
 * @param {{nodes: Array<{i: number, text: string}>, adj: Array<Array<{j: number}>>}} graph
 *   Fact graph over those facts.
 * @returns {{answer: string, why: string[], unknown: false} | {unknown: true}}
 *   The answer with its supporting facts, or {unknown: true}.
 */
function answer(q, facts, graph) {
  const qt = q.trim().toLowerCase();
  const qToks = tokensOf(q);
  // subject = the first proper-noun PHRASE (consecutive caps, e.g. "Queen Ilse"),
  // else the first content noun. Capturing the phrase keeps "Ilse" out of the
  // relation tokens (else every Ilse-fact looks like an anchor).
  // Phrases made up only of question words / articles are discarded.
  const phrases = (q.match(/\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\b/g) || []).map(s => s.trim())
    .filter(s => !s.split(/\s+/).every(w => QWORD.has(w)));
  const subjPhrase = phrases[0] || [...qToks].find(t => !STOP.has(t)) || null;
  // relation tokens = question tokens whose stem is not part of the subject
  const subjTokSet = new Set((subjPhrase || '').toLowerCase().split(/\s+/).map(stem));
  const relToks = [...qToks].filter(t => !subjTokSet.has(stem(t)));
  // classes are looked up for the last word of the subject phrase
  const classes = subjPhrase ? classesOf(subjPhrase.split(/\s+/).pop(), facts) : new Set();
  // a fact is "about the subject" when it covers any subject word or class
  const subjForms = subjPhrase ? [...subjPhrase.split(/\s+/), ...classes] : [];
  // is there a fact ABOUT the subject that covers a relation token?
  const anchorFacts = graph.nodes.filter(n =>
    subjForms.some(s => factCovers(n.text, s)) && relToks.some(t => factCovers(n.text, t)));
  const hasAnchor = anchorFacts.length > 0;

  // YES/NO → conflict / enabler / unknown
  if (/^(can|does|do|is|are|could|will|would|may|should)\b/.test(qt)) {
    const a = answerCan(q, facts, graph);
    if (a && a.yesno === 'NO') return { answer: `NO (${a.subject} is a ${a.cls}; ${a.cls}s barred from ${a.blockedBy})`, why: a.why, unknown: false };
    if (a && a.yesno === 'YES') return { answer: 'YES', why: a.why, unknown: false };
    // also reached when answerCan found no subject (null) or answered UNKNOWN
    return { unknown: true };                       // no block, no enabler → can't know
  }
  // WHO/WHICH + ability → constraint satisfaction
  if (/^(who|which)\b/.test(qt) && /\b(can|could|would|may|get|reach|do|go)\b/.test(qt)) {
    const a = answerWho(q, facts, graph);
    if (a.viable.length) return { answer: a.viable.map(v => v.who).join(' or '), why: a.viable.flatMap(v => v.why), unknown: false };
    return { unknown: true };
  }
  // WHY → the causal fact ABOUT the subject (anchor)
  if (/^why\b/.test(qt)) {
    return hasAnchor ? { answer: anchorFacts[0].text, why: [anchorFacts[0].text], unknown: false } : { unknown: true };
  }
  // WHAT-WOULD / WHAT-END → the conditional fact covering the relation
  if (/what would|what .*\bend\b/.test(qt)) {
    const cf = graph.nodes.find(n => /\b(end|will end|ends)\b/i.test(n.text) && relToks.some(t => factCovers(n.text, t)));
    return cf ? { answer: cf.text, why: [cf.text], unknown: false } : { unknown: true };
  }
  // WHERE / WHAT / WHICH — answer from the ANCHOR fact directly (robust to graph
  // size), chaining ONE hop only if the asked object-type isn't in the anchor.
  if (!hasAnchor) return { unknown: true };          // relation not attested about subject → can't know
  const anchor = anchorFacts[0];
  // the asked object-type noun (city/country/...): the first relation token that
  // is itself a generic category word; a hop is needed only if the anchor
  // doesn't already name it.
  const objType = relToks.find(t => /^(city|country|state|town|region|planet|place|person)$/.test(t));
  if (objType && !factCovers(anchor.text, objType)) {
    // chain one hop to a fact that covers the object-type, then extract
    for (const e of graph.adj[anchor.i]) {
      const nb = graph.nodes[e.j];
      if (factCovers(nb.text, objType)) {
        return { answer: extractAnswer([anchor.i, nb.i], q, graph.nodes), why: [anchor.text, nb.text], unknown: false };
      }
    }
    // object-type asked but not reachable → we lack that fact
    return { unknown: true };
  }
  return { answer: extractAnswer([anchor.i], q, graph.nodes), why: [anchor.text], unknown: false };
}

// Public API (CommonJS). Helpers not listed here are not exported.
module.exports = { tokensOf, buildGraph, findChains, trimChain, extractAnswer, classesOf, requiredEntities, answerCan, answerWho, answer, STOP };