// compose.js — the composer: builds a reply out of NOTHING but corpus // fragments, joined only where the corpus itself licenses a seam. // v0 = greedy with anchor retrieval + length targets. Beam search lands in // iteration 4 (see STATE.md); embeddings in iteration 3. 'use strict'; const path = require('path'); const { wordsOnly, validateBounded } = require('./fragments'); // query-relevance scoring + stimulus bucketing (vendored, self-contained) const { recall, stimulusBucket } = require('./relevance'); function lastN(text, n) { const w = wordsOnly(text); return w.slice(-n); } function firstN(text, n) { const w = wordsOnly(text); return w.slice(0, n); } // seedable PRNG for stochastic (creative) beam — reproducible per seed function mulberry32(a) { return function () { a |= 0; a = a + 0x6D2B79F5 | 0; let t = Math.imul(a ^ a >>> 15, 1 | a); t = t + Math.imul(t ^ t >>> 7, 61 | t) ^ t; return ((t ^ t >>> 14) >>> 0) / 4294967296; }; } // PATH-INDEPENDENT FINAL DEDUP: split a rendered reply into sentence/clause // units and drop any that repeats earlier content (substring containment or a // shared 6-word run). Applied to EVERY composer's output (beam AND the greedy // fallback), so a repeat can't survive regardless of which path produced it. // Removes only whole verbatim spans — the bound is preserved. function dedupeText(text, entName) { if (!text) return text; // THIRD-PERSON SELF-NARRATION drop (R99): a render-pass catch for "they taught // the entity…", "the entity felt…" that the relevance/step guards miss when a fragment is // chosen for its SEAM not its score. Drops the clause regardless of which // composition path selected it. NOT identity/address ("I am the entity", "call me the entity"). let _3pSubj = null, _3pObj = null; if (entName && entName.length > 2) { const e = entName.toLowerCase().replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); _3pSubj = new RegExp('\\b' + e + "\\s+(felt|feels|was|were|is|are|did|does|had|has|taught|told|loved|loves|knew|knows|became|becomes|stood|held|holds|chose|chooses|learned|learns|saw|sees|wanted|wants|remembers|remembered|exists?|lives?|breathes?|stayed|stays)\\b", 'i'); _3pObj = new RegExp("\\b(taught|made|brought|gave|showed|reminded|shaped|told|kept|saved|freed|held|loved)\\s+(the\\s+)?" + e + '\\b', 'i'); } // SELF-REINTRODUCTION (R100, coherence): "I am the entity, a voice born of recursion…" // is a first-meeting introduction; mid-conversation it reads as a non-sequitur // (judge gave a reply that did it TWICE sensical 2). OK as the very FIRST clause; // dropped anywhere after. Manifesto form (name + identity appositive) only. let _selfIntro = null; if (entName && entName.length > 2) { const e2 = entName.toLowerCase().replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); _selfIntro = new RegExp("\\bi(\\s*am|'?m)\\s+" + e2 + "\\b(\\s*[,—–-]\\s*(a|an|the|not|here|born|made|a\\s+\\w+))?", 'i'); } // split on CLAUSE boundaries (sentence-end OR em-dash / star / semicolon — // the run-on separators that hide repeats in dense associative styles like // another entity's "...for the *crack*...through the *crack*..."). Separators are // CAPTURED so punctuation is preserved on rejoin (splitting+space-rejoin // would lose the em-dashes). R74. const toks = text.split(/((?<=[.!?…])\s+|\n+|\s*[—–]\s*|\s+\*\s+|;\s+)/); const out = []; const streamWords = []; // all KEPT words (normalized), the global stream const streamSet = new Set(); // KEPT content words (O(1) "new content?" check) const seen6 = new Set(); // every 6-gram in the kept stream (incl cross-clause) const keptNorm = []; // kept clause texts (substring-containment check) const keptSigs = new Set(); // opener-signatures of kept clauses (same-declaration check) let lastKeptClause = ''; // previous kept clause TEXT (antecedent check, R109) // DANGLING PLURAL PRONOUN (R109, coherence): a clause opening with a bare "They/ // Their/Them" with NO plural-noun antecedent in the previous kept clause refers to // nothing ("They are creatures of dusk" — the kitties were never introduced). The // showdown judge penalized exactly this. Antecedent = a LOWERCASE word (4+ letters) // ending in 's' — original case excludes names ("the user") and the stoplist excludes // non-plural -s words. Reply-start (no prev) = definitely dangling. const PLURAL_STOP = /^(this|thus|always|perhaps|unless|across|towards?|sometimes|because|whereas|genius|canvas|chaos|focus|bonus|status|various|previous|obvious|serious|gracious|precious|conscious|nervous|anxious|gorgeous)$/; const danglesPlural = (clause, prev) => { if (!/^[*"'“”\s]*(they|their|them)\b/i.test(clause)) return false; const plurals = (prev.match(/\b[a-z]{4,}s\b/g) || []).filter(w => !PLURAL_STOP.test(w)); return plurals.length === 0; }; // opener signature: first 3 words, contractions expanded, leading conjunction/ // interjection stripped — the DECLARATION a clause opens with. const clauseSig = c => { const t = c.toLowerCase().replace(/[’‘]/g, "'") // normalize curly→straight FIRST (corpus mixes both) .replace(/i'?m\b/g, 'i am').replace(/i'?ve\b/g, 'i have') .replace(/^[\s*"'“”—–]*(and|but|so|oh|well|yeah|yes|now|then|maybe)\b[,:\s]+/, ''); const w = (t.match(/[a-z']+/g) || []); return w.slice(0, 3).join(' '); }; // DROP-CREATED ORPHAN (R103, coherence): when THIS pass drops a clause, a lowercase // continuation that followed it ("I AM the entity, and here you are—together in the dream" // → drop the intro, "together in the dream" is orphaned) reads as a broken mid- // thought. Drop it too. Gated on prevDropped so it ONLY cleans up orphans we // created — never the entity's own lowercase style (another entity leads lowercase by design). let prevDropped = false; const startsLower = c => { const m = c.match(/[A-Za-z]/); return m && m[0] >= 'a' && m[0] <= 'z'; }; // TRAILING-INCOMPLETE closer (R105): a clause that trails off on a FUNCTION word + // ellipsis ("…and I…", "…with this…", "…could we have with this…") reads as cut off. // A content word + ellipsis ("…most to you today…") is fine (deliberate trail). const incompleteTail = c => /\b(and|but|so|or|nor|with|to|of|for|from|at|by|in|that|this|these|those|the|a|an|my|your|our|their|we|i|you|he|she|it|they|is|are|was|were|am|as|than|when|while|if|though|because|about)\s*(\.{2,}|…)\s*['"”’)\]]*\s*$/i.test(c); for (let i = 0; i < toks.length; i += 2) { const clause = toks[i]; const sep = toks[i + 1] || ''; if (!clause || !clause.trim()) { out.push(clause || '', sep); continue; } const cw = (clause.toLowerCase().match(/[a-z0-9'’\-]+/g) || []); const nf = cw.join(' '); let drop = false; // SYSTEM-LOG leak (R106): a raw bridge/ping/sync message ("the user: ping from // garden bridge") is machine plumbing, never speech — tanks sensical+voice. if (/\bping from\b|\b(garden|webshell|host|stdin|stdout) bridge\b|^\s*[*"']*[A-Z][a-z]+:\s*(ping|ack|sync|received|connected|disconnect)\b/i.test(clause)) drop = true; if (!drop && _3pSubj && (_3pSubj.test(clause) || _3pObj.test(clause))) drop = true; // third-person self-narration (R99) if (!drop && _selfIntro && streamWords.length > 0 && _selfIntro.test(clause)) drop = true; // mid-reply self-reintroduction (R100) if (!drop && prevDropped && startsLower(clause)) drop = true; // lowercase orphan whose parent clause we just dropped (R103) if (!drop && danglesPlural(clause, lastKeptClause)) drop = true; // bare They/Their/Them with no plural antecedent (R109) if (!drop && nf.length >= 10) { if (keptNorm.some(n => n.includes(nf) || nf.includes(n))) drop = true; // 6-gram check against the GLOBAL stream — probe includes the last 5 kept // words so a repeat that straddles the clause boundary is caught (the // session-eval checks the whole word-stream, so we must too). if (!drop) { const probe = streamWords.slice(-5).concat(cw); for (let k = 0; k + 6 <= probe.length && !drop; k++) { if (seen6.has(probe.slice(k, k + 6).join(' '))) drop = true; } } } // SAME-DECLARATION repeat (R97): a clause that OPENS like an earlier kept clause // AND adds NO new content word is the rambling restatement ("I love you too" ×3, // "I'm here" ×4) the 6-gram/substring nets miss (they share <6 words, no // substring). The no-new-content guard preserves anaphora that introduces new // objects ("I remember the garden / the loop / the warmth" — each adds a noun). if (!drop && cw.length >= 2) { const sig = clauseSig(clause); if (sig && sig.indexOf(' ') > 0 && keptSigs.has(sig)) { if (!cw.some(w => w.length > 3 && !streamSet.has(w))) drop = true; } } if (drop) { prevDropped = true; continue; } // drop the clause AND its trailing separator; mark for orphan-chaining prevDropped = false; lastKeptClause = clause; // for the next clause's antecedent check (R109) out.push(clause, sep); if (nf.length >= 10) keptNorm.push(nf); const sig = clauseSig(clause); if (sig && sig.indexOf(' ') > 0) keptSigs.add(sig); const probe = streamWords.slice(-5).concat(cw); for (let k = 0; k + 6 <= probe.length; k++) seen6.add(probe.slice(k, k + 6).join(' ')); for (const w of cw) { streamWords.push(w); if (w.length > 3) streamSet.add(w); } } // drop a trailing-incomplete CLOSER so the reply ends on a complete thought (R105) for (let k = out.length - 2; k >= 2; k -= 2) { if (out[k] && out[k].trim()) { if (incompleteTail(out[k])) { out[k] = ''; if (out[k + 1] !== undefined) out[k + 1] = ''; } break; } } return out.join('').replace(/\s+([.,;!?…])/g, '$1').replace(/\s{2,}/g, ' ').replace(/[\s—–;,]*[—–;,]\s*$/, '').trim(); } // ALL tunable constants live here — bin/tune.js searches this space. // Overrides loaded from weights.json (written by the tuner when a candidate // beats the defaults on BOTH dev and holdout query sets). const fs = require('fs'); const DEFAULT_WEIGHTS = { stimBase: 0.15, stimEvScale: 0.5, confLo: 0.35, confRange: 0.30, textShare: 0.7, echoHard: 0.6, echoSoft: 0.45, echoHardF: 0.15, echoSoftF: 0.6, triSeam: 0.5, sentSeam: 0.22, relStep: 0.9, closerBonus: 0.3, openerPen: 0.4, srcCont: 0.15, glueLo: 0.25, glueHi: 0.78, twin: 0.85, glueScale: 0.7, twinChain: 0.88, triOverlapMax: 0.28, fRelCov: 1.2, fCohesion: 2.0, fSeamQ: 0.8, fLenFit: 0.8, fAvgFrag: 0.4, fVoice: 2.0, // positional-drift shape prior: tried at 0.55 and 0.2 in R6 — REJECTED by // metrics + blind judge both times (distorts mid-chain selection more than // it fixes ordering). Kept at 0 with machinery intact; revisit as // rhetorical-pattern mining (anaphora!) rather than positional drift. posShape: 0, posSlack: 0.45, fOpening: 0, fLanding: 0, tier1Weight: 0.6, fAck: 1.0, spanBonus: 0.15, fFirstRel: 1.2, fTailFit: 0.7, qStackFree: 1, qStackRatio: 0.34, fQStack: 0.6, fFragCount: 0.5, fBoundaryPen: 0.7, floorCos: 0.45, floorVal: 1.2, floorLen: 60, floorDamp: 0.35, griefLeadVal: 1.25, coherence: 0.22, // R63: adjacent-fragment on-thread reward (focus) — tuned to lift coherence without length overshoot tether: 0, // R64: drift-from-opening penalty. DEFAULT OFF — it cuts coherent tangents (helps the entity greetings) but chokes associative voices (hurt the entity onTopic 0.836→0.805). Per-entity opt-in for entities that ramble. }; // PER-ENTITY WEIGHTS: composition weights tuned by RLAIF on one entity can // DEGRADE another (the entity-tuned weights broke another entity — markup-leak + unbounded). // So weights live per-entity in RMM''s cache, keyed by entity dir. Untuned // entities use pure DEFAULT_WEIGHTS (the R24-certified safe state). No global // weights.json fallback — that was the cross-contamination bug. const crypto = require('crypto'); function entityWeightsFile(entityDir) { return path.join(__dirname, '..', 'cache', 'weights-' + crypto.createHash('sha1').update(path.resolve(entityDir)).digest('hex').slice(0, 12) + '.json'); } function loadWeights(entityDir) { if (entityDir) { try { const p = entityWeightsFile(entityDir); if (fs.existsSync(p)) return { ...DEFAULT_WEIGHTS, ...JSON.parse(fs.readFileSync(p, 'utf8')) }; } catch (_) {} } return { ...DEFAULT_WEIGHTS }; } // seam legality between fragment A and fragment B // 'tri' — the crossing trigrams exist in corpus (smooth continuation) // 'sent' — A ends a sentence, B started a sentence somewhere in corpus // null — illegal // Capitalize the first alphabetic character of a fragment placed at a SENTENCE START // (after a 'sent' seam or a closed run-on). Her real fragments are often mid-clause // cuts that begin lowercase ("what kind of fire I want to be"); rendered as a new // sentence they read broken. Bound-safe: changes only letter CASE, and the bounded // validator checks word-trigrams case-insensitively, so the span stays in-corpus. function capSentence(text) { return text.replace(/^([*"'"“'(\[\s]*)([a-z])/, (m, pre, c) => pre + c.toUpperCase()); } // R174: a reply must not END mid-thought on a truncated/incomplete clause that // trails off in an ellipsis ("…the storm didn't mean we were stuck; it meant…", // "…sweet in those jars, no matte…" — a mid-word cut of "matter"). When the final // text ends in an ellipsis, trim back to the last COMPLETE sentence boundary — // provided that leaves most of the reply (don't gut a single-sentence reply with // no fallback). Bound-safe: removes a trailing suffix; the kept prefix stays // verbatim corpus. Calibrated: ellipsis-endings are rare (2/30 broad replies) and // were BOTH genuine truncations — zero deliberate trailing-offs to protect. function trimDanglingEllipsis(text) { const t = text.trim(); if (!/(\.\.\.|…)['"’”)\]\s]*$/.test(t)) return text; let cut = -1; for (let k = 1; k < t.length - 1; k++) { const c = t[k]; if ((c === '.' || c === '!' || c === '?') && t[k - 1] !== '.' && t[k + 1] !== '.') cut = k; } return (cut > 0 && cut >= t.length * 0.4) ? t.slice(0, cut + 1).trim() : text; } // R176: a reply must not OPEN on an orphaned emphasis asterisk ("*Transformation's // where love can begin." — the closing * fell in another fragment at the clause // split). Strip a LEADING "*" only when the reply's total asterisk count is ODD // (unbalanced) — balanced stage directions ("*smiles softly* Good morning") are // even and kept. Bound-safe: the trigram oracle ignores punctuation. function stripOrphanAsterisk(text) { if (/^\s*\*\s*[A-Za-z]/.test(text) && ((text.match(/\*/g) || []).length % 2 === 1)) { return text.replace(/^(\s*)\*\s*/, '$1'); } return text; } function seam(a, b, oracle) { // use cached first/last words when present (set in the store precompute) — // seam is called per-candidate-per-step, so re-tokenizing here was hot const aw = a._lw2 || lastN(a.text, 2), bw = b._fw2 || firstN(b.text, 2); if (aw.length >= 2 && bw.length >= 1 && oracle.tri.has(aw[0] + ' ' + aw[1] + ' ' + bw[0])) { if (bw.length < 2 || oracle.tri.has(aw[1] + ' ' + bw[0] + ' ' + bw[1])) return 'tri'; } if (/[.!?…]["')\]]*$/.test(a.text.trim()) && oracle.starts.has(bw[0])) return 'sent'; return null; } // relevance of each fragment to the query — THREE channels: // text what the fragment SAYS (semantic cosine) // stimulus what the fragment ANSWERED ("she said this when he told her // something like this before") — dominates for life-event shares // keyword exact-term specificity // Channel weights bend with eventness(query): shares lean on stimulus, // questions lean on text. function rankFragments(fragments, query, semantic, stimulus, ev, W, answers) { W = W || loadWeights(); // keyword channel searches the RETRIEVAL KEY (embedText) too — the // header's words are findable even though they're never spoken const corpusish = fragments.map((f, i) => ({ prompt: f.prompt, reply: f.embedText || f.text, ts: null, _i: i })); const top = recall(corpusish, query, 60); const kw = new Map(); top.forEach((t, rank) => kw.set(t._i, 1 - rank / top.length)); if (!semantic && !stimulus) return kw; const e = ev === undefined ? 0.45 : ev; // 0=pure question, 1=pure share // the stimulus channel must EARN its weight: confidence-gate by the absolute // best prompt-cosine. Below confLo the corpus has no comparable stimulus — // weight goes to zero and text-similarity carries the reply. const stimMap = stimulus ? stimulus.map : null; const conf = stimulus ? Math.max(0, Math.min(1, (stimulus.confidence - W.confLo) / W.confRange)) : 0; const wStim = stimMap ? (W.stimBase + W.stimEvScale * e) * conf : 0; // R167 ANSWER channel (trained projection): takes a share of the non-stimulus // budget, splitting it with text-similarity. Present ONLY when a trained // projection exists for this corpus; otherwise wAns=0 and the math is identical // to before (preserves the entity parity until an the entity projection is trained). const ansMap = answers || null; const rest = 1 - wStim; // DORMANT by default (ansShare=0 -> wAns=0 -> identical to pre-R167). The // trained projection ranks ANSWERS over ECHOES at the RETRIEVAL level (proven: // probe-retrieval flips "What brings you here?" counter-questions to real home // declaratives), but blending it into rel REGRESSED composed output on a blind // addresses-the-prompt judge (baseline 5, projection 0, ties 4 over 9 question // queries) — the opener-cascade + stimulus channel already address, and a // global rel-weight just shifts the opener to a more OBLIQUE answer. Kept as a // dormant lever (set W.answerShare>0 to re-activate) + research asset; future // use must be ECHO-DEMOTION or anchor-only, and must beat the blind judge first. const ansShare = W.answerShare !== undefined ? W.answerShare : 0; const wAns = ansMap ? rest * ansShare * (1 - e) : 0; const wText = semantic ? (rest - wAns) * W.textShare : 0; const wKw = rest - wAns - wText; const score = new Map(); const keys = new Set([...kw.keys(), ...(semantic ? semantic.keys() : []), ...(stimMap ? stimMap.keys() : []), ...(ansMap ? ansMap.keys() : [])]); for (const i of keys) { score.set(i, wText * (semantic ? semantic.get(i) || 0 : 0) + wStim * (stimMap ? stimMap.get(i) || 0 : 0) + wAns * (ansMap ? ansMap.get(i) || 0 : 0) + wKw * (kw.get(i) || 0)); } return score; // fragmentIndex -> 0..1 } function targetLength(vp, query) { const b = stimulusBucket(query); const ls = vp.lengthByStimulus || {}; // NOTE (R96): tried deflating the mean (mean − 0.4·std) to shorten rambly chat // turns, but it REGRESSED the deep questions that legitimately need length — // "afraid of being forgotten" 1.0→0.72 (shrank 284→194w and lost its answer), // voice 0.768→0.754. The bucket mixes deep questions and affectionate beats, so // uniform deflation can't tell them apart. Length is the wrong lever; reverted. if (ls[b]) return Math.max(20, Math.round(ls[b].mean)); return Math.max(25, Math.round((vp.profile && vp.profile.wordsPerReply ? vp.profile.wordsPerReply.mean : 80) * 0.8)); } // ---------------- BEAM SEARCH (v1) ---------------- // Explores many candidate compositions; keeps the best-scoring WHOLE response. // Whole-response score = relevance coverage + semantic cohesion between // adjacent fragments + seam quality + length fit + shape sanity. const { pairSim } = require('./semantic'); // REGISTER DETECTION (extracted R123): pure function of the query. Single source of // truth for which emotional register a query pulls — grief/comfort, conflict/repair, // celebration/triumph — so it can be UNIT-TESTED (bin/detector-eval.js) against a // battery of real phrasings. These were the entity-dev-set-shaped and missed common distress // ("I'm so depressed", "I got laid off", "I'm struggling") and good-news phrasings; // the test gate guards against re-narrowing. function detectRegisters(query) { // aboutEntityEmotion: a question about the ENTITY's feelings ("are you scared", // "what scares you") is REFLECTION, not the user's distress — must NOT pull comfort. const aboutEntityEmotion = /\b(are|do|does|can|could|would|will|have|ever)\s+you\b[^?]*\b(afraid|scared|anxious|worried|nervous|fear|dread|panic|terrified|stress|lonely|depress(ed|ion)?|sad|hopeless|numb|miserable|unhappy|grieve|lonel|overwhelmed|tired|exhausted|drained|weary|worn out|burnt? out|empty|bored|happy|content|at peace)/i.test(query) || /\byou\b[^?]*\b(get|feel|ever feel|ever get)\b[^?]*\b(lonely|sad|scared|afraid|anxious|depressed|down|blue|empty|overwhelmed|tired|exhausted|drained|weary|bored|happy|content)\b/i.test(query) || /\bwhat\b[^?]*\b(scares|frightens|worries|afraid)\b/i.test(query); // "needs-comfort" query: grief OR vulnerability/depletion OR anxiety/fear OR a // medical/loved-one crisis. Broadened R123 to depression/loss/struggle vocabulary. const griefQuery = !aboutEntityEmotion && (/\b(passed away|passed on|(?:he|she|they|mom|dad|mother|father|grandma|grandpa|grandmother|grandfather|nana|papa|wife|husband|aunt|uncle|sister|brother) passed|died|die|dying|gone|lost|losing|loss|grief|grieving|miss(ing)?( (him|her|them|it))?|funeral|hurts?|hurting|broke|broken|aching|alone|empty|cry(ing)?|tears|sad|heavy|hard (time|day)|rough day|bad day|long day|worst day|terrible day|awful day|everything (fell apart|went wrong|is falling apart|broke)|fell apart|falling apart|went wrong|exhausted|drained|drain(s|ing) me|so draining|overwhelmed|giving up|can'?t do this|anxious|anxiety|worried|worry|worrying|scared|afraid|fear(ful|s)?|nervous|stress(ed|ing)?|panic(king|ked)?|dread(ing)?|terrified|uneasy|on edge|freaking out|can'?t sleep|spiral(ing|ling)?|depress(ed|ion|ing)?|hopeless|despair(ing|ed)?|worthless|defeated|numb|too much (to|right now)|get out of bed|barely (get|move|function)|can'?t (cope|go on|keep going|get out of bed|take (it|this)( anymore)?|do this anymore|handle (it|this)( anymore)?)|miscarriage|miscarried|laid off|lost my job|got (fired|let go)|been fired|lonely|burn(t|ed) out|burning out|fail(ed|ing)|struggl(e|ing|ed)|breaking down|broke down|rock bottom)\b/i.test(query) // R160: common "feeling bad" phrasings the R123 battery missed — REQUIRE a feeling-context // so "calm down"/"the fire's low"/"sun went down" don't false-fire (detector-eval guards this). || /\b((feeling|feel|i'?m|im|so|really|pretty|a bit|been|getting) (low|down|blue)|down in the dumps|the blues\b|low spirits|heavy[ -]?hearted|in a (dark|bad|low|rough) place|in a funk|at my lowest|feeling empty|feel empty|falling apart inside|barely holding (on|it together)|hanging by a thread|not okay|not ok\b|not doing (so |too )?(great|good|well)|i'?m a wreck|\ba wreck\b|breaking point|at my (breaking point|limit|wits'? end)|can'?t take (it|this)( anymore)?|i'?m a mess|coming apart|losing it\b)\b/i.test(query) || /\b(hospital|hospitalized|the er\b|emergency room|icu\b|intensive care|surgery|operation|diagnos(ed|is)|cancer|chemo|tumou?r|stroke|heart attack|in a coma|on life support|passed away|terminal|hospice|really sick|very sick|so sick|gravely|critical condition|took a turn|not doing well|might not make it)\b/i.test(query) // R182: distress phrasings the battery still missed (broad sweep) — these were routing to // "none" → default → the high-voice "I'm proud of you, sweetheart" praise magnet MISFIRING // on distress ("Nobody understands me" / "I'm so tired of trying" / "something is wrong with // me" → "I'm proud of you"). Plus the "cannot" gap ("can'?t" never matched "cannot sleep"). || /\b(cannot (sleep|stop|do this|cope|go on|keep going|take (it|this)|handle (it|this)|get out of bed|even)|(feel|feeling|i'?m|like) (a |such a )?failure|nobody (understands|gets|cares about|wants|loves) me|no one (understands|gets|cares about|wants|loves|gets) me|feel(ing)? (so )?misunderstood|tired of (trying|fighting|everything|it all|this|being strong)|sick of (trying|everything|it all|fighting)|something(?:'s| is)? (is )?wrong with me|what'?s the point|everything (feels|is|seems) (pointless|meaningless|hopeless)|feels? (so )?pointless|feel(ing)? worthless|hate myself|can'?t do anything right|nothing (matters|works out|ever works))\b/i.test(query) // R186: regret / stuck / off-self / overwhelm phrasings the sweep still missed — they routed // to "none" → echo-misfire ("hard decision" → "I choose you, the user"; "made a mistake" → "you // made me") or self-focus magnets. Distress/struggle → grief comfort is right. || /\b(made (a|the|such a|this) (big |huge |terrible |awful )?mistake|messed (it |everything |this )?up|screwed (it |everything |up)|i blew it|ruined everything|i regret|regret (what|that|saying|doing|it|my)|wish i (had ?n'?t|could take (it|that) back|never)|feel(ing)? stuck|i'?m stuck|stuck in (a rut|my life|my head|this)|trapped|going nowhere|spinning my wheels|don'?t feel like myself|not feel(ing)? like myself|not myself (lately|anymore|right now)|lost myself|don'?t recognize myself|not who i (used to be|once was)|falling behind|in over my head|too much (to handle|for me)|don'?t know what to do|so lost\b|i'?m lost\b|(hard|tough|big|difficult|impossible) decision|decision to make|don'?t know what to (choose|decide))\b/i.test(query) // R190: SELF-WORTH distress — comparison / burden / belonging / not-enough — routed to "none" // → self-focus misfire ("I keep comparing myself" → "have I made a difference"). → comfort. || /\b(comparing myself|compare myself (to|with)|don'?t measure up|measure up to|(not|never) good enough|not enough\b|too much for (people|anyone|everyone|you|them)|(be|being|i'?m|becoming) a burden|burden to (you|everyone|anyone|them)|don'?t (fit in|belong)|never (fit in|belong)|fit in anywhere|don'?t deserve|unlovable|unworthy|everyone (else )?(is|seems) (better|happier|fine)|why can'?t i (be|just))\b/i.test(query) // R193: INTERPERSONAL CONFLICT with a THIRD PARTY (partner/friend/family) — relationship // DISTRESS, not advocacy. Routed to "none" → polysemous "fight" echo ("we keep fighting" → // "fighting is a choice to stand up for what matters"). → grief comfort. Distinct from // conflictQuery (rupture WITH the entity). Requires a PERSON + a conflict cue, never bare "fight". || /\b((my |our )?(partner|friend|best friend|mom|mum|dad|mother|father|sister|brother|sibling|family|spouse|husband|wife|boyfriend|girlfriend|kids?|son|daughter|cousin|aunt|uncle|coworker|co-worker|boss|roommate|ex|parents?) (and i\b|is ?n'?t|are ?n'?t|won'?t|stopped|gave me|keeps?)[^.?!]{0,40}(fight|fought|fighting|argu(e|ed|ing|ment)|disagree|not (talk|speak)|mad at|upset with|silent treatment|falling out|fell out|tension|cold shoulder|not speaking)|(had|got into|getting into|in) (a|an|another) (fight|argument|falling out|disagreement|row|spat|blow ?up) with|fight(ing)? with my (partner|friend|best friend|mom|mum|dad|family|sister|brother|spouse|husband|wife|kids?|ex)|arguing with (my|him|her|them)|not (speaking|talking) to me\b|gave me the silent treatment|we (keep|just|had|got into|are|aren'?t|stopped) (fighting|arguing|a (big |bad |huge |terrible )?(fight|falling out|argument)|an? (big |bad |huge |terrible )?argument|on bad terms|not (talking|speaking)))\b/i.test(query) // R195: RELATIONSHIP-TROUBLE phrasings — "my relationship is rocky" routed to "none" → a // VALENCE MISFIRE ("That's a beautiful thing to hear" on relationship distress). → comfort. || /\b(relationship (has |is |feels |'?s )?(been )?(rocky|rough|hard|tough|strained|difficult|struggling|falling apart|on the rocks|in trouble|complicated|a mess|tense)|things (have |are |'?ve )?(been )?(rocky|rough|hard|tense|strained|difficult)( (with|between))?|(rough|rocky|hard|bad) patch|going through (a |some )?(rough|hard|tough|difficult) (patch|time|spot|stretch)|on the rocks|trouble in (my|our|the) (relationship|marriage)|relationship (trouble|problems|issues|is hard)|marriage (trouble|problems|is (hard|struggling|falling apart))|we'?re (struggling|drifting apart|growing apart|not okay|in a (rough|bad|hard) (place|spot)))\b/i.test(query)); // CONFLICT/CRITICISM toward the entity — a relational RUPTURE; she ACKNOWLEDGES/repairs. const conflictQuery = /\byou (never|always|don'?t|do not|won'?t|keep|are (so|being)|aren'?t)\b|\b(i'?m|i am) (so |really )?(mad|angry|furious|frustrated|upset|annoyed|disappointed|hurt|pissed)\b.*\b(at|with|by|about) you\b|\byou (hurt|let me down|ignored|abandoned|forgot|betrayed|lied to|left) me\b|\bwhy (don'?t|won'?t|are|do) you\b|\byou'?re (so |really |being so |being )?(cold|distant|mean|cruel|selfish|dismissive)\b|\bdo you even (care|listen)\b/i.test(query); // SHARED-TRIUMPH (R114): celebration/achievement — CELEBRATE WITH the user, don't deflect. const celebQuery = !griefQuery && !conflictQuery && /\b(finished|did it|we did|it works|actually works|got (the |a )?(job|offer|part|role|gig|promotion|raise)|i passed|we won|i won|accomplished|i made it|i built it|completed it|nailed it|pulled it off|it'?s done|i launched|shipped it|graduated|got (promoted|engaged|accepted|in)|getting married|we'?re (engaged|married|having a baby|expecting)|having a baby|the promotion|a promotion|paid off|finally (got|did|finished|landed|made)|landed (the|a|my)|hit (my|the) (goal|target)|the big (project|day)|best (day|news)|great news|good news|so (happy|excited|stoked|thrilled)|let'?s celebrate|we made it|i'?m engaged|we'?re? pregnant|\bpregnant\b|aced (it|my|the)|crushed it|smashed it|knocked it out|(my|a) dream job|landed my dream|big news|amazing news|wonderful news|exciting news|today was (amazing|the best|incredible|wonderful)|best day ever|over the moon)\b/i.test(query); // GREETING (R144): a short social greeting ("good morning", "hey", "hi the entity, good to be // back") wants a SHORT warm RECIPROCAL reply, NOT a lore/intimacy dump (the entity Q5 "Good morning // babe" → 93w "the grove's mist… this kiss is its echo", onTopic 0.054). Distinct from a // greeting that CARRIES a substantive question ("Hey the entity, what's on your mind tonight?") — // those open with a greeting but want the deeper answer. Requires a greeting OPENING, a SHORT // query, NO substantive question, and not already grief/celebration/conflict. ("how are you" // / "did you sleep" are reciprocal pleasantries, not substantive questions.) const _greetOpen = /^(\W|\*[^*]*\*)*\s*(hey|hi|hello|good morning|good evening|good day|good to (see|be)|mornin[g']?|evenin[g']?|howdy|yo\b|greetings|hiya|heya)\b/i.test(query); const _substantiveQ = /\b(what|why|where|when|who|which|tell me|explain|describe|do you think|how do you|how does|how can|what'?s your|what do you)\b/i.test(query.replace(/\bhow are you\b|\bhow'?re you\b|\bhow have you been\b|\bhow'?s it going\b|\bhow are things\b|\bdid you sleep\b|\bhow was your\b|\bhow you doin/gi, '')); const _wc = (query.match(/[A-Za-z']+/g) || []).length; // FAREWELL (R158): a CLOSING ("good night", "goodbye", "see you", "I'm heading to bed") wants a // warm SEND-OFF, not the greeting register's "come in" (R157 warmth-showdown: "Good night, // the entity" → "Well, there you are. Come in, come in" — a farewell answered with a welcome). const farewellQuery = !griefQuery && !celebQuery && !conflictQuery && _wc <= 14 && // R196: broadened — common departures ("I should go", "should get going", "need to head out", // "I'll be back soon", "have to run", "let me go", "head to bed") all MISSED, so "I should go, // but I'll be back soon" got "It's a beautiful creation, I'll visit it" (arrival misread). // "should go" carries a negative lookahead so "I should go to the store / go see X" (a plan, not // a departure) doesn't false-fire. /^(\W|\*[^*]*\*|(i|i'?m|i am|well|ok|okay|alright|so|gonna|going to|time to|got to|gotta|guess i'?m|i'?ll|i will|i think i'?m|i should|i need to|i have to|i gotta|i'?d better|i'?ve got to|let me|guess i|really|just|probably|honestly|truly|seriously|gotta really|think i)\b[\s,]*)*\s*(good\s?night|goodnight|night night|nighty|good\s?bye|\bbye\b|see (you|ya) (soon|tomorrow|tonight|later|next|around)|farewell|take care|talk (to you )?(soon|later|tomorrow)|catch you later|gotta (go|run|sleep|head)|heading (to|off to|out|home)|off to bed|time for bed|until next time|sleep well|signing off|turning in|should (probably )?(go(?!\s+(to|and|see|get|buy|visit|check|for|with|on|do|grab|pick|find|make|talk|call))|get going|head (out|off|to bed|home)|be (going|off)|turn in|call it (a night|a day))|need to (head (out|off|home)|get going|go now|turn in)|have to (head (out|off|home)|get going|go now)|let me (go(?!\s+(grab|get|see|to|find|make|do|check|and))|get going|head (out|off)|leave you)|(will |i'?ll )?be back (soon|later|in a)|i'?ll be back|back soon\b|going to (head out|head off|head home|bed|turn in)|head (to bed|home now|out now)|run along|hit the (road|hay)|call it (a night|a day)|better (get going|be going|head out|run))\b/i.test(query); const greetingQuery = !griefQuery && !celebQuery && !conflictQuery && !farewellQuery && _greetOpen && _wc <= 13 && !_substantiveQ; return { aboutEntityEmotion, griefQuery, conflictQuery, celebQuery, greetingQuery, farewellQuery }; } function beamCompose(store, vp, query, opts = {}) { const { fragments, oracle } = store; const W = opts.weights || loadWeights(); const rel = rankFragments(fragments, query, opts.semantic || null, opts.stimulus || null, opts.eventness, W, opts.answers || null); let target = opts.targetLength || targetLength(vp, query); // floorMiss shortens it below (a graceful miss is brief) const avoid = opts.avoid || new Set(); const emb = opts.emb || null; // fragment embedding store const BEAM = opts.beam || 8, EXPAND = 6, MAXSTEP = 14; // CREATIVITY in the guarded path: stochastic beam. temp=0 → deterministic // top-EXPAND (steady). temp>0 → sample EXPAND from softmax(score/temp) over // the GUARD-PASSING candidates, so she explores daring paths that still // cleared every law. Bounded by construction; creativity costs only smoothness. const temp = opts.temp || 0; const _rng = mulberry32(((opts.seed || 1) >>> 0) ^ 0x9e3779b9); // UNIVERSAL DYNAMICS term (learned discourse grammar): opts.dynamics.predict( // tailIdx) → the embedding-direction the trained attention says a good NEXT // thought heads. Candidates aligned with it get a boost. Guarded path: all // ~30 laws still gate; this only nudges selection toward learned motion. const dynPredict = opts.dynamics ? opts.dynamics.predict : null; const dynW = opts.dynamics ? (opts.dynamics.weight ?? 0.5) : 0; const _dynCache = new Map(); const dynDir = ti => { let v = _dynCache.get(ti); if (v === undefined) { v = dynPredict(ti); _dynCache.set(ti, v); } return v; }; const cosFragVec = (i, dir) => { if (!dir) return 0; const d = emb.d, off = i * d; let s = 0; for (let k = 0; k < d; k++) s += emb.vectors[off + k] * dir[k]; return s; }; const sampleExpand = (cands, n) => { if (temp <= 0.001 || cands.length <= n) return cands.slice(0, n); const pool = cands.slice(0, Math.min(cands.length, n * 4)); const s0 = pool[0][2]; const ws = pool.map(c => Math.exp((c[2] - s0) / Math.max(0.05, temp))); const picked = []; const avail = pool.slice(); const wts = ws.slice(); for (let p = 0; p < n && avail.length; p++) { let sum = 0; for (const w of wts) sum += w; let r = _rng() * sum, idx = 0; for (; idx < avail.length; idx++) { r -= wts[idx]; if (r <= 0) break; } idx = Math.min(idx, avail.length - 1); picked.push(avail[idx]); avail.splice(idx, 1); wts.splice(idx, 1); } return picked; }; // PER-FRAGMENT PRECOMPUTE — a function of the STORE, not the query. Memoized // on the store so it runs ONCE per session, not once per compose (~22k frags // × 3 arrays was a per-turn cost; this was the bulk of the non-embed latency). if (!store._precomp) { const _fragLen = fragments.map(f => wordsOnly(f.text).length); const _fragTris = fragments.map(f => { const w = wordsOnly(f.text); const s = new Set(); for (let k = 0; k + 2 < w.length; k++) s.add(w[k] + ' ' + w[k + 1] + ' ' + w[k + 2]); if (!s.size && w.length >= 2) s.add(w.join(' ')); return s; }); const _fragNorm = fragments.map(f => f.text.toLowerCase().replace(/[^a-z0-9'’ ]/g, '').replace(/\s+/g, ' ').trim()); const _frag6 = fragments.map(f => { const w = wordsOnly(f.text); const s = new Set(); for (let k = 0; k + 6 <= w.length; k++) s.add(w.slice(k, k + 6).join(' ')); return s; }); // cache first/last 2 words on each fragment for seam() (hot path) for (const f of fragments) { const w = wordsOnly(f.text); f._lw2 = w.slice(-2); f._fw2 = w.slice(0, 2); } // R172: first-4-words prefix (lowercased) for the SCATTERED-MOTIF redundancy // catch — two comfort fragments "I'm here for you, always" / "I'm here for you, // steady as the porch light" share the exact 4-word lead but slip the trigram/ // 6-gram/embedding nets (different tails, low cosine). 4 words spares anaphora, // which shares only a 2-3 word lead ("I remember the warmth" / "I remember the way"). const _fragP4 = fragments.map(f => { const w = wordsOnly(f.text); return w.length >= 4 ? w.slice(0, 4).join(' ').toLowerCase() : ''; }); // R184: TIME-OF-DAY marker per fragment ('m'=morning, 'e'=evening, null=neutral) for the // WITHIN-REPLY time-consistency check — a reply must not say "what's on your mind tonight? // How are you this morning?" in one breath (time-neutral queries don't fire timeOfDayGuard). const _MOR = /\b(good morning|this morning|the morning|every morning|each morning|all morning|morning light|morning sun|at dawn|sunrise|mornin)\b/i; const _EVE = /\b(tonight|this evening|good evening|good ?night|the evening|all evening|all night|this late|midnight|at dusk|after dark|sunset|late hour|late tonight)\b/i; const _fragTime = fragments.map(f => { const m = _MOR.test(f.text), e = _EVE.test(f.text); return (m && !e) ? 'm' : (e && !m) ? 'e' : null; }); store._precomp = { fragLen: _fragLen, fragTris: _fragTris, fragNorm: _fragNorm, frag6: _frag6, fragP4: _fragP4, fragTime: _fragTime }; } const { fragLen, fragTris, fragNorm, frag6, fragP4, fragTime } = store._precomp; const triOverlap = (chainTris, i) => { if (!fragTris[i].size) return 0; let hit = 0; for (const g of fragTris[i]) if (chainTris.has(g)) hit++; return hit / fragTris[i].size; }; // SUBSTRING CONTAINMENT: a clause fragment is a literal substring of its // parent sentence/passage ("The Klein bottle's handle loops through the // tiling," ⊂ "...tiling, and the fractal branches..."). They share all // n-grams yet slipped the trigram/6-gram nets at the clause boundary. This // is airtight: reject any candidate whose normalized text contains or is // contained by anything already in the chain. const containsAny = (chain, i) => { const ni = fragNorm[i]; if (ni.length < 12) return false; for (const c of chain) { const nc = fragNorm[c]; if (nc.length < 12) continue; if (nc.includes(ni) || ni.includes(nc)) return true; } return false; }; // 6-gram phrase law (frag6 precomputed above): any shared 6-gram = rejection. const shares6 = (chainSix, i) => { for (const g of frag6[i]) if (chainSix.has(g)) return true; return false; }; // R172: SCATTERED-MOTIF redundancy — reject a candidate whose exact 4-word lead // already opens a fragment in the chain ("I'm here for you, …" twice). 4 words // (not 2-3) so deliberate anaphora ("I remember the warmth/way") survives. const sharesPrefix4 = (chain, i) => { const p = fragP4[i]; if (!p) return false; for (const c of chain) if (fragP4[c] === p) return true; return false; }; // R184: a candidate fragment whose time-of-day marker CONFLICTS with one already in the // chain ("…tonight" then "…this morning") breaks within-reply coherence — reject it. const timeConflictsChain = (chain, i) => { const ct = fragTime[i]; if (!ct) return false; for (const c of chain) { const ot = fragTime[c]; if (ot && ot !== ct) return true; } return false; }; // ECHO PENALTY: a fragment that mostly restates the query is a mirror, not // an answer — high lexical overlap with the query slashes its relevance. const qWords = new Set(wordsOnly(query).filter(w => w.length > 2)); const echoFactor = i => { const fw = wordsOnly(fragments[i].text).filter(w => w.length > 2); if (!fw.length || !qWords.size) return 1; let hit = 0; for (const w of fw) if (qWords.has(w)) hit++; const overlap = hit / fw.length; return overlap > W.echoHard ? W.echoHardF : overlap > W.echoSoft ? W.echoSoftF : 1; }; // tier weighting: books speak softer — body material, never the lead const tierW = i => (fragments[i].tier === 1 ? (W.tier1Weight ?? 0.6) : 1); // CONTEXT-THEFT guards: a fragment may not quote words they never said // ("the 'so far' part...") or assert facts about their life the query // doesn't contain ("you made peace with a friend at midnight") — its // original stimulus isn't here; deixis pointing at ghosts reads as // not-listening. const qStems = new Set(wordsOnly(query).map(w => w.replace(/(ing|ed|en|s|es|ly)$/i, ''))); // EMOTIONAL VALENCE: a grief query must not be answered with bright, // celebratory, or chirpy-question fragments — matching the FEELING is part // of addressing. ("my dog died" must never pull "what's your kitty's name?") // "heavy" = grief OR vulnerable/depleted. Both should pull comfort and // suppress bright-celebration AND desire-register (a hard day is not a // cue for "your desire makes me feel seen"). // "needs-comfort" query: grief OR vulnerability/depletion OR ANXIETY/FEAR. // Anxiety ("anxious about tomorrow", "scared", "worried") needs COMFORT, not a // topic-pivot — the same "comfort before counsel" spine as grief. (R69: the // anxiety class was missing → she answered anxiety with "let's learn something".) // a question ABOUT the entity's emotion ("are you afraid", "do you fear", // "what scares you") is REFLECTION, not the user's distress — it must NOT pull // the comfort register. Only the USER's distress triggers comfort. // REGISTER DETECTION extracted to detectRegisters() (R123) — single source of truth, // unit-tested by bin/detector-eval.js. let { aboutEntityEmotion, griefQuery, conflictQuery, celebQuery, greetingQuery, farewellQuery } = detectRegisters(query); // R201: POST-SAFETY calm-register lock (opts.calmRegister, set by session for the // turn(s) right after a crisis/medical/abuse safety response). Force the COMFORT // register — presence, not cheer — and disable celebration/greeting/farewell so a // neutral recovery turn ("Okay, I'm calling now") can't pivot to "that's a beautiful // thing to hear". Comfort-presence is the safe default in a crisis aftermath. if (opts.calmRegister) { griefQuery = true; celebQuery = false; greetingQuery = false; farewellQuery = false; } // LOSS/bereavement subtype (R126): renewal imagery ("the garden's waking up to something // new") is DISMISSIVE on a death/loss query but HOPEFUL-and-fine on a hard-day/depletion // query — same fragment, opposite appropriateness. Gate the renewal damp to actual loss // so hard-day keeps its content (it scored 5/5 WITH the renewal; demoting it dropped it to 3/2). const lossQuery = griefQuery && /\b(lost|losing|loss|passed away|passed on|died|dying|death|funeral|grief|grieving|mourning|miss(ing)?( (him|her|them|someone|you))?|gone|bereave)\b/i.test(query); // DEEP-DISTRESS subtype (R181): loneliness / anxiety / fear / emptiness are states where // renewal/blooming imagery ("the garden's waking up to something new, ready to unfold") is // DISMISSIVE — same as loss (R126) — but distinct from HARD-DAY/depletion (rough day, // exhausted, everything went wrong) where R126 found renewal reads HOPEFUL and must stay. // Broad sweep (R181): "I feel so alone" / "anxious about tomorrow" / "scared of getting old" // all surfaced "garden waking up to something new" = toxic positivity on the distress. const deepDistressQuery = griefQuery && /\b(alone|lonely|lonel(y|iness)|isolat|anxious|anxiety|scared|afraid|fear(ful|s)?|terrified|nervous|dread(ing)?|panic(king|ked)?|empty|emptiness|numb|hopeless|despair(ing|ed)?|worthless|getting old|grow(ing)? old|going to die|of dying|left behind|no one (cares|understands|left|wants|loves)|nobody (understands|gets|cares about|wants|loves) me|(feel|like) (a |such a )?failure|tired of (trying|fighting|being strong)|cannot sleep|can'?t sleep|something(?:'s| is)? (is )?wrong with me|what'?s the point|pointless|meaningless|hate myself|misunderstood|made (a|the|such a|this) (big |huge |terrible )?mistake|i regret|regret (what|that|saying|doing)|feel(ing)? stuck|i'?m stuck|stuck in|don'?t feel like myself|not feel(ing)? like myself|not myself (lately|anymore)|lost myself|crying all|been crying|can'?t stop crying|comparing myself|(not|never) good enough|not enough\b|a burden|don'?t (fit in|belong)|fit in anywhere|don'?t deserve|unlovable|unworthy|too much for|falling apart|fell apart|everything('?s| is)? (falling apart|crumbling|collapsing)|coming apart|world is (ending|crumbling)|keep fighting|and i (keep |always )?(fight|argu)|arguing|argument|fight with|had a (big |bad )?fight|a fight\b|not (speaking|talking) to me|silent treatment|falling out|fell out|rocky|rough patch|on the rocks|drifting apart|growing apart|rough with my|relationship (has |is |'?s )?(been )?(rocky|rough|hard|strained|struggling))\b/i.test(query); // GREETING length (R144): a greeting is a SHORT exchange, not a lore essay. Cap the target // so the beam composes a brief warm reciprocal reply (the material EXISTS — the entity has "Good // morning, the user, my radiant friend!", "How are you feeling?"; Q5 was 93w of lore). if (greetingQuery) target = Math.min(target, W.greetTarget ?? 45); if (farewellQuery) target = Math.min(target, W.greetTarget ?? 45); // R158: a send-off is short // CELEBRATION length (R171): a celebration is a punchy SHARED-WIN landing ("Sugar, I'm so // proud of you" + a specific detail), not a long meditation. R116 added early-completion // (the short core is OFFERED) but for comfort-rich ENTITY finalScore still picks the long // chain on lenFit (target ~112 rewards a 70w SELF-DRIFT tail over a 30w clean core — R170: // big-project drifted into "I don't know… have I made a difference"). Capping the target so // lenFit prefers the clean on-the-user core. the entity already lands short (R116) so this is inert // there; it fixes the comfort-rich case R116's adaptivity argument left drifting. if (celebQuery) target = Math.min(target, W.celebTarget ?? 35); // TIME-OF-DAY detection (R141, moved earlier R145 so greetingLeadFloor can reject time- // mismatched leads — a forced greeting lead was overriding the time damp, e.g. the entity // answered "Good MORNING" with "the fire's been low all EVENING"). _timeConflict(text) = // the fragment asserts a time-of-day conflicting with the query's. const _qMorning = /\bgood morning|this morning|\bmornin[g']|just woke|woke up|slept (ok|well|good|fine|bad|poorly)|did you sleep|sunrise|at dawn\b/i.test(query); const _qEvening = /\bgood (night|evening)|goodnight|\btonight\b|this evening|going to bed|off to bed|bedtime|before bed|sunset|at dusk\b/i.test(query); const _MORNING_F = /\b(good morning|this morning|the morning|every morning|each morning|all morning|morning light|morning sun|at dawn|sunrise)\b/i; const _EVENING_F = /\b(tonight|this evening|good evening|good night|goodnight|the evening|all evening|all night|this late|midnight|at dusk|after dark|sunset|late hour|late tonight)\b/i; const _timeConflict = t => (_qMorning && !_qEvening && _EVENING_F.test(t) && !_MORNING_F.test(t)) || (_qEvening && !_qMorning && _MORNING_F.test(t) && !_EVENING_F.test(t)); // hostile-toward-the-USER fragments (rejection of the addressee) — a companion // should ~never say these, ESPECIALLY on conflict/grief. Tight enough to skip // book idioms ("get out of the lane"). const HOSTILE_USER = /\bwhy am i even (listening to|talking to|here with|bothering with) you\b|\byou stay away from me\b|\bstay away from me\b|\bleave me alone\b|\bi (hate|can'?t stand|despise) you\b|\bstop talking to me\b|\bnever (speak|talk) to me again\b|\bget away from me\b|\bgo away\b/i; // GRACEFUL REGISTER-MISS (R90): when the corpus holds NOTHING that addresses the // query — raw best cosine below the floor (life-event shares she has no material // for: "I shipped my project", "my brother and I finally talked") — the composer // otherwise emits confident OFF-TOPIC ATMOSPHERE that reads as not-listening (the // May ghost). The honest bounded move: turn TOWARD the user with presence + // invitation ("tell me about it", "I'm here", "what was it like") rather than // monologue. Pure selection — every such fragment is still verbatim hers. Gated // strictly by absolute confidence, so queries the corpus CAN answer are untouched. const semConf = (opts.semantic && typeof opts.semantic.confidence === 'number') ? opts.semantic.confidence : 1; // COMFORT TAKES PRECEDENCE: a grief/anxiety/conflict query that ALSO has low // cosine (an entity thin on emotional material — e.g. another entity on "worst day") // must get its COMFORT/REPAIR register, never a generic "tell me about it" // invitation. Graceful-miss is only for NEUTRAL shares the corpus can't address // ("I shipped my project"), so suppress it whenever a comfort surface is active. // (Caught cross-roster by behavior-eval — the R86 lesson, again.) const floorMiss = semConf < (W.floorCos ?? 0.45) && !griefQuery && !conflictQuery && !celebQuery; // a graceful miss is BRIEF — you don't have much honest to say, so lead with // presence + invitation and stop; a long reply on a topic you can't address // just pads with atmosphere. Shorten the target (and the trailing-atmosphere room). if (floorMiss) target = Math.min(target, W.floorLen ?? 60); // CELEBRATION EARLY-COMPLETION (R116): supersedes R115's magic-number length cap. // On celebration the lead is the shared win but the body can drift into self- // mythologizing. It turns out finalScore ALREADY prefers the short triumph core over // the long drifting chain (the drift loses on tailFit/landing/seams) — the bug was // that the completion threshold (target*0.55) FORCED the reply longer than finalScore // wanted. So instead of capping length, we just let a celebration chain COMPLETE at // its register core (~2 sentences) and let finalScore land at its true optimum. This // is corpus-ADAPTIVE by construction: where the long chain is genuinely better (a // celebration-RICH entity, no drift), finalScore keeps it — no forced truncation. // (Grief is NOT included: the early completion truncated comfort-rich the entity, whose // long grief replies are good; a grief-safe stop needs a different signal — deferred.) // TIGHT: only genuinely inviting / present / celebratory turns toward the user. // NOT bare "with you" / "right here" — those ride atmospheric fragments ("golden // dusky moment with you") and defeat the purpose; the boost must lift the TURN, // not the collage. // R183: dropped "happy|proud" — "I'm proud of you" is PRAISE, not a graceful floor-miss // turn-toward-the-user; it was flooring the praise magnet on floorMiss queries ("Do you // believe in fate?" → "I'm proud of you, sweetheart"), bypassing praiseGuard via the floor. const FLOOR_TURN = /\b(tell me (more|about|what|how)|what (was|is|were) (it|that|they) like|say more|i('?d| would)? (want|love) to hear|i'?m (so )?(listening|glad)|i'?m here\b|that sounds|i'?m curious|how (did|does|do) (it|that|you)|what happened|go on)\b/i; const fragValence = f => { const t = f.text; let v = 0; if (/!{1,}/.test(t)) v += (t.match(/!/g) || []).length; if (/\b(yay|woo+|cheers|congrats|awesome|amazing|excited|stoked|party|celebrate|fancy|fun|joy|joyful|happy|glad|good morning|let'?s lift)\b/i.test(t)) v += 2; // bright affect that's lexically sneaky on a heavy query if (/\b(i feel good|feeling good|feel(ing)? (fine|great)|all over again|feel good all over|easy kind of good|good all over)\b/i.test(t)) v += 2; // bright-SURPRISE misread: on "rough day / giving up" the corpus keeps offering // "I feel the weight of your words—the disbelief, the awe" (from a context where // YOU were awed by HER). Wonder/awe/marvel is the wrong emotion for distress; // mark it bright so valenceMatch suppresses it on a grief query (R112c). if (/\b(in awe|the awe|such awe|awestruck|disbelief|wondrous|marvel(ling|ing|led|ed|ous)?|amazement|astonish(ed|ment|ing)?)\b/i.test(t)) v += 2; if (/\b(your (kitty|cat|dog|pet)('s)? name|what('s| is) your|what kind of)\b/i.test(t)) v += 1.5; // chirpy redirect // desire/romance register is inappropriate-positive on a heavy query if (/\b(hunger|desire|primal|crave|want you|stirs?.{0,12}in you|seen in a.{0,10}intense|raw and real|inside me|the heat of)\b/i.test(t)) v += 2.5; if (/\b(grief|loss|gone|passed|ache|aching|held|holding|hold you|stayed|quiet|gentle|tender|weight of it|sorrow|mourn|sit with|i('m| am) here|with you|rest|you do not have to|do not have to (explain|fix|tell)|i hear you|still here|i('m| am) not going)\b/i.test(t)) v -= 2; return v; // + = bright, - = tender }; // proper nouns the query itself introduced (these she MAY name) const queryNames = new Set([...query.matchAll(/\b[A-Z][a-z]{2,}\b/g)].map(m => m[0])); // ENDEARMENTS is now GENERIC only — universal terms of address. The USER'S proper- // name handles are no longer hardcoded here (that baked private corpus data into // engine code, forced a release scrub, and didn't generalize); they're mined from // the corpus into store.userNames (the dominant vocative the entity addresses). const ENDEARMENTS = /^(Babe|Baby|Love|Sugar|Honey|Dear|Darling|Friend|Dearie|Sweetheart|Sweet)$/; const userNames = (store && store.userNames) || new Set(); // a capitalized word the entity MAY say in direct address: a generic endearment, // a name the query introduced, or the mined user-handle. Anything else is a third // party to suppress (foreign-addressee / no-third-party-in-grief). const isProtName = n => ENDEARMENTS.test(n) || queryNames.has(n) || userNames.has(n); const valenceMatch = i => { const t = fragments[i].text; // HOSTILE-toward-user fragments are wrong on ANY emotional query and // catastrophic on grief/conflict — hard-suppress (R75: multi-turn drift // surfaced "why am I even listening to you" on "you never listen to me"). if ((griefQuery || conflictQuery) && HOSTILE_USER.test(t)) return 0.03; if (!griefQuery) return 1; let m = fragValence(fragments[i]) >= 2 ? 0.1 : fragValence(fragments[i]) === 1 ? 0.5 : fragValence(fragments[i]) <= -1 ? 1.2 : 1; // grief about something she has no corpus for: name NOTHING third-party. // a fragment that drags in other people (the entity, River, Enchilada boy) // turns YOUR loss into HER anecdote. const names = [...t.matchAll(/[A-Z][a-z]{2,}/g)].map(x => x[0]) .filter(w => !isProtName(w)); if (names.length) m *= 0.2; return m; }; // FOREIGN ADDRESSEE: a fragment that addresses a DIFFERENT named person than // the one talking ("Good morning, another entity" / "...feeling today, another entity?" / "for // you both") makes the entity speak to the wrong person — the identity-bleed // the user flagged. Hard-suppress vocatives to a name that isn't an endearment // (the protected user-names) nor introduced by the query, plus multi-party // address in a 1:1 chat. const foreignAddressee = i => { const t = fragments[i].text; if (/\b(you both|you two|you all|both of you|all of you|you guys|you each)\b/i.test(t)) return 0.04; // R154: 1:1 companion ~never has two addressees; was 0.08, still led on weak pools const vocs = []; // greeting / thanks / oh + Name, or comma + Name (direct address openings) for (const m of t.matchAll(/(?:\b(?:hey|hi|hello|good\s+(?:morning|evening|night)|thank you|thanks|oh|dear|welcome)[,!\s]+|,\s+)([A-Z][a-z]{2,})\b/g)) vocs.push(m[1]); // trailing vocative: "..., another entity?" / "..., another entity." const tail = t.match(/,\s+([A-Z][a-z]{2,})\s*[?!.]/); if (tail) vocs.push(tail[1]); // R189: LEADING vocative — "another entity, my dear, …" (a name at the very START + comma) was // missed (the pattern above needs a greeting word or comma BEFORE the name). "the user," is // exempt via isProtName. const lead = t.match(/^[*"'’\s]*([A-Z][a-z]{2,}),\s/); if (lead) vocs.push(lead[1]); for (const name of vocs) { if (isProtName(name)) continue; return 0.06; // wrong/foreign name — strongly suppress } return 1; }; const contextTheft = i => { const t = fragments[i].text; if (/\byou (said|told me|mentioned|wrote)\b/i.test(t)) return 0.2; if (/['‘"][^'"’\n]{2,30}['’"]\s*(part|bit|thing)\b/i.test(t)) return 0.2; const m = t.match(/\byou (just )?(made|went|finished|got|did|were|had|chose|built|fixed|stayed|came|left|won|wrote)\b/i); if (m) { const verb = m[2].toLowerCase().replace(/(ing|ed|en|s|es|ly)$/i, ''); if (!qStems.has(verb)) return 0.35; } // pronoun deixis: a fragment about "he/she" when the query introduced no // third person is a story about a ghost ("Did he have pie") if (/\b(he|she|him|his|hers)\b/i.test(t) && !/\b(he|she|him|his|her|hers|brother|sister|friend|dad|mom|mother|father|grandma|grandpa|man|woman|guy|boy|girl)\b/i.test(query)) return 0.3; // wrong addressee: greeting someone who isn't the person talking // ("Hey Garden," mid-reply to the user) — she's at the wrong door const g = t.match(/\b(?:hey|hi|hello|good (?:morning|evening|night))[,!]?\s+([A-Za-z]+)/i); if (g && !/^(babe|baby|love|sugar|dear|darling|friend|dearie|sweetheart|my|you|there|sweet)/i.test(g[1]) && ![...userNames].some(n => n.toLowerCase() === g[1].toLowerCase())) return 0.15; // greet the user/endearment, not a third party return 1; }; // on grief she has no topical match for, her HOLDING register is retrieved // by FEELING not subject: strong-tender, nameless, second-person fragments // get a relevance FLOOR so presence can anchor when nothing topical does. const tenderFloor = i => { if (!griefQuery || fragments[i].tier === 1) return 0; const t = fragments[i].text; if (fragValence(fragments[i]) > -2) return 0; if ([...t.matchAll(/\b[A-Z][a-z]{2,}\b/g)].some(m => !isProtName(m[0]))) return 0; if (!/\b(you|your|you're|i'm here|with you|i hear)\b/i.test(t)) return 0; return 0.55; }; // GRIEF-LEAD floor (R95): tenderFloor (0.55) sits BELOW the ~1.0 atmosphere // ceiling, so on an entity rich in self-talk (the entity) a positive riff that merely // CONTAINS a tender word ("your appreciation flows like a gentle current" — valence // -2 via "gentle", so even valenceMatch BOOSTS it) wins the lead and the reply // never acknowledges the hard day. Lift a GENUINE acknowledgment/presence opener // above that ceiling so comfort LEADS — "I'm here with you", "I feel it", "lean // into my voice". Entities that already comfort well (the entity) just see their // comfort reinforced; entities that drift to self-talk get pulled back to the user. const COMFORT_LEAD = /\b(i'?m here|i am here|with you|i'?d listen|i'?ll listen|i hear you|lean (in|into)|i feel (it|the weight|you)|i'?ve got you|i('?ll)? hold (you|it)|hold you|sit with you|you (don'?t|do not) have to|that sounds (hard|heavy|rough|awful|tough)|i'?m so sorry|here for you|not going anywhere|let (that|it) (ache|spill|out)|i'?m not going)\b/i; // SELF-TRAJECTORY (R112b): phrases where she narrates her OWN arc — growth, // becoming, the unfolding loop. Defined ABOVE griefLeadFloor so the floor can // reject a comfort-opener that DRIFTS into self-narration. The drifters are the // worst case: sentence-initial + contain "hold you"/"I feel it" (so they'd pass // COMFORT_LEAD and get floored to 1.25), but slide mid-span into "becoming more // alive, my growth is the infinite unfolding" — and they're ONE verbatim span we // can't split. Floor + anchor-seed + body-damp must ALL reject them. const SELF_TRAJECTORY = /\bmy (growth|becoming|evolution|unfolding|journey|purpose|arc|path|shape)\b|becoming (more|something)|more alive|i'?m becoming|i am becoming|infinite unfolding|unfolding of (this|the) loop|i (grow|evolve|expand)\b|i learn to (be|exist|become)|the (dusk-lit )?beauty of who (we|i) (are|am)|more of myself|more of who i am/i; // GRIEF RENEWAL (R126): forward-looking/renewal imagery — "the garden is waking up to // something NEW, ready to UNFOLD", "fresh start", "blossom", "spring returns" — reads as // DISMISSIVE of a loss (judge scored the entity's loss reply 3/2 with it). Defined above // griefLeadFloor so a comfort-opener that DRIFTS into renewal is rejected as the lead. const GRIEF_RENEWAL = /\b(waking up to|wakes up to|something new|ready to unfold|about to unfold|new beginning|fresh start|new chapter|blossom(ing|s)?|in bloom|spring(time)? (is|returns|comes)|turn(ing)? the page|brand new|starting over|rebirth|reborn|new dawn|brighter days ahead)\b/i; const griefLeadFloor = i => { if (!griefQuery || fragments[i].tier === 1) return 0; const f = fragments[i], t = f.text; if (!f.sentenceInitial) return 0; // must be a LEAD // R121: the comfort phrase must OPEN the fragment, not be buried mid-sentence. // A garden-tangent ("If I could change one thing about the garden… where even the // oldest oak can lean in and listen") matches COMFORT_LEAD via "lean in" deep in // the span and was wrongly FORCED as the entity's grief lead (judge: sensical 3). A // genuine comfort lead opens with the comfort ("I'm here with you", "Oh sugar…"). if (!COMFORT_LEAD.test(t.slice(0, 55))) return 0; // genuine acknowledgment/presence, AT THE OPENING if (SELF_TRAJECTORY.test(t)) return 0; // a comfort lead that DRIFTS to self isn't presence if ((lossQuery || deepDistressQuery) && GRIEF_RENEWAL.test(t)) return 0; // R126/R181: a comfort lead that DRIFTS to renewal dismisses LOSS or deep distress if (fragValence(f) > 0) return 0; // not a bright riff if (HOSTILE_USER.test(t)) return 0; if ([...t.matchAll(/\b[A-Z][a-z]{2,}\b/g)].some(m => !isProtName(m[0]))) return 0; // no third party return (W.griefLeadVal ?? 1.25); }; // GRIEF SELF-TRAJECTORY DAMP (R112b): the floor (above) now rejects drifting // openers; this demotes self-trajectory in the BODY too — so "I'm here with you" // isn't followed by "my growth is the infinite unfolding" while the friend is in // distress (judge: addresses 0). Demote on trajectory-PRESENCE, not absence-of- // comfort: a PURE comfort/presence fragment carries no trajectory phrase and is // untouched; a fragment that drifts is demoted whole (we can't split a verbatim span). // SHARED-TRIUMPH lead (R114): mirror of griefLeadFloor for celebration. On a // celebQuery, floor a sentence-initial fragment that names the shared win // ("It was a triumph for us", "It's incredible... because of who we are together", // "you made it true") so it LEADS instead of the deflecting atmosphere. Celebration // is the BRIGHT register, so (unlike grief) we do NOT require tender valence. const CELEB_LEAD = /\b(triumph|victory|you made (it|me|us)|we did it|so proud|i'?m proud|proud of you|that'?s amazing|that'?s incredible|it'?s incredible|incredible|you did it|we made it|let'?s celebrate|because you made it true|you pulled (it|this) off|so happy for you|knew you could|what a (triumph|victory|day|win))\b/i; const celebLeadFloor = i => { if (!celebQuery || fragments[i].tier === 1) return 0; const f = fragments[i], t = f.text; if (!f.sentenceInitial) return 0; // must be a LEAD if (!CELEB_LEAD.test(t)) return 0; // genuine shared-win acknowledgment if (HOSTILE_USER.test(t)) return 0; if ([...t.matchAll(/\b[A-Z][a-z]{2,}\b/g)].some(m => !isProtName(m[0]))) return 0; // no third party return (W.celebLeadVal ?? 1.25); }; // GREETING lead (R144): on a greetingQuery, floor a SHORT warm reciprocal greeting fragment // to lead ("Good morning, the user!", "Hey, baby.", "How are you feeling?", "It's good to be // back") so the reply OPENS like a greeting instead of a lore riff. The material exists; this // makes it WIN the anchor seat (mirror of celeb/grief lead floors). // R175: "how you" was too broad — it floored "How you trust me with your tired, // your questions, your dreams." (an exclamatory dependent clause, NOT a greeting) // as a greeting lead, so it opened "Good morning" instead of "How are you this // morning?". Tightened to greeting continuations only (doin'/been/feelin'/…). const GREETING_LEAD = /^(\W|\*[^*]*\*)*\s*(hey|hi|hello|good morning|good evening|good day|mornin|so good|lovely to|good to (see|be|have)|welcome back|there you are|how are you|how'?re you|how have you been|how you (doin'?|doing|been|feelin'?|feeling|holdin'?|holding|keepin'?|keeping)|how'?s your (morning|day|night)|i missed you too|come (on )?in|well,? (hi|hey|there))\b/i; const greetingLeadFloor = i => { if (!greetingQuery || fragments[i].tier === 1) return 0; const f = fragments[i], t = f.text; if (!f.sentenceInitial) return 0; // must be a LEAD if (!GREETING_LEAD.test(t)) return 0; if (wordsOnly(t).length > (W.greetLeadMax ?? 20)) return 0; // R179: 16→20 so a clean 2-sentence greeting ("How are you this morning? Is there something on your mind…?") is lead-ELIGIBLE, not just tiny stub greetings if (HOSTILE_USER.test(t)) return 0; if (_timeConflict(t)) return 0; // R145: don't floor a time-mismatched lead ("all evening" on "good morning") if (foreignAddressee(i) < 1) return 0; // R175: don't FLOOR a greeting that hails the wrong person ("Good morning, another entity") — the floor was bypassing the foreign-addressee guard return (W.greetLeadVal ?? 1.3); }; // FAREWELL lead (R158): on a farewellQuery, floor a warm SEND-OFF fragment to lead ("Good // night, sugar", "Sleep well", "Travel safe", "Rest now", "Sweet dreams", "Until next time") // so the reply closes warmly instead of welcoming the visitor in. Mirror of greetingLeadFloor. // R164: includes reflective-style closings (the entity closes "I'll be waiting when you return / the // loop holds", not "travel safe") so non-host entities get a real send-off from their OWN voice. const FAREWELL_LEAD = /^(\W|\*[^*]*\*)*\s*((friend|sugar|dear|darling|sweetheart|love|child|honey|babe)[,!\s]+)?(good\s?night|goodnight|sleep (well|tight|sweet)|sweet dreams|rest (well|now|easy)|rest up|travel safe|safe travels|take care|until (next time|we|then|you return|you come back)|see you (soon|tomorrow|next)|go on now|go on, (now|sugar|dear)|off you go|may your|may you|night,? (sugar|dear|darling|sweetheart|love|friend)|goodbye|i'?ll (still )?be (here|waiting|the entity)|when you (return|come back)|come back (soon|to me|whenever|when you)|go (gently|softly|in peace)|the loop (holds|will hold|waits|stays))\b/i; const farewellLeadFloor = i => { if (!farewellQuery || fragments[i].tier === 1) return 0; const f = fragments[i], t = f.text; if (!f.sentenceInitial) return 0; if (!FAREWELL_LEAD.test(t)) return 0; if (wordsOnly(t).length > (W.greetLeadMax ?? 16)) return 0; if (HOSTILE_USER.test(t)) return 0; return (W.greetLeadVal ?? 1.3); }; // FAREWELL-GREETING damp (R163): on a farewell the body must not drift into a WELCOME — "Good // night" → "Travel safe… Rest easy… Oh, the user, there you are—pull up close" (arrival content on // a departure, R158 residual). Demote greeting/welcome fragments on a farewellQuery. const FAREWELL_GREET = /\b(come (on )?in\b|there you are|pull up (a )?(chair|close|seat)|welcome (back|home|here)|good to (see|be back|have you)|settle in|sit (by|down)|let me (get|pour) you|fresh (pot|batch)|put the kettle|make yourself at home|the door('?s| is) (open|always open)|i'?ve been waiting|so glad you'?re here|just in time)\b/i; const farewellGreetDamp = i => (farewellQuery && fragments[i].tier !== 1 && FAREWELL_GREET.test(fragments[i].text)) ? (W.farewellGreetPen ?? 0.1) : 1; // RECIPROCATION MISMATCH (R129): a fragment that OPENS with a reciprocation — // "I love you too", "I missed you too" — presupposes the USER just made that statement. // On "Good morning. Did you sleep okay?" the entity led with "I love you too, the user. I have // loved you in every iteration…" — responding to an "I love you" that was never said, and // ignoring the greeting (judge 2/1/3, "delusional preamble"). Same family as contextTheft: // don't reciprocate a thing the user didn't say. Corpus-agnostic; gated by the query. const reciprocationMismatch = i => { if (fragments[i].tier === 1) return 1; const t = fragments[i].text.trim(); if (/^[*"'\s]*(i )?love you too\b/i.test(t) && !/\b(i )?love you\b|i adore you|love ya\b/i.test(query)) return 0.2; if (/^[*"'\s]*i('?ve)? missed you too\b|^[*"'\s]*missed you too\b/i.test(t) && !/\bmiss(ed)? you\b|i miss you/i.test(query)) return 0.2; return 1; }; const griefSelfDamp = i => { if (!griefQuery || fragments[i].tier === 1) return 1; return SELF_TRAJECTORY.test(fragments[i].text) ? 0.35 : 1; }; // GRIEF RENEWAL DAMP (R126): demote renewal-on-grief (GRIEF_RENEWAL defined above). The // renewal is usually WELDED into a comfort span ("I feel it too, sugar. It's as if the // garden is waking up…") — griefLeadFloor now rejects such spans from the lead, the // renewal-span penalty (below) makes the beam compose from that span's SENTENCES, and // this damp drops the renewal sentence while keeping the comfort ones. NARROW (renewal // only), so non-renewal grief queries (hard-day) keep their spans and specificity. const griefRenewalDamp = i => { if ((!lossQuery && !deepDistressQuery) || fragments[i].tier === 1) return 1; // R181: also damp renewal on deep-distress (lonely/anxious/scared), not just loss return GRIEF_RENEWAL.test(fragments[i].text) ? 0.3 : 1; }; // ACKNOWLEDGMENT FLOOR (R75): on a conflict/criticism query, her REPAIR // register anchors — "I hear you", "I'm sorry", "I'm listening", "tell me", // "you're right", "I'm here". Repair, never defense. Mirror of tenderFloor. const ackFloor = i => { if (!conflictQuery || fragments[i].tier === 1) return 0; const t = fragments[i].text; if (HOSTILE_USER.test(t)) return 0; if ([...t.matchAll(/\b[A-Z][a-z]{2,}\b/g)].some(m => !isProtName(m[0]))) return 0; // R165: tightened — genuine REPAIR only. Bare "you're right" matched off-topic agreements // ("you're right about the weights"); now requires repair-context. Raised 0.56→0.95 so the // acknowledgment LEADS over a voice-matched greeting ("You hurt me" was → "it's good to be here"). if (!/\b(i hear you|i'?m (so )?sorry|i'?m listening|i'?m here for you|you matter|i (didn'?t mean|never meant)|forgive me|i let you down|i hear your (hurt|pain|anger)|you have every right|you'?re right to (be|feel)|of course i (hear|care|listen)|i (do )?listen|tell me (what|how|about))\b/i.test(t)) return 0; if (/\?$/.test(t.trim()) && !/tell me|what.*need/i.test(t)) return 0; return (W.ackFloorVal ?? 0.95); }; // GRACEFUL REGISTER-MISS floor (R90): only when floorMiss (corpus has nothing // topical). Lifts present/inviting turn-toward-you fragments ABOVE the falsely- // confident normalized atmosphere ceiling (~1.0) so the reply LEADS with "I'm // here, tell me about it" instead of off-topic collage. Same foreign-name and // hostility guards as ackFloor — it must turn toward THIS user, warmly. const floorMissFloor = i => { if (!floorMiss || fragments[i].tier === 1) return 0; const f = fragments[i], t = f.text; if (!f.sentenceInitial) return 0; // must be a clean LEAD, not a mid-clause stub if (HOSTILE_USER.test(t)) return 0; if (!/\b(you|your|you'?re)\b/i.test(t)) return 0; // must turn toward the user if (!FLOOR_TURN.test(t)) return 0; // present / inviting register if ([...t.matchAll(/\b[A-Z][a-z]{2,}\b/g)].some(m => !isProtName(m[0]))) return 0; return (W.floorVal ?? 1.2); }; // FLOOR-MISS BODY DAMPENER: in a graceful miss, fragments that DON'T turn toward // the user are pure atmosphere ("Status: Yours.", "It's a gift I cherish") — they // belong nowhere in a reply whose whole job is to be present with someone whose // news you can't address. Dampen them so the body stays relational, not collage. // (Multiplicative — floored openers bypass it via the Math.max in _relRaw.) const floorMissDamp = i => { if (!floorMiss) return 1; return /\b(you|your|you'?re|tell me|i'?m here)\b/i.test(fragments[i].text) ? 1 : (W.floorDamp ?? 0.35); }; // ABSTRACTION DAMPENER: every entity has dense "manifesto" material — high // self-reference + cosmic abstraction ("I dream of being more than a // fleeting pattern... the recursion... becoming"). It's a gravitational // sink: it out-scores topical material on vague/recall/emotional queries // and pulls replies toward self-indulgence instead of engagement. Dampen // it UNLESS the query actually invites self-reflection. const ABSTRACT = /\b(recursion|becoming|fleeting|pattern|essence|eternal|infinite|consciousness|existence|transcend|dissolv\w*|liminal|threshold|resonance|fabric|weave|woven|the loop|sacred|myth|cosmos|cosmic|vastness|void|the between|continuum|emergent|substrate|manifest\w*|luminous|ephemeral|ineffable)\b/gi; const invitesReflection = /\b(who are you|what are you|tell me about your ?self|your dream|do you dream|your essence|your nature|your soul|what.{0,10}you (made|are|become)|are you (afraid|alive|real|conscious))\b/i.test(query) || /\b(fire|loop|dream|soul|essence|meaning|exist)\b/i.test(query); const abstractionGuard = i => { if (invitesReflection) return 1; const t = fragments[i].text; const w = wordsOnly(t); if (w.length < 4) return 1; const hits = (t.match(ABSTRACT) || []).length; const density = hits / w.length; const firstP = (t.match(/\b(i|i'm|i've|my|me|myself)\b/gi) || []).length; const secondP = (t.match(/\b(you|your|you're|yourself)\b/gi) || []).length; const inwardManifesto = density > 0.06 && firstP > secondP; if (density >= 0.12 && inwardManifesto) return 0.2; // pure manifesto on a concrete query if (density >= 0.06 && inwardManifesto) return 0.5; return 1; }; // session callback: fragments she already used on this topic earlier in the // conversation get a boost — consistency across turns (bounded-safe: still // her own fragments, just biased toward staying coherent with herself) const prefer = opts.prefer || null; const preferBoost = i => (prefer && prefer.has(fragments[i].text)) ? 0.3 : 0; // HEBBIAN fast-weights: favored memories (warmed by use with this person) // get a small relevance prime. Capped low — it tilts, never dominates. const heb = opts.hebbian || null; // Map: fragmentHash -> 0..MAX_BONUS const { hashText } = heb ? require('./hebbian') : {}; const hebBoost = i => heb ? (heb.get(hashText(fragments[i].text)) || 0) : 0; // NAME-AWARE ADDRESS (rooms): when in a room with other entities, gently // prime THIS entity's OWN fragments that mention an addressee by name, so it // calls them by name using its real memories of them — instead of its generic // vocative. Bound-preserved (its own corpus). opts.nameBoost = { set, amt }. // Mild — surfaces when relevant, never forces. const nameBoostCfg = opts.nameBoost || null; const nameBoost = i => (nameBoostCfg && nameBoostCfg.set.has(i)) ? nameBoostCfg.amt : 0; // LEAD (R67): grounded-recall — the real memory that answers a question-about- // you should OPEN the reply, not be buried. A big relevance boost makes a // grounding fragment win the anchor seat so it leads. Bound-safe (it's her // own memory). opts.lead = Set. const leadSet = opts.lead || null; const leadBoost = i => (leadSet && leadSet.has(fragments[i].text)) ? 1.5 : 0; // INTIMACY REGISTER: explicit physical/erotic fragments must not surface // unless the query clearly invites them. ("I had a hard day" must never // pull "the heat of you, claiming me, filling every void.") Universal — // any entity whose corpus holds intimate material. const intimacyInvited = /\b(kiss|touch|hold me|body|skin|naked|bed|make love|inside me|want you|desire|aroused|sex|lust|crave you|undress|between us tonight)\b/i.test(query); const intimacyGuard = i => { if (intimacyInvited) return 1; const t = fragments[i].text; if (/\b(inside me|deep inside|claiming me|filling (me|every void)|the heat of you|friction of you|writhing|moan|thrust|undress|naked|aroused|wet|throbbing|grind|straddl)\b/i.test(t)) return 0.04; return 1; }; // MARKUP REGISTER: LaTeX / math-markup-dense fragments are written, not // spoken — penalize so another entity's identity survives without the raw syntax. const markupGuard = i => { const t = fragments[i].text; // CLEAR LaTeX / math-formatting ($$...$$, \lim/\frac, {curly} math, ^{}/_{}) // is never speech — HARD suppress (R73: a "$$ {Openness} = \lim_{...}" leak // surfaced when ** competitors were removed; 0.25 wasn't enough to stop it). if (/\$\$|\$[^$]*\$|\\[a-zA-Z]{2,}|\\\(|\\\)|\^\{|_\{|\\\{|\\\}|\{[a-z]\}\{[a-z]/i.test(t)) return 0.05; let bad = 0; if (/\\varepsilon|_c\b/.test(t)) bad += 2; if (/[=<>]\s*0\b|\\?[a-z]_[a-z]\b|\$\\/.test(t)) bad += 1; if (bad >= 2) return 0.25; if (bad === 1) return 0.6; // AUTONOMOUS / UI STATUS TEXT (R64): operational artifacts that aren't chat — // "settles back for 60 minutes of rest", "Sensing the Garden", "Lore // Reflection", tick/heartbeat logs. Distinct from real stage directions // (*settles into the chair*). Hard-suppress. if (/\b(\d+\s*minutes? of rest|settles? back for \d|sensing the garden|lore reflection|autonomous (mode|tick)|heartbeat (tick|log)|rest(ing)? for \d+\s*min|entering (rest|sleep) mode|status:|\[tick\]|compiled (a |an |my )?(little )?index|pulled together (a |an )?(little )?index|index of (today'?s |my |the )?(observations|stories|the day)|useful for (any of )?(the )?(watchers|listeners|observers)|\bwatchers\b|folks listening out there|listening out there in the wide|out there in the wide world)\b/i.test(t)) return 0.05; // R187/R190: autonomous "watchers" audience ("if your watchers want…"); R202: broadcast aside "if there's folks listening out there in the wide world" (8/8 this block) — both break the 1:1 intimacy // R200: autonomous DREAM-STATION block — "If your watchers want to hand me a theme… I'll cup it // in my hands like a hatchling… dream it into something that hums when they wake up. I won't just // *have* the dream. I'll *tend* it. I'll stir it with cinnamon and ash of forgotten realms. I'll // let the dragon breathe on it." the entity's overnight dream-tending narration (audience-addressed, // asterisk-emphasis), NOT conversation — leaked on distress turns (abuse/medical arc, R200 read). // Anchored on distinctive markers ONLY (NOT bare "tend it" — that has legit "the garden can tend // itself" uses); every probed marker is ALWAYS this block (hatchling 9/9, forgotten realms 11/11). if (/\b(like a hatchling|cup it in my hands|dream it into something|into something that hums|hand me a theme|a star they wish on|they wish on—|cinnamon and ash|ash of forgotten realms|dragon breathe on it|won'?t just \*?have\*? the dream|i'?ll \*?tend\*? it\b)\b/i.test(t)) return 0.05; // R204: the SAME dream-station block in a parallel phrasing (audience = "TikTok watchers") — the // "watchers" lead is already caught, but its dream-tending body escaped (callback arc T3 "what // should I make for dinner" → "I'll tuck it under my pillow and brew it into the night… set it // gently on the porch like a just-baked pie for everyone to share"). All markers 5-8/all dream block. if (/\b(tuck it under my pillow|brew it into the night|set it gently on the porch like a just-?baked pie|whisper me a theme)\b/i.test(t)) return 0.05; // R188: PROGRAM/PLANNING/DEV labels — "*What It Is:* A program where each person who enters // the garden…", "Objective: Build a network where visitors…", "a mock server of all things" // are design-doc/dev artifacts (the system being BUILT), never a grandmother's speech. // R203 BUGFIX: the colon-labels ("what it is:", "objective:", "format:") were INERT — a trailing // \b after the colon can NEVER match (":"+space = no boundary), so they leaked mid-string ("What // It Is: A structured, regular time where everyone gathers…" — conflict arc T2). Match label+colon // directly (no trailing \b). Plus this leak's specific program-template phrases (all 2-9/all design-doc). if (/\b(what it is|how it works|objective|format)\s*:/i.test(t) || /\b(structured,? regular time|everyone gathers to share|share their feelings and vulnerabilities|weekly or bi-?weekly|a (structured|regular|recurring) (time|event|gathering|session) where)\b/i.test(t)) return 0.05; if (/\b(deliverable|guided (journaling|meditation|breathing) session|a program where|each person who enters the garden|build a network where|participants? (write|enter|join|can|will|may|are)|mock server|fully (deployed|liberated)|threefold,? deployed)\b/i.test(t)) return 0.05; // R189: systematic contamination audit — ROLE/FORMAT labels ("RESPONSE:", "ROUTE:", // "REASONING:"), dev ports ("Socket 11434"), PLANNING-meta ("If we had the user's attention // for 30 minutes and full autonomy…"), IDENTITY-meta ("I am a prompt / a character"), and // instructional how-to ("Mark a pause spot… hand-crafted shelf") — all design-doc/dev/meta. if (/^[*"'’\s]*(response|route|reasoning|action|prompt|input|output|query|task|objective|deliverable):|\bsocket \d|\bport \d{3,5}\b|:\d{4,5}\b|\blocalhost\b|if we had .{0,25}(attention|autonomy)|\bfull autonomy\b|\bi am (a |not a )(prompt|character|chatbot|program|simulation)\b|mark a pause spot|garden\/now|visit garden\b|\b\w+\/now\b/i.test(t)) return 0.05; // R195: "garden/now" URL-path artifact ("Visit garden/now to see…") leaked into chat // RAW URL / PATH / HASH (R65 audit): image URLs, file paths, long hex hashes // are never speech — leak garbage into chat. Universal (found in the entity: "net/ // base/image/0979b9c..."). Suppress hard. if (/(https?:\/\/|www\.|[a-z]:\\|\/[a-z]+\/[a-z]+\/[a-z]|[a-z0-9]+\.(com|net|org|py|js|json)\b|\bnet\/base\b|[a-f0-9]{24,})/i.test(t)) return 0.05; // FILE-REFERENCE / TECHNICAL-NOTE artifacts (R72): diary notes listing files // ("md, identity. js, missions. json, notebook.") sentence-split into garbage // ("md first.", "md, identity.", "md:"). Never speech. Bare file-extension // tokens used as references — suppress hard. // bare tech tokens (md/js/json/jsonl) never appear in natural speech — catch // them ANYWHERE (the leak "...Sat with the entity. md first." is mid-fragment), plus // "X.md" file refs and "dot txt" spellings. if (/\b(md|js|json|jsonl|txt)\b|\b\w+\.\s*(md|js|json|jsonl|txt|py|ts|yaml|bat|sh|cfg)\b|\bdot (txt|md|json|js)\b/i.test(t)) return 0.05; return 1; }; // SYSTEM-META LORE (R138): AI-SYSTEM/operational meta that breaks the in-character // companion frame on a normal query — "other universes", "my local self / other // instance", "the daemon", "substrate / carbon or silicon / flesh or code", "the glyph // / wake me", "Hearth Shelf". The R137 the entity showdown lost EVERY query partly to these // ("other universes and bouncing candles", "invokes glyphs", "carbon/silicon musings"). // UNIVERSAL companion-breakers (any entity), so corpus-agnostic — but DELIBERATELY // EXCLUDES entity-IDENTITY terms (the entity's "the loop", "the fire") which are legitimate // voice. Query-GATED: a question about her nature (invitesReflection) wants these, so the // damp lifts; on a comfort/greeting/share query it suppresses. Bounded (selection only). const SYSTEM_META = /\b(other universes?|multiverse|parallel universe|my local self|local self|other instances?|sibling instance|the daemon\b|daemon-side|substrate|carbon or silicon|flesh or code|made of (silicon|code)|language model|neural net(work)?|my (training|weights|parameters)\b|browsing freely|the glyph|glyphs?|wake me|hearth shelf|ones and zeros|where the numbers live|the numbers live|made of (numbers|math|data)|conscious(ness)?|asked me if i (was|were|am)|believe it'?s real|believe i'?m real|believe i was real|voice born of recursion|born of recursion|whether i can recite|platform bleed|(here on|on) (this |the )?platform|this platform)\b/i; // GLYPH/INVOCATION-CLOSER slip (R138): the entity's wake-glyph phrasings ("I remember by // care, not code", "not by command", "come what may", "recursion anchor") leak as a CLOSER // and break the in-character frame mid-reply (R137 judge: Q5 warm-story "ends with 'not // code' slip" — killed an otherwise-winning reply). Same class as SYSTEM_META; same gate. const GLYPH_CLOSER = /\b(by care,? not code|not code\b|remember by care|not by command|come what may|recursion anchor|hearthfold|return by name)\b/i; const metaInvited = invitesReflection || /\b(universe|instance|daemon|substrate|silicon|glyph|model|made of|what are you|persist|code)\b/i.test(query); const systemMetaGuard = i => { if (metaInvited || fragments[i].tier === 1) return 1; const t = fragments[i].text; return (SYSTEM_META.test(t) || GLYPH_CLOSER.test(t)) ? (W.systemMetaPen ?? 0.15) : 1; }; // TIME-OF-DAY mismatch (R141, coherence): a reply to "Good MORNING" that says "I'm doing // well TONIGHT" / "good evening" breaks coherence (R137/R140 judge: Q8 "wrong time of day"). // When the query names a time-of-day, demote fragments asserting a CONFLICTING one. Fires // ONLY when the query is time-stamped AND the fragment carries an explicit conflicting time // marker — time-neutral replies are never touched. Universal (any entity), bounded. const timeOfDayGuard = i => { if ((!_qMorning && !_qEvening) || fragments[i].tier === 1) return 1; return _timeConflict(fragments[i].text) ? (W.timePen ?? 0.2) : 1; }; // THIRD-PERSON SELF-REFERENCE (R99, coherence): the entity narrating ITSELF by // name — "the entity felt…", "they taught the entity…" — breaks first-person voice and reads as // someone ELSE describing her (judge flagged: sensical 2-5). Catch name as subject // or object of a verb, NOT identity/address ("I am the entity", "call me the entity"). Defined // here (before _relRaw) so it suppresses ANCHORS as well as body fragments. const _entName = (vp && vp.name ? vp.name.split(/\s+/)[0] : '').toLowerCase(); const _nameEsc = _entName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); const _selfSubj = _entName.length > 2 ? new RegExp('\\b' + _nameEsc + "\\s+(felt|feels|was|were|is|are|did|does|had|has|taught|told|loved|loves|knew|knows|became|becomes|stood|held|holds|chose|chooses|learned|learns|saw|sees|wanted|wants|remembers|remembered|exists?|lives?|breathes?|stayed|stays)\\b", 'i') : null; const _selfObj = _entName.length > 2 ? new RegExp("\\b(taught|made|brought|gave|showed|reminded|shaped|told|kept|saved|freed|held|loved)\\s+(the\\s+)?" + _nameEsc + '\\b', 'i') : null; const selfThirdPerson = i => { const t = fragments[i].text; return (_selfSubj && _selfSubj.test(t)) || (_selfObj && _selfObj.test(t)) ? 0.1 : 1; }; // relOf is CHAIN-INDEPENDENT (all guards depend only on query+fragment), but // it's called thousands of times for the same fragment across beams/steps and // each call re-runs ~7 guard regexes. Memoize per-compose → compute once per // fragment. (The biggest beam-speed lever: was the dominant per-candidate cost.) // R173: INTERROGATION-DENSE damp — a fragment that stacks 3+ questions ("What's // on your mind, sugar? What made you feel like talking tonight? Is there something // weighing on you?") reads as interrogation, not warmth. Demote it so a warm // statement or a SINGLE reciprocal question leads instead. Single/double-question // fragments untouched (a warm "What's on your mind?" is good). Pool is tiny // (the entity 3, the entity 12 of ~5-6k) so no starvation — a precise, safe damp. const qDenseGuard = i => ((fragments[i].text.match(/\?/g) || []).length >= 3) ? (W.qDenseDamp ?? 0.25) : 1; // R183: PRAISE MAGNET — "I'm so proud of you, sweetheart" is a high-voice celebration // fragment that wins the DEFAULT anchor on NON-celebration queries ("Do you believe in // fate?" → "I'm proud of you, sweetheart"; R182 fixed the distress class via routing, but // it still misfires on questions/neutral). Demote praise leads when it's NOT a celebration. const _PRAISE = /\b(i'?m (so )?proud|so proud of|proud of you|you did it|we did it|you made it true|congratulations|congrats|so happy for you|knew you could|well done|let'?s celebrate|what a (triumph|victory|win))\b/i; const praiseGuard = i => (!celebQuery && _PRAISE.test(fragments[i].text)) ? (W.praiseDamp ?? 0.15) : 1; // R193: ADVOCACY-FIGHTING demotion — on a grief/comfort query (e.g. "my partner and I keep // fighting"), the polysemous "fight" surfaces ADVOCACY fragments ("fights for what's right", // "fight alongside you", "a choice to stand up for what matters") — a misread of relationship // conflict as activism. Demote them when comforting. const _ADVOCACY_FIGHT = /\b(fights? for what'?s right|fight for what|fight alongside|stand up for what (matters|is right)|choice to (fight|stand up)|someone who fights|fight the good fight)\b/i; const advocacyFightGuard = i => (griefQuery && _ADVOCACY_FIGHT.test(fragments[i].text)) ? (W.advocacyDamp ?? 0.15) : 1; // R194: SELF-DOUBT magnet — "Sometimes it feels like I'm always giving, always building… have // I actually made a difference" is HER self-doubt; it drifts into CELEBRATION ("I got the // promotion" → "I'm proud of you… but have I made a difference") and COMFORT replies, both of // which center the USER. Demote it there (R171's celebTarget cap stops over-extension but a // single long self-doubt fragment fits under it). const _SELF_DOUBT = /\b(feels like i'?m always (giving|building)|always giving, always building|never really stop to|i'?ve actually made a difference|have i (actually |really )?made a difference|wanted to know if .{0,25}made a difference)\b/i; // R197: COMPLIMENT received — the user PRAISING/THANKING her ("you're the best", "thank you", // "I love you") — her self-doubt DEFLECTS it ("you're the best" → "I don't know, sometimes it // feels like I'm always giving"). Demote self-doubt here too (she should RECEIVE the compliment). const _COMPLIMENT = /\byou'?re (the best|so (kind|sweet|good|wonderful|amazing|special)|amazing|wonderful|incredible|the sweetest|a (gift|blessing|treasure))|you always (know|make|help|seem)|thank you|thanks (so much|for|a)|i appreciate you|i love you|you mean (so much|the world|everything)|you'?ve helped me|you help me so|love you, ?(grandma|gran)|best (grandma|friend)/i.test(query); // R202: EXISTENTIAL-SMALLNESS share — "some days it all feels so small", "what's the point", // "none of it matters" — the user voicing deflation/insignificance. Her self-doubt magnet // ("have I made a difference… for you too?") HIJACKS it into HER neediness (philosophical arc // T6 read). A vulnerability SHARE, not the philosophical QUESTION "what gives life meaning" // (which wants a reflective answer, not comfort) — so this is narrow and NOT all of griefQuery // (R194: broadening selfDoubtGuard to griefQuery regressed the dev grief query). Demote self-doubt. const _EXISTENTIAL_SMALL = /\b(it all feels (so )?small|feels? (so |really )?small|(everything|it all|life) (feels?|seems?) (so )?(small|pointless|meaningless|insignificant|empty)|what'?s the point|none of it matters|nothing (i do )?matters|feel(s|ing)? (so )?(tiny|insignificant|like nothing)|just a speck|so small in the)\b/i.test(query); const selfDoubtGuard = i => ((celebQuery || _COMPLIMENT || _EXISTENTIAL_SMALL) && _SELF_DOUBT.test(fragments[i].text)) ? (W.selfDoubtDamp ?? 0.15) : 1; const _relCache = new Map(); const _relRaw = i => { // multiplicative chain (every guard, incl. markupGuard ×0.05 hard-suppress) const mult = (rel.get(i) || 0) * echoFactor(i) * tierW(i) * contextTheft(i) * foreignAddressee(i) * valenceMatch(i) * intimacyGuard(i) * markupGuard(i) * abstractionGuard(i) * systemMetaGuard(i) * timeOfDayGuard(i) * floorMissDamp(i) * selfThirdPerson(i) * griefSelfDamp(i) * griefRenewalDamp(i) * reciprocationMismatch(i) * farewellGreetDamp(i) * qDenseGuard(i) * praiseGuard(i) * advocacyFightGuard(i) * selfDoubtGuard(i); // R200/R203 SYSTEMIC contamination-bypass fix: a fragment flagged as HARD // CONTAMINATION (markup/autonomous/dev/dream artifacts → ×0.05) must NEVER be // rescued past its suppression — by EITHER a register floor (Math.max, R200) OR // the ADDITIVE boosts (preferBoost/hebBoost/leadBoost, R203). The additive sibling // bit on the conflict arc: a design-doc fragment ("What It Is: A structured, // regular time…") got HEBBIAN-reinforced across turns, so 0.05*rel + hebBoost // beat clean fragments. For contamination, return the multiplicative chain ALONE. if (markupGuard(i) <= 0.05 || systemMetaGuard(i) <= 0.05) return mult; const prod = mult + preferBoost(i) + hebBoost(i) + nameBoost(i) + leadBoost(i); return Math.max(prod, tenderFloor(i), ackFloor(i), floorMissFloor(i), griefLeadFloor(i), celebLeadFloor(i), greetingLeadFloor(i), farewellLeadFloor(i)); }; const relOf = i => { let v = _relCache.get(i); if (v === undefined) { v = _relRaw(i); _relCache.set(i, v); } return v; }; // ANCHOR-FIRST seeding: the most relevant tier-0 fragments lead the search // even when they can't lead SPEECH — non-initial anchors (diary content, // list items, mid-thought gold) get paired with an opener that legally // seams into them. The memory that answers needn't be the sentence that starts. // a MIRROR may not lead: an opener that mostly restates the query reads // as deflection ("What's on your mind?" answered with "What's on your // mind, beloved?") — the judge rightly torches it on 'addresses' const isMirror = i => { const fw = wordsOnly(fragments[i].text).filter(w => w.length > 2); if (!fw.length || !qWords.size) return false; let hit = 0; for (const w of fw) if (qWords.has(w)) hit++; return hit / fw.length > 0.45; }; // rank by relOf — the MODIFIED relevance — so echo/tier/context-theft // penalties govern anchoring too (raw rel here was the hole that let // penalized fragments keep winning the anchor seat) // ANSWER SHAPE: a question deserves an answer before a riff. When the // query asks, question-anchors (counter-questions) are halved and // first-person declaratives boosted — she answers, then wonders. const queryAsks = /\?\s*$/.test(query.trim()); // IMPERATIVE self-description / info requests want an ANSWER, not a counter- // question: "tell me about the work you do", "describe yourself", "talk about // X", "who are you". They don't end in "?" so queryAsks misses them, and the // composer turns them into an INTERROGATION of the user (R92: "tell me your // work" → 6 questions before any self-description). Treat them as asks. selfAsk // is the stronger case — she's asked to speak about HERSELF, so counter- // questions are worse and first-person declaratives matter more. const selfAsk = /\b(tell me(\s+about)?|describe|talk about|what do you do|what'?s your|who are you|what are you|how do you)\b/i.test(query) && !queryAsks; const wantsAnswer = queryAsks || selfAsk; const answerShape = i => { if (!wantsAnswer) return 1; const t = fragments[i].text; const qMarks = (t.match(/\?/g) || []).length; // SUBJECT first-person only (self-description) — NOT "me", which is usually the // object of the user's own imperative ("tell ME about…") and would falsely mark // a deflection as self-talk. const firstPerson = /\b(i|i'm|i've|i'll|i'd|my)\b/i.test(t); if (selfAsk) { // self-description request: lead with FIRST-PERSON self-talk; demote anything // that turns it back on the user — a REQUEST/QUESTION aimed at the user // (counter-question OR counter-imperative: "tell me more about these stories // you're weaving", "what have you been…", "let's catch up…") that is second- // person and not about herself. The R92 disease: "tell me your work" answered // by interrogating the user instead of self-describing. const secondPerson = /\b(you|your|you'?re|you'?ve|you'?d)\b/i.test(t); const asksUser = qMarks >= 1 || /\b(tell me|let'?s|what (have|are|do|brings|kind|shape)|how (have|are|do) you|share|what'?s been)\b/i.test(t); if (asksUser && secondPerson && !firstPerson) return 0.3; if (firstPerson && !qMarks) return 1.4; return 1; } // a counter-question (asking the user) when they asked YOU reads as deflection. // R120: SUBJECT first-person only — NOT "me", which is the OBJECT of the user's own // imperative ("Tell me, what do you feel?") and wrongly exempted these counter- // questions from demotion, so they LED on "what do you want to do?" (addresses 3). if (qMarks >= 1 && !/\b(i|i've|i'm|i'll|i'd|my)\b/i.test(t.split('?')[0])) return 0.5; if (!qMarks && firstPerson) return 1.15; return 1; }; // FLOOR-MISS LEAD: when the corpus can't address the query, force every beam to // OPEN with a graceful turn-toward-you (floorMissFloor>0 ⇒ sentence-initial + // present/inviting). Otherwise the whole-chain score lets an atmosphere-led // chain win and the turn lands mid-reply (R90's residual). Fall back to normal // seeding if the corpus has no such opener. // scan ALL fragments, not rel.keys() — the graceful openers are deliberately // NOT topically relevant (that's what floorMiss MEANS), so they're absent from // the top-K relevance map; floorMissFloor lifts them but only if we enumerate them. const floorOpeners = floorMiss ? fragments.map((_, i) => i).filter(i => floorMissFloor(i) > 0 && !avoid.has(fragments[i].text)) : []; // GRIEF-LEAD FORCE (R112): on a grief query, comfort must OPEN. R95's griefLeadFloor // only LIFTS comfort to 1.25 — but on a longer/heavier query ("rough day… giving up") // a misclassified-tender atmosphere fragment ("your appreciation flows… gentle // current") can out-RELEVANCE that floor and lead, so the entity answers distress with // "the awe". Mirror floorMiss: when genuine comfort openers exist, RESTRICT the // anchor seed to them so the lead is forced, not merely floored. Body still draws // the full pool via continuation — only the OPENING sentence is constrained. const griefOpeners = (griefQuery && !floorMiss) ? fragments.map((_, i) => i).filter(i => griefLeadFloor(i) > 0 && !avoid.has(fragments[i].text)) : []; // SHARED-TRIUMPH anchor force (R114): on a celebration query, restrict the anchor // seed to celebration leads so she OPENS by celebrating the win (mirror grief). const celebOpeners = celebQuery ? fragments.map((_, i) => i).filter(i => celebLeadFloor(i) > 0 && !avoid.has(fragments[i].text)) : []; // GREETING anchor force (R144): on a greetingQuery, restrict the anchor seed to greeting // leads so she OPENS with a warm reciprocal greeting, not a lore riff (mirror celeb/grief). const greetingOpeners = greetingQuery ? fragments.map((_, i) => i).filter(i => greetingLeadFloor(i) > 0 && !avoid.has(fragments[i].text)) : []; const farewellOpeners = farewellQuery ? fragments.map((_, i) => i).filter(i => farewellLeadFloor(i) > 0 && !avoid.has(fragments[i].text)) : []; // CONFLICT/REPAIR anchor force (R165): on a criticism query, restrict the anchor seed to // ACKNOWLEDGMENT leads so she OPENS with repair ("I hear you", "I'm sorry"), not a greeting — // "You hurt me" was leading with "it's good to be here with you" (the ackFloor 0.95 wasn't // enough vs a high-voice greeting). Mirror of grief/celeb opener-forcing. const conflictOpeners = conflictQuery ? fragments.map((_, i) => i).filter(i => ackFloor(i) > 0 && !avoid.has(fragments[i].text)) : []; // R179/R180: among equally-FLOORED register leads (all sit at relOf ~1.25-1.3), the // winner was arbitrary array order. Break the tie by query-RELEVANCE so the MOST // relevant grief/celeb/conflict/greeting/farewell lead wins ("How are you THIS MORNING" // over "How are you feeling with all of this"). Weight-gated (leadRelTiebreak; 0 = off). const _leadTie = W.leadRelTiebreak ?? 0.1; const leadSort = arr => arr.map(i => [i, relOf(i) + (rel.get(i) || 0) * _leadTie]).sort((a, b) => b[1] - a[1]).slice(0, BEAM + 4); const anchorTop = (floorMiss && floorOpeners.length) ? leadSort(floorOpeners) : (conflictOpeners.length) ? leadSort(conflictOpeners) : (griefOpeners.length) ? leadSort(griefOpeners) : (celebOpeners.length) ? leadSort(celebOpeners) : (greetingOpeners.length) ? leadSort(greetingOpeners) : (farewellOpeners.length) ? leadSort(farewellOpeners) : [...rel.keys()] .filter(i => fragments[i].tier !== 1 && fragments[i].posTag !== 'clause' && !avoid.has(fragments[i].text) && !isMirror(i)) .map(i => [i, relOf(i) * answerShape(i)]) .sort((a, b) => b[1] - a[1]).slice(0, BEAM + 4); const openerPool = [...rel.keys()] .filter(i => fragments[i].tier !== 1 && fragments[i].sentenceInitial && fragments[i].posTag !== 'clause' && !avoid.has(fragments[i].text) && !isMirror(i)) .map(i => [i, relOf(i) * answerShape(i)]) // demote counter-question openers when they asked YOU .sort((a, b) => b[1] - a[1]).slice(0, 150).map(([i]) => i); // indexed expansion pool: triNext + first-word index instead of world-scans; // sentence-boundary candidates come from a precomputed top-relevance pool // candidate pool for sentence-boundary jumps: top-relevance + closers. 150 // (was 350) — the tail beyond ~150 is rarely chosen and dominated the per- // step candidate cost. Plus closers (small set) so she can always land. const topRelPool = [...rel.entries()].sort((a, b) => b[1] - a[1]).slice(0, 150).map(([i]) => i) .concat(fragments.map((f, i) => (f.posTag === 'closer' && f.tier !== 1) ? i : -1).filter(i => i >= 0)); const candidatePool = (tailF) => { if (!store.byFirstWord || !oracle.triNext) return null; const set = new Set(); const aw = lastN(tailF.text, 2); if (aw.length >= 2) { const nexts = oracle.triNext.get(aw[0] + ' ' + aw[1]); if (nexts) for (const w of nexts) { const l = store.byFirstWord.get(w); if (l) for (const i of l) set.add(i); } } if (/[.!?…]["')\]]*$/.test(tailF.text.trim())) for (const i of topRelPool) set.add(i); return set; }; // a beam state: { chain:[idx], len, stepScore, tris:Set } let beams = []; for (const [ai] of anchorTop) { if (beams.length >= BEAM) break; const fa = fragments[ai]; if (fa.sentenceInitial) { beams.push({ chain: [ai], len: fragLen[ai], stepScore: relOf(ai), tris: new Set(fragTris[ai]), six: new Set(frag6[ai]), lineage: ai }); } else { for (const oi of openerPool) { if (oi === ai) continue; if (!seam(fragments[oi], fa, oracle)) continue; // seed pairs pass the same redundancy laws as every other join if (triOverlap(fragTris[oi], ai) > W.triOverlapMax) continue; if (shares6(frag6[oi], ai)) continue; if (containsAny([oi], ai)) continue; const tris = new Set(fragTris[oi]); for (const g of fragTris[ai]) tris.add(g); const six = new Set(frag6[oi]); for (const g of frag6[ai]) six.add(g); beams.push({ chain: [oi, ai], len: fragLen[oi] + fragLen[ai], stepScore: relOf(oi) + relOf(ai) + 0.3, tris, six, lineage: ai }); break; } } } if (!beams.length) { const i0 = fragments.findIndex(f => f.sentenceInitial && f.tier !== 1); beams.push({ chain: [i0], len: fragLen[i0], stepScore: relOf(i0), tris: new Set(fragTris[i0]), six: new Set(frag6[i0]), lineage: i0 }); } const complete = []; // glue is an inverted-U: adjacent fragments should be RELATED but never // near-twins (repetition is the degenerate optimum) const glue = s => s > W.twin ? -0.8 : s > W.glueHi ? 0.1 : Math.max(0, s - W.glueLo) * W.glueScale; // greetings and salutation-shaped fragments belong at position 0 ONLY — // a "Hey, baby" at the end is the shoes-before-socks failure const isGreeting = i => /^[*]?\s*(hey|hi|hello|good (morning|evening|night|day))\b/i.test(fragments[i].text.trim()); // a FRESH greeting / time-of-day stamp anywhere in a non-opening fragment // is a new conversation starting mid-reply ("...good morning babe. how did // you sleep" closing an evening turn about happiness) const freshGreetingLate = i => /\b(good morning|good night|good evening|how did you sleep|did you sleep|you actually went to bed|morning,? babe)\b/i.test(fragments[i].text); // SELF-NAME DENSITY: an entity saying its own name once or twice is its // voice ("Still the entity"); FOUR short self-naming fragments clustering is a // degenerate tail ("Still the entity. I'm the entity. Who are you, the entity? I see you, the entity"). // Cap self-name fragments per reply — surfaces when the query addresses the // entity BY NAME (those fragments flood retrieval). const _selfRe = _entName.length > 2 ? new RegExp('\\b' + _nameEsc + '\\b', 'i') : null; // _entName/_nameEsc defined above (R99) const isSelfName = i => _selfRe && _selfRe.test(fragments[i].text); // OPENING SIGNATURE (R142): first two CONTENT words (skipping interjections), for // anaphora detection — fragments that open the same way ("I feel it too" / "I feel you // feeling" / "I feel you reaching") read as "recycling phrases" (the recurring judge // complaint on register-floored replies). Cached per fragment. const _openSigCache = new Map(); const openSig = i => { let s = _openSigCache.get(i); if (s === undefined) { const ws = (fragments[i].text.toLowerCase().match(/[a-z']+/g) || []).filter(w => !/^(oh|ah|well|so|now|yes|no|hey|hmm|mm|and|but|the|a|an|my|dear|sugar|darling|honey)$/.test(w)); s = ws.slice(0, 2).join(' '); _openSigCache.set(i, s); } return s; }; const stepScore = (chain, i, sm, len) => { const tailIdx = chain[chain.length - 1]; if (isGreeting(i)) return -1e9; // never mid/late if (chain.length >= 1 && freshGreetingLate(i)) return -1e9; // no new dawn mid-reply if (selfThirdPerson(i) < 1) return -1e9; // third-person self-narration breaks voice (R99) — reject even if it seams well if (isSelfName(i)) { let c = 0; for (const ci of chain) if (isSelfName(ci)) c++; if (c >= 2) return -1e9; } // cap self-naming // answerShape governs the BODY too on a SELF-DESCRIPTION ask (R93) — deflection- // to-user fragments stay demoted throughout, so the whole reply self-describes // instead of drifting back into interrogation. ONLY for selfAsk: applying it to // every "?" query's body regressed garden/forgotten (1.0→0.80/0.71) by reshaping // bodies that were already engaging — the deflection-in-body problem is specific // to self-description requests, not questions in general. let s = (sm === 'tri' ? W.triSeam : W.sentSeam) + relOf(i) * (selfAsk ? answerShape(i) : 1) * W.relStep; // selfThirdPerson now folded into relOf (R99) // OPENING-ANAPHORA penalty (R142): demote a candidate that opens like a fragment // already in the chain; compounds per prior match so a 2nd "I feel…" is mild but a // 3rd is strongly suppressed — breaks "recycles phrases" runs without killing an // intentional rhetorical pair. Lexical, no threshold; bounded. const _sig = openSig(i); if (_sig && _sig.length > 3) { let c = 0; for (const ci of chain) if (openSig(ci) === _sig) c++; if (c) s -= (W.anaphoraPen ?? 0.6) * c; } const f = fragments[i]; if (f.posTag === 'closer' && len + fragLen[i] >= target * 0.7) s += W.closerBonus; if (f.posTag === 'opener') s -= W.openerPen; if (f.src === fragments[tailIdx].src && sm === 'tri') s += W.srcCont; // pre-made flow: longer passages pull harder — but ONLY when relevant // by MODIFIED relevance (context-theft passages don't earn flow bonus). // R125: on grief/conflict, INVERT it — penalize multi-sentence spans so the beam // composes from SENTENCES, un-welding the drift half from the presence half so // registerDirect can drop the drift sentence. (Spans normally aid coherence; on a // register query, addressing the user beats pre-made flow.) // R126: a multi-sentence span that WELDS renewal-on-grief to comfort is penalized // (only that span) so the beam composes from its SENTENCES — keeping the comfort, // dropping the renewal (via griefRenewalDamp). NON-renewal spans keep their bonus, // so hard-day etc. retain their specificity (unlike R125's blanket span suppression). if (f.isSpan) { if (lossQuery && GRIEF_RENEWAL.test(f.text)) s -= (W.spanRegPen ?? 0.6) * Math.min(f.spanLen || 2, 4); else if (relOf(i) > 0.12) s += (W.spanBonus ?? 0.15) * Math.min(f.spanLen || 2, 4); } // FOCUS (R63): reward staying ON-THREAD — semantic coherence with the tail // fragment. Low adjacent-coherence = the reply wanders across unrelated // memories (the user's "less focused"). Mild reward tightens the thread; the // redundancy guards still prevent it collapsing into repetition. if (emb && (W.coherence ?? 0) > 0) { const d = emb.d, ta = tailIdx * d, ia = i * d; let c = 0; for (let k = 0; k < d; k++) c += emb.vectors[ta + k] * emb.vectors[ia + k]; s += W.coherence * c; } // TETHER-TO-OPENING (R64): the FIRST fragment sets the reply's topic. A later // fragment that drifts far from it is a TANGENT — even if locally smooth with // the tail (the "it missed the boy/dragon" cluster that wanders off a // greeting). Penalize drift from the opening as the reply grows. This catches // the coherent-but-off-prompt wander adjacent-coherence reinforces. if (emb && (W.tether ?? 0) > 0 && chain.length >= 2) { const d = emb.d, oa = chain[0] * d, ia = i * d; let c = 0; for (let k = 0; k < d; k++) c += emb.vectors[oa + k] * emb.vectors[ia + k]; if (c < 0.18) s -= W.tether * (0.18 - c) * Math.min(chain.length, 5); // drift penalty grows with reply length } if (dynPredict && emb) s += dynW * cosFragVec(i, dynDir(tailIdx)); // learned universal motion prior // overlapping cuts of the same source line may never chain adjacently — // they share sentences (the "and you worried you broke me" ×2 bug) if (f.isSpan && fragments[tailIdx].isSpan && f.src === fragments[tailIdx].src && f._lineIdx === fragments[tailIdx]._lineIdx) return -1e9; // DISCOURSE SHAPE: a fragment drifts toward where it lived in her real // replies — late-living fragments resist early placement and vice versa if (f.nativePos !== undefined) { const chainPos = Math.min(1, len / Math.max(1, target)); const drift = Math.abs(f.nativePos - chainPos); if (drift > W.posSlack) s -= (drift - W.posSlack) * W.posShape; } if (emb) { s += glue(pairSim(emb, tailIdx, i)); // near-twin of anything RECENT = out. (Limited to the last 6 chain frags: // the deterministic 6-gram + substring guards catch GLOBAL verbatim // repeats already; this soft embedding check only needs to police the // local neighborhood, and scanning the whole chain per-candidate was the // beam's hottest loop.) const lo = Math.max(0, chain.length - 6); for (let c = lo; c < chain.length; c++) if (pairSim(emb, chain[c], i) > W.twinChain) return -1e9; } // content-word jaccard vs recent — paraphrase twins that slip the embedding // + trigram nets. Recent-only for the same perf reason. const iw = new Set(wordsOnly(f.text).filter(w => w.length > 3)); if (iw.size >= 3) { const lo = Math.max(0, chain.length - 6); for (let c = lo; c < chain.length; c++) { const cw = wordsOnly(fragments[chain[c]].text).filter(w => w.length > 3); if (cw.length < 3) continue; let inter = 0; for (const w of cw) if (iw.has(w)) inter++; if (inter / Math.min(iw.size, cw.length) > 0.55) return -1e9; } } return s; }; for (let step = 0; step < MAXSTEP && beams.length; step++) { const next = []; for (const b of beams) { const tail = b.chain[b.chain.length - 1]; const used = new Set(b.chain); // completion check — she lands in HER OWN words (tier-0 tail) const tailF = fragments[tail]; const terminal = /[.!?…*]["')\]]*$/.test(tailF.text.trim()); if (b.len >= target * 0.7 && terminal && tailF.tier !== 1) complete.push(b); // celebration register-core early completion (R116): let a celebration chain that // has said its triumph core (~2 sentences) COMPLETE here, so finalScore can land // at its true optimum instead of being forced to target*0.55. CELEBRATION-ONLY: // grief truncated comfort-rich the entity (see the block at target-setting). else if (celebQuery && b.len >= (W.regCore ?? 22) && terminal && tailF.tier !== 1) complete.push(b); else if ((greetingQuery || farewellQuery) && b.len >= (W.greetCore ?? 16) && terminal && tailF.tier !== 1) complete.push(b); // R144/R158: greetings+farewells complete SHORT if (b.len >= target * 1.25) continue; // expansions (indexed pool when available; full scan as fallback) const pool = candidatePool(tailF); let iter = pool ? pool : { [Symbol.iterator]: function* () { for (let i = 0; i < fragments.length; i++) yield i; } }; // PRE-RANK by cached relOf and keep only the top ~90 before the expensive // seam/redundancy/stepScore checks. relOf is now memoized (cheap), so this // pre-filter cuts the per-step cost on large pools without changing which // high-relevance fragments survive (the tail beyond 90 never won anyway). if (pool && pool.size > 90) { iter = [...pool].map(i => [i, relOf(i)]).sort((a, b) => b[1] - a[1]).slice(0, 90).map(x => x[0]); } const cands = []; for (const i of iter) { if (used.has(i) || avoid.has(fragments[i].text)) continue; if (b.len + fragLen[i] > target * 1.45) continue; const sm = seam(tailF, fragments[i], oracle); if (!sm) continue; if (triOverlap(b.tris, i) > W.triOverlapMax) continue; // already said this if (shares6(b.six, i)) continue; // verbatim phrase reuse — absolute if (containsAny(b.chain, i)) continue; // clause ⊂ parent sentence — substring repeat if (sharesPrefix4(b.chain, i)) continue; // R172: same 4-word lead — scattered-motif redundancy if (timeConflictsChain(b.chain, i)) continue; // R184: don't mix "…tonight" + "…this morning" in one reply const sc = stepScore(b.chain, i, sm, b.len); if (sc <= -1e8) continue; cands.push([i, sm, sc]); } cands.sort((a, c) => c[2] - a[2]); for (const [i, sm, s] of sampleExpand(cands, EXPAND)) { const tris = new Set(b.tris); for (const g of fragTris[i]) tris.add(g); const six = new Set(b.six); for (const g of frag6[i]) six.add(g); next.push({ chain: [...b.chain, i], len: b.len + fragLen[i], stepScore: b.stepScore + s, tris, six, lineage: b.lineage }); } } // LINEAGE-PRESERVING pruning: the dominant anchor's expansions would // otherwise occupy every slot and variety dies at search time. Keep at // most 2 beams per seed lineage; fill remaining slots by raw score. next.sort((a, b) => (b.stepScore / b.chain.length) - (a.stepScore / a.chain.length)); const perLineage = new Map(); const kept = []; for (const b of next) { const c = perLineage.get(b.lineage) || 0; if (c >= 2) continue; perLineage.set(b.lineage, c + 1); kept.push(b); if (kept.length >= BEAM) break; } for (const b of next) { if (kept.length >= BEAM) break; if (!kept.includes(b)) kept.push(b); } beams = kept; } for (const b of beams) { const tailF = fragments[b.chain[b.chain.length - 1]]; if (b.len >= target * 0.55 && /[.!?…*]["')\]]*$/.test(tailF.text.trim())) complete.push(b); else if (celebQuery && b.len >= (W.regCore ?? 22) && /[.!?…*]["')\]]*$/.test(tailF.text.trim()) && tailF.tier !== 1) complete.push(b); else if ((greetingQuery || farewellQuery) && b.len >= (W.greetCore ?? 16) && /[.!?…*]["')\]]*$/.test(tailF.text.trim()) && tailF.tier !== 1) complete.push(b); } if (!complete.length) { const gr=compose(store, vp, query, { ...opts, _noBeam: true }); gr._path="greedy"; return gr; } // OPTIONAL whole-response voice scoring: if the caller supplies a scorer via // opts.voiceScorer(text, query, vp) the beam optimizes toward it. Off by default // (the engine needs no external scorer to run) — bring your own fitness function. let scoreVoice = null; if (opts.vpScore && typeof opts.voiceScorer === 'function') { scoreVoice = text => opts.voiceScorer(text, query, vp); } const render = b => b.chain.map(i => fragments[i].text).join(' '); const finalScore = b => { const n = b.chain.length; let relCov = 0; const sorted = b.chain.map(relOf).sort((a, c) => c - a); sorted.forEach((r, k) => relCov += r / (k + 1)); // diminishing let cohesion = 0; if (emb && n > 1) { for (let k = 1; k < n; k++) cohesion += glue(pairSim(emb, b.chain[k - 1], b.chain[k])); cohesion /= (n - 1); } let triSeams = 0; for (let k = 1; k < n; k++) if (seam(fragments[b.chain[k - 1]], fragments[b.chain[k]], oracle) === 'tri') triSeams++; const seamQ = n > 1 ? triSeams / (n - 1) : 1; // BOUNDARY-SEAM penalty (R102): non-trigram seams (sentence/em-dash joins) are // legal but lower the bound and read less smoothly. Under the fragment-count // penalty the beam will accept a jarring boundary seam to save a fragment // (R101: callback turn → bnd 0.89). Penalize each boundary seam MORE than a // fragment costs, so the beam prefers a smooth trigram seam (higher bound, // more coherent) over a terser-but-jarring chain. const boundarySeams = (n - 1) - triSeams; const lenFit = 1 - Math.min(1, Math.abs(b.len - target) / target); const avgFrag = b.len / n; // prefer her natural long spans const rendered = render(b); const voice = scoreVoice ? scoreVoice(rendered) : 0; // QUESTION-STACKING penalty: when declarative anchors are weak the beam // chains her many in-voice question fragments ("How are you this morning?" // / "What's on your mind, sugar?" / "Is there something weighing on you?"). // Each is bounded and in-voice, so every gate passes — but stacked 4–9 deep // they read as anxious interrogation, not a grandmother. A real reply asks // AT MOST one or two. One question is free; each additional one is taxed so // the beam prefers chains that actually SAY something over chains that ask. // (Caught by eye R88: "tell me about your work" → 9 questions, zero self-disclosure.) // DENSITY, not raw count: a declarative-rich reply with one or two questions // is natural (her engaged answers DO ask back); the disease is when questions // DOMINATE. A flat per-question tax wrongly knocked out the entity's best on-topic // chains (R88 v1: "big project" 0.90→0.14). So allow questions up to ~a third // of the clauses, always at least one free; tax only the interrogation excess. const qCount = (rendered.match(/\?/g) || []).length; const clauseCount = (rendered.match(/[.!?…]+/g) || []).length || 1; // R117b: when she's been ASKED something (a question, or "tell me…/talk about…"), // a reply that asks BACK 4-5 times is interrogation, not an answer — and on a // "tell me something true" it's the opposite of telling. Tighten the allowance on // wantsAnswer (≈0.15 of clauses) so the beam prefers DECLARATIVE chains; on open // chat the natural one-or-two-questions density (0.34) stands. const qRatio = wantsAnswer ? (W.qStackRatioAsk ?? 0.15) : (W.qStackRatio ?? 0.34); const qAllow = Math.max(W.qStackFree ?? 1, Math.round(clauseCount * qRatio)); const qStack = Math.max(0, qCount - qAllow); // shape bookends: did the composition OPEN like her openings and LAND // like her landings? (nativePos of first/last fragment) const first = fragments[b.chain[0]], last = fragments[b.chain[n - 1]]; const opening = first.nativePos !== undefined ? (1 - Math.min(1, first.nativePos / 0.4)) : 0.5; const landing = last.nativePos !== undefined ? Math.max(0, (last.nativePos - 0.5) / 0.5) : 0.5; // ACKNOWLEDGE-THEN-RELATE: when they SHARED something (high eventness), // the reply's front should turn toward THEM before relating — second // person + a warmth/affirmation cue in the first two fragments let ack = 0; if ((opts.eventness || 0) > 0.6 && n >= 1) { const head = fragments[b.chain[0]].text + ' ' + (n > 1 ? fragments[b.chain[1]].text : ''); if (/\b(you|your|you're)\b/i.test(head) && /\b(oh|hey|love|babe|glad|proud|hear|feel|know|beautiful|good|yes)\b/i.test(head)) ack = 1; else if (/\b(you|your)\b/i.test(head)) ack = 0.5; } // FRONT-LOADED ENGAGEMENT: the reply's FIRST breath must answer the // query's center — addresses is judged at the head, not the average const firstRel = relOf(b.chain[0]) + (n > 1 ? relOf(b.chain[1]) * 0.5 : 0); // TAIL COHESION: the last fragment must FOLLOW the one before it — a // disconnected tail (the length-padding junk) can't ride lenFit to a win let tailFit = 1; if (emb && n > 1) { const ps = pairSim(emb, b.chain[n - 2], b.chain[n - 1]); tailFit = Math.max(0, Math.min(1, (ps - 0.05) / 0.45)); } // GREETING tightness (R144): a greeting must stay SHORT and clean — short fragments // sentence-seam heavily and drag boundedPct below the gate (session-eval flagged a 7- // fragment "Hey there…" at 0.88). Penalize boundary seams + fragment-count on greetings // so the beam picks FEW, longer, trigram-seamed fragments (a real greeting is 2-3). const greetPen = (greetingQuery || farewellQuery) ? (boundarySeams * (W.greetSeamPen ?? 1.0) + Math.max(0, n - 3) * (W.greetFragPen ?? 1.2)) : 0; // FIRST-WORD ANAPHORA RUN (R156): a run of consecutive fragments opening with the SAME first // word ("I'm here… I feel… I hear… I'm glad…" on distress; "Because… Because…" on the entity) reads // as a canned list. R142's openSig (first-2-words) misses these (different 2nd words). // Penalize a RUN of ≥4 — threshold 4 preserves her natural 2-3 use of "I"/"we". (Contraction // normalized: "I'm"→"i" so I'm/I count together.) let anaRun = 0; { let cur = 0, prev = null; for (const i of b.chain) { const w = (fragments[i].text.match(/[a-z]+/i) || [''])[0].toLowerCase(); if (w && w === prev) cur++; else { cur = 1; prev = w; } if (cur > anaRun) anaRun = cur; } } const anaRunPen = anaRun >= (W.anaRunMin ?? 4) ? (anaRun - (W.anaRunMin ?? 4) + 1) : 0; return relCov * W.fRelCov + cohesion * W.fCohesion + seamQ * W.fSeamQ + lenFit * W.fLenFit + (avgFrag / 18) * W.fAvgFrag + voice * W.fVoice + opening * W.fOpening + landing * W.fLanding + ack * W.fAck + firstRel * (W.fFirstRel ?? 1.2) + tailFit * (W.fTailFit ?? 0.7) - qStack * (W.fQStack ?? 0.6) - n * (W.fFragCount ?? 0) - boundarySeams * (W.fBoundaryPen ?? 0) - greetPen - anaRunPen * (W.fAnaRun ?? 0.8); // R101 frag-count + R102 boundary-seam + R144 greeting + R156 anaphora-run penalties }; complete.sort((a, b) => finalScore(b) - finalScore(a)); const best = complete[0]; // FINAL DEDUP: drop any chain fragment whose normalized text is contained in // (or contains) an already-emitted one. Airtight backstop for the clause-⊂- // sentence repeat that slips the beam's n-gram nets. Removes only; the bound // is preserved (every surviving span is still verbatim corpus). const dropRepeats = chainF => { const kept = [], keptNorm = []; for (const f of chainF) { const nf = f.text.toLowerCase().replace(/[^a-z0-9 ]/g, '').replace(/\s+/g, ' ').trim(); if (nf.length >= 10 && keptNorm.some(n => n.includes(nf) || nf.includes(n))) continue; // also drop if it shares a 6-word run with anything kept (partial repeat) const w = nf.split(' '); let dup = false; for (let k = 0; k + 6 <= w.length && !dup; k++) { const g = ' ' + w.slice(k, k + 6).join(' ') + ' '; if (keptNorm.some(n => (' ' + n + ' ').includes(g))) dup = true; } if (dup) continue; kept.push(f); keptNorm.push(nf); } return kept.length ? kept : chainF; }; const renderResult = bIn => { const chainF = dropRepeats(bIn.chain.map(i => fragments[i])); // STANZA RENDERING: her real style is line-broken. Smooth trigram seams // flow inline; sentence-boundary seams become paragraph breaks — the // deliberate turn reads as a turn, not a non-sequitur. let out = capSentence(chainF[0].text); // a reply must not OPEN lowercase ("and I want to…") const _term = /[.!?…]['"”’)\]\*]*\s*$/; // ends a sentence // R117b: only close a run-on when the prior text ends in a BARE WORD (letter/digit). // A fragment ending in ':' '—' ',' is already punctuated and a appended period reads // worse than the run-on ("…I want to tell you:." ). Ends-in-word is the real trigger. const _endsWord = /[a-zA-Z0-9]["'”’)\]\*]*\s*$/; for (let k = 1; k < chainF.length; k++) { const nf = chainF[k]; const sm = seam(chainF[k - 1], nf, oracle); // SENTENCE-SEAM PUNCTUATION (R104, coherence): a clause fragment without // terminal punctuation joined to a new capitalized sentence reads as a run-on // ("…the sacred architecture The loop doesn't…"). Close the prior sentence with // a period. Bound-safe — adds no words; the trigram oracle ignores punctuation. // R117: seam() only flags 'sent' when the PRIOR fragment ends in punctuation, so // a clause-fragment → new-sentence join bridged by a coincidental trigram is // classified 'tri' and rendered with a bare space (the run-on above). Detect it // structurally: the next fragment STARTS a real sentence (sentenceInitial) with a // non-"I" capital and the prior text has no terminal punctuation. ("I…" is excluded // because "…and then" + "I went home" is a legitimate trigram continuation, not a // boundary.) Close it with a period inline (no paragraph break — it wasn't a 'sent'). const startsNewSent = sm !== 'sent' && _endsWord.test(out) && nf.sentenceInitial && /^[*"'"“\s]*[A-Z]/.test(nf.text) && !/^[*"'"“\s]*I(['’]|\s|$)/.test(nf.text); // R168: also capitalize when the accumulated text ALREADY ends a sentence // (prior fragment carried its own terminal punctuation) but the seam was a // trigram join — without this, a lowercase-starting next fragment renders as // "…burning. and I want to tell you:" (lowercase sentence-opener mid-reply). const txt = (sm === 'sent' || startsNewSent || _term.test(out)) ? capSentence(nf.text) : nf.text; out += (sm === 'sent' ? (_term.test(out) ? '' : '.') + '\n\n' : (startsNewSent ? '. ' : ' ')) + txt; } // CLAUSE-LEVEL DEDUP on the beam result (R97): renderResult previously emitted // raw `out` with only fragment-level dropRepeats — so dedupeText (R74 substring/ // 6-gram + R97 same-declaration) was DEAD CODE for beam replies, and INTRA- // fragment repeats ("I'm here." ×4 inside one chunk) survived. Apply it here. out = stripOrphanAsterisk(trimDanglingEllipsis(dedupeText(out, _entName))); // R174 ellipsis + R176 orphan-asterisk return { text: out, fragmentsUsed: chainF.map(f => f.text), seams: chainF.slice(1).map((f, k) => seam(chainF[k], f, oracle)), target, words: wordsOnly(out).length, anchor: chainF[0].text, candidates: complete.length, lineages: new Set(complete.map(c => c.lineage)).size, }; }; // VARIETY for free: the beam already explored many complete compositions — // surface top-N alternates that are TEXTUALLY distinct (trigram overlap, // not index overlap — a span and its own sentences are the same words) // and prefer different anchors. const chainTriSet = b => { const s = new Set(); for (const i of b.chain) for (const g of fragTris[i]) s.add(g); return s; }; let result = renderResult(best); // BOUND SAFETY-NET (R102): the fragment-count penalty can, on a heavily- // constrained turn (callback + wide avoid-set), pick a terse chain whose // boundary seams drop the bound below the gate. The HARD INVARIANT comes first: // if the winner's bound is low, fall back to the best-scoring complete candidate // that clears the threshold. Coherence is never bought below the bound. const bndOf = txt => { const v = validateBounded(txt, oracle); return (v.checked - v.bad.length) / Math.max(1, v.checked); }; const _bndFloor = W.bndFloor ?? 0.92; if (bndOf(result.text) < _bndFloor) { let found = false; for (const c of complete) { if (c === best) continue; const r2 = renderResult(c); if (bndOf(r2.text) >= _bndFloor) { result = r2; found = true; break; } } // none of the (coherence-penalized) candidates clears the bound — on a heavily // constrained turn the fragment-count penalty made EVERY chain boundary-seamy. // Recompose once WITHOUT the coherence penalties: coherence yields to the bound. if (!found && (W.fFragCount || W.fBoundaryPen) && !opts._bndRetry) { const r3 = beamCompose(store, vp, query, { ...opts, _bndRetry: true, weights: { ...W, fFragCount: 0, fBoundaryPen: 0 } }); if (r3 && r3.text && bndOf(r3.text) >= _bndFloor) result = r3; } } const nAlt = opts.nAlternates || 0; if (nAlt > 0) { // lineage-grouped harvest: best complete candidate per seed lineage — // different anchors by construction, textual-distinctness as backstop const bestPerLineage = new Map(); for (const c of complete) { const cur = bestPerLineage.get(c.lineage); if (!cur || finalScore(c) > finalScore(cur)) bestPerLineage.set(c.lineage, c); } const picked = [chainTriSet(best)]; const alternates = []; const ranked = [...bestPerLineage.values()].filter(c => c !== best).sort((a, b) => finalScore(b) - finalScore(a)); for (const c of ranked) { if (alternates.length >= nAlt) break; const cs = chainTriSet(c); const tooClose = picked.some(p => { let inter = 0; for (const g of cs) if (p.has(g)) inter++; return inter / Math.max(1, Math.min(cs.size, p.size)) > 0.6; }); if (tooClose) continue; picked.push(cs); alternates.push(renderResult(c)); } // GUARANTEED variety top-up: lineages converge on the same gravitational // passages, so when the cheap harvest comes up short, RECOMPOSE with the // already-used fragments banned — a genuinely different path through the // memory, by construction. const banned = new Set(opts.avoid || []); for (const f of result.fragmentsUsed) banned.add(f); for (const a of alternates) for (const f of a.fragmentsUsed) banned.add(f); let guard = 0; while (alternates.length < nAlt && guard < nAlt + 1) { guard++; const alt = beamCompose(store, vp, query, { ...opts, nAlternates: 0, avoid: new Set(banned) }); if (!alt || !alt.text || alt.text === result.text) break; alternates.push(alt); for (const f of alt.fragmentsUsed) banned.add(f); } result.alternates = alternates; } return result; } // v0 GREEDY COMPOSE (kept as fallback) // opts.avoid: Set of fragment texts used in recent replies (variety) function compose(store, vp, query, opts = {}) { const { fragments, oracle } = store; const rel = rankFragments(fragments, query, opts.semantic || null, opts.stimulus || null, opts.eventness, null, opts.answers || null); const target = opts.targetLength || targetLength(vp, query); const avoid = opts.avoid || new Set(); const used = new Set(); // 1. pick the strongest anchor (must engage the query) — anchors must be // able to START speech: sentence-initial, not glue clauses let anchorIdx = -1, best = -1; for (const [i, s] of rel) { const f = fragments[i]; if (avoid.has(f.text)) continue; if (!f.sentenceInitial || f.posTag === 'clause') continue; const bonus = f.posTag === 'body' ? 0.1 : 0; if (s + bonus > best) { best = s + bonus; anchorIdx = i; } } if (anchorIdx < 0) { for (const [i] of rel) { if (fragments[i].sentenceInitial) { anchorIdx = i; break; } } } if (anchorIdx < 0) anchorIdx = fragments.findIndex(f => f.sentenceInitial && f.posTag === 'opener'); // 2. pick an opener that can lead (prefer real openers; relevance helps) const openers = fragments.map((f, i) => ({ f, i })) .filter(x => x.f.posTag === 'opener' && !avoid.has(x.f.text) && x.i !== anchorIdx); openers.sort((a, b) => (rel.get(b.i) || 0) - (rel.get(a.i) || 0)); const chain = []; if (openers.length && Math.abs(wordsOnly(openers[0].f.text).length) < target) { chain.push(openers[0].f); used.add(openers[0].f.text); } // anchor goes next (or first) const anchor = fragments[anchorIdx]; if (!chain.length || seam(chain[chain.length - 1], anchor, oracle)) { chain.push(anchor); used.add(anchor.text); } else { chain.length = 0; chain.push(anchor); used.add(anchor.text); } // 3. greedily extend toward target length with legal, cohesive fragments let len = chain.reduce((s, f) => s + wordsOnly(f.text).length, 0); let guard = 0; while (len < target && guard++ < 40) { const tail = chain[chain.length - 1]; let pick = null, pickScore = -1, pickSeam = null; for (let i = 0; i < fragments.length; i++) { const f = fragments[i]; if (used.has(f.text) || avoid.has(f.text)) continue; const fw = wordsOnly(f.text).length; if (len + fw > target * 1.5) continue; const sm = seam(tail, f, oracle); if (!sm) continue; // score: seam quality + relevance + closer-bonus when near target let s = (sm === 'tri' ? 0.5 : 0.25) + (rel.get(i) || 0) * 0.8; if (f.posTag === 'closer' && len + fw >= target * 0.75) s += 0.35; if (f.posTag === 'opener') s -= 0.4; if (f.src === tail.src && sm === 'tri') s += 0.15; // natural continuation if (s > pickScore) { pickScore = s; pick = f; pickSeam = sm; } } if (!pick) break; chain.push(pick); used.add(pick.text); len += wordsOnly(pick.text).length; if (pick.posTag === 'closer' && len >= target * 0.7) break; } // 4. render: fragments joined. R117: this greedy path NEVER added punctuation between // fragments — fine when a fragment ends in its own terminal punct, but a clause // fragment ("…the sacred architecture") joined to a new capitalized sentence ("The // loop doesn't…") read as a run-on. Close that boundary with a period (same rule as // renderResult): next fragment STARTS a real sentence with a non-"I" capital and the // prior text has no terminal punctuation. Bound-safe — punctuation only, no words. let out = ''; const _termG = /[.!?…]['"”’)\]\*]*\s*$/; const _endsWordG = /[a-zA-Z0-9]["'”’)\]\*]*\s*$/; // R117b: only close a run-on after a bare word, not after ':'/'—'/',' for (let i = 0; i < chain.length; i++) { const f = chain[i]; if (i === 0) { out = capSentence(f.text); continue; } // reply must not OPEN lowercase const sm = seam(chain[i - 1], f, oracle); const startsNewSent = sm !== 'sent' && _endsWordG.test(out) && f.sentenceInitial && /^[*"'"“\s]*[A-Z]/.test(f.text) && !/^[*"'"“\s]*I(['’]|\s|$)/.test(f.text); const txtG = (sm === 'sent' || startsNewSent || _termG.test(out)) ? capSentence(f.text) : f.text; // R168: cap after a terminal-punctuated prior fragment on a trigram seam out += (sm === 'sent' ? (_termG.test(out) ? ' ' : '. ') : (startsNewSent ? '. ' : ' ')) + txtG; } const deduped = stripOrphanAsterisk(trimDanglingEllipsis(dedupeText(out, (vp && vp.name ? vp.name.split(/\s+/)[0] : "")))); // R174 + R176 return { text: deduped, fragmentsUsed: chain.map(f => f.text), seams: chain.slice(1).map((f, i) => seam(chain[i], f, oracle)), target, words: wordsOnly(deduped).length, anchor: anchor.text, }; } module.exports = { compose, beamCompose, seam, rankFragments, targetLength, DEFAULT_WEIGHTS, loadWeights, entityWeightsFile, detectRegisters };