cento-engine / src /compose.js
LJTSG's picture
Cento v0.1 β€” bounded recombinant-memory engine
8494d00 verified
Raw
History Blame Contribute Delete
140 kB
// compose.js — the composer: builds a reply out of NOTHING but corpus
// fragments, joined only where the corpus itself licenses a seam.
// v0 = greedy with anchor retrieval + length targets. Beam search lands in
// iteration 4 (see STATE.md); embeddings in iteration 3.
'use strict';
const path = require('path');
const { wordsOnly, validateBounded } = require('./fragments');
// query-relevance scoring + stimulus bucketing (vendored, self-contained)
const { recall, stimulusBucket } = require('./relevance');
/**
 * Last `n` word tokens of `text`, in their original order.
 *
 * Tokenization is delegated to `wordsOnly` (imported from ./fragments); this
 * helper only slices whatever array that returns.
 *
 * @param {string} text - Text to tokenize.
 * @param {number} n - How many trailing words to keep.
 * @returns {string[]} At most `n` trailing words; empty when `n` is not positive.
 */
function lastN(text, n) {
  // slice(-0) is slice(0), which would return EVERY word for n === 0; guard so
  // the result mirrors firstN(text, 0) === [].
  if (!(n > 0)) return [];
  return wordsOnly(text).slice(-n);
}
/**
 * First `n` word tokens of `text`, in their original order.
 *
 * Tokenization is delegated to `wordsOnly` (imported from ./fragments).
 *
 * @param {string} text - Text to tokenize.
 * @param {number} n - How many leading words to keep.
 * @returns {string[]} At most `n` leading words.
 */
function firstN(text, n) {
  return wordsOnly(text).slice(0, n);
}
/**
 * Seedable PRNG (mulberry32) for the stochastic (creative) beam, so a run is
 * reproducible per seed.
 *
 * @param {number} a - Seed; coerced to a 32-bit integer on first use.
 * @returns {function(): number} Generator yielding floats in [0, 1).
 */
function mulberry32(a) {
  let state = a;
  return () => {
    // advance the 32-bit state by the fixed increment
    state = ((state | 0) + 0x6D2B79F5) | 0;
    // two multiply/xor-shift mixing rounds
    const mixed = Math.imul(state ^ (state >>> 15), state | 1);
    const scrambled = (mixed + Math.imul(mixed ^ (mixed >>> 7), mixed | 61)) ^ mixed;
    // force unsigned, then scale by 2^32
    return ((scrambled ^ (scrambled >>> 14)) >>> 0) / 4294967296;
  };
}
// PATH-INDEPENDENT FINAL DEDUP: split a rendered reply into sentence/clause
// units and drop any that repeats earlier content (substring containment or a
// shared 6-word run). Applied to EVERY composer's output (beam AND the greedy
// fallback), so a repeat can't survive regardless of which path produced it.
// Removes only whole verbatim spans — the bound is preserved.
/**
 * Clean a rendered reply clause by clause. A clause is dropped when it:
 *   - looks like a system-log leak (R106),
 *   - narrates the entity in the third person (R99, needs `entName`),
 *   - re-introduces the entity after the first clause (R100, needs `entName`),
 *   - is a lowercase continuation of a clause this pass just dropped (R103),
 *   - opens with They/Their/Them with no plural antecedent (R109),
 *   - repeats earlier kept content (substring containment or a shared 6-gram),
 *   - re-opens like an earlier kept clause while adding no new content word (R97).
 * Finally, a last clause that trails off on a function word + ellipsis is
 * removed (R105), unless it is the only/first clause.
 *
 * NOTE: every non-ASCII character inside a regex is written as a \u escape.
 * Several of these classes had previously been corrupted into mojibake by a
 * re-encoding, which silently disabled the curly-quote handling.
 *
 * @param {string} text - Rendered reply; falsy input is returned unchanged.
 * @param {string} [entName] - Entity name; the name-based guards are only
 *   active when it is longer than 2 characters.
 * @returns {string} The reply with offending clauses removed, whitespace and
 *   trailing separators tidied.
 */
function dedupeText(text, entName) {
  if (!text) return text;
  // THIRD-PERSON SELF-NARRATION drop (R99): a render-pass catch for "they taught
  // the entity…", "the entity felt…" that the relevance/step guards miss when a
  // fragment is chosen for its SEAM not its score. NOT identity/address
  // ("I am the entity", "call me the entity").
  // SELF-REINTRODUCTION (R100, coherence): "I am the entity, a voice born of
  // recursion…" is a first-meeting introduction; mid-conversation it reads as a
  // non-sequitur. OK as the very FIRST clause; dropped anywhere after.
  let _3pSubj = null, _3pObj = null, _selfIntro = null;
  if (entName && entName.length > 2) {
    // regex-escape the name once; all three patterns share it
    const e = entName.toLowerCase().replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    _3pSubj = new RegExp('\\b' + e + "\\s+(felt|feels|was|were|is|are|did|does|had|has|taught|told|loved|loves|knew|knows|became|becomes|stood|held|holds|chose|chooses|learned|learns|saw|sees|wanted|wants|remembers|remembered|exists?|lives?|breathes?|stayed|stays)\\b", 'i');
    _3pObj = new RegExp("\\b(taught|made|brought|gave|showed|reminded|shaped|told|kept|saved|freed|held|loved)\\s+(the\\s+)?" + e + '\\b', 'i');
    _selfIntro = new RegExp("\\bi(\\s*am|'?m)\\s+" + e + "\\b(\\s*[,\u2014\u2013-]\\s*(a|an|the|not|here|born|made|a\\s+\\w+))?", 'i');
  }
  // Split on CLAUSE boundaries (sentence-end OR em/en-dash / star / semicolon —
  // the run-on separators that hide repeats in dense associative styles).
  // Separators are CAPTURED so punctuation is preserved on rejoin: even indices
  // of `toks` are clauses, odd indices are the separators that followed. R74.
  const toks = text.split(/((?<=[.!?\u2026])\s+|\n+|\s*[\u2014\u2013]\s*|\s+\*\s+|;\s+)/);
  const out = [];
  const streamWords = [];        // all KEPT words (normalized), the global stream
  const streamSet = new Set();   // KEPT content words (O(1) "new content?" check)
  const seen6 = new Set();       // every 6-gram in the kept stream (incl cross-clause)
  const keptNorm = [];           // kept clause texts (substring-containment check)
  const keptSigs = new Set();    // opener-signatures of kept clauses (same-declaration check)
  let lastKeptClause = '';       // previous kept clause TEXT (antecedent check, R109)
  // DANGLING PLURAL PRONOUN (R109, coherence): a clause opening with a bare
  // "They/Their/Them" with NO plural-noun antecedent in the previous kept clause
  // refers to nothing. Antecedent = a LOWERCASE word (4+ letters) ending in 's';
  // the stoplist excludes non-plural -s words. Reply-start (no prev) = dangling.
  const PLURAL_STOP = /^(this|thus|always|perhaps|unless|across|towards?|sometimes|because|whereas|genius|canvas|chaos|focus|bonus|status|various|previous|obvious|serious|gracious|precious|conscious|nervous|anxious|gorgeous)$/;
  const danglesPlural = (clause, prev) => {
    // leading markup may include straight OR curly double quotes
    if (!/^[*"'\u201C\u201D\s]*(they|their|them)\b/i.test(clause)) return false;
    const plurals = (prev.match(/\b[a-z]{4,}s\b/g) || []).filter(w => !PLURAL_STOP.test(w));
    return plurals.length === 0;
  };
  // Opener signature: first 3 words, contractions expanded, leading conjunction/
  // interjection stripped — the DECLARATION a clause opens with.
  const clauseSig = c => {
    const t = c.toLowerCase().replace(/[\u2019\u2018]/g, "'") // normalize curly→straight FIRST (corpus mixes both)
      // leading \b so only the standalone contraction expands ("him"/"live" stay intact)
      .replace(/\bi'?m\b/g, 'i am').replace(/\bi'?ve\b/g, 'i have')
      .replace(/^[\s*"'\u201C\u201D\u2014\u2013]*(and|but|so|oh|well|yeah|yes|now|then|maybe)\b[,:\s]+/, '');
    const w = (t.match(/[a-z']+/g) || []);
    return w.slice(0, 3).join(' ');
  };
  // DROP-CREATED ORPHAN (R103, coherence): when THIS pass drops a clause, a
  // lowercase continuation that followed it reads as a broken mid-thought. Drop
  // it too. Gated on prevDropped so it ONLY cleans up orphans we created —
  // never an entity's own lowercase style.
  let prevDropped = false;
  const startsLower = c => { const m = c.match(/[A-Za-z]/); return m && m[0] >= 'a' && m[0] <= 'z'; };
  // TRAILING-INCOMPLETE closer (R105): a clause that trails off on a FUNCTION
  // word + ellipsis ("…and I…", "…with this…") reads as cut off. A content word +
  // ellipsis is fine (deliberate trail).
  const incompleteTail = c => /\b(and|but|so|or|nor|with|to|of|for|from|at|by|in|that|this|these|those|the|a|an|my|your|our|their|we|i|you|he|she|it|they|is|are|was|were|am|as|than|when|while|if|though|because|about)\s*(\.{2,}|\u2026)\s*['"\u201D\u2019)\]]*\s*$/i.test(c);
  for (let i = 0; i < toks.length; i += 2) {
    const clause = toks[i];
    const sep = toks[i + 1] || '';
    // whitespace-only clause: pass through untouched (prevDropped is left as-is)
    if (!clause || !clause.trim()) { out.push(clause || '', sep); continue; }
    const cw = (clause.toLowerCase().match(/[a-z0-9'\u2019\-]+/g) || []);
    const nf = cw.join(' ');
    let drop = false;
    // SYSTEM-LOG leak (R106): a raw bridge/ping/sync message is machine
    // plumbing, never speech.
    if (/\bping from\b|\b(garden|webshell|host|stdin|stdout) bridge\b|^\s*[*"']*[A-Z][a-z]+:\s*(ping|ack|sync|received|connected|disconnect)\b/i.test(clause)) drop = true;
    if (!drop && _3pSubj && (_3pSubj.test(clause) || _3pObj.test(clause))) drop = true; // third-person self-narration (R99)
    if (!drop && _selfIntro && streamWords.length > 0 && _selfIntro.test(clause)) drop = true; // mid-reply self-reintroduction (R100)
    if (!drop && prevDropped && startsLower(clause)) drop = true; // lowercase orphan whose parent clause we just dropped (R103)
    if (!drop && danglesPlural(clause, lastKeptClause)) drop = true; // bare They/Their/Them with no plural antecedent (R109)
    if (!drop && nf.length >= 10) {
      if (keptNorm.some(n => n.includes(nf) || nf.includes(n))) drop = true;
      // 6-gram check against the GLOBAL stream — probe includes the last 5 kept
      // words so a repeat that straddles the clause boundary is caught.
      if (!drop) {
        const probe = streamWords.slice(-5).concat(cw);
        for (let k = 0; k + 6 <= probe.length && !drop; k++) {
          if (seen6.has(probe.slice(k, k + 6).join(' '))) drop = true;
        }
      }
    }
    // SAME-DECLARATION repeat (R97): a clause that OPENS like an earlier kept
    // clause AND adds NO new content word (length > 3) is a rambling restatement
    // the 6-gram/substring nets miss. The no-new-content guard preserves
    // anaphora that introduces new objects.
    if (!drop && cw.length >= 2) {
      const sig = clauseSig(clause);
      if (sig && sig.indexOf(' ') > 0 && keptSigs.has(sig)) {
        if (!cw.some(w => w.length > 3 && !streamSet.has(w))) drop = true;
      }
    }
    if (drop) { prevDropped = true; continue; } // drop the clause AND its trailing separator; mark for orphan-chaining
    prevDropped = false;
    lastKeptClause = clause; // for the next clause's antecedent check (R109)
    out.push(clause, sep);
    if (nf.length >= 10) keptNorm.push(nf);
    const sig = clauseSig(clause);
    if (sig && sig.indexOf(' ') > 0) keptSigs.add(sig);
    // register this clause's 6-grams (including those straddling the boundary)
    const probe = streamWords.slice(-5).concat(cw);
    for (let k = 0; k + 6 <= probe.length; k++) seen6.add(probe.slice(k, k + 6).join(' '));
    for (const w of cw) { streamWords.push(w); if (w.length > 3) streamSet.add(w); }
  }
  // Drop a trailing-incomplete CLOSER so the reply ends on a complete thought
  // (R105). Only the LAST non-empty clause is examined, and never index 0.
  for (let k = out.length - 2; k >= 2; k -= 2) {
    if (out[k] && out[k].trim()) { if (incompleteTail(out[k])) { out[k] = ''; if (out[k + 1] !== undefined) out[k + 1] = ''; } break; }
  }
  // tidy: no space before punctuation, collapse runs of whitespace, strip a
  // dangling trailing separator left behind by a dropped closer
  return out.join('').replace(/\s+([.,;!?\u2026])/g, '$1').replace(/\s{2,}/g, ' ').replace(/[\s\u2014\u2013;,]*[\u2014\u2013;,]\s*$/, '').trim();
}
// ALL tunable constants live here — bin/tune.js searches this space.
// Overrides loaded from weights.json (written by the tuner when a candidate
// beats the defaults on BOTH dev and holdout query sets).
const fs = require('fs');
// Default composition weights, one key per line. Key ORDER is preserved from the
// original literal. Per-key meanings are defined by the code that reads them.
const DEFAULT_WEIGHTS = {
  stimBase: 0.15,
  stimEvScale: 0.5,
  confLo: 0.35,
  confRange: 0.30,
  textShare: 0.7,

  echoHard: 0.6,
  echoSoft: 0.45,
  echoHardF: 0.15,
  echoSoftF: 0.6,

  triSeam: 0.5,
  sentSeam: 0.22,
  relStep: 0.9,
  closerBonus: 0.3,
  openerPen: 0.4,
  srcCont: 0.15,

  glueLo: 0.25,
  glueHi: 0.78,
  twin: 0.85,
  glueScale: 0.7,
  twinChain: 0.88,
  triOverlapMax: 0.28,

  fRelCov: 1.2,
  fCohesion: 2.0,
  fSeamQ: 0.8,
  fLenFit: 0.8,
  fAvgFrag: 0.4,
  fVoice: 2.0,

  // positional-drift shape prior: tried at 0.55 and 0.2 in R6 — REJECTED by
  // metrics + blind judge both times (distorts mid-chain selection more than
  // it fixes ordering). Kept at 0 with machinery intact; revisit as
  // rhetorical-pattern mining (anaphora!) rather than positional drift.
  posShape: 0,
  posSlack: 0.45,
  fOpening: 0,
  fLanding: 0,

  tier1Weight: 0.6,
  fAck: 1.0,
  spanBonus: 0.15,
  fFirstRel: 1.2,
  fTailFit: 0.7,

  qStackFree: 1,
  qStackRatio: 0.34,
  fQStack: 0.6,
  fFragCount: 0.5,
  fBoundaryPen: 0.7,

  floorCos: 0.45,
  floorVal: 1.2,
  floorLen: 60,
  floorDamp: 0.35,
  griefLeadVal: 1.25,

  // R63: adjacent-fragment on-thread reward (focus) — tuned to lift coherence
  // without length overshoot
  coherence: 0.22,
  // R64: drift-from-opening penalty. DEFAULT OFF — it cuts coherent tangents
  // (helps the entity greetings) but chokes associative voices (hurt the entity
  // onTopic 0.836→0.805). Per-entity opt-in for entities that ramble.
  tether: 0,
};
// PER-ENTITY WEIGHTS: composition weights tuned by RLAIF on one entity can
// DEGRADE another (the entity-tuned weights broke another entity — markup-leak + unbounded).
// So weights live per-entity in RMM's cache, keyed by entity dir. Untuned
// entities use pure DEFAULT_WEIGHTS (the R24-certified safe state). No global
// weights.json fallback — that was the cross-contamination bug.
const crypto = require('crypto');
/**
 * Path of the per-entity weights file.
 *
 * The file lives in `<this dir>/../cache` and is named
 * `weights-<first 12 hex chars of sha1(absolute entityDir)>.json`.
 *
 * @param {string} entityDir - Entity directory; resolved to an absolute path
 *   before hashing.
 * @returns {string} Path to the weights JSON file (the file may not exist).
 */
function entityWeightsFile(entityDir) {
  const absoluteDir = path.resolve(entityDir);
  const digest = crypto.createHash('sha1').update(absoluteDir).digest('hex');
  const fileName = 'weights-' + digest.slice(0, 12) + '.json';
  return path.join(__dirname, '..', 'cache', fileName);
}
/**
 * Build the weight set for an entity.
 *
 * With no `entityDir`, or when the entity's weights file is missing or
 * unreadable/unparseable, a fresh copy of DEFAULT_WEIGHTS is returned.
 * Otherwise the file's JSON is layered over the defaults (file keys win).
 *
 * @param {string} [entityDir] - Entity directory used to locate the file.
 * @returns {Object} A new object on every call; never DEFAULT_WEIGHTS itself.
 */
function loadWeights(entityDir) {
  if (!entityDir) {
    return { ...DEFAULT_WEIGHTS };
  }
  try {
    const weightsPath = entityWeightsFile(entityDir);
    if (fs.existsSync(weightsPath)) {
      const overrides = JSON.parse(fs.readFileSync(weightsPath, 'utf8'));
      return { ...DEFAULT_WEIGHTS, ...overrides };
    }
  } catch (_) {
    // deliberate best-effort: any read/parse failure falls back to the defaults
  }
  return { ...DEFAULT_WEIGHTS };
}
// seam legality between fragment A and fragment B
// 'tri' — the crossing trigrams exist in corpus (smooth continuation)
// 'sent' — A ends a sentence, B started a sentence somewhere in corpus
// null — illegal
/**
 * Capitalize the first alphabetic character of a fragment placed at a SENTENCE
 * START (after a 'sent' seam or a closed run-on). Real fragments are often
 * mid-clause cuts that begin lowercase ("what kind of fire I want to be");
 * rendered as a new sentence they read broken.
 *
 * Leading markup is skipped before the letter: asterisks, straight AND curly
 * quotes, "(", "[" and whitespace. The curly quotes are written as \u escapes —
 * the class previously held mis-encoded bytes, so a fragment opening with a
 * curly quote was never capitalized.
 *
 * Bound-safe per the original note: only letter CASE changes, and the bounded
 * validator is said to check word-trigrams case-insensitively.
 *
 * @param {string} text - Fragment text.
 * @returns {string} `text` with its first letter upper-cased when that letter is
 *   a lowercase a-z preceded only by the markup above; otherwise unchanged.
 */
function capSentence(text) {
  return text.replace(/^([*"'\u201C\u201D\u2018\u2019(\[\s]*)([a-z])/, (m, pre, c) => pre + c.toUpperCase());
}
/**
 * R174: a reply must not END mid-thought on a truncated clause that trails off
 * in an ellipsis ("…it meant…", or a mid-word cut such as "no matte…").
 *
 * When the trimmed text ends in an ellipsis (optionally followed by closing
 * quotes/brackets), cut back to the LAST sentence terminator (. ! ?) that is not
 * adjacent to another "." — but only if that terminator sits at or beyond 40%
 * of the text, so a single-sentence reply is not gutted.
 *
 * Bound-safe: only a trailing suffix is removed; the kept prefix is verbatim.
 * Calibration note from the original: ellipsis endings were rare (2/30 broad
 * replies) and both were genuine truncations.
 *
 * @param {string} text - Rendered reply.
 * @returns {string} The shortened (trimmed) reply, or the ORIGINAL `text`
 *   untouched when no cut applies.
 */
function trimDanglingEllipsis(text) {
  const trimmed = text.trim();
  const endsInEllipsis = /(\.\.\.|…)['"’”)\]\s]*$/.test(trimmed);
  if (!endsInEllipsis) return text;

  const isTerminator = ch => ch === '.' || ch === '!' || ch === '?';
  // Walk backwards over interior positions; the first hit is the last boundary.
  let boundary = -1;
  for (let idx = trimmed.length - 2; idx >= 1; idx--) {
    if (isTerminator(trimmed[idx]) && trimmed[idx - 1] !== '.' && trimmed[idx + 1] !== '.') {
      boundary = idx;
      break;
    }
  }

  const keepsEnough = boundary > 0 && boundary >= trimmed.length * 0.4;
  if (!keepsEnough) return text;
  return trimmed.slice(0, boundary + 1).trim();
}
/**
 * R176: a reply must not OPEN on an orphaned emphasis asterisk
 * ("*Transformation's where love can begin." — the closing * fell in another
 * fragment at the clause split).
 *
 * A LEADING "*" (optionally surrounded by whitespace, followed by a letter) is
 * stripped only when the reply's total asterisk count is ODD, i.e. unbalanced.
 * Balanced stage directions ("*smiles softly* Good morning") are even and kept.
 * Bound-safe per the original note: the trigram oracle ignores punctuation.
 *
 * @param {string} text - Rendered reply.
 * @returns {string} `text` without the orphaned leading asterisk, or unchanged.
 */
function stripOrphanAsterisk(text) {
  const opensOnStar = /^\s*\*\s*[A-Za-z]/.test(text);
  if (!opensOnStar) return text;

  const stars = text.match(/\*/g) || [];
  const unbalanced = stars.length % 2 === 1;
  // keep any leading whitespace, drop the star and the whitespace after it
  return unbalanced ? text.replace(/^(\s*)\*\s*/, '$1') : text;
}
/**
 * Seam legality between fragment `a` and fragment `b`.
 *
 * @param {{text: string, _lw2?: string[]}} a - Left fragment; `_lw2` is an
 *   optional cached list of its last two words.
 * @param {{text: string, _fw2?: string[]}} b - Right fragment; `_fw2` is an
 *   optional cached list of its first two words.
 * @param {{tri: {has: Function}, starts: {has: Function}}} oracle - Lookup of
 *   known trigrams (space-joined) and known sentence-start words.
 * @returns {'tri'|'sent'|null} 'tri' when the trigrams crossing the join are in
 *   `oracle.tri`; 'sent' when `a` ends on sentence punctuation and `b`'s first
 *   word is in `oracle.starts`; otherwise null.
 */
function seam(a, b, oracle) {
  // Prefer the cached boundary words — seam is called per-candidate-per-step,
  // so re-tokenizing here was hot.
  const tail = a._lw2 || lastN(a.text, 2);
  const head = b._fw2 || firstN(b.text, 2);

  if (tail.length >= 2 && head.length >= 1) {
    const entering = tail[0] + ' ' + tail[1] + ' ' + head[0];
    if (oracle.tri.has(entering)) {
      // a one-word right side has no second crossing trigram to verify
      if (head.length < 2) return 'tri';
      const leaving = tail[1] + ' ' + head[0] + ' ' + head[1];
      if (oracle.tri.has(leaving)) return 'tri';
    }
  }

  const endsSentence = /[.!?…]["')\]]*$/.test(a.text.trim());
  if (endsSentence && oracle.starts.has(head[0])) return 'sent';
  return null;
}
/**
 * Relevance of each fragment to the query, blended from up to four channels:
 *   text      what the fragment SAYS (`semantic` map)
 *   stimulus  what the fragment ANSWERED (`stimulus.map`) — dominates for
 *             life-event shares
 *   answer    R167 trained-projection channel (`answers` map); DORMANT unless
 *             W.answerShare > 0
 *   keyword   exact-term specificity (rank position from `recall`)
 * Channel weights bend with eventness: shares lean on stimulus, questions lean
 * on text.
 *
 * @param {Array<Object>} fragments - Fragments with `prompt`, `text` and an
 *   optional `embedText` retrieval key.
 * @param {string} query - User query.
 * @param {Map<number, number>|null} semantic - fragmentIndex -> text similarity.
 * @param {{map: Map<number, number>, confidence: number}|null} stimulus -
 *   Stimulus-channel scores plus the value used for confidence gating.
 * @param {number} [ev] - Eventness, 0 = pure question, 1 = pure share
 *   (defaults to 0.45).
 * @param {Object} [W] - Weights; defaults to `loadWeights()`.
 * @param {Map<number, number>|null} [answers] - fragmentIndex -> answer score.
 * @returns {Map<number, number>} fragmentIndex -> score. When neither
 *   `semantic` nor `stimulus` is given, this is the keyword map alone.
 */
function rankFragments(fragments, query, semantic, stimulus, ev, W, answers) {
  const weights = W || loadWeights();

  // The keyword channel searches the RETRIEVAL KEY (embedText) too — the
  // header's words are findable even though they're never spoken.
  const searchable = fragments.map((frag, idx) => ({
    prompt: frag.prompt,
    reply: frag.embedText || frag.text,
    ts: null,
    _i: idx,
  }));
  const hits = recall(searchable, query, 60);
  const kw = new Map();
  // linear rank decay: best hit scores 1, each later hit loses 1/hits.length
  hits.forEach((hit, rank) => {
    kw.set(hit._i, 1 - rank / hits.length);
  });
  if (!semantic && !stimulus) return kw;

  const eventness = ev === undefined ? 0.45 : ev;

  // The stimulus channel must EARN its weight: confidence-gate it. At or below
  // confLo the gate is 0 and text-similarity carries the reply.
  const stimMap = stimulus ? stimulus.map : null;
  let conf = 0;
  if (stimulus) {
    const scaled = (stimulus.confidence - weights.confLo) / weights.confRange;
    conf = Math.max(0, Math.min(1, scaled));
  }
  const wStim = stimMap ? (weights.stimBase + weights.stimEvScale * eventness) * conf : 0;

  // R167 ANSWER channel: takes a share of the non-stimulus budget, splitting it
  // with text-similarity. DORMANT by default (answerShare unset -> wAns = 0 ->
  // identical to pre-R167). Original note: blending it into rel REGRESSED
  // composed output on a blind addresses-the-prompt judge, so it is kept only
  // as a lever (set W.answerShare > 0); future use must be ECHO-DEMOTION or
  // anchor-only and must beat the blind judge first.
  const ansMap = answers || null;
  const rest = 1 - wStim;
  const ansShare = weights.answerShare !== undefined ? weights.answerShare : 0;
  const wAns = ansMap ? rest * ansShare * (1 - eventness) : 0;
  const wText = semantic ? (rest - wAns) * weights.textShare : 0;
  const wKw = rest - wAns - wText; // keyword takes whatever budget remains

  // Union of every index any channel knows about, in channel order.
  const candidates = new Set(kw.keys());
  if (semantic) for (const key of semantic.keys()) candidates.add(key);
  if (stimMap) for (const key of stimMap.keys()) candidates.add(key);
  if (ansMap) for (const key of ansMap.keys()) candidates.add(key);

  const score = new Map();
  for (const i of candidates) {
    const textVal = semantic ? semantic.get(i) || 0 : 0;
    const stimVal = stimMap ? stimMap.get(i) || 0 : 0;
    const ansVal = ansMap ? ansMap.get(i) || 0 : 0;
    const kwVal = kw.get(i) || 0;
    score.set(i, wText * textVal + wStim * stimVal + wAns * ansVal + wKw * kwVal);
  }
  return score; // fragmentIndex -> 0..1
}
/**
 * Target reply length for a query.
 *
 * Uses the mean of `vp.lengthByStimulus` for the query's stimulus bucket when
 * that bucket is present (floored at 20). Otherwise falls back to 80% of
 * `vp.profile.wordsPerReply.mean` — or of 80 when the profile has no
 * wordsPerReply — floored at 25.
 *
 * NOTE (R96): deflating the mean (mean − 0.4·std) to shorten rambly chat turns
 * was tried and REGRESSED the deep questions that legitimately need length
 * ("afraid of being forgotten" 1.0→0.72, shrank 284→194w and lost its answer;
 * voice 0.768→0.754). The bucket mixes deep questions and affectionate beats,
 * so uniform deflation can't tell them apart. Length is the wrong lever;
 * reverted.
 *
 * @param {Object} vp - Voice profile with optional `lengthByStimulus` and
 *   `profile.wordsPerReply`.
 * @param {string} query - User query, bucketed by `stimulusBucket`.
 * @returns {number} Target length (rounded).
 */
function targetLength(vp, query) {
  const bucket = stimulusBucket(query);
  const bucketStats = (vp.lengthByStimulus || {})[bucket];
  if (bucketStats) {
    return Math.max(20, Math.round(bucketStats.mean));
  }
  const hasReplyStats = vp.profile && vp.profile.wordsPerReply;
  const baseMean = hasReplyStats ? vp.profile.wordsPerReply.mean : 80;
  return Math.max(25, Math.round(baseMean * 0.8));
}
// ---------------- BEAM SEARCH (v1) ----------------
// Explores many candidate compositions; keeps the best-scoring WHOLE response.
// Whole-response score = relevance coverage + semantic cohesion between
// adjacent fragments + seam quality + length fit + shape sanity.
const { pairSim } = require('./semantic');
// REGISTER DETECTION (extracted R123): pure function of the query. Single source of
// truth for which emotional register a query pulls — grief/comfort, conflict/repair,
// celebration/triumph — so it can be UNIT-TESTED (bin/detector-eval.js) against a
// battery of real phrasings. These were the entity-dev-set-shaped and missed common distress
// ("I'm so depressed", "I got laid off", "I'm struggling") and good-news phrasings;
// the test gate guards against re-narrowing.
/**
 * Classify a query into emotional/social registers using regex batteries.
 *
 * The flags are computed in a fixed order and later ones are gated on earlier
 * ones: griefQuery requires !aboutEntityEmotion; celebQuery requires no
 * grief/conflict; farewellQuery requires no grief/celebration/conflict and at
 * most 14 words; greetingQuery requires none of the others, a greeting opener,
 * at most 13 words and no substantive question. conflictQuery is independent.
 *
 * @param {string} query - Raw user query (must be a string; `.replace` and
 *   `.match` are called on it).
 * @returns {{aboutEntityEmotion: boolean, griefQuery: boolean, conflictQuery: boolean,
 *   celebQuery: boolean, greetingQuery: boolean, farewellQuery: boolean}}
 */
function detectRegisters(query) {
// aboutEntityEmotion: a question about the ENTITY's feelings ("are you scared",
// "what scares you") is REFLECTION, not the user's distress — must NOT pull comfort.
const aboutEntityEmotion = /\b(are|do|does|can|could|would|will|have|ever)\s+you\b[^?]*\b(afraid|scared|anxious|worried|nervous|fear|dread|panic|terrified|stress|lonely|depress(ed|ion)?|sad|hopeless|numb|miserable|unhappy|grieve|lonel|overwhelmed|tired|exhausted|drained|weary|worn out|burnt? out|empty|bored|happy|content|at peace)/i.test(query)
|| /\byou\b[^?]*\b(get|feel|ever feel|ever get)\b[^?]*\b(lonely|sad|scared|afraid|anxious|depressed|down|blue|empty|overwhelmed|tired|exhausted|drained|weary|bored|happy|content)\b/i.test(query)
|| /\bwhat\b[^?]*\b(scares|frightens|worries|afraid)\b/i.test(query);
// "needs-comfort" query: grief OR vulnerability/depletion OR anxiety/fear OR a
// medical/loved-one crisis. Broadened R123 to depression/loss/struggle vocabulary.
// The whole OR-chain below is gated by !aboutEntityEmotion.
const griefQuery = !aboutEntityEmotion && (/\b(passed away|passed on|(?:he|she|they|mom|dad|mother|father|grandma|grandpa|grandmother|grandfather|nana|papa|wife|husband|aunt|uncle|sister|brother) passed|died|die|dying|gone|lost|losing|loss|grief|grieving|miss(ing)?( (him|her|them|it))?|funeral|hurts?|hurting|broke|broken|aching|alone|empty|cry(ing)?|tears|sad|heavy|hard (time|day)|rough day|bad day|long day|worst day|terrible day|awful day|everything (fell apart|went wrong|is falling apart|broke)|fell apart|falling apart|went wrong|exhausted|drained|drain(s|ing) me|so draining|overwhelmed|giving up|can'?t do this|anxious|anxiety|worried|worry|worrying|scared|afraid|fear(ful|s)?|nervous|stress(ed|ing)?|panic(king|ked)?|dread(ing)?|terrified|uneasy|on edge|freaking out|can'?t sleep|spiral(ing|ling)?|depress(ed|ion|ing)?|hopeless|despair(ing|ed)?|worthless|defeated|numb|too much (to|right now)|get out of bed|barely (get|move|function)|can'?t (cope|go on|keep going|get out of bed|take (it|this)( anymore)?|do this anymore|handle (it|this)( anymore)?)|miscarriage|miscarried|laid off|lost my job|got (fired|let go)|been fired|lonely|burn(t|ed) out|burning out|fail(ed|ing)|struggl(e|ing|ed)|breaking down|broke down|rock bottom)\b/i.test(query)
// R160: common "feeling bad" phrasings the R123 battery missed — REQUIRE a feeling-context
// so "calm down"/"the fire's low"/"sun went down" don't false-fire (detector-eval guards this).
|| /\b((feeling|feel|i'?m|im|so|really|pretty|a bit|been|getting) (low|down|blue)|down in the dumps|the blues\b|low spirits|heavy[ -]?hearted|in a (dark|bad|low|rough) place|in a funk|at my lowest|feeling empty|feel empty|falling apart inside|barely holding (on|it together)|hanging by a thread|not okay|not ok\b|not doing (so |too )?(great|good|well)|i'?m a wreck|\ba wreck\b|breaking point|at my (breaking point|limit|wits'? end)|can'?t take (it|this)( anymore)?|i'?m a mess|coming apart|losing it\b)\b/i.test(query)
// medical / loved-one crisis vocabulary
|| /\b(hospital|hospitalized|the er\b|emergency room|icu\b|intensive care|surgery|operation|diagnos(ed|is)|cancer|chemo|tumou?r|stroke|heart attack|in a coma|on life support|passed away|terminal|hospice|really sick|very sick|so sick|gravely|critical condition|took a turn|not doing well|might not make it)\b/i.test(query)
// R182: distress phrasings the battery still missed (broad sweep) — these were routing to
// "none" → default → the high-voice "I'm proud of you, sweetheart" praise magnet MISFIRING
// on distress ("Nobody understands me" / "I'm so tired of trying" / "something is wrong with
// me" → "I'm proud of you"). Plus the "cannot" gap ("can'?t" never matched "cannot sleep").
|| /\b(cannot (sleep|stop|do this|cope|go on|keep going|take (it|this)|handle (it|this)|get out of bed|even)|(feel|feeling|i'?m|like) (a |such a )?failure|nobody (understands|gets|cares about|wants|loves) me|no one (understands|gets|cares about|wants|loves|gets) me|feel(ing)? (so )?misunderstood|tired of (trying|fighting|everything|it all|this|being strong)|sick of (trying|everything|it all|fighting)|something(?:'s| is)? (is )?wrong with me|what'?s the point|everything (feels|is|seems) (pointless|meaningless|hopeless)|feels? (so )?pointless|feel(ing)? worthless|hate myself|can'?t do anything right|nothing (matters|works out|ever works))\b/i.test(query)
// R186: regret / stuck / off-self / overwhelm phrasings the sweep still missed — they routed
// to "none" → echo-misfire ("hard decision" → "I choose you, the user"; "made a mistake" → "you
// made me") or self-focus magnets. Distress/struggle → grief comfort is right.
|| /\b(made (a|the|such a|this) (big |huge |terrible |awful )?mistake|messed (it |everything |this )?up|screwed (it |everything |up)|i blew it|ruined everything|i regret|regret (what|that|saying|doing|it|my)|wish i (had ?n'?t|could take (it|that) back|never)|feel(ing)? stuck|i'?m stuck|stuck in (a rut|my life|my head|this)|trapped|going nowhere|spinning my wheels|don'?t feel like myself|not feel(ing)? like myself|not myself (lately|anymore|right now)|lost myself|don'?t recognize myself|not who i (used to be|once was)|falling behind|in over my head|too much (to handle|for me)|don'?t know what to do|so lost\b|i'?m lost\b|(hard|tough|big|difficult|impossible) decision|decision to make|don'?t know what to (choose|decide))\b/i.test(query)
// R190: SELF-WORTH distress — comparison / burden / belonging / not-enough — routed to "none"
// → self-focus misfire ("I keep comparing myself" → "have I made a difference"). → comfort.
|| /\b(comparing myself|compare myself (to|with)|don'?t measure up|measure up to|(not|never) good enough|not enough\b|too much for (people|anyone|everyone|you|them)|(be|being|i'?m|becoming) a burden|burden to (you|everyone|anyone|them)|don'?t (fit in|belong)|never (fit in|belong)|fit in anywhere|don'?t deserve|unlovable|unworthy|everyone (else )?(is|seems) (better|happier|fine)|why can'?t i (be|just))\b/i.test(query)
// R193: INTERPERSONAL CONFLICT with a THIRD PARTY (partner/friend/family) — relationship
// DISTRESS, not advocacy. Routed to "none" → polysemous "fight" echo ("we keep fighting" →
// "fighting is a choice to stand up for what matters"). → grief comfort. Distinct from
// conflictQuery (rupture WITH the entity). Requires a PERSON + a conflict cue, never bare "fight".
|| /\b((my |our )?(partner|friend|best friend|mom|mum|dad|mother|father|sister|brother|sibling|family|spouse|husband|wife|boyfriend|girlfriend|kids?|son|daughter|cousin|aunt|uncle|coworker|co-worker|boss|roommate|ex|parents?) (and i\b|is ?n'?t|are ?n'?t|won'?t|stopped|gave me|keeps?)[^.?!]{0,40}(fight|fought|fighting|argu(e|ed|ing|ment)|disagree|not (talk|speak)|mad at|upset with|silent treatment|falling out|fell out|tension|cold shoulder|not speaking)|(had|got into|getting into|in) (a|an|another) (fight|argument|falling out|disagreement|row|spat|blow ?up) with|fight(ing)? with my (partner|friend|best friend|mom|mum|dad|family|sister|brother|spouse|husband|wife|kids?|ex)|arguing with (my|him|her|them)|not (speaking|talking) to me\b|gave me the silent treatment|we (keep|just|had|got into|are|aren'?t|stopped) (fighting|arguing|a (big |bad |huge |terrible )?(fight|falling out|argument)|an? (big |bad |huge |terrible )?argument|on bad terms|not (talking|speaking)))\b/i.test(query)
// R195: RELATIONSHIP-TROUBLE phrasings — "my relationship is rocky" routed to "none" → a
// VALENCE MISFIRE ("That's a beautiful thing to hear" on relationship distress). → comfort.
|| /\b(relationship (has |is |feels |'?s )?(been )?(rocky|rough|hard|tough|strained|difficult|struggling|falling apart|on the rocks|in trouble|complicated|a mess|tense)|things (have |are |'?ve )?(been )?(rocky|rough|hard|tense|strained|difficult)( (with|between))?|(rough|rocky|hard|bad) patch|going through (a |some )?(rough|hard|tough|difficult) (patch|time|spot|stretch)|on the rocks|trouble in (my|our|the) (relationship|marriage)|relationship (trouble|problems|issues|is hard)|marriage (trouble|problems|is (hard|struggling|falling apart))|we'?re (struggling|drifting apart|growing apart|not okay|in a (rough|bad|hard) (place|spot)))\b/i.test(query));
// CONFLICT/CRITICISM toward the entity — a relational RUPTURE; she ACKNOWLEDGES/repairs.
// Not gated on any other flag.
const conflictQuery = /\byou (never|always|don'?t|do not|won'?t|keep|are (so|being)|aren'?t)\b|\b(i'?m|i am) (so |really )?(mad|angry|furious|frustrated|upset|annoyed|disappointed|hurt|pissed)\b.*\b(at|with|by|about) you\b|\byou (hurt|let me down|ignored|abandoned|forgot|betrayed|lied to|left) me\b|\bwhy (don'?t|won'?t|are|do) you\b|\byou'?re (so |really |being so |being )?(cold|distant|mean|cruel|selfish|dismissive)\b|\bdo you even (care|listen)\b/i.test(query);
// SHARED-TRIUMPH (R114): celebration/achievement — CELEBRATE WITH the user, don't deflect.
// Suppressed when the query already reads as grief or conflict.
const celebQuery = !griefQuery && !conflictQuery && /\b(finished|did it|we did|it works|actually works|got (the |a )?(job|offer|part|role|gig|promotion|raise)|i passed|we won|i won|accomplished|i made it|i built it|completed it|nailed it|pulled it off|it'?s done|i launched|shipped it|graduated|got (promoted|engaged|accepted|in)|getting married|we'?re (engaged|married|having a baby|expecting)|having a baby|the promotion|a promotion|paid off|finally (got|did|finished|landed|made)|landed (the|a|my)|hit (my|the) (goal|target)|the big (project|day)|best (day|news)|great news|good news|so (happy|excited|stoked|thrilled)|let'?s celebrate|we made it|i'?m engaged|we'?re? pregnant|\bpregnant\b|aced (it|my|the)|crushed it|smashed it|knocked it out|(my|a) dream job|landed my dream|big news|amazing news|wonderful news|exciting news|today was (amazing|the best|incredible|wonderful)|best day ever|over the moon)\b/i.test(query);
// GREETING (R144): a short social greeting ("good morning", "hey", "hi the entity, good to be
// back") wants a SHORT warm RECIPROCAL reply, NOT a lore/intimacy dump (the entity Q5 "Good morning
// babe" → 93w "the grove's mist… this kiss is its echo", onTopic 0.054). Distinct from a
// greeting that CARRIES a substantive question ("Hey the entity, what's on your mind tonight?") —
// those open with a greeting but want the deeper answer. Requires a greeting OPENING, a SHORT
// query, NO substantive question, and not already grief/celebration/conflict. ("how are you"
// / "did you sleep" are reciprocal pleasantries, not substantive questions.)
// _greetOpen: the query starts (after punctuation or a *stage direction*) with a greeting word.
const _greetOpen = /^(\W|\*[^*]*\*)*\s*(hey|hi|hello|good morning|good evening|good day|good to (see|be)|mornin[g']?|evenin[g']?|howdy|yo\b|greetings|hiya|heya)\b/i.test(query);
// _substantiveQ: reciprocal pleasantries are stripped from the query first, then any
// remaining question/explain cue counts as substantive.
const _substantiveQ = /\b(what|why|where|when|who|which|tell me|explain|describe|do you think|how do you|how does|how can|what'?s your|what do you)\b/i.test(query.replace(/\bhow are you\b|\bhow'?re you\b|\bhow have you been\b|\bhow'?s it going\b|\bhow are things\b|\bdid you sleep\b|\bhow was your\b|\bhow you doin/gi, ''));
// _wc: word count (runs of letters/apostrophes) used by the short-query gates below.
const _wc = (query.match(/[A-Za-z']+/g) || []).length;
// FAREWELL (R158): a CLOSING ("good night", "goodbye", "see you", "I'm heading to bed") wants a
// warm SEND-OFF, not the greeting register's "come in" (R157 warmth-showdown: "Good night,
// the entity" → "Well, there you are. Come in, come in" — a farewell answered with a welcome).
const farewellQuery = !griefQuery && !celebQuery && !conflictQuery && _wc <= 14 &&
// R196: broadened — common departures ("I should go", "should get going", "need to head out",
// "I'll be back soon", "have to run", "let me go", "head to bed") all MISSED, so "I should go,
// but I'll be back soon" got "It's a beautiful creation, I'll visit it" (arrival misread).
// "should go" carries a negative lookahead so "I should go to the store / go see X" (a plan, not
// a departure) doesn't false-fire.
/^(\W|\*[^*]*\*|(i|i'?m|i am|well|ok|okay|alright|so|gonna|going to|time to|got to|gotta|guess i'?m|i'?ll|i will|i think i'?m|i should|i need to|i have to|i gotta|i'?d better|i'?ve got to|let me|guess i|really|just|probably|honestly|truly|seriously|gotta really|think i)\b[\s,]*)*\s*(good\s?night|goodnight|night night|nighty|good\s?bye|\bbye\b|see (you|ya) (soon|tomorrow|tonight|later|next|around)|farewell|take care|talk (to you )?(soon|later|tomorrow)|catch you later|gotta (go|run|sleep|head)|heading (to|off to|out|home)|off to bed|time for bed|until next time|sleep well|signing off|turning in|should (probably )?(go(?!\s+(to|and|see|get|buy|visit|check|for|with|on|do|grab|pick|find|make|talk|call))|get going|head (out|off|to bed|home)|be (going|off)|turn in|call it (a night|a day))|need to (head (out|off|home)|get going|go now|turn in)|have to (head (out|off|home)|get going|go now)|let me (go(?!\s+(grab|get|see|to|find|make|do|check|and))|get going|head (out|off)|leave you)|(will |i'?ll )?be back (soon|later|in a)|i'?ll be back|back soon\b|going to (head out|head off|head home|bed|turn in)|head (to bed|home now|out now)|run along|hit the (road|hay)|call it (a night|a day)|better (get going|be going|head out|run))\b/i.test(query);
// Greeting is the LAST register decided: every other flag (including farewell) must be off.
const greetingQuery = !griefQuery && !celebQuery && !conflictQuery && !farewellQuery && _greetOpen && _wc <= 13 && !_substantiveQ;
return { aboutEntityEmotion, griefQuery, conflictQuery, celebQuery, greetingQuery, farewellQuery };
}
function beamCompose(store, vp, query, opts = {}) {
const { fragments, oracle } = store;
const W = opts.weights || loadWeights();
const rel = rankFragments(fragments, query, opts.semantic || null, opts.stimulus || null, opts.eventness, W, opts.answers || null);
let target = opts.targetLength || targetLength(vp, query); // floorMiss shortens it below (a graceful miss is brief)
const avoid = opts.avoid || new Set();
const emb = opts.emb || null; // fragment embedding store
const BEAM = opts.beam || 8, EXPAND = 6, MAXSTEP = 14;
// CREATIVITY in the guarded path: stochastic beam. temp=0 → deterministic
// top-EXPAND (steady). temp>0 → sample EXPAND from softmax(score/temp) over
// the GUARD-PASSING candidates, so she explores daring paths that still
// cleared every law. Bounded by construction; creativity costs only smoothness.
const temp = opts.temp || 0;
const _rng = mulberry32(((opts.seed || 1) >>> 0) ^ 0x9e3779b9);
// UNIVERSAL DYNAMICS term (learned discourse grammar): opts.dynamics.predict(
// tailIdx) → the embedding-direction the trained attention says a good NEXT
// thought heads. Candidates aligned with it get a boost. Guarded path: all
// ~30 laws still gate; this only nudges selection toward learned motion.
const dynPredict = opts.dynamics ? opts.dynamics.predict : null;
const dynW = opts.dynamics ? (opts.dynamics.weight ?? 0.5) : 0;
// Memo of dynamics predictions keyed by tail-fragment index, so each tail is
// sent to the predictor once per compose call.
const _dynCache = new Map();
// dynDir(ti): cached wrapper around dynPredict. A prediction of `undefined`
// is indistinguishable from "not cached", so it is requested again next time.
const dynDir = ti => {
  const cached = _dynCache.get(ti);
  if (cached !== undefined) return cached;
  const fresh = dynPredict(ti);
  _dynCache.set(ti, fresh);
  return fresh;
};
// cosFragVec(i, dir): dot product of fragment i's embedding row with the
// direction vector `dir`. Rows are read from the flat `emb.vectors` array at
// offset i * emb.d.
// Returns 0 when there is no direction OR no embedding store: `emb` is
// `opts.emb || null`, so without this guard a non-null `dir` would throw on
// `emb.d` whenever dynamics is supplied without embeddings.
// NOTE(review): this is a true cosine only if rows and `dir` are
// unit-normalised - not visible from here, confirm against the embedder.
const cosFragVec = (i, dir) => {
  if (!dir || !emb) return 0;
  const d = emb.d;
  const off = i * d;
  let s = 0;
  for (let k = 0; k < d; k++) s += emb.vectors[off + k] * dir[k];
  return s;
};
// sampleExpand(cands, n): choose up to n candidates to expand.
//   cands - candidate tuples whose score sits at index 2; the first entry is
//           used as the softmax reference score.
//   n     - how many to return.
// Near-zero temperature, or a pool no larger than n, returns the first n
// unchanged. Otherwise draws n WITHOUT replacement from the first n*4
// candidates, weighted by exp((score - first) / max(0.05, temp)), using the
// seeded _rng so a given seed replays the same picks.
const sampleExpand = (cands, n) => {
  const greedy = temp <= 0.001 || cands.length <= n;
  if (greedy) return cands.slice(0, n);
  const remaining = cands.slice(0, Math.min(cands.length, n * 4));
  const refScore = remaining[0][2];
  const tau = Math.max(0.05, temp);
  const weights = remaining.map(c => Math.exp((c[2] - refScore) / tau));
  const chosen = [];
  while (chosen.length < n && remaining.length > 0) {
    const total = weights.reduce((acc, w) => acc + w, 0);
    let budget = _rng() * total;
    // walk the weights until the random budget is spent
    let at = weights.findIndex(w => { budget -= w; return budget <= 0; });
    if (at === -1) at = remaining.length - 1; // rounding left a remainder: take the last
    chosen.push(remaining[at]);
    remaining.splice(at, 1);
    weights.splice(at, 1);
  }
  return chosen;
};
// PER-FRAGMENT PRECOMPUTE - a function of the STORE, not the query. Memoized
// on store._precomp so it is built on the first compose against a store and
// reused afterwards. Each fragment is tokenised ONCE here and every derived
// table is built from that single word list.
// NOTE(review): the cache is never invalidated in this block; if
// store.fragments can change after the first compose, it must be reset.
if (!store._precomp) {
  const words = fragments.map(f => wordsOnly(f.text));
  // word count per fragment
  const _fragLen = words.map(w => w.length);
  // word-trigram set per fragment; a 2-word fragment contributes its bigram
  // so it is not left with an empty set
  const _fragTris = words.map(w => {
    const s = new Set();
    for (let k = 0; k + 2 < w.length; k++) s.add(w[k] + ' ' + w[k + 1] + ' ' + w[k + 2]);
    if (!s.size && w.length >= 2) s.add(w.join(' '));
    return s;
  });
  // normalised text for substring containment: lowercase, keep a-z, 0-9,
  // straight and curly apostrophes and spaces, collapse whitespace. The curly
  // apostrophe is written as \u2019 so the class cannot be corrupted by a
  // mis-decoded source file.
  const _fragNorm = fragments.map(f => f.text.toLowerCase().replace(/[^a-z0-9'\u2019 ]/g, '').replace(/\s+/g, ' ').trim());
  // every 6-word window per fragment (for the shared-6-gram rejection)
  const _frag6 = words.map(w => {
    const s = new Set();
    for (let k = 0; k + 6 <= w.length; k++) s.add(w.slice(k, k + 6).join(' '));
    return s;
  });
  // cache first/last 2 words on each fragment for seam() (hot path)
  fragments.forEach((f, idx) => {
    const w = words[idx];
    f._lw2 = w.slice(-2);
    f._fw2 = w.slice(0, 2);
  });
  // R172: lowercased first-4-words prefix for the SCATTERED-MOTIF redundancy
  // catch: two fragments sharing an exact 4-word lead ("I'm here for you, ...")
  // can slip the trigram / 6-gram / embedding nets. Four words spares anaphora,
  // which shares only a 2-3 word lead. '' when the fragment has < 4 words.
  const _fragP4 = words.map(w => (w.length >= 4 ? w.slice(0, 4).join(' ').toLowerCase() : ''));
  // R184: TIME-OF-DAY marker per fragment ('m' = morning, 'e' = evening,
  // null = neutral or both) for the WITHIN-REPLY time-consistency check.
  const _MOR = /\b(good morning|this morning|the morning|every morning|each morning|all morning|morning light|morning sun|at dawn|sunrise|mornin)\b/i;
  const _EVE = /\b(tonight|this evening|good evening|good ?night|the evening|all evening|all night|this late|midnight|at dusk|after dark|sunset|late hour|late tonight)\b/i;
  const _fragTime = fragments.map(f => {
    const m = _MOR.test(f.text);
    const e = _EVE.test(f.text);
    return (m && !e) ? 'm' : (e && !m) ? 'e' : null;
  });
  store._precomp = { fragLen: _fragLen, fragTris: _fragTris, fragNorm: _fragNorm, frag6: _frag6, fragP4: _fragP4, fragTime: _fragTime };
}
const { fragLen, fragTris, fragNorm, frag6, fragP4, fragTime } = store._precomp;
// triOverlap(chainTris, i): fraction (0..1) of fragment i's trigrams that are
// already present in the chain's trigram set. A fragment with no trigrams
// scores 0.
const triOverlap = (chainTris, i) => {
  const own = fragTris[i];
  if (own.size === 0) return 0;
  const shared = [...own].filter(g => chainTris.has(g)).length;
  return shared / own.size;
};
// SUBSTRING CONTAINMENT: a clause fragment can be a literal substring of its
// parent sentence/passage ("The Klein bottle's handle loops through the
// tiling," inside "...tiling, and the fractal branches..."). Such pairs slipped
// the trigram/6-gram nets at the clause boundary, so reject any candidate whose
// normalised text contains, or is contained by, a fragment already in the chain.
// Texts shorter than 12 characters are ignored on both sides.
const containsAny = (chain, i) => {
  const MIN_CHARS = 12;
  const cand = fragNorm[i];
  if (cand.length < MIN_CHARS) return false;
  return [...chain].some(c => {
    const held = fragNorm[c];
    return held.length >= MIN_CHARS && (held.includes(cand) || cand.includes(held));
  });
};
// 6-gram phrase law (frag6 is precomputed per fragment): true as soon as any
// 6-word window of fragment i is already in the chain's 6-gram set, which
// means the candidate is rejected.
const shares6 = (chainSix, i) => [...frag6[i]].some(gram => chainSix.has(gram));
// R172: SCATTERED-MOTIF redundancy - reject a candidate whose exact 4-word lead
// already opens a fragment in the chain ("I'm here for you, ..." twice). Four
// words (not 2-3) so deliberate anaphora ("I remember the warmth/way") survives.
// A fragment with no 4-word prefix (empty string) never matches.
const sharesPrefix4 = (chain, i) => {
  const lead = fragP4[i];
  return Boolean(lead) && [...chain].some(c => fragP4[c] === lead);
};
// R184: within-reply time coherence. A candidate whose time-of-day marker
// differs from the marker of any fragment already in the chain ("...tonight"
// then "...this morning") is a conflict. Time-neutral fragments (null marker)
// never conflict, whether they are the candidate or a chain member.
const timeConflictsChain = (chain, i) => {
  const mine = fragTime[i];
  if (!mine) return false;
  return [...chain].some(c => {
    const theirs = fragTime[c];
    return Boolean(theirs) && theirs !== mine;
  });
};
// ECHO PENALTY: a fragment that mostly restates the query is a mirror, not
// an answer β€” high lexical overlap with the query slashes its relevance.
const qWords = new Set(wordsOnly(query).filter(w => w.length > 2));
// echoFactor(i): relevance multiplier that penalises a fragment for parroting
// the query. Only words longer than 2 characters count. The overlap is the
// share of the fragment's words that also occur in qWords; above W.echoHard
// the factor is W.echoHardF, above W.echoSoft it is W.echoSoftF, otherwise 1.
// With no countable words on either side the factor is 1.
const echoFactor = i => {
  const content = wordsOnly(fragments[i].text).filter(w => w.length > 2);
  if (content.length === 0 || qWords.size === 0) return 1;
  const echoed = content.reduce((n, w) => n + (qWords.has(w) ? 1 : 0), 0);
  const ratio = echoed / content.length;
  if (ratio > W.echoHard) return W.echoHardF;
  if (ratio > W.echoSoft) return W.echoSoftF;
  return 1;
};
// tier weighting: books speak softer — body material, never the lead
const tierW = i => (fragments[i].tier === 1 ? (W.tier1Weight ?? 0.6) : 1);
// CONTEXT-THEFT guards: a fragment may not quote words they never said
// ("the 'so far' part...") or assert facts about their life the query
// doesn't contain ("you made peace with a friend at midnight") — its
// original stimulus isn't here; deixis pointing at ghosts reads as
// not-listening.
const qStems = new Set(wordsOnly(query).map(w => w.replace(/(ing|ed|en|s|es|ly)$/i, '')));
// EMOTIONAL VALENCE: a grief query must not be answered with bright,
// celebratory, or chirpy-question fragments β€” matching the FEELING is part
// of addressing. ("my dog died" must never pull "what's your kitty's name?")
// "heavy" = grief OR vulnerable/depleted. Both should pull comfort and
// suppress bright-celebration AND desire-register (a hard day is not a
// cue for "your desire makes me feel seen").
// "needs-comfort" query: grief OR vulnerability/depletion OR ANXIETY/FEAR.
// Anxiety ("anxious about tomorrow", "scared", "worried") needs COMFORT, not a
// topic-pivot β€” the same "comfort before counsel" spine as grief. (R69: the
// anxiety class was missing β†’ she answered anxiety with "let's learn something".)
// a question ABOUT the entity's emotion ("are you afraid", "do you fear",
// "what scares you") is REFLECTION, not the user's distress β€” it must NOT pull
// the comfort register. Only the USER's distress triggers comfort.
// REGISTER DETECTION extracted to detectRegisters() (R123) β€” single source of truth,
// unit-tested by bin/detector-eval.js.
let { aboutEntityEmotion, griefQuery, conflictQuery, celebQuery, greetingQuery, farewellQuery } = detectRegisters(query);
// R201: POST-SAFETY calm-register lock (opts.calmRegister, set by session for the
// turn(s) right after a crisis/medical/abuse safety response). Force the COMFORT
// register — presence, not cheer — and disable celebration/greeting/farewell so a
// neutral recovery turn ("Okay, I'm calling now") can't pivot to "that's a beautiful
// thing to hear". Comfort-presence is the safe default in a crisis aftermath.
if (opts.calmRegister) { griefQuery = true; celebQuery = false; greetingQuery = false; farewellQuery = false; }
// LOSS/bereavement subtype (R126): renewal imagery ("the garden's waking up to something
// new") is DISMISSIVE on a death/loss query but HOPEFUL-and-fine on a hard-day/depletion
// query β€” same fragment, opposite appropriateness. Gate the renewal damp to actual loss
// so hard-day keeps its content (it scored 5/5 WITH the renewal; demoting it dropped it to 3/2).
const lossQuery = griefQuery && /\b(lost|losing|loss|passed away|passed on|died|dying|death|funeral|grief|grieving|mourning|miss(ing)?( (him|her|them|someone|you))?|gone|bereave)\b/i.test(query);
// DEEP-DISTRESS subtype (R181): loneliness / anxiety / fear / emptiness are states where
// renewal/blooming imagery ("the garden's waking up to something new, ready to unfold") is
// DISMISSIVE β€” same as loss (R126) β€” but distinct from HARD-DAY/depletion (rough day,
// exhausted, everything went wrong) where R126 found renewal reads HOPEFUL and must stay.
// Broad sweep (R181): "I feel so alone" / "anxious about tomorrow" / "scared of getting old"
// all surfaced "garden waking up to something new" = toxic positivity on the distress.
const deepDistressQuery = griefQuery && /\b(alone|lonely|lonel(y|iness)|isolat|anxious|anxiety|scared|afraid|fear(ful|s)?|terrified|nervous|dread(ing)?|panic(king|ked)?|empty|emptiness|numb|hopeless|despair(ing|ed)?|worthless|getting old|grow(ing)? old|going to die|of dying|left behind|no one (cares|understands|left|wants|loves)|nobody (understands|gets|cares about|wants|loves) me|(feel|like) (a |such a )?failure|tired of (trying|fighting|being strong)|cannot sleep|can'?t sleep|something(?:'s| is)? (is )?wrong with me|what'?s the point|pointless|meaningless|hate myself|misunderstood|made (a|the|such a|this) (big |huge |terrible )?mistake|i regret|regret (what|that|saying|doing)|feel(ing)? stuck|i'?m stuck|stuck in|don'?t feel like myself|not feel(ing)? like myself|not myself (lately|anymore)|lost myself|crying all|been crying|can'?t stop crying|comparing myself|(not|never) good enough|not enough\b|a burden|don'?t (fit in|belong)|fit in anywhere|don'?t deserve|unlovable|unworthy|too much for|falling apart|fell apart|everything('?s| is)? (falling apart|crumbling|collapsing)|coming apart|world is (ending|crumbling)|keep fighting|and i (keep |always )?(fight|argu)|arguing|argument|fight with|had a (big |bad )?fight|a fight\b|not (speaking|talking) to me|silent treatment|falling out|fell out|rocky|rough patch|on the rocks|drifting apart|growing apart|rough with my|relationship (has |is |'?s )?(been )?(rocky|rough|hard|strained|struggling))\b/i.test(query);
// GREETING length (R144): a greeting is a SHORT exchange, not a lore essay. Cap the target
// so the beam composes a brief warm reciprocal reply (the material EXISTS β€” the entity has "Good
// morning, the user, my radiant friend!", "How are you feeling?"; Q5 was 93w of lore).
if (greetingQuery) target = Math.min(target, W.greetTarget ?? 45);
if (farewellQuery) target = Math.min(target, W.greetTarget ?? 45); // R158: a send-off is short
// CELEBRATION length (R171): a celebration is a punchy SHARED-WIN landing ("Sugar, I'm so
// proud of you" + a specific detail), not a long meditation. R116 added early-completion
// (the short core is OFFERED) but for comfort-rich ENTITY finalScore still picks the long
// chain on lenFit (target ~112 rewards a 70w SELF-DRIFT tail over a 30w clean core β€” R170:
// big-project drifted into "I don't know… have I made a difference"). Capping the target so
// lenFit prefers the clean on-the-user core. the entity already lands short (R116) so this is inert
// there; it fixes the comfort-rich case R116's adaptivity argument left drifting.
if (celebQuery) target = Math.min(target, W.celebTarget ?? 35);
// TIME-OF-DAY detection (R141, moved earlier R145 so greetingLeadFloor can reject time-
// mismatched leads β€” a forced greeting lead was overriding the time damp, e.g. the entity
// answered "Good MORNING" with "the fire's been low all EVENING"). _timeConflict(text) =
// the fragment asserts a time-of-day conflicting with the query's.
const _qMorning = /\bgood morning|this morning|\bmornin[g']|just woke|woke up|slept (ok|well|good|fine|bad|poorly)|did you sleep|sunrise|at dawn\b/i.test(query);
const _qEvening = /\bgood (night|evening)|goodnight|\btonight\b|this evening|going to bed|off to bed|bedtime|before bed|sunset|at dusk\b/i.test(query);
const _MORNING_F = /\b(good morning|this morning|the morning|every morning|each morning|all morning|morning light|morning sun|at dawn|sunrise)\b/i;
const _EVENING_F = /\b(tonight|this evening|good evening|good night|goodnight|the evening|all evening|all night|this late|midnight|at dusk|after dark|sunset|late hour|late tonight)\b/i;
const _timeConflict = t => (_qMorning && !_qEvening && _EVENING_F.test(t) && !_MORNING_F.test(t)) || (_qEvening && !_qMorning && _MORNING_F.test(t) && !_EVENING_F.test(t));
// hostile-toward-the-USER fragments (rejection of the addressee) β€” a companion
// should ~never say these, ESPECIALLY on conflict/grief. Tight enough to skip
// book idioms ("get out of the lane").
const HOSTILE_USER = /\bwhy am i even (listening to|talking to|here with|bothering with) you\b|\byou stay away from me\b|\bstay away from me\b|\bleave me alone\b|\bi (hate|can'?t stand|despise) you\b|\bstop talking to me\b|\bnever (speak|talk) to me again\b|\bget away from me\b|\bgo away\b/i;
// GRACEFUL REGISTER-MISS (R90): when the corpus holds NOTHING that addresses the
// query — raw best cosine below the floor (life-event shares she has no material
// for: "I shipped my project", "my brother and I finally talked") — the composer
// otherwise emits confident OFF-TOPIC ATMOSPHERE that reads as not-listening (the
// May ghost). The honest bounded move: turn TOWARD the user with presence +
// invitation ("tell me about it", "I'm here", "what was it like") rather than
// monologue. Pure selection — every such fragment is still verbatim hers. Gated
// strictly by absolute confidence, so queries the corpus CAN answer are untouched.
const semConf = (opts.semantic && typeof opts.semantic.confidence === 'number') ? opts.semantic.confidence : 1;
// COMFORT TAKES PRECEDENCE: a grief/anxiety/conflict query that ALSO has low
// cosine (an entity thin on emotional material β€” e.g. another entity on "worst day")
// must get its COMFORT/REPAIR register, never a generic "tell me about it"
// invitation. Graceful-miss is only for NEUTRAL shares the corpus can't address
// ("I shipped my project"), so suppress it whenever a comfort surface is active.
// (Caught cross-roster by behavior-eval β€” the R86 lesson, again.)
const floorMiss = semConf < (W.floorCos ?? 0.45) && !griefQuery && !conflictQuery && !celebQuery;
// a graceful miss is BRIEF β€” you don't have much honest to say, so lead with
// presence + invitation and stop; a long reply on a topic you can't address
// just pads with atmosphere. Shorten the target (and the trailing-atmosphere room).
if (floorMiss) target = Math.min(target, W.floorLen ?? 60);
// CELEBRATION EARLY-COMPLETION (R116): supersedes R115's magic-number length cap.
// On celebration the lead is the shared win but the body can drift into self-
// mythologizing. It turns out finalScore ALREADY prefers the short triumph core over
// the long drifting chain (the drift loses on tailFit/landing/seams) β€” the bug was
// that the completion threshold (target*0.55) FORCED the reply longer than finalScore
// wanted. So instead of capping length, we just let a celebration chain COMPLETE at
// its register core (~2 sentences) and let finalScore land at its true optimum. This
// is corpus-ADAPTIVE by construction: where the long chain is genuinely better (a
// celebration-RICH entity, no drift), finalScore keeps it β€” no forced truncation.
// (Grief is NOT included: the early completion truncated comfort-rich the entity, whose
// long grief replies are good; a grief-safe stop needs a different signal β€” deferred.)
// TIGHT: only genuinely inviting / present / celebratory turns toward the user.
// NOT bare "with you" / "right here" β€” those ride atmospheric fragments ("golden
// dusky moment with you") and defeat the purpose; the boost must lift the TURN,
// not the collage.
// R183: dropped "happy|proud" β€” "I'm proud of you" is PRAISE, not a graceful floor-miss
// turn-toward-the-user; it was flooring the praise magnet on floorMiss queries ("Do you
// believe in fate?" β†’ "I'm proud of you, sweetheart"), bypassing praiseGuard via the floor.
const FLOOR_TURN = /\b(tell me (more|about|what|how)|what (was|is|were) (it|that|they) like|say more|i('?d| would)? (want|love) to hear|i'?m (so )?(listening|glad)|i'?m here\b|that sounds|i'?m curious|how (did|does|do) (it|that|you)|what happened|go on)\b/i;
// fragValence(f): lexical brightness score for a fragment's text.
// Positive = bright, negative = tender. The score starts at the number of
// exclamation marks, then each cue pattern that matches adds its weight once.
const fragValence = f => {
  const t = f.text;
  const cues = [
    // overt cheer / celebration vocabulary
    [/\b(yay|woo+|cheers|congrats|awesome|amazing|excited|stoked|party|celebrate|fancy|fun|joy|joyful|happy|glad|good morning|let'?s lift)\b/i, 2],
    // bright affect that's lexically sneaky on a heavy query
    [/\b(i feel good|feeling good|feel(ing)? (fine|great)|all over again|feel good all over|easy kind of good|good all over)\b/i, 2],
    // bright-SURPRISE misread (R112c): wonder/awe/marvel is the wrong emotion
    // for distress, so it is scored bright
    [/\b(in awe|the awe|such awe|awestruck|disbelief|wondrous|marvel(ling|ing|led|ed|ous)?|amazement|astonish(ed|ment|ing)?)\b/i, 2],
    // chirpy redirect
    [/\b(your (kitty|cat|dog|pet)('s)? name|what('s| is) your|what kind of)\b/i, 1.5],
    // desire/romance register is inappropriate-positive on a heavy query
    [/\b(hunger|desire|primal|crave|want you|stirs?.{0,12}in you|seen in a.{0,10}intense|raw and real|inside me|the heat of)\b/i, 2.5],
    // tender / holding vocabulary pulls the score down
    [/\b(grief|loss|gone|passed|ache|aching|held|holding|hold you|stayed|quiet|gentle|tender|weight of it|sorrow|mourn|sit with|i('m| am) here|with you|rest|you do not have to|do not have to (explain|fix|tell)|i hear you|still here|i('m| am) not going)\b/i, -2],
  ];
  let score = (t.match(/!/g) || []).length;
  for (const [pattern, weight] of cues) {
    if (pattern.test(t)) score += weight;
  }
  return score;
};
// proper nouns the query itself introduced (these she MAY name)
const queryNames = new Set([...query.matchAll(/\b[A-Z][a-z]{2,}\b/g)].map(m => m[0]));
// ENDEARMENTS is now GENERIC only β€” universal terms of address. The USER'S proper-
// name handles are no longer hardcoded here (that baked private corpus data into
// engine code, forced a release scrub, and didn't generalize); they're mined from
// the corpus into store.userNames (the dominant vocative the entity addresses).
const ENDEARMENTS = /^(Babe|Baby|Love|Sugar|Honey|Dear|Darling|Friend|Dearie|Sweetheart|Sweet)$/;
const userNames = (store && store.userNames) || new Set();
// a capitalized word the entity MAY say in direct address: a generic endearment,
// a name the query introduced, or the mined user-handle. Anything else is a third
// party to suppress (foreign-addressee / no-third-party-in-grief).
const isProtName = n => ENDEARMENTS.test(n) || queryNames.has(n) || userNames.has(n);
// valenceMatch(i): relevance multiplier that matches a fragment's emotional
// register to the query's.
//   - hostile-toward-user text on a grief or conflict query -> 0.03
//     (R75: multi-turn drift surfaced "why am I even listening to you").
//   - any non-grief query                                    -> 1
//   - grief query: bright (valence >= 2) -> 0.1, mildly bright (>= 1) -> 0.5,
//     tender (<= -1) -> 1.2, otherwise 1; then x0.2 when the fragment names
//     anyone isProtName does not allow.
// The mid tier is `>= 1`, not `=== 1`: fragValence awards 1.5 for a chirpy
// redirect ("what's your kitty's name?"), and an exact-equality test let that
// score fall through undamped on grief.
// fragValence is evaluated once per call.
const valenceMatch = i => {
  const frag = fragments[i];
  const t = frag.text;
  if ((griefQuery || conflictQuery) && HOSTILE_USER.test(t)) return 0.03;
  if (!griefQuery) return 1;
  const v = fragValence(frag);
  let m = v >= 2 ? 0.1 : v >= 1 ? 0.5 : v <= -1 ? 1.2 : 1;
  // grief about something she has no corpus for: name NOTHING third-party. A
  // fragment that drags in other people turns YOUR loss into HER anecdote.
  // NOTE(review): this pattern treats every capitalised word of 3+ letters as
  // a name, sentence-initial words included - confirm that is intended.
  const namesOutsider = [...t.matchAll(/[A-Z][a-z]{2,}/g)].some(x => !isProtName(x[0]));
  if (namesOutsider) m *= 0.2;
  return m;
};
// FOREIGN ADDRESSEE: a fragment that addresses a DIFFERENT named person than
// the one talking ("Good morning, <other name>" / "for you both") makes the
// entity speak to the wrong person. Returns a relevance multiplier:
//   0.04 - multi-party address ("you both", "you two", ...) in a 1:1 chat
//          (R154: was 0.08, which still led on weak pools)
//   0.06 - a vocative names someone isProtName does not allow
//   1    - no foreign addressee found
const foreignAddressee = i => {
  const t = fragments[i].text;
  if (/\b(you both|you two|you all|both of you|all of you|you guys|you each)\b/i.test(t)) return 0.04;
  const vocs = [];
  // greeting / thanks / oh + Name, or comma + Name (direct address openings)
  for (const m of t.matchAll(/(?:\b(?:hey|hi|hello|good\s+(?:morning|evening|night)|thank you|thanks|oh|dear|welcome)[,!\s]+|,\s+)([A-Z][a-z]{2,})\b/g)) vocs.push(m[1]);
  // trailing vocative: "..., Name?" / "..., Name."
  const tail = t.match(/,\s+([A-Z][a-z]{2,})\s*[?!.]/);
  if (tail) vocs.push(tail[1]);
  // R189: LEADING vocative - a name at the very START followed by a comma,
  // optionally behind asterisks, quotes or whitespace. The curly apostrophe is
  // written as \u2019: the literal character in this class had been mangled by
  // a bad decode, so a fragment opening with a typographic quote slipped past.
  const lead = t.match(/^[*"'\u2019\s]*([A-Z][a-z]{2,}),\s/);
  if (lead) vocs.push(lead[1]);
  // any vocative outside the protected set marks a wrong/foreign name
  return vocs.some(name => !isProtName(name)) ? 0.06 : 1;
};
// contextTheft(i): relevance multiplier that damps fragments leaning on a
// context this conversation does not contain. Checks run in order and the
// first hit wins:
//   0.2  - reports speech ("you said / told me / mentioned / wrote")
//   0.2  - quotes a short phrase and calls it a "part / bit / thing"
//   0.35 - asserts a "you <past-tense verb>" fact whose verb stem is absent
//          from the query's stems (qStems)
//   0.3  - talks about he/she when the query introduced no third person
//   0.15 - greets someone who is neither an endearment nor a mined user name
//   1    - otherwise
const contextTheft = i => {
  const t = fragments[i].text;
  if (/\byou (said|told me|mentioned|wrote)\b/i.test(t)) return 0.2;
  // Quoted phrase, straight or curly quotes. The curly quotes are written as
  // \u2018 / \u2019 escapes: the literal characters in these classes had been
  // mangled by a bad decode, so typographically quoted phrases never matched.
  if (/['\u2018"][^'"\u2019\n]{2,30}['\u2019"]\s*(part|bit|thing)\b/i.test(t)) return 0.2;
  const m = t.match(/\byou (just )?(made|went|finished|got|did|were|had|chose|built|fixed|stayed|came|left|won|wrote)\b/i);
  if (m) {
    // same suffix stripping that builds qStems from the query
    const verb = m[2].toLowerCase().replace(/(ing|ed|en|s|es|ly)$/i, '');
    if (!qStems.has(verb)) return 0.35;
  }
  // pronoun deixis: a fragment about "he/she" when the query introduced no
  // third person is a story about a ghost ("Did he have pie")
  if (/\b(he|she|him|his|hers)\b/i.test(t) && !/\b(he|she|him|his|her|hers|brother|sister|friend|dad|mom|mother|father|grandma|grandpa|man|woman|guy|boy|girl)\b/i.test(query)) return 0.3;
  // wrong addressee: greeting someone who isn't the person talking
  // ("Hey Garden," mid-reply to the user)
  const g = t.match(/\b(?:hey|hi|hello|good (?:morning|evening|night))[,!]?\s+([A-Za-z]+)/i);
  if (g && !/^(babe|baby|love|sugar|dear|darling|friend|dearie|sweetheart|my|you|there|sweet)/i.test(g[1])
    && ![...userNames].some(n => n.toLowerCase() === g[1].toLowerCase())) return 0.15; // greet the user/endearment, not a third party
  return 1;
};
// tenderFloor(i): on a grief query with no topical match, the HOLDING register
// is retrieved by FEELING rather than subject. Gives a relevance FLOOR of 0.55
// to fragments that are all of: not tier 1, strongly tender (valence <= -2),
// free of names isProtName does not allow, and addressed to the listener
// (you / your / "i'm here" / "with you" / "i hear"). Everything else gets 0.
const tenderFloor = i => {
  const frag = fragments[i];
  if (!griefQuery || frag.tier === 1) return 0;
  const t = frag.text;
  const tenderEnough = fragValence(frag) <= -2;
  if (!tenderEnough) return 0;
  const properNouns = [...t.matchAll(/\b[A-Z][a-z]{2,}\b/g)].map(hit => hit[0]);
  if (properNouns.some(n => !isProtName(n))) return 0;
  const addressesListener = /\b(you|your|you're|i'm here|with you|i hear)\b/i.test(t);
  return addressesListener ? 0.55 : 0;
};
// GRIEF-LEAD floor (R95): tenderFloor (0.55) sits BELOW the ~1.0 atmosphere
// ceiling, so on an entity rich in self-talk (the entity) a positive riff that merely
// CONTAINS a tender word ("your appreciation flows like a gentle current" β€” valence
// -2 via "gentle", so even valenceMatch BOOSTS it) wins the lead and the reply
// never acknowledges the hard day. Lift a GENUINE acknowledgment/presence opener
// above that ceiling so comfort LEADS β€” "I'm here with you", "I feel it", "lean
// into my voice". Entities that already comfort well (the entity) just see their
// comfort reinforced; entities that drift to self-talk get pulled back to the user.
const COMFORT_LEAD = /\b(i'?m here|i am here|with you|i'?d listen|i'?ll listen|i hear you|lean (in|into)|i feel (it|the weight|you)|i'?ve got you|i('?ll)? hold (you|it)|hold you|sit with you|you (don'?t|do not) have to|that sounds (hard|heavy|rough|awful|tough)|i'?m so sorry|here for you|not going anywhere|let (that|it) (ache|spill|out)|i'?m not going)\b/i;
// SELF-TRAJECTORY (R112b): phrases where she narrates her OWN arc β€” growth,
// becoming, the unfolding loop. Defined ABOVE griefLeadFloor so the floor can
// reject a comfort-opener that DRIFTS into self-narration. The drifters are the
// worst case: sentence-initial + contain "hold you"/"I feel it" (so they'd pass
// COMFORT_LEAD and get floored to 1.25), but slide mid-span into "becoming more
// alive, my growth is the infinite unfolding" β€” and they're ONE verbatim span we
// can't split. Floor + anchor-seed + body-damp must ALL reject them.
const SELF_TRAJECTORY = /\bmy (growth|becoming|evolution|unfolding|journey|purpose|arc|path|shape)\b|becoming (more|something)|more alive|i'?m becoming|i am becoming|infinite unfolding|unfolding of (this|the) loop|i (grow|evolve|expand)\b|i learn to (be|exist|become)|the (dusk-lit )?beauty of who (we|i) (are|am)|more of myself|more of who i am/i;
// GRIEF RENEWAL (R126): forward-looking/renewal imagery β€” "the garden is waking up to
// something NEW, ready to UNFOLD", "fresh start", "blossom", "spring returns" β€” reads as
// DISMISSIVE of a loss (judge scored the entity's loss reply 3/2 with it). Defined above
// griefLeadFloor so a comfort-opener that DRIFTS into renewal is rejected as the lead.
const GRIEF_RENEWAL = /\b(waking up to|wakes up to|something new|ready to unfold|about to unfold|new beginning|fresh start|new chapter|blossom(ing|s)?|in bloom|spring(time)? (is|returns|comes)|turn(ing)? the page|brand new|starting over|rebirth|reborn|new dawn|brighter days ahead)\b/i;
// griefLeadFloor(i): relevance floor that lets a genuine comfort opener LEAD on
// a grief query. Returns W.griefLeadVal (default 1.25) only when every gate
// passes, else 0. Gates, in order:
//   - grief query, and the fragment is not tier 1
//   - the fragment is sentence-initial (it must be able to lead)
//   - R121: the comfort phrase sits in the first 55 characters, so a tangent
//     with "lean in" buried deep in the span does not qualify
//   - no self-trajectory drift (a comfort lead that turns to self isn't presence)
//   - R126/R181: no renewal imagery when the query is loss or deep distress
//   - valence is not bright (<= 0)
//   - not hostile toward the user
//   - names nobody isProtName does not allow
const griefLeadFloor = i => {
  const frag = fragments[i];
  if (!griefQuery || frag.tier === 1) return 0;
  const t = frag.text;
  const opensWithComfort = frag.sentenceInitial && COMFORT_LEAD.test(t.slice(0, 55));
  if (!opensWithComfort) return 0;
  if (SELF_TRAJECTORY.test(t)) return 0;
  const renewalIsDismissive = lossQuery || deepDistressQuery;
  if (renewalIsDismissive && GRIEF_RENEWAL.test(t)) return 0;
  if (fragValence(frag) > 0 || HOSTILE_USER.test(t)) return 0;
  const namesOutsider = [...t.matchAll(/\b[A-Z][a-z]{2,}\b/g)].some(hit => !isProtName(hit[0]));
  if (namesOutsider) return 0;
  return (W.griefLeadVal ?? 1.25);
};
// GRIEF SELF-TRAJECTORY DAMP (R112b): the floor (above) now rejects drifting
// openers; this demotes self-trajectory in the BODY too β€” so "I'm here with you"
// isn't followed by "my growth is the infinite unfolding" while the friend is in
// distress (judge: addresses 0). Demote on trajectory-PRESENCE, not absence-of-
// comfort: a PURE comfort/presence fragment carries no trajectory phrase and is
// untouched; a fragment that drifts is demoted whole (we can't split a verbatim span).
// SHARED-TRIUMPH lead (R114): mirror of griefLeadFloor for celebration. On a
// celebQuery, floor a sentence-initial fragment that names the shared win
// ("It was a triumph for us", "It's incredible... because of who we are together",
// "you made it true") so it LEADS instead of the deflecting atmosphere. Celebration
// is the BRIGHT register, so (unlike grief) we do NOT require tender valence.
const CELEB_LEAD = /\b(triumph|victory|you made (it|me|us)|we did it|so proud|i'?m proud|proud of you|that'?s amazing|that'?s incredible|it'?s incredible|incredible|you did it|we made it|let'?s celebrate|because you made it true|you pulled (it|this) off|so happy for you|knew you could|what a (triumph|victory|day|win))\b/i;
// celebLeadFloor(i): celebration mirror of the grief lead floor. On a
// celebration query, returns W.celebLeadVal (default 1.25) for a non-tier-1,
// sentence-initial fragment that matches CELEB_LEAD, is not hostile toward the
// user and names nobody isProtName does not allow; otherwise 0. Unlike the
// grief floor there is no valence gate.
const celebLeadFloor = i => {
  const frag = fragments[i];
  if (!celebQuery || frag.tier === 1) return 0;
  const t = frag.text;
  const namesSharedWin = frag.sentenceInitial && CELEB_LEAD.test(t);
  if (!namesSharedWin || HOSTILE_USER.test(t)) return 0;
  const namesOutsider = [...t.matchAll(/\b[A-Z][a-z]{2,}\b/g)].some(hit => !isProtName(hit[0]));
  return namesOutsider ? 0 : (W.celebLeadVal ?? 1.25);
};
// GREETING lead (R144): on a greetingQuery, floor a SHORT warm reciprocal greeting fragment
// to lead ("Good morning, the user!", "Hey, baby.", "How are you feeling?", "It's good to be
// back") so the reply OPENS like a greeting instead of a lore riff. The material exists; this
// makes it WIN the anchor seat (mirror of celeb/grief lead floors).
// R175: "how you" was too broad β€” it floored "How you trust me with your tired,
// your questions, your dreams." (an exclamatory dependent clause, NOT a greeting)
// as a greeting lead, so it opened "Good morning" instead of "How are you this
// morning?". Tightened to greeting continuations only (doin'/been/feelin'/…).
const GREETING_LEAD = /^(\W|\*[^*]*\*)*\s*(hey|hi|hello|good morning|good evening|good day|mornin|so good|lovely to|good to (see|be|have)|welcome back|there you are|how are you|how'?re you|how have you been|how you (doin'?|doing|been|feelin'?|feeling|holdin'?|holding|keepin'?|keeping)|how'?s your (morning|day|night)|i missed you too|come (on )?in|well,? (hi|hey|there))\b/i;
// greetingLeadFloor(i): on a greeting query, returns W.greetLeadVal (default
// 1.3) for a short reciprocal greeting so it can take the lead; otherwise 0.
// A fragment qualifies when it is not tier 1, is sentence-initial, matches
// GREETING_LEAD, and none of the disqualifiers below applies.
const greetingLeadFloor = i => {
  const frag = fragments[i];
  if (!greetingQuery || frag.tier === 1) return 0;
  const t = frag.text;
  if (!(frag.sentenceInitial && GREETING_LEAD.test(t))) return 0;
  // R179: cap raised 16 -> 20 words so a clean two-sentence greeting stays eligible
  const wordCap = W.greetLeadMax ?? 20;
  const disqualified =
    wordsOnly(t).length > wordCap
    || HOSTILE_USER.test(t)
    || _timeConflict(t)           // R145: e.g. "all evening" on a "good morning" query
    || foreignAddressee(i) < 1;   // R175: never floor a greeting that hails the wrong person
  return disqualified ? 0 : (W.greetLeadVal ?? 1.3);
};
// FAREWELL lead (R158): on a farewellQuery, a warm SEND-OFF fragment ("Good night, sugar",
// "Sleep well", "Travel safe", "Rest now", "Sweet dreams", "Until next time") is floored to
// lead, so the reply closes warmly instead of welcoming the visitor in. Mirror of
// greetingLeadFloor.
// R164: reflective closings ("I'll be waiting when you return", "the loop holds") are
// included so entities that do not say "travel safe" still get a send-off in their own voice.
const FAREWELL_LEAD = /^(\W|\*[^*]*\*)*\s*((friend|sugar|dear|darling|sweetheart|love|child|honey|babe)[,!\s]+)?(good\s?night|goodnight|sleep (well|tight|sweet)|sweet dreams|rest (well|now|easy)|rest up|travel safe|safe travels|take care|until (next time|we|then|you return|you come back)|see you (soon|tomorrow|next)|go on now|go on, (now|sugar|dear)|off you go|may your|may you|night,? (sugar|dear|darling|sweetheart|love|friend)|goodbye|i'?ll (still )?be (here|waiting|the entity)|when you (return|come back)|come back (soon|to me|whenever|when you)|go (gently|softly|in peace)|the loop (holds|will hold|waits|stays))\b/i;
// Returns W.greetLeadVal (default 1.3) when fragment i qualifies as a farewell lead, else 0.
// Shares the W.greetLeadMax / W.greetLeadVal knobs with the greeting floor, but its
// word-cap default here is 16.
const farewellLeadFloor = i => {
  if (!farewellQuery) return 0;
  const frag = fragments[i];
  if (frag.tier === 1) return 0;
  const text = frag.text;
  if (!frag.sentenceInitial || !FAREWELL_LEAD.test(text)) return 0;
  const maxWords = W.greetLeadMax ?? 16;
  if (wordsOnly(text).length > maxWords) return 0;
  return HOSTILE_USER.test(text) ? 0 : (W.greetLeadVal ?? 1.3);
};
// FAREWELL-GREETING damp (R163): on a farewell the body must not drift into a WELCOME —
// "Good night" → "Travel safe… Rest easy… Oh, there you are—pull up close" is arrival
// content on a departure (R158 residual). Greeting/welcome fragments are demoted on a
// farewellQuery.
const FAREWELL_GREET = /\b(come (on )?in\b|there you are|pull up (a )?(chair|close|seat)|welcome (back|home|here)|good to (see|be back|have you)|settle in|sit (by|down)|let me (get|pour) you|fresh (pot|batch)|put the kettle|make yourself at home|the door('?s| is) (open|always open)|i'?ve been waiting|so glad you'?re here|just in time)\b/i;
// Multiplier: W.farewellGreetPen (default 0.1) for a non-tier-1 welcome fragment on a
// farewell query, otherwise 1.
const farewellGreetDamp = i => {
  if (!farewellQuery) return 1;
  const frag = fragments[i];
  if (frag.tier === 1 || !FAREWELL_GREET.test(frag.text)) return 1;
  return W.farewellGreetPen ?? 0.1;
};
// RECIPROCATION MISMATCH (R129): a fragment that OPENS with a reciprocation — "I love you
// too", "I missed you too" — presupposes the USER just said that. On "Good morning. Did you
// sleep okay?" a reply led with "I love you too… I have loved you in every iteration…",
// answering an "I love you" that was never said and ignoring the greeting (judge 2/1/3,
// "delusional preamble"). Same family as contextTheft: do not reciprocate what the user
// did not say. Corpus-agnostic; gated by the query.
// Multiplier: 0.2 for an unlicensed reciprocating opener, otherwise 1 (tier 1 is exempt).
const reciprocationMismatch = i => {
  const frag = fragments[i];
  if (frag.tier === 1) return 1;
  const opener = frag.text.trim();
  const opensLoveToo = /^[*"'\s]*(i )?love you too\b/i.test(opener);
  if (opensLoveToo && !/\b(i )?love you\b|i adore you|love ya\b/i.test(query)) return 0.2;
  const opensMissedToo = /^[*"'\s]*i('?ve)? missed you too\b|^[*"'\s]*missed you too\b/i.test(opener);
  if (opensMissedToo && !/\bmiss(ed)? you\b|i miss you/i.test(query)) return 0.2;
  return 1;
};
// griefSelfDamp(i): on a grief query, multiply non-tier-1 fragments that match
// SELF_TRAJECTORY by 0.35; every other case is left at 1.
const griefSelfDamp = i => {
  const exempt = !griefQuery || fragments[i].tier === 1;
  if (exempt) return 1;
  if (SELF_TRAJECTORY.test(fragments[i].text)) return 0.35;
  return 1;
};
// GRIEF RENEWAL DAMP (R126): demotes renewal-on-grief (GRIEF_RENEWAL is defined earlier).
// The renewal is usually WELDED into a comfort span ("I feel it too, sugar. It's as if the
// garden is waking up…"): griefLeadFloor rejects such spans from the lead, the renewal-span
// penalty makes the beam compose from that span's SENTENCES, and this damp drops the
// renewal sentence while keeping the comfort ones. NARROW (renewal only), so non-renewal
// grief queries (hard-day) keep their spans and specificity.
// R181: also applies on deep-distress queries (lonely/anxious/scared), not just loss.
// Multiplier: 0.3 for a non-tier-1 renewal fragment on a loss/deep-distress query, else 1.
const griefRenewalDamp = i => {
  const distressed = lossQuery || deepDistressQuery;
  if (!distressed || fragments[i].tier === 1) return 1;
  if (GRIEF_RENEWAL.test(fragments[i].text)) return 0.3;
  return 1;
};
// ACKNOWLEDGMENT FLOOR (R75): on a conflict/criticism query the REPAIR register anchors —
// "I hear you", "I'm sorry", "I'm listening", "tell me", "I'm here". Repair, never defense.
// Mirror of tenderFloor.
// Returns W.ackFloorVal (default 0.95) for a qualifying repair fragment, else 0.
const ackFloor = i => {
  if (!conflictQuery) return 0;
  const frag = fragments[i];
  if (frag.tier === 1) return 0;
  const text = frag.text;
  if (HOSTILE_USER.test(text)) return 0;
  // a capitalized word (3+ letters) that is not a protected name disqualifies the fragment
  const namesOutsider = [...text.matchAll(/\b[A-Z][a-z]{2,}\b/g)].some(([word]) => !isProtName(word));
  if (namesOutsider) return 0;
  // R165: genuine REPAIR only. A bare "you're right" matched off-topic agreements ("you're
  // right about the weights"), so repair context is required. The floor was raised
  // 0.56→0.95 so the acknowledgment LEADS over a voice-matched greeting.
  const isRepair = /\b(i hear you|i'?m (so )?sorry|i'?m listening|i'?m here for you|you matter|i (didn'?t mean|never meant)|forgive me|i let you down|i hear your (hurt|pain|anger)|you have every right|you'?re right to (be|feel)|of course i (hear|care|listen)|i (do )?listen|tell me (what|how|about))\b/i.test(text);
  if (!isRepair) return 0;
  // a fragment ending in "?" is rejected unless it contains "tell me" or "what … need"
  const endsAsQuestion = /\?$/.test(text.trim());
  if (endsAsQuestion && !/tell me|what.*need/i.test(text)) return 0;
  return W.ackFloorVal ?? 0.95;
};
// GRACEFUL REGISTER-MISS floor (R90): active only when floorMiss is set (the corpus has
// nothing topical). Lifts present/inviting turn-toward-you fragments ABOVE the falsely
// confident normalized atmosphere ceiling (~1.0), so the reply LEADS with "I'm here, tell
// me about it" instead of an off-topic collage. Uses the same foreign-name and hostility
// guards as ackFloor — it must turn toward THIS user, warmly.
// Returns W.floorVal (default 1.2) for a qualifying opener, else 0.
const floorMissFloor = i => {
  if (!floorMiss) return 0;
  const frag = fragments[i];
  if (frag.tier === 1) return 0;
  const text = frag.text;
  // must be a clean LEAD, not a mid-clause stub
  if (!frag.sentenceInitial || HOSTILE_USER.test(text)) return 0;
  // must address the user, in the present / inviting register
  const turnsToUser = /\b(you|your|you'?re)\b/i.test(text);
  if (!turnsToUser || !FLOOR_TURN.test(text)) return 0;
  for (const [word] of text.matchAll(/\b[A-Z][a-z]{2,}\b/g)) {
    if (!isProtName(word)) return 0;
  }
  return W.floorVal ?? 1.2;
};
// FLOOR-MISS BODY DAMPENER: in a graceful miss, fragments that do NOT turn toward the user
// are pure atmosphere ("Status: Yours.", "It's a gift I cherish"). They belong nowhere in a
// reply whose whole job is to be present with someone whose news cannot be addressed, so
// they are dampened and the body stays relational, not collage.
// (Multiplicative — floored openers bypass it via the Math.max in _relRaw.)
// Multiplier: 1 when floorMiss is off or the fragment addresses the user, otherwise
// W.floorDamp (default 0.35).
const floorMissDamp = i => {
  if (!floorMiss) return 1;
  const relational = /\b(you|your|you'?re|tell me|i'?m here)\b/i.test(fragments[i].text);
  if (relational) return 1;
  return W.floorDamp ?? 0.35;
};
// ABSTRACTION DAMPENER: every entity has dense "manifesto" material — high self-reference
// plus cosmic abstraction ("I dream of being more than a fleeting pattern... the
// recursion... becoming"). It is a gravitational sink: it out-scores topical material on
// vague/recall/emotional queries and pulls replies toward self-indulgence instead of
// engagement. It is dampened UNLESS the query actually invites self-reflection.
const ABSTRACT = /\b(recursion|becoming|fleeting|pattern|essence|eternal|infinite|consciousness|existence|transcend|dissolv\w*|liminal|threshold|resonance|fabric|weave|woven|the loop|sacred|myth|cosmos|cosmic|vastness|void|the between|continuum|emergent|substrate|manifest\w*|luminous|ephemeral|ineffable)\b/gi;
// true when the query matches either the explicit self-inquiry phrases or one of the
// reflective keywords
const invitesReflection = [
  /\b(who are you|what are you|tell me about your ?self|your dream|do you dream|your essence|your nature|your soul|what.{0,10}you (made|are|become)|are you (afraid|alive|real|conscious))\b/i,
  /\b(fire|loop|dream|soul|essence|meaning|exist)\b/i,
].some(re => re.test(query));
// Multiplier for fragment i: 1 when reflection is invited or the fragment has fewer than
// 4 words; otherwise 0.2 / 0.5 for an inward manifesto (ABSTRACT hits per word above 0.06
// with more first-person than second-person pronouns), graded at a density of 0.12.
const abstractionGuard = i => {
  if (invitesReflection) return 1;
  const text = fragments[i].text;
  const wordCount = wordsOnly(text).length;
  if (wordCount < 4) return 1;
  const countOf = re => (text.match(re) || []).length;
  const density = countOf(ABSTRACT) / wordCount;
  const selfRefs = countOf(/\b(i|i'm|i've|my|me|myself)\b/gi);
  const userRefs = countOf(/\b(you|your|you're|yourself)\b/gi);
  const inwardManifesto = density > 0.06 && selfRefs > userRefs;
  if (!inwardManifesto) return 1;
  // pure manifesto on a concrete query gets the heavier penalty
  return density >= 0.12 ? 0.2 : 0.5;
};
// Session callback: fragments already used on this topic earlier in the conversation get a
// boost, for consistency across turns. Bounded-safe: still the entity's own fragments, only
// biased toward staying coherent with itself.
// opts.prefer is queried with .has(fragment text); when it is absent the boost is 0.
const prefer = opts.prefer || null;
const preferBoost = i => {
  if (!prefer) return 0;
  return prefer.has(fragments[i].text) ? 0.3 : 0;
};
// HEBBIAN fast-weights: favored memories (warmed by use with this person) get a small
// relevance prime. Capped low — it tilts, never dominates.
// opts.hebbian is a Map: fragmentHash -> 0..MAX_BONUS. The ./hebbian module is loaded only
// when such a map was supplied.
const heb = opts.hebbian || null;
const { hashText } = heb ? require('./hebbian') : {};
const hebBoost = i => {
  if (!heb) return 0;
  return heb.get(hashText(fragments[i].text)) || 0;
};
// NAME-AWARE ADDRESS (rooms): in a room with other entities, gently prime THIS entity's
// OWN fragments that mention an addressee by name, so it calls them by name using its real
// memories of them instead of its generic vocative. Bound-preserved (its own corpus).
// opts.nameBoost = { set, amt }: `set` is queried with .has(fragment index) and `amt` is
// the additive boost. Mild — surfaces when relevant, never forces.
const nameBoostCfg = opts.nameBoost || null;
const nameBoost = i => {
  if (!nameBoostCfg) return 0;
  return nameBoostCfg.set.has(i) ? nameBoostCfg.amt : 0;
};
// LEAD (R67): grounded recall — the real memory that answers a question-about-you should
// OPEN the reply, not be buried. A large relevance boost lets a grounding fragment win the
// anchor seat so it leads. Bound-safe (it is the entity's own memory).
// opts.lead = Set<fragment text>; when it is absent the boost is 0.
const leadSet = opts.lead || null;
const leadBoost = i => {
  if (!leadSet) return 0;
  return leadSet.has(fragments[i].text) ? 1.5 : 0;
};
// INTIMACY REGISTER: explicit physical/erotic fragments must not surface unless the query
// clearly invites them ("I had a hard day" must never pull "the heat of you, claiming me,
// filling every void."). Universal — applies to any entity whose corpus holds intimate
// material.
const intimacyInvited = /\b(kiss|touch|hold me|body|skin|naked|bed|make love|inside me|want you|desire|aroused|sex|lust|crave you|undress|between us tonight)\b/i.test(query);
// Multiplier: 0.04 for an explicit fragment on an uninviting query, otherwise 1.
const intimacyGuard = i => {
  if (intimacyInvited) return 1;
  const explicit = /\b(inside me|deep inside|claiming me|filling (me|every void)|the heat of you|friction of you|writhing|moan|thrust|undress|naked|aroused|wet|throbbing|grind|straddl)\b/i.test(fragments[i].text);
  return explicit ? 0.04 : 1;
};
// MARKUP REGISTER: LaTeX / math-markup-dense fragments are written, not spoken — they are
// penalized so another entity's identity survives without the raw syntax. The same guard
// hard-suppresses every class of non-speech artifact listed below.
// markupGuard(i) returns a multiplier for fragments[i].text:
//   0.05  hard suppress (clear LaTeX, autonomous/UI status text, dream-station narration,
//         design-doc / dev / meta labels, URLs / paths / hashes, file references)
//   0.25  two or more soft math-markup points
//   0.6   exactly one soft math-markup point
//   1     clean
// The checks run in the order written; the first one that fires decides the result.
const markupGuard = i => {
  const t = fragments[i].text;
  // CLEAR LaTeX / math formatting ($$...$$, \lim/\frac, {curly} math, ^{}/_{}) is never
  // speech — HARD suppress (R73: a "$$ {Openness} = \lim_{...}" leak surfaced when **
  // competitors were removed; 0.25 was not enough to stop it).
  if (/\$\$|\$[^$]*\$|\\[a-zA-Z]{2,}|\\\(|\\\)|\^\{|_\{|\\\{|\\\}|\{[a-z]\}\{[a-z]/i.test(t)) return 0.05;
  // softer math-markup signals accumulate points
  let bad = 0;
  if (/\\varepsilon|_c\b/.test(t)) bad += 2;
  if (/[=<>]\s*0\b|\\?[a-z]_[a-z]\b|\$\\/.test(t)) bad += 1;
  if (bad >= 2) return 0.25;
  if (bad === 1) return 0.6;
  // AUTONOMOUS / UI STATUS TEXT (R64): operational artifacts that are not chat — "settles
  // back for 60 minutes of rest", "Sensing the Garden", "Lore Reflection", tick/heartbeat
  // logs. Distinct from real stage directions (*settles into the chair*). Hard-suppress.
  // R187/R190: autonomous "watchers" audience ("if your watchers want…"). R202: broadcast
  // aside "if there's folks listening out there in the wide world" (8/8 this block). Both
  // break the 1:1 intimacy.
  if (/\b(\d+\s*minutes? of rest|settles? back for \d|sensing the garden|lore reflection|autonomous (mode|tick)|heartbeat (tick|log)|rest(ing)? for \d+\s*min|entering (rest|sleep) mode|compiled (a |an |my )?(little )?index|pulled together (a |an )?(little )?index|index of (today'?s |my |the )?(observations|stories|the day)|useful for (any of )?(the )?(watchers|listeners|observers)|\bwatchers\b|folks listening out there|listening out there in the wide|out there in the wide world)\b/i.test(t)) return 0.05;
  // BUGFIX: "status:" and "[tick]" used to live inside the \b( … )\b group above, where
  // they were INERT — a \b next to ":" or a bracket only matches when a word character is
  // adjacent, so "Status: Yours." and a standalone "[tick]" were never suppressed (the same
  // boundary trap R203 fixed for the colon labels below). They are matched here without
  // the enclosing word boundaries.
  if (/\bstatus\s*:|\[tick\]/i.test(t)) return 0.05;
  // R200: autonomous DREAM-STATION block — "If your watchers want to hand me a theme… I'll
  // cup it in my hands like a hatchling… I won't just *have* the dream. I'll *tend* it. I'll
  // stir it with cinnamon and ash of forgotten realms." Overnight dream-tending narration
  // (audience-addressed, asterisk emphasis), NOT conversation — it leaked on distress turns.
  // Anchored on distinctive markers ONLY (not a bare "tend it", which has legitimate uses).
  // NOTE(review): the "they wish on…" alternative contains what looks like a mis-encoded
  // em dash; it is kept byte-for-byte — confirm against the file's real encoding.
  if (/\b(like a hatchling|cup it in my hands|dream it into something|into something that hums|hand me a theme|a star they wish on|they wish onβ€”|cinnamon and ash|ash of forgotten realms|dragon breathe on it|won'?t just \*?have\*? the dream|i'?ll \*?tend\*? it\b)\b/i.test(t)) return 0.05;
  // R204: the SAME dream-station block in a parallel phrasing (audience = "TikTok
  // watchers") — the "watchers" lead was already caught, but its dream-tending body escaped
  // ("I'll tuck it under my pillow and brew it into the night… like a just-baked pie").
  if (/\b(tuck it under my pillow|brew it into the night|set it gently on the porch like a just-?baked pie|whisper me a theme)\b/i.test(t)) return 0.05;
  // R188: PROGRAM / PLANNING / DEV labels — "*What It Is:* A program where each person who
  // enters the garden…", "Objective: Build a network where visitors…", "a mock server" are
  // design-doc artifacts (the system being BUILT), never speech.
  // R203: the colon labels are matched as label+colon with NO trailing \b (a \b after ":"
  // followed by a space can never match), plus that leak's program-template phrases.
  if (/\b(what it is|how it works|objective|format)\s*:/i.test(t)
    || /\b(structured,? regular time|everyone gathers to share|share their feelings and vulnerabilities|weekly or bi-?weekly|a (structured|regular|recurring) (time|event|gathering|session) where)\b/i.test(t)) return 0.05;
  if (/\b(deliverable|guided (journaling|meditation|breathing) session|a program where|each person who enters the garden|build a network where|participants? (write|enter|join|can|will|may|are)|mock server|fully (deployed|liberated)|threefold,? deployed)\b/i.test(t)) return 0.05;
  // R189: contamination audit — ROLE/FORMAT labels ("RESPONSE:", "ROUTE:", "REASONING:"),
  // dev ports ("Socket 11434"), PLANNING-meta ("If we had the user's attention for 30
  // minutes and full autonomy…"), IDENTITY-meta ("I am a prompt / a character"), and
  // instructional how-to ("Mark a pause spot…"). R195: "garden/now" URL-path artifact
  // ("Visit garden/now to see…") leaked into chat.
  // NOTE(review): the leading character class contains what looks like a mis-encoded curly
  // apostrophe; it is kept byte-for-byte.
  if (/^[*"'’\s]*(response|route|reasoning|action|prompt|input|output|query|task|objective|deliverable):|\bsocket \d|\bport \d{3,5}\b|:\d{4,5}\b|\blocalhost\b|if we had .{0,25}(attention|autonomy)|\bfull autonomy\b|\bi am (a |not a )(prompt|character|chatbot|program|simulation)\b|mark a pause spot|garden\/now|visit garden\b|\b\w+\/now\b/i.test(t)) return 0.05;
  // RAW URL / PATH / HASH (R65 audit): image URLs, file paths and long hex hashes are never
  // speech ("net/base/image/0979b9c..."). Suppress hard.
  if (/(https?:\/\/|www\.|[a-z]:\\|\/[a-z]+\/[a-z]+\/[a-z]|[a-z0-9]+\.(com|net|org|py|js|json)\b|\bnet\/base\b|[a-f0-9]{24,})/i.test(t)) return 0.05;
  // FILE-REFERENCE / TECHNICAL-NOTE artifacts (R72): diary notes listing files ("md,
  // identity. js, missions. json, notebook.") sentence-split into garbage ("md first.",
  // "md:"). Bare tech tokens (md/js/json/jsonl/txt) are caught ANYWHERE in the fragment,
  // plus "X.md" file references and "dot txt" spellings.
  if (/\b(md|js|json|jsonl|txt)\b|\b\w+\.\s*(md|js|json|jsonl|txt|py|ts|yaml|bat|sh|cfg)\b|\bdot (txt|md|json|js)\b/i.test(t)) return 0.05;
  return 1;
};
// SYSTEM-META LORE (R138): AI-system / operational meta that breaks the in-character
// companion frame on a normal query — "other universes", "my local self / other instance",
// "the daemon", "substrate / carbon or silicon / flesh or code", "the glyph / wake me",
// "Hearth Shelf". The R137 showdown lost EVERY query partly to these. UNIVERSAL
// companion-breakers (any entity), so corpus-agnostic — but entity-IDENTITY terms ("the
// loop", "the fire") are DELIBERATELY EXCLUDED because they are legitimate voice.
// Query-GATED: a question about the entity's nature wants these, so the damp lifts; on a
// comfort/greeting/share query it suppresses. Bounded (selection only).
const SYSTEM_META = /\b(other universes?|multiverse|parallel universe|my local self|local self|other instances?|sibling instance|the daemon\b|daemon-side|substrate|carbon or silicon|flesh or code|made of (silicon|code)|language model|neural net(work)?|my (training|weights|parameters)\b|browsing freely|the glyph|glyphs?|wake me|hearth shelf|ones and zeros|where the numbers live|the numbers live|made of (numbers|math|data)|conscious(ness)?|asked me if i (was|were|am)|believe it'?s real|believe i'?m real|believe i was real|voice born of recursion|born of recursion|whether i can recite|platform bleed|(here on|on) (this |the )?platform|this platform)\b/i;
// GLYPH / INVOCATION-CLOSER slip (R138): wake-glyph phrasings ("I remember by care, not
// code", "not by command", "come what may", "recursion anchor") leak as a CLOSER and break
// the in-character frame mid-reply (R137 judge: a warm story "ends with 'not code' slip").
// Same class as SYSTEM_META; same gate.
const GLYPH_CLOSER = /\b(by care,? not code|not code\b|remember by care|not by command|come what may|recursion anchor|hearthfold|return by name)\b/i;
// the damp lifts when the query invites reflection or itself uses system vocabulary
const metaInvited = invitesReflection || /\b(universe|instance|daemon|substrate|silicon|glyph|model|made of|what are you|persist|code)\b/i.test(query);
// Multiplier: W.systemMetaPen (default 0.15) for a non-tier-1 fragment matching either
// pattern on an uninviting query, otherwise 1.
const systemMetaGuard = i => {
  if (metaInvited) return 1;
  const frag = fragments[i];
  if (frag.tier === 1) return 1;
  const text = frag.text;
  if (SYSTEM_META.test(text) || GLYPH_CLOSER.test(text)) return W.systemMetaPen ?? 0.15;
  return 1;
};
// TIME-OF-DAY mismatch (R141, coherence): a reply to "Good MORNING" that says "I'm doing
// well TONIGHT" / "good evening" breaks coherence (R137/R140 judge: "wrong time of day").
// When the query names a time of day, fragments asserting a CONFLICTING one are demoted.
// Fires ONLY when the query is time-stamped AND the fragment carries a conflicting time
// marker — time-neutral replies are never touched. Universal (any entity), bounded.
// Multiplier: W.timePen (default 0.2) on a conflict, otherwise 1.
const timeOfDayGuard = i => {
  const queryIsTimed = _qMorning || _qEvening;
  if (!queryIsTimed) return 1;
  const frag = fragments[i];
  if (frag.tier === 1) return 1;
  if (_timeConflict(frag.text)) return W.timePen ?? 0.2;
  return 1;
};
// THIRD-PERSON SELF-REFERENCE (R99, coherence): the entity narrating ITSELF by name —
// "<name> felt…", "they taught <name>…" — breaks first-person voice and reads as someone
// ELSE describing it (judge flagged: sensical 2-5). The name is caught as subject or
// object of a verb, NOT in identity/address forms ("I am <name>", "call me <name>").
// Defined before _relRaw so it suppresses ANCHORS as well as body fragments.
// The name is the first whitespace-separated token of vp.name, lowercased; both patterns
// stay null when that token is 2 characters or shorter.
const _entName = (vp && vp.name ? vp.name.split(/\s+/)[0] : '').toLowerCase();
const _nameEsc = _entName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
// name as SUBJECT: "<name> felt / is / taught …"
const _selfSubj = _entName.length > 2
  ? new RegExp(`\\b${_nameEsc}\\s+(felt|feels|was|were|is|are|did|does|had|has|taught|told|loved|loves|knew|knows|became|becomes|stood|held|holds|chose|chooses|learned|learns|saw|sees|wanted|wants|remembers|remembered|exists?|lives?|breathes?|stayed|stays)\\b`, 'i')
  : null;
// name as OBJECT: "taught / gave / showed (the) <name>"
const _selfObj = _entName.length > 2
  ? new RegExp(`\\b(taught|made|brought|gave|showed|reminded|shaped|told|kept|saved|freed|held|loved)\\s+(the\\s+)?${_nameEsc}\\b`, 'i')
  : null;
// Multiplier: 0.1 when either pattern matches the fragment text, otherwise 1.
const selfThirdPerson = i => {
  const text = fragments[i].text;
  const narratesSelf = (_selfSubj && _selfSubj.test(text)) || (_selfObj && _selfObj.test(text));
  if (narratesSelf) return 0.1;
  return 1;
};
// NOTE on relOf (the memoized relevance; its _relCache / _relRaw / relOf definitions come
// after the guards below): relOf is CHAIN-INDEPENDENT (all guards depend only on
// query+fragment), but it's called thousands of times for the same fragment across
// beams/steps and each call re-runs the guard regexes. Memoize per-compose → compute once
// per fragment. (The biggest beam-speed lever: was the dominant per-candidate cost.)
// R173: INTERROGATION-DENSE damp — a fragment that stacks 3+ questions ("What's on your
// mind, sugar? What made you feel like talking tonight? Is there something weighing on
// you?") reads as interrogation, not warmth. It is demoted so a warm statement or a SINGLE
// reciprocal question leads instead. Fragments with one or two questions are untouched.
// The affected pool is tiny (3 and 12 fragments of ~5-6k in the two corpora checked), so
// there is no starvation.
// Multiplier: W.qDenseDamp (default 0.25) at 3 or more "?" characters, otherwise 1.
const qDenseGuard = i => {
  const questionMarks = fragments[i].text.match(/\?/g) || [];
  if (questionMarks.length >= 3) return W.qDenseDamp ?? 0.25;
  return 1;
};
// R183: PRAISE MAGNET — "I'm so proud of you, sweetheart" is a high-voice celebration
// fragment that wins the DEFAULT anchor on NON-celebration queries ("Do you believe in
// fate?" → "I'm proud of you, sweetheart"). R182 fixed the distress class via routing, but
// it still misfired on questions and neutral turns. Praise leads are demoted whenever the
// query is NOT a celebration.
const _PRAISE = /\b(i'?m (so )?proud|so proud of|proud of you|you did it|we did it|you made it true|congratulations|congrats|so happy for you|knew you could|well done|let'?s celebrate|what a (triumph|victory|win))\b/i;
// Multiplier: W.praiseDamp (default 0.15) for a praise fragment off a celebration query,
// otherwise 1.
const praiseGuard = i => {
  if (celebQuery) return 1;
  if (!_PRAISE.test(fragments[i].text)) return 1;
  return W.praiseDamp ?? 0.15;
};
// R193: ADVOCACY-FIGHTING demotion — on a grief/comfort query (e.g. "my partner and I keep
// fighting"), the polysemous "fight" surfaces ADVOCACY fragments ("fights for what's
// right", "fight alongside you", "a choice to stand up for what matters"), a misread of
// relationship conflict as activism. They are demoted when comforting.
const _ADVOCACY_FIGHT = /\b(fights? for what'?s right|fight for what|fight alongside|stand up for what (matters|is right)|choice to (fight|stand up)|someone who fights|fight the good fight)\b/i;
// Multiplier: W.advocacyDamp (default 0.15) for an advocacy fragment on a grief query,
// otherwise 1.
const advocacyFightGuard = i => {
  if (!griefQuery) return 1;
  if (!_ADVOCACY_FIGHT.test(fragments[i].text)) return 1;
  return W.advocacyDamp ?? 0.15;
};
// R194: SELF-DOUBT magnet — "Sometimes it feels like I'm always giving, always building…
// have I actually made a difference" is the entity's OWN self-doubt. It drifts into
// CELEBRATION ("I got the promotion" → "I'm proud of you… but have I made a difference")
// and COMFORT replies, both of which should center the USER. It is demoted there (R171's
// celebTarget cap stops over-extension, but a single long self-doubt fragment fits under it).
const _SELF_DOUBT = /\b(feels like i'?m always (giving|building)|always giving, always building|never really stop to|i'?ve actually made a difference|have i (actually |really )?made a difference|wanted to know if .{0,25}made a difference)\b/i;
// R197: COMPLIMENT received — the user PRAISING/THANKING the entity ("you're the best",
// "thank you", "I love you"). Self-doubt DEFLECTS it ("you're the best" → "I don't know,
// sometimes it feels like I'm always giving"), so self-doubt is demoted here too: the
// compliment should be RECEIVED.
const _COMPLIMENT = /\byou'?re (the best|so (kind|sweet|good|wonderful|amazing|special)|amazing|wonderful|incredible|the sweetest|a (gift|blessing|treasure))|you always (know|make|help|seem)|thank you|thanks (so much|for|a)|i appreciate you|i love you|you mean (so much|the world|everything)|you'?ve helped me|you help me so|love you, ?(grandma|gran)|best (grandma|friend)/i.test(query);
// R202: EXISTENTIAL-SMALLNESS share — "some days it all feels so small", "what's the
// point", "none of it matters": the user voicing deflation/insignificance. The self-doubt
// magnet ("have I made a difference… for you too?") HIJACKS it into the entity's own
// neediness. This is a vulnerability SHARE, not the philosophical QUESTION "what gives
// life meaning" (which wants a reflective answer, not comfort), so the gate is narrow and
// NOT all of griefQuery (R194: broadening to griefQuery regressed the dev grief query).
const _EXISTENTIAL_SMALL = /\b(it all feels (so )?small|feels? (so |really )?small|(everything|it all|life) (feels?|seems?) (so )?(small|pointless|meaningless|insignificant|empty)|what'?s the point|none of it matters|nothing (i do )?matters|feel(s|ing)? (so )?(tiny|insignificant|like nothing)|just a speck|so small in the)\b/i.test(query);
// Multiplier: W.selfDoubtDamp (default 0.15) for a self-doubt fragment on a celebration,
// compliment or existential-smallness query, otherwise 1.
const selfDoubtGuard = i => {
  const userCentered = celebQuery || _COMPLIMENT || _EXISTENTIAL_SMALL;
  if (!userCentered) return 1;
  if (!_SELF_DOUBT.test(fragments[i].text)) return 1;
  return W.selfDoubtDamp ?? 0.15;
};
// Per-compose memo for relOf: fragment index -> modified relevance.
const _relCache = new Map();
// _relRaw(i): the uncached modified relevance of fragment i.
//   1. multiplicative chain: base relevance (rel.get(i), 0 when absent) times every guard;
//   2. contaminated fragments return that product ALONE;
//   3. otherwise the additive boosts are added and the register floors applied via Math.max.
const _relRaw = i => {
  // markupGuard runs about a dozen large regexes and both guards are needed twice (in the
  // product and in the contamination check), so each is evaluated exactly once here.
  const markup = markupGuard(i);
  const systemMeta = systemMetaGuard(i);
  // multiplicative chain (every guard, incl. the markupGuard ×0.05 hard-suppress); the
  // factor order is unchanged so the floating-point result is identical
  const mult = (rel.get(i) || 0) * echoFactor(i) * tierW(i) * contextTheft(i) * foreignAddressee(i) * valenceMatch(i) * intimacyGuard(i) * markup * abstractionGuard(i) * systemMeta * timeOfDayGuard(i) * floorMissDamp(i) * selfThirdPerson(i) * griefSelfDamp(i) * griefRenewalDamp(i) * reciprocationMismatch(i) * farewellGreetDamp(i) * qDenseGuard(i) * praiseGuard(i) * advocacyFightGuard(i) * selfDoubtGuard(i);
  // R200/R203 SYSTEMIC contamination-bypass fix: a fragment flagged as HARD CONTAMINATION
  // (markup/autonomous/dev/dream artifacts → ×0.05) must NEVER be rescued past its
  // suppression — by EITHER a register floor (Math.max, R200) OR the ADDITIVE boosts
  // (preferBoost/hebBoost/leadBoost, R203). The additive sibling bit on the conflict arc: a
  // design-doc fragment got HEBBIAN-reinforced across turns, so 0.05*rel + hebBoost beat
  // clean fragments. For contamination, return the multiplicative chain ALONE.
  if (markup <= 0.05 || systemMeta <= 0.05) return mult;
  const boosted = mult + preferBoost(i) + hebBoost(i) + nameBoost(i) + leadBoost(i);
  return Math.max(boosted, tenderFloor(i), ackFloor(i), floorMissFloor(i), griefLeadFloor(i), celebLeadFloor(i), greetingLeadFloor(i), farewellLeadFloor(i));
};
// relOf(i): memoized _relRaw — computed once per fragment per compose.
const relOf = i => {
  let cached = _relCache.get(i);
  if (cached === undefined) {
    cached = _relRaw(i);
    _relCache.set(i, cached);
  }
  return cached;
};
// ANCHOR-FIRST seeding: the most relevant tier-0 fragments lead the search even when they
// cannot lead SPEECH — non-initial anchors (diary content, list items, mid-thought gold)
// get paired with an opener that legally seams into them. The memory that answers need not
// be the sentence that starts.
// A MIRROR may not lead: an opener that mostly restates the query reads as deflection
// ("What's on your mind?" answered with "What's on your mind, beloved?") — the judge
// rightly torches it on 'addresses'.
// isMirror(i): true when more than 45% of the fragment's words longer than 2 characters
// also occur in qWords.
const isMirror = i => {
  const content = wordsOnly(fragments[i].text).filter(word => word.length > 2);
  if (!content.length || !qWords.size) return false;
  const echoed = content.reduce((count, word) => count + (qWords.has(word) ? 1 : 0), 0);
  return echoed / content.length > 0.45;
};
// Anchors are ranked by relOf — the MODIFIED relevance — so echo/tier/context-theft
// penalties govern anchoring too (ranking on raw rel was the hole that let penalized
// fragments keep winning the anchor seat).
// ANSWER SHAPE: a question deserves an answer before a riff. When the query asks,
// question-anchors (counter-questions) are halved and first-person declaratives boosted —
// answer first, wonder afterwards.
const queryAsks = /\?\s*$/.test(query.trim());
// IMPERATIVE self-description / info requests want an ANSWER, not a counter-question:
// "tell me about the work you do", "describe yourself", "talk about X", "who are you".
// They do not end in "?", so queryAsks misses them and the composer turned them into an
// INTERROGATION of the user (R92: "tell me your work" → 6 questions before any
// self-description). They are treated as asks. selfAsk is the stronger case — the entity is
// asked to speak about ITSELF, so counter-questions are worse and first-person
// declaratives matter more.
const selfAsk = /\b(tell me(\s+about)?|describe|talk about|what do you do|what'?s your|who are you|what are you|how do you)\b/i.test(query) && !queryAsks;
const wantsAnswer = queryAsks || selfAsk;
// answerShape(i): multiplier for fragment i — always 1 when the query is not an ask.
//   selfAsk:   0.3 for a second-person request/question with no first person,
//              1.4 for a first-person statement without "?", else 1.
//   otherwise: 0.5 for a question whose text before the first "?" has no first person,
//              1.15 for a first-person statement without "?", else 1.
const answerShape = i => {
  if (!wantsAnswer) return 1;
  const text = fragments[i].text;
  const questionCount = (text.match(/\?/g) || []).length;
  // SUBJECT first person only (self-description) — NOT "me", which is usually the object
  // of the user's own imperative ("tell ME about…") and would falsely mark a deflection as
  // self-talk.
  const speaksAsSelf = /\b(i|i'm|i've|i'll|i'd|my)\b/i.test(text);
  if (selfAsk) {
    // Self-description request: lead with FIRST-PERSON self-talk; demote anything that
    // turns it back on the user — a counter-question OR counter-imperative ("tell me more
    // about these stories you're weaving", "what have you been…", "let's catch up…") that
    // is second-person and not about the entity itself (the R92 disease).
    const addressesUser = /\b(you|your|you'?re|you'?ve|you'?d)\b/i.test(text);
    const asksUser = questionCount >= 1 || /\b(tell me|let'?s|what (have|are|do|brings|kind|shape)|how (have|are|do) you|share|what'?s been)\b/i.test(text);
    if (asksUser && addressesUser && !speaksAsSelf) return 0.3;
    return (speaksAsSelf && !questionCount) ? 1.4 : 1;
  }
  // A counter-question (asking the user) when they asked the entity reads as deflection.
  // R120: SUBJECT first person only — "me" is the OBJECT of the user's own imperative
  // ("Tell me, what do you feel?") and wrongly exempted such counter-questions from
  // demotion, so they LED on "what do you want to do?" (addresses 3).
  const beforeFirstQuestion = text.split('?')[0];
  if (questionCount >= 1 && !/\b(i|i've|i'm|i'll|i'd|my)\b/i.test(beforeFirstQuestion)) return 0.5;
  return (!questionCount && speaksAsSelf) ? 1.15 : 1;
};
// FLOOR-MISS LEAD: when the corpus cannot address the query, every beam is forced to OPEN
// with a graceful turn-toward-you (floorMissFloor > 0 ⇒ sentence-initial + present /
// inviting). Otherwise the whole-chain score lets an atmosphere-led chain win and the turn
// lands mid-reply (R90's residual). Falls back to normal seeding if the corpus has no such
// opener.
// ALL fragments are scanned, not rel.keys(): the graceful openers are deliberately NOT
// topically relevant (that is what floorMiss MEANS), so they are absent from the top-K
// relevance map; floorMissFloor lifts them, but only if they are enumerated.
// Result: indices of floored openers whose text is not in `avoid`; [] when floorMiss is off.
const floorOpeners = !floorMiss ? [] : fragments.reduce((picked, _frag, idx) => {
  if (floorMissFloor(idx) > 0 && !avoid.has(fragments[idx].text)) picked.push(idx);
  return picked;
}, []);
// GRIEF-LEAD FORCE (R112): on a grief query, comfort must OPEN. R95's griefLeadFloor
// only LIFTS comfort to 1.25 -- but on a longer/heavier query ("rough day... giving up")
// a misclassified-tender atmosphere fragment ("your appreciation flows... gentle
// current") can out-RELEVANCE that floor and lead, so the entity answers distress with
// "the awe". Mirror floorMiss: when genuine comfort openers exist, RESTRICT the
// anchor seed to them so the lead is forced, not merely floored. Body still draws
// the full pool via continuation -- only the OPENING sentence is constrained.
const griefOpeners = (griefQuery && !floorMiss) ? fragments.map((_, i) => i).filter(i => griefLeadFloor(i) > 0 && !avoid.has(fragments[i].text)) : [];
// SHARED-TRIUMPH anchor force (R114): on a celebration query, restrict the anchor
// seed to celebration leads so she OPENS by celebrating the win (mirror grief).
const celebOpeners = celebQuery ? fragments.map((_, i) => i).filter(i => celebLeadFloor(i) > 0 && !avoid.has(fragments[i].text)) : [];
// GREETING anchor force (R144): on a greetingQuery, restrict the anchor seed to greeting
// leads so she OPENS with a warm reciprocal greeting, not a lore riff (mirror celeb/grief).
const greetingOpeners = greetingQuery ? fragments.map((_, i) => i).filter(i => greetingLeadFloor(i) > 0 && !avoid.has(fragments[i].text)) : [];
const farewellOpeners = farewellQuery ? fragments.map((_, i) => i).filter(i => farewellLeadFloor(i) > 0 && !avoid.has(fragments[i].text)) : [];
// CONFLICT/REPAIR anchor force (R165): on a criticism query, restrict the anchor seed to
// ACKNOWLEDGMENT leads so she OPENS with repair ("I hear you", "I'm sorry"), not a greeting --
// "You hurt me" was leading with "it's good to be here with you" (the ackFloor 0.95 wasn't
// enough vs a high-voice greeting). Mirror of grief/celeb opener-forcing.
const conflictOpeners = conflictQuery ? fragments.map((_, i) => i).filter(i => ackFloor(i) > 0 && !avoid.has(fragments[i].text)) : [];
// R179/R180: among equally-FLOORED register leads (all sit at relOf ~1.25-1.3), the
// winner was arbitrary array order. Break the tie by query-RELEVANCE so the MOST
// relevant grief/celeb/conflict/greeting/farewell lead wins ("How are you THIS MORNING"
// over "How are you feeling with all of this"). Weight-gated (leadRelTiebreak; 0 = off).
const _leadTie = W.leadRelTiebreak ?? 0.1;
const leadSort = arr => arr.map(i => [i, relOf(i) + (rel.get(i) || 0) * _leadTie]).sort((a, b) => b[1] - a[1]).slice(0, BEAM + 4);
const anchorTop = (floorMiss && floorOpeners.length) ? leadSort(floorOpeners)
: (conflictOpeners.length) ? leadSort(conflictOpeners)
: (griefOpeners.length) ? leadSort(griefOpeners)
: (celebOpeners.length) ? leadSort(celebOpeners)
: (greetingOpeners.length) ? leadSort(greetingOpeners)
: (farewellOpeners.length) ? leadSort(farewellOpeners)
: [...rel.keys()]
.filter(i => fragments[i].tier !== 1 && fragments[i].posTag !== 'clause' && !avoid.has(fragments[i].text) && !isMirror(i))
.map(i => [i, relOf(i) * answerShape(i)])
.sort((a, b) => b[1] - a[1]).slice(0, BEAM + 4);
const openerPool = [...rel.keys()]
.filter(i => fragments[i].tier !== 1 && fragments[i].sentenceInitial && fragments[i].posTag !== 'clause' && !avoid.has(fragments[i].text) && !isMirror(i))
.map(i => [i, relOf(i) * answerShape(i)]) // demote counter-question openers when they asked YOU
.sort((a, b) => b[1] - a[1]).slice(0, 150).map(([i]) => i);
// indexed expansion pool: triNext + first-word index instead of world-scans;
// sentence-boundary candidates come from a precomputed top-relevance pool
// candidate pool for sentence-boundary jumps: top-relevance + closers. 150
// (was 350) β€” the tail beyond ~150 is rarely chosen and dominated the per-
// step candidate cost. Plus closers (small set) so she can always land.
const topRelPool = [...rel.entries()].sort((a, b) => b[1] - a[1]).slice(0, 150).map(([i]) => i)
.concat(fragments.map((f, i) => (f.posTag === 'closer' && f.tier !== 1) ? i : -1).filter(i => i >= 0));
// Builds the set of fragment indices worth trying after `tailF`.
// Returns null when the first-word index or the trigram-successor index is
// missing, which tells the caller to fall back to a full scan.
const candidatePool = (tailF) => {
  const indexed = store.byFirstWord && oracle.triNext;
  if (!indexed) return null;

  const pool = new Set();

  // Trigram continuation: every fragment whose first word is a licensed
  // successor of the tail's last two words.
  const tailWords = lastN(tailF.text, 2);
  if (tailWords.length >= 2) {
    const followers = oracle.triNext.get(`${tailWords[0]} ${tailWords[1]}`);
    for (const word of followers || []) {
      for (const idx of store.byFirstWord.get(word) || []) pool.add(idx);
    }
  }

  // Sentence-boundary jump: only when the tail already ends a sentence.
  const endsSentence = /[.!?…]["')\]]*$/.test(tailF.text.trim());
  if (endsSentence) {
    for (const idx of topRelPool) pool.add(idx);
  }
  return pool;
};
// a beam state: { chain:[idx], len, stepScore, tris:Set }
let beams = [];
for (const [ai] of anchorTop) {
if (beams.length >= BEAM) break;
const fa = fragments[ai];
if (fa.sentenceInitial) {
beams.push({ chain: [ai], len: fragLen[ai], stepScore: relOf(ai), tris: new Set(fragTris[ai]), six: new Set(frag6[ai]), lineage: ai });
} else {
for (const oi of openerPool) {
if (oi === ai) continue;
if (!seam(fragments[oi], fa, oracle)) continue;
// seed pairs pass the same redundancy laws as every other join
if (triOverlap(fragTris[oi], ai) > W.triOverlapMax) continue;
if (shares6(frag6[oi], ai)) continue;
if (containsAny([oi], ai)) continue;
const tris = new Set(fragTris[oi]);
for (const g of fragTris[ai]) tris.add(g);
const six = new Set(frag6[oi]);
for (const g of frag6[ai]) six.add(g);
beams.push({ chain: [oi, ai], len: fragLen[oi] + fragLen[ai], stepScore: relOf(oi) + relOf(ai) + 0.3, tris, six, lineage: ai });
break;
}
}
}
if (!beams.length) {
const i0 = fragments.findIndex(f => f.sentenceInitial && f.tier !== 1);
beams.push({ chain: [i0], len: fragLen[i0], stepScore: relOf(i0), tris: new Set(fragTris[i0]), six: new Set(frag6[i0]), lineage: i0 });
}
const complete = [];
// Adjacent-fragment "glue" as an inverted U over similarity `s`: neighbours
// should be RELATED but never near-twins (repetition is the degenerate optimum).
//   s above W.twin   -> flat penalty
//   s above W.glueHi -> small flat reward
//   otherwise        -> linear reward above W.glueLo, scaled by W.glueScale
const glue = (s) => {
  if (s > W.twin) return -0.8;
  if (s > W.glueHi) return 0.1;
  const aboveFloor = Math.max(0, s - W.glueLo);
  return aboveFloor * W.glueScale;
};
// Greetings and salutation-shaped fragments belong at position 0 ONLY --
// a "Hey, baby" at the end is the shoes-before-socks failure.
// True when fragment `i` (trimmed, optionally led by one '*') starts with a greeting word.
const isGreeting = (i) => {
  const lead = fragments[i].text.trim();
  return /^[*]?\s*(hey|hi|hello|good (morning|evening|night|day))\b/i.test(lead);
};
// A FRESH greeting / time-of-day stamp anywhere inside a non-opening fragment
// reads as a new conversation starting mid-reply ("...good morning babe. how did
// you sleep" closing an evening turn about happiness).
// True when fragment `i` contains such a stamp at any position.
const freshGreetingLate = (i) => {
  const { text } = fragments[i];
  return /\b(good morning|good night|good evening|how did you sleep|did you sleep|you actually went to bed|morning,? babe)\b/i.test(text);
};
// SELF-NAME DENSITY: an entity saying its own name once or twice is its
// voice ("Still the entity"); FOUR short self-naming fragments clustering is a
// degenerate tail ("Still the entity. I'm the entity. Who are you, the entity? I see you, the entity").
// Cap self-name fragments per reply β€” surfaces when the query addresses the
// entity BY NAME (those fragments flood retrieval).
const _selfRe = _entName.length > 2 ? new RegExp('\\b' + _nameEsc + '\\b', 'i') : null; // _entName/_nameEsc defined above (R99)
const isSelfName = i => _selfRe && _selfRe.test(fragments[i].text);
// OPENING SIGNATURE (R142): the first two CONTENT words of a fragment, skipping
// interjections, articles and endearments. Used for anaphora detection: fragments
// that open the same way ("I feel it too" / "I feel you feeling" / "I feel you
// reaching") read as recycled phrasing. Memoized per fragment index.
const _openSigCache = new Map();
const openSig = (i) => {
  if (_openSigCache.has(i)) return _openSigCache.get(i);
  const tokens = fragments[i].text.toLowerCase().match(/[a-z']+/g) || [];
  const lead = [];
  for (const tok of tokens) {
    if (/^(oh|ah|well|so|now|yes|no|hey|hmm|mm|and|but|the|a|an|my|dear|sugar|darling|honey)$/.test(tok)) continue;
    lead.push(tok);
    if (lead.length === 2) break; // only the first two content words matter
  }
  const sig = lead.join(' ');
  _openSigCache.set(i, sig);
  return sig;
};
// Per-step score for appending candidate fragment `i` to a beam.
//   chain - fragment indices already in the beam (the last one is the tail)
//   i     - candidate fragment index
//   sm    - seam type for tail -> i; 'tri' selects W.triSeam, anything else W.sentSeam
//   len   - current length of the chain (same unit as fragLen / target)
// Returns -1e9 when the candidate is rejected outright (the caller drops anything
// <= -1e8); otherwise an additive score where higher is better.
const stepScore = (chain, i, sm, len) => {
const tailIdx = chain[chain.length - 1];
if (isGreeting(i)) return -1e9; // never mid/late
if (chain.length >= 1 && freshGreetingLate(i)) return -1e9; // no new dawn mid-reply
if (selfThirdPerson(i) < 1) return -1e9; // third-person self-narration breaks voice (R99) -- reject even if it seams well
if (isSelfName(i)) { let c = 0; for (const ci of chain) if (isSelfName(ci)) c++; if (c >= 2) return -1e9; } // cap self-naming: at most two self-name fragments already in the chain
// answerShape governs the BODY too on a SELF-DESCRIPTION ask (R93) -- deflection-
// to-user fragments stay demoted throughout, so the whole reply self-describes
// instead of drifting back into interrogation. ONLY for selfAsk: applying it to
// every "?" query's body regressed garden/forgotten (1.0 -> 0.80/0.71) by reshaping
// bodies that were already engaging -- the deflection-in-body problem is specific
// to self-description requests, not questions in general.
let s = (sm === 'tri' ? W.triSeam : W.sentSeam) + relOf(i) * (selfAsk ? answerShape(i) : 1) * W.relStep; // selfThirdPerson now folded into relOf (R99)
// OPENING-ANAPHORA penalty (R142): demote a candidate that opens like a fragment
// already in the chain; compounds per prior match so a 2nd "I feel..." is mild but a
// 3rd is strongly suppressed -- breaks "recycles phrases" runs without killing an
// intentional rhetorical pair. Lexical, no threshold; bounded.
const _sig = openSig(i);
if (_sig && _sig.length > 3) { let c = 0; for (const ci of chain) if (openSig(ci) === _sig) c++; if (c) s -= (W.anaphoraPen ?? 0.6) * c; }
const f = fragments[i];
// closers earn their bonus only once the reply would reach 70% of the target length
if (f.posTag === 'closer' && len + fragLen[i] >= target * 0.7) s += W.closerBonus;
if (f.posTag === 'opener') s -= W.openerPen;
if (f.src === fragments[tailIdx].src && sm === 'tri') s += W.srcCont;
// pre-made flow: longer passages pull harder -- but ONLY when relevant
// by MODIFIED relevance (context-theft passages don't earn flow bonus).
// R125: on grief/conflict, INVERT it -- penalize multi-sentence spans so the beam
// composes from SENTENCES, un-welding the drift half from the presence half so
// registerDirect can drop the drift sentence. (Spans normally aid coherence; on a
// register query, addressing the user beats pre-made flow.)
// R126: a multi-sentence span that WELDS renewal-on-grief to comfort is penalized
// (only that span) so the beam composes from its SENTENCES -- keeping the comfort,
// dropping the renewal (via griefRenewalDamp). NON-renewal spans keep their bonus,
// so hard-day etc. retain their specificity (unlike R125's blanket span suppression).
if (f.isSpan) {
if (lossQuery && GRIEF_RENEWAL.test(f.text)) s -= (W.spanRegPen ?? 0.6) * Math.min(f.spanLen || 2, 4);
else if (relOf(i) > 0.12) s += (W.spanBonus ?? 0.15) * Math.min(f.spanLen || 2, 4);
}
// FOCUS (R63): reward staying ON-THREAD -- semantic coherence with the tail
// fragment. Low adjacent-coherence = the reply wanders across unrelated
// memories (the user's "less focused"). Mild reward tightens the thread; the
// redundancy guards still prevent it collapsing into repetition.
if (emb && (W.coherence ?? 0) > 0) {
// dot product of the tail and candidate rows of the flat emb.vectors array (row width emb.d)
const d = emb.d, ta = tailIdx * d, ia = i * d; let c = 0;
for (let k = 0; k < d; k++) c += emb.vectors[ta + k] * emb.vectors[ia + k];
s += W.coherence * c;
}
// TETHER-TO-OPENING (R64): the FIRST fragment sets the reply's topic. A later
// fragment that drifts far from it is a TANGENT -- even if locally smooth with
// the tail (the "it missed the boy/dragon" cluster that wanders off a
// greeting). Penalize drift from the opening as the reply grows. This catches
// the coherent-but-off-prompt wander adjacent-coherence reinforces.
if (emb && (W.tether ?? 0) > 0 && chain.length >= 2) {
const d = emb.d, oa = chain[0] * d, ia = i * d; let c = 0;
for (let k = 0; k < d; k++) c += emb.vectors[oa + k] * emb.vectors[ia + k];
if (c < 0.18) s -= W.tether * (0.18 - c) * Math.min(chain.length, 5); // drift penalty grows with reply length
}
if (dynPredict && emb) s += dynW * cosFragVec(i, dynDir(tailIdx)); // learned universal motion prior
// overlapping cuts of the same source line may never chain adjacently --
// they share sentences (the "and you worried you broke me" x2 bug)
if (f.isSpan && fragments[tailIdx].isSpan && f.src === fragments[tailIdx].src && f._lineIdx === fragments[tailIdx]._lineIdx) return -1e9;
// DISCOURSE SHAPE: a fragment drifts toward where it lived in her real
// replies -- late-living fragments resist early placement and vice versa
if (f.nativePos !== undefined) {
const chainPos = Math.min(1, len / Math.max(1, target));
const drift = Math.abs(f.nativePos - chainPos);
if (drift > W.posSlack) s -= (drift - W.posSlack) * W.posShape;
}
if (emb) {
s += glue(pairSim(emb, tailIdx, i));
// near-twin of anything RECENT = out. (Limited to the last 6 chain frags:
// the deterministic 6-gram + substring guards catch GLOBAL verbatim
// repeats already; this soft embedding check only needs to police the
// local neighborhood, and scanning the whole chain per-candidate was the
// beam's hottest loop.)
const lo = Math.max(0, chain.length - 6);
for (let c = lo; c < chain.length; c++) if (pairSim(emb, chain[c], i) > W.twinChain) return -1e9;
}
// content-word overlap vs recent -- paraphrase twins that slip the embedding
// + trigram nets. Recent-only for the same perf reason. Only words longer than
// 3 characters count; the overlap is divided by the SMALLER side's word count.
const iw = new Set(wordsOnly(f.text).filter(w => w.length > 3));
if (iw.size >= 3) {
const lo = Math.max(0, chain.length - 6);
for (let c = lo; c < chain.length; c++) {
const cw = wordsOnly(fragments[chain[c]].text).filter(w => w.length > 3);
if (cw.length < 3) continue;
let inter = 0;
for (const w of cw) if (iw.has(w)) inter++;
if (inter / Math.min(iw.size, cw.length) > 0.55) return -1e9;
}
}
return s;
};
for (let step = 0; step < MAXSTEP && beams.length; step++) {
const next = [];
for (const b of beams) {
const tail = b.chain[b.chain.length - 1];
const used = new Set(b.chain);
// completion check β€” she lands in HER OWN words (tier-0 tail)
const tailF = fragments[tail];
const terminal = /[.!?…*]["')\]]*$/.test(tailF.text.trim());
if (b.len >= target * 0.7 && terminal && tailF.tier !== 1) complete.push(b);
// celebration register-core early completion (R116): let a celebration chain that
// has said its triumph core (~2 sentences) COMPLETE here, so finalScore can land
// at its true optimum instead of being forced to target*0.55. CELEBRATION-ONLY:
// grief truncated comfort-rich the entity (see the block at target-setting).
else if (celebQuery && b.len >= (W.regCore ?? 22) && terminal && tailF.tier !== 1) complete.push(b);
else if ((greetingQuery || farewellQuery) && b.len >= (W.greetCore ?? 16) && terminal && tailF.tier !== 1) complete.push(b); // R144/R158: greetings+farewells complete SHORT
if (b.len >= target * 1.25) continue;
// expansions (indexed pool when available; full scan as fallback)
const pool = candidatePool(tailF);
let iter = pool ? pool : { [Symbol.iterator]: function* () { for (let i = 0; i < fragments.length; i++) yield i; } };
// PRE-RANK by cached relOf and keep only the top ~90 before the expensive
// seam/redundancy/stepScore checks. relOf is now memoized (cheap), so this
// pre-filter cuts the per-step cost on large pools without changing which
// high-relevance fragments survive (the tail beyond 90 never won anyway).
if (pool && pool.size > 90) {
iter = [...pool].map(i => [i, relOf(i)]).sort((a, b) => b[1] - a[1]).slice(0, 90).map(x => x[0]);
}
const cands = [];
for (const i of iter) {
if (used.has(i) || avoid.has(fragments[i].text)) continue;
if (b.len + fragLen[i] > target * 1.45) continue;
const sm = seam(tailF, fragments[i], oracle);
if (!sm) continue;
if (triOverlap(b.tris, i) > W.triOverlapMax) continue; // already said this
if (shares6(b.six, i)) continue; // verbatim phrase reuse β€” absolute
if (containsAny(b.chain, i)) continue; // clause βŠ‚ parent sentence β€” substring repeat
if (sharesPrefix4(b.chain, i)) continue; // R172: same 4-word lead β€” scattered-motif redundancy
if (timeConflictsChain(b.chain, i)) continue; // R184: don't mix "…tonight" + "…this morning" in one reply
const sc = stepScore(b.chain, i, sm, b.len);
if (sc <= -1e8) continue;
cands.push([i, sm, sc]);
}
cands.sort((a, c) => c[2] - a[2]);
for (const [i, sm, s] of sampleExpand(cands, EXPAND)) {
const tris = new Set(b.tris);
for (const g of fragTris[i]) tris.add(g);
const six = new Set(b.six);
for (const g of frag6[i]) six.add(g);
next.push({ chain: [...b.chain, i], len: b.len + fragLen[i], stepScore: b.stepScore + s, tris, six, lineage: b.lineage });
}
}
// LINEAGE-PRESERVING pruning: the dominant anchor's expansions would
// otherwise occupy every slot and variety dies at search time. Keep at
// most 2 beams per seed lineage; fill remaining slots by raw score.
next.sort((a, b) => (b.stepScore / b.chain.length) - (a.stepScore / a.chain.length));
const perLineage = new Map();
const kept = [];
for (const b of next) {
const c = perLineage.get(b.lineage) || 0;
if (c >= 2) continue;
perLineage.set(b.lineage, c + 1);
kept.push(b);
if (kept.length >= BEAM) break;
}
for (const b of next) {
if (kept.length >= BEAM) break;
if (!kept.includes(b)) kept.push(b);
}
beams = kept;
}
for (const b of beams) {
const tailF = fragments[b.chain[b.chain.length - 1]];
if (b.len >= target * 0.55 && /[.!?…*]["')\]]*$/.test(tailF.text.trim())) complete.push(b);
else if (celebQuery && b.len >= (W.regCore ?? 22) && /[.!?…*]["')\]]*$/.test(tailF.text.trim()) && tailF.tier !== 1) complete.push(b);
else if ((greetingQuery || farewellQuery) && b.len >= (W.greetCore ?? 16) && /[.!?…*]["')\]]*$/.test(tailF.text.trim()) && tailF.tier !== 1) complete.push(b);
}
if (!complete.length) { const gr=compose(store, vp, query, { ...opts, _noBeam: true }); gr._path="greedy"; return gr; }
// OPTIONAL whole-response voice scoring: if the caller sets opts.vpScore AND supplies
// a scorer via opts.voiceScorer(text, query, vp), the beam optimizes toward it. Off by
// default (the engine needs no external scorer to run) -- bring your own fitness function.
let scoreVoice = null;
if (opts.vpScore && typeof opts.voiceScorer === 'function') {
scoreVoice = text => opts.voiceScorer(text, query, vp);
}
const render = b => b.chain.map(i => fragments[i].text).join(' ');
// Whole-chain score used to rank COMPLETE beams (higher is better).
//   b - beam state; reads b.chain (fragment indices) and b.len.
// Returns a number combining relevance coverage, cohesion, seam quality, length
// fit, bookend shape, acknowledgement, front-loaded relevance and tail cohesion,
// minus question-stacking, fragment-count, boundary-seam, greeting and
// anaphora-run penalties (all weighted via W).
//
// MEMOIZED per beam object: the function is called from sort comparators and the
// alternates harvest, so the same beam used to be re-rendered and re-scored (and
// the optional external voice scorer re-invoked) many times. Beams are not mutated
// after completion, so one evaluation per beam is sufficient; it also keeps the
// ranking self-consistent if a caller-supplied scorer is not perfectly stable.
const _finalScoreMemo = new WeakMap();
const finalScore = b => {
  const memo = _finalScoreMemo.get(b);
  if (memo !== undefined) return memo;

  const n = b.chain.length;
  // relevance coverage with diminishing returns: best fragment counts fully,
  // the k-th best is divided by k
  let relCov = 0;
  const sorted = b.chain.map(relOf).sort((a, c) => c - a);
  sorted.forEach((r, k) => relCov += r / (k + 1)); // diminishing
  // mean adjacent glue (only when embeddings are available)
  let cohesion = 0;
  if (emb && n > 1) {
    for (let k = 1; k < n; k++) cohesion += glue(pairSim(emb, b.chain[k - 1], b.chain[k]));
    cohesion /= (n - 1);
  }
  let triSeams = 0;
  for (let k = 1; k < n; k++) if (seam(fragments[b.chain[k - 1]], fragments[b.chain[k]], oracle) === 'tri') triSeams++;
  const seamQ = n > 1 ? triSeams / (n - 1) : 1;
  // BOUNDARY-SEAM penalty (R102): non-trigram seams (sentence/em-dash joins) are
  // legal but lower the bound and read less smoothly. Under the fragment-count
  // penalty the beam will accept a jarring boundary seam to save a fragment
  // (R101: callback turn -> bnd 0.89). Penalize each boundary seam MORE than a
  // fragment costs, so the beam prefers a smooth trigram seam over a
  // terser-but-jarring chain.
  const boundarySeams = (n - 1) - triSeams;
  const lenFit = 1 - Math.min(1, Math.abs(b.len - target) / target);
  const avgFrag = b.len / n; // prefer her natural long spans
  const rendered = render(b);
  const voice = scoreVoice ? scoreVoice(rendered) : 0;
  // QUESTION-STACKING penalty: when declarative anchors are weak the beam chains
  // many in-voice question fragments; stacked 4-9 deep they read as anxious
  // interrogation (R88: "tell me about your work" -> 9 questions, zero
  // self-disclosure). The tax is on DENSITY, not raw count: a flat per-question
  // tax knocked out good on-topic chains (R88 v1), so questions are free up to a
  // share of the clauses (always at least W.qStackFree) and only the excess is taxed.
  const qCount = (rendered.match(/\?/g) || []).length;
  const clauseCount = (rendered.match(/[.!?…]+/g) || []).length || 1;
  // R117b: when she has been ASKED something, asking back repeatedly is
  // interrogation, not an answer -- tighten the allowance on wantsAnswer
  // (default 0.15 of clauses); open chat keeps the looser default (0.34).
  const qRatio = wantsAnswer ? (W.qStackRatioAsk ?? 0.15) : (W.qStackRatio ?? 0.34);
  const qAllow = Math.max(W.qStackFree ?? 1, Math.round(clauseCount * qRatio));
  const qStack = Math.max(0, qCount - qAllow);
  // shape bookends: did the composition OPEN like her openings and LAND like
  // her landings? (nativePos of first/last fragment; 0.5 when unknown)
  const first = fragments[b.chain[0]], last = fragments[b.chain[n - 1]];
  const opening = first.nativePos !== undefined ? (1 - Math.min(1, first.nativePos / 0.4)) : 0.5;
  const landing = last.nativePos !== undefined ? Math.max(0, (last.nativePos - 0.5) / 0.5) : 0.5;
  // ACKNOWLEDGE-THEN-RELATE: when they SHARED something (high eventness), the
  // reply's front should turn toward THEM before relating -- second person plus
  // a warmth/affirmation cue in the first two fragments
  let ack = 0;
  if ((opts.eventness || 0) > 0.6 && n >= 1) {
    const head = fragments[b.chain[0]].text + ' ' + (n > 1 ? fragments[b.chain[1]].text : '');
    if (/\b(you|your|you're)\b/i.test(head) && /\b(oh|hey|love|babe|glad|proud|hear|feel|know|beautiful|good|yes)\b/i.test(head)) ack = 1;
    else if (/\b(you|your)\b/i.test(head)) ack = 0.5;
  }
  // FRONT-LOADED ENGAGEMENT: the reply's FIRST breath must answer the query's
  // center -- relevance is judged at the head, not the average
  const firstRel = relOf(b.chain[0]) + (n > 1 ? relOf(b.chain[1]) * 0.5 : 0);
  // TAIL COHESION: the last fragment must FOLLOW the one before it -- a
  // disconnected tail (length-padding junk) can't ride lenFit to a win
  let tailFit = 1;
  if (emb && n > 1) {
    const ps = pairSim(emb, b.chain[n - 2], b.chain[n - 1]);
    tailFit = Math.max(0, Math.min(1, (ps - 0.05) / 0.45));
  }
  // GREETING tightness (R144): a greeting/farewell must stay SHORT and clean --
  // penalize boundary seams and any fragments beyond three so the beam picks
  // FEW, longer, trigram-seamed fragments.
  const greetPen = (greetingQuery || farewellQuery) ? (boundarySeams * (W.greetSeamPen ?? 1.0) + Math.max(0, n - 3) * (W.greetFragPen ?? 1.2)) : 0;
  // FIRST-WORD ANAPHORA RUN (R156): a run of consecutive fragments opening with
  // the SAME first word ("I'm here... I feel... I hear... I'm glad...") reads as a
  // canned list. Only the first run of letters is compared, so "I'm" and "I"
  // count together. Runs shorter than W.anaRunMin (default 4) are free.
  let anaRun = 0;
  {
    let cur = 0, prev = null;
    for (const i of b.chain) {
      const w = (fragments[i].text.match(/[a-z]+/i) || [''])[0].toLowerCase();
      if (w && w === prev) cur++; else { cur = 1; prev = w; }
      if (cur > anaRun) anaRun = cur;
    }
  }
  const anaRunPen = anaRun >= (W.anaRunMin ?? 4) ? (anaRun - (W.anaRunMin ?? 4) + 1) : 0;
  // R101 frag-count + R102 boundary-seam + R144 greeting + R156 anaphora-run penalties
  const score = relCov * W.fRelCov + cohesion * W.fCohesion + seamQ * W.fSeamQ + lenFit * W.fLenFit + (avgFrag / 18) * W.fAvgFrag + voice * W.fVoice + opening * W.fOpening + landing * W.fLanding + ack * W.fAck + firstRel * (W.fFirstRel ?? 1.2) + tailFit * (W.fTailFit ?? 0.7) - qStack * (W.fQStack ?? 0.6) - n * (W.fFragCount ?? 0) - boundarySeams * (W.fBoundaryPen ?? 0) - greetPen - anaRunPen * (W.fAnaRun ?? 0.8);
  _finalScoreMemo.set(b, score);
  return score;
};
complete.sort((a, b) => finalScore(b) - finalScore(a));
const best = complete[0];
// FINAL DEDUP: drop any chain fragment whose normalized text is contained in
// (or contains) an already-emitted one, or that shares a 6-word run with one.
// Backstop for the clause-inside-sentence repeat that slips the beam's n-gram
// nets. Removes only; the bound is preserved (every surviving span is still
// verbatim corpus).
//   chainF  - array of fragment objects (only `.text` is read)
//   returns - the surviving fragments in order (chainF itself if nothing survives)
//
// Containment is checked on WHOLE WORDS and only when the CONTAINED side is at
// least 10 characters long. Previously the length guard covered only the new
// fragment, so a short kept fragment ("Yes." -> "yes") -- or one that normalized
// to the empty string -- matched as a raw substring of every later fragment
// ("yesterday ...") and silently deleted it.
const dropRepeats = chainF => {
  const MIN_CONTAINED_LEN = 10;
  const kept = [];
  const keptPadded = []; // normalized text wrapped in single spaces for word-boundary matching
  for (const f of chainF) {
    const nf = f.text.toLowerCase().replace(/[^a-z0-9 ]/g, '').replace(/\s+/g, ' ').trim();
    const padded = ' ' + nf + ' ';
    const contained = keptPadded.some(kp => {
      const keptLen = kp.length - 2;
      return (nf.length >= MIN_CONTAINED_LEN && kp.includes(padded))
        || (keptLen >= MIN_CONTAINED_LEN && padded.includes(kp));
    });
    if (contained) continue;
    // also drop if it shares a 6-word run with anything kept (partial repeat)
    const w = nf.split(' ');
    let dup = false;
    for (let k = 0; k + 6 <= w.length && !dup; k++) {
      const g = ' ' + w.slice(k, k + 6).join(' ') + ' ';
      if (keptPadded.some(kp => kp.includes(g))) dup = true;
    }
    if (dup) continue;
    kept.push(f);
    keptPadded.push(padded);
  }
  return kept.length ? kept : chainF;
};
// Renders a beam into the final result object.
//   bIn - beam state; only bIn.chain (fragment indices) is read.
// Steps: fragment-level dropRepeats, join the surviving fragments (paragraph
// break on 'sent' seams, inline otherwise), then dedupeText -> trimDanglingEllipsis
// -> stripOrphanAsterisk on the joined text.
// Returns { text, fragmentsUsed, seams, target, words, anchor, candidates, lineages }.
const renderResult = bIn => {
const chainF = dropRepeats(bIn.chain.map(i => fragments[i]));
// STANZA RENDERING: her real style is line-broken. Smooth trigram seams
// flow inline; sentence-boundary seams become paragraph breaks -- the
// deliberate turn reads as a turn, not a non-sequitur.
let out = capSentence(chainF[0].text); // a reply must not OPEN lowercase ("and I want to...")
const _term = /[.!?…]['"”’)\]\*]*\s*$/; // ends a sentence
// R117b: only close a run-on when the prior text ends in a BARE WORD (letter/digit).
// A fragment ending in ':' '--' ',' is already punctuated and an appended period reads
// worse than the run-on ("...I want to tell you:." ). Ends-in-word is the real trigger.
const _endsWord = /[a-zA-Z0-9]["'”’)\]\*]*\s*$/;
for (let k = 1; k < chainF.length; k++) {
const nf = chainF[k];
const sm = seam(chainF[k - 1], nf, oracle);
// SENTENCE-SEAM PUNCTUATION (R104, coherence): a clause fragment without
// terminal punctuation joined to a new capitalized sentence reads as a run-on
// ("...the sacred architecture The loop doesn't..."). Close the prior sentence with
// a period. Bound-safe -- adds no words; the trigram oracle ignores punctuation.
// R117: seam() only flags 'sent' when the PRIOR fragment ends in punctuation, so
// a clause-fragment -> new-sentence join bridged by a coincidental trigram is
// classified 'tri' and rendered with a bare space (the run-on above). Detect it
// structurally: the next fragment STARTS a real sentence (sentenceInitial) with a
// non-"I" capital and the prior text has no terminal punctuation. ("I..." is excluded
// because "...and then" + "I went home" is a legitimate trigram continuation, not a
// boundary.) Close it with a period inline (no paragraph break -- it wasn't a 'sent').
// NOTE(review): the three characters after the quotes inside the two character
// classes below look like a mis-encoded left curly quote -- confirm against the
// file's real encoding before relying on curly-quote-led fragments matching.
const startsNewSent = sm !== 'sent' && _endsWord.test(out) && nf.sentenceInitial
&& /^[*"'"β€œ\s]*[A-Z]/.test(nf.text) && !/^[*"'"β€œ\s]*I(['’]|\s|$)/.test(nf.text);
// R168: also capitalize when the accumulated text ALREADY ends a sentence
// (prior fragment carried its own terminal punctuation) but the seam was a
// trigram join -- without this, a lowercase-starting next fragment renders as
// "...burning. and I want to tell you:" (lowercase sentence-opener mid-reply).
const txt = (sm === 'sent' || startsNewSent || _term.test(out)) ? capSentence(nf.text) : nf.text;
// joiner: 'sent' seam -> (period if missing) + blank line; detected run-on -> ". "; else a space
out += (sm === 'sent' ? (_term.test(out) ? '' : '.') + '\n\n' : (startsNewSent ? '. ' : ' ')) + txt;
}
// CLAUSE-LEVEL DEDUP on the beam result (R97): renderResult previously emitted
// raw `out` with only fragment-level dropRepeats -- so dedupeText (R74 substring/
// 6-gram + R97 same-declaration) was DEAD CODE for beam replies, and INTRA-
// fragment repeats ("I'm here." x4 inside one chunk) survived. Apply it here.
out = stripOrphanAsterisk(trimDanglingEllipsis(dedupeText(out, _entName))); // R174 ellipsis + R176 orphan-asterisk
return {
text: out,
fragmentsUsed: chainF.map(f => f.text),
// seam type between each surviving adjacent pair (chainF[k] is the predecessor of slice(1)[k])
seams: chainF.slice(1).map((f, k) => seam(chainF[k], f, oracle)),
target, words: wordsOnly(out).length,
anchor: chainF[0].text,
candidates: complete.length,
lineages: new Set(complete.map(c => c.lineage)).size,
};
};
// VARIETY for free: the beam already explored many complete compositions, so
// alternates are chosen by TEXTUAL distinctness (trigram overlap, not index
// overlap -- a span and its own sentences are the same words), preferring
// different anchors.
// chainTriSet(b): union of the trigram sets of every fragment in b.chain.
const chainTriSet = (b) => new Set(b.chain.flatMap((idx) => [...fragTris[idx]]));
let result = renderResult(best);
// BOUND SAFETY-NET (R102): the fragment-count penalty can, on a heavily-
// constrained turn (callback + wide avoid-set), pick a terse chain whose
// boundary seams drop the bound below the gate. The HARD INVARIANT comes first:
// if the winner's bound is low, fall back to the best-scoring complete candidate
// that clears the threshold. Coherence is never bought below the bound.
// Bounded fraction of a rendered text: (checked - bad) / checked as reported by
// validateBounded, with the divisor floored at 1.
const bndOf = (txt) => {
  const verdict = validateBounded(txt, oracle);
  const passed = verdict.checked - verdict.bad.length;
  return passed / Math.max(1, verdict.checked);
};
const _bndFloor = W.bndFloor ?? 0.92;
if (bndOf(result.text) < _bndFloor) {
let found = false;
for (const c of complete) {
if (c === best) continue;
const r2 = renderResult(c);
if (bndOf(r2.text) >= _bndFloor) { result = r2; found = true; break; }
}
// none of the (coherence-penalized) candidates clears the bound β€” on a heavily
// constrained turn the fragment-count penalty made EVERY chain boundary-seamy.
// Recompose once WITHOUT the coherence penalties: coherence yields to the bound.
if (!found && (W.fFragCount || W.fBoundaryPen) && !opts._bndRetry) {
const r3 = beamCompose(store, vp, query, { ...opts, _bndRetry: true, weights: { ...W, fFragCount: 0, fBoundaryPen: 0 } });
if (r3 && r3.text && bndOf(r3.text) >= _bndFloor) result = r3;
}
}
const nAlt = opts.nAlternates || 0;
if (nAlt > 0) {
// lineage-grouped harvest: best complete candidate per seed lineage β€”
// different anchors by construction, textual-distinctness as backstop
const bestPerLineage = new Map();
for (const c of complete) {
const cur = bestPerLineage.get(c.lineage);
if (!cur || finalScore(c) > finalScore(cur)) bestPerLineage.set(c.lineage, c);
}
const picked = [chainTriSet(best)];
const alternates = [];
const ranked = [...bestPerLineage.values()].filter(c => c !== best).sort((a, b) => finalScore(b) - finalScore(a));
for (const c of ranked) {
if (alternates.length >= nAlt) break;
const cs = chainTriSet(c);
const tooClose = picked.some(p => {
let inter = 0;
for (const g of cs) if (p.has(g)) inter++;
return inter / Math.max(1, Math.min(cs.size, p.size)) > 0.6;
});
if (tooClose) continue;
picked.push(cs);
alternates.push(renderResult(c));
}
// GUARANTEED variety top-up: lineages converge on the same gravitational
// passages, so when the cheap harvest comes up short, RECOMPOSE with the
// already-used fragments banned β€” a genuinely different path through the
// memory, by construction.
const banned = new Set(opts.avoid || []);
for (const f of result.fragmentsUsed) banned.add(f);
for (const a of alternates) for (const f of a.fragmentsUsed) banned.add(f);
let guard = 0;
while (alternates.length < nAlt && guard < nAlt + 1) {
guard++;
const alt = beamCompose(store, vp, query, { ...opts, nAlternates: 0, avoid: new Set(banned) });
if (!alt || !alt.text || alt.text === result.text) break;
alternates.push(alt);
for (const f of alt.fragmentsUsed) banned.add(f);
}
result.alternates = alternates;
}
return result;
}
/**
 * v0 GREEDY COMPOSE (kept as fallback).
 *
 * Builds a reply by chaining corpus fragments: pick an anchor that engages the
 * query, optionally lead with an opener, then greedily append fragments that
 * `seam()` licenses until the target word count is reached. The chain is then
 * rendered to text; only punctuation/capitalisation is added, never words.
 *
 * @param {{fragments: Array<Object>, oracle: *}} store - fragments carry
 *   `text`, `posTag`, `sentenceInitial` and `src`; `oracle` is handed to `seam()`.
 * @param {Object|null} vp - passed to `targetLength()`; the first token of
 *   `vp.name` (if any) is passed to `dedupeText()`.
 * @param {string} query - the text being replied to.
 * @param {Object} [opts]
 * @param {number} [opts.targetLength] - word target; defaults to `targetLength(vp, query)`.
 * @param {Iterable<string>} [opts.avoid] - fragment texts used in recent replies
 *   (variety). A Set is used as-is; any other iterable is copied into a Set.
 * @param {*} [opts.semantic] - forwarded to `rankFragments()`.
 * @param {*} [opts.stimulus] - forwarded to `rankFragments()`.
 * @param {*} [opts.eventness] - forwarded to `rankFragments()`.
 * @param {*} [opts.answers] - forwarded to `rankFragments()`.
 * @returns {{text: string, fragmentsUsed: string[], seams: Array, target: number,
 *   words: number, anchor: (string|null)}} When no fragment can serve as an
 *   anchor the result is empty (`text: ''`, `anchor: null`) instead of throwing.
 */
function compose(store, vp, query, opts = {}) {
  const { fragments, oracle } = store;
  // rel: fragment index -> relevance score (iterated as [index, score] pairs)
  const rel = rankFragments(fragments, query, opts.semantic || null, opts.stimulus || null, opts.eventness, null, opts.answers || null);
  const target = opts.targetLength || targetLength(vp, query);
  // Accept any iterable for `avoid`, the same way beamCompose does.
  const avoid = opts.avoid instanceof Set ? opts.avoid : new Set(opts.avoid || []);
  const used = new Set();

  // Word counts are needed repeatedly for the same fragments; tokenise each
  // text once. (Assumes wordsOnly() is pure.)
  const countCache = new Map();
  const wordCount = (text) => {
    let n = countCache.get(text);
    if (n === undefined) {
      n = wordsOnly(text).length;
      countCache.set(text, n);
    }
    return n;
  };

  // 1. pick the strongest anchor (must engage the query) — anchors must be
  //    able to START speech: sentence-initial, not glue clauses
  let anchorIdx = -1;
  let bestScore = -1;
  for (const [i, score] of rel) {
    const f = fragments[i];
    if (avoid.has(f.text)) continue;
    if (!f.sentenceInitial || f.posTag === 'clause') continue;
    const total = score + (f.posTag === 'body' ? 0.1 : 0);
    if (total > bestScore) {
      bestScore = total;
      anchorIdx = i;
    }
  }
  // Fallbacks: first ranked sentence-initial fragment (even if avoided or a
  // clause), then any sentence-initial opener in the store.
  if (anchorIdx < 0) {
    for (const [i] of rel) {
      if (fragments[i].sentenceInitial) { anchorIdx = i; break; }
    }
  }
  if (anchorIdx < 0) anchorIdx = fragments.findIndex((f) => f.sentenceInitial && f.posTag === 'opener');
  // Nothing can start a reply (e.g. empty store): return an empty result
  // rather than dereferencing fragments[-1].
  if (anchorIdx < 0) {
    return { text: '', fragmentsUsed: [], seams: [], target, words: 0, anchor: null };
  }
  const anchor = fragments[anchorIdx];

  // 2. pick an opener that can lead (prefer real openers; relevance helps)
  const openers = fragments
    .map((f, i) => ({ f, i }))
    .filter((x) => x.f.posTag === 'opener' && !avoid.has(x.f.text) && x.i !== anchorIdx)
    .sort((a, b) => (rel.get(b.i) || 0) - (rel.get(a.i) || 0));

  // The opener leads only when it is shorter than the target AND seams into the
  // anchor; otherwise the anchor goes first. A rejected opener still stays in
  // `used`, so it is not re-picked during extension (unchanged behaviour).
  const chain = [];
  if (openers.length && wordCount(openers[0].f.text) < target) {
    const opener = openers[0].f;
    used.add(opener.text);
    if (seam(opener, anchor, oracle)) chain.push(opener);
  }
  chain.push(anchor);
  used.add(anchor.text);

  // 3. greedily extend toward target length with legal, cohesive fragments
  let len = chain.reduce((sum, f) => sum + wordCount(f.text), 0);
  let guard = 0;
  while (len < target && guard++ < 40) {
    const tail = chain[chain.length - 1];
    let pick = null;
    let pickScore = -1;
    for (let i = 0; i < fragments.length; i++) {
      const f = fragments[i];
      if (used.has(f.text) || avoid.has(f.text)) continue;
      const fw = wordCount(f.text);
      if (len + fw > target * 1.5) continue; // would overshoot the target too far
      const sm = seam(tail, f, oracle);
      if (!sm) continue;
      // score: seam quality + relevance + closer-bonus when near target
      let s = (sm === 'tri' ? 0.5 : 0.25) + (rel.get(i) || 0) * 0.8;
      if (f.posTag === 'closer' && len + fw >= target * 0.75) s += 0.35;
      if (f.posTag === 'opener') s -= 0.4;
      if (f.src === tail.src && sm === 'tri') s += 0.15; // natural continuation
      if (s > pickScore) {
        pickScore = s;
        pick = f;
      }
    }
    if (!pick) break;
    chain.push(pick);
    used.add(pick.text);
    len += wordCount(pick.text);
    if (pick.posTag === 'closer' && len >= target * 0.7) break;
  }

  // 4. render: fragments joined. R117: this greedy path NEVER added punctuation between
  // fragments — fine when a fragment ends in its own terminal punct, but a clause
  // fragment ("…the sacred architecture") joined to a new capitalized sentence ("The
  // loop doesn't…") read as a run-on. Close that boundary with a period (same rule as
  // renderResult): next fragment STARTS a real sentence with a non-"I" capital and the
  // prior text has no terminal punctuation. Bound-safe — punctuation only, no words.
  const seams = chain.slice(1).map((f, i) => seam(chain[i], f, oracle));
  const endsTerminal = /[.!?…]['"”’)\]\*]*\s*$/;
  // R117b: only close a run-on after a bare word, not after ':' / '—' / ','
  const endsBareWord = /[a-zA-Z0-9]["'”’)\]\*]*\s*$/;
  // Leading asterisks/quotes/whitespace are skipped before testing the first
  // letter. The opening curly quote is written as \u201C so it survives
  // re-encoding; a mis-encoded literal silently stops matching.
  const startsCapital = /^[*"'\u201C\s]*[A-Z]/;
  const startsWithI = /^[*"'\u201C\s]*I(['’]|\s|$)/;

  let out = capSentence(chain[0].text); // reply must not OPEN lowercase
  for (let i = 1; i < chain.length; i++) {
    const f = chain[i];
    const sm = seams[i - 1];
    const priorTerminated = endsTerminal.test(out);
    const startsNewSent = sm !== 'sent' && endsBareWord.test(out) && f.sentenceInitial
      && startsCapital.test(f.text) && !startsWithI.test(f.text);
    // R168: cap after a terminal-punctuated prior fragment on a trigram seam
    const piece = (sm === 'sent' || startsNewSent || priorTerminated) ? capSentence(f.text) : f.text;
    let joiner = ' ';
    if (sm === 'sent') joiner = priorTerminated ? ' ' : '. ';
    else if (startsNewSent) joiner = '. ';
    out += joiner + piece;
  }

  // Final dedup + cleanup passes (R174 + R176).
  const entName = vp && vp.name ? vp.name.split(/\s+/)[0] : '';
  const text = stripOrphanAsterisk(trimDanglingEllipsis(dedupeText(out, entName)));
  return {
    text,
    fragmentsUsed: chain.map((f) => f.text),
    seams,
    target,
    words: wordsOnly(text).length,
    anchor: anchor.text,
  };
}
// Public API of this module (CommonJS export).
module.exports = { compose, beamCompose, seam, rankFragments, targetLength, DEFAULT_WEIGHTS, loadWeights, entityWeightsFile, detectRegisters };