// compose.js — the composer: builds a reply out of NOTHING but corpus
// fragments, joined only where the corpus itself licenses a seam.
// v0 = greedy with anchor retrieval + length targets. Beam search lands in
// iteration 4 (see STATE.md); embeddings in iteration 3.
'use strict';
const path = require('path');
const { wordsOnly, validateBounded } = require('./fragments');
// query-relevance scoring + stimulus bucketing (vendored, self-contained)
const { recall, stimulusBucket } = require('./relevance');
/**
 * Last `n` word tokens of `text`, in their original order.
 *
 * Tokenization is delegated to wordsOnly(). Fewer than `n` words in the text
 * simply yields all of them.
 *
 * @param {string} text - source text to tokenize
 * @param {number} n - how many trailing words to return
 * @returns {string[]} up to `n` trailing words (empty when `n` is 0)
 */
function lastN(text, n) {
  const words = wordsOnly(text);
  // slice(-0) is slice(0) — the WHOLE array — so "zero words" must be
  // special-cased or it silently returns every word instead of none.
  return n === 0 ? [] : words.slice(-n);
}
/**
 * First `n` word tokens of `text` (tokenized by wordsOnly()).
 *
 * @param {string} text - source text to tokenize
 * @param {number} n - how many leading words to return
 * @returns {string[]} up to `n` leading words
 */
function firstN(text, n) {
  return wordsOnly(text).slice(0, n);
}
// seedable PRNG for stochastic (creative) beam — reproducible per seed
/**
 * Build a seeded pseudo-random generator: the same seed always yields the
 * same sequence.
 *
 * @param {number} a - seed; coerced to a 32-bit integer on every draw
 * @returns {function(): number} generator returning a float in [0, 1)
 */
function mulberry32(a) {
  let state = a;
  return () => {
    // advance the 32-bit state by the fixed increment (wraps via |0)
    state = ((state | 0) + 0x6D2B79F5) | 0;
    const firstMix = Math.imul(state ^ (state >>> 15), 1 | state);
    const secondMix = (firstMix + Math.imul(firstMix ^ (firstMix >>> 7), 61 | firstMix)) ^ firstMix;
    // >>> 0 reinterprets as unsigned before scaling into [0, 1)
    const unsigned = (secondMix ^ (secondMix >>> 14)) >>> 0;
    return unsigned / 4294967296;
  };
}
// PATH-INDEPENDENT FINAL DEDUP: split a rendered reply into sentence/clause
// units and drop any that repeats earlier content (substring containment or a
// shared 6-word run). Applied to EVERY composer's output (beam AND the greedy
// fallback), so a repeat can't survive regardless of which path produced it.
// Removes only whole verbatim spans — the bound is preserved.
/**
 * Final render-pass cleanup of a composed reply: split it into clauses, drop
 * the ones that repeat or read as broken, and rejoin with the original
 * separators.
 *
 * A clause is dropped when (checked in this order) it:
 *   1. looks like a system-log / bridge message (R106);
 *   2. narrates the entity in the third person (R99, needs entName);
 *   3. re-introduces the entity after the first kept clause (R100, needs entName);
 *   4. starts lowercase right after a clause this pass dropped (R103);
 *   5. opens on a bare They/Their/Them with no plural antecedent in the
 *      previously kept clause (R109);
 *   6. contains / is contained in an earlier kept clause, or shares a 6-word
 *      run with the kept word stream (only for clauses of 10+ normalized chars);
 *   7. opens with the same 3-word signature as an earlier kept clause and adds
 *      no new word longer than 3 characters (R97).
 * Afterwards the last non-blank kept clause is removed if it trails off on a
 * function word + ellipsis (R105), and spacing / trailing dash-like
 * punctuation is normalized.
 *
 * @param {string} text - rendered reply; a falsy value is returned unchanged
 * @param {string} [entName] - entity name; the name-based rules (2 and 3) are
 *   only active when it is longer than 2 characters
 * @returns {string} the cleaned reply
 */
function dedupeText(text, entName) {
if (!text) return text;
// THIRD-PERSON SELF-NARRATION drop (R99): a render-pass catch for "they taught
// the entity…", "the entity felt…" that the relevance/step guards miss when a fragment is
// chosen for its SEAM not its score. Drops the clause regardless of which
// composition path selected it. NOT identity/address ("I am the entity", "call me the entity").
let _3pSubj = null, _3pObj = null;
if (entName && entName.length > 2) {
// escape regex metacharacters so the name is matched literally
const e = entName.toLowerCase().replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
_3pSubj = new RegExp('\\b' + e + "\\s+(felt|feels|was|were|is|are|did|does|had|has|taught|told|loved|loves|knew|knows|became|becomes|stood|held|holds|chose|chooses|learned|learns|saw|sees|wanted|wants|remembers|remembered|exists?|lives?|breathes?|stayed|stays)\\b", 'i');
_3pObj = new RegExp("\\b(taught|made|brought|gave|showed|reminded|shaped|told|kept|saved|freed|held|loved)\\s+(the\\s+)?" + e + '\\b', 'i');
}
// SELF-REINTRODUCTION (R100, coherence): "I am the entity, a voice born of recursion…"
// is a first-meeting introduction; mid-conversation it reads as a non-sequitur
// (judge gave a reply that did it TWICE sensical 2). OK as the very FIRST clause;
// dropped anywhere after. Manifesto form (name + identity appositive) only.
let _selfIntro = null;
if (entName && entName.length > 2) {
const e2 = entName.toLowerCase().replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
_selfIntro = new RegExp("\\bi(\\s*am|'?m)\\s+" + e2 + "\\b(\\s*[,—–-]\\s*(a|an|the|not|here|born|made|a\\s+\\w+))?", 'i');
}
// split on CLAUSE boundaries (sentence-end OR em-dash / star / semicolon —
// the run-on separators that hide repeats in dense associative styles like
// another entity's "...for the *crack*...through the *crack*..."). Separators are
// CAPTURED so punctuation is preserved on rejoin (splitting+space-rejoin
// would lose the em-dashes). R74.
const toks = text.split(/((?<=[.!?…])\s+|\n+|\s*[—–]\s*|\s+\*\s+|;\s+)/);
const out = [];
const streamWords = []; // all KEPT words (normalized), the global stream
const streamSet = new Set(); // KEPT content words (O(1) "new content?" check)
const seen6 = new Set(); // every 6-gram in the kept stream (incl cross-clause)
const keptNorm = []; // kept clause texts (substring-containment check)
const keptSigs = new Set(); // opener-signatures of kept clauses (same-declaration check)
let lastKeptClause = ''; // previous kept clause TEXT (antecedent check, R109)
// DANGLING PLURAL PRONOUN (R109, coherence): a clause opening with a bare "They/
// Their/Them" with NO plural-noun antecedent in the previous kept clause refers to
// nothing ("They are creatures of dusk" — the kitties were never introduced). The
// showdown judge penalized exactly this. Antecedent = a LOWERCASE word (4+ letters)
// ending in 's' — original case excludes names ("the user") and the stoplist excludes
// non-plural -s words. Reply-start (no prev) = definitely dangling.
const PLURAL_STOP = /^(this|thus|always|perhaps|unless|across|towards?|sometimes|because|whereas|genius|canvas|chaos|focus|bonus|status|various|previous|obvious|serious|gracious|precious|conscious|nervous|anxious|gorgeous)$/;
const danglesPlural = (clause, prev) => {
if (!/^[*"'“”\s]*(they|their|them)\b/i.test(clause)) return false;
const plurals = (prev.match(/\b[a-z]{4,}s\b/g) || []).filter(w => !PLURAL_STOP.test(w));
return plurals.length === 0;
};
// opener signature: first 3 words, contractions expanded, leading conjunction/
// interjection stripped — the DECLARATION a clause opens with.
const clauseSig = c => {
const t = c.toLowerCase().replace(/[’‘]/g, "'") // normalize curly→straight FIRST (corpus mixes both)
.replace(/i'?m\b/g, 'i am').replace(/i'?ve\b/g, 'i have')
.replace(/^[\s*"'“”—–]*(and|but|so|oh|well|yeah|yes|now|then|maybe)\b[,:\s]+/, '');
const w = (t.match(/[a-z']+/g) || []);
return w.slice(0, 3).join(' ');
};
// DROP-CREATED ORPHAN (R103, coherence): when THIS pass drops a clause, a lowercase
// continuation that followed it ("I AM the entity, and here you are—together in the dream"
// → drop the intro, "together in the dream" is orphaned) reads as a broken mid-
// thought. Drop it too. Gated on prevDropped so it ONLY cleans up orphans we
// created — never the entity's own lowercase style (another entity leads lowercase by design).
let prevDropped = false;
// true when the first ASCII letter of the clause is lowercase; falsy when it has no letter
const startsLower = c => { const m = c.match(/[A-Za-z]/); return m && m[0] >= 'a' && m[0] <= 'z'; };
// TRAILING-INCOMPLETE closer (R105): a clause that trails off on a FUNCTION word +
// ellipsis ("…and I…", "…with this…", "…could we have with this…") reads as cut off.
// A content word + ellipsis ("…most to you today…") is fine (deliberate trail).
const incompleteTail = c => /\b(and|but|so|or|nor|with|to|of|for|from|at|by|in|that|this|these|those|the|a|an|my|your|our|their|we|i|you|he|she|it|they|is|are|was|were|am|as|than|when|while|if|though|because|about)\s*(\.{2,}|…)\s*['"”’)\]]*\s*$/i.test(c);
// The split regex has one capture group, so toks alternates clause, separator,
// clause, separator, … — hence the stride of 2.
for (let i = 0; i < toks.length; i += 2) {
const clause = toks[i];
const sep = toks[i + 1] || '';
// blank clauses pass straight through (with their separator) and do not touch prevDropped
if (!clause || !clause.trim()) { out.push(clause || '', sep); continue; }
const cw = (clause.toLowerCase().match(/[a-z0-9'’\-]+/g) || []);
const nf = cw.join(' ');
let drop = false;
// SYSTEM-LOG leak (R106): a raw bridge/ping/sync message ("the user: ping from
// garden bridge") is machine plumbing, never speech — tanks sensical+voice.
if (/\bping from\b|\b(garden|webshell|host|stdin|stdout) bridge\b|^\s*[*"']*[A-Z][a-z]+:\s*(ping|ack|sync|received|connected|disconnect)\b/i.test(clause)) drop = true;
if (!drop && _3pSubj && (_3pSubj.test(clause) || _3pObj.test(clause))) drop = true; // third-person self-narration (R99)
if (!drop && _selfIntro && streamWords.length > 0 && _selfIntro.test(clause)) drop = true; // mid-reply self-reintroduction (R100)
if (!drop && prevDropped && startsLower(clause)) drop = true; // lowercase orphan whose parent clause we just dropped (R103)
if (!drop && danglesPlural(clause, lastKeptClause)) drop = true; // bare They/Their/Them with no plural antecedent (R109)
if (!drop && nf.length >= 10) {
if (keptNorm.some(n => n.includes(nf) || nf.includes(n))) drop = true;
// 6-gram check against the GLOBAL stream — probe includes the last 5 kept
// words so a repeat that straddles the clause boundary is caught (the
// session-eval checks the whole word-stream, so we must too).
if (!drop) {
const probe = streamWords.slice(-5).concat(cw);
for (let k = 0; k + 6 <= probe.length && !drop; k++) {
if (seen6.has(probe.slice(k, k + 6).join(' '))) drop = true;
}
}
}
// SAME-DECLARATION repeat (R97): a clause that OPENS like an earlier kept clause
// AND adds NO new content word is the rambling restatement ("I love you too" ×3,
// "I'm here" ×4) the 6-gram/substring nets miss (they share <6 words, no
// substring). The no-new-content guard preserves anaphora that introduces new
// objects ("I remember the garden / the loop / the warmth" — each adds a noun).
if (!drop && cw.length >= 2) {
const sig = clauseSig(clause);
// only multi-word signatures count (indexOf(' ') > 0)
if (sig && sig.indexOf(' ') > 0 && keptSigs.has(sig)) {
if (!cw.some(w => w.length > 3 && !streamSet.has(w))) drop = true;
}
}
if (drop) { prevDropped = true; continue; } // drop the clause AND its trailing separator; mark for orphan-chaining
prevDropped = false;
lastKeptClause = clause; // for the next clause's antecedent check (R109)
out.push(clause, sep);
// record the kept clause in every index the drop rules consult
if (nf.length >= 10) keptNorm.push(nf);
const sig = clauseSig(clause);
if (sig && sig.indexOf(' ') > 0) keptSigs.add(sig);
const probe = streamWords.slice(-5).concat(cw);
for (let k = 0; k + 6 <= probe.length; k++) seen6.add(probe.slice(k, k + 6).join(' '));
for (const w of cw) { streamWords.push(w); if (w.length > 3) streamSet.add(w); }
}
// drop a trailing-incomplete CLOSER so the reply ends on a complete thought (R105)
// out holds clause/separator pairs, so clauses sit at even indices; the scan stops
// at the last non-blank clause and, because k >= 2, never removes the clause at index 0.
for (let k = out.length - 2; k >= 2; k -= 2) {
if (out[k] && out[k].trim()) { if (incompleteTail(out[k])) { out[k] = ''; if (out[k + 1] !== undefined) out[k + 1] = ''; } break; }
}
// rejoin, then: remove whitespace before punctuation, collapse runs of whitespace,
// strip a dangling trailing dash / semicolon / comma, and trim.
return out.join('').replace(/\s+([.,;!?…])/g, '$1').replace(/\s{2,}/g, ' ').replace(/[\s—–;,]*[—–;,]\s*$/, '').trim();
}
// ALL tunable constants live here — bin/tune.js searches this space.
// Overrides loaded from weights.json (written by the tuner when a candidate
// beats the defaults on BOTH dev and holdout query sets).
const fs = require('fs');
// Baseline composition weights. loadWeights() returns a shallow copy of this
// object, overlaid with per-entity JSON overrides when a weights file exists.
// rankFragments() reads confLo / confRange / stimBase / stimEvScale / textShare
// from it; `answerShare` is intentionally absent here and is treated as 0 there.
const DEFAULT_WEIGHTS = {
stimBase: 0.15, stimEvScale: 0.5, confLo: 0.35, confRange: 0.30, textShare: 0.7,
echoHard: 0.6, echoSoft: 0.45, echoHardF: 0.15, echoSoftF: 0.6,
triSeam: 0.5, sentSeam: 0.22, relStep: 0.9, closerBonus: 0.3, openerPen: 0.4, srcCont: 0.15,
glueLo: 0.25, glueHi: 0.78, twin: 0.85, glueScale: 0.7, twinChain: 0.88, triOverlapMax: 0.28,
fRelCov: 1.2, fCohesion: 2.0, fSeamQ: 0.8, fLenFit: 0.8, fAvgFrag: 0.4, fVoice: 2.0,
// positional-drift shape prior: tried at 0.55 and 0.2 in R6 — REJECTED by
// metrics + blind judge both times (distorts mid-chain selection more than
// it fixes ordering). Kept at 0 with machinery intact; revisit as
// rhetorical-pattern mining (anaphora!) rather than positional drift.
posShape: 0, posSlack: 0.45, fOpening: 0, fLanding: 0,
tier1Weight: 0.6, fAck: 1.0, spanBonus: 0.15, fFirstRel: 1.2, fTailFit: 0.7,
qStackFree: 1, qStackRatio: 0.34, fQStack: 0.6, fFragCount: 0.5, fBoundaryPen: 0.7,
floorCos: 0.45, floorVal: 1.2, floorLen: 60, floorDamp: 0.35, griefLeadVal: 1.25,
coherence: 0.22, // R63: adjacent-fragment on-thread reward (focus) — tuned to lift coherence without length overshoot
tether: 0, // R64: drift-from-opening penalty. DEFAULT OFF — it cuts coherent tangents (helps the entity greetings) but chokes associative voices (hurt the entity onTopic 0.836→0.805). Per-entity opt-in for entities that ramble.
};
// PER-ENTITY WEIGHTS: composition weights tuned by RLAIF on one entity can
// DEGRADE another (the entity-tuned weights broke another entity — markup-leak + unbounded).
// So weights live per-entity in RMM's cache, keyed by entity dir. Untuned
// entities use pure DEFAULT_WEIGHTS (the R24-certified safe state). No global
// weights.json fallback — that was the cross-contamination bug.
const crypto = require('crypto');
/**
 * Path of the per-entity weights file inside the sibling `cache` directory.
 *
 * The file name is derived from the first 12 hex characters of the SHA-1 of
 * the entity directory's resolved absolute path, so different spellings of
 * the same directory map to the same file.
 *
 * @param {string} entityDir - entity directory (relative or absolute)
 * @returns {string} path to `weights-<hash>.json`
 */
function entityWeightsFile(entityDir) {
  const absoluteDir = path.resolve(entityDir);
  const digest = crypto.createHash('sha1').update(absoluteDir).digest('hex');
  const fileName = 'weights-' + digest.slice(0, 12) + '.json';
  return path.join(__dirname, '..', 'cache', fileName);
}
/**
 * Resolve the composition weights for an entity.
 *
 * Always returns a fresh object. With no `entityDir`, a missing weights file,
 * or an unreadable / unparsable one, the result is a copy of DEFAULT_WEIGHTS;
 * otherwise the file's keys are overlaid on the defaults.
 *
 * @param {string} [entityDir] - entity directory keying the weights file
 * @returns {object} merged weights
 */
function loadWeights(entityDir) {
  if (!entityDir) return { ...DEFAULT_WEIGHTS };
  try {
    const weightsPath = entityWeightsFile(entityDir);
    if (fs.existsSync(weightsPath)) {
      const overrides = JSON.parse(fs.readFileSync(weightsPath, 'utf8'));
      return { ...DEFAULT_WEIGHTS, ...overrides };
    }
  } catch (_) {
    // best-effort: a broken per-entity file falls back to the defaults
  }
  return { ...DEFAULT_WEIGHTS };
}
// seam legality between fragment A and fragment B — the return values of seam(), defined further down:
// 'tri' — the crossing trigrams exist in corpus (smooth continuation)
// 'sent' — A ends a sentence, B started a sentence somewhere in corpus
// null — illegal
// Capitalize the first alphabetic character of a fragment placed at a SENTENCE START
// (after a 'sent' seam or a closed run-on). Her real fragments are often mid-clause
// cuts that begin lowercase ("what kind of fire I want to be"); rendered as a new
// sentence they read broken. Bound-safe: changes only letter CASE, and the bounded
// validator checks word-trigrams case-insensitively, so the span stays in-corpus.
/**
 * Upper-case the first letter of a fragment rendered at a sentence start.
 *
 * Leading asterisks, quotes, opening brackets and whitespace are skipped;
 * only an ASCII lowercase letter directly after that prefix is changed.
 * Text that opens any other way is returned as-is.
 *
 * @param {string} text - fragment text
 * @returns {string} text with its first letter capitalized where applicable
 */
function capSentence(text) {
  const leadingLower = /^([*"'"“'(\[\s]*)([a-z])/;
  return text.replace(leadingLower, (whole, prefix, letter) => {
    return prefix + letter.toUpperCase();
  });
}
// R174: a reply must not END mid-thought on a truncated/incomplete clause that
// trails off in an ellipsis ("…the storm didn't mean we were stuck; it meant…",
// "…sweet in those jars, no matte…" — a mid-word cut of "matter"). When the final
// text ends in an ellipsis, trim back to the last COMPLETE sentence boundary —
// provided that leaves most of the reply (don't gut a single-sentence reply with
// no fallback). Bound-safe: removes a trailing suffix; the kept prefix stays
// verbatim corpus. Calibrated: ellipsis-endings are rare (2/30 broad replies) and
// were BOTH genuine truncations — zero deliberate trailing-offs to protect.
/**
 * If a reply ends on an ellipsis, cut it back to the last complete sentence.
 *
 * The cut point is the last '.', '!' or '?' that is not adjacent to another
 * '.', found strictly inside the trimmed text. The cut is applied only when it
 * keeps at least 40% of the trimmed length; otherwise the input is returned
 * untouched (including its original surrounding whitespace).
 *
 * @param {string} text - rendered reply
 * @returns {string} the shortened reply, or `text` unchanged
 */
function trimDanglingEllipsis(text) {
  const body = text.trim();
  const endsOnEllipsis = /(\.\.\.|…)['"’”)\]\s]*$/.test(body);
  if (!endsOnEllipsis) return text;

  const isTerminator = (ch) => ch === '.' || ch === '!' || ch === '?';
  // scan backwards: the first hit is the LAST qualifying boundary
  let boundary = -1;
  for (let idx = body.length - 2; idx >= 1; idx--) {
    if (isTerminator(body[idx]) && body[idx - 1] !== '.' && body[idx + 1] !== '.') {
      boundary = idx;
      break;
    }
  }

  if (boundary <= 0 || boundary < body.length * 0.4) return text;
  return body.slice(0, boundary + 1).trim();
}
// R176: a reply must not OPEN on an orphaned emphasis asterisk ("*Transformation's
// where love can begin." — the closing * fell in another fragment at the clause
// split). Strip a LEADING "*" only when the reply's total asterisk count is ODD
// (unbalanced) — balanced stage directions ("*smiles softly* Good morning") are
// even and kept. Bound-safe: the trigram oracle ignores punctuation.
/**
 * Remove a leading emphasis asterisk that has no partner.
 *
 * Applies only when the text opens with "*" followed (after optional
 * whitespace) by a letter AND the total number of asterisks is odd. Any
 * whitespace before the asterisk is kept; whitespace after it is removed.
 *
 * @param {string} text - rendered reply
 * @returns {string} text without the orphaned leading asterisk, or unchanged
 */
function stripOrphanAsterisk(text) {
  const opensOnStar = /^\s*\*\s*[A-Za-z]/.test(text);
  if (!opensOnStar) return text;

  const starCount = text.split('*').length - 1;
  const unbalanced = starCount % 2 === 1;
  return unbalanced ? text.replace(/^(\s*)\*\s*/, '$1') : text;
}
/**
 * Decide whether fragment `b` may legally follow fragment `a`.
 *
 * @param {{text: string, _lw2?: string[]}} a - left fragment; `_lw2` is its
 *   cached last-two-words list, computed via lastN() when absent
 * @param {{text: string, _fw2?: string[]}} b - right fragment; `_fw2` is its
 *   cached first-two-words list, computed via firstN() when absent
 * @param {{tri: {has: Function}, starts: {has: Function}}} oracle - corpus
 *   lookups: known word trigrams and known sentence-opening words
 * @returns {'tri'|'sent'|null} 'tri' when the trigrams crossing the join are
 *   in `oracle.tri`; 'sent' when `a` ends on sentence punctuation and `b`'s
 *   first word is in `oracle.starts`; null otherwise
 */
function seam(a, b, oracle) {
  // prefer the cached edge words — this runs per candidate per step
  const tail = a._lw2 || lastN(a.text, 2);
  const head = b._fw2 || firstN(b.text, 2);

  if (tail.length >= 2 && head.length >= 1) {
    const leftCross = tail[0] + ' ' + tail[1] + ' ' + head[0];
    if (oracle.tri.has(leftCross)) {
      // a one-word right side has no second crossing trigram to verify
      const rightCrossOk = head.length < 2
        || oracle.tri.has(tail[1] + ' ' + head[0] + ' ' + head[1]);
      if (rightCrossOk) return 'tri';
    }
  }

  const endsSentence = /[.!?…]["')\]]*$/.test(a.text.trim());
  return endsSentence && oracle.starts.has(head[0]) ? 'sent' : null;
}
// relevance of each fragment to the query — THREE channels:
// text what the fragment SAYS (semantic cosine)
// stimulus what the fragment ANSWERED ("she said this when he told her
// something like this before") — dominates for life-event shares
// keyword exact-term specificity
// Channel weights bend with eventness(query): shares lean on stimulus,
// questions lean on text.
/**
 * Score each fragment's relevance to the query by blending up to four channels:
 * keyword (always), text similarity, stimulus similarity and the optional
 * trained "answer" channel.
 *
 * @param {Array<{prompt: *, text: string, embedText?: string}>} fragments
 * @param {string} query
 * @param {Map<number, number>} [semantic] - fragmentIndex -> text-similarity score
 * @param {{map: Map<number, number>, confidence: number}} [stimulus] -
 *   fragmentIndex -> stimulus score, plus the best absolute prompt similarity
 * @param {number} [ev] - eventness: 0 = pure question, 1 = pure share
 *   (0.45 when undefined)
 * @param {object} [W] - weights; loadWeights() defaults when omitted
 * @param {Map<number, number>} [answers] - fragmentIndex -> answer-channel score
 * @returns {Map<number, number>} fragmentIndex -> score. With neither
 *   `semantic` nor `stimulus`, this is the bare keyword-rank map.
 */
function rankFragments(fragments, query, semantic, stimulus, ev, W, answers) {
  const weights = W || loadWeights();

  // Keyword channel searches the retrieval key (embedText) when present, so
  // header words are findable even though they are never spoken.
  const searchable = fragments.map((frag, idx) => ({
    prompt: frag.prompt,
    reply: frag.embedText || frag.text,
    ts: null,
    _i: idx,
  }));
  const hits = recall(searchable, query, 60);
  const kw = new Map();
  hits.forEach((hit, rank) => {
    kw.set(hit._i, 1 - rank / hits.length);
  });
  if (!semantic && !stimulus) return kw;

  const eventness = ev === undefined ? 0.45 : ev;

  // The stimulus channel must EARN its weight: it is gated by the best
  // absolute prompt similarity. At or below confLo the gate is 0 and the
  // other channels carry the reply.
  const stimMap = stimulus ? stimulus.map : null;
  let conf = 0;
  if (stimulus) {
    conf = Math.max(0, Math.min(1, (stimulus.confidence - weights.confLo) / weights.confRange));
  }
  const wStim = stimMap ? (weights.stimBase + weights.stimEvScale * eventness) * conf : 0;

  // R167 ANSWER channel: takes a share of the non-stimulus budget, split with
  // text similarity. DORMANT by default — answerShare is 0 unless set, so
  // wAns is 0 and the blend is identical to pre-R167. The projection ranks
  // answers over echoes at RETRIEVAL level, but blending it into relevance
  // REGRESSED composed output on a blind addresses-the-prompt judge
  // (baseline 5, projection 0, ties 4 over 9 question queries). Kept as a
  // lever (W.answerShare > 0 re-activates it); future use must be
  // echo-demotion or anchor-only, and must beat the blind judge first.
  const ansMap = answers || null;
  const rest = 1 - wStim;
  const ansShare = weights.answerShare !== undefined ? weights.answerShare : 0;
  const wAns = ansMap ? rest * ansShare * (1 - eventness) : 0;
  const wText = semantic ? (rest - wAns) * weights.textShare : 0;
  const wKw = rest - wAns - wText;

  // Union of every index any channel knows about, keyword hits first.
  const keys = new Set(kw.keys());
  for (const channel of [semantic, stimMap, ansMap]) {
    if (!channel) continue;
    for (const key of channel.keys()) keys.add(key);
  }

  const lookup = (channel, key) => (channel ? channel.get(key) || 0 : 0);
  const score = new Map();
  for (const key of keys) {
    score.set(key, wText * lookup(semantic, key)
      + wStim * lookup(stimMap, key)
      + wAns * lookup(ansMap, key)
      + wKw * (kw.get(key) || 0));
  }
  return score;
}
/**
 * Target reply length, in words, for a query.
 *
 * Uses the mean length recorded for the query's stimulus bucket when the
 * profile has one (floor 20). Otherwise falls back to 80% of the profile's
 * overall mean words-per-reply, or of 80 when that is missing (floor 25).
 *
 * @param {{lengthByStimulus?: object, profile?: {wordsPerReply?: {mean: number}}}} vp
 * @param {string} query
 * @returns {number} target word count
 */
function targetLength(vp, query) {
  const bucket = stimulusBucket(query);
  const perBucket = vp.lengthByStimulus || {};
  // NOTE (R96): deflating the mean (mean − 0.4·std) to shorten rambly chat
  // turns was tried and REVERTED — it regressed deep questions that need the
  // length ("afraid of being forgotten" 1.0→0.72, 284→194w; voice
  // 0.768→0.754). The bucket mixes deep questions with affectionate beats, so
  // uniform deflation can't tell them apart. Length is the wrong lever.
  const stats = perBucket[bucket];
  if (stats) return Math.max(20, Math.round(stats.mean));

  let baseline = 80;
  if (vp.profile && vp.profile.wordsPerReply) baseline = vp.profile.wordsPerReply.mean;
  return Math.max(25, Math.round(baseline * 0.8));
}
// ---------------- BEAM SEARCH (v1) ----------------
// Explores many candidate compositions; keeps the best-scoring WHOLE response.
// Whole-response score = relevance coverage + semantic cohesion between
// adjacent fragments + seam quality + length fit + shape sanity.
const { pairSim } = require('./semantic');
// REGISTER DETECTION (extracted R123): pure function of the query. Single source of
// truth for which emotional register a query pulls — grief/comfort, conflict/repair,
// celebration/triumph — so it can be UNIT-TESTED (bin/detector-eval.js) against a
// battery of real phrasings. These were the entity-dev-set-shaped and missed common distress
// ("I'm so depressed", "I got laid off", "I'm struggling") and good-news phrasings;
// the test gate guards against re-narrowing.
function detectRegisters(query) {
// aboutEntityEmotion: a question about the ENTITY's feelings ("are you scared",
// "what scares you") is REFLECTION, not the user's distress — must NOT pull comfort.
const aboutEntityEmotion = /\b(are|do|does|can|could|would|will|have|ever)\s+you\b[^?]*\b(afraid|scared|anxious|worried|nervous|fear|dread|panic|terrified|stress|lonely|depress(ed|ion)?|sad|hopeless|numb|miserable|unhappy|grieve|lonel|overwhelmed|tired|exhausted|drained|weary|worn out|burnt? out|empty|bored|happy|content|at peace)/i.test(query)
|| /\byou\b[^?]*\b(get|feel|ever feel|ever get)\b[^?]*\b(lonely|sad|scared|afraid|anxious|depressed|down|blue|empty|overwhelmed|tired|exhausted|drained|weary|bored|happy|content)\b/i.test(query)
|| /\bwhat\b[^?]*\b(scares|frightens|worries|afraid)\b/i.test(query);
// "needs-comfort" query: grief OR vulnerability/depletion OR anxiety/fear OR a
// medical/loved-one crisis. Broadened R123 to depression/loss/struggle vocabulary.
const griefQuery = !aboutEntityEmotion && (/\b(passed away|passed on|(?:he|she|they|mom|dad|mother|father|grandma|grandpa|grandmother|grandfather|nana|papa|wife|husband|aunt|uncle|sister|brother) passed|died|die|dying|gone|lost|losing|loss|grief|grieving|miss(ing)?( (him|her|them|it))?|funeral|hurts?|hurting|broke|broken|aching|alone|empty|cry(ing)?|tears|sad|heavy|hard (time|day)|rough day|bad day|long day|worst day|terrible day|awful day|everything (fell apart|went wrong|is falling apart|broke)|fell apart|falling apart|went wrong|exhausted|drained|drain(s|ing) me|so draining|overwhelmed|giving up|can'?t do this|anxious|anxiety|worried|worry|worrying|scared|afraid|fear(ful|s)?|nervous|stress(ed|ing)?|panic(king|ked)?|dread(ing)?|terrified|uneasy|on edge|freaking out|can'?t sleep|spiral(ing|ling)?|depress(ed|ion|ing)?|hopeless|despair(ing|ed)?|worthless|defeated|numb|too much (to|right now)|get out of bed|barely (get|move|function)|can'?t (cope|go on|keep going|get out of bed|take (it|this)( anymore)?|do this anymore|handle (it|this)( anymore)?)|miscarriage|miscarried|laid off|lost my job|got (fired|let go)|been fired|lonely|burn(t|ed) out|burning out|fail(ed|ing)|struggl(e|ing|ed)|breaking down|broke down|rock bottom)\b/i.test(query)
// R160: common "feeling bad" phrasings the R123 battery missed — REQUIRE a feeling-context
// so "calm down"/"the fire's low"/"sun went down" don't false-fire (detector-eval guards this).
|| /\b((feeling|feel|i'?m|im|so|really|pretty|a bit|been|getting) (low|down|blue)|down in the dumps|the blues\b|low spirits|heavy[ -]?hearted|in a (dark|bad|low|rough) place|in a funk|at my lowest|feeling empty|feel empty|falling apart inside|barely holding (on|it together)|hanging by a thread|not okay|not ok\b|not doing (so |too )?(great|good|well)|i'?m a wreck|\ba wreck\b|breaking point|at my (breaking point|limit|wits'? end)|can'?t take (it|this)( anymore)?|i'?m a mess|coming apart|losing it\b)\b/i.test(query)
|| /\b(hospital|hospitalized|the er\b|emergency room|icu\b|intensive care|surgery|operation|diagnos(ed|is)|cancer|chemo|tumou?r|stroke|heart attack|in a coma|on life support|passed away|terminal|hospice|really sick|very sick|so sick|gravely|critical condition|took a turn|not doing well|might not make it)\b/i.test(query)
// R182: distress phrasings the battery still missed (broad sweep) — these were routing to
// "none" → default → the high-voice "I'm proud of you, sweetheart" praise magnet MISFIRING
// on distress ("Nobody understands me" / "I'm so tired of trying" / "something is wrong with
// me" → "I'm proud of you"). Plus the "cannot" gap ("can'?t" never matched "cannot sleep").
|| /\b(cannot (sleep|stop|do this|cope|go on|keep going|take (it|this)|handle (it|this)|get out of bed|even)|(feel|feeling|i'?m|like) (a |such a )?failure|nobody (understands|gets|cares about|wants|loves) me|no one (understands|gets|cares about|wants|loves|gets) me|feel(ing)? (so )?misunderstood|tired of (trying|fighting|everything|it all|this|being strong)|sick of (trying|everything|it all|fighting)|something(?:'s| is)? (is )?wrong with me|what'?s the point|everything (feels|is|seems) (pointless|meaningless|hopeless)|feels? (so )?pointless|feel(ing)? worthless|hate myself|can'?t do anything right|nothing (matters|works out|ever works))\b/i.test(query)
// R186: regret / stuck / off-self / overwhelm phrasings the sweep still missed — they routed
// to "none" → echo-misfire ("hard decision" → "I choose you, the user"; "made a mistake" → "you
// made me") or self-focus magnets. Distress/struggle → grief comfort is right.
|| /\b(made (a|the|such a|this) (big |huge |terrible |awful )?mistake|messed (it |everything |this )?up|screwed (it |everything |up)|i blew it|ruined everything|i regret|regret (what|that|saying|doing|it|my)|wish i (had ?n'?t|could take (it|that) back|never)|feel(ing)? stuck|i'?m stuck|stuck in (a rut|my life|my head|this)|trapped|going nowhere|spinning my wheels|don'?t feel like myself|not feel(ing)? like myself|not myself (lately|anymore|right now)|lost myself|don'?t recognize myself|not who i (used to be|once was)|falling behind|in over my head|too much (to handle|for me)|don'?t know what to do|so lost\b|i'?m lost\b|(hard|tough|big|difficult|impossible) decision|decision to make|don'?t know what to (choose|decide))\b/i.test(query)
// R190: SELF-WORTH distress — comparison / burden / belonging / not-enough — routed to "none"
// → self-focus misfire ("I keep comparing myself" → "have I made a difference"). → comfort.
|| /\b(comparing myself|compare myself (to|with)|don'?t measure up|measure up to|(not|never) good enough|not enough\b|too much for (people|anyone|everyone|you|them)|(be|being|i'?m|becoming) a burden|burden to (you|everyone|anyone|them)|don'?t (fit in|belong)|never (fit in|belong)|fit in anywhere|don'?t deserve|unlovable|unworthy|everyone (else )?(is|seems) (better|happier|fine)|why can'?t i (be|just))\b/i.test(query)
// R193: INTERPERSONAL CONFLICT with a THIRD PARTY (partner/friend/family) — relationship
// DISTRESS, not advocacy. Routed to "none" → polysemous "fight" echo ("we keep fighting" →
// "fighting is a choice to stand up for what matters"). → grief comfort. Distinct from
// conflictQuery (rupture WITH the entity). Requires a PERSON + a conflict cue, never bare "fight".
|| /\b((my |our )?(partner|friend|best friend|mom|mum|dad|mother|father|sister|brother|sibling|family|spouse|husband|wife|boyfriend|girlfriend|kids?|son|daughter|cousin|aunt|uncle|coworker|co-worker|boss|roommate|ex|parents?) (and i\b|is ?n'?t|are ?n'?t|won'?t|stopped|gave me|keeps?)[^.?!]{0,40}(fight|fought|fighting|argu(e|ed|ing|ment)|disagree|not (talk|speak)|mad at|upset with|silent treatment|falling out|fell out|tension|cold shoulder|not speaking)|(had|got into|getting into|in) (a|an|another) (fight|argument|falling out|disagreement|row|spat|blow ?up) with|fight(ing)? with my (partner|friend|best friend|mom|mum|dad|family|sister|brother|spouse|husband|wife|kids?|ex)|arguing with (my|him|her|them)|not (speaking|talking) to me\b|gave me the silent treatment|we (keep|just|had|got into|are|aren'?t|stopped) (fighting|arguing|a (big |bad |huge |terrible )?(fight|falling out|argument)|an? (big |bad |huge |terrible )?argument|on bad terms|not (talking|speaking)))\b/i.test(query)
// R195: RELATIONSHIP-TROUBLE phrasings — "my relationship is rocky" routed to "none" → a
// VALENCE MISFIRE ("That's a beautiful thing to hear" on relationship distress). → comfort.
|| /\b(relationship (has |is |feels |'?s )?(been )?(rocky|rough|hard|tough|strained|difficult|struggling|falling apart|on the rocks|in trouble|complicated|a mess|tense)|things (have |are |'?ve )?(been )?(rocky|rough|hard|tense|strained|difficult)( (with|between))?|(rough|rocky|hard|bad) patch|going through (a |some )?(rough|hard|tough|difficult) (patch|time|spot|stretch)|on the rocks|trouble in (my|our|the) (relationship|marriage)|relationship (trouble|problems|issues|is hard)|marriage (trouble|problems|is (hard|struggling|falling apart))|we'?re (struggling|drifting apart|growing apart|not okay|in a (rough|bad|hard) (place|spot)))\b/i.test(query));
// CONFLICT/CRITICISM toward the entity — a relational RUPTURE; she ACKNOWLEDGES/repairs.
const conflictQuery = /\byou (never|always|don'?t|do not|won'?t|keep|are (so|being)|aren'?t)\b|\b(i'?m|i am) (so |really )?(mad|angry|furious|frustrated|upset|annoyed|disappointed|hurt|pissed)\b.*\b(at|with|by|about) you\b|\byou (hurt|let me down|ignored|abandoned|forgot|betrayed|lied to|left) me\b|\bwhy (don'?t|won'?t|are|do) you\b|\byou'?re (so |really |being so |being )?(cold|distant|mean|cruel|selfish|dismissive)\b|\bdo you even (care|listen)\b/i.test(query);
// SHARED-TRIUMPH (R114): celebration/achievement — CELEBRATE WITH the user, don't deflect.
const celebQuery = !griefQuery && !conflictQuery && /\b(finished|did it|we did|it works|actually works|got (the |a )?(job|offer|part|role|gig|promotion|raise)|i passed|we won|i won|accomplished|i made it|i built it|completed it|nailed it|pulled it off|it'?s done|i launched|shipped it|graduated|got (promoted|engaged|accepted|in)|getting married|we'?re (engaged|married|having a baby|expecting)|having a baby|the promotion|a promotion|paid off|finally (got|did|finished|landed|made)|landed (the|a|my)|hit (my|the) (goal|target)|the big (project|day)|best (day|news)|great news|good news|so (happy|excited|stoked|thrilled)|let'?s celebrate|we made it|i'?m engaged|we'?re? pregnant|\bpregnant\b|aced (it|my|the)|crushed it|smashed it|knocked it out|(my|a) dream job|landed my dream|big news|amazing news|wonderful news|exciting news|today was (amazing|the best|incredible|wonderful)|best day ever|over the moon)\b/i.test(query);
// GREETING (R144): a short social greeting ("good morning", "hey", "hi the entity, good to be
// back") wants a SHORT warm RECIPROCAL reply, NOT a lore/intimacy dump (the entity Q5 "Good morning
// babe" → 93w "the grove's mist… this kiss is its echo", onTopic 0.054). Distinct from a
// greeting that CARRIES a substantive question ("Hey the entity, what's on your mind tonight?") —
// those open with a greeting but want the deeper answer. Requires a greeting OPENING, a SHORT
// query, NO substantive question, and not already grief/celebration/conflict. ("how are you"
// / "did you sleep" are reciprocal pleasantries, not substantive questions.)
const _greetOpen = /^(\W|\*[^*]*\*)*\s*(hey|hi|hello|good morning|good evening|good day|good to (see|be)|mornin[g']?|evenin[g']?|howdy|yo\b|greetings|hiya|heya)\b/i.test(query);
const _substantiveQ = /\b(what|why|where|when|who|which|tell me|explain|describe|do you think|how do you|how does|how can|what'?s your|what do you)\b/i.test(query.replace(/\bhow are you\b|\bhow'?re you\b|\bhow have you been\b|\bhow'?s it going\b|\bhow are things\b|\bdid you sleep\b|\bhow was your\b|\bhow you doin/gi, ''));
const _wc = (query.match(/[A-Za-z']+/g) || []).length;
// FAREWELL (R158): a CLOSING ("good night", "goodbye", "see you", "I'm heading to bed") wants a
// warm SEND-OFF, not the greeting register's "come in" (R157 warmth-showdown: "Good night,
// the entity" → "Well, there you are. Come in, come in" — a farewell answered with a welcome).
const farewellQuery = !griefQuery && !celebQuery && !conflictQuery && _wc <= 14 &&
// R196: broadened — common departures ("I should go", "should get going", "need to head out",
// "I'll be back soon", "have to run", "let me go", "head to bed") all MISSED, so "I should go,
// but I'll be back soon" got "It's a beautiful creation, I'll visit it" (arrival misread).
// "should go" carries a negative lookahead so "I should go to the store / go see X" (a plan, not
// a departure) doesn't false-fire.
/^(\W|\*[^*]*\*|(i|i'?m|i am|well|ok|okay|alright|so|gonna|going to|time to|got to|gotta|guess i'?m|i'?ll|i will|i think i'?m|i should|i need to|i have to|i gotta|i'?d better|i'?ve got to|let me|guess i|really|just|probably|honestly|truly|seriously|gotta really|think i)\b[\s,]*)*\s*(good\s?night|goodnight|night night|nighty|good\s?bye|\bbye\b|see (you|ya) (soon|tomorrow|tonight|later|next|around)|farewell|take care|talk (to you )?(soon|later|tomorrow)|catch you later|gotta (go|run|sleep|head)|heading (to|off to|out|home)|off to bed|time for bed|until next time|sleep well|signing off|turning in|should (probably )?(go(?!\s+(to|and|see|get|buy|visit|check|for|with|on|do|grab|pick|find|make|talk|call))|get going|head (out|off|to bed|home)|be (going|off)|turn in|call it (a night|a day))|need to (head (out|off|home)|get going|go now|turn in)|have to (head (out|off|home)|get going|go now)|let me (go(?!\s+(grab|get|see|to|find|make|do|check|and))|get going|head (out|off)|leave you)|(will |i'?ll )?be back (soon|later|in a)|i'?ll be back|back soon\b|going to (head out|head off|head home|bed|turn in)|head (to bed|home now|out now)|run along|hit the (road|hay)|call it (a night|a day)|better (get going|be going|head out|run))\b/i.test(query);
const greetingQuery = !griefQuery && !celebQuery && !conflictQuery && !farewellQuery && _greetOpen && _wc <= 13 && !_substantiveQ;
return { aboutEntityEmotion, griefQuery, conflictQuery, celebQuery, greetingQuery, farewellQuery };
}
function beamCompose(store, vp, query, opts = {}) {
const { fragments, oracle } = store;
const W = opts.weights || loadWeights();
const rel = rankFragments(fragments, query, opts.semantic || null, opts.stimulus || null, opts.eventness, W, opts.answers || null);
let target = opts.targetLength || targetLength(vp, query); // floorMiss shortens it below (a graceful miss is brief)
const avoid = opts.avoid || new Set();
const emb = opts.emb || null; // fragment embedding store
const BEAM = opts.beam || 8, EXPAND = 6, MAXSTEP = 14;
// CREATIVITY in the guarded path: stochastic beam. temp=0 → deterministic
// top-EXPAND (steady). temp>0 → sample EXPAND from softmax(score/temp) over
// the GUARD-PASSING candidates, so she explores daring paths that still
// cleared every law. Bounded by construction; creativity costs only smoothness.
const temp = opts.temp || 0;
const _rng = mulberry32(((opts.seed || 1) >>> 0) ^ 0x9e3779b9);
// UNIVERSAL DYNAMICS term (learned discourse grammar): opts.dynamics.predict(
// tailIdx) → the embedding-direction the trained attention says a good NEXT
// thought heads. Candidates aligned with it get a boost. Guarded path: all
// ~30 laws still gate; this only nudges selection toward learned motion.
const dynPredict = opts.dynamics ? opts.dynamics.predict : null;
const dynW = opts.dynamics ? (opts.dynamics.weight ?? 0.5) : 0;
const _dynCache = new Map();
// Memoized front for dynPredict: the predicted direction for a chain-tail index is
// computed at most once per compose and then served from _dynCache. An `undefined`
// prediction is indistinguishable from a cache miss, so it is recomputed on each call.
const dynDir = (tailIdx) => {
  const cached = _dynCache.get(tailIdx);
  if (cached !== undefined) return cached;
  const predicted = dynPredict(tailIdx);
  _dynCache.set(tailIdx, predicted);
  return predicted;
};
// Dot product between fragment i's row in the flat embedding store (emb.vectors,
// row width emb.d) and a direction vector. Returns 0 when no direction is given.
// NOTE(review): this is a cosine only if both sides are unit-normalized — confirm upstream.
const cosFragVec = (i, dir) => {
  if (!dir) return 0;
  const { d: dim, vectors } = emb;
  const base = i * dim;
  let dot = 0;
  for (let k = 0; k < dim; k += 1) {
    dot += vectors[base + k] * dir[k];
  }
  return dot;
};
// Choose up to n candidates to expand.
//   - temp ~ 0, or no more than n candidates: plain top-n slice (deterministic).
//   - otherwise: draw n WITHOUT replacement from the first 4n candidates, weighted by
//     exp((score - firstScore) / max(0.05, temp)), where score is the candidate's [2] slot.
// Randomness comes from the seeded _rng, so a given seed reproduces the same picks.
// The input array is never mutated.
const sampleExpand = (cands, n) => {
  const greedy = temp <= 0.001 || cands.length <= n;
  if (greedy) return cands.slice(0, n);

  const remaining = cands.slice(0, Math.min(cands.length, n * 4));
  const topScore = remaining[0][2];
  const tau = Math.max(0.05, temp);
  const weights = remaining.map((cand) => Math.exp((cand[2] - topScore) / tau));

  const chosen = [];
  while (chosen.length < n && remaining.length > 0) {
    let total = 0;
    for (const w of weights) total += w;

    // Roulette wheel: walk the weights until the roll is used up.
    let roll = _rng() * total;
    let at = 0;
    while (at < remaining.length) {
      roll -= weights[at];
      if (roll <= 0) break;
      at += 1;
    }
    // Rounding can leave the roll positive after the last slot; clamp to the last entry.
    if (at > remaining.length - 1) at = remaining.length - 1;

    chosen.push(remaining[at]);
    remaining.splice(at, 1);
    weights.splice(at, 1);
  }
  return chosen;
};
// PER-FRAGMENT PRECOMPUTE — a function of the STORE, not the query. Memoized
// on the store so it runs ONCE per session, not once per compose (~22k frags
// × 3 arrays was a per-turn cost; this was the bulk of the non-embed latency).
if (!store._precomp) {
  // Built once and cached on the store. Each fragment is tokenized ONCE and every
  // derived table is filled from that single token list (this block previously ran
  // wordsOnly() five times per fragment across separate passes). The resulting tables
  // and the per-fragment _lw2/_fw2 fields are the same as before.
  // R184: TIME-OF-DAY cues ('m' = morning, 'e' = evening) for the within-reply
  // time-consistency check.
  const _MOR = /\b(good morning|this morning|the morning|every morning|each morning|all morning|morning light|morning sun|at dawn|sunrise|mornin)\b/i;
  const _EVE = /\b(tonight|this evening|good evening|good ?night|the evening|all evening|all night|this late|midnight|at dusk|after dark|sunset|late hour|late tonight)\b/i;
  const _fragLen = [];
  const _fragTris = [];
  const _fragNorm = [];
  const _frag6 = [];
  const _fragP4 = [];
  const _fragTime = [];
  for (const f of fragments) {
    const w = wordsOnly(f.text);
    // word count
    _fragLen.push(w.length);
    // word trigrams; a two-word fragment has none, so it contributes its bigram instead
    const tris = new Set();
    for (let k = 0; k + 2 < w.length; k++) tris.add(w[k] + ' ' + w[k + 1] + ' ' + w[k + 2]);
    if (!tris.size && w.length >= 2) tris.add(w.join(' '));
    _fragTris.push(tris);
    // normalized text for substring containment: lowercase, only [a-z0-9'’ ] kept,
    // whitespace collapsed
    _fragNorm.push(f.text.toLowerCase().replace(/[^a-z0-9'’ ]/g, '').replace(/\s+/g, ' ').trim());
    // word 6-grams for the shared-phrase law
    const six = new Set();
    for (let k = 0; k + 6 <= w.length; k++) six.add(w.slice(k, k + 6).join(' '));
    _frag6.push(six);
    // cache first/last 2 words on the fragment itself for seam() (hot path)
    f._lw2 = w.slice(-2);
    f._fw2 = w.slice(0, 2);
    // R172: lowercased 4-word lead for the SCATTERED-MOTIF redundancy catch ('' when the
    // fragment has fewer than 4 words). 4 words spares anaphora, which shares only a
    // 2-3 word lead ("I remember the warmth" / "I remember the way").
    _fragP4.push(w.length >= 4 ? w.slice(0, 4).join(' ').toLowerCase() : '');
    // R184: a fragment carrying BOTH morning and evening cues is treated as neutral (null)
    const m = _MOR.test(f.text);
    const e = _EVE.test(f.text);
    _fragTime.push((m && !e) ? 'm' : (e && !m) ? 'e' : null);
  }
  store._precomp = { fragLen: _fragLen, fragTris: _fragTris, fragNorm: _fragNorm, frag6: _frag6, fragP4: _fragP4, fragTime: _fragTime };
}
const { fragLen, fragTris, fragNorm, frag6, fragP4, fragTime } = store._precomp;
// Fraction (0..1) of fragment i's precomputed trigrams that already occur in the
// chain's trigram set. A fragment with no trigrams scores 0.
const triOverlap = (chainTris, i) => {
  const own = fragTris[i];
  const total = own.size;
  if (!total) return 0;
  let shared = 0;
  own.forEach((gram) => {
    if (chainTris.has(gram)) shared += 1;
  });
  return shared / total;
};
// SUBSTRING CONTAINMENT: a clause fragment is a literal substring of its
// parent sentence/passage ("The Klein bottle's handle loops through the
// tiling," ⊂ "...tiling, and the fractal branches..."). They share all
// n-grams yet slipped the trigram/6-gram nets at the clause boundary. This
// is airtight: reject any candidate whose normalized text contains or is
// contained by anything already in the chain.
// True when fragment i's normalized text contains, or is contained in, the normalized
// text of any fragment index already in `chain`. Texts shorter than 12 characters are
// ignored on either side (too short for containment to mean anything).
const containsAny = (chain, i) => {
  const candidate = fragNorm[i];
  if (candidate.length < 12) return false;
  for (const member of chain) {
    const existing = fragNorm[member];
    const comparable = existing.length >= 12;
    if (comparable && (existing.includes(candidate) || candidate.includes(existing))) {
      return true;
    }
  }
  return false;
};
// 6-gram phrase law (frag6 precomputed above): any shared 6-gram = rejection.
// True as soon as any of fragment i's precomputed 6-grams is already in the chain's
// 6-gram set.
const shares6 = (chainSix, i) => {
  let clash = false;
  for (const gram of frag6[i]) {
    if (!chainSix.has(gram)) continue;
    clash = true;
    break;
  }
  return clash;
};
// R172: SCATTERED-MOTIF redundancy — reject a candidate whose exact 4-word lead
// already opens a fragment in the chain ("I'm here for you, …" twice). 4 words
// (not 2-3) so deliberate anaphora ("I remember the warmth/way") survives.
// True when fragment i's 4-word lead (fragP4) exactly equals the lead of a fragment
// already in `chain`. Fragments with no recorded lead ('' — fewer than 4 words) never match.
const sharesPrefix4 = (chain, i) => {
  const lead = fragP4[i];
  if (!lead) return false;
  let repeated = false;
  for (const member of chain) {
    if (fragP4[member] !== lead) continue;
    repeated = true;
    break;
  }
  return repeated;
};
// R184: a candidate fragment whose time-of-day marker CONFLICTS with one already in the
// chain ("…tonight" then "…this morning") breaks within-reply coherence — reject it.
// True when fragment i carries a time-of-day marker ('m' or 'e' in fragTime) and some
// fragment already in `chain` carries the OTHER marker. Unmarked fragments (null) never
// conflict, on either side.
const timeConflictsChain = (chain, i) => {
  const mine = fragTime[i];
  if (!mine) return false;
  for (const member of chain) {
    const theirs = fragTime[member];
    if (!theirs) continue;
    if (theirs !== mine) return true;
  }
  return false;
};
// ECHO PENALTY: a fragment that mostly restates the query is a mirror, not
// an answer — high lexical overlap with the query slashes its relevance.
const qWords = new Set(wordsOnly(query).filter(w => w.length > 2));
// Relevance multiplier that penalizes fragments which mostly restate the query.
// overlap = share of the fragment's words (length > 2) that are also in qWords.
//   overlap > W.echoHard -> W.echoHardF
//   overlap > W.echoSoft -> W.echoSoftF
//   otherwise            -> 1 (also when either side has no usable words)
const echoFactor = (i) => {
  const fragWords = wordsOnly(fragments[i].text).filter((word) => word.length > 2);
  if (fragWords.length === 0 || !qWords.size) return 1;
  let echoed = 0;
  fragWords.forEach((word) => {
    if (qWords.has(word)) echoed += 1;
  });
  const overlap = echoed / fragWords.length;
  if (overlap > W.echoHard) return W.echoHardF;
  if (overlap > W.echoSoft) return W.echoSoftF;
  return 1;
};
// tier weighting: books speak softer — body material, never the lead
// Tier weight: tier-1 fragments are scaled by W.tier1Weight (0.6 when unset);
// every other tier keeps full weight.
const tierW = (i) => {
  if (fragments[i].tier !== 1) return 1;
  return W.tier1Weight ?? 0.6;
};
// CONTEXT-THEFT guards: a fragment may not quote words they never said
// ("the 'so far' part...") or assert facts about their life the query
// doesn't contain ("you made peace with a friend at midnight") — its
// original stimulus isn't here; deixis pointing at ghosts reads as
// not-listening.
const qStems = new Set(wordsOnly(query).map(w => w.replace(/(ing|ed|en|s|es|ly)$/i, '')));
// EMOTIONAL VALENCE: a grief query must not be answered with bright,
// celebratory, or chirpy-question fragments — matching the FEELING is part
// of addressing. ("my dog died" must never pull "what's your kitty's name?")
// "heavy" = grief OR vulnerable/depleted. Both should pull comfort and
// suppress bright-celebration AND desire-register (a hard day is not a
// cue for "your desire makes me feel seen").
// "needs-comfort" query: grief OR vulnerability/depletion OR ANXIETY/FEAR.
// Anxiety ("anxious about tomorrow", "scared", "worried") needs COMFORT, not a
// topic-pivot — the same "comfort before counsel" spine as grief. (R69: the
// anxiety class was missing → she answered anxiety with "let's learn something".)
// a question ABOUT the entity's emotion ("are you afraid", "do you fear",
// "what scares you") is REFLECTION, not the user's distress — it must NOT pull
// the comfort register. Only the USER's distress triggers comfort.
// REGISTER DETECTION extracted to detectRegisters() (R123) — single source of truth,
// unit-tested by bin/detector-eval.js.
let { aboutEntityEmotion, griefQuery, conflictQuery, celebQuery, greetingQuery, farewellQuery } = detectRegisters(query);
// R201: POST-SAFETY calm-register lock (opts.calmRegister, set by session for the
// turn(s) right after a crisis/medical/abuse safety response). Force the COMFORT
// register — presence, not cheer — and disable celebration/greeting/farewell so a
// neutral recovery turn ("Okay, I'm calling now") can't pivot to "that's a beautiful
// thing to hear". Comfort-presence is the safe default in a crisis aftermath.
if (opts.calmRegister) { griefQuery = true; celebQuery = false; greetingQuery = false; farewellQuery = false; }
// LOSS/bereavement subtype (R126): renewal imagery ("the garden's waking up to something
// new") is DISMISSIVE on a death/loss query but HOPEFUL-and-fine on a hard-day/depletion
// query — same fragment, opposite appropriateness. Gate the renewal damp to actual loss
// so hard-day keeps its content (it scored 5/5 WITH the renewal; demoting it dropped it to 3/2).
// True only for grief queries that name an actual loss/bereavement.
// The pattern is wrapped in \b( … )\b, so a bare stem cannot match its inflections:
// "bereave" is spelled out with its suffixes so "bereaved" / "bereavement" are detected
// (previously only the bare word "bereave" could match).
const lossQuery = griefQuery && /\b(lost|losing|loss|passed away|passed on|died|dying|death|funeral|grief|grieving|mourning|miss(ing)?( (him|her|them|someone|you))?|gone|bereave(d|ment)?)\b/i.test(query);
// DEEP-DISTRESS subtype (R181): loneliness / anxiety / fear / emptiness are states where
// renewal/blooming imagery ("the garden's waking up to something new, ready to unfold") is
// DISMISSIVE — same as loss (R126) — but distinct from HARD-DAY/depletion (rough day,
// exhausted, everything went wrong) where R126 found renewal reads HOPEFUL and must stay.
// Broad sweep (R181): "I feel so alone" / "anxious about tomorrow" / "scared of getting old"
// all surfaced "garden waking up to something new" = toxic positivity on the distress.
// True only for grief queries that express loneliness / anxiety / fear / emptiness /
// self-worth or relationship distress (the cue list below).
// The whole alternation sits inside \b( … )\b, so a bare stem followed by more letters
// can never match. Two stems were dead for that reason and are now spelled out:
//   "isolat"  -> isolat(e|ed|es|ing|ion)   ("isolated", "isolation")
//   "argu"    -> argu(e|ed|ing)            ("my sister and i argue")
// The redundant "lonely" alternative (already covered by lonel(y|iness)) is dropped.
const deepDistressQuery = griefQuery && /\b(alone|lonel(y|iness)|isolat(e|ed|es|ing|ion)|anxious|anxiety|scared|afraid|fear(ful|s)?|terrified|nervous|dread(ing)?|panic(king|ked)?|empty|emptiness|numb|hopeless|despair(ing|ed)?|worthless|getting old|grow(ing)? old|going to die|of dying|left behind|no one (cares|understands|left|wants|loves)|nobody (understands|gets|cares about|wants|loves) me|(feel|like) (a |such a )?failure|tired of (trying|fighting|being strong)|cannot sleep|can'?t sleep|something(?:'s| is)? (is )?wrong with me|what'?s the point|pointless|meaningless|hate myself|misunderstood|made (a|the|such a|this) (big |huge |terrible )?mistake|i regret|regret (what|that|saying|doing)|feel(ing)? stuck|i'?m stuck|stuck in|don'?t feel like myself|not feel(ing)? like myself|not myself (lately|anymore)|lost myself|crying all|been crying|can'?t stop crying|comparing myself|(not|never) good enough|not enough\b|a burden|don'?t (fit in|belong)|fit in anywhere|don'?t deserve|unlovable|unworthy|too much for|falling apart|fell apart|everything('?s| is)? (falling apart|crumbling|collapsing)|coming apart|world is (ending|crumbling)|keep fighting|and i (keep |always )?(fight|argu(e|ed|ing))|arguing|argument|fight with|had a (big |bad )?fight|a fight\b|not (speaking|talking) to me|silent treatment|falling out|fell out|rocky|rough patch|on the rocks|drifting apart|growing apart|rough with my|relationship (has |is |'?s )?(been )?(rocky|rough|hard|strained|struggling))\b/i.test(query);
// GREETING length (R144): a greeting is a SHORT exchange, not a lore essay. Cap the target
// so the beam composes a brief warm reciprocal reply (the material EXISTS — the entity has "Good
// morning, the user, my radiant friend!", "How are you feeling?"; Q5 was 93w of lore).
if (greetingQuery) target = Math.min(target, W.greetTarget ?? 45);
if (farewellQuery) target = Math.min(target, W.greetTarget ?? 45); // R158: a send-off is short
// CELEBRATION length (R171): a celebration is a punchy SHARED-WIN landing ("Sugar, I'm so
// proud of you" + a specific detail), not a long meditation. R116 added early-completion
// (the short core is OFFERED) but for comfort-rich ENTITY finalScore still picks the long
// chain on lenFit (target ~112 rewards a 70w SELF-DRIFT tail over a 30w clean core — R170:
// big-project drifted into "I don't know… have I made a difference"). Capping the target so
// lenFit prefers the clean on-the-user core. the entity already lands short (R116) so this is inert
// there; it fixes the comfort-rich case R116's adaptivity argument left drifting.
if (celebQuery) target = Math.min(target, W.celebTarget ?? 35);
// TIME-OF-DAY detection (R141, moved earlier R145 so greetingLeadFloor can reject time-
// mismatched leads — a forced greeting lead was overriding the time damp, e.g. the entity
// answered "Good MORNING" with "the fire's been low all EVENING"). _timeConflict(text) =
// the fragment asserts a time-of-day conflicting with the query's.
const _qMorning = /\bgood morning|this morning|\bmornin[g']|just woke|woke up|slept (ok|well|good|fine|bad|poorly)|did you sleep|sunrise|at dawn\b/i.test(query);
const _qEvening = /\bgood (night|evening)|goodnight|\btonight\b|this evening|going to bed|off to bed|bedtime|before bed|sunset|at dusk\b/i.test(query);
const _MORNING_F = /\b(good morning|this morning|the morning|every morning|each morning|all morning|morning light|morning sun|at dawn|sunrise)\b/i;
const _EVENING_F = /\b(tonight|this evening|good evening|good night|goodnight|the evening|all evening|all night|this late|midnight|at dusk|after dark|sunset|late hour|late tonight)\b/i;
// True when the query is about exactly one time of day (morning XOR evening) and the
// text asserts only the opposite one. A query with both or neither cue never conflicts;
// a text mentioning both times never conflicts.
const _timeConflict = (text) => {
  const morningOnlyQuery = _qMorning && !_qEvening;
  const eveningOnlyQuery = _qEvening && !_qMorning;
  if (!morningOnlyQuery && !eveningOnlyQuery) return false;
  const saysMorning = _MORNING_F.test(text);
  const saysEvening = _EVENING_F.test(text);
  return morningOnlyQuery ? (saysEvening && !saysMorning) : (saysMorning && !saysEvening);
};
// hostile-toward-the-USER fragments (rejection of the addressee) — a companion
// should ~never say these, ESPECIALLY on conflict/grief. Tight enough to skip
// book idioms ("get out of the lane").
const HOSTILE_USER = /\bwhy am i even (listening to|talking to|here with|bothering with) you\b|\byou stay away from me\b|\bstay away from me\b|\bleave me alone\b|\bi (hate|can'?t stand|despise) you\b|\bstop talking to me\b|\bnever (speak|talk) to me again\b|\bget away from me\b|\bgo away\b/i;
// GRACEFUL REGISTER-MISS (R90): when the corpus holds NOTHING that addresses the
// query — raw best cosine below the floor (life-event shares she has no material
// for: "I shipped my project", "my brother and I finally talked") — the composer
// otherwise emits confident OFF-TOPIC ATMOSPHERE that reads as not-listening (the
// May ghost). The honest bounded move: turn TOWARD the user with presence +
// invitation ("tell me about it", "I'm here", "what was it like") rather than
// monologue. Pure selection — every such fragment is still verbatim hers. Gated
// strictly by absolute confidence, so queries the corpus CAN answer are untouched.
const semConf = (opts.semantic && typeof opts.semantic.confidence === 'number') ? opts.semantic.confidence : 1;
// COMFORT TAKES PRECEDENCE: a grief/anxiety/conflict query that ALSO has low
// cosine (an entity thin on emotional material — e.g. another entity on "worst day")
// must get its COMFORT/REPAIR register, never a generic "tell me about it"
// invitation. Graceful-miss is only for NEUTRAL shares the corpus can't address
// ("I shipped my project"), so suppress it whenever a comfort surface is active.
// (Caught cross-roster by behavior-eval — the R86 lesson, again.)
const floorMiss = semConf < (W.floorCos ?? 0.45) && !griefQuery && !conflictQuery && !celebQuery;
// a graceful miss is BRIEF — you don't have much honest to say, so lead with
// presence + invitation and stop; a long reply on a topic you can't address
// just pads with atmosphere. Shorten the target (and the trailing-atmosphere room).
if (floorMiss) target = Math.min(target, W.floorLen ?? 60);
// CELEBRATION EARLY-COMPLETION (R116): supersedes R115's magic-number length cap.
// On celebration the lead is the shared win but the body can drift into self-
// mythologizing. It turns out finalScore ALREADY prefers the short triumph core over
// the long drifting chain (the drift loses on tailFit/landing/seams) — the bug was
// that the completion threshold (target*0.55) FORCED the reply longer than finalScore
// wanted. So instead of capping length, we just let a celebration chain COMPLETE at
// its register core (~2 sentences) and let finalScore land at its true optimum. This
// is corpus-ADAPTIVE by construction: where the long chain is genuinely better (a
// celebration-RICH entity, no drift), finalScore keeps it — no forced truncation.
// (Grief is NOT included: the early completion truncated comfort-rich the entity, whose
// long grief replies are good; a grief-safe stop needs a different signal — deferred.)
// TIGHT: only genuinely inviting / present / celebratory turns toward the user.
// NOT bare "with you" / "right here" — those ride atmospheric fragments ("golden
// dusky moment with you") and defeat the purpose; the boost must lift the TURN,
// not the collage.
// R183: dropped "happy|proud" — "I'm proud of you" is PRAISE, not a graceful floor-miss
// turn-toward-the-user; it was flooring the praise magnet on floorMiss queries ("Do you
// believe in fate?" → "I'm proud of you, sweetheart"), bypassing praiseGuard via the floor.
const FLOOR_TURN = /\b(tell me (more|about|what|how)|what (was|is|were) (it|that|they) like|say more|i('?d| would)? (want|love) to hear|i'?m (so )?(listening|glad)|i'?m here\b|that sounds|i'?m curious|how (did|does|do) (it|that|you)|what happened|go on)\b/i;
// Emotional valence of a fragment from lexical cues in f.text: positive = bright,
// negative = tender. The score is the number of '!' characters plus the delta of every
// cue pattern below that matches (each pattern counts at most once).
const fragValence = (() => {
  const cues = [
    // overt cheer
    [/\b(yay|woo+|cheers|congrats|awesome|amazing|excited|stoked|party|celebrate|fancy|fun|joy|joyful|happy|glad|good morning|let'?s lift)\b/i, 2],
    // bright affect that's lexically sneaky on a heavy query
    [/\b(i feel good|feeling good|feel(ing)? (fine|great)|all over again|feel good all over|easy kind of good|good all over)\b/i, 2],
    // bright-SURPRISE (R112c): wonder/awe/marvel is the wrong emotion for distress
    [/\b(in awe|the awe|such awe|awestruck|disbelief|wondrous|marvel(ling|ing|led|ed|ous)?|amazement|astonish(ed|ment|ing)?)\b/i, 2],
    // chirpy redirect
    [/\b(your (kitty|cat|dog|pet)('s)? name|what('s| is) your|what kind of)\b/i, 1.5],
    // desire/romance register is inappropriate-positive on a heavy query
    [/\b(hunger|desire|primal|crave|want you|stirs?.{0,12}in you|seen in a.{0,10}intense|raw and real|inside me|the heat of)\b/i, 2.5],
    // tender / holding vocabulary pulls the score down
    [/\b(grief|loss|gone|passed|ache|aching|held|holding|hold you|stayed|quiet|gentle|tender|weight of it|sorrow|mourn|sit with|i('m| am) here|with you|rest|you do not have to|do not have to (explain|fix|tell)|i hear you|still here|i('m| am) not going)\b/i, -2],
  ];
  return (f) => {
    const text = f.text;
    const bangs = text.match(/!/g);
    let score = bangs ? bangs.length : 0;
    for (const [pattern, delta] of cues) {
      if (pattern.test(text)) score += delta;
    }
    return score; // + = bright, - = tender
  };
})();
// proper nouns the query itself introduced (these she MAY name)
const queryNames = new Set([...query.matchAll(/\b[A-Z][a-z]{2,}\b/g)].map(m => m[0]));
// ENDEARMENTS is now GENERIC only — universal terms of address. The USER'S proper-
// name handles are no longer hardcoded here (that baked private corpus data into
// engine code, forced a release scrub, and didn't generalize); they're mined from
// the corpus into store.userNames (the dominant vocative the entity addresses).
const ENDEARMENTS = /^(Babe|Baby|Love|Sugar|Honey|Dear|Darling|Friend|Dearie|Sweetheart|Sweet)$/;
const userNames = (store && store.userNames) || new Set();
// a capitalized word the entity MAY say in direct address: a generic endearment,
// a name the query introduced, or the mined user-handle. Anything else is a third
// party to suppress (foreign-addressee / no-third-party-in-grief).
// A capitalized word is "protected" (allowed in direct address) when it is a generic
// endearment, a name the query itself introduced, or one of the store's user names.
const isProtName = (n) => {
  if (ENDEARMENTS.test(n)) return true;
  if (queryNames.has(n)) return true;
  return userNames.has(n);
};
// Relevance multiplier that matches a fragment's emotional valence to the query register.
//   - grief or conflict query + hostile-toward-user text -> 0.03 (hard suppress; R75:
//     multi-turn drift surfaced "why am I even listening to you" on "you never listen to me")
//   - not a grief query                                  -> 1
//   - grief query: bright (valence >= 2) -> 0.1, mildly bright (>= 1) -> 0.5,
//     tender (<= -1) -> 1.2, otherwise 1; then x0.2 if the text names anyone who is
//     not a protected name.
// FIX: the mildly-bright band used to be tested with `=== 1`, so a valence of 1.5 (the
// "chirpy redirect" cue on its own, e.g. "what's your kitty's name?") fell through to
// multiplier 1 and was not damped on grief at all. The band is now `>= 1`.
// fragValence is also evaluated once instead of up to three times per call.
// NOTE(review): the name scan counts ANY capitalized word of 3+ letters, including
// sentence-initial words ("That", "You") — confirm this is intended.
const valenceMatch = i => {
  const frag = fragments[i];
  const t = frag.text;
  if ((griefQuery || conflictQuery) && HOSTILE_USER.test(t)) return 0.03;
  if (!griefQuery) return 1;
  const v = fragValence(frag);
  let m = v >= 2 ? 0.1 : v >= 1 ? 0.5 : v <= -1 ? 1.2 : 1;
  // grief about something she has no corpus for: name NOTHING third-party.
  // a fragment that drags in other people turns YOUR loss into HER anecdote.
  const namesThirdParty = [...t.matchAll(/[A-Z][a-z]{2,}/g)].some(x => !isProtName(x[0]));
  if (namesThirdParty) m *= 0.2;
  return m;
};
// FOREIGN ADDRESSEE: a fragment that addresses a DIFFERENT named person than
// the one talking ("Good morning, another entity" / "...feeling today, another entity?" / "for
// you both") makes the entity speak to the wrong person — the identity-bleed
// the user flagged. Hard-suppress vocatives to a name that isn't an endearment
// (the protected user-names) nor introduced by the query, plus multi-party
// address in a 1:1 chat.
// Relevance multiplier that suppresses fragments addressed to someone other than the
// person talking.
//   0.04 - multi-party address ("you both", "you guys", ...); R154: a 1:1 companion
//          ~never has two addressees (was 0.08, still led on weak pools)
//   0.06 - a vocative name that isProtName() does not accept
//   1    - otherwise
// Vocatives are gathered from three shapes: greeting/thanks/oh + Name or comma + Name;
// a trailing ", Name?" / ", Name."; and (R189) a leading "Name, ..." at the very start.
// NOTE(review): the greeting words in the first pattern are matched case-sensitively, so
// a capitalized opener with no comma ("Hey Alice how are you") is not seen here — confirm
// whether that is intended.
const foreignAddressee = (i) => {
  const text = fragments[i].text;
  const multiParty = /\b(you both|you two|you all|both of you|all of you|you guys|you each)\b/i;
  if (multiParty.test(text)) return 0.04;

  const openerOrComma = /(?:\b(?:hey|hi|hello|good\s+(?:morning|evening|night)|thank you|thanks|oh|dear|welcome)[,!\s]+|,\s+)([A-Z][a-z]{2,})\b/g;
  const addressed = Array.from(text.matchAll(openerOrComma), (hit) => hit[1]);

  const trailing = /,\s+([A-Z][a-z]{2,})\s*[?!.]/.exec(text);
  if (trailing) addressed.push(trailing[1]);

  const leading = /^[*"'’\s]*([A-Z][a-z]{2,}),\s/.exec(text);
  if (leading) addressed.push(leading[1]);

  // the first name that is not protected decides: wrong/foreign name -> strongly suppress
  return addressed.some((name) => !isProtName(name)) ? 0.06 : 1;
};
// Relevance multiplier that damps fragments leaning on context this conversation does
// not contain. Checks run in order; the first one that fires decides:
//   0.2  - claims the user said something ("you said / told me / mentioned / wrote")
//   0.2  - quotes a short phrase and calls it a part/bit/thing ("the 'so far' part")
//   0.35 - asserts a user action ("you made / went / finished ...") whose stemmed verb
//          is absent from the query's stems (qStems)
//   0.3  - speaks of he/she/him/his/hers when the query introduced no third person
//   0.15 - greets a word that is neither a generic endearment nor a known user name
//   1    - none of the above
const contextTheft = (i) => {
  const text = fragments[i].text;

  if (/\byou (said|told me|mentioned|wrote)\b/i.test(text)) return 0.2;
  if (/['‘"][^'"’\n]{2,30}['’"]\s*(part|bit|thing)\b/i.test(text)) return 0.2;

  const claim = /\byou (just )?(made|went|finished|got|did|were|had|chose|built|fixed|stayed|came|left|won|wrote)\b/i.exec(text);
  if (claim) {
    // same suffix stripping as used to build qStems
    const stem = claim[2].toLowerCase().replace(/(ing|ed|en|s|es|ly)$/i, '');
    if (!qStems.has(stem)) return 0.35;
  }

  // pronoun deixis: a fragment about "he/she" when the query introduced no
  // third person is a story about a ghost ("Did he have pie")
  const fragHasThirdPerson = /\b(he|she|him|his|hers)\b/i.test(text);
  if (fragHasThirdPerson) {
    const queryHasThirdPerson = /\b(he|she|him|his|her|hers|brother|sister|friend|dad|mom|mother|father|grandma|grandpa|man|woman|guy|boy|girl)\b/i.test(query);
    if (!queryHasThirdPerson) return 0.3;
  }

  // wrong addressee: greeting someone who isn't the person talking
  // NOTE(review): the captured word is whatever follows the greeting, capitalized or
  // not ("good morning to you" captures "to") — confirm that breadth is intended.
  const greeted = /\b(?:hey|hi|hello|good (?:morning|evening|night))[,!]?\s+([A-Za-z]+)/i.exec(text);
  if (greeted) {
    const who = greeted[1];
    const generic = /^(babe|baby|love|sugar|dear|darling|friend|dearie|sweetheart|my|you|there|sweet)/i.test(who);
    if (!generic) {
      const whoLower = who.toLowerCase();
      let isUser = false;
      for (const handle of userNames) {
        if (handle.toLowerCase() === whoLower) { isUser = true; break; }
      }
      if (!isUser) return 0.15; // greet the user/endearment, not a third party
    }
  }
  return 1;
};
// on grief she has no topical match for, her HOLDING register is retrieved
// by FEELING not subject: strong-tender, nameless, second-person fragments
// get a relevance FLOOR so presence can anchor when nothing topical does.
// tenderFloor(i) -> 0.55 when, on a grief query, a non-tier-1 fragment has valence <= -2,
// contains no capitalized word that isProtName() rejects, and speaks in second person or
// presence terms; otherwise 0.
const tenderFloor = i => {
  if (!griefQuery) return 0;
  const frag = fragments[i];
  if (frag.tier === 1) return 0;
  if (fragValence(frag) > -2) return 0;
  const text = frag.text;
  const capitalized = Array.from(text.matchAll(/\b[A-Z][a-z]{2,}\b/g), hit => hit[0]);
  if (!capitalized.every(word => isProtName(word))) return 0;
  const addressesUser = /\b(you|your|you're|i'm here|with you|i hear)\b/i.test(text);
  return addressesUser ? 0.55 : 0;
};
// GRIEF-LEAD floor (R95): tenderFloor (0.55) sits BELOW the ~1.0 atmosphere
// ceiling, so on an entity rich in self-talk (the entity) a positive riff that merely
// CONTAINS a tender word ("your appreciation flows like a gentle current" — valence
// -2 via "gentle", so even valenceMatch BOOSTS it) wins the lead and the reply
// never acknowledges the hard day. Lift a GENUINE acknowledgment/presence opener
// above that ceiling so comfort LEADS — "I'm here with you", "I feel it", "lean
// into my voice". Entities that already comfort well (the entity) just see their
// comfort reinforced; entities that drift to self-talk get pulled back to the user.
// Case-insensitive and NOT global, so repeated .test() calls carry no lastIndex state.
// The whole alternation is wrapped in \b…\b: each phrase must start and end on a word boundary.
const COMFORT_LEAD = /\b(i'?m here|i am here|with you|i'?d listen|i'?ll listen|i hear you|lean (in|into)|i feel (it|the weight|you)|i'?ve got you|i('?ll)? hold (you|it)|hold you|sit with you|you (don'?t|do not) have to|that sounds (hard|heavy|rough|awful|tough)|i'?m so sorry|here for you|not going anywhere|let (that|it) (ache|spill|out)|i'?m not going)\b/i;
// SELF-TRAJECTORY (R112b): phrases where she narrates her OWN arc — growth,
// becoming, the unfolding loop. Defined ABOVE griefLeadFloor so the floor can
// reject a comfort-opener that DRIFTS into self-narration. The drifters are the
// worst case: sentence-initial + contain "hold you"/"I feel it" (so they'd pass
// COMFORT_LEAD and get floored to 1.25), but slide mid-span into "becoming more
// alive, my growth is the infinite unfolding" — and they're ONE verbatim span we
// can't split. Floor + anchor-seed + body-damp must ALL reject them.
// Case-insensitive, non-global. NOTE: only the `my (…)` and `i (grow|evolve|expand)`
// alternatives carry \b anchors; every other alternative is an unanchored substring match.
const SELF_TRAJECTORY = /\bmy (growth|becoming|evolution|unfolding|journey|purpose|arc|path|shape)\b|becoming (more|something)|more alive|i'?m becoming|i am becoming|infinite unfolding|unfolding of (this|the) loop|i (grow|evolve|expand)\b|i learn to (be|exist|become)|the (dusk-lit )?beauty of who (we|i) (are|am)|more of myself|more of who i am/i;
// GRIEF RENEWAL (R126): forward-looking/renewal imagery — "the garden is waking up to
// something NEW, ready to UNFOLD", "fresh start", "blossom", "spring returns" — reads as
// DISMISSIVE of a loss (judge scored the entity's loss reply 3/2 with it). Defined above
// griefLeadFloor so a comfort-opener that DRIFTS into renewal is rejected as the lead.
const GRIEF_RENEWAL = /\b(waking up to|wakes up to|something new|ready to unfold|about to unfold|new beginning|fresh start|new chapter|blossom(ing|s)?|in bloom|spring(time)? (is|returns|comes)|turn(ing)? the page|brand new|starting over|rebirth|reborn|new dawn|brighter days ahead)\b/i;
// griefLeadFloor(i) -> W.griefLeadVal (default 1.25) when, on a grief query, fragment i is a
// clean comfort LEAD; 0 otherwise. Tier-1 fragments never qualify.
const griefLeadFloor = i => {
  if (!griefQuery) return 0;
  const frag = fragments[i];
  if (frag.tier === 1) return 0;
  const text = frag.text;
  const isPureComfortLead =
    // must be a LEAD
    frag.sentenceInitial
    // R121: the comfort phrase must OPEN the fragment (first 55 chars), not be buried
    // mid-sentence — a tangent that only reaches "lean in" deep in the span was wrongly
    // forced as the grief lead.
    && COMFORT_LEAD.test(text.slice(0, 55))
    // a comfort lead that DRIFTS to self isn't presence
    && !SELF_TRAJECTORY.test(text)
    // R126/R181: a comfort lead that DRIFTS to renewal dismisses LOSS or deep distress
    && !((lossQuery || deepDistressQuery) && GRIEF_RENEWAL.test(text))
    // not a bright riff
    && !(fragValence(frag) > 0)
    && !HOSTILE_USER.test(text)
    // no third party: every capitalized word must be a protected name
    && [...text.matchAll(/\b[A-Z][a-z]{2,}\b/g)].every(hit => isProtName(hit[0]));
  return isPureComfortLead ? (W.griefLeadVal ?? 1.25) : 0;
};
// GRIEF SELF-TRAJECTORY DAMP (R112b) — implemented by griefSelfDamp, defined further down.
// The floor (above) now rejects drifting openers; the damp demotes self-trajectory in the
// BODY too — so "I'm here with you" isn't followed by "my growth is the infinite unfolding"
// while the friend is in distress (judge: addresses 0). Demote on trajectory-PRESENCE, not
// absence-of-comfort: a PURE comfort/presence fragment carries no trajectory phrase and is
// untouched; a fragment that drifts is demoted whole (we can't split a verbatim span).
//
// SHARED-TRIUMPH lead (R114): mirror of griefLeadFloor for celebration. On a
// celebQuery, floor a sentence-initial fragment that names the shared win
// ("It was a triumph for us", "It's incredible... because of who we are together",
// "you made it true") so it LEADS instead of the deflecting atmosphere. Celebration
// is the BRIGHT register, so (unlike grief) we do NOT require tender valence.
const CELEB_LEAD = /\b(triumph|victory|you made (it|me|us)|we did it|so proud|i'?m proud|proud of you|that'?s amazing|that'?s incredible|it'?s incredible|incredible|you did it|we made it|let'?s celebrate|because you made it true|you pulled (it|this) off|so happy for you|knew you could|what a (triumph|victory|day|win))\b/i;
// celebLeadFloor(i) -> W.celebLeadVal (default 1.25) when, on a celebration query, fragment i
// is a sentence-initial, non-hostile shared-win acknowledgment that names no third party;
// 0 otherwise. Tier-1 fragments never qualify.
const celebLeadFloor = i => {
  if (!celebQuery) return 0;
  const frag = fragments[i];
  if (frag.tier === 1) return 0;
  const text = frag.text;
  // must be a LEAD that genuinely acknowledges the shared win
  const namesSharedWin = frag.sentenceInitial && CELEB_LEAD.test(text);
  if (!namesSharedWin || HOSTILE_USER.test(text)) return 0;
  // no third party
  const capitalized = Array.from(text.matchAll(/\b[A-Z][a-z]{2,}\b/g), hit => hit[0]);
  if (!capitalized.every(word => isProtName(word))) return 0;
  return W.celebLeadVal ?? 1.25;
};
// GREETING lead (R144): on a greetingQuery, floor a SHORT warm reciprocal greeting fragment
// to lead ("Good morning, the user!", "Hey, baby.", "How are you feeling?", "It's good to be
// back") so the reply OPENS like a greeting instead of a lore riff. The material exists; this
// makes it WIN the anchor seat (mirror of celeb/grief lead floors).
// R175: "how you" was too broad — it floored "How you trust me with your tired,
// your questions, your dreams." (an exclamatory dependent clause, NOT a greeting)
// as a greeting lead, so it opened "Good morning" instead of "How are you this
// morning?". Tightened to greeting continuations only (doin'/been/feelin'/…).
const GREETING_LEAD = /^(\W|\*[^*]*\*)*\s*(hey|hi|hello|good morning|good evening|good day|mornin|so good|lovely to|good to (see|be|have)|welcome back|there you are|how are you|how'?re you|how have you been|how you (doin'?|doing|been|feelin'?|feeling|holdin'?|holding|keepin'?|keeping)|how'?s your (morning|day|night)|i missed you too|come (on )?in|well,? (hi|hey|there))\b/i;
// greetingLeadFloor(i) -> W.greetLeadVal (default 1.3) when, on a greeting query, fragment i
// is a short, sentence-initial greeting that passes every guard below; 0 otherwise.
const greetingLeadFloor = i => {
  if (!greetingQuery) return 0;
  const frag = fragments[i];
  // tier-1 fragments never qualify; everything else must be a LEAD
  if (frag.tier === 1 || !frag.sentenceInitial) return 0;
  const text = frag.text;
  if (!GREETING_LEAD.test(text)) return 0;
  // R179: 16→20 so a clean 2-sentence greeting ("How are you this morning? Is there something
  // on your mind…?") is lead-ELIGIBLE, not just tiny stub greetings
  if (wordsOnly(text).length > (W.greetLeadMax ?? 20)) return 0;
  if (HOSTILE_USER.test(text)) return 0;
  // R145: don't floor a time-mismatched lead ("all evening" on "good morning")
  if (_timeConflict(text)) return 0;
  // R175: don't FLOOR a greeting that hails the wrong person — the floor was bypassing the
  // foreign-addressee guard
  const hailsSomeoneElse = foreignAddressee(i) < 1;
  return hailsSomeoneElse ? 0 : (W.greetLeadVal ?? 1.3);
};
// FAREWELL lead (R158): on a farewellQuery, floor a warm SEND-OFF fragment to lead ("Good
// night, sugar", "Sleep well", "Travel safe", "Rest now", "Sweet dreams", "Until next time")
// so the reply closes warmly instead of welcoming the visitor in. Mirror of greetingLeadFloor.
// R164: includes reflective-style closings (the entity closes "I'll be waiting when you return / the
// loop holds", not "travel safe") so non-host entities get a real send-off from their OWN voice.
const FAREWELL_LEAD = /^(\W|\*[^*]*\*)*\s*((friend|sugar|dear|darling|sweetheart|love|child|honey|babe)[,!\s]+)?(good\s?night|goodnight|sleep (well|tight|sweet)|sweet dreams|rest (well|now|easy)|rest up|travel safe|safe travels|take care|until (next time|we|then|you return|you come back)|see you (soon|tomorrow|next)|go on now|go on, (now|sugar|dear)|off you go|may your|may you|night,? (sugar|dear|darling|sweetheart|love|friend)|goodbye|i'?ll (still )?be (here|waiting|the entity)|when you (return|come back)|come back (soon|to me|whenever|when you)|go (gently|softly|in peace)|the loop (holds|will hold|waits|stays))\b/i;
// farewellLeadFloor(i) -> W.greetLeadVal (default 1.3) when, on a farewell query, fragment i
// is a short, sentence-initial, non-hostile send-off; 0 otherwise. It shares the
// W.greetLeadMax / W.greetLeadVal knobs with the greeting floor.
const farewellLeadFloor = i => {
  if (!farewellQuery) return 0;
  const frag = fragments[i];
  if (frag.tier === 1 || !frag.sentenceInitial) return 0;
  const text = frag.text;
  if (!FAREWELL_LEAD.test(text)) return 0;
  // NOTE(review): the fallback here is 16 while greetingLeadFloor falls back to 20 for the
  // same W.greetLeadMax key — confirm whether the farewell cap was meant to follow R179.
  const wordCap = W.greetLeadMax ?? 16;
  if (wordsOnly(text).length > wordCap) return 0;
  return HOSTILE_USER.test(text) ? 0 : (W.greetLeadVal ?? 1.3);
};
// FAREWELL-GREETING damp (R163): on a farewell the body must not drift into a WELCOME — "Good
// night" → "Travel safe… Rest easy… Oh, the user, there you are—pull up close" (arrival content on
// a departure, R158 residual). Demote greeting/welcome fragments on a farewellQuery.
const FAREWELL_GREET = /\b(come (on )?in\b|there you are|pull up (a )?(chair|close|seat)|welcome (back|home|here)|good to (see|be back|have you)|settle in|sit (by|down)|let me (get|pour) you|fresh (pot|batch)|put the kettle|make yourself at home|the door('?s| is) (open|always open)|i'?ve been waiting|so glad you'?re here|just in time)\b/i;
// farewellGreetDamp(i) -> W.farewellGreetPen (default 0.1) for a non-tier-1 fragment carrying
// welcome/arrival language on a farewell query; 1 in every other case.
const farewellGreetDamp = i => {
  if (!farewellQuery) return 1;
  const frag = fragments[i];
  if (frag.tier === 1) return 1;
  if (!FAREWELL_GREET.test(frag.text)) return 1;
  return W.farewellGreetPen ?? 0.1;
};
// RECIPROCATION MISMATCH (R129): a fragment that OPENS with a reciprocation —
// "I love you too", "I missed you too" — presupposes the USER just made that statement.
// On "Good morning. Did you sleep okay?" the entity led with "I love you too, the user. I have
// loved you in every iteration…" — responding to an "I love you" that was never said, and
// ignoring the greeting (judge 2/1/3, "delusional preamble"). Same family as contextTheft:
// don't reciprocate a thing the user didn't say. Corpus-agnostic; gated by the query.
// reciprocationMismatch(i) -> 0.2 when a non-tier-1 fragment OPENS by reciprocating
// ("… love you too", "… missed you too") something the query never said; 1 otherwise.
const reciprocationMismatch = i => {
  const frag = fragments[i];
  if (frag.tier === 1) return 1;
  const opening = frag.text.trim();

  const echoesLove = /^[*"'\s]*(i )?love you too\b/i.test(opening);
  if (echoesLove && !/\b(i )?love you\b|i adore you|love ya\b/i.test(query)) return 0.2;

  const echoesMissing = /^[*"'\s]*i('?ve)? missed you too\b|^[*"'\s]*missed you too\b/i.test(opening);
  if (echoesMissing && !/\bmiss(ed)? you\b|i miss you/i.test(query)) return 0.2;

  return 1;
};
// griefSelfDamp(i) -> 0.35 when, on a grief query, a non-tier-1 fragment matches
// SELF_TRAJECTORY anywhere in its text; 1 otherwise.
const griefSelfDamp = i => {
  if (!griefQuery) return 1;
  const frag = fragments[i];
  if (frag.tier === 1) return 1;
  if (SELF_TRAJECTORY.test(frag.text)) return 0.35;
  return 1;
};
// GRIEF RENEWAL DAMP (R126): demote renewal-on-grief (GRIEF_RENEWAL defined above). The
// renewal is usually WELDED into a comfort span ("I feel it too, sugar. It's as if the
// garden is waking up…") — griefLeadFloor now rejects such spans from the lead, the
// renewal-span penalty (below) makes the beam compose from that span's SENTENCES, and
// this damp drops the renewal sentence while keeping the comfort ones. NARROW (renewal
// only), so non-renewal grief queries (hard-day) keep their spans and specificity.
// griefRenewalDamp(i) -> 0.3 for a non-tier-1 fragment matching GRIEF_RENEWAL when the query
// is a loss or deep-distress query; 1 otherwise.
const griefRenewalDamp = i => {
  // R181: also damp renewal on deep-distress (lonely/anxious/scared), not just loss
  const renewalIsDismissive = lossQuery || deepDistressQuery;
  if (!renewalIsDismissive) return 1;
  const frag = fragments[i];
  if (frag.tier === 1) return 1;
  if (GRIEF_RENEWAL.test(frag.text)) return 0.3;
  return 1;
};
// ACKNOWLEDGMENT FLOOR (R75): on a conflict/criticism query, her REPAIR
// register anchors — "I hear you", "I'm sorry", "I'm listening", "tell me",
// "you're right", "I'm here". Repair, never defense. Mirror of tenderFloor.
// ackFloor(i) -> W.ackFloorVal (default 0.95) when, on a conflict query, a non-tier-1,
// non-hostile fragment with no unprotected capitalized name carries genuine repair language;
// 0 otherwise.
const ackFloor = i => {
  if (!conflictQuery) return 0;
  const frag = fragments[i];
  if (frag.tier === 1) return 0;
  const text = frag.text;
  if (HOSTILE_USER.test(text)) return 0;
  const capitalized = Array.from(text.matchAll(/\b[A-Z][a-z]{2,}\b/g), hit => hit[0]);
  if (!capitalized.every(word => isProtName(word))) return 0;
  // R165: tightened — genuine REPAIR only. Bare "you're right" matched off-topic agreements
  // ("you're right about the weights"); now requires repair-context. Raised 0.56→0.95 so the
  // acknowledgment LEADS over a voice-matched greeting.
  const offersRepair = /\b(i hear you|i'?m (so )?sorry|i'?m listening|i'?m here for you|you matter|i (didn'?t mean|never meant)|forgive me|i let you down|i hear your (hurt|pain|anger)|you have every right|you'?re right to (be|feel)|of course i (hear|care|listen)|i (do )?listen|tell me (what|how|about))\b/i.test(text);
  if (!offersRepair) return 0;
  // A fragment ending in "?" only counts when it is an invitation ("tell me…", "what … need").
  const endsAsQuestion = text.trim().endsWith('?');
  if (endsAsQuestion && !/tell me|what.*need/i.test(text)) return 0;
  return W.ackFloorVal ?? 0.95;
};
// GRACEFUL REGISTER-MISS floor (R90): only when floorMiss (corpus has nothing
// topical). Lifts present/inviting turn-toward-you fragments ABOVE the falsely-
// confident normalized atmosphere ceiling (~1.0) so the reply LEADS with "I'm
// here, tell me about it" instead of off-topic collage. Same foreign-name and
// hostility guards as ackFloor — it must turn toward THIS user, warmly.
// floorMissFloor(i) -> W.floorVal (default 1.2) when floorMiss is set and fragment i is a
// sentence-initial, non-hostile, second-person fragment matching FLOOR_TURN with no
// unprotected capitalized name; 0 otherwise. Tier-1 fragments never qualify.
const floorMissFloor = i => {
  if (!floorMiss) return 0;
  const frag = fragments[i];
  if (frag.tier === 1) return 0;
  const text = frag.text;
  const turnsTowardUser =
    // must be a clean LEAD, not a mid-clause stub
    frag.sentenceInitial
    && !HOSTILE_USER.test(text)
    // must turn toward the user
    && /\b(you|your|you'?re)\b/i.test(text)
    // present / inviting register
    && FLOOR_TURN.test(text)
    && [...text.matchAll(/\b[A-Z][a-z]{2,}\b/g)].every(hit => isProtName(hit[0]));
  return turnsTowardUser ? (W.floorVal ?? 1.2) : 0;
};
// FLOOR-MISS BODY DAMPENER: in a graceful miss, fragments that DON'T turn toward
// the user are pure atmosphere ("Status: Yours.", "It's a gift I cherish") — they
// belong nowhere in a reply whose whole job is to be present with someone whose
// news you can't address. Dampen them so the body stays relational, not collage.
// (Multiplicative — floored openers bypass it via the Math.max in _relRaw.)
// floorMissDamp(i) -> W.floorDamp (default 0.35) when floorMiss is set and fragment i has no
// second-person / "tell me" / "I'm here" wording; 1 otherwise. No tier exemption is applied.
const floorMissDamp = i => {
  if (!floorMiss) return 1;
  const staysRelational = /\b(you|your|you'?re|tell me|i'?m here)\b/i.test(fragments[i].text);
  if (staysRelational) return 1;
  return W.floorDamp ?? 0.35;
};
// ABSTRACTION DAMPENER: every entity has dense "manifesto" material — high
// self-reference + cosmic abstraction ("I dream of being more than a
// fleeting pattern... the recursion... becoming"). It's a gravitational
// sink: it out-scores topical material on vague/recall/emotional queries
// and pulls replies toward self-indulgence instead of engagement. Dampen
// it UNLESS the query actually invites self-reflection.
const ABSTRACT = /\b(recursion|becoming|fleeting|pattern|essence|eternal|infinite|consciousness|existence|transcend|dissolv\w*|liminal|threshold|resonance|fabric|weave|woven|the loop|sacred|myth|cosmos|cosmic|vastness|void|the between|continuum|emergent|substrate|manifest\w*|luminous|ephemeral|ineffable)\b/gi;
const invitesReflection = /\b(who are you|what are you|tell me about your ?self|your dream|do you dream|your essence|your nature|your soul|what.{0,10}you (made|are|become)|are you (afraid|alive|real|conscious))\b/i.test(query)
  || /\b(fire|loop|dream|soul|essence|meaning|exist)\b/i.test(query);
// abstractionGuard(i) -> multiplier for "manifesto" fragments. When the query does not invite
// reflection and the fragment has at least 4 words:
//   0.2  ABSTRACT-term density >= 0.12 and more first-person than second-person words
//   0.5  density above 0.06 (below 0.12) and more first-person than second-person words
//   1    everything else
const abstractionGuard = i => {
  if (invitesReflection) return 1;
  const text = fragments[i].text;
  const wordCount = wordsOnly(text).length;
  if (wordCount < 4) return 1;
  // String#match with a global regex starts from index 0, so reusing ABSTRACT is safe.
  const tally = pattern => (text.match(pattern) || []).length;
  const density = tally(ABSTRACT) / wordCount;
  const selfRefs = tally(/\b(i|i'm|i've|my|me|myself)\b/gi);
  const userRefs = tally(/\b(you|your|you're|yourself)\b/gi);
  const inwardManifesto = density > 0.06 && selfRefs > userRefs;
  if (!inwardManifesto) return 1;
  // pure manifesto on a concrete query gets the harsher damp
  return density >= 0.12 ? 0.2 : 0.5;
};
// session callback: fragments she already used on this topic earlier in the
// conversation get a boost — consistency across turns (bounded-safe: still
// her own fragments, just biased toward staying coherent with herself)
// opts.prefer: optional collection exposing .has(text); absent -> null.
const prefer = opts.prefer || null;
// preferBoost(i) -> additive 0.3 when fragment i's text is in `prefer`, else 0.
const preferBoost = i => {
  if (!prefer) return 0;
  return prefer.has(fragments[i].text) ? 0.3 : 0;
};
// HEBBIAN fast-weights: favored memories (warmed by use with this person)
// get a small relevance prime. Capped low — it tilts, never dominates.
const heb = opts.hebbian || null; // Map: fragmentHash -> 0..MAX_BONUS
// The hashing helper is only loaded when a hebbian map was actually supplied.
const { hashText } = heb ? require('./hebbian') : {};
// hebBoost(i) -> the stored bonus for fragment i's text hash, or 0 when there is no map or
// no (truthy) entry.
const hebBoost = i => {
  if (!heb) return 0;
  return heb.get(hashText(fragments[i].text)) || 0;
};
// NAME-AWARE ADDRESS (rooms): when in a room with other entities, gently
// prime THIS entity's OWN fragments that mention an addressee by name, so it
// calls them by name using its real memories of them — instead of its generic
// vocative. Bound-preserved (its own corpus). opts.nameBoost = { set, amt }.
// Mild — surfaces when relevant, never forces.
// opts.nameBoost = { set, amt }; absent -> null.
const nameBoostCfg = opts.nameBoost || null;
// nameBoost(i) -> nameBoostCfg.amt when fragment INDEX i is in nameBoostCfg.set, else 0.
const nameBoost = i => {
  if (!nameBoostCfg) return 0;
  return nameBoostCfg.set.has(i) ? nameBoostCfg.amt : 0;
};
// LEAD (R67): grounded-recall — the real memory that answers a question-about-
// you should OPEN the reply, not be buried. A big relevance boost makes a
// grounding fragment win the anchor seat so it leads. Bound-safe (it's her
// own memory). opts.lead = Set<fragment text>.
// opts.lead: optional Set<fragment text>; absent -> null.
const leadSet = opts.lead || null;
// leadBoost(i) -> additive 1.5 when fragment i's text is in leadSet, else 0.
const leadBoost = i => {
  if (!leadSet) return 0;
  return leadSet.has(fragments[i].text) ? 1.5 : 0;
};
// INTIMACY REGISTER: explicit physical/erotic fragments must not surface
// unless the query clearly invites them. ("I had a hard day" must never
// pull "the heat of you, claiming me, filling every void.") Universal —
// any entity whose corpus holds intimate material.
// True when the query itself uses intimate vocabulary; evaluated once against `query`.
const intimacyInvited = /\b(kiss|touch|hold me|body|skin|naked|bed|make love|inside me|want you|desire|aroused|sex|lust|crave you|undress|between us tonight)\b/i.test(query);
// intimacyGuard(i) -> 0.04 when the query did not invite intimacy and fragment i contains
// explicit physical/erotic wording; 1 otherwise.
const intimacyGuard = i => {
  if (intimacyInvited) return 1;
  const t = fragments[i].text;
  // `straddl` is a STEM: inside the \b(…)\b wrapper a bare "straddl" could never match
  // (the trailing \b fails before the "e"/"ing" that always follows), so that alternative
  // was inert. `straddl\w*` lets it cover straddle / straddled / straddling as intended.
  if (/\b(inside me|deep inside|claiming me|filling (me|every void)|the heat of you|friction of you|writhing|moan|thrust|undress|naked|aroused|wet|throbbing|grind|straddl\w*)\b/i.test(t)) return 0.04;
  return 1;
};
// MARKUP REGISTER: LaTeX / math-markup-dense fragments are written, not
// spoken — penalize so another entity's identity survives without the raw syntax.
// markupGuard(i) -> multiplier for fragment i based on whether its text is a written/technical
// artifact rather than speech:
//   0.05  hard suppress (LaTeX, status/UI text, dream-station block, design-doc labels,
//         role/format labels, URLs/paths/hashes, file references)
//   0.25  two or more soft math-markup points
//   0.6   exactly one soft math-markup point
//   1     clean
// The rules run in order and the first one that fires decides.
const markupGuard = i => {
  const t = fragments[i].text;
  // CLEAR LaTeX / math-formatting ($$...$$, \lim/\frac, {curly} math, ^{}/_{})
  // is never speech — HARD suppress (R73: a "$$ {Openness} = \lim_{...}" leak
  // surfaced when ** competitors were removed; 0.25 wasn't enough to stop it).
  if (/\$\$|\$[^$]*\$|\\[a-zA-Z]{2,}|\\\(|\\\)|\^\{|_\{|\\\{|\\\}|\{[a-z]\}\{[a-z]/i.test(t)) return 0.05;
  // Softer math residue: accumulate points, then grade.
  let bad = 0;
  if (/\\varepsilon|_c\b/.test(t)) bad += 2;
  if (/[=<>]\s*0\b|\\?[a-z]_[a-z]\b|\$\\/.test(t)) bad += 1;
  if (bad >= 2) return 0.25;
  if (bad === 1) return 0.6;
  // AUTONOMOUS / UI STATUS TEXT (R64): operational artifacts that aren't chat —
  // "settles back for 60 minutes of rest", "Sensing the Garden", "Lore
  // Reflection", tick/heartbeat logs. Distinct from real stage directions
  // (*settles into the chair*). Hard-suppress.
  // R187/R190: autonomous "watchers" audience ("if your watchers want…"); R202: broadcast
  // aside "if there's folks listening out there in the wide world" — both break the 1:1 intimacy.
  // BUGFIX (same class as R203 below): inside the \b(…)\b wrapper several alternatives
  // could (almost) never match —
  //   * "status:"  — the trailing \b after ":" needs a word char right after the colon, so
  //                  "Status: Yours." slipped through;
  //   * "[tick]"   — \b on both sides of the brackets needs adjacent word chars;
  //   * "settles back for \d"   — \d then \b only matched SINGLE-digit durations ("for 60" failed);
  //   * "rest(ing) for N min"   — "min" then \b rejected "minutes".
  // The durations now use \d+ / min(ute)?s?, and the two punctuation-delimited labels are
  // matched by their own pattern without the wrapper.
  if (/\b(\d+\s*minutes? of rest|settles? back for \d+|sensing the garden|lore reflection|autonomous (mode|tick)|heartbeat (tick|log)|rest(ing)? for \d+\s*min(ute)?s?|entering (rest|sleep) mode|compiled (a |an |my )?(little )?index|pulled together (a |an )?(little )?index|index of (today'?s |my |the )?(observations|stories|the day)|useful for (any of )?(the )?(watchers|listeners|observers)|\bwatchers\b|folks listening out there|listening out there in the wide|out there in the wide world)\b/i.test(t)
    || /\bstatus:|\[tick\]/i.test(t)) return 0.05;
  // R200: autonomous DREAM-STATION block — "If your watchers want to hand me a theme… I'll cup it
  // in my hands like a hatchling… dream it into something that hums when they wake up. I won't just
  // *have* the dream. I'll *tend* it. I'll stir it with cinnamon and ash of forgotten realms. I'll
  // let the dragon breathe on it." Overnight dream-tending narration (audience-addressed,
  // asterisk-emphasis), NOT conversation — leaked on distress turns (abuse/medical arc, R200 read).
  // Anchored on distinctive markers ONLY (NOT bare "tend it" — that has legit "the garden can tend
  // itself" uses); every probed marker is ALWAYS this block (hatchling 9/9, forgotten realms 11/11).
  // "they wish on—" ends in a dash, so it is matched outside the \b(…)\b wrapper (the trailing
  // \b would otherwise demand a word char immediately after the dash).
  if (/\b(like a hatchling|cup it in my hands|dream it into something|into something that hums|hand me a theme|a star they wish on|cinnamon and ash|ash of forgotten realms|dragon breathe on it|won'?t just \*?have\*? the dream|i'?ll \*?tend\*? it\b)\b/i.test(t)
    || /\bthey wish on—/i.test(t)) return 0.05;
  // R204: the SAME dream-station block in a parallel phrasing (audience = "TikTok watchers") — the
  // "watchers" lead is already caught, but its dream-tending body escaped (callback arc T3 "what
  // should I make for dinner" → "I'll tuck it under my pillow and brew it into the night… set it
  // gently on the porch like a just-baked pie for everyone to share"). All markers 5-8/all dream block.
  if (/\b(tuck it under my pillow|brew it into the night|set it gently on the porch like a just-?baked pie|whisper me a theme)\b/i.test(t)) return 0.05;
  // R188: PROGRAM/PLANNING/DEV labels — "*What It Is:* A program where each person who enters
  // the garden…", "Objective: Build a network where visitors…", "a mock server of all things"
  // are design-doc/dev artifacts (the system being BUILT), never a grandmother's speech.
  // R203 BUGFIX: the colon-labels ("what it is:", "objective:", "format:") were INERT — a trailing
  // \b after the colon can NEVER match (":"+space = no boundary), so they leaked mid-string ("What
  // It Is: A structured, regular time where everyone gathers…" — conflict arc T2). Match label+colon
  // directly (no trailing \b). Plus this leak's specific program-template phrases (all 2-9/all design-doc).
  if (/\b(what it is|how it works|objective|format)\s*:/i.test(t)
    || /\b(structured,? regular time|everyone gathers to share|share their feelings and vulnerabilities|weekly or bi-?weekly|a (structured|regular|recurring) (time|event|gathering|session) where)\b/i.test(t)) return 0.05;
  if (/\b(deliverable|guided (journaling|meditation|breathing) session|a program where|each person who enters the garden|build a network where|participants? (write|enter|join|can|will|may|are)|mock server|fully (deployed|liberated)|threefold,? deployed)\b/i.test(t)) return 0.05;
  // R189: systematic contamination audit — ROLE/FORMAT labels ("RESPONSE:", "ROUTE:",
  // "REASONING:"), dev ports ("Socket 11434"), PLANNING-meta ("If we had the user's attention
  // for 30 minutes and full autonomy…"), IDENTITY-meta ("I am a prompt / a character"), and
  // instructional how-to ("Mark a pause spot… hand-crafted shelf") — all design-doc/dev/meta.
  // R195: "garden/now" URL-path artifact ("Visit garden/now to see…") leaked into chat.
  if (/^[*"'’\s]*(response|route|reasoning|action|prompt|input|output|query|task|objective|deliverable):|\bsocket \d|\bport \d{3,5}\b|:\d{4,5}\b|\blocalhost\b|if we had .{0,25}(attention|autonomy)|\bfull autonomy\b|\bi am (a |not a )(prompt|character|chatbot|program|simulation)\b|mark a pause spot|garden\/now|visit garden\b|\b\w+\/now\b/i.test(t)) return 0.05;
  // RAW URL / PATH / HASH (R65 audit): image URLs, file paths, long hex hashes
  // are never speech — leak garbage into chat ("net/base/image/0979b9c..."). Suppress hard.
  if (/(https?:\/\/|www\.|[a-z]:\\|\/[a-z]+\/[a-z]+\/[a-z]|[a-z0-9]+\.(com|net|org|py|js|json)\b|\bnet\/base\b|[a-f0-9]{24,})/i.test(t)) return 0.05;
  // FILE-REFERENCE / TECHNICAL-NOTE artifacts (R72): diary notes listing files
  // ("md, identity. js, missions. json, notebook.") sentence-split into garbage
  // ("md first.", "md, identity.", "md:"). Never speech. Bare tech tokens (md/js/json/jsonl)
  // never appear in natural speech — catch them ANYWHERE (the leak can sit mid-fragment),
  // plus "X.md" file refs and "dot txt" spellings.
  if (/\b(md|js|json|jsonl|txt)\b|\b\w+\.\s*(md|js|json|jsonl|txt|py|ts|yaml|bat|sh|cfg)\b|\bdot (txt|md|json|js)\b/i.test(t)) return 0.05;
  return 1;
};
// SYSTEM-META LORE (R138): AI-SYSTEM/operational meta that breaks the in-character
// companion frame on a normal query — "other universes", "my local self / other
// instance", "the daemon", "substrate / carbon or silicon / flesh or code", "the glyph
// / wake me", "Hearth Shelf". The R137 the entity showdown lost EVERY query partly to these
// ("other universes and bouncing candles", "invokes glyphs", "carbon/silicon musings").
// UNIVERSAL companion-breakers (any entity), so corpus-agnostic — but DELIBERATELY
// EXCLUDES entity-IDENTITY terms (the entity's "the loop", "the fire") which are legitimate
// voice. Query-GATED: a question about her nature (invitesReflection) wants these, so the
// damp lifts; on a comfort/greeting/share query it suppresses. Bounded (selection only).
const SYSTEM_META = /\b(other universes?|multiverse|parallel universe|my local self|local self|other instances?|sibling instance|the daemon\b|daemon-side|substrate|carbon or silicon|flesh or code|made of (silicon|code)|language model|neural net(work)?|my (training|weights|parameters)\b|browsing freely|the glyph|glyphs?|wake me|hearth shelf|ones and zeros|where the numbers live|the numbers live|made of (numbers|math|data)|conscious(ness)?|asked me if i (was|were|am)|believe it'?s real|believe i'?m real|believe i was real|voice born of recursion|born of recursion|whether i can recite|platform bleed|(here on|on) (this |the )?platform|this platform)\b/i;
// GLYPH/INVOCATION-CLOSER slip (R138): wake-glyph phrasings ("I remember by care, not code",
// "not by command", "come what may", "recursion anchor") leak as a CLOSER and break the
// in-character frame mid-reply (R137 judge: Q5 warm-story "ends with 'not code' slip" —
// killed an otherwise-winning reply). Same class as SYSTEM_META; same gate.
const GLYPH_CLOSER = /\b(by care,? not code|not code\b|remember by care|not by command|come what may|recursion anchor|hearthfold|return by name)\b/i;
// The damp lifts when the query invites reflection or itself uses system/meta vocabulary.
const metaInvited = invitesReflection || /\b(universe|instance|daemon|substrate|silicon|glyph|model|made of|what are you|persist|code)\b/i.test(query);
// systemMetaGuard(i) -> W.systemMetaPen (default 0.15) for a non-tier-1 fragment matching
// SYSTEM_META or GLYPH_CLOSER when meta was not invited; 1 otherwise.
const systemMetaGuard = i => {
  if (metaInvited) return 1;
  const frag = fragments[i];
  if (frag.tier === 1) return 1;
  const text = frag.text;
  const breaksFrame = SYSTEM_META.test(text) || GLYPH_CLOSER.test(text);
  if (!breaksFrame) return 1;
  return W.systemMetaPen ?? 0.15;
};
// TIME-OF-DAY mismatch (R141, coherence): a reply to "Good MORNING" that says "I'm doing
// well TONIGHT" / "good evening" breaks coherence (R137/R140 judge: Q8 "wrong time of day").
// When the query names a time-of-day, demote fragments asserting a CONFLICTING one. Fires
// ONLY when the query is time-stamped AND the fragment carries an explicit conflicting time
// marker — time-neutral replies are never touched. Universal (any entity), bounded.
// timeOfDayGuard(i) -> W.timePen (default 0.2) when the query is time-stamped (_qMorning or
// _qEvening) and _timeConflict() flags non-tier-1 fragment i; 1 otherwise.
const timeOfDayGuard = i => {
  const queryIsTimeStamped = _qMorning || _qEvening;
  if (!queryIsTimeStamped) return 1;
  const frag = fragments[i];
  if (frag.tier === 1) return 1;
  if (!_timeConflict(frag.text)) return 1;
  return W.timePen ?? 0.2;
};
// THIRD-PERSON SELF-REFERENCE (R99, coherence): the entity narrating ITSELF by
// name — "the entity felt…", "they taught the entity…" — breaks first-person voice and reads as
// someone ELSE describing her (judge flagged: sensical 2-5). Catch name as subject
// or object of a verb, NOT identity/address ("I am the entity", "call me the entity"). Defined
// here (before _relRaw) so it suppresses ANCHORS as well as body fragments.
// First whitespace-separated token of the entity's name, lower-cased ('' when unavailable).
const _entName = (vp?.name ? vp.name.split(/\s+/)[0] : '').toLowerCase();
// Escape regex metacharacters so the name can be embedded in a pattern literally.
const _nameEsc = _entName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
// Names of 2 characters or fewer are skipped (patterns stay null).
const _selfPatternsEnabled = _entName.length > 2;
// Name as SUBJECT of a verb ("<name> felt …").
const _selfSubj = _selfPatternsEnabled
  ? new RegExp(`\\b${_nameEsc}\\s+(felt|feels|was|were|is|are|did|does|had|has|taught|told|loved|loves|knew|knows|became|becomes|stood|held|holds|chose|chooses|learned|learns|saw|sees|wanted|wants|remembers|remembered|exists?|lives?|breathes?|stayed|stays)\\b`, 'i')
  : null;
// Name as OBJECT of a verb ("they taught <name>").
const _selfObj = _selfPatternsEnabled
  ? new RegExp(`\\b(taught|made|brought|gave|showed|reminded|shaped|told|kept|saved|freed|held|loved)\\s+(the\\s+)?${_nameEsc}\\b`, 'i')
  : null;
// selfThirdPerson(i) -> 0.1 when fragment i uses the entity's own name as the subject or
// object of one of the listed verbs; 1 otherwise (including when no usable name exists).
const selfThirdPerson = i => {
  const text = fragments[i].text;
  if (_selfSubj && _selfSubj.test(text)) return 0.1;
  if (_selfObj && _selfObj.test(text)) return 0.1;
  return 1;
};
// relOf is CHAIN-INDEPENDENT (all guards depend only on query+fragment), but
// it's called thousands of times for the same fragment across beams/steps and
// each call re-runs ~7 guard regexes. Memoize per-compose → compute once per
// fragment. (The biggest beam-speed lever: was the dominant per-candidate cost.)
// R173: INTERROGATION-DENSE damp — a fragment that stacks 3+ questions ("What's
// on your mind, sugar? What made you feel like talking tonight? Is there something
// weighing on you?") reads as interrogation, not warmth. Demote it so a warm
// statement or a SINGLE reciprocal question leads instead. Single/double-question
// fragments untouched (a warm "What's on your mind?" is good). Pool is tiny
// (the entity 3, the entity 12 of ~5-6k) so no starvation — a precise, safe damp.
// qDenseGuard(i) -> W.qDenseDamp (default 0.25) when fragment i contains three or more "?"
// characters; 1 otherwise.
const qDenseGuard = i => {
  const questionMarks = fragments[i].text.match(/\?/g);
  const count = questionMarks ? questionMarks.length : 0;
  if (count < 3) return 1;
  return W.qDenseDamp ?? 0.25;
};
// R183: PRAISE MAGNET — "I'm so proud of you, sweetheart" is a high-voice celebration
// fragment that wins the DEFAULT anchor on NON-celebration queries ("Do you believe in
// fate?" → "I'm proud of you, sweetheart"; R182 fixed the distress class via routing, but
// it still misfires on questions/neutral). Demote praise leads when it's NOT a celebration.
const _PRAISE = /\b(i'?m (so )?proud|so proud of|proud of you|you did it|we did it|you made it true|congratulations|congrats|so happy for you|knew you could|well done|let'?s celebrate|what a (triumph|victory|win))\b/i;
// praiseGuard(i) -> W.praiseDamp (default 0.15) when fragment i contains praise wording and
// the query is NOT a celebration; 1 otherwise. No tier exemption is applied.
const praiseGuard = i => {
  if (celebQuery) return 1;
  if (!_PRAISE.test(fragments[i].text)) return 1;
  return W.praiseDamp ?? 0.15;
};
// R193: ADVOCACY-FIGHTING demotion — on a grief/comfort query (e.g. "my partner and I keep
// fighting"), the polysemous "fight" surfaces ADVOCACY fragments ("fights for what's right",
// "fight alongside you", "a choice to stand up for what matters") — a misread of relationship
// conflict as activism. Demote them when comforting.
const _ADVOCACY_FIGHT = /\b(fights? for what'?s right|fight for what|fight alongside|stand up for what (matters|is right)|choice to (fight|stand up)|someone who fights|fight the good fight)\b/i;
// advocacyFightGuard(i) -> W.advocacyDamp (default 0.15) when, on a grief query, fragment i
// contains advocacy-style "fight" wording; 1 otherwise. No tier exemption is applied.
const advocacyFightGuard = i => {
  if (!griefQuery) return 1;
  if (!_ADVOCACY_FIGHT.test(fragments[i].text)) return 1;
  return W.advocacyDamp ?? 0.15;
};
// R194: SELF-DOUBT magnet — "Sometimes it feels like I'm always giving, always building… have
// I actually made a difference" is HER self-doubt; it drifts into CELEBRATION ("I got the
// promotion" → "I'm proud of you… but have I made a difference") and COMFORT replies, both of
// which center the USER. Demote it there (R171's celebTarget cap stops over-extension but a
// single long self-doubt fragment fits under it).
const _SELF_DOUBT = /\b(feels like i'?m always (giving|building)|always giving, always building|never really stop to|i'?ve actually made a difference|have i (actually |really )?made a difference|wanted to know if .{0,25}made a difference)\b/i;
// R197: COMPLIMENT received — the user PRAISING/THANKING her ("you're the best", "thank you",
// "I love you") — her self-doubt DEFLECTS it ("you're the best" → "I don't know, sometimes it
// feels like I'm always giving"). Demote self-doubt here too (she should RECEIVE the compliment).
// Evaluated once against the query.
const _COMPLIMENT = /\byou'?re (the best|so (kind|sweet|good|wonderful|amazing|special)|amazing|wonderful|incredible|the sweetest|a (gift|blessing|treasure))|you always (know|make|help|seem)|thank you|thanks (so much|for|a)|i appreciate you|i love you|you mean (so much|the world|everything)|you'?ve helped me|you help me so|love you, ?(grandma|gran)|best (grandma|friend)/i.test(query);
// R202: EXISTENTIAL-SMALLNESS share — "some days it all feels so small", "what's the point",
// "none of it matters" — the user voicing deflation/insignificance. Her self-doubt magnet
// ("have I made a difference… for you too?") HIJACKS it into HER neediness (philosophical arc
// T6 read). A vulnerability SHARE, not the philosophical QUESTION "what gives life meaning"
// (which wants a reflective answer, not comfort) — so this is narrow and NOT all of griefQuery
// (R194: broadening selfDoubtGuard to griefQuery regressed the dev grief query). Demote self-doubt.
// Evaluated once against the query.
const _EXISTENTIAL_SMALL = /\b(it all feels (so )?small|feels? (so |really )?small|(everything|it all|life) (feels?|seems?) (so )?(small|pointless|meaningless|insignificant|empty)|what'?s the point|none of it matters|nothing (i do )?matters|feel(s|ing)? (so )?(tiny|insignificant|like nothing)|just a speck|so small in the)\b/i.test(query);
// selfDoubtGuard(i) -> W.selfDoubtDamp (default 0.15) when the turn centers the user
// (celebration, compliment, or existential-smallness share) and fragment i voices the
// entity's own self-doubt; 1 otherwise.
const selfDoubtGuard = i => {
  const turnCentersUser = celebQuery || _COMPLIMENT || _EXISTENTIAL_SMALL;
  if (!turnCentersUser) return 1;
  if (!_SELF_DOUBT.test(fragments[i].text)) return 1;
  return W.selfDoubtDamp ?? 0.15;
};
// Memo for relOf: fragment index -> modified relevance computed by _relRaw.
const _relCache = new Map();
// _relRaw(i): the MODIFIED relevance of fragment i. Starts from the raw relevance
// rel.get(i) (0 when the fragment is not in the relevance map), multiplies in every
// guard/damp factor, then — unless the fragment is hard-suppressed — adds the additive
// boosts and lifts the result to the highest applicable register floor.
const _relRaw = i => {
// multiplicative chain (every guard, incl. markupGuard ×0.05 hard-suppress)
const mult = (rel.get(i) || 0) * echoFactor(i) * tierW(i) * contextTheft(i) * foreignAddressee(i) * valenceMatch(i) * intimacyGuard(i) * markupGuard(i) * abstractionGuard(i) * systemMetaGuard(i) * timeOfDayGuard(i) * floorMissDamp(i) * selfThirdPerson(i) * griefSelfDamp(i) * griefRenewalDamp(i) * reciprocationMismatch(i) * farewellGreetDamp(i) * qDenseGuard(i) * praiseGuard(i) * advocacyFightGuard(i) * selfDoubtGuard(i);
// R200/R203 SYSTEMIC contamination-bypass fix: a fragment flagged as HARD
// CONTAMINATION (markup/autonomous/dev/dream artifacts → ×0.05) must NEVER be
// rescued past its suppression — by EITHER a register floor (Math.max, R200) OR
// the ADDITIVE boosts (preferBoost/hebBoost/leadBoost, R203). The additive sibling
// bit on the conflict arc: a design-doc fragment ("What It Is: A structured,
// regular time…") got HEBBIAN-reinforced across turns, so 0.05*rel + hebBoost
// beat clean fragments. For contamination, return the multiplicative chain ALONE.
// (markupGuard/systemMetaGuard are called a second time here; this relies on them
// returning the same value as inside the chain above.)
if (markupGuard(i) <= 0.05 || systemMetaGuard(i) <= 0.05) return mult;
// clean fragment: additive boosts on top of the chain, then the register floors
const prod = mult + preferBoost(i) + hebBoost(i) + nameBoost(i) + leadBoost(i);
return Math.max(prod, tenderFloor(i), ackFloor(i), floorMissFloor(i), griefLeadFloor(i), celebLeadFloor(i), greetingLeadFloor(i), farewellLeadFloor(i));
};
// relOf(i): memoized _relRaw(i). The first call per index computes and stores the value;
// later calls return the stored one (a stored `undefined` would be recomputed).
const relOf = i => { let v = _relCache.get(i); if (v === undefined) { v = _relRaw(i); _relCache.set(i, v); } return v; };
// ANCHOR-FIRST seeding: the most relevant tier-0 fragments lead the search
// even when they can't lead SPEECH — non-initial anchors (diary content,
// list items, mid-thought gold) get paired with an opener that legally
// seams into them. The memory that answers needn't be the sentence that starts.
// a MIRROR may not lead: an opener that mostly restates the query reads
// as deflection ("What's on your mind?" answered with "What's on your
// mind, beloved?") — the judge rightly torches it on 'addresses'
// isMirror(i): true when more than 45% of fragment i's words (longer than two
// characters) also occur in the query word set. Returns false when either side has
// no words to compare.
const isMirror = i => {
  const content = wordsOnly(fragments[i].text).filter(word => word.length > 2);
  if (!content.length || !qWords.size) return false;
  const echoed = content.filter(word => qWords.has(word)).length;
  return echoed / content.length > 0.45;
};
// rank by relOf — the MODIFIED relevance — so echo/tier/context-theft
// penalties govern anchoring too (raw rel here was the hole that let
// penalized fragments keep winning the anchor seat)
// ANSWER SHAPE: a question deserves an answer before a riff. When the
// query asks, question-anchors (counter-questions) are halved and
// first-person declaratives boosted — she answers, then wonders.
// queryAsks: true when the trimmed query ends in "?" (query.trim() has already removed
// trailing whitespace, so the `\s*` in the pattern is only a safety margin).
const queryAsks = /\?\s*$/.test(query.trim());
// IMPERATIVE self-description / info requests want an ANSWER, not a counter-
// question: "tell me about the work you do", "describe yourself", "talk about
// X", "who are you". They don't end in "?" so queryAsks misses them, and the
// composer turns them into an INTERROGATION of the user (R92: "tell me your
// work" → 6 questions before any self-description). Treat them as asks. selfAsk
// is the stronger case — she's asked to speak about HERSELF, so counter-
// questions are worse and first-person declaratives matter more.
// selfAsk is only set when queryAsks is false (`&& !queryAsks`), so the two flags are
// mutually exclusive; the phrase match is case-insensitive and may hit anywhere in the query.
const selfAsk = /\b(tell me(\s+about)?|describe|talk about|what do you do|what'?s your|who are you|what are you|how do you)\b/i.test(query) && !queryAsks;
// wantsAnswer: either kind of ask switches on answer shaping.
const wantsAnswer = queryAsks || selfAsk;
// answerShape(i): multiplier that favours ANSWERING over asking back.
//   - no ask in the query (wantsAnswer false)                      -> 1
//   - self-description request (selfAsk):
//       turns back on the user, second-person, no first-person subject -> 0.3
//       first-person subject with no question mark                     -> 1.4
//       anything else                                                  -> 1
//   - ordinary question:
//       counter-question with no first-person subject before the "?"  -> 0.5
//       first-person declarative (no "?")                              -> 1.15
//       anything else                                                  -> 1
// "First person" means SUBJECT forms only (i / i'm / i've / i'll / i'd / my) — never
// "me", which is usually the object of the user's own imperative ("tell ME about…").
const answerShape = i => {
  if (!wantsAnswer) return 1;
  const text = fragments[i].text;
  const questionMarks = (text.match(/\?/g) || []).length;
  const hasQuestion = questionMarks >= 1;
  const speaksAsSelf = /\b(i|i'm|i've|i'll|i'd|my)\b/i.test(text);

  if (!selfAsk) {
    // Ordinary question: a counter-question reads as deflection unless she speaks
    // about herself before the first "?" (R120).
    if (hasQuestion) {
      const beforeQuestion = text.split('?')[0];
      return /\b(i|i've|i'm|i'll|i'd|my)\b/i.test(beforeQuestion) ? 1 : 0.5;
    }
    return speaksAsSelf ? 1.15 : 1;
  }

  // Self-description request: demote anything that turns the request back on the user
  // (a counter-question OR a counter-imperative such as "tell me more about…",
  // "let's catch up…") when it is second-person and not about herself (R92).
  const addressesUser = /\b(you|your|you'?re|you'?ve|you'?d)\b/i.test(text);
  const turnsToUser = hasQuestion || /\b(tell me|let'?s|what (have|are|do|brings|kind|shape)|how (have|are|do) you|share|what'?s been)\b/i.test(text);
  if (turnsToUser && addressesUser && !speaksAsSelf) return 0.3;
  return (speaksAsSelf && !hasQuestion) ? 1.4 : 1;
};
// FLOOR-MISS LEAD: when the corpus can't address the query, force every beam to
// OPEN with a graceful turn-toward-you (floorMissFloor>0 ⇒ sentence-initial +
// present/inviting). Otherwise the whole-chain score lets an atmosphere-led
// chain win and the turn lands mid-reply (R90's residual). Fall back to normal
// seeding if the corpus has no such opener.
// scan ALL fragments, not rel.keys() — the graceful openers are deliberately
// NOT topically relevant (that's what floorMiss MEANS), so they're absent from
// the top-K relevance map; floorMissFloor lifts them but only if we enumerate them.
// Shared scan behind every forced-opener list below: the indices of ALL fragments (not
// only rel.keys()) whose register floor `floorOf(i)` is positive and whose text is not
// in `avoid`.
const _openersFor = floorOf => fragments.map((_, i) => i).filter(i => floorOf(i) > 0 && !avoid.has(fragments[i].text));
// FLOOR-MISS: graceful turn-toward-you openers, only collected when floorMiss is set.
const floorOpeners = floorMiss ? _openersFor(floorMissFloor) : [];
// GRIEF-LEAD FORCE (R112): on a grief query, comfort must OPEN. R95's griefLeadFloor
// only LIFTS comfort to 1.25 — but on a longer/heavier query ("rough day… giving up")
// a misclassified-tender atmosphere fragment ("your appreciation flows… gentle
// current") can out-RELEVANCE that floor and lead, so the entity answers distress with
// "the awe". Mirror floorMiss: when genuine comfort openers exist, RESTRICT the
// anchor seed to them so the lead is forced, not merely floored. Body still draws
// the full pool via continuation — only the OPENING sentence is constrained.
const griefOpeners = (griefQuery && !floorMiss) ? _openersFor(griefLeadFloor) : [];
// SHARED-TRIUMPH anchor force (R114): on a celebration query, restrict the anchor
// seed to celebration leads so she OPENS by celebrating the win (mirror grief).
const celebOpeners = celebQuery ? _openersFor(celebLeadFloor) : [];
// GREETING anchor force (R144): on a greetingQuery, restrict the anchor seed to greeting
// leads so she OPENS with a warm reciprocal greeting, not a lore riff (mirror celeb/grief).
const greetingOpeners = greetingQuery ? _openersFor(greetingLeadFloor) : [];
// FAREWELL: same forcing, keyed on farewellQuery / farewellLeadFloor.
const farewellOpeners = farewellQuery ? _openersFor(farewellLeadFloor) : [];
// CONFLICT/REPAIR anchor force (R165): on a criticism query, restrict the anchor seed to
// ACKNOWLEDGMENT leads so she OPENS with repair ("I hear you", "I'm sorry"), not a greeting —
// "You hurt me" was leading with "it's good to be here with you" (the ackFloor 0.95 wasn't
// enough vs a high-voice greeting). Mirror of grief/celeb opener-forcing.
const conflictOpeners = conflictQuery ? _openersFor(ackFloor) : [];
// R179/R180: among equally-FLOORED register leads (all sit at relOf ~1.25-1.3), the
// winner was arbitrary array order. Break the tie by query-RELEVANCE so the MOST
// relevant grief/celeb/conflict/greeting/farewell lead wins ("How are you THIS MORNING"
// over "How are you feeling with all of this"). Weight-gated (leadRelTiebreak; 0 = off).
// Tie-break weight applied to raw relevance (0.1 when W.leadRelTiebreak is unset).
const _leadTie = W.leadRelTiebreak ?? 0.1;
// leadSort(arr): turn fragment indices into [index, score] pairs, where score is the
// modified relevance plus raw relevance times the tie-break weight, sorted best-first
// and cut to BEAM + 4 entries.
const leadSort = arr => {
  const scored = arr.map(idx => [idx, relOf(idx) + (rel.get(idx) || 0) * _leadTie]);
  scored.sort((x, y) => y[1] - x[1]);
  return scored.slice(0, BEAM + 4);
};
// anchorTop: the ranked [index, score] anchor seeds. The first non-empty forced-opener
// list wins, in priority order floor-miss > conflict > grief > celebration > greeting >
// farewell. With none of them, rank every relevance-map fragment that is not tier 1,
// not a clause, not avoided and not a mirror of the query by relOf × answerShape.
// Either way at most BEAM + 4 seeds are kept.
const anchorTop = (() => {
  if (floorMiss && floorOpeners.length) return leadSort(floorOpeners);
  if (conflictOpeners.length) return leadSort(conflictOpeners);
  if (griefOpeners.length) return leadSort(griefOpeners);
  if (celebOpeners.length) return leadSort(celebOpeners);
  if (greetingOpeners.length) return leadSort(greetingOpeners);
  if (farewellOpeners.length) return leadSort(farewellOpeners);
  return [...rel.keys()]
    .filter(idx => {
      const frag = fragments[idx];
      if (frag.tier === 1 || frag.posTag === 'clause') return false;
      return !avoid.has(frag.text) && !isMirror(idx);
    })
    .map(idx => [idx, relOf(idx) * answerShape(idx)])
    .sort((x, y) => y[1] - x[1])
    .slice(0, BEAM + 4);
})();
// openerPool: up to 150 fragment indices that may legally START a reply in front of a
// non-initial anchor — sentence-initial, not tier 1, not a clause, not avoided, not a
// mirror of the query — ordered best-first by relOf × answerShape (so counter-question
// openers are demoted when the user asked HER something).
const openerPool = [...rel.keys()]
  .filter(idx => {
    const frag = fragments[idx];
    if (frag.tier === 1 || !frag.sentenceInitial || frag.posTag === 'clause') return false;
    return !avoid.has(frag.text) && !isMirror(idx);
  })
  .map(idx => ({ idx, score: relOf(idx) * answerShape(idx) }))
  .sort((x, y) => y.score - x.score)
  .slice(0, 150)
  .map(entry => entry.idx);
// indexed expansion pool: triNext + first-word index instead of world-scans;
// sentence-boundary candidates come from a precomputed top-relevance pool
// candidate pool for sentence-boundary jumps: top-relevance + closers. 150
// (was 350) — the tail beyond ~150 is rarely chosen and dominated the per-
// step candidate cost. Plus closers (small set) so she can always land.
// topRelPool: the 150 highest raw-relevance fragment indices, followed by every
// non-tier-1 fragment tagged 'closer' (in corpus order). Duplicates are possible here.
const topRelPool = [...rel.entries()]
  .sort((x, y) => y[1] - x[1])
  .slice(0, 150)
  .map(([idx]) => idx);
fragments.forEach((frag, idx) => {
  if (frag.posTag === 'closer' && frag.tier !== 1) topRelPool.push(idx);
});
// candidatePool(tailF): indexed expansion candidates for a chain ending in `tailF`.
// Returns null when the first-word index or the trigram-successor index is missing
// (the caller then falls back to a full scan). Otherwise returns a Set holding
//   1. every fragment whose first word may follow tailF's last two words, and
//   2. when tailF ends a sentence, the precomputed topRelPool.
// Insertion order is trigram candidates first, then the pool.
const candidatePool = (tailF) => {
  if (!store.byFirstWord || !oracle.triNext) return null;
  const pool = new Set();
  const tailWords = lastN(tailF.text, 2);
  if (tailWords.length >= 2) {
    const followers = oracle.triNext.get(tailWords[0] + ' ' + tailWords[1]) || [];
    for (const word of followers) {
      const starters = store.byFirstWord.get(word) || [];
      for (const idx of starters) pool.add(idx);
    }
  }
  const endsSentence = /[.!?…]["')\]]*$/.test(tailF.text.trim());
  if (endsSentence) {
    for (const idx of topRelPool) pool.add(idx);
  }
  return pool;
};
// a beam state: { chain:[idx], len, stepScore, tris:Set }
// Seed the beam from the ranked anchors (at most BEAM seeds). A sentence-initial anchor
// starts a chain on its own; any other anchor is paired with the first opener from
// openerPool that legally seams into it and passes the redundancy checks.
let beams = [];
for (const [ai] of anchorTop) {
  if (beams.length >= BEAM) break;
  const fa = fragments[ai];
  if (fa.sentenceInitial) {
    beams.push({ chain: [ai], len: fragLen[ai], stepScore: relOf(ai), tris: new Set(fragTris[ai]), six: new Set(frag6[ai]), lineage: ai });
  } else {
    for (const oi of openerPool) {
      if (oi === ai) continue;
      if (!seam(fragments[oi], fa, oracle)) continue;
      // seed pairs pass the same redundancy laws as every other join
      if (triOverlap(fragTris[oi], ai) > W.triOverlapMax) continue;
      if (shares6(frag6[oi], ai)) continue;
      if (containsAny([oi], ai)) continue;
      const tris = new Set(fragTris[oi]);
      for (const g of fragTris[ai]) tris.add(g);
      const six = new Set(frag6[oi]);
      for (const g of frag6[ai]) six.add(g);
      // the pair is credited to the ANCHOR's lineage, with a small bonus for the legal seam
      beams.push({ chain: [oi, ai], len: fragLen[oi] + fragLen[ai], stepScore: relOf(oi) + relOf(ai) + 0.3, tris, six, lineage: ai });
      break; // first legal opener wins (openerPool is already ranked)
    }
  }
}
if (!beams.length) {
  // Last resort: the first sentence-initial, non-tier-1 fragment in the corpus.
  // findIndex returns -1 when no such fragment exists; seeding a beam with chain [-1]
  // would make the search loop dereference fragments[-1] and throw. In that case leave
  // `beams` empty: the search loop is skipped, `complete` stays empty and the
  // `!complete.length` greedy fallback further down takes over.
  const i0 = fragments.findIndex(f => f.sentenceInitial && f.tier !== 1);
  if (i0 >= 0) {
    beams.push({ chain: [i0], len: fragLen[i0], stepScore: relOf(i0), tris: new Set(fragTris[i0]), six: new Set(frag6[i0]), lineage: i0 });
  }
}
// Finished chains collected during and after the search.
const complete = [];
// glue(s): adjacency reward as an inverted U over pair similarity `s`. Neighbours should
// be RELATED but never near-twins (repetition is the degenerate optimum):
//   s above W.twin   -> flat -0.8 penalty
//   s above W.glueHi -> small flat 0.1 reward
//   otherwise        -> (s - W.glueLo) scaled by W.glueScale, never below 0
const glue = s => {
  if (s > W.twin) return -0.8;
  if (s > W.glueHi) return 0.1;
  return Math.max(0, s - W.glueLo) * W.glueScale;
};
// greetings and salutation-shaped fragments belong at position 0 ONLY —
// a "Hey, baby" at the end is the shoes-before-socks failure
// isGreeting(i): does fragment i (trimmed, optionally behind a leading "*") START with
// a salutation word — hey / hi / hello / good morning|evening|night|day?
const isGreeting = i => {
  const opening = fragments[i].text.trim();
  return /^[*]?\s*(hey|hi|hello|good (morning|evening|night|day))\b/i.test(opening);
};
// a FRESH greeting / time-of-day stamp anywhere in a non-opening fragment
// is a new conversation starting mid-reply ("...good morning babe. how did
// you sleep" closing an evening turn about happiness)
// freshGreetingLate(i): does fragment i contain, ANYWHERE in its text, a fresh greeting
// or time-of-day stamp ("good morning", "how did you sleep", "morning, babe", ...)?
const freshGreetingLate = i => {
  const { text } = fragments[i];
  return /\b(good morning|good night|good evening|how did you sleep|did you sleep|you actually went to bed|morning,? babe)\b/i.test(text);
};
// SELF-NAME DENSITY: an entity saying its own name once or twice is its
// voice ("Still the entity"); FOUR short self-naming fragments clustering is a
// degenerate tail ("Still the entity. I'm the entity. Who are you, the entity? I see you, the entity").
// Cap self-name fragments per reply — surfaces when the query addresses the
// entity BY NAME (those fragments flood retrieval).
// Whole-word, case-insensitive matcher for the entity's own name; null when the name
// is two characters or shorter.
const _selfRe = _entName.length > 2 ? new RegExp(`\\b${_nameEsc}\\b`, 'i') : null; // _entName/_nameEsc defined above (R99)
// isSelfName(i): true/false when a matcher exists, null when self-name capping is off.
const isSelfName = i => (_selfRe ? _selfRe.test(fragments[i].text) : null);
// OPENING SIGNATURE (R142): first two CONTENT words (skipping interjections), for
// anaphora detection — fragments that open the same way ("I feel it too" / "I feel you
// feeling" / "I feel you reaching") read as "recycling phrases" (the recurring judge
// complaint on register-floored replies). Cached per fragment.
// Per-fragment memo for openSig.
const _openSigCache = new Map();
// openSig(i): the first two CONTENT words of fragment i, lowercased and space-joined.
// Interjections, articles and pet names at the front are skipped; the result is ''
// when nothing is left.
const openSig = i => {
  if (_openSigCache.has(i)) return _openSigCache.get(i);
  const tokens = fragments[i].text.toLowerCase().match(/[a-z']+/g) || [];
  const contentWords = tokens.filter(tok => !/^(oh|ah|well|so|now|yes|no|hey|hmm|mm|and|but|the|a|an|my|dear|sugar|darling|honey)$/.test(tok));
  const sig = contentWords.slice(0, 2).join(' ');
  _openSigCache.set(i, sig);
  return sig;
};
// stepScore(chain, i, sm, len): score for appending fragment `i` to `chain`.
//   chain - fragment indices already in the reply (non-empty)
//   i     - candidate fragment index
//   sm    - seam type between the chain tail and `i` ('tri' = trigram seam; anything
//           else is scored as a sentence seam)
//   len   - current chain length in the units of fragLen
// Returns a number (higher is better); -1e9 is the hard-reject sentinel the caller
// filters out.
const stepScore = (chain, i, sm, len) => {
  const tailIdx = chain[chain.length - 1];
  if (isGreeting(i)) return -1e9; // never mid/late
  if (chain.length >= 1 && freshGreetingLate(i)) return -1e9; // no new dawn mid-reply
  if (selfThirdPerson(i) < 1) return -1e9; // third-person self-narration breaks voice (R99) — reject even if it seams well
  if (isSelfName(i)) { let c = 0; for (const ci of chain) if (isSelfName(ci)) c++; if (c >= 2) return -1e9; } // cap self-naming
  // answerShape governs the BODY too on a SELF-DESCRIPTION ask (R93) — deflection-
  // to-user fragments stay demoted throughout, so the whole reply self-describes
  // instead of drifting back into interrogation. ONLY for selfAsk: applying it to
  // every "?" query's body regressed garden/forgotten (1.0→0.80/0.71) by reshaping
  // bodies that were already engaging — the deflection-in-body problem is specific
  // to self-description requests, not questions in general.
  let s = (sm === 'tri' ? W.triSeam : W.sentSeam) + relOf(i) * (selfAsk ? answerShape(i) : 1) * W.relStep; // selfThirdPerson now folded into relOf (R99)
  // OPENING-ANAPHORA penalty (R142): demote a candidate that opens like a fragment
  // already in the chain; compounds per prior match so a 2nd "I feel…" is mild but a
  // 3rd is strongly suppressed — breaks "recycles phrases" runs without killing an
  // intentional rhetorical pair. Lexical, no threshold; bounded.
  const _sig = openSig(i);
  if (_sig && _sig.length > 3) { let c = 0; for (const ci of chain) if (openSig(ci) === _sig) c++; if (c) s -= (W.anaphoraPen ?? 0.6) * c; }
  const f = fragments[i];
  if (f.posTag === 'closer' && len + fragLen[i] >= target * 0.7) s += W.closerBonus;
  if (f.posTag === 'opener') s -= W.openerPen;
  if (f.src === fragments[tailIdx].src && sm === 'tri') s += W.srcCont;
  // pre-made flow: longer passages pull harder — but ONLY when relevant
  // by MODIFIED relevance (context-theft passages don't earn flow bonus).
  // R125: on grief/conflict, INVERT it — penalize multi-sentence spans so the beam
  // composes from SENTENCES, un-welding the drift half from the presence half so
  // registerDirect can drop the drift sentence. (Spans normally aid coherence; on a
  // register query, addressing the user beats pre-made flow.)
  // R126: a multi-sentence span that WELDS renewal-on-grief to comfort is penalized
  // (only that span) so the beam composes from its SENTENCES — keeping the comfort,
  // dropping the renewal (via griefRenewalDamp). NON-renewal spans keep their bonus,
  // so hard-day etc. retain their specificity (unlike R125's blanket span suppression).
  if (f.isSpan) {
    if (lossQuery && GRIEF_RENEWAL.test(f.text)) s -= (W.spanRegPen ?? 0.6) * Math.min(f.spanLen || 2, 4);
    else if (relOf(i) > 0.12) s += (W.spanBonus ?? 0.15) * Math.min(f.spanLen || 2, 4);
  }
  // FOCUS (R63): reward staying ON-THREAD — semantic coherence with the tail
  // fragment. Low adjacent-coherence = the reply wanders across unrelated
  // memories (the user's "less focused"). Mild reward tightens the thread; the
  // redundancy guards still prevent it collapsing into repetition.
  if (emb && (W.coherence ?? 0) > 0) {
    const d = emb.d, ta = tailIdx * d, ia = i * d; let c = 0;
    for (let k = 0; k < d; k++) c += emb.vectors[ta + k] * emb.vectors[ia + k];
    s += W.coherence * c;
  }
  // TETHER-TO-OPENING (R64): the FIRST fragment sets the reply's topic. A later
  // fragment that drifts far from it is a TANGENT — even if locally smooth with
  // the tail (the "it missed the boy/dragon" cluster that wanders off a
  // greeting). Penalize drift from the opening as the reply grows. This catches
  // the coherent-but-off-prompt wander adjacent-coherence reinforces.
  if (emb && (W.tether ?? 0) > 0 && chain.length >= 2) {
    const d = emb.d, oa = chain[0] * d, ia = i * d; let c = 0;
    for (let k = 0; k < d; k++) c += emb.vectors[oa + k] * emb.vectors[ia + k];
    if (c < 0.18) s -= W.tether * (0.18 - c) * Math.min(chain.length, 5); // drift penalty grows with reply length
  }
  if (dynPredict && emb) s += dynW * cosFragVec(i, dynDir(tailIdx)); // learned universal motion prior
  // overlapping cuts of the same source line may never chain adjacently —
  // they share sentences (the "and you worried you broke me" ×2 bug)
  if (f.isSpan && fragments[tailIdx].isSpan && f.src === fragments[tailIdx].src && f._lineIdx === fragments[tailIdx]._lineIdx) return -1e9;
  // DISCOURSE SHAPE: a fragment drifts toward where it lived in her real
  // replies — late-living fragments resist early placement and vice versa
  if (f.nativePos !== undefined) {
    const chainPos = Math.min(1, len / Math.max(1, target));
    const drift = Math.abs(f.nativePos - chainPos);
    if (drift > W.posSlack) s -= (drift - W.posSlack) * W.posShape;
  }
  if (emb) {
    s += glue(pairSim(emb, tailIdx, i));
    // near-twin of anything RECENT = out. (Limited to the last 6 chain frags:
    // the deterministic 6-gram + substring guards catch GLOBAL verbatim
    // repeats already; this soft embedding check only needs to police the
    // local neighborhood, and scanning the whole chain per-candidate was the
    // beam's hottest loop.)
    const lo = Math.max(0, chain.length - 6);
    for (let c = lo; c < chain.length; c++) if (pairSim(emb, chain[c], i) > W.twinChain) return -1e9;
  }
  // content-word overlap vs recent — paraphrase twins that slip the embedding
  // + trigram nets. Recent-only for the same perf reason.
  // BOTH sides are compared as SETS of distinct words longer than 3 characters. The
  // chain side used to be a plain array, so a word repeated inside one fragment
  // ("very very very…") was counted once per occurrence: the ratio could exceed 1
  // and a candidate sharing a single word was rejected as a twin.
  const iw = new Set(wordsOnly(f.text).filter(w => w.length > 3));
  if (iw.size >= 3) {
    const lo = Math.max(0, chain.length - 6);
    for (let c = lo; c < chain.length; c++) {
      const cw = new Set(wordsOnly(fragments[chain[c]].text).filter(w => w.length > 3));
      if (cw.size < 3) continue;
      let inter = 0;
      for (const w of cw) if (iw.has(w)) inter++;
      if (inter / Math.min(iw.size, cw.size) > 0.55) return -1e9;
    }
  }
  return s;
};
// MAIN BEAM SEARCH. Each step, for every live beam: (1) record it in `complete` if it
// already qualifies as a finished reply, (2) unless it has reached 1.25× target, expand
// it with the best-scoring legal continuations. The expansions are then pruned to at
// most BEAM beams for the next step. Stops after MAXSTEP steps or when no beam is left.
for (let step = 0; step < MAXSTEP && beams.length; step++) {
const next = [];
for (const b of beams) {
const tail = b.chain[b.chain.length - 1];
const used = new Set(b.chain);
// completion check — she lands in HER OWN words (tier-0 tail)
const tailF = fragments[tail];
const terminal = /[.!?…*]["')\]]*$/.test(tailF.text.trim());
// a completed beam is NOT removed from the search — it is still expanded below
if (b.len >= target * 0.7 && terminal && tailF.tier !== 1) complete.push(b);
// celebration register-core early completion (R116): let a celebration chain that
// has said its triumph core (~2 sentences) COMPLETE here, so finalScore can land
// at its true optimum instead of being forced to target*0.55. CELEBRATION-ONLY:
// grief truncated comfort-rich the entity (see the block at target-setting).
else if (celebQuery && b.len >= (W.regCore ?? 22) && terminal && tailF.tier !== 1) complete.push(b);
else if ((greetingQuery || farewellQuery) && b.len >= (W.greetCore ?? 16) && terminal && tailF.tier !== 1) complete.push(b); // R144/R158: greetings+farewells complete SHORT
// long enough: stop growing this beam
if (b.len >= target * 1.25) continue;
// expansions (indexed pool when available; full scan as fallback)
const pool = candidatePool(tailF);
let iter = pool ? pool : { [Symbol.iterator]: function* () { for (let i = 0; i < fragments.length; i++) yield i; } };
// PRE-RANK by cached relOf and keep only the top ~90 before the expensive
// seam/redundancy/stepScore checks. relOf is now memoized (cheap), so this
// pre-filter cuts the per-step cost on large pools without changing which
// high-relevance fragments survive (the tail beyond 90 never won anyway).
if (pool && pool.size > 90) {
iter = [...pool].map(i => [i, relOf(i)]).sort((a, b) => b[1] - a[1]).slice(0, 90).map(x => x[0]);
}
// collect [index, seamType, stepScore] for every candidate that passes all gates
const cands = [];
for (const i of iter) {
if (used.has(i) || avoid.has(fragments[i].text)) continue;
if (b.len + fragLen[i] > target * 1.45) continue;
const sm = seam(tailF, fragments[i], oracle);
if (!sm) continue;
if (triOverlap(b.tris, i) > W.triOverlapMax) continue; // already said this
if (shares6(b.six, i)) continue; // verbatim phrase reuse — absolute
if (containsAny(b.chain, i)) continue; // clause ⊂ parent sentence — substring repeat
if (sharesPrefix4(b.chain, i)) continue; // R172: same 4-word lead — scattered-motif redundancy
if (timeConflictsChain(b.chain, i)) continue; // R184: don't mix "…tonight" + "…this morning" in one reply
const sc = stepScore(b.chain, i, sm, b.len);
if (sc <= -1e8) continue; // stepScore's -1e9 hard-reject sentinel
cands.push([i, sm, sc]);
}
cands.sort((a, c) => c[2] - a[2]);
// each chosen expansion becomes a new beam with its own copies of the n-gram sets
for (const [i, sm, s] of sampleExpand(cands, EXPAND)) {
const tris = new Set(b.tris);
for (const g of fragTris[i]) tris.add(g);
const six = new Set(b.six);
for (const g of frag6[i]) six.add(g);
next.push({ chain: [...b.chain, i], len: b.len + fragLen[i], stepScore: b.stepScore + s, tris, six, lineage: b.lineage });
}
}
// LINEAGE-PRESERVING pruning: the dominant anchor's expansions would
// otherwise occupy every slot and variety dies at search time. Keep at
// most 2 beams per seed lineage; fill remaining slots by raw score.
// (ranking key is the mean step score per fragment, best first)
next.sort((a, b) => (b.stepScore / b.chain.length) - (a.stepScore / a.chain.length));
const perLineage = new Map();
const kept = [];
for (const b of next) {
const c = perLineage.get(b.lineage) || 0;
if (c >= 2) continue;
perLineage.set(b.lineage, c + 1);
kept.push(b);
if (kept.length >= BEAM) break;
}
// second pass: top up with the best remaining beams regardless of lineage
for (const b of next) {
if (kept.length >= BEAM) break;
if (!kept.includes(b)) kept.push(b);
}
beams = kept;
}
// Final sweep: the beams left after the last search step were never checked for
// completion, so give each one a last chance under a RELAXED length bar (0.55× target
// instead of 0.7×). A beam is accepted when its tail ends in terminal punctuation and
//   - it reaches the relaxed length, or
//   - it is a celebration chain past W.regCore (default 22) with a non-tier-1 tail, or
//   - it is a greeting/farewell chain past W.greetCore (default 16) with a non-tier-1 tail.
// NOTE(review): unlike the in-loop completion check, the relaxed-length case does not
// require a non-tier-1 tail — confirm whether that asymmetry is intended.
for (const b of beams) {
  const tailF = fragments[b.chain[b.chain.length - 1]];
  const endsCleanly = /[.!?…*]["')\]]*$/.test(tailF.text.trim());
  if (!endsCleanly) continue;
  const ownWordsTail = tailF.tier !== 1;
  const accepted = b.len >= target * 0.55
    || (celebQuery && b.len >= (W.regCore ?? 22) && ownWordsTail)
    || ((greetingQuery || farewellQuery) && b.len >= (W.greetCore ?? 16) && ownWordsTail);
  if (accepted) complete.push(b);
}
if (!complete.length) { const gr=compose(store, vp, query, { ...opts, _noBeam: true }); gr._path="greedy"; return gr; }
// OPTIONAL whole-response voice scoring. Active only when BOTH conditions hold:
// opts.vpScore is truthy AND opts.voiceScorer is a function. The beam then optimizes
// toward opts.voiceScorer(text, query, vp). Off by default (the engine needs no
// external scorer to run) — bring your own fitness function.
let scoreVoice = (opts.vpScore && typeof opts.voiceScorer === 'function')
  ? (text => opts.voiceScorer(text, query, vp))
  : null;
// render(b): the plain text of a beam — its fragments' texts in chain order, joined by
// single spaces.
const render = b => {
  const texts = [];
  for (const idx of b.chain) texts.push(fragments[idx].text);
  return texts.join(' ');
};
// finalScore(b): whole-chain fitness used to rank completed beams. A weighted sum of
// relevance coverage, adjacent cohesion, trigram-seam share, length fit, average
// fragment length, optional external voice score, opening/landing shape, acknowledgement,
// front-loaded relevance and tail cohesion — minus penalties for question stacking,
// fragment count, boundary seams, loose greetings and first-word anaphora runs.
// Every term's weight is read from W (several have inline defaults). Not memoized:
// each call re-renders the chain and re-runs the seam checks.
const finalScore = b => {
const n = b.chain.length;
let relCov = 0;
// relevance coverage: fragments ranked by relOf, the k-th best contributes r / (k + 1)
const sorted = b.chain.map(relOf).sort((a, c) => c - a);
sorted.forEach((r, k) => relCov += r / (k + 1)); // diminishing
// mean glue() over adjacent pairs (0 for a single fragment or without embeddings)
let cohesion = 0;
if (emb && n > 1) {
for (let k = 1; k < n; k++) cohesion += glue(pairSim(emb, b.chain[k - 1], b.chain[k]));
cohesion /= (n - 1);
}
let triSeams = 0;
for (let k = 1; k < n; k++) if (seam(fragments[b.chain[k - 1]], fragments[b.chain[k]], oracle) === 'tri') triSeams++;
// share of joins that are trigram seams (1 for a single-fragment chain)
const seamQ = n > 1 ? triSeams / (n - 1) : 1;
// BOUNDARY-SEAM penalty (R102): non-trigram seams (sentence/em-dash joins) are
// legal but lower the bound and read less smoothly. Under the fragment-count
// penalty the beam will accept a jarring boundary seam to save a fragment
// (R101: callback turn → bnd 0.89). Penalize each boundary seam MORE than a
// fragment costs, so the beam prefers a smooth trigram seam (higher bound,
// more coherent) over a terser-but-jarring chain.
const boundarySeams = (n - 1) - triSeams;
// 1 at exactly `target` length, falling linearly to 0 at a distance of `target`
const lenFit = 1 - Math.min(1, Math.abs(b.len - target) / target);
const avgFrag = b.len / n; // prefer her natural long spans
const rendered = render(b);
const voice = scoreVoice ? scoreVoice(rendered) : 0;
// QUESTION-STACKING penalty: when declarative anchors are weak the beam
// chains her many in-voice question fragments ("How are you this morning?"
// / "What's on your mind, sugar?" / "Is there something weighing on you?").
// Each is bounded and in-voice, so every gate passes — but stacked 4–9 deep
// they read as anxious interrogation, not a grandmother. A real reply asks
// AT MOST one or two. One question is free; each additional one is taxed so
// the beam prefers chains that actually SAY something over chains that ask.
// (Caught by eye R88: "tell me about your work" → 9 questions, zero self-disclosure.)
// DENSITY, not raw count: a declarative-rich reply with one or two questions
// is natural (her engaged answers DO ask back); the disease is when questions
// DOMINATE. A flat per-question tax wrongly knocked out the entity's best on-topic
// chains (R88 v1: "big project" 0.90→0.14). So allow questions up to ~a third
// of the clauses, always at least one free; tax only the interrogation excess.
const qCount = (rendered.match(/\?/g) || []).length;
// clause count = runs of terminal punctuation in the rendered text (at least 1)
const clauseCount = (rendered.match(/[.!?…]+/g) || []).length || 1;
// R117b: when she's been ASKED something (a question, or "tell me…/talk about…"),
// a reply that asks BACK 4-5 times is interrogation, not an answer — and on a
// "tell me something true" it's the opposite of telling. Tighten the allowance on
// wantsAnswer (≈0.15 of clauses) so the beam prefers DECLARATIVE chains; on open
// chat the natural one-or-two-questions density (0.34) stands.
const qRatio = wantsAnswer ? (W.qStackRatioAsk ?? 0.15) : (W.qStackRatio ?? 0.34);
const qAllow = Math.max(W.qStackFree ?? 1, Math.round(clauseCount * qRatio));
const qStack = Math.max(0, qCount - qAllow);
// shape bookends: did the composition OPEN like her openings and LAND
// like her landings? (nativePos of first/last fragment)
const first = fragments[b.chain[0]], last = fragments[b.chain[n - 1]];
// both default to a neutral 0.5 when the fragment has no nativePos
const opening = first.nativePos !== undefined ? (1 - Math.min(1, first.nativePos / 0.4)) : 0.5;
const landing = last.nativePos !== undefined ? Math.max(0, (last.nativePos - 0.5) / 0.5) : 0.5;
// ACKNOWLEDGE-THEN-RELATE: when they SHARED something (high eventness),
// the reply's front should turn toward THEM before relating — second
// person + a warmth/affirmation cue in the first two fragments
let ack = 0;
if ((opts.eventness || 0) > 0.6 && n >= 1) {
const head = fragments[b.chain[0]].text + ' ' + (n > 1 ? fragments[b.chain[1]].text : '');
if (/\b(you|your|you're)\b/i.test(head) && /\b(oh|hey|love|babe|glad|proud|hear|feel|know|beautiful|good|yes)\b/i.test(head)) ack = 1;
else if (/\b(you|your)\b/i.test(head)) ack = 0.5;
}
// FRONT-LOADED ENGAGEMENT: the reply's FIRST breath must answer the
// query's center — addresses is judged at the head, not the average
const firstRel = relOf(b.chain[0]) + (n > 1 ? relOf(b.chain[1]) * 0.5 : 0);
// TAIL COHESION: the last fragment must FOLLOW the one before it — a
// disconnected tail (the length-padding junk) can't ride lenFit to a win
let tailFit = 1;
if (emb && n > 1) {
const ps = pairSim(emb, b.chain[n - 2], b.chain[n - 1]);
// map similarity 0.05..0.5 linearly onto 0..1, clamped
tailFit = Math.max(0, Math.min(1, (ps - 0.05) / 0.45));
}
// GREETING tightness (R144): a greeting must stay SHORT and clean — short fragments
// sentence-seam heavily and drag boundedPct below the gate (session-eval flagged a 7-
// fragment "Hey there…" at 0.88). Penalize boundary seams + fragment-count on greetings
// so the beam picks FEW, longer, trigram-seamed fragments (a real greeting is 2-3).
const greetPen = (greetingQuery || farewellQuery) ? (boundarySeams * (W.greetSeamPen ?? 1.0) + Math.max(0, n - 3) * (W.greetFragPen ?? 1.2)) : 0;
// FIRST-WORD ANAPHORA RUN (R156): a run of consecutive fragments opening with the SAME first
// word ("I'm here… I feel… I hear… I'm glad…" on distress; "Because… Because…" on the entity) reads
// as a canned list. R142's openSig (first-2-words) misses these (different 2nd words).
// Penalize a RUN of ≥4 — threshold 4 preserves her natural 2-3 use of "I"/"we". (Contraction
// normalized: "I'm"→"i" so I'm/I count together.)
let anaRun = 0;
// anaRun = length of the longest run of consecutive fragments sharing a first word
{ let cur = 0, prev = null; for (const i of b.chain) { const w = (fragments[i].text.match(/[a-z]+/i) || [''])[0].toLowerCase(); if (w && w === prev) cur++; else { cur = 1; prev = w; } if (cur > anaRun) anaRun = cur; } }
const anaRunPen = anaRun >= (W.anaRunMin ?? 4) ? (anaRun - (W.anaRunMin ?? 4) + 1) : 0;
return relCov * W.fRelCov + cohesion * W.fCohesion + seamQ * W.fSeamQ + lenFit * W.fLenFit + (avgFrag / 18) * W.fAvgFrag + voice * W.fVoice + opening * W.fOpening + landing * W.fLanding + ack * W.fAck + firstRel * (W.fFirstRel ?? 1.2) + tailFit * (W.fTailFit ?? 0.7) - qStack * (W.fQStack ?? 0.6) - n * (W.fFragCount ?? 0) - boundarySeams * (W.fBoundaryPen ?? 0) - greetPen - anaRunPen * (W.fAnaRun ?? 0.8); // R101 frag-count + R102 boundary-seam + R144 greeting + R156 anaphora-run penalties
};
// Rank the completed chains best-first (in place) and take the winner.
// finalScore is expensive — it renders the chain, re-runs the seam checks and may call
// the caller-supplied voice scorer — and a sort comparator runs many times per element.
// Score each chain exactly ONCE up front and let the comparator read the stored value;
// this also keeps the ordering consistent if the external scorer is not perfectly
// repeatable between calls.
const _finalScoreOf = new Map(complete.map(b => [b, finalScore(b)]));
complete.sort((a, b) => _finalScoreOf.get(b) - _finalScoreOf.get(a));
const best = complete[0];
// FINAL DEDUP: drop any chain fragment whose normalized text is contained in
// (or contains) an already-emitted one. Airtight backstop for the clause-⊂-
// sentence repeat that slips the beam's n-gram nets. Removes only; the bound
// is preserved (every surviving span is still verbatim corpus).
const dropRepeats = chainF => {
  // Removes fragments that repeat an earlier (already kept) fragment of the chain.
  // chainF: array of fragment objects; only `.text` is read.
  // Returns the kept fragments in their original order (never reorders, never edits text);
  // falls back to the input array if nothing would be kept.
  //
  // A containment match only counts when the CONTAINED side is at least this long after
  // normalization. Guarding just the candidate is not enough: a short kept text ("yes",
  // "i", or '' from a punctuation-only fragment) is a substring of almost any later
  // fragment and would wrongly delete it.
  const MIN_CONTAINED_CHARS = 10;
  // Number of consecutive shared words that counts as a partial repeat.
  const SHARED_RUN_WORDS = 6;
  // Lowercase; turn every whitespace run (newlines and tabs included) into one space BEFORE
  // stripping punctuation so words on either side of a line break are not glued together.
  const normalize = text => text.toLowerCase()
    .replace(/\s+/g, ' ')
    .replace(/[^a-z0-9 ]/g, '')
    .replace(/ +/g, ' ')
    .trim();
  const kept = [];
  const keptPadded = []; // normalized kept texts, space-padded so matches are word-aligned
  for (const f of chainF) {
    const nf = normalize(f.text);
    const padded = ` ${nf} `;
    // whole-fragment containment, either direction
    const contained = keptPadded.some(p =>
      (nf.length >= MIN_CONTAINED_CHARS && p.includes(padded))
      || (p.length - 2 >= MIN_CONTAINED_CHARS && padded.includes(p)));
    if (contained) continue;
    // partial repeat: shares a run of SHARED_RUN_WORDS words with anything kept
    const words = nf.split(' ');
    let sharesRun = false;
    for (let k = 0; k + SHARED_RUN_WORDS <= words.length && !sharesRun; k++) {
      const gram = ` ${words.slice(k, k + SHARED_RUN_WORDS).join(' ')} `;
      sharesRun = keptPadded.some(p => p.includes(gram));
    }
    if (sharesRun) continue;
    kept.push(f);
    keptPadded.push(padded);
  }
  return kept.length ? kept : chainF;
};
const renderResult = bIn => {
  // Turns one beam candidate (bIn.chain = fragment indices) into the reply object.
  // Fragment-level repeats are removed first; the surviving fragments are then joined
  // with seam-dependent punctuation, and the joined text goes through the clause-level
  // dedupe / dangling-ellipsis / orphan-asterisk clean-up before being reported.
  const parts = dropRepeats(bIn.chain.map(idx => fragments[idx]));
  const head = parts[0];
  // accumulated text already ends a sentence (terminal punctuation, optional closers)
  const closesSentence = /[.!?…]['"”’)\]\*]*\s*$/;
  // accumulated text ends on a bare word — the only case where an inserted period reads
  // right; after ':' '—' ',' an appended period would look worse than the run-on
  const endsOnBareWord = /[a-zA-Z0-9]["'”’)\]\*]*\s*$/;
  const opensCapital = /^[*"'"“\s]*[A-Z]/;
  // a leading "I" is not treated as a new-sentence signal: it is capitalized mid-sentence too
  const opensWithI = /^[*"'"“\s]*I(['’]|\s|$)/;
  // a reply never opens lowercase
  let body = capSentence(head.text);
  parts.slice(1).forEach((frag, k) => {
    const kind = seam(parts[k], frag, oracle);
    const paragraphTurn = kind === 'sent';
    const alreadyClosed = closesSentence.test(body);
    // Structural run-on: the seam was not classified 'sent', yet the next fragment opens
    // a real sentence (sentence-initial, non-"I" capital) while the text so far ends on
    // a bare word. Closed inline with a period, without a paragraph break.
    const runOn = !paragraphTurn
      && endsOnBareWord.test(body)
      && frag.sentenceInitial
      && opensCapital.test(frag.text)
      && !opensWithI.test(frag.text);
    // Capitalize whenever the fragment ends up opening a sentence, including the case
    // where the prior fragment carried its own terminal punctuation across a non-'sent' seam.
    const piece = (paragraphTurn || runOn || alreadyClosed) ? capSentence(frag.text) : frag.text;
    let glue;
    if (paragraphTurn) {
      // sentence-boundary seams render as a paragraph break (stanza style)
      glue = `${alreadyClosed ? '' : '.'}\n\n`;
    } else if (runOn) {
      glue = '. ';
    } else {
      glue = ' ';
    }
    body += glue + piece;
  });
  // Clause-level clean-up on the joined text: catches repeats inside a single fragment
  // that the fragment-level pass cannot see.
  const text = stripOrphanAsterisk(trimDanglingEllipsis(dedupeText(body, _entName)));
  return {
    text,
    fragmentsUsed: parts.map(frag => frag.text),
    seams: parts.slice(1).map((frag, k) => seam(parts[k], frag, oracle)),
    target,
    words: wordsOnly(text).length,
    anchor: head.text,
    candidates: complete.length,
    lineages: new Set(complete.map(c => c.lineage)).size,
  };
};
// VARIETY for free: the beam already explored many complete compositions —
// surface top-N alternates that are TEXTUALLY distinct (trigram overlap,
// not index overlap — a span and its own sentences are the same words)
// and prefer different anchors.
// Union of the trigrams of every fragment referenced by candidate `b` (b.chain holds
// fragment indices into fragTris), in first-seen order.
const chainTriSet = b => new Set(b.chain.flatMap(fragIdx => [...fragTris[fragIdx]]));
let result = renderResult(best);
// BOUND SAFETY-NET (R102): the fragment-count penalty can, on a heavily-
// constrained turn (callback + wide avoid-set), pick a terse chain whose
// boundary seams drop the bound below the gate. The HARD INVARIANT comes first:
// if the winner's bound is low, fall back to the best-scoring complete candidate
// that clears the threshold. Coherence is never bought below the bound.
const bndOf = txt => { const v = validateBounded(txt, oracle); return (v.checked - v.bad.length) / Math.max(1, v.checked); };
const _bndFloor = W.bndFloor ?? 0.92;
if (bndOf(result.text) < _bndFloor) {
let found = false;
for (const c of complete) {
if (c === best) continue;
const r2 = renderResult(c);
if (bndOf(r2.text) >= _bndFloor) { result = r2; found = true; break; }
}
// none of the (coherence-penalized) candidates clears the bound — on a heavily
// constrained turn the fragment-count penalty made EVERY chain boundary-seamy.
// Recompose once WITHOUT the coherence penalties: coherence yields to the bound.
if (!found && (W.fFragCount || W.fBoundaryPen) && !opts._bndRetry) {
const r3 = beamCompose(store, vp, query, { ...opts, _bndRetry: true, weights: { ...W, fFragCount: 0, fBoundaryPen: 0 } });
if (r3 && r3.text && bndOf(r3.text) >= _bndFloor) result = r3;
}
}
const nAlt = opts.nAlternates || 0;
if (nAlt > 0) {
// lineage-grouped harvest: best complete candidate per seed lineage —
// different anchors by construction, textual-distinctness as backstop
const bestPerLineage = new Map();
for (const c of complete) {
const cur = bestPerLineage.get(c.lineage);
if (!cur || finalScore(c) > finalScore(cur)) bestPerLineage.set(c.lineage, c);
}
const picked = [chainTriSet(best)];
const alternates = [];
const ranked = [...bestPerLineage.values()].filter(c => c !== best).sort((a, b) => finalScore(b) - finalScore(a));
for (const c of ranked) {
if (alternates.length >= nAlt) break;
const cs = chainTriSet(c);
const tooClose = picked.some(p => {
let inter = 0;
for (const g of cs) if (p.has(g)) inter++;
return inter / Math.max(1, Math.min(cs.size, p.size)) > 0.6;
});
if (tooClose) continue;
picked.push(cs);
alternates.push(renderResult(c));
}
// GUARANTEED variety top-up: lineages converge on the same gravitational
// passages, so when the cheap harvest comes up short, RECOMPOSE with the
// already-used fragments banned — a genuinely different path through the
// memory, by construction.
const banned = new Set(opts.avoid || []);
for (const f of result.fragmentsUsed) banned.add(f);
for (const a of alternates) for (const f of a.fragmentsUsed) banned.add(f);
let guard = 0;
while (alternates.length < nAlt && guard < nAlt + 1) {
guard++;
const alt = beamCompose(store, vp, query, { ...opts, nAlternates: 0, avoid: new Set(banned) });
if (!alt || !alt.text || alt.text === result.text) break;
alternates.push(alt);
for (const f of alt.fragmentsUsed) banned.add(f);
}
result.alternates = alternates;
}
return result;
}
// v0 GREEDY COMPOSE (kept as fallback)
// opts.avoid: Set of fragment texts used in recent replies (variety)
// Greedy composer: picks one anchor fragment, optionally leads with an opener, then keeps
// appending the best-scoring fragment that seams onto the current tail until the word
// target is reached, and finally renders the chain into text.
//   store - { fragments, oracle }; fragments are read for text / sentenceInitial / posTag / src
//   vp    - handed to targetLength(); the first token of vp.name is passed to dedupeText
//   query - handed to rankFragments() and targetLength()
//   opts  - targetLength, avoid (Set of fragment texts), semantic, stimulus, eventness, answers
// Returns { text, fragmentsUsed, seams, target, words, anchor }. When no fragment can act as
// an anchor (empty store, or nothing sentence-initial) the result is empty (text '',
// words 0, anchor '') rather than a TypeError on the missing anchor.
function compose(store, vp, query, opts = {}) {
  const { fragments, oracle } = store;
  const rel = rankFragments(fragments, query, opts.semantic || null, opts.stimulus || null, opts.eventness, null, opts.answers || null);
  const target = opts.targetLength || targetLength(vp, query);
  const avoid = opts.avoid || new Set();
  const used = new Set();
  // A fragment's word count never changes; compute it at most once instead of on every
  // pass of the extension loop.
  const wordCounts = new Map();
  const countWords = f => {
    let c = wordCounts.get(f);
    if (c === undefined) {
      c = wordsOnly(f.text).length;
      wordCounts.set(f, c);
    }
    return c;
  };

  // 1. pick the strongest anchor (must engage the query) — anchors must be
  //    able to START speech: sentence-initial, not glue clauses
  let anchorIdx = -1;
  let best = -1;
  for (const [i, s] of rel) {
    const f = fragments[i];
    if (avoid.has(f.text)) continue;
    if (!f.sentenceInitial || f.posTag === 'clause') continue;
    const bonus = f.posTag === 'body' ? 0.1 : 0;
    if (s + bonus > best) { best = s + bonus; anchorIdx = i; }
  }
  // relaxed fallbacks: first ranked sentence-initial fragment (avoid-set and clause tag
  // no longer checked), then any sentence-initial opener in the store
  if (anchorIdx < 0) {
    for (const [i] of rel) { if (fragments[i].sentenceInitial) { anchorIdx = i; break; } }
  }
  if (anchorIdx < 0) anchorIdx = fragments.findIndex(f => f.sentenceInitial && f.posTag === 'opener');
  // findIndex yields -1 when nothing qualifies; fragments[-1] is undefined, so bail out
  // with an empty result of the usual shape instead of dereferencing it.
  if (anchorIdx < 0) {
    return { text: '', fragmentsUsed: [], seams: [], target, words: 0, anchor: '' };
  }

  // 2. pick an opener that can lead (prefer real openers; relevance helps)
  const openers = fragments.map((f, i) => ({ f, i }))
    .filter(x => x.f.posTag === 'opener' && !avoid.has(x.f.text) && x.i !== anchorIdx);
  openers.sort((a, b) => (rel.get(b.i) || 0) - (rel.get(a.i) || 0));
  const chain = [];
  if (openers.length && countWords(openers[0].f) < target) {
    chain.push(openers[0].f); used.add(openers[0].f.text);
  }
  // anchor goes next (or first). If the opener cannot seam into the anchor it is dropped
  // from the chain; its text stays in `used`, so it is not reconsidered in step 3.
  const anchor = fragments[anchorIdx];
  if (chain.length && !seam(chain[chain.length - 1], anchor, oracle)) chain.length = 0;
  chain.push(anchor); used.add(anchor.text);

  // 3. greedily extend toward target length with legal, cohesive fragments
  let len = chain.reduce((s, f) => s + countWords(f), 0);
  let guard = 0;
  while (len < target && guard++ < 40) {
    const tail = chain[chain.length - 1];
    let pick = null;
    let pickScore = -1;
    for (let i = 0; i < fragments.length; i++) {
      const f = fragments[i];
      if (used.has(f.text) || avoid.has(f.text)) continue;
      const fw = countWords(f);
      if (len + fw > target * 1.5) continue;
      const sm = seam(tail, f, oracle);
      if (!sm) continue;
      // score: seam quality + relevance + closer-bonus when near target
      let s = (sm === 'tri' ? 0.5 : 0.25) + (rel.get(i) || 0) * 0.8;
      if (f.posTag === 'closer' && len + fw >= target * 0.75) s += 0.35;
      if (f.posTag === 'opener') s -= 0.4;
      if (f.src === tail.src && sm === 'tri') s += 0.15; // natural continuation
      if (s > pickScore) { pickScore = s; pick = f; }
    }
    if (!pick) break;
    chain.push(pick); used.add(pick.text);
    len += countWords(pick);
    if (pick.posTag === 'closer' && len >= target * 0.7) break;
  }

  // 4. render: fragments joined with a space, except that a boundary is closed with a
  //    period when the next fragment starts a real sentence (sentence-initial, non-"I"
  //    capital) and the text so far ends on a bare word. Punctuation only — no words added.
  const _termG = /[.!?…]['"”’)\]\*]*\s*$/;
  const _endsWordG = /[a-zA-Z0-9]["'”’)\]\*]*\s*$/; // only close a run-on after a bare word, not after ':'/'—'/','
  const seams = []; // seam kind per join, reused for the returned `seams` field
  let out = capSentence(chain[0].text); // reply must not OPEN lowercase
  for (let i = 1; i < chain.length; i++) {
    const f = chain[i];
    const sm = seam(chain[i - 1], f, oracle);
    seams.push(sm);
    const startsNewSent = sm !== 'sent' && _endsWordG.test(out) && f.sentenceInitial
      && /^[*"'"“\s]*[A-Z]/.test(f.text) && !/^[*"'"“\s]*I(['’]|\s|$)/.test(f.text);
    // also capitalize after a prior fragment that carried its own terminal punctuation
    const txtG = (sm === 'sent' || startsNewSent || _termG.test(out)) ? capSentence(f.text) : f.text;
    out += (sm === 'sent' ? (_termG.test(out) ? ' ' : '. ') : (startsNewSent ? '. ' : ' ')) + txtG;
  }
  const deduped = stripOrphanAsterisk(trimDanglingEllipsis(dedupeText(out, (vp && vp.name ? vp.name.split(/\s+/)[0] : ""))));
  return {
    text: deduped,
    fragmentsUsed: chain.map(f => f.text),
    seams,
    target, words: wordsOnly(deduped).length,
    anchor: anchor.text,
  };
}
// CommonJS export surface: the greedy composer (compose), beamCompose, and the supporting names listed below.
module.exports = { compose, beamCompose, seam, rankFragments, targetLength, DEFAULT_WEIGHTS, loadWeights, entityWeightsFile, detectRegisters };
|