// flow.js — RUNG 3 PROTOTYPE: composition as a DYNAMICAL SYSTEM. Instead of
// scoring candidates by hand-laws (beam), the reply is a TRAJECTORY through
// embedding space: from the current state, PREDICT the next point, then SNAP
// to the nearest LEGAL fragment (VQ — the corpus is the codebook, the bound).
//
// The dynamics here are NON-PARAMETRIC (no training yet): the corpus contains
// the entity's REAL trajectories (the embedding sequence of each real reply).
// Predict next = kernel-weighted average of the successors of the corpus states
// most similar to where we are now. "When she was in a state like this, where
// did she go next?" — her own motion through meaning, generalized.
//
// This is the scaffold Rung-3-full replaces the kernel with a trained SSM/
// transformer. Proving predict→snap→bounded works first.
'use strict';

const fs = require('fs');

const { seam } = require('./compose');
const { wordsOnly } = require('./fragments');

// gather the entity's REAL trajectories: ordered sentence-fragment indices per
// reply (skip clauses/spans — we want the actual spoken path), then transition
// pairs (predecessor index -> successor index).
function buildTransitions(store) {
  // src -> fragment indices belonging to that src, in store order
  const bySrc = new Map();
  store.fragments.forEach((f, i) => {
    // skipped: tier-1 fragments, clauses and spans
    if (f.tier === 1 || f.posTag === 'clause' || f.isSpan) return;
    if (!bySrc.has(f.src)) bySrc.set(f.src, []);
    bySrc.get(f.src).push(i);
  });

  // every adjacent pair inside one src becomes a (pred -> succ) transition;
  // pred[t] and succ[t] are parallel arrays of fragment indices
  const pred = [];
  const succ = [];
  for (const seq of bySrc.values()) {
    for (let k = 0; k + 1 < seq.length; k++) {
      pred.push(seq[k]);
      succ.push(seq[k + 1]);
    }
  }
  return { pred, succ };
}

// seedable PRNG so creative runs are reproducible for tests but vary in use.
// Returns a generator function yielding floats in [0, 1). The arithmetic must
// stay bit-exact, otherwise seeded runs stop being reproducible.
function mulberry32(a) {
  return function () {
    a |= 0;
    a = a + 0x6D2B79F5 | 0;
    let t = Math.imul(a ^ a >>> 15, 1 | a);
    t = t + Math.imul(t ^ t >>> 7, 61 | t) ^ t;
    return ((t ^ t >>> 14) >>> 0) / 4294967296;
  };
}

// copy row i of the flat (row-major, stride emb.d) embedding table into a
// fresh Float32Array
function vecOf(emb, i) {
  const d = emb.d;
  const off = i * d;
  const v = new Float32Array(d);
  for (let k = 0; k < d; k++) v[k] = emb.vectors[off + k];
  return v;
}

// dot product of embedding row i with vector q. This is only a true cosine
// when both sides are unit-length — presumably the rows are pre-normalized;
// verify against whatever builds emb.
function cos(emb, i, q) {
  const d = emb.d;
  const off = i * d;
  let s = 0;
  for (let k = 0; k < d; k++) s += emb.vectors[off + k] * q[k];
  return s;
}

// plain dot product of two equal-length vectors (iterates over a.length)
function dot(a, b) {
  let s = 0;
  for (let k = 0; k < a.length; k++) s += a[k] * b[k];
  return s;
}

// TRAINED DYNAMICS (Rung-3-full): load an MLP (from train_flow.py) and do the
// forward pass in JS — out = normalize(x + W2·relu(W1·x + b1) + b2). The kernel
// becomes learned weights; inference stays in-process and fast.
//
// Best-effort loader: returns the parsed model object, or null when the file
// is missing, unreadable, not valid JSON, or does not contain a JSON object.
// composeFlow only takes the MLP path when opts.mlp is truthy, so a null
// result leaves it on the kernel predictor.
function loadFlowMLP(filePath) {
  try {
    const m = JSON.parse(fs.readFileSync(filePath, 'utf8'));
    return m !== null && typeof m === 'object' ? m : null;
  } catch (_) {
    return null;
  }
}

// forward pass of the residual MLP for the fragment at curIdx.
// mlp = { d, H, W1 (H rows of d), b1 (H), W2 (d rows of H), b2 (d) }.
// Returns a unit-length Float32Array of size d (left as-is if its norm is 0).
function predictNextMLP(mlp, emb, curIdx) {
  const d = mlp.d;
  const H = mlp.H;

  // input: embedding row of the current fragment
  const x = new Float32Array(d);
  const off = curIdx * d;
  for (let k = 0; k < d; k++) x[k] = emb.vectors[off + k];

  // hidden layer: relu(W1·x + b1)
  const h = new Float32Array(H);
  for (let j = 0; j < H; j++) {
    let s = mlp.b1[j];
    const row = mlp.W1[j];
    for (let k = 0; k < d; k++) s += row[k] * x[k];
    h[j] = s > 0 ? s : 0;
  }

  // output layer with residual connection: x + W2·h + b2 — W2 is [d×H]
  const out = new Float32Array(d);
  for (let i = 0; i < d; i++) {
    let s = mlp.b2[i];
    const row = mlp.W2[i];
    for (let j = 0; j < H; j++) s += row[j] * h[j];
    out[i] = x[i] + s;
  }

  // L2-normalize; "|| 1" avoids dividing by zero for an all-zero output
  let n = 0;
  for (let k = 0; k < d; k++) n += out[k] * out[k];
  n = Math.sqrt(n) || 1;
  for (let k = 0; k < d; k++) out[k] /= n;
  return out;
}

// weighted (diagonal-metric) similarity for learned attention: Σ w_k a_k b_k
function wsim(emb, i, cur, w) {
  const d = emb.d;
  const off = i * d;
  let s = 0;
  for (let k = 0; k < d; k++) s += w[k] * emb.vectors[off + k] * cur[k];
  return s;
}

// low-rank projection: project a d-vector through P (r×d) → r-vector
function project(P, vec, d, r) {
  const out = new Float32Array(r);
  for (let j = 0; j < r; j++) {
    const row = P[j];
    let s = 0;
    for (let k = 0; k < d; k++) s += row[k] * vec[k];
    out[j] = s;
  }
  return out;
}

// precompute projected predecessor keys (n×r) once per attn — cached on attn obj.
// The cache is reused only while its size still matches n*r, so an attn object
// that gets paired with a different-sized transition table is re-projected
// instead of serving stale keys. A table of the SAME size but different
// contents is not detected — clear attn._keys manually in that case.
function projKeys(emb, trans, attn) {
  const d = emb.d;
  const r = attn.r;
  const n = trans.pred.length;
  if (attn._keys && attn._keys.length === n * r) return attn._keys;

  const keys = new Float32Array(n * r);
  for (let t = 0; t < n; t++) {
    const off = trans.pred[t] * d;
    for (let j = 0; j < r; j++) {
      const row = attn.P[j];
      let s = 0;
      for (let k = 0; k < d; k++) s += row[k] * emb.vectors[off + k];
      keys[t * r + j] = s;
    }
  }
  attn._keys = keys;
  return keys;
}

// predict the next-state embedding via attention over real transitions
// (query=current, keys=predecessors, values=successors). attn={w,tau} uses a
// LEARNED diagonal metric + temperature; else raw cosine kernel. Either way the
// output is a weighted avg of REAL successors — always in the data manifold, so
// it CANNOT collapse (the MLP failure mode).
// Kernel / attention predictor for the next state.
//   emb    : { d, vectors } flat row-major embedding table
//   trans  : { pred, succ } parallel arrays of fragment indices
//   curIdx : fragment index of the current state
//   K      : how many best-matching transitions to average (falsy → 40)
//   attn   : optional. { P, r, tau } → low-rank learned attention;
//            { w, tau } → diagonal-metric attention; omitted → raw dot-product
//            kernel with temperature 8.
// Returns a unit-length Float32Array(d); an all-zero vector when there are no
// transitions to average.
function predictNext(emb, trans, curIdx, K, attn) {
  const cur = vecOf(emb, curIdx);
  const d = emb.d;
  const n = trans.pred.length;

  // score every transition by how similar its predecessor is to the current state
  const scored = [];
  if (attn && attn.P) { // low-rank learned attention
    const r = attn.r;
    const keys = projKeys(emb, trans, attn);
    const pq = project(attn.P, cur, d, r);
    for (let t = 0; t < n; t++) {
      const off = t * r;
      let s = 0;
      for (let j = 0; j < r; j++) s += pq[j] * keys[off + j];
      scored.push([t, s]);
    }
  } else if (attn) {
    for (let t = 0; t < n; t++) scored.push([t, wsim(emb, trans.pred[t], cur, attn.w)]);
  } else {
    for (let t = 0; t < n; t++) scored.push([t, cos(emb, trans.pred[t], cur)]);
  }
  scored.sort((a, b) => b[1] - a[1]);
  const top = scored.slice(0, K || 40);

  // a missing / non-finite attn.tau would turn every weight into NaN and
  // poison the whole output vector, so fall back to the kernel default
  const tau = attn && Number.isFinite(attn.tau) ? attn.tau : 8;

  // softmax-style weights relative to the best score (so the max weight is 1),
  // applied to the SUCCESSOR embeddings of the top transitions
  const out = new Float32Array(d);
  let wsum = 0;
  for (const [t, s] of top) {
    const w = Math.exp((s - top[0][1]) * tau);
    wsum += w;
    const off = trans.succ[t] * d;
    for (let k = 0; k < d; k++) out[k] += w * emb.vectors[off + k];
  }
  if (wsum > 0) for (let k = 0; k < d; k++) out[k] /= wsum;

  // L2-normalize; "|| 1" keeps an all-zero vector as-is instead of dividing by 0
  let norm = 0;
  for (let k = 0; k < d; k++) norm += out[k] * out[k];
  norm = Math.sqrt(norm) || 1;
  for (let k = 0; k < d; k++) out[k] /= norm;
  return out;
}

// sentence-final punctuation, optionally followed by closing quotes/brackets
const TERMINAL_RE = /[.!?…]["')\]]*$/;

// greedy flow: anchor by relevance, then at each step predict-next + snap to the
// nearest LEGAL, on-topic, unused fragment. Bounded by construction.
//
//   store : { fragments, oracle }
//   vp, query : accepted for signature compatibility; not read by this function
//   opts  : emb (required embedding table), relevance (Map fragmentIndex -> 0..1,
//           from recall.js, optional), _trans (precomputed buildTransitions
//           result), targetLength (default 90), temp (default 0), seed
//           (default 1; note a seed of 0 is also mapped to 1), mlp (trained
//           model → use predictNextMLP), K and attn (forwarded to predictNext).
// Returns { text, fragmentsUsed, words, target, method: 'flow' }. When the store
// has no fragment that can serve as an anchor the result is empty (text '',
// no fragments, 0 words) rather than an exception.
function composeFlow(store, vp, query, opts = {}) {
  const { fragments, oracle } = store;
  const emb = opts.emb;
  const rel = opts.relevance || new Map(); // fragmentIndex -> 0..1 (from recall.js, optional)
  const trans = opts._trans || buildTransitions(store);
  const target = opts.targetLength || 90;

  // CREATIVITY: temperature on the snap. temp=0 → argmax (steady, R28). temp>0 →
  // sample among the legal fragments NEAR the predicted next-state. Every
  // sample is a real, corpus-legal fragment, so higher temp = more surprising
  // BUT NEVER unbounded. Safe wildness — the bound makes randomness harmless.
  const temp = opts.temp || 0;
  const rng = mulberry32((opts.seed || 1) >>> 0);
  const sampleTop = (cands) => {
    // cands: [idx, score] sorted desc. temp→0: take best. temp>0: softmax-sample top-N.
    if (temp <= 0.001 || cands.length === 1) return cands[0][0];
    const N = Math.min(cands.length, 8);
    const top = cands.slice(0, N);
    const s0 = top[0][1];
    const ws = top.map(([, s]) => Math.exp((s - s0) / Math.max(0.05, temp)));
    const sum = ws.reduce((a, b) => a + b, 0);
    let r = rng() * sum;
    for (let k = 0; k < N; k++) {
      r -= ws[k];
      if (r <= 0) return top[k][0];
    }
    return top[N - 1][0]; // floating-point leftovers: fall back to the last of the top-N
  };

  // anchor: most relevant sentence-initial tier-0 fragment
  let anchor = -1;
  let best = -Infinity;
  for (let i = 0; i < fragments.length; i++) {
    const f = fragments[i];
    if (f.tier === 1 || !f.sentenceInitial || f.posTag === 'clause' || f.isSpan) continue;
    const r = rel.get(i) || 0;
    if (r > best) {
      best = r;
      anchor = i;
    }
  }
  // relaxed fallback: any sentence-initial non-tier-1 fragment
  if (anchor < 0) anchor = fragments.findIndex(f => f.sentenceInitial && f.tier !== 1);
  // nothing can start a reply (e.g. empty store): return an empty, still
  // bounded result instead of dereferencing fragments[-1]
  if (anchor < 0) {
    return { text: '', fragmentsUsed: [], words: 0, target, method: 'flow' };
  }

  const chain = [anchor];
  const used = new Set([anchor]);
  let len = wordsOnly(fragments[anchor].text).length;

  // at most 12 extensions, and never beyond 125% of the target word count
  for (let step = 0; step < 12 && len < target * 1.25; step++) {
    const tail = chain[chain.length - 1];
    const tailF = fragments[tail];
    // long enough (≥70% of target) and ending on a finished sentence → stop
    const terminal = TERMINAL_RE.test(tailF.text.trim());
    if (len >= target * 0.7 && terminal) break;

    const eNext = opts.mlp
      ? predictNextMLP(opts.mlp, emb, tail)
      : predictNext(emb, trans, tail, opts.K || 40, opts.attn);

    // candidate legal successors (seam-legal, tier-0, unused), scored by
    // closeness to the predicted next-state + relevance
    const cands = [];
    for (let i = 0; i < fragments.length; i++) {
      if (used.has(i) || fragments[i].tier === 1 || fragments[i].isSpan) continue;
      if (!seam(tailF, fragments[i], oracle)) continue;
      const flowSim = cos(emb, i, eNext);
      cands.push([i, flowSim * 0.7 + (rel.get(i) || 0) * 0.3]);
    }
    if (!cands.length) break;
    cands.sort((a, b) => b[1] - a[1]);

    const bestI = sampleTop(cands); // temp=0 → argmax; temp>0 → creative sample
    chain.push(bestI);
    used.add(bestI);
    len += wordsOnly(fragments[bestI].text).length;
  }

  const chainF = chain.map(i => fragments[i]);
  // every seam kind is joined with a single space, so the seam type does not
  // need to be recomputed here
  const out = chainF.map(f => f.text).join(' ');
  return {
    text: out,
    fragmentsUsed: chainF.map(f => f.text),
    words: wordsOnly(out).length,
    target,
    method: 'flow',
  };
}

module.exports = { composeFlow, buildTransitions, loadFlowMLP, predictNextMLP, predictNext };