LJTSG
/

cento-engine

Model card Files Files and versions

cento-engine / src /flow.js

LJTSG's picture

Cento v0.1 — bounded recombinant-memory engine

8494d00 verified 18 days ago

History Blame Contribute Delete

9.43 kB

	// flow.js — RUNG 3 PROTOTYPE: composition as a DYNAMICAL SYSTEM. Instead of
	// scoring candidates by hand-laws (beam), the reply is a TRAJECTORY through
	// embedding space: from the current state, PREDICT the next point, then SNAP
	// to the nearest LEGAL fragment (VQ — the corpus is the codebook, the bound).
	//
	// The dynamics here are NON-PARAMETRIC (no training yet): the corpus contains
	// the entity's REAL trajectories (the embedding sequence of each real reply).
	// Predict next = kernel-weighted average of the successors of the corpus states
	// most similar to where we are now. "When she was in a state like this, where
	// did she go next?" — her own motion through meaning, generalized.
	//
	// This is the scaffold: Rung-3-full will replace the kernel with a trained
	// SSM/transformer. First we prove that predict→snap→bounded works.
	'use strict';
	const fs = require('fs');

	const { seam } = require('./compose');
	const { wordsOnly } = require('./fragments');

	/**
	 * Gather the entity's REAL trajectories as transition pairs.
	 *
	 * Fragments are grouped by their `src` (in store order), skipping tier-1
	 * fragments, clauses and spans so that only the actual spoken path remains.
	 * Every pair of neighbours inside one group yields one transition
	 * (predecessor index -> successor index).
	 *
	 * @param {{fragments: Array<object>}} store - object whose `fragments` array
	 *   holds items with `tier`, `posTag`, `isSpan` and `src` fields.
	 * @returns {{pred: number[], succ: number[]}} parallel arrays: `pred[t]` and
	 *   `succ[t]` are the fragment indices of transition `t`.
	 */
	function buildTransitions(store) {
	  const bySrc = new Map();
	  store.fragments.forEach((f, i) => {
	    if (f.tier === 1 || f.posTag === 'clause' || f.isSpan) return;
	    const seq = bySrc.get(f.src);
	    if (seq) seq.push(i);
	    else bySrc.set(f.src, [i]);
	  });

	  const pred = [];
	  const succ = [];
	  for (const seq of bySrc.values()) {
	    for (let k = 0; k + 1 < seq.length; k++) {
	      pred.push(seq[k]);
	      succ.push(seq[k + 1]);
	    }
	  }
	  return { pred, succ };
	}

	/**
	 * Seedable PRNG (mulberry32) so creative runs are reproducible for tests but
	 * vary in use.
	 *
	 * @param {number} a - seed; coerced to a 32-bit integer on every call.
	 * @returns {function(): number} generator returning a float in [0, 1).
	 */
	function mulberry32(a) {
	  return function () {
	    a |= 0;
	    a = (a + 0x6D2B79F5) | 0;
	    let t = Math.imul(a ^ (a >>> 15), 1 | a);
	    t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t;
	    // `>>> 0` reinterprets the result as unsigned before scaling by 2^32.
	    return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
	  };
	}

	/**
	 * Copy row `i` of the flat embedding table into a fresh Float32Array.
	 *
	 * @param {{d: number, vectors: ArrayLike<number>}} emb - `vectors` is read as
	 *   a flat table with `d` consecutive values per row.
	 * @param {number} i - row index.
	 * @returns {Float32Array} a new length-`d` copy of the row.
	 */
	function vecOf(emb, i) {
	  const width = emb.d;
	  const start = i * width;
	  return Float32Array.from({ length: width }, (_, k) => emb.vectors[start + k]);
	}
	/**
	 * Dot product of row `i` of the embedding table with the vector `q`.
	 * NOTE(review): this equals cosine similarity only when both vectors are
	 * unit-length — presumably the embeddings are normalized; confirm upstream.
	 *
	 * @param {{d: number, vectors: ArrayLike<number>}} emb
	 * @param {number} i - row index into `emb.vectors`.
	 * @param {ArrayLike<number>} q - query vector (first `emb.d` entries are read).
	 * @returns {number}
	 */
	function cos(emb, i, q) {
	  const width = emb.d;
	  const base = i * width;
	  let acc = 0;
	  for (let k = 0; k < width; k += 1) {
	    acc += emb.vectors[base + k] * q[k];
	  }
	  return acc;
	}
	/**
	 * Plain dot product over the length of `a`.
	 *
	 * @param {ArrayLike<number>} a
	 * @param {ArrayLike<number>} b - read at the same indices as `a`.
	 * @returns {number}
	 */
	function dot(a, b) {
	  let total = 0;
	  let k = 0;
	  while (k < a.length) {
	    total += a[k] * b[k];
	    k += 1;
	  }
	  return total;
	}

	// TRAINED DYNAMICS (Rung-3-full): load an MLP (from train_flow.py) and do the
	// forward pass in JS — out = normalize(x + W2·relu(W1·x + b1) + b2). The kernel
	// becomes learned weights; inference stays in-process and fast.
	/**
	 * Load a trained flow MLP from a JSON file (best-effort).
	 *
	 * Uses the module-level `fs` binding. Any failure — missing/unreadable file,
	 * invalid JSON, or a payload that is not an object — yields `null`, so the
	 * caller can fall back to the non-parametric kernel.
	 *
	 * @param {string} filePath - path to the JSON model file.
	 * @returns {object|null} the parsed model, or `null` when it cannot be loaded.
	 */
	function loadFlowMLP(filePath) {
	  try {
	    const parsed = JSON.parse(fs.readFileSync(filePath, 'utf8'));
	    return parsed !== null && typeof parsed === 'object' ? parsed : null;
	  } catch (_) {
	    // Deliberate best-effort: a missing or corrupt model means "no MLP".
	    return null;
	  }
	}
	/**
	 * Forward pass of the residual MLP:
	 * out = normalize(x + W2·relu(W1·x + b1) + b2), where x is row `curIdx` of
	 * the embedding table.
	 *
	 * @param {{d: number, H: number, W1: number[][], b1: number[], W2: number[][], b2: number[]}} mlp
	 *   - `W1` is indexed [H][d], `W2` is indexed [d][H].
	 * @param {{vectors: ArrayLike<number>}} emb - flat table read with stride `mlp.d`.
	 * @param {number} curIdx - row index of the current state.
	 * @returns {Float32Array} length-`d` prediction, L2-normalized (left as all
	 *   zeros when the pre-normalization vector has zero norm).
	 */
	function predictNextMLP(mlp, emb, curIdx) {
	  const d = mlp.d;
	  const H = mlp.H;

	  // x: the current state's embedding.
	  const x = new Float32Array(d);
	  const off = curIdx * d;
	  for (let k = 0; k < d; k++) x[k] = emb.vectors[off + k];

	  // h = relu(W1·x + b1)
	  const h = new Float32Array(H);
	  for (let j = 0; j < H; j++) {
	    const row = mlp.W1[j];
	    let s = mlp.b1[j];
	    for (let k = 0; k < d; k++) s += row[k] * x[k];
	    h[j] = s > 0 ? s : 0;
	  }

	  // out = x + W2·h + b2 (residual connection); W2 is [d×H].
	  const out = new Float32Array(d);
	  for (let i = 0; i < d; i++) {
	    const row = mlp.W2[i];
	    let s = mlp.b2[i];
	    for (let j = 0; j < H; j++) s += row[j] * h[j];
	    out[i] = x[i] + s;
	  }

	  // L2-normalize; `|| 1` avoids dividing by zero for an all-zero vector.
	  let n = 0;
	  for (let k = 0; k < d; k++) n += out[k] * out[k];
	  n = Math.sqrt(n) || 1;
	  for (let k = 0; k < d; k++) out[k] /= n;
	  return out;
	}

	/**
	 * Weighted (diagonal-metric) similarity for learned attention:
	 * Σ_k w[k] · emb.vectors[i·d + k] · cur[k].
	 *
	 * @param {{d: number, vectors: ArrayLike<number>}} emb
	 * @param {number} i - row index into `emb.vectors`.
	 * @param {ArrayLike<number>} cur - current-state vector.
	 * @param {ArrayLike<number>} w - per-dimension weights.
	 * @returns {number}
	 */
	function wsim(emb, i, cur, w) {
	  const width = emb.d;
	  const base = i * width;
	  let acc = 0;
	  for (let k = 0; k < width; k += 1) {
	    acc += w[k] * emb.vectors[base + k] * cur[k];
	  }
	  return acc;
	}

	/**
	 * Low-rank projection: map a d-vector through P (r rows of length d) to an
	 * r-vector.
	 *
	 * @param {ArrayLike<ArrayLike<number>>} P - projection rows, indexed [r][d].
	 * @param {ArrayLike<number>} vec - input vector.
	 * @param {number} d - input dimension.
	 * @param {number} r - output dimension.
	 * @returns {Float32Array} length-`r` projection.
	 */
	function project(P, vec, d, r) {
	  return Float32Array.from({ length: r }, (_, j) => {
	    const row = P[j];
	    let acc = 0;
	    for (let k = 0; k < d; k += 1) acc += row[k] * vec[k];
	    return acc;
	  });
	}
	/**
	 * Precompute the projected predecessor keys (n×r, flat) once per `attn`.
	 *
	 * The result is memoized on `attn._keys`; once that field is set, later calls
	 * return it as-is without looking at `emb` or `trans` again.
	 *
	 * @param {{d: number, vectors: ArrayLike<number>}} emb
	 * @param {{pred: number[]}} trans - transitions; only `pred` is read here.
	 * @param {{r: number, P: ArrayLike<ArrayLike<number>>, _keys?: Float32Array}} attn
	 * @returns {Float32Array} flat table where entry `t * r + j` is P[j] · emb row pred[t].
	 */
	function projKeys(emb, trans, attn) {
	  if (attn._keys) return attn._keys;

	  const width = emb.d;
	  const rank = attn.r;
	  const count = trans.pred.length;
	  const table = new Float32Array(count * rank);

	  for (let t = 0; t < count; t += 1) {
	    const base = trans.pred[t] * width;
	    for (let j = 0; j < rank; j += 1) {
	      const row = attn.P[j];
	      let acc = 0;
	      for (let k = 0; k < width; k += 1) acc += row[k] * emb.vectors[base + k];
	      table[t * rank + j] = acc;
	    }
	  }

	  attn._keys = table;
	  return table;
	}

	/**
	 * Predict the next-state embedding via attention over real transitions
	 * (query = current state, keys = predecessors, values = successors).
	 *
	 * Scoring mode depends on `attn`:
	 *   - `attn.P` set  → low-rank learned attention (projected dot product);
	 *   - `attn` set    → learned diagonal metric `attn.w`;
	 *   - no `attn`     → raw dot-product ("cosine") kernel.
	 * Either way the output is a softmax-weighted average of REAL successor
	 * embeddings, re-normalized to unit length, so it stays in the data manifold
	 * and cannot collapse (the MLP failure mode).
	 *
	 * @param {{d: number, vectors: ArrayLike<number>}} emb
	 * @param {{pred: number[], succ: number[]}} trans - transition pairs.
	 * @param {number} curIdx - fragment index of the current state.
	 * @param {number} [K] - number of top-scoring transitions to average (default 40).
	 * @param {{P?: Array, r?: number, w?: ArrayLike<number>, tau?: number}} [attn]
	 *   - learned attention parameters; `tau` falls back to 8 when absent.
	 * @returns {Float32Array} length-`d` prediction (all zeros when there are no
	 *   transitions).
	 */
	function predictNext(emb, trans, curIdx, K, attn) {
	  const d = emb.d;
	  const cur = vecOf(emb, curIdx);
	  const nTrans = trans.pred.length;

	  // Score every transition by how similar its predecessor is to `cur`.
	  const scored = [];
	  if (attn && attn.P) {
	    // low-rank learned attention
	    const r = attn.r;
	    const keys = projKeys(emb, trans, attn);
	    const pq = project(attn.P, cur, d, r);
	    for (let t = 0; t < nTrans; t++) {
	      const off = t * r;
	      let s = 0;
	      for (let j = 0; j < r; j++) s += pq[j] * keys[off + j];
	      scored.push([t, s]);
	    }
	  } else if (attn) {
	    for (let t = 0; t < nTrans; t++) scored.push([t, wsim(emb, trans.pred[t], cur, attn.w)]);
	  } else {
	    for (let t = 0; t < nTrans; t++) scored.push([t, cos(emb, trans.pred[t], cur)]);
	  }
	  scored.sort((a, b) => b[1] - a[1]);
	  const top = scored.slice(0, K || 40);

	  // A missing tau would turn every weight into NaN; use the kernel default.
	  const tau = attn && attn.tau != null ? attn.tau : 8;

	  // Softmax-weighted average of the successors; scores are shifted by the
	  // best score so the largest weight is exp(0) = 1.
	  const out = new Float32Array(d);
	  let wsum = 0;
	  for (const [t, s] of top) {
	    const w = Math.exp((s - top[0][1]) * tau);
	    wsum += w;
	    const off = trans.succ[t] * d;
	    for (let k = 0; k < d; k++) out[k] += w * emb.vectors[off + k];
	  }
	  if (wsum > 0) for (let k = 0; k < d; k++) out[k] /= wsum;

	  // L2-normalize; `|| 1` avoids dividing by zero for an all-zero vector.
	  let n = 0;
	  for (let k = 0; k < d; k++) n += out[k] * out[k];
	  n = Math.sqrt(n) || 1;
	  for (let k = 0; k < d; k++) out[k] /= n;
	  return out;
	}

	/**
	 * Greedy flow composition: anchor by relevance, then at each step predict the
	 * next state and snap to the nearest LEGAL, unused fragment. Every emitted
	 * piece is a corpus fragment, so the reply is bounded by construction.
	 *
	 * @param {{fragments: Array<object>, oracle: *}} store - fragment store;
	 *   `oracle` is passed through to `seam`.
	 * @param {*} vp - unused by this function; kept for interface compatibility.
	 * @param {*} query - unused by this function; kept for interface compatibility.
	 * @param {object} [opts]
	 * @param {{d: number, vectors: ArrayLike<number>}} [opts.emb] - embedding
	 *   table; needed as soon as a continuation step is taken.
	 * @param {Map<number, number>} [opts.relevance] - fragmentIndex -> 0..1
	 *   (from recall.js, optional).
	 * @param {{pred: number[], succ: number[]}} [opts._trans] - precomputed
	 *   transitions; built from `store` when omitted.
	 * @param {number} [opts.targetLength=90] - target reply length in words.
	 * @param {number} [opts.temp=0] - snap temperature. 0 → argmax (steady);
	 *   >0 → sample among the legal fragments nearest the predicted state.
	 *   Every sample is still a real fragment, so randomness stays bounded.
	 * @param {number} [opts.seed=1] - PRNG seed for reproducible sampling.
	 * @param {object} [opts.mlp] - trained MLP; when set it replaces the kernel.
	 * @param {number} [opts.K=40] - kernel neighbourhood size.
	 * @param {object} [opts.attn] - learned attention parameters for the kernel.
	 * @returns {{text: string, fragmentsUsed: string[], words: number, target: number, method: string}}
	 *   the composed reply; empty text/fragments when no anchor fragment exists.
	 */
	function composeFlow(store, vp, query, opts = {}) {
	  const MAX_STEPS = 12;        // hard cap on continuation steps
	  const OVERSHOOT = 1.25;      // stop growing past target * OVERSHOOT words
	  const MIN_FILL = 0.7;        // may stop at a sentence end once this full
	  const FLOW_WEIGHT = 0.7;     // weight of closeness to the predicted state
	  const REL_WEIGHT = 0.3;      // weight of query relevance
	  const SAMPLE_POOL = 8;       // top-N candidates eligible for sampling
	  const TERMINAL_RE = /[.!?…]["')\]]*$/;

	  const { fragments, oracle } = store;
	  const emb = opts.emb;
	  const rel = opts.relevance || new Map();
	  const trans = opts._trans || buildTransitions(store);
	  const target = opts.targetLength || 90;
	  const temp = opts.temp || 0;
	  const rng = mulberry32((opts.seed || 1) >>> 0);

	  // cands: [idx, score] sorted desc. temp→0: take best. temp>0: softmax-sample
	  // among the top SAMPLE_POOL.
	  const sampleTop = (cands) => {
	    if (temp <= 0.001 || cands.length === 1) return cands[0][0];
	    const N = Math.min(cands.length, SAMPLE_POOL);
	    const top = cands.slice(0, N);
	    const s0 = top[0][1];
	    const ws = top.map(([, s]) => Math.exp((s - s0) / Math.max(0.05, temp)));
	    const sum = ws.reduce((a, b) => a + b, 0);
	    let r = rng() * sum;
	    for (let k = 0; k < N; k++) {
	      r -= ws[k];
	      if (r <= 0) return top[k][0];
	    }
	    return top[N - 1][0]; // guard against floating-point leftovers
	  };

	  // anchor: most relevant sentence-initial tier-0 fragment
	  let anchor = -1;
	  let best = -Infinity;
	  for (let i = 0; i < fragments.length; i++) {
	    const f = fragments[i];
	    if (f.tier === 1 || !f.sentenceInitial || f.posTag === 'clause' || f.isSpan) continue;
	    const r = rel.get(i) || 0;
	    if (r > best) { best = r; anchor = i; }
	  }
	  if (anchor < 0) anchor = fragments.findIndex((f) => f.sentenceInitial && f.tier !== 1);
	  // Nothing to start from (e.g. empty store): return an empty reply instead
	  // of dereferencing fragments[-1].
	  if (anchor < 0) return { text: '', fragmentsUsed: [], words: 0, target, method: 'flow' };

	  const chain = [anchor];
	  const used = new Set([anchor]);
	  let len = wordsOnly(fragments[anchor].text).length;

	  for (let step = 0; step < MAX_STEPS && len < target * OVERSHOOT; step++) {
	    const tail = chain[chain.length - 1];
	    const tailF = fragments[tail];
	    const terminal = TERMINAL_RE.test(tailF.text.trim());
	    if (len >= target * MIN_FILL && terminal) break;

	    const eNext = opts.mlp
	      ? predictNextMLP(opts.mlp, emb, tail)
	      : predictNext(emb, trans, tail, opts.K || 40, opts.attn);

	    // candidate legal successors (seam-legal, tier-0, unused), scored by
	    // closeness to the predicted next-state + relevance
	    const cands = [];
	    for (let i = 0; i < fragments.length; i++) {
	      if (used.has(i) || fragments[i].tier === 1 || fragments[i].isSpan) continue;
	      if (!seam(tailF, fragments[i], oracle)) continue;
	      const flowSim = cos(emb, i, eNext);
	      cands.push([i, flowSim * FLOW_WEIGHT + (rel.get(i) || 0) * REL_WEIGHT]);
	    }
	    if (!cands.length) break;
	    cands.sort((a, b) => b[1] - a[1]);

	    const bestI = sampleTop(cands); // temp=0 → argmax; temp>0 → creative sample
	    chain.push(bestI);
	    used.add(bestI);
	    len += wordsOnly(fragments[bestI].text).length;
	  }

	  // Every seam kind was rendered as a single space, so the pieces are simply
	  // space-joined.
	  const fragmentsUsed = chain.map((i) => fragments[i].text);
	  const out = fragmentsUsed.join(' ');
	  return { text: out, fragmentsUsed, words: wordsOnly(out).length, target, method: 'flow' };
	}

	module.exports = { composeFlow, buildTransitions, loadFlowMLP, predictNextMLP, predictNext };