browser-speak / index.html
Mike0021's picture
Show live microphone input level
1a21e8c verified
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Local Voice LLM</title>
  <!-- Empty data: URI as the icon; presumably there to avoid a favicon request. -->
  <link rel="icon" href="data:,">
  <link rel="stylesheet" href="./styles.css">
</head>
<body>
<main class="shell">
<!-- Session controls: page heading plus the load / mic / stop buttons.
     The mic and stop buttons are disabled in the initial markup. -->
<section class="topbar" aria-label="Session controls">
  <div>
    <h1>Local Voice LLM</h1>
    <p>Client-side VAD, speech recognition, language model, and Supertonic speech synthesis.</p>
  </div>
  <div class="actions">
    <button
      id="loadButton"
      type="button"
    >
      <span class="button-icon" aria-hidden="true"></span>
      Load models
    </button>
    <button
      id="micButton"
      type="button"
      disabled
    >
      <span class="button-icon" aria-hidden="true"></span>
      Start mic
    </button>
    <button
      id="stopButton"
      type="button"
      title="Stop generation and audio playback"
      disabled
    >
      <span class="button-icon" aria-hidden="true"></span>
      Stop
    </button>
  </div>
</section>
<!-- One status tile per pipeline stage (VAD, STT, LLM, TTS).
     Every tile starts with data-state="idle" and the text "Idle". -->
<section class="status-grid" aria-label="Model and pipeline state">
  <article class="status-tile" id="vadTile" data-state="idle">
    <span>VAD</span>
    <strong id="vadState">Idle</strong>
  </article>
  <article class="status-tile" id="asrTile" data-state="idle">
    <span>STT</span>
    <strong id="asrState">Idle</strong>
  </article>
  <article class="status-tile" id="llmTile" data-state="idle">
    <span>LLM</span>
    <strong id="llmState">Idle</strong>
  </article>
  <article class="status-tile" id="ttsTile" data-state="idle">
    <span>TTS</span>
    <strong id="ttsState">Idle</strong>
  </article>
</section>
<section class="workspace">
<!-- Conversation column: microphone input panel followed by the assistant response panel.
     role="group" gives the aria-label a role to attach to; a label on a role-less div
     is not reliably exposed to assistive technology. -->
<div class="conversation" role="group" aria-label="Conversation">
  <div class="transcript-panel">
    <div class="panel-head">
      <h2>Input</h2>
      <span id="micBadge" class="badge">Mic off</span>
    </div>
    <div id="partialTranscript" class="partial">Waiting for speech.</div>
    <div class="mic-level">
      <span>Input level</span>
      <!-- The accessible name sits on the meter itself: role="meter" requires one. -->
      <div
        id="micLevelMeter"
        class="mic-level-meter"
        role="meter"
        aria-label="Microphone input level"
        aria-valuemin="0"
        aria-valuemax="100"
        aria-valuenow="0"
      >
        <span id="micLevelBar"></span>
      </div>
      <!-- <output> is an implicit polite live region. aria-live="off" keeps a frequently
           changing level from being announced on every update; the meter above exposes
           the value instead (assumes the script keeps aria-valuenow in sync; confirm). -->
      <output id="micLevelValue" aria-live="off">0%</output>
    </div>
    <div id="finalTranscript" class="final"></div>
  </div>
  <div class="response-panel">
    <div class="panel-head">
      <h2>Assistant</h2>
      <span id="audioBadge" class="badge">Audio idle</span>
    </div>
    <div id="llmOutput" class="output">Load the models, start the microphone, and speak naturally.</div>
  </div>
</div>
<aside class="side">
<!-- Runtime settings: device, model, voice, and tuning controls. -->
<section class="settings">
  <h2>Runtime</h2>
  <label>
    Device
    <select id="deviceSelect">
      <option value="auto" selected>Auto</option>
      <option value="webgpu">WebGPU</option>
      <option value="wasm">WASM</option>
    </select>
  </label>
  <div class="runtime-status" id="runtimeStatus" data-state="checking" aria-live="polite">
    <span id="runtimeDeviceStatus">Checking runtime</span>
    <small id="runtimeDeviceDetail">Adapter probe pending.</small>
  </div>
  <div class="build-status" id="runtimeBuildStatus">Build metadata pending.</div>
  <label>
    LLM
    <select id="llmModelSelect">
      <option value="HuggingFaceTB/SmolLM2-135M-Instruct" selected>SmolLM2 135M Instruct</option>
      <option value="onnx-community/SmolLM2-360M-Instruct-ONNX">SmolLM2 360M Instruct</option>
      <option value="onnx-community/granite-4.0-350m-ONNX-web">Granite 4.0 350M (WebGPU)</option>
      <option value="onnx-community/Qwen3-0.6B-ONNX">Qwen3 0.6B (WebGPU)</option>
      <option value="HuggingFaceTB/SmolLM2-1.7B-Instruct">SmolLM2 1.7B (WebGPU)</option>
    </select>
  </label>
  <label>
    STT
    <select id="asrModelSelect">
      <option value="onnx-community/moonshine-base-ONNX" selected>Moonshine Base</option>
      <option value="onnx-community/moonshine-tiny-ONNX">Moonshine Tiny</option>
      <option value="onnx-community/whisper-tiny.en">Whisper Tiny English</option>
    </select>
  </label>
  <label>
    TTS voice
    <select id="voiceSelect">
      <option value="F1">F1</option>
      <option value="F2" selected>F2</option>
      <option value="M1">M1</option>
      <option value="M2">M2</option>
    </select>
  </label>
  <!-- Explicit "for" is required on the two slider labels: <output> is itself a labelable
       element and comes first inside the label, so without "for" the label would bind to
       the output and leave the range input unlabeled. -->
  <label for="ttsSteps">
    <span class="range-label">
      TTS steps
      <output id="ttsStepsValue" for="ttsSteps">2</output>
    </span>
    <input id="ttsSteps" type="range" min="2" max="8" value="2">
  </label>
  <label for="vadSilence">
    <span class="range-label">
      VAD silence
      <span><output id="vadSilenceValue" for="vadSilence">480</output> ms</span>
    </span>
    <input id="vadSilence" type="range" min="200" max="800" step="20" value="480">
  </label>
  <label class="check">
    <input id="partialToggle" type="checkbox" checked>
    Partial ASR previews
  </label>
</section>
<!-- Latency read-outs, two validation cards, and the benchmark trigger buttons.
     Every metric starts as "-" and every benchmark button starts disabled. -->
<section class="metrics">
  <h2>Latency</h2>
  <dl>
    <div>
      <dt>VAD close delay</dt>
      <dd id="vadCloseLatency">-</dd>
    </div>
    <div>
      <dt>Speech end → transcript</dt>
      <dd id="asrLatency">-</dd>
    </div>
    <div>
      <dt>Transcript → first token</dt>
      <dd id="firstTokenLatency">-</dd>
    </div>
    <div>
      <dt>Transcript → TTS queued</dt>
      <dd id="firstTtsQueuedLatency">-</dd>
    </div>
    <div>
      <dt>First TTS synth</dt>
      <dd id="ttsSynthLatency">-</dd>
    </div>
    <div>
      <dt>Transcript → first audio</dt>
      <dd id="firstAudioLatency">-</dd>
    </div>
    <div>
      <dt>Speech end → first audio</dt>
      <dd id="speechToAudioLatency">-</dd>
    </div>
    <div>
      <dt>LLM decode</dt>
      <dd id="decodeRate">-</dd>
    </div>
  </dl>
  <div class="mic-validation" id="micValidationCard" data-state="idle" aria-live="polite">
    <span>Real mic validation</span>
    <strong id="micValidationStatus">Load models to collect 3 rows</strong>
    <small id="micValidationDetail">Use the mic series and say: "What app is this?"</small>
    <div class="mic-progress" aria-hidden="true">
      <span id="micValidationProgressBar"></span>
    </div>
  </div>
  <div class="mic-validation" id="gpuValidationCard" data-state="idle" aria-live="polite">
    <span>Hardware WebGPU validation</span>
    <strong id="gpuValidationStatus">Checking adapter</strong>
    <small id="gpuValidationDetail">Adapter probe pending.</small>
    <div class="mic-progress" aria-hidden="true">
      <span id="gpuValidationProgressBar"></span>
    </div>
  </div>
  <button id="suiteButton" type="button" disabled>Run benchmark suite</button>
  <button id="benchmarkButton" type="button" disabled>Run identity benchmark</button>
  <button id="chatBenchmarkButton" type="button" disabled>Run chat benchmark</button>
  <button id="ttsBenchmarkButton" type="button" disabled>Run TTS benchmark</button>
  <button id="loopbackButton" type="button" disabled>Run voice loopback</button>
  <button id="bargeInButton" type="button" disabled>Run barge-in check</button>
  <button id="gpuBenchmarkButton" type="button" disabled>Run WebGPU evidence row</button>
  <button id="evidenceCaptureButton" type="button" disabled>Run evidence capture</button>
  <button id="micBenchmarkButton" type="button" disabled>Benchmark real mic</button>
  <button id="micSeriesButton" type="button" disabled>Run 3 real-mic series</button>
</section>
</aside>
</section>
<!-- Event log: an initially empty ordered list with a Clear button in its header. -->
<section class="events" aria-label="Event log">
  <div class="panel-head">
    <h2>Events</h2>
    <button id="clearLogButton" type="button">Clear</button>
  </div>
  <ol id="eventLog"></ol>
</section>
<!-- Benchmark results: export/clear actions, a summary line, and the results table.
     The export and clear buttons are disabled in the initial markup. -->
<section class="bench-results" aria-label="Benchmark results">
  <div class="panel-head">
    <h2>Benchmarks</h2>
    <div class="mini-actions">
      <button id="copyResultsButton" type="button" disabled>Copy JSON</button>
      <button id="downloadResultsButton" type="button" disabled>Download JSON</button>
      <button id="clearResultsButton" type="button" disabled>Clear</button>
    </div>
  </div>
  <div id="benchmarkSummary" class="benchmark-summary" aria-live="polite">
    No benchmark runs yet.
  </div>
  <div class="table-wrap">
    <table>
      <thead>
        <!-- scope="col" ties each header to its column for assistive technology. -->
        <tr>
          <th scope="col">Run</th>
          <th scope="col">Stack</th>
          <th scope="col">ASR</th>
          <th scope="col">STT WER</th>
          <th scope="col">VAD close</th>
          <th scope="col">Prompt</th>
          <th scope="col">1st token</th>
          <th scope="col">TTS queued</th>
          <th scope="col">TTS synth</th>
          <th scope="col">1st audio</th>
          <th scope="col">End → audio</th>
          <th scope="col">Audio done</th>
          <th scope="col">Decode</th>
          <th scope="col">LLM OK</th>
          <th scope="col">Transcript</th>
          <th scope="col">Output</th>
        </tr>
      </thead>
      <tbody id="resultsBody">
        <tr>
          <td colspan="16">No benchmark runs yet.</td>
        </tr>
      </tbody>
    </table>
  </div>
</section>
</main>
<script type="module" src="./app.js"></script>
</body>
</html>