| <!DOCTYPE html> |
| <html lang="zh-CN"> |
| <head> |
| <meta charset="UTF-8"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| <title>MOSS-TTS-Nano 测试页面</title> |
| <style> |
/* Global reset: no default spacing, border-box sizing for every element. */
* {
    box-sizing: border-box;
    margin: 0;
    padding: 0;
}

/* Page shell: a centred column (max 800px) on a light grey backdrop. */
body {
    background: #f5f5f5;
    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
    margin: 0 auto;
    max-width: 800px;
    padding: 20px;
}

/* White card that holds the whole test UI. */
.container {
    background: #fff;
    border-radius: 8px;
    box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
    padding: 30px;
}

h1 {
    color: #333;
    margin-bottom: 10px;
}

/* Secondary text shares one muted colour and size. */
.subtitle,
.progress,
.stats {
    color: #666;
    font-size: 14px;
}
.subtitle {
    margin-bottom: 30px;
}
.progress {
    margin-top: 10px;
}
.stats {
    background: #f8f9fa;
    border-radius: 6px;
    margin-top: 15px;
    padding: 10px;
}

/* Form layout and controls. */
.form-group {
    margin-bottom: 20px;
}
label {
    color: #333;
    display: block;
    font-weight: 600;
    margin-bottom: 8px;
}
textarea,
select {
    border: 1px solid #ddd;
    border-radius: 6px;
    font-size: 16px;
    width: 100%;
}
textarea {
    font-family: inherit;
    min-height: 120px;
    padding: 12px;
    resize: vertical;
}
select {
    background: #fff;
    padding: 10px;
}
.checkbox-group {
    align-items: center;
    display: flex;
    gap: 10px;
}
.checkbox-group input[type="checkbox"] {
    height: 18px;
    width: 18px;
}

/* Buttons. :disabled is declared after :hover so a disabled button stays grey when hovered. */
button {
    background: #007bff;
    border: none;
    border-radius: 6px;
    color: #fff;
    cursor: pointer;
    font-size: 16px;
    padding: 12px 30px;
    transition: background 0.3s;
}
button:hover {
    background: #0056b3;
}
button:disabled {
    background: #ccc;
    cursor: not-allowed;
}

/* Status box: hidden until one of the type classes (info / success / error) is added. */
.status {
    border-radius: 6px;
    display: none;
    margin-top: 20px;
    padding: 15px;
}
.status.info,
.status.success,
.status.error {
    display: block;
}
.status.info {
    background: #e7f3ff;
    color: #0066cc;
}
.status.success {
    background: #e7ffe7;
    color: #006600;
}
.status.error {
    background: #ffe7e7;
    color: #cc0000;
}

/* Result player: hidden until the .show class is added. */
.audio-player {
    display: none;
    margin-top: 20px;
}
.audio-player.show {
    display: block;
}
audio {
    margin-top: 10px;
    width: 100%;
}

/* "Now playing" banner: hidden until the .active class is added. */
.playing-indicator {
    align-items: center;
    background: #e7f3ff;
    border-radius: 6px;
    color: #0066cc;
    display: none;
    gap: 10px;
    margin-top: 15px;
    padding: 10px;
}
.playing-indicator.active {
    display: flex;
}

/* Blinking dot inside the banner. */
.pulse {
    animation: pulse 1s infinite;
    background: #007bff;
    border-radius: 50%;
    height: 10px;
    width: 10px;
}
@keyframes pulse {
    0%, 100% { opacity: 1; }
    50% { opacity: 0.5; }
}
| </style> |
| </head> |
| <body> |
<!-- Main card of the MOSS-TTS-Nano streaming test page; it is the page's only primary content, hence <main>. -->
<main class="container">
    <!-- The emoji is decorative, so it is hidden from assistive technology. -->
    <h1><span aria-hidden="true">🎙️</span> MOSS-TTS-Nano 测试</h1>
    <p class="subtitle">轻量级多语言语音合成 - 边合成边播放</p>

    <div class="form-group">
        <label for="text">输入文本</label>
        <textarea id="text" placeholder="请输入要合成的文本...">欢迎使用 MOSS-TTS-Nano 语音合成系统,这是一个测试页面。正在测试边合成边播放的功能。</textarea>
    </div>

    <!-- The single option is a placeholder; the page script fills this list from the /voices endpoint. -->
    <div class="form-group">
        <label for="voice">选择声音</label>
        <select id="voice">
            <option value="Junhao">加载中...</option>
        </select>
    </div>

    <!-- Explicit type="button": these trigger in-page actions and must never act as submit buttons. -->
    <div class="form-group">
        <button id="synthesizeBtn" type="button" onclick="synthesize()">开始合成</button>
        <button id="stopBtn" type="button" onclick="stopPlayback()" style="display:none; background:#dc3545; margin-left:10px;">停止</button>
    </div>

    <div class="playing-indicator" id="playingIndicator">
        <!-- Purely visual blinking dot; the adjacent text carries the meaning. -->
        <div class="pulse" aria-hidden="true"></div>
        <span>正在播放...</span>
    </div>

    <!-- role="status" makes this a polite live region so script-written messages are announced. -->
    <div id="status" class="status" role="status"></div>

    <div id="audioPlayer" class="audio-player">
        <!-- <audio> is not a labelable element, so the caption is linked with aria-labelledby instead of for=. -->
        <label id="audioLabel">完整音频(试听)</label>
        <audio id="audio" controls aria-labelledby="audioLabel"></audio>
        <div class="stats" id="stats"></div>
    </div>
</main>
|
|
| <script> |
// Playback state shared by the functions in this script.
let audioContext = null, // Web Audio context created by the most recent synthesis run
    isPlaying = false,   // true while streamed chunks may still be scheduled
    nextPlayTime = 0,    // context time at which the next chunk should start
    sourceNode = null;   // declared but not referenced in the visible script
| |
| |
/**
 * Fills the voice <select> from the /voices endpoint once the page has loaded.
 *
 * Each entry of the response is read as { voice, display_name }. The new options
 * are built off-DOM first and only swapped in when a non-empty, well-formed list
 * was received; otherwise the placeholder option from the markup is kept so a
 * voice value is still submitted.
 */
window.onload = async function() {
    const voiceSelect = document.getElementById('voice');

    // Replace the stale "loading" label of whatever options remain with their own value.
    // NOTE(review): assumes the placeholder's value is a voice the server accepts — confirm.
    const keepExistingOptions = () => {
        for (const option of voiceSelect.options) {
            option.textContent = option.value;
        }
    };

    try {
        const response = await fetch('/voices');
        if (!response.ok) {
            // fetch() resolves on HTTP error statuses, so they must be detected explicitly.
            throw new Error('HTTP ' + response.status);
        }

        const voices = await response.json();
        if (!Array.isArray(voices)) {
            throw new Error('unexpected /voices response');
        }

        // Build everything before touching the <select>: a malformed entry then
        // throws here and cannot leave the list half-filled or empty.
        const options = document.createDocumentFragment();
        voices.forEach(voice => {
            const option = document.createElement('option');
            option.value = voice.voice;
            option.textContent = voice.display_name || voice.voice;
            options.appendChild(option);
        });

        if (voices.length > 0) {
            voiceSelect.innerHTML = '';
            voiceSelect.appendChild(options);
        } else {
            keepExistingOptions();
        }
    } catch (error) {
        console.error('获取声音列表失败:', error);
        showStatus('获取声音列表失败: ' + error.message, 'error');
        keepExistingOptions();
    }
};
| |
/**
 * Shows a message in the #status box.
 *
 * @param {string} message - Text to display (assigned as plain text, not HTML).
 * @param {string} [type='info'] - Class suffix appended after "status"; the
 *     stylesheet defines the variants info, success and error.
 */
function showStatus(message, type = 'info') {
    const box = document.getElementById('status');
    box.className = `status ${type}`;
    box.textContent = message;
}
| |
/**
 * Click handler of the "start" button: validates the text input, switches the
 * UI into its busy state and starts streaming synthesis via synthesizeAndPlay().
 */
function synthesize() {
    const text = document.getElementById('text').value.trim();
    if (!text) {
        showStatus('请输入要合成的文本', 'error');
        return;
    }

    const voice = document.getElementById('voice').value;
    const btn = document.getElementById('synthesizeBtn');
    const stopBtn = document.getElementById('stopBtn');

    // Hiding the result player does not silence it: pause the previous result
    // explicitly so it cannot keep playing underneath the new stream.
    document.getElementById('audio').pause();

    btn.disabled = true;
    stopBtn.style.display = 'inline-block';
    showStatus('正在合成并播放...', 'info');
    document.getElementById('audioPlayer').classList.remove('show');
    document.getElementById('playingIndicator').classList.add('active');

    // synthesizeAndPlay() is async; surface anything that escapes it instead of
    // leaving an unhandled promise rejection.
    synthesizeAndPlay(text, voice).catch(error => {
        console.error(error);
        showStatus('合成失败: ' + error.message, 'error');
    });
}
| |
// Abort handle of the synthesis run that currently owns the UI; null when idle.
// Written only by stopPlayback() and synthesizeAndPlay().
let activeSynthesis = null;

/**
 * Closes an AudioContext unless it is missing or already closed.
 * Promise.resolve() covers implementations whose close() returns nothing;
 * a rejection is ignored because there is nothing left to clean up.
 */
function closeAudioContext(context) {
    if (context && context.state !== 'closed') {
        Promise.resolve(context.close()).catch(() => {});
    }
}

/**
 * Click handler of the "stop" button: cancels the in-flight synthesis request,
 * silences playback and returns the UI to its idle state.
 */
function stopPlayback() {
    // Abort the request first so no further chunks are downloaded or scheduled.
    if (activeSynthesis) {
        activeSynthesis.abort();
        activeSynthesis = null;
    }
    closeAudioContext(audioContext);
    isPlaying = false;
    document.getElementById('playingIndicator').classList.remove('active');
    document.getElementById('synthesizeBtn').disabled = false;
    document.getElementById('stopBtn').style.display = 'none';
    showStatus('已停止播放', 'info');
}

/**
 * Requests streamed PCM speech from /v1/audio/speech, plays each chunk as it
 * arrives and finally offers everything received as a WAV file in the #audio
 * player together with timing statistics.
 *
 * The stream is handled as 16 kHz, mono, 16-bit PCM, matching the constants
 * used when scheduling chunks and when building the WAV file.
 *
 * @param {string} text - Text to synthesize (sent as `input`).
 * @param {string} voice - Voice identifier (sent as `voice`).
 * @returns {Promise<void>} Resolves once playback has drained, failed or been stopped.
 */
async function synthesizeAndPlay(text, voice) {
    const SAMPLE_RATE = 16000;
    const BYTES_PER_SAMPLE = 2;
    const DRAIN_MARGIN_MS = 500;

    const startTime = Date.now();
    const allPcmChunks = [];
    let firstChunkTime = null;
    let chunkCount = 0;
    let totalBytes = 0;      // running total, so the stream is not re-summed per chunk
    let danglingByte = null; // first half of a sample that was split across two reads
    let context = null;
    let failed = false;

    // Only one run may own the UI; cancel a previous one that is still pending.
    if (activeSynthesis) {
        activeSynthesis.abort();
    }
    const run = new AbortController();
    activeSynthesis = run;

    try {
        const response = await fetch('/v1/audio/speech', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                input: text,
                voice: voice,
                stream: true,
                response_format: 'pcm'
            }),
            signal: run.signal
        });

        if (!response.ok) {
            // statusText is frequently empty, so the numeric status is included too.
            throw new Error('请求失败: ' + `${response.status} ${response.statusText}`.trim());
        }

        // Every run creates its own context; release the previous one first so
        // its audio resources are not held for the lifetime of the page.
        closeAudioContext(audioContext);
        context = new (window.AudioContext || window.webkitAudioContext)({
            sampleRate: SAMPLE_RATE
        });
        audioContext = context;
        isPlaying = true;
        nextPlayTime = context.currentTime + 0.1;

        const reader = response.body.getReader();

        while (true) {
            // After stopPlayback() this read rejects, which ends the loop via catch.
            const { done, value } = await reader.read();
            if (done) break;

            if (firstChunkTime === null) {
                firstChunkTime = Date.now();
                showStatus(`开始接收音频流... (首块延迟: ${((firstChunkTime - startTime) / 1000).toFixed(3)}秒)`, 'info');
            }

            allPcmChunks.push(value);
            chunkCount++;
            totalBytes += value.length;

            // Network reads can end in the middle of a 16-bit sample. Carry the
            // odd byte over to the next chunk; dropping it would shift every
            // following sample by one byte and turn the audio into noise.
            let playable = value;
            if (danglingByte !== null) {
                playable = new Uint8Array(value.length + 1);
                playable[0] = danglingByte;
                playable.set(value, 1);
                danglingByte = null;
            }
            if (playable.length % BYTES_PER_SAMPLE !== 0) {
                danglingByte = playable[playable.length - 1];
                playable = playable.subarray(0, playable.length - 1);
            }

            if (isPlaying && playable.length > 0) {
                playPcmChunk(playable);
            }

            showStatus(`正在播放... 已接收 ${chunkCount} 块 (${(totalBytes / 1024).toFixed(2)} KB)`, 'info');
        }

        // The download is done but scheduled audio may still be queued: wait for
        // the time that is left until the last chunk ends (nextPlayTime lies in
        // the future while audio is pending). A stop request ends the wait early.
        const remainingMs = (nextPlayTime - context.currentTime) * 1000;
        if (isPlaying && remainingMs > 0) {
            await new Promise(resolve => {
                const timer = setTimeout(resolve, remainingMs + DRAIN_MARGIN_MS);
                run.signal.addEventListener('abort', () => {
                    clearTimeout(timer);
                    resolve();
                }, { once: true });
            });
        }

    } catch (error) {
        // An abort is a deliberate stop, not a failure.
        if (!run.signal.aborted) {
            failed = true;
            showStatus('合成失败: ' + error.message, 'error');
        }
    } finally {
        const stopped = run.signal.aborted;

        if (activeSynthesis === run) {
            // Natural end or failure: this run still owns the UI, so reset it here.
            // After a stop, stopPlayback() has already done this.
            activeSynthesis = null;
            isPlaying = false;
            closeAudioContext(context);
            document.getElementById('playingIndicator').classList.remove('active');
            document.getElementById('synthesizeBtn').disabled = false;
            document.getElementById('stopBtn').style.display = 'none';
        }

        // A newer run may have started after this one was stopped; its UI must
        // not be overwritten with this run's result.
        const superseded = activeSynthesis !== null;

        if (allPcmChunks.length > 0 && !superseded) {
            const mergedPcm = new Uint8Array(totalBytes);
            let offset = 0;
            for (const chunk of allPcmChunks) {
                mergedPcm.set(chunk, offset);
                offset += chunk.length;
            }

            const wavBlob = pcmToWav(mergedPcm, SAMPLE_RATE, 1, 16);
            const audio = document.getElementById('audio');
            const previousUrl = audio.src;
            audio.src = URL.createObjectURL(wavBlob);
            // Release the blob of the previous result; object URLs otherwise stay
            // alive until the page is unloaded.
            if (previousUrl.startsWith('blob:')) {
                URL.revokeObjectURL(previousUrl);
            }
            document.getElementById('audioPlayer').classList.add('show');

            const totalTime = ((Date.now() - startTime) / 1000).toFixed(2);
            const firstChunkDelay = firstChunkTime ? ((firstChunkTime - startTime) / 1000).toFixed(3) : 'N/A';
            const audioDuration = (totalBytes / BYTES_PER_SAMPLE / SAMPLE_RATE).toFixed(2);

            document.getElementById('stats').innerHTML = [
                '<strong>合成完成</strong>',
                `总耗时: ${totalTime}秒`,
                `首块延迟: ${firstChunkDelay}秒`,
                `块数: ${chunkCount}`,
                `音频时长: ${audioDuration}秒`
            ].join('<br>\n');

            // Keep the error / stopped message visible instead of claiming success.
            if (!failed && !stopped) {
                showStatus('合成完成!', 'success');
            }
        }
    }
}
| |
/**
 * Schedules one chunk of 16-bit little-endian mono PCM for gapless playback on
 * the shared audioContext, directly after the previously scheduled chunk.
 *
 * Does nothing when playback is not active or when the chunk holds no complete
 * sample. A trailing odd byte is ignored.
 *
 * @param {Uint8Array} pcmData - Raw PCM bytes; may be a view at any byte offset.
 */
function playPcmChunk(pcmData) {
    if (!audioContext || !isPlaying) return;

    const sampleCount = Math.floor(pcmData.byteLength / 2);
    // A buffer must contain at least one frame; createBuffer() does not accept a length of 0.
    if (sampleCount === 0) return;

    // DataView reads work at any byte offset and with explicit little-endian
    // order, whereas an Int16Array view throws on an odd offset and uses the
    // platform's byte order.
    const pcmView = new DataView(pcmData.buffer, pcmData.byteOffset, pcmData.byteLength);

    const audioBuffer = audioContext.createBuffer(1, sampleCount, 16000);
    const channel = audioBuffer.getChannelData(0);
    for (let i = 0; i < sampleCount; i++) {
        // Scale signed 16-bit samples to the [-1, 1) float range used by Web Audio.
        channel[i] = pcmView.getInt16(i * 2, true) / 32768.0;
    }

    const source = audioContext.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(audioContext.destination);
    // Detach the node once it has finished playing.
    source.onended = () => {
        source.disconnect();
    };

    // If the stream fell behind, the planned start time is already in the past:
    // restart the schedule slightly ahead of "now".
    if (nextPlayTime < audioContext.currentTime) {
        nextPlayTime = audioContext.currentTime + 0.05;
    }
    source.start(nextPlayTime);
    nextPlayTime += audioBuffer.duration;
}
| |
/**
 * Wraps raw PCM bytes in a 44-byte RIFF/WAVE header.
 *
 * @param {Uint8Array} pcmData - Raw PCM sample bytes.
 * @param {number} sampleRate - Samples per second.
 * @param {number} channels - Number of channels.
 * @param {number} bitsPerSample - Bits per sample.
 * @returns {Blob} Blob of type audio/wav containing the header followed by pcmData.
 */
function pcmToWav(pcmData, sampleRate, channels, bitsPerSample) {
    const HEADER_BYTES = 44;
    const LITTLE_ENDIAN = true;

    const payloadBytes = pcmData.length;
    const wav = new ArrayBuffer(HEADER_BYTES + payloadBytes);
    const header = new DataView(wav);

    // RIFF container: the size field counts everything after its own 8 bytes.
    writeString(header, 0, 'RIFF');
    header.setUint32(4, 36 + payloadBytes, LITTLE_ENDIAN);
    writeString(header, 8, 'WAVE');

    // "fmt " chunk: 16-byte body, format tag 1 (PCM).
    writeString(header, 12, 'fmt ');
    header.setUint32(16, 16, LITTLE_ENDIAN);
    header.setUint16(20, 1, LITTLE_ENDIAN);
    header.setUint16(22, channels, LITTLE_ENDIAN);
    header.setUint32(24, sampleRate, LITTLE_ENDIAN);
    header.setUint32(28, sampleRate * channels * bitsPerSample / 8, LITTLE_ENDIAN); // byte rate
    header.setUint16(32, channels * bitsPerSample / 8, LITTLE_ENDIAN);              // block align
    header.setUint16(34, bitsPerSample, LITTLE_ENDIAN);

    // "data" chunk: payload size, then the samples themselves.
    writeString(header, 36, 'data');
    header.setUint32(40, payloadBytes, LITTLE_ENDIAN);
    new Uint8Array(wav, HEADER_BYTES).set(pcmData);

    return new Blob([wav], { type: 'audio/wav' });
}
| |
/**
 * Writes a string into a DataView, one byte per UTF-16 code unit.
 *
 * @param {DataView} view - Destination view.
 * @param {number} offset - Byte position of the first character.
 * @param {string} string - Text to write; each code unit is stored with setUint8.
 */
function writeString(view, offset, string) {
    let position = offset;
    // split('') yields one entry per UTF-16 code unit, matching an index loop.
    for (const unit of string.split('')) {
        view.setUint8(position, unit.charCodeAt(0));
        position += 1;
    }
}
| </script> |
| </body> |
| </html> |
|
|