// Voice-call demo UI; audio inference runs in ./worker.js (module Web Worker).
import { useEffect, useState, useRef } from "react";
import { Mic, PhoneOff, ChevronDown } from "lucide-react";
import { INPUT_SAMPLE_RATE } from "./constants";
import WORKLET from "./play-worklet.js";
export default function App() { | |
const [callStartTime, setCallStartTime] = useState(null); | |
const [callStarted, setCallStarted] = useState(false); | |
const [playing, setPlaying] = useState(false); | |
const [voice, setVoice] = useState("af_heart"); | |
const [voices, setVoices] = useState([]); | |
const [isListening, setIsListening] = useState(false); | |
const [isSpeaking, setIsSpeaking] = useState(false); | |
const [listeningScale, setListeningScale] = useState(1); | |
const [speakingScale, setSpeakingScale] = useState(1); | |
const [ripples, setRipples] = useState([]); | |
const [ready, setReady] = useState(false); | |
const [error, setError] = useState(null); | |
const [elapsedTime, setElapsedTime] = useState("00:00"); | |
const worker = useRef(null); | |
const micStreamRef = useRef(null); | |
const node = useRef(null); | |
useEffect(() => { | |
worker.current?.postMessage({ | |
type: "set_voice", | |
voice, | |
}); | |
}, [voice]); | |
useEffect(() => { | |
if (!callStarted) { | |
// Reset worker state after call ends | |
worker.current?.postMessage({ | |
type: "end_call", | |
}); | |
} | |
}, [callStarted]); | |
useEffect(() => { | |
if (callStarted && callStartTime) { | |
const interval = setInterval(() => { | |
const diff = Math.floor((Date.now() - callStartTime) / 1000); | |
const minutes = String(Math.floor(diff / 60)).padStart(2, "0"); | |
const seconds = String(diff % 60).padStart(2, "0"); | |
setElapsedTime(`${minutes}:${seconds}`); | |
}, 1000); | |
return () => clearInterval(interval); | |
} else { | |
setElapsedTime("00:00"); | |
} | |
}, [callStarted, callStartTime]); | |
useEffect(() => { | |
worker.current ??= new Worker(new URL("./worker.js", import.meta.url), { | |
type: "module", | |
}); | |
const onMessage = ({ data }) => { | |
if (data.error) { | |
return onError(data.error); | |
} | |
switch (data.type) { | |
case "status": | |
if (data.status === "recording_start") { | |
setIsListening(true); | |
setIsSpeaking(false); | |
} else if (data.status === "recording_end") { | |
setIsListening(false); | |
} else if (data.status === "ready") { | |
setVoices(data.voices); | |
setReady(true); | |
} | |
break; | |
case "output": | |
if (!playing) { | |
node.current?.port.postMessage(data.result.audio); | |
setPlaying(true); | |
setIsSpeaking(true); | |
setIsListening(false); | |
} | |
break; | |
} | |
}; | |
const onError = (err) => setError(err.message); | |
worker.current.addEventListener("message", onMessage); | |
worker.current.addEventListener("error", onError); | |
return () => { | |
worker.current.removeEventListener("message", onMessage); | |
worker.current.removeEventListener("error", onError); | |
}; | |
}, []); | |
useEffect(() => { | |
if (!callStarted) return; | |
let worklet; | |
let inputAudioContext; | |
let source; | |
let ignore = false; | |
let outputAudioContext; | |
const audioStreamPromise = Promise.resolve(micStreamRef.current); | |
audioStreamPromise | |
.then(async (stream) => { | |
if (ignore) return; | |
inputAudioContext = new (window.AudioContext || | |
window.webkitAudioContext)({ | |
sampleRate: INPUT_SAMPLE_RATE, | |
}); | |
const analyser = inputAudioContext.createAnalyser(); | |
analyser.fftSize = 256; | |
source = inputAudioContext.createMediaStreamSource(stream); | |
source.connect(analyser); | |
const inputDataArray = new Uint8Array(analyser.frequencyBinCount); | |
function calculateRMS(array) { | |
let sum = 0; | |
for (let i = 0; i < array.length; ++i) { | |
const normalized = array[i] / 128 - 1; | |
sum += normalized * normalized; | |
} | |
const rms = Math.sqrt(sum / array.length); | |
return rms; | |
} | |
await inputAudioContext.audioWorklet.addModule( | |
new URL("./vad-processor.js", import.meta.url), | |
); | |
worklet = new AudioWorkletNode(inputAudioContext, "vad-processor", { | |
numberOfInputs: 1, | |
numberOfOutputs: 0, | |
channelCount: 1, | |
channelCountMode: "explicit", | |
channelInterpretation: "discrete", | |
}); | |
source.connect(worklet); | |
worklet.port.onmessage = (event) => { | |
const { buffer } = event.data; | |
worker.current?.postMessage({ type: "audio", buffer }); | |
}; | |
outputAudioContext = new AudioContext({ | |
sampleRate: 24000, | |
}); | |
outputAudioContext.resume(); | |
const blob = new Blob([`(${WORKLET.toString()})()`], { | |
type: "application/javascript", | |
}); | |
const url = URL.createObjectURL(blob); | |
await outputAudioContext.audioWorklet.addModule(url); | |
URL.revokeObjectURL(url); | |
node.current = new AudioWorkletNode( | |
outputAudioContext, | |
"buffered-audio-worklet-processor", | |
); | |
node.current.port.onmessage = (event) => { | |
if (event.data.type === "playback_ended") { | |
setPlaying(false); | |
setIsSpeaking(false); | |
worker.current?.postMessage({ type: "playback_ended" }); | |
} | |
}; | |
const outputAnalyser = outputAudioContext.createAnalyser(); | |
outputAnalyser.fftSize = 256; | |
node.current.connect(outputAnalyser); | |
outputAnalyser.connect(outputAudioContext.destination); | |
const outputDataArray = new Uint8Array( | |
outputAnalyser.frequencyBinCount, | |
); | |
function updateVisualizers() { | |
analyser.getByteTimeDomainData(inputDataArray); | |
const rms = calculateRMS(inputDataArray); | |
const targetScale = 1 + Math.min(1.25 * rms, 0.25); | |
setListeningScale((prev) => prev + (targetScale - prev) * 0.25); | |
outputAnalyser.getByteTimeDomainData(outputDataArray); | |
const outputRMS = calculateRMS(outputDataArray); | |
const targetOutputScale = 1 + Math.min(1.25 * outputRMS, 0.25); | |
setSpeakingScale((prev) => prev + (targetOutputScale - prev) * 0.25); | |
requestAnimationFrame(updateVisualizers); | |
} | |
updateVisualizers(); | |
}) | |
.catch((err) => { | |
setError(err.message); | |
console.error(err); | |
}); | |
return () => { | |
ignore = true; | |
audioStreamPromise.then((s) => s.getTracks().forEach((t) => t.stop())); | |
source?.disconnect(); | |
worklet?.disconnect(); | |
inputAudioContext?.close(); | |
outputAudioContext?.close(); | |
}; | |
}, [callStarted]); | |
useEffect(() => { | |
if (!callStarted) return; | |
const interval = setInterval(() => { | |
const id = Date.now(); | |
setRipples((prev) => [...prev, id]); | |
setTimeout(() => { | |
setRipples((prev) => prev.filter((r) => r !== id)); | |
}, 1500); | |
}, 1000); | |
return () => clearInterval(interval); | |
}, [callStarted]); | |
const handleStartCall = async () => { | |
try { | |
const stream = await navigator.mediaDevices.getUserMedia({ | |
audio: { | |
channelCount: 1, | |
echoCancellation: true, | |
autoGainControl: true, | |
noiseSuppression: true, | |
sampleRate: INPUT_SAMPLE_RATE, | |
}, | |
}); | |
micStreamRef.current = stream; | |
setCallStartTime(Date.now()); | |
setCallStarted(true); | |
worker.current?.postMessage({ type: "start_call" }); | |
} catch (err) { | |
setError(err.message); | |
console.error(err); | |
} | |
}; | |
return ( | |
<div className="h-screen min-h-[240px] flex items-center justify-center bg-gray-50 p-4 relative"> | |
<div className="h-full max-h-[320px] w-[640px] bg-white rounded-xl shadow-lg p-8 flex items-center justify-between space-x-16"> | |
<div className="text-green-700 w-[140px]"> | |
<div className="text-xl font-bold flex justify-between"> | |
{voices?.[voice]?.name} | |
<span className="font-normal text-gray-500">{elapsedTime}</span> | |
</div> | |
<div className="text-base relative"> | |
<button | |
type="button" | |
disabled={!ready} | |
className={`w-full flex items-center justify-between border border-gray-300 rounded-md transition-colors ${ | |
ready | |
? "bg-transparent hover:border-gray-400" | |
: "bg-gray-100 opacity-50 cursor-not-allowed" | |
}`} | |
> | |
<span className="px-2 py-1">Select voice</span> | |
<ChevronDown className="absolute right-2" /> | |
</button> | |
<select | |
value={voice} | |
onChange={(e) => setVoice(e.target.value)} | |
className="absolute inset-0 opacity-0 cursor-pointer" | |
disabled={!ready} | |
> | |
{Object.entries(voices).map(([key, v]) => ( | |
<option key={key} value={key}> | |
{`${v.name} (${ | |
v.language === "en-us" ? "American" : v.language | |
} ${v.gender})`} | |
</option> | |
))} | |
</select> | |
</div> | |
</div> | |
<div className="relative flex items-center justify-center w-32 h-32 flex-shrink-0 aspect-square"> | |
{callStarted && | |
ripples.map((id) => ( | |
<div | |
key={id} | |
className="absolute inset-0 rounded-full border-2 border-green-200 pointer-events-none" | |
style={{ animation: "ripple 1.5s ease-out forwards" }} | |
/> | |
))} | |
{/* Pulsing loader while initializing */} | |
<div | |
className={`absolute w-32 h-32 rounded-full ${ | |
error ? "bg-red-200" : "bg-green-200" | |
} ${!ready ? "animate-ping opacity-75" : ""}`} | |
style={{ animationDuration: "1.5s" }} | |
/> | |
{/* Main rings */} | |
<div | |
className={`absolute w-32 h-32 rounded-full shadow-inner transition-transform duration-300 ease-out ${ | |
error ? "bg-red-300" : "bg-green-300" | |
} ${!ready ? "opacity-0" : ""}`} | |
style={{ transform: `scale(${speakingScale})` }} | |
/> | |
<div | |
className={`absolute w-32 h-32 rounded-full shadow-inner transition-transform duration-300 ease-out ${ | |
error ? "bg-red-200" : "bg-green-200" | |
} ${!ready ? "opacity-0" : ""}`} | |
style={{ transform: `scale(${listeningScale})` }} | |
/> | |
{/* Center text: show error if present, else existing statuses */} | |
<div | |
className={`absolute z-10 text-lg text-center ${ | |
error ? "text-red-700" : "text-gray-700" | |
}`} | |
> | |
{error ? ( | |
error | |
) : ( | |
<> | |
{!ready && "Loading..."} | |
{isListening && "Listening..."} | |
{isSpeaking && "Speaking..."} | |
</> | |
)} | |
</div> | |
</div> | |
<div className="space-y-4 w-[140px]"> | |
{callStarted ? ( | |
<button | |
className="flex items-center space-x-2 px-4 py-2 bg-red-100 text-red-700 rounded-md hover:bg-red-200" | |
onClick={() => { | |
setCallStarted(false); | |
setCallStartTime(null); | |
setPlaying(false); | |
setIsListening(false); | |
setIsSpeaking(false); | |
}} | |
> | |
<PhoneOff className="w-5 h-5" /> | |
<span>End call</span> | |
</button> | |
) : ( | |
<button | |
className={`flex items-center space-x-2 px-4 py-2 rounded-md ${ | |
ready | |
? "bg-blue-100 text-blue-700 hover:bg-blue-200" | |
: "bg-blue-100 text-blue-700 opacity-50 cursor-not-allowed" | |
}`} | |
onClick={handleStartCall} | |
disabled={!ready} | |
> | |
<span>Start call</span> | |
</button> | |
)} | |
</div> | |
</div> | |
<div className="absolute bottom-4 text-sm"> | |
Built with{" "} | |
<a | |
href="https://github.com/huggingface/transformers.js" | |
rel="noopener noreferrer" | |
target="_blank" | |
className="text-blue-600 hover:underline" | |
> | |
🤗 Transformers.js | |
</a> | |
</div> | |
</div> | |
); | |
} | |