// audioUtils.ts

const SAMPLE_RATE = 16000; // ASR models typically expect 16 kHz audio

/**
 * Decodes an audio Blob (or File) into mono PCM samples at SAMPLE_RATE (16 kHz).
 *
 * `decodeAudioData` handles whatever container/codec the browser supports
 * (wav, mp3, webm, ogg, …). If the decoded audio is already at 16 kHz, the
 * channels are averaged down to mono directly; otherwise an
 * OfflineAudioContext renders the buffer at the target rate, which both
 * resamples and down-mixes to a single channel.
 *
 * @param file The audio Blob or File to decode.
 * @returns A Promise resolving to a Float32Array of mono 16 kHz samples.
 * @throws DOMException if the data cannot be decoded as audio.
 */
export async function readAudio(file: Blob): Promise<Float32Array> {
  // A realtime AudioContext is only needed for decoding; it is closed in the
  // finally block below — browsers limit how many may exist at once.
  const audioContext = new AudioContext();
  try {
    const arrayBuffer = await file.arrayBuffer();
    const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);

    if (audioBuffer.sampleRate === SAMPLE_RATE) {
      // No resampling needed — just ensure the output is mono.
      if (audioBuffer.numberOfChannels > 1) {
        // Average all channels so this path matches the down-mix the
        // OfflineAudioContext performs in the resampling path below.
        const channelCount = audioBuffer.numberOfChannels;
        const mono = new Float32Array(audioBuffer.length);
        for (let ch = 0; ch < channelCount; ch++) {
          const data = audioBuffer.getChannelData(ch);
          for (let i = 0; i < mono.length; i++) {
            mono[i] += data[i];
          }
        }
        for (let i = 0; i < mono.length; i++) {
          mono[i] /= channelCount;
        }
        return mono;
      }
      return audioBuffer.getChannelData(0); // already mono at 16 kHz
    }

    // Resample (and implicitly down-mix) by rendering through an offline
    // context whose destination is mono at the target sample rate.
    const resampledLength = Math.ceil(audioBuffer.duration * SAMPLE_RATE);
    const offlineAudioContext = new OfflineAudioContext(
      1, // mono destination — multi-channel sources are down-mixed on connect
      resampledLength,
      SAMPLE_RATE,
    );

    const source = offlineAudioContext.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(offlineAudioContext.destination);
    source.start(0);

    // startRendering() performs the actual resampling off the realtime thread.
    const resampledBuffer = await offlineAudioContext.startRendering();
    return resampledBuffer.getChannelData(0);
  } finally {
    // Release decoder/hardware resources; fire-and-forget is fine here.
    void audioContext.close();
  }
}