File size: 2,992 Bytes
3baa9da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
// audioUtils.ts
const SAMPLE_RATE = 16000; // ASR models typically expect 16kHz audio

/**
 * Reads an audio Blob (or File) and converts it to a mono Float32Array of PCM
 * samples at SAMPLE_RATE (16 kHz).
 *
 * Decoding is format-agnostic (mp3, wav, webm, ...) via the Web Audio API.
 * Multi-channel input is down-mixed to mono by averaging channels; audio at a
 * different sample rate is resampled through an OfflineAudioContext, whose
 * mono destination applies the same down-mix.
 *
 * @param file The audio Blob or File.
 * @returns A Promise resolving with the mono, 16 kHz Float32Array of samples.
 * @throws DOMException if the blob cannot be decoded as audio.
 */
export async function readAudio(file: Blob): Promise<Float32Array> {
  // A throwaway AudioContext is used only for decoding. It MUST be closed:
  // browsers cap the number of concurrently-live AudioContexts, so leaking
  // one per call eventually makes decoding fail.
  const audioContext = new AudioContext();
  let audioBuffer: AudioBuffer;
  try {
    const arrayBuffer = await file.arrayBuffer();
    // decodeAudioData handles container/codec parsing and yields raw PCM
    // at the file's original sample rate.
    audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
  } finally {
    // Release audio hardware/resources even if decoding throws.
    void audioContext.close();
  }

  if (audioBuffer.sampleRate === SAMPLE_RATE) {
    // Already at the target rate — no resampling needed.
    if (audioBuffer.numberOfChannels === 1) {
      return audioBuffer.getChannelData(0);
    }
    // Down-mix by averaging channels so this fast path matches the
    // resampling path below (a mono OfflineAudioContext destination mixes
    // channels rather than dropping all but the first).
    return mixToMono(audioBuffer);
  }

  // Resample (and implicitly down-mix) by rendering into a mono
  // OfflineAudioContext running at the target sample rate.
  const resampledLength = Math.ceil(audioBuffer.duration * SAMPLE_RATE);
  const offlineAudioContext = new OfflineAudioContext(
    1, // mono — ASR models typically expect single-channel input
    resampledLength,
    SAMPLE_RATE
  );

  const source = offlineAudioContext.createBufferSource();
  source.buffer = audioBuffer;
  source.connect(offlineAudioContext.destination);
  source.start(0);

  // startRendering performs the actual sample-rate conversion.
  const resampledBuffer = await offlineAudioContext.startRendering();
  return resampledBuffer.getChannelData(0);
}

/**
 * Averages all channels of an AudioBuffer into a single mono Float32Array.
 * For stereo this is (L + R) / 2, matching the Web Audio mono down-mix rule.
 */
function mixToMono(buffer: AudioBuffer): Float32Array {
  const { numberOfChannels, length } = buffer;
  const mono = new Float32Array(length);
  for (let channel = 0; channel < numberOfChannels; channel++) {
    const data = buffer.getChannelData(channel);
    for (let i = 0; i < length; i++) {
      mono[i] += data[i] / numberOfChannels;
    }
  }
  return mono;
}