import { getInference } from "$lib/agents/getInference"; import type { AutomaticSpeechRecognitionArgs, AutomaticSpeechRecognitionOutput, } from "@huggingface/inference"; import type { Tool } from "$lib/types"; export const speechToTextTool: Tool< AutomaticSpeechRecognitionArgs["data"], AutomaticSpeechRecognitionOutput["text"] > = { name: "speechToText", description: "Caption an audio file and returns its text content.", examples: [ { prompt: "Transcribe the sound file", command: "speechToText(audio)", }, ], call: async (data) => { return ( await getInference().automaticSpeechRecognition( { data: await data, model: "facebook/wav2vec2-large-960h-lv60-self", }, { wait_for_model: true } ) ).text; }, };