nsarrazin's picture
refactoring & better look
39b5447
raw
history blame
813 Bytes
import { getInference } from "$lib/agents/getInference";
import type {
AutomaticSpeechRecognitionArgs,
AutomaticSpeechRecognitionOutput,
} from "@huggingface/inference";
import type { Tool } from "$lib/types";
export const speechToTextTool: Tool<
AutomaticSpeechRecognitionArgs["data"],
AutomaticSpeechRecognitionOutput["text"]
> = {
name: "speechToText",
description: "Caption an audio file and returns its text content.",
examples: [
{
prompt: "Transcribe the sound file",
command: "speechToText(audio)",
},
],
call: async (data) => {
return (
await getInference().automaticSpeechRecognition(
{
data: await data,
model: "facebook/wav2vec2-large-960h-lv60-self",
},
{ wait_for_model: true }
)
).text;
},
};