agent-course-final-assessment

Running

File size: 928 Bytes

a225ae4

import os

import whisper
from smolagents import Tool


class OpenAISpeechToTextTool(Tool):
    """
    Tool to convert speech to text using OpenAI's Whisper model.

    Args:
        audio_path (str): Path to the audio file.

    Returns:
        str: Transcribed text from the audio file.
    """

    name = "transcribe_audio"
    description = "Transcribes audio to text and returns the text"
    inputs = {
        "audio_path": {"type": "string", "description": "Path to the audio file"},
    }
    output_type = "string"

    def forward(self, audio_path: str) -> str:
        try:
            model = whisper.load_model("small")

            if not os.path.exists(audio_path):
                return f"Error: Audio file not found at {audio_path}"

            result = model.transcribe(audio_path)
            return result["text"]
        except Exception as e:
            return f"Error transcribing audio: {str(e)}"