from smolagents.tools import Tool
from huggingface_hub import InferenceClient

class AudioTranscriptionTool(Tool):
    """Agent tool that transcribes speech from an audio file.

    The audio is sent to the ``openai/whisper-large-v3`` model through a
    Hugging Face ``InferenceClient`` and the recognized text is returned.
    """

    name = "audio_transcription"
    description = "Transcribes speech from an audio file (e.g., MP3)"
    inputs = {"audio_path": {"type": "string", "description": "Path to the audio file"}}
    output_type = "string"

    def __init__(self):
        """Create the tool and the inference client it uses for transcription."""
        super().__init__()
        self.client = InferenceClient("openai/whisper-large-v3")

    def forward(self, audio_path: str) -> str:
        """Transcribe the audio file at ``audio_path`` and return the text.

        Args:
            audio_path: Path to a local audio file.

        Returns:
            The transcription produced by the speech-recognition model.

        Raises:
            OSError: If the file cannot be opened or read (e.g. it does not
                exist). Errors raised by the inference client propagate
                unchanged.
        """
        # Read the file locally first so a bad path fails with a clear
        # filesystem error before any network request is made.
        with open(audio_path, "rb") as audio_file:
            audio_bytes = audio_file.read()

        # InferenceClient has no `audio_to_text` method; the speech-to-text
        # task is exposed as `automatic_speech_recognition`.
        result = self.client.automatic_speech_recognition(audio_bytes)

        # NOTE(review): older huggingface_hub releases appear to return the
        # transcription as a plain string rather than an output object with a
        # "text" field -- confirm against the pinned version.
        if isinstance(result, str):
            return result
        return result["text"]