from smolagents.tools import Tool
from huggingface_hub import InferenceClient


class AudioTranscriptionTool(Tool):
    """Tool that transcribes speech from an audio file.

    The audio is sent to a Hugging Face ``InferenceClient`` bound to the
    ``openai/whisper-large-v3`` model, and the recognized text is returned.
    """

    name = "audio_transcription"
    description = "Transcribes speech from an audio file (e.g., MP3)"
    inputs = {"audio_path": {"type": "string", "description": "Path to the audio file"}}
    output_type = "string"

    def __init__(self):
        """Create the tool and the inference client used for transcription."""
        super().__init__()
        self.client = InferenceClient("openai/whisper-large-v3")

    def forward(self, audio_path: str) -> str:
        """Transcribe the audio file at ``audio_path`` and return its text.

        Args:
            audio_path: Path to a local audio file, opened in binary mode.

        Returns:
            The transcribed text.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        with open(audio_path, "rb") as audio_file:
            audio_bytes = audio_file.read()

        # InferenceClient exposes speech-to-text as
        # `automatic_speech_recognition`; there is no `audio_to_text` method.
        result = self.client.automatic_speech_recognition(audio_bytes)

        # NOTE(review): older huggingface_hub releases appear to return the
        # transcription as a plain string, newer ones an output object with a
        # `.text` attribute — confirm against the pinned version.
        if isinstance(result, str):
            return result
        return result.text