"""LangChain tool that transcribes an audio file or URL with Whisper."""

import contextlib
import os
import tempfile

import whisper
from langchain_core.tools import tool
from loguru import logger

from tools.load_file import load_file

# Name of the Whisper checkpoint passed to ``whisper.load_model``.
_WHISPER_MODEL_NAME = "base"


# NOTE: the docstring below is parsed by the ``tool`` decorator
# (``parse_docstring=True``), so its wording is kept stable; implementation
# notes live in ``#`` comments instead.
#
# Behaviour on failure: any exception raised while loading, writing or
# transcribing the audio is logged and an empty string is returned, so the
# caller never sees an exception from this tool.
@tool("transcribe_audio_tool", parse_docstring=True)
def transcribe_audio(file_name: str) -> str:
    """
    Convert speech from an audio file or URL to text.

    Args:
        file_name (str): Either a local file path to an audio file or a URL pointing to an audio file.

    Returns:
        str: The transcribed text from the audio file
    """
    logger.info(f"use transcribe_audio_tool with param: {file_name}")

    try:
        # ``load_file`` is expected to return an in-memory buffer exposing
        # ``getvalue()`` (e.g. ``io.BytesIO``) -- TODO confirm against
        # tools.load_file.
        audio_bytes = load_file(file_name)

        # Whisper is given a file *name*, so the audio has to live on disk.
        # ``delete=False`` lets us close the handle before the file is opened
        # again by name; a still-open NamedTemporaryFile cannot be reopened
        # by name on Windows.
        tmp = tempfile.NamedTemporaryFile(delete=False)
        try:
            with tmp:
                tmp.write(audio_bytes.getvalue())

            model = whisper.load_model(_WHISPER_MODEL_NAME)
            # fp16=False requests full-precision decoding.
            result = model.transcribe(tmp.name, fp16=False)
            return result["text"]
        finally:
            # We own the file's lifetime now; never let cleanup failures
            # mask the real outcome.
            with contextlib.suppress(OSError):
                os.unlink(tmp.name)
    except Exception as e:
        # Deliberate best-effort boundary: report through the same logger as
        # the rest of the function and fall back to an empty transcript.
        logger.exception(f"Error transcribing audio: {str(e)}")
        return ""


if __name__ == "__main__":
    # Manual smoke test: one local file and one remote URL.
    print(transcribe_audio.invoke("../data/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3"))
    print(
        transcribe_audio.invoke(
            "https://agents-course-unit4-scoring.hf.space/files/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3"
        )
    )