shrewd-agent / tools /transcribe_audio.py
Pycrolis
feat(tool): add audio transcription tool using Whisper AI
4c07abc
import os
import tempfile
from functools import lru_cache

import whisper
from langchain_core.tools import tool
from loguru import logger

from tools.load_file import load_file
@lru_cache(maxsize=1)
def _load_whisper_model(model_name: str):
    """Load the named Whisper model once and reuse it on later calls."""
    return whisper.load_model(model_name)


@tool("transcribe_audio_tool", parse_docstring=True)
def transcribe_audio(file_name: str) -> str:
    """
    Convert speech from an audio file or URL to text.

    Args:
        file_name (str): Either a local file path to an audio file or a URL pointing to an audio file.

    Returns:
        str: The transcribed text from the audio file
    """
    # NOTE: the tool is created with parse_docstring=True, so the docstring
    # above is consumed at runtime; its wording is intentionally left as-is.
    # On any failure the error is logged and an empty string is returned.
    logger.info(f"use transcribe_audio_tool with param: {file_name}")
    model_name = "base"
    try:
        # load_file returns an in-memory buffer exposing getvalue().
        audio_bytes = load_file(file_name)
        model = _load_whisper_model(model_name)
        # Whisper is given a path rather than a file handle, so the audio must
        # live in a real file. Write and close it inside a temporary directory:
        # a NamedTemporaryFile that is still open cannot be reopened by name on
        # every platform (notably Windows).
        with tempfile.TemporaryDirectory() as tmp_dir:
            audio_path = os.path.join(tmp_dir, "audio")
            with open(audio_path, "wb") as audio_file:
                audio_file.write(audio_bytes.getvalue())
            result = model.transcribe(audio_path, fp16=False)
        return result["text"]
    except Exception as e:
        # Best-effort tool boundary: record the full traceback through the
        # module logger and keep the original empty-string fallback.
        logger.exception("Error transcribing audio: {}", e)
        return ""
if __name__ == "__main__":
    # Manual smoke test: transcribe the same sample once from a local path
    # and once from a URL, printing each result.
    sample_sources = (
        "../data/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3",
        "https://agents-course-unit4-scoring.hf.space/files/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
    )
    for sample_source in sample_sources:
        print(transcribe_audio.invoke(sample_source))