Spaces:
Running
Running
File size: 1,251 Bytes
4c07abc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import functools
import os
import tempfile

import whisper
from langchain_core.tools import tool
from loguru import logger

from tools.load_file import load_file
@functools.lru_cache(maxsize=1)
def _load_whisper_model(model_name: str):
    """Load a Whisper model once and reuse it for later tool invocations."""
    return whisper.load_model(model_name)


# NOTE: parse_docstring=True makes LangChain build the tool and argument
# descriptions from the Google-style docstring below, so its wording and its
# blank-line-separated sections are part of the tool's runtime contract.
@tool("transcribe_audio_tool", parse_docstring=True)
def transcribe_audio(file_name: str) -> str:
    """
    Convert speech from an audio file or URL to text.

    Args:
        file_name (str): Either a local file path to an audio file or a URL pointing to an audio file.

    Returns:
        str: The transcribed text from the audio file
    """
    logger.info(f"use transcribe_audio_tool with param: {file_name}")
    model_name = "base"
    try:
        # load_file is expected to return a BytesIO-like object: only
        # .getvalue() is used on it here.
        audio_bytes = load_file(file_name)

        # Whisper is handed a file *path*, so the audio must live on disk.
        # delete=False lets us close the handle before the path is reopened
        # by name (reopening a still-open NamedTemporaryFile is not portable,
        # notably on Windows); the file is removed explicitly below.
        tmp = tempfile.NamedTemporaryFile(delete=False)
        try:
            with tmp:
                tmp.write(audio_bytes.getvalue())
            model = _load_whisper_model(model_name)
            result = model.transcribe(tmp.name, fp16=False)
        finally:
            os.remove(tmp.name)
        return result["text"]
    except Exception as e:
        # Best-effort tool: any failure is logged (with traceback) and an
        # empty transcription is returned instead of raising to the caller.
        logger.exception(f"Error transcribing audio: {str(e)}")
        return ""
if __name__ == "__main__":
    # Manual smoke test: transcribe the same sample once from a local path
    # and once from its remote URL, printing each transcription.
    sample_sources = (
        "../data/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3",
        "https://agents-course-unit4-scoring.hf.space/files/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
    )
    for sample in sample_sources:
        print(transcribe_audio.invoke(sample))
|