File size: 1,251 Bytes
4c07abc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import tempfile
from functools import lru_cache
from pathlib import Path

import whisper
from langchain_core.tools import tool
from loguru import logger

from tools.load_file import load_file


@lru_cache(maxsize=1)
def _load_whisper_model(model_name: str):
    """Load a Whisper model once and reuse it on later calls.

    Caching avoids calling ``whisper.load_model`` again for every
    transcription request that uses the same model name.
    """
    return whisper.load_model(model_name)


@tool("transcribe_audio_tool", parse_docstring=True)
def transcribe_audio(file_name: str) -> str:
    """
    Convert speech from an audio file or URL to text.

    Args:
        file_name (str): Either a local file path to an audio file or a URL pointing to an audio file.

    Returns:
        str: The transcribed text from the audio file, or an empty string
        when the audio cannot be loaded or transcribed (the error is logged).
    """
    # NOTE: with parse_docstring=True the summary and the Args section above
    # are parsed into the tool schema; keep their layout when editing them.
    logger.info(f"use transcribe_audio_tool with param: {file_name}")
    model_name = "base"

    try:
        # load_file is a project helper; its result is used as a BytesIO-like
        # object here (only ``getvalue()`` is called on it).
        audio_bytes = load_file(file_name)

        # The transcriber is given a path, so the in-memory audio is written
        # to disk first. A closed file inside a temporary directory is used
        # instead of an open NamedTemporaryFile, because an open named temp
        # file cannot be reopened by name on Windows.
        with tempfile.TemporaryDirectory() as tmp_dir:
            audio_path = Path(tmp_dir) / "audio"
            audio_path.write_bytes(audio_bytes.getvalue())

            model = _load_whisper_model(model_name)
            # fp16=False is passed through unchanged from the original call
            # (presumably to run in 32-bit floats on CPU - confirm).
            result = model.transcribe(str(audio_path), fp16=False)

        return result["text"]

    except Exception as e:
        # Best-effort tool boundary: the caller gets an empty transcription
        # instead of an exception, and the failure plus traceback is recorded
        # through the same logger used for the invocation message.
        logger.exception(f"Error transcribing audio: {str(e)}")
        return ""


if __name__ == "__main__":
    # Manual smoke test: run the tool once on a local file path and once on a
    # URL, printing each transcription in that order.
    sample_inputs = (
        "../data/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3",
        "https://agents-course-unit4-scoring.hf.space/files/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
    )
    for sample in sample_inputs:
        print(transcribe_audio.invoke(sample))