Spaces:

Pycrolis
/

shrewd-agent

Running

shrewd-agent / tools /transcribe_audio.py

Pycrolis

feat(tool): add audio transcription tool using Whisper AI

4c07abc about 1 month ago

1.25 kB

	import tempfile

	import whisper
	from langchain_core.tools import tool
	from loguru import logger

	from tools.load_file import load_file


	# NOTE: with parse_docstring=True the summary and the Args section of the
	# docstring below are consumed by the @tool decorator (presumably to build the
	# description and argument schema shown to the model), so their wording is
	# runtime-relevant and deliberately left unchanged.
	@tool("transcribe_audio_tool", parse_docstring=True)
	def transcribe_audio(file_name: str) -> str:
	    """
	    Convert speech from an audio file or URL to text.

	    Args:
	        file_name (str): Either a local file path to an audio file or a URL pointing to an audio file.

	    Returns:
	        str: The transcribed text from the audio file, or an empty string if transcription fails.
	    """
	    logger.info(f"use transcribe_audio_tool with param: {file_name}")
	    model_name = "base"

	    try:
	        audio_bytes = load_file(file_name)
	        model = whisper.load_model(model_name)

	        # transcribe() is handed a path, so the bytes must live in a real file
	        # that can be reopened by name. A NamedTemporaryFile that is still open
	        # cannot be reopened on every platform (notably Windows), so the file
	        # is created with delete=False, closed, and only then transcribed; the
	        # enclosing TemporaryDirectory removes it afterwards, even on error.
	        with tempfile.TemporaryDirectory() as tmp_dir:
	            with tempfile.NamedTemporaryFile(dir=tmp_dir, delete=False) as tmp:
	                tmp.write(audio_bytes.getvalue())
	            result = model.transcribe(tmp.name, fp16=False)

	        return result["text"]

	    except Exception as e:
	        # Best-effort tool: never raise to the caller, but record the failure
	        # (with traceback) through the module logger instead of stdout.
	        logger.exception("Error transcribing audio: {}", e)
	        return ""


	if __name__ == "__main__":
	    # Manual smoke test: run the tool once against a local path and once
	    # against a URL, printing whatever each call returns.
	    sample_sources = (
	        "../data/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3",
	        "https://agents-course-unit4-scoring.hf.space/files/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
	    )
	    for source in sample_sources:
	        print(transcribe_audio.invoke(source))