# ai-doctor / voice_of_the_patient.py
# DanishICUP's picture
# Upload 5 files
# 17a5b4a verified
# raw
# history blame
# 2.07 kB
import logging
import speech_recognition as sr
from pydub import AudioSegment
from io import BytesIO
# Root logger: INFO and above, each record prefixed with timestamp and level.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
def record_audio(file_path, duration=20, phrase_time_limit=None):
    """Capture audio from the default microphone and export it as an MP3.

    Args:
        file_path: Destination path for the exported MP3 file.
        duration: Forwarded to ``Recognizer.listen`` as its ``timeout``
            argument (defaults to 20).
        phrase_time_limit: Forwarded unchanged to ``Recognizer.listen``
            (defaults to ``None``).

    Returns:
        None. Any exception raised while opening the microphone, listening
        or exporting is logged at ERROR level and suppressed.
    """
    recognizer = sr.Recognizer()
    try:
        with sr.Microphone() as mic:
            logging.info("Adjusting for ambient noise...")
            recognizer.adjust_for_ambient_noise(mic)

            logging.info("Recording audio...")
            captured = recognizer.listen(
                mic,
                timeout=duration,
                phrase_time_limit=phrase_time_limit,
            )
            logging.info("Recording complete.")

            # Wrap the WAV bytes in an in-memory buffer so pydub can
            # re-encode them as MP3 without a temporary file.
            wav_buffer = BytesIO(captured.get_wav_data())
            clip = AudioSegment.from_wav(wav_buffer)
            clip.export(file_path, format="mp3", bitrate="128k")
            logging.info(f"Audio saved to {file_path}.")
    except Exception as e:
        logging.error(f"An error occurred while recording audio: {e}")
# Path the recorded patient audio is written to (and later read back from).
audio_filePath = "patient_voice.mp3"

# NOTE: executes when the module is loaded, i.e. recording starts on import/run.
record_audio(audio_filePath)
import os
from groq import Groq
from dotenv import load_dotenv
# Load variables from a local .env file (if any) before the environment is read below.
load_dotenv()
# Groq API key taken from the process environment; None when the variable is unset.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Identifier of the speech-to-text model requested for transcription.
sst_model = "whisper-large-v3"
def transcription_with_groq(audio_filePath, sst_model, GROQ_API_KEY):
    """Send an audio file to Groq's transcription endpoint and return the text.

    Args:
        audio_filePath: Path of the audio file to upload; opened in binary mode.
        sst_model: Model name passed as ``model`` to the transcription request.
        GROQ_API_KEY: API key used to construct the ``Groq`` client.

    Returns:
        The transcription text (also printed to stdout), or ``None`` when any
        step fails. Failures are logged at ERROR level rather than raised.
    """
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
        # The context manager closes the file handle whether the request
        # succeeds or raises, so no descriptor is leaked.
        with open(audio_filePath, "rb") as audio_file:
            transcription = groq_client.audio.transcriptions.create(
                model=sst_model,
                file=audio_file,
                language="en",
            )
        logging.info("Transcription complete.")
        print(transcription.text)
        return transcription.text
    except Exception as e:
        logging.error(f"An error occurred during transcription: {e}")
        # Explicit so the "None on failure" contract is visible to callers.
        return None
# Runs when the module is loaded: transcribe the audio file recorded above.
transcription_with_groq(
    audio_filePath=audio_filePath,
    sst_model=sst_model,
    GROQ_API_KEY=GROQ_API_KEY,
)