from fastapi import FastAPI, File, UploadFile
from fastapi.responses import JSONResponse
import whisperx
import torch
import tempfile
import shutil
import os

app = FastAPI()

# Load the model once at import time so each request doesn't pay the
# (expensive) model-load cost. Uses GPU when available, else CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = whisperx.load_model("medium", device)


@app.post("/transcribe")
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with whisperx.

    Saves the upload to a temporary .wav file, runs the globally loaded
    whisperx model on it, and returns the raw transcription result as JSON.
    On any failure, responds with HTTP 500 and {"error": <message>}.
    """
    temp_audio_path = None
    try:
        # Persist the upload to disk; whisperx.load_audio expects a file path.
        # delete=False so the file survives the `with` block (removed in finally).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            shutil.copyfileobj(file.file, tmp)
            temp_audio_path = tmp.name

        audio = whisperx.load_audio(temp_audio_path)
        # BUGFIX: the original passed `return_word_timestamps=True`, which is
        # not a parameter of whisperx's transcribe() — every request raised
        # TypeError and returned 500. Word-level timestamps in whisperx come
        # from a separate alignment pass (whisperx.load_align_model +
        # whisperx.align) — TODO(review): add that pass if word timestamps
        # are actually required by callers.
        result = model.transcribe(audio, batch_size=16)
        return JSONResponse(content=result)
    except Exception as e:
        # Boundary handler: surface the error to the client as a 500.
        return JSONResponse(status_code=500, content={"error": str(e)})
    finally:
        # BUGFIX: clean up the temp file on the error path too — the original
        # only removed it on success, leaking a file per failed request.
        if temp_audio_path and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)