"""Flask service that transcribes uploaded audio with Whisper.

Exposes a single ``POST /transcribe`` endpoint that accepts a multipart
upload under the ``audio`` field and returns word-level timestamps.
"""

from flask import Flask, request, jsonify
import whisper
import tempfile
import os
from pathlib import Path
import torch
from flask_cors import CORS

app = Flask(__name__)
CORS(app)

# Load the Whisper model once at import time so every request reuses it.
model = whisper.load_model("base")


def _safe_suffix(filename):
    """Return the extension of *filename* if it is purely alphanumeric, else ''.

    The upload's filename is client-controlled, so only a harmless
    extension (e.g. ``.wav``) is ever allowed to reach the filesystem path.
    """
    suffix = Path(filename).suffix
    return suffix if suffix[1:].isalnum() else ''


@app.route('/transcribe', methods=['POST'])
def transcribe_audio():
    """Transcribe the uploaded ``audio`` file and return word-level timings.

    Returns:
        A ``(response, status)`` tuple:

        * 200 with ``{'transcription': [...], 'full_text': str}`` where each
          transcription entry has ``word``, ``start``, ``end`` and
          ``confidence`` keys.
        * 400 with ``{'error': ...}`` when no audio file was supplied.
        * 500 with ``{'error': ...}`` when saving or transcription fails.
    """
    try:
        audio_file = request.files.get('audio')
        # A file part with an empty filename means nothing was uploaded;
        # treat it the same as a missing part instead of failing on save.
        if audio_file is None or not audio_file.filename:
            return jsonify({'error': 'No audio file provided'}), 400

        # The context manager removes the directory and its contents even
        # when saving or transcription raises, so uploads never accumulate.
        with tempfile.TemporaryDirectory() as temp_dir:
            # Never join the client-supplied filename into the path: use a
            # fixed base name and keep only a validated extension.
            temp_path = os.path.join(
                temp_dir, 'audio' + _safe_suffix(audio_file.filename)
            )
            audio_file.save(temp_path)

            # Transcribe audio with word-level timestamps.
            result = model.transcribe(
                temp_path,
                word_timestamps=True,
                language="en",  # Adjust based on your needs
            )

        # Flatten the per-segment word lists into one list for the response.
        # A segment without a 'words' entry contributes nothing.
        word_level_transcription = [
            {
                'word': word['word'],
                'start': word['start'],
                'end': word['end'],
                'confidence': word['probability'],
            }
            for segment in result['segments']
            for word in segment.get('words', [])
        ]

        return jsonify({
            'transcription': word_level_transcription,
            'full_text': result['text'],
        }), 200

    except Exception as e:
        # Top-level request boundary: record the traceback server-side and
        # report the failure to the client in the existing response shape.
        app.logger.exception('Transcription request failed')
        return jsonify({'error': str(e)}), 500


if __name__ == '__main__':
    # The server binds to all interfaces, so the interactive debugger must
    # not be on by default; opt in explicitly with FLASK_DEBUG=1.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)