Spaces:
Running
Running
File size: 1,768 Bytes
6c3cd1d b3cef24 e1ed6ea ced86e4 b3cef24 6c3cd1d ced86e4 b3cef24 e1ed6ea b3cef24 6c3cd1d e1ed6ea 6c3cd1d e1ed6ea 6c3cd1d e1ed6ea 6c3cd1d 7e9b550 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
from flask import Flask, request, jsonify
import whisper
import tempfile
import os
from pathlib import Path
import torch
from flask_cors import CORS
# Whisper checkpoint used by this service.
_WHISPER_MODEL_NAME = "base"

# Flask application object; CORS is applied with flask_cors' default settings.
app = Flask(__name__)
CORS(app)

# The model is loaded once at import time and reused by every request handler.
model = whisper.load_model(_WHISPER_MODEL_NAME)
@app.route('/transcribe', methods=['POST'])
def transcribe_audio():
    """Transcribe an uploaded audio file and return word-level timestamps.

    Expects a multipart POST whose ``audio`` file field carries the recording.

    Returns:
        A ``(response, status)`` tuple:

        * 200 -- JSON with ``transcription`` (one entry per word holding
          ``word``, ``start``, ``end`` and ``confidence``) and ``full_text``.
        * 400 -- JSON ``error`` when the ``audio`` field is missing or the
          uploaded part has an empty filename.
        * 500 -- JSON ``error`` carrying the exception message for any other
          failure.
    """
    try:
        # Check if audio file is in the request
        audio_file = request.files.get('audio')
        if audio_file is None:
            return jsonify({'error': 'No audio file provided'}), 400
        if not audio_file.filename:
            # An empty filename means no file was actually attached.
            return jsonify({'error': 'No audio file selected'}), 400

        # Never use the client-supplied filename as a path component: an
        # absolute or "../" name would escape the temporary directory. Only
        # the extension is kept, appended to a fixed base name.
        suffix = Path(audio_file.filename).suffix

        # The context manager removes the directory and its contents even
        # when saving or transcription raises.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = os.path.join(temp_dir, 'upload' + suffix)
            audio_file.save(temp_path)

            # Transcribe audio with word-level timestamps
            result = model.transcribe(
                temp_path,
                word_timestamps=True,
                language="en"  # Adjust based on your needs
            )

        # Flatten every segment's words into a single list; Whisper's
        # per-word 'probability' is exposed to clients as 'confidence'.
        word_level_transcription = [
            {
                'word': word['word'],
                'start': word['start'],
                'end': word['end'],
                'confidence': word['probability'],
            }
            for segment in result['segments']
            for word in segment.get('words', [])
        ]

        # Return transcription results
        return jsonify({
            'transcription': word_level_transcription,
            'full_text': result['text']
        }), 200
    except Exception as e:
        # Top-level request boundary: record the traceback server-side and
        # report the failure to the caller as JSON.
        app.logger.exception('Transcription request failed')
        return jsonify({'error': str(e)}), 500
if __name__ == '__main__':
    # The server listens on all interfaces, so the Werkzeug interactive
    # debugger (which permits arbitrary code execution) must not be enabled
    # by default. Set FLASK_DEBUG=1 explicitly for local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes')
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)