# Spaces: Running  (page-header residue from the hosting site, not part of the program)
from flask import Flask, request, jsonify | |
import whisper | |
import tempfile | |
import os | |
from pathlib import Path | |
import torch | |
from flask_cors import CORS | |
# Name of the Whisper checkpoint this service loads.
_WHISPER_MODEL_NAME = "base"

# Create the Flask application and enable cross-origin requests on it.
app = Flask(__name__)
CORS(app)

# Load the Whisper model once at import time; the request handler reuses
# this single module-level instance.
model = whisper.load_model(_WHISPER_MODEL_NAME)
def _word_entries(segments):
    """Flatten Whisper segments into one list of per-word timing dicts."""
    return [
        {
            'word': word['word'],
            'start': word['start'],
            'end': word['end'],
            'confidence': word['probability'],
        }
        for segment in segments
        # Tolerate a segment without a 'words' entry instead of raising KeyError.
        for word in segment.get('words', [])
    ]


# NOTE(review): no @app.route decorator is visible on this function in this
# view of the file -- confirm that it is registered as a view somewhere.
def transcribe_audio():
    """Transcribe the uploaded 'audio' file and return word-level timestamps.

    Reads the file from ``request.files['audio']``, stores it in a temporary
    directory, runs the module-level Whisper ``model`` on it and returns a
    ``(response, status)`` tuple:

    * 200 -- JSON with ``transcription`` (a list of dicts with ``word``,
      ``start``, ``end`` and ``confidence``) and ``full_text``.
    * 400 -- JSON ``error`` when no audio file (or an empty filename) was sent.
    * 500 -- JSON ``error`` carrying the exception text when anything else
      fails.
    """
    try:
        # Check if audio file is in the request
        if 'audio' not in request.files:
            return jsonify({'error': 'No audio file provided'}), 400

        audio_file = request.files['audio']
        # An empty filename means the form field was submitted without a file.
        if not audio_file.filename:
            return jsonify({'error': 'No audio file provided'}), 400

        # The client-supplied filename is untrusted: joining it into a path
        # would allow traversal ("../../x") or an absolute-path override.
        # Keep only its extension and store the upload under a fixed name.
        suffix = Path(audio_file.filename).suffix

        # The context manager removes the directory and its contents even
        # when saving or transcription raises, so nothing leaks on errors.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = os.path.join(temp_dir, 'upload' + suffix)
            audio_file.save(temp_path)

            # Transcribe audio with word-level timestamps
            result = model.transcribe(
                temp_path,
                word_timestamps=True,
                language="en",  # Adjust based on your needs
            )

        # Return transcription results
        return jsonify({
            'transcription': _word_entries(result['segments']),
            'full_text': result['text'],
        }), 200
    except Exception as e:
        # Request boundary: record the traceback server-side, then report the
        # failure to the client in the same JSON shape as before.
        app.logger.exception('Transcription failed')
        return jsonify({'error': str(e)}), 500
if __name__ == '__main__':
    # The server binds to all interfaces, and the Werkzeug interactive
    # debugger lets anyone who can reach it execute arbitrary code. Keep debug
    # mode off by default; opt in with FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)