NitinBot001 committed on
Commit
3212eef
·
verified ·
1 Parent(s): e1ed6ea

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +232 -43
main.py CHANGED
@@ -1,61 +1,250 @@
1
  from flask import Flask, request, jsonify
2
- import whisper
3
  import tempfile
4
  import os
5
- from pathlib import Path
6
- import torch
 
7
  from flask_cors import CORS
8
 
 
 
 
 
9
  app = Flask(__name__)
10
  CORS(app)
11
 
12
- # Load Whisper model
13
- model = whisper.load_model("base")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  @app.route('/transcribe', methods=['POST'])
16
  def transcribe_audio():
 
 
 
 
 
 
 
 
17
  try:
18
- # Check if audio file is in the request
19
- if 'audio' not in request.files:
 
 
 
 
 
 
20
  return jsonify({'error': 'No audio file provided'}), 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- audio_file = request.files['audio']
23
-
24
- # Save audio file temporarily
25
- temp_dir = tempfile.mkdtemp()
26
- temp_path = os.path.join(temp_dir, audio_file.filename)
27
- audio_file.save(temp_path)
28
-
29
- # Transcribe audio with word-level timestamps
30
- result = model.transcribe(
31
- temp_path,
32
- word_timestamps=True,
33
- language="en" # Adjust based on your needs
34
- )
35
-
36
- # Format word-level transcription with timestamps
37
- word_level_transcription = []
38
- for segment in result['segments']:
39
- for word in segment['words']:
40
- word_level_transcription.append({
41
- 'word': word['word'],
42
- 'start': word['start'],
43
- 'end': word['end'],
44
- 'confidence': word['probability']
45
- })
46
-
47
- # Clean up temporary file
48
- os.remove(temp_path)
49
- os.rmdir(temp_dir)
50
-
51
- # Return transcription results
52
- return jsonify({
53
- 'transcription': word_level_transcription,
54
- 'full_text': result['text']
55
- }), 200
56
 
57
- except Exception as e:
58
- return jsonify({'error': str(e)}), 500
 
 
 
 
 
 
 
 
 
59
 
60
  if __name__ == '__main__':
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  app.run(debug=True, host='0.0.0.0', port=7860)
 
1
  from flask import Flask, request, jsonify
 
2
  import tempfile
3
  import os
4
+ from werkzeug.utils import secure_filename
5
+ import logging
6
+ from datetime import datetime
7
  from flask_cors import CORS
8
 
9
+ # Configure logging
10
+ logging.basicConfig(level=logging.INFO)
11
+ logger = logging.getLogger(__name__)
12
+
13
  app = Flask(__name__)
14
  CORS(app)
15
 
16
+ # Configuration
17
+ app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024 # 100MB max file size
18
+ ALLOWED_EXTENSIONS = {'wav', 'mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'webm', 'flac'}
19
+
20
+ # Initialize model variable
21
+ model = None
22
+ MODEL_SIZE = "base" # Change this to your preferred model size
23
+
24
def load_whisper_model():
    """Import openai-whisper and load MODEL_SIZE into the module-level ``model``.

    Returns:
        bool: True when the model loaded successfully, False on any
        failure (missing package, conflicting ``whisper`` package, or a
        load-time error). Failures are logged, never raised.
    """
    global model
    try:
        # Deferred import so a missing package degrades to a logged failure
        # instead of crashing the whole module at import time.
        import whisper
        logger.info("Loading Whisper model: %s", MODEL_SIZE)
        model = whisper.load_model(MODEL_SIZE)
    except ImportError:
        logger.error("OpenAI Whisper not installed. Install with: pip install openai-whisper")
        return False
    except AttributeError as exc:
        # Usually means the wrong "whisper" PyPI package is installed
        # (it has no load_model attribute).
        logger.error(f"Whisper import error: {exc}")
        logger.error("Make sure you have the correct whisper package installed:")
        logger.error("pip uninstall whisper")
        logger.error("pip install openai-whisper")
        return False
    except Exception as exc:
        logger.error(f"Error loading Whisper model: {exc}")
        return False
    else:
        logger.info("Whisper model loaded successfully")
        return True
46
+
47
# Try to load the model at startup. Endpoints consult this flag so the
# server can still boot (and report status via "/") when loading fails.
model_loaded = load_whisper_model()
49
+
50
def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS."""
    _, dot, extension = filename.rpartition('.')
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS
54
+
55
def format_timestamp(seconds):
    """Convert a duration in seconds to an ``HH:MM:SS.mmm`` string.

    Rounds to the nearest millisecond FIRST and then carries the overflow
    into minutes/hours, so a value like 59.9999 renders as
    "00:01:00.000" instead of the invalid "00:00:60.000" the naive
    modulo formatting produced.

    Args:
        seconds: Non-negative duration in seconds (int or float).

    Returns:
        str: Zero-padded ``HH:MM:SS.mmm`` timestamp.
    """
    total_ms = round(seconds * 1000)
    hours, rem_ms = divmod(total_ms, 3_600_000)
    minutes, rem_ms = divmod(rem_ms, 60_000)
    return f"{hours:02d}:{minutes:02d}:{rem_ms / 1000:06.3f}"
61
+
62
@app.route('/', methods=['GET'])
def health_check():
    """Health check endpoint: reports service and model-load status."""
    if model_loaded:
        status = "healthy"
        message = "Whisper Transcription API is running"
        model_name = MODEL_SIZE
    else:
        status = "unhealthy"
        message = "Whisper model failed to load"
        model_name = "none"
    payload = {
        "status": status,
        "message": message,
        "model": model_name,
        "model_loaded": model_loaded,
        "timestamp": datetime.now().isoformat(),
    }
    return jsonify(payload)
72
 
73
@app.route('/transcribe', methods=['POST'])
def transcribe_audio():
    """
    Transcribe audio file and return word-level timestamps

    Expected form data:
    - audio_file: The audio file to transcribe
    - language (optional): Language code (e.g., 'en', 'es', 'fr')
    - task (optional): 'transcribe' or 'translate' (default: transcribe)

    Returns JSON with full text, per-segment data, per-word timestamps,
    and the wall-clock transcription time in seconds. Errors map to
    400 (bad request), 503 (model unavailable) or 500 (failure).
    """
    try:
        # Refuse early when startup model loading failed.
        if not model_loaded or model is None:
            return jsonify({
                'error': 'Whisper model not loaded. Please check server logs and ensure openai-whisper is installed correctly.'
            }), 503

        # Validate the upload.
        if 'audio_file' not in request.files:
            return jsonify({'error': 'No audio file provided'}), 400

        file = request.files['audio_file']

        if file.filename == '':
            return jsonify({'error': 'No file selected'}), 400

        if not allowed_file(file.filename):
            return jsonify({
                'error': f'File type not allowed. Supported formats: {", ".join(ALLOWED_EXTENSIONS)}'
            }), 400

        # Optional parameters.
        language = request.form.get('language', None)
        task = request.form.get('task', 'transcribe')

        if task not in ['transcribe', 'translate']:
            return jsonify({'error': 'Task must be either "transcribe" or "translate"'}), 400

        # Persist the upload to a temp file, keeping the real extension so
        # ffmpeg (used internally by whisper) can sniff the container format.
        # Writing through the open handle avoids re-opening the path by name.
        suffix = f".{file.filename.rsplit('.', 1)[1].lower()}"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            file.save(tmp_file)
            temp_path = tmp_file.name

        logger.info(f"Processing file: {file.filename}")

        try:
            # Transcribe with word-level timestamps, measuring wall time
            # so the response can report it (the field was previously
            # always None).
            started_at = datetime.now()
            result = model.transcribe(
                temp_path,
                language=language,
                task=task,
                word_timestamps=True,
                verbose=False
            )
            elapsed_seconds = (datetime.now() - started_at).total_seconds()

            # Flatten segments into word-level entries.
            word_segments = []
            for segment in result.get("segments", []):
                for word_data in segment.get("words", []):
                    word_segments.append({
                        "word": word_data.get("word", "").strip(),
                        "start": word_data.get("start", 0),
                        "end": word_data.get("end", 0),
                        "start_formatted": format_timestamp(word_data.get("start", 0)),
                        "end_formatted": format_timestamp(word_data.get("end", 0)),
                        "confidence": word_data.get("probability", 0)
                    })

            segments = result.get("segments", [])
            response_data = {
                "success": True,
                "filename": secure_filename(file.filename),
                "language": result.get("language", "unknown"),
                "task": task,
                # Audio duration = end of the last segment (0 when empty).
                "duration": segments[-1].get("end", 0) if segments else 0,
                "text": result.get("text", ""),
                "word_count": len(word_segments),
                "segments": segments,
                "words": word_segments,
                "model_used": MODEL_SIZE,
                "processing_time": elapsed_seconds
            }

            logger.info(f"Successfully transcribed {len(word_segments)} words from {file.filename}")
            return jsonify(response_data)

        except Exception as e:
            logger.error(f"Transcription error: {str(e)}")
            return jsonify({'error': f'Transcription failed: {str(e)}'}), 500

        finally:
            # Always remove the temporary audio file.
            if os.path.exists(temp_path):
                os.unlink(temp_path)

    except Exception as e:
        logger.error(f"API error: {str(e)}")
        return jsonify({'error': f'Server error: {str(e)}'}), 500
172
 
173
@app.route('/models', methods=['GET'])
def available_models():
    """Get information about available Whisper models"""
    # Static catalog of the standard Whisper checkpoints.
    catalog = {
        "tiny": {"size": "~39 MB", "speed": "~32x", "accuracy": "lowest"},
        "base": {"size": "~74 MB", "speed": "~16x", "accuracy": "low"},
        "small": {"size": "~244 MB", "speed": "~6x", "accuracy": "medium"},
        "medium": {"size": "~769 MB", "speed": "~2x", "accuracy": "high"},
        "large": {"size": "~1550 MB", "speed": "~1x", "accuracy": "highest"},
    }
    languages = [
        "en", "zh", "de", "es", "ru", "ko", "fr", "ja", "pt", "tr", "pl", "ca", "nl",
        "ar", "sv", "it", "id", "hi", "fi", "vi", "he", "uk", "el", "ms", "cs", "ro",
        "da", "hu", "ta", "no", "th", "ur", "hr", "bg", "lt", "la", "mi", "ml", "cy",
        "sk", "te", "fa", "lv", "bn", "sr", "az", "sl", "kn", "et", "mk", "br", "eu",
        "is", "hy", "ne", "mn", "bs", "kk", "sq", "sw", "gl", "mr", "pa", "si", "km",
        "sn", "yo", "so", "af", "oc", "ka", "be", "tg", "sd", "gu", "am", "yi", "lo",
        "uz", "fo", "ht", "ps", "tk", "nn", "mt", "sa", "lb", "my", "bo", "tl", "mg",
        "as", "tt", "haw", "ln", "ha", "ba", "jw", "su"
    ]
    install_help = {
        "error": "Whisper model not loaded" if not model_loaded else None,
        "install_command": "pip install openai-whisper torch torchaudio",
        "uninstall_conflicts": "pip uninstall whisper (if you have conflicting whisper package)"
    }
    return jsonify({
        "current_model": MODEL_SIZE if model_loaded else "none",
        "model_loaded": model_loaded,
        "available_models": catalog,
        "supported_languages": languages,
        "installation_help": install_help,
    })
 
 
 
 
203
 
204
@app.errorhandler(413)
def too_large(e):
    """413 handler: upload exceeded MAX_CONTENT_LENGTH; answer as JSON."""
    body = {'error': 'File too large. Maximum size is 100MB'}
    return jsonify(body), 413
207
+
208
@app.errorhandler(404)
def not_found(e):
    """404 handler: unknown route, answered as JSON instead of HTML."""
    body = {'error': 'Endpoint not found'}
    return jsonify(body), 404
211
+
212
@app.errorhandler(500)
def internal_error(e):
    """500 handler: unhandled server error, answered as JSON."""
    body = {'error': 'Internal server error'}
    return jsonify(body), 500
215
 
216
if __name__ == '__main__':
    # Print a startup banner. When model loading failed the server still
    # boots so "/" can report the problem, but /transcribe will return 503.
    if not model_loaded:
        print(f"""
        ⚠️ WHISPER MODEL LOADING FAILED ⚠️
        ===================================

        The Whisper model could not be loaded. Please check:

        1. Install the correct package:
        pip install openai-whisper torch torchaudio

        2. If you have conflicts, uninstall the wrong whisper package:
        pip uninstall whisper
        pip install openai-whisper

        3. Make sure you have sufficient disk space for the model

        The server will start but transcription will not work until the model is loaded.
        """)
    else:
        print(f"""
        Whisper Transcription API Server
        ================================
        Model: {MODEL_SIZE} ✅
        Status: Ready
        Endpoints:
        - GET / : Health check
        - POST /transcribe : Transcribe audio file
        - GET /models : Available models info

        Supported formats: {', '.join(ALLOWED_EXTENSIONS)}
        Max file size: 100MB
        """)

    # NOTE(review): debug=True and 0.0.0.0 expose the Werkzeug debugger to
    # the network — fine for a local/Spaces demo, not for production.
    app.run(debug=True, host='0.0.0.0', port=7860)