NitinBot001 committed on
Commit
e1ed6ea
·
verified ·
1 Parent(s): ced86e4

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +42 -175
main.py CHANGED
@@ -2,193 +2,60 @@ from flask import Flask, request, jsonify
2
  import whisper
3
  import tempfile
4
  import os
 
 
5
  from flask_cors import CORS
6
- from werkzeug.utils import secure_filename
7
- import logging
8
- from datetime import datetime
9
-
10
- # Configure logging
11
- logging.basicConfig(level=logging.INFO)
12
- logger = logging.getLogger(__name__)
13
 
14
  app = Flask(__name__)
15
  CORS(app)
16
 
17
- # Configuration
18
- app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024 # 100MB max file size
19
- ALLOWED_EXTENSIONS = {'wav', 'mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'webm', 'flac'}
20
-
21
- # Load Whisper model (you can change the model size: tiny, base, small, medium, large)
22
- MODEL_SIZE = "base" # Change this to your preferred model size
23
- logger.info(f"Loading Whisper model: {MODEL_SIZE}")
24
- model = whisper.load_model(MODEL_SIZE)
25
- logger.info("Whisper model loaded successfully")
26
-
27
def allowed_file(filename):
    """Return True if *filename* carries an extension listed in ALLOWED_EXTENSIONS."""
    _, dot, extension = filename.rpartition('.')
    # rpartition yields an empty separator when there is no dot at all.
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS
31
-
32
def format_timestamp(seconds):
    """Render a non-negative duration in seconds as an HH:MM:SS.mmm string."""
    total_minutes, secs = divmod(seconds, 60)
    hours, minutes = divmod(int(total_minutes), 60)
    # :06.3f pads seconds to two integer digits plus milliseconds.
    return f"{hours:02d}:{minutes:02d}:{secs:06.3f}"
38
-
39
@app.route('/', methods=['GET'])
def health_check():
    """Liveness probe: report service status, active model, and server time."""
    payload = {
        "status": "healthy",
        "message": "Whisper Transcription API is running",
        "model": MODEL_SIZE,
        "timestamp": datetime.now().isoformat(),
    }
    return jsonify(payload)
48
 
49
@app.route('/transcribe', methods=['POST'])
def transcribe_audio():
    """
    Transcribe audio file and return word-level timestamps

    Expected form data:
    - audio_file: The audio file to transcribe
    - language (optional): Language code (e.g., 'en', 'es', 'fr')
    - task (optional): 'transcribe' or 'translate' (default: transcribe)

    Returns JSON with full text, per-segment data, per-word timestamps
    (with confidence), and the wall-clock processing time in seconds.
    """
    import time  # local import: only needed to time the transcription call

    try:
        # Check if audio file is present
        if 'audio_file' not in request.files:
            return jsonify({'error': 'No audio file provided'}), 400

        file = request.files['audio_file']

        if file.filename == '':
            return jsonify({'error': 'No file selected'}), 400

        if not allowed_file(file.filename):
            return jsonify({
                'error': f'File type not allowed. Supported formats: {", ".join(ALLOWED_EXTENSIONS)}'
            }), 400

        # Get optional parameters
        language = request.form.get('language', None)
        task = request.form.get('task', 'transcribe')

        if task not in ['transcribe', 'translate']:
            return jsonify({'error': 'Task must be either "transcribe" or "translate"'}), 400

        # Save uploaded file temporarily, keeping the original extension so
        # whisper/ffmpeg can sniff the container format. allowed_file() above
        # guarantees the filename contains a dot.
        suffix = f".{file.filename.rsplit('.', 1)[1].lower()}"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            file.save(tmp_file.name)
            temp_path = tmp_file.name

        logger.info(f"Processing file: {file.filename}")

        try:
            # Transcribe with word-level timestamps, timing the call so the
            # response's processing_time field carries a real value (it was
            # previously always None).
            started = time.perf_counter()
            result = model.transcribe(
                temp_path,
                language=language,
                task=task,
                word_timestamps=True,
                verbose=False
            )
            elapsed = time.perf_counter() - started

            # Extract word-level data; segments without word entries are
            # simply skipped via the empty-list default.
            word_segments = []
            for segment in result.get("segments", []):
                for word_data in segment.get("words", []):
                    word_segments.append({
                        "word": word_data.get("word", "").strip(),
                        "start": word_data.get("start", 0),
                        "end": word_data.get("end", 0),
                        "start_formatted": format_timestamp(word_data.get("start", 0)),
                        "end_formatted": format_timestamp(word_data.get("end", 0)),
                        "confidence": word_data.get("probability", 0)
                    })

            # Prepare response
            response_data = {
                "success": True,
                "filename": secure_filename(file.filename),
                "language": result.get("language", "unknown"),
                "task": task,
                # Duration is the end timestamp of the last segment, if any.
                "duration": result.get("segments", [{}])[-1].get("end", 0) if result.get("segments") else 0,
                "text": result.get("text", ""),
                "word_count": len(word_segments),
                "segments": result.get("segments", []),
                "words": word_segments,
                "model_used": MODEL_SIZE,
                "processing_time": round(elapsed, 3)
            }

            logger.info(f"Successfully transcribed {len(word_segments)} words from {file.filename}")
            return jsonify(response_data)

        except Exception as e:
            logger.error(f"Transcription error: {str(e)}")
            return jsonify({'error': f'Transcription failed: {str(e)}'}), 500

        finally:
            # Clean up temporary file
            if os.path.exists(temp_path):
                os.unlink(temp_path)

    except Exception as e:
        logger.error(f"API error: {str(e)}")
        return jsonify({'error': f'Server error: {str(e)}'}), 500
142
-
143
@app.route('/models', methods=['GET'])
def available_models():
    """Get information about available Whisper models"""
    # Static catalog of the standard Whisper checkpoints.
    model_catalog = {
        "tiny": {"size": "~39 MB", "speed": "~32x", "accuracy": "lowest"},
        "base": {"size": "~74 MB", "speed": "~16x", "accuracy": "low"},
        "small": {"size": "~244 MB", "speed": "~6x", "accuracy": "medium"},
        "medium": {"size": "~769 MB", "speed": "~2x", "accuracy": "high"},
        "large": {"size": "~1550 MB", "speed": "~1x", "accuracy": "highest"},
    }
    # ISO language codes Whisper accepts for the `language` parameter.
    language_codes = [
        "en", "zh", "de", "es", "ru", "ko", "fr", "ja", "pt", "tr", "pl", "ca", "nl",
        "ar", "sv", "it", "id", "hi", "fi", "vi", "he", "uk", "el", "ms", "cs", "ro",
        "da", "hu", "ta", "no", "th", "ur", "hr", "bg", "lt", "la", "mi", "ml", "cy",
        "sk", "te", "fa", "lv", "bn", "sr", "az", "sl", "kn", "et", "mk", "br", "eu",
        "is", "hy", "ne", "mn", "bs", "kk", "sq", "sw", "gl", "mr", "pa", "si", "km",
        "sn", "yo", "so", "af", "oc", "ka", "be", "tg", "sd", "gu", "am", "yi", "lo",
        "uz", "fo", "ht", "ps", "tk", "nn", "mt", "sa", "lb", "my", "bo", "tl", "mg",
        "as", "tt", "haw", "ln", "ha", "ba", "jw", "su",
    ]
    return jsonify({
        "current_model": MODEL_SIZE,
        "available_models": model_catalog,
        "supported_languages": language_codes,
    })
167
 
168
@app.errorhandler(413)
def too_large(e):
    """Fired when a request exceeds MAX_CONTENT_LENGTH; respond in JSON, not HTML."""
    payload = {'error': 'File too large. Maximum size is 100MB'}
    return jsonify(payload), 413
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
@app.errorhandler(404)
def not_found(e):
    """Return a JSON body for unknown endpoints instead of Flask's HTML page."""
    payload = {'error': 'Endpoint not found'}
    return jsonify(payload), 404

@app.errorhandler(500)
def internal_error(e):
    """Return a JSON body for unhandled server errors."""
    payload = {'error': 'Internal server error'}
    return jsonify(payload), 500
179
 
180
if __name__ == '__main__':
    # Startup banner describing the API surface of the development server.
    banner = f"""
    Whisper Transcription API Server
    ================================
    Model: {MODEL_SIZE}
    Endpoints:
    - GET / : Health check
    - POST /transcribe : Transcribe audio file
    - GET /models : Available models info

    Supported formats: {', '.join(ALLOWED_EXTENSIONS)}
    Max file size: 100MB
    """
    print(banner)

    # Flask development server, bound on all interfaces at port 7860.
    app.run(debug=True, host='0.0.0.0', port=7860)
 
2
import whisper
import tempfile
import os
from pathlib import Path
import torch
from flask_cors import CORS

# Flask application with CORS enabled so browser clients hosted on
# other origins can call this API directly.
app = Flask(__name__)
CORS(app)

# Load the Whisper "base" checkpoint once at import time so every
# request reuses the same in-memory model.
model = whisper.load_model("base")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
@app.route('/transcribe', methods=['POST'])
def transcribe_audio():
    """Transcribe an uploaded audio file and return word-level timestamps.

    Expects multipart form data with an 'audio' file field.

    Returns JSON:
    - transcription: list of {word, start, end, confidence} dicts
    - full_text: the complete transcribed text
    """
    try:
        # Check if audio file is in the request
        if 'audio' not in request.files:
            return jsonify({'error': 'No audio file provided'}), 400

        audio_file = request.files['audio']

        # Save audio file temporarily. basename() strips any directory
        # components a malicious client could smuggle into the filename
        # (path traversal out of the temp dir).
        temp_dir = tempfile.mkdtemp()
        safe_name = os.path.basename(audio_file.filename) or 'upload'
        temp_path = os.path.join(temp_dir, safe_name)
        audio_file.save(temp_path)

        try:
            # Transcribe audio with word-level timestamps
            result = model.transcribe(
                temp_path,
                word_timestamps=True,
                language="en"  # Adjust based on your needs
            )
        finally:
            # Clean up the temp file/dir even when transcription raises;
            # previously a failure here leaked the temporary directory.
            if os.path.exists(temp_path):
                os.remove(temp_path)
            os.rmdir(temp_dir)

        # Format word-level transcription with timestamps. .get() guards
        # against segments without word entries / words without a
        # probability, which would otherwise raise KeyError.
        word_level_transcription = [
            {
                'word': word['word'],
                'start': word['start'],
                'end': word['end'],
                'confidence': word.get('probability', 0.0),
            }
            for segment in result['segments']
            for word in segment.get('words', [])
        ]

        # Return transcription results
        return jsonify({
            'transcription': word_level_transcription,
            'full_text': result['text']
        }), 200

    except Exception as e:
        # Top-level boundary: surface the failure to the client as JSON.
        return jsonify({'error': str(e)}), 500
 
 
 
 
 
59
 
60
if __name__ == '__main__':
    # Flask development server; for production, front this with a proper
    # WSGI server instead. Binds all interfaces on port 7860.
    app.run(host='0.0.0.0', port=7860, debug=True)