Spaces:

NitinBot001
/

Whisper-api

Running

App Files Files Community

NitinBot001 committed on Jun 27

Commit

3212eef

verified ·

1 Parent(s): e1ed6ea

Update main.py

Browse files

Files changed (1) hide show

main.py +232 -43

main.py CHANGED Viewed

@@ -1,61 +1,250 @@
 from flask import Flask, request, jsonify
-import whisper
 import tempfile
 import os
-from pathlib import Path
-import torch
 from flask_cors import CORS
 app = Flask(__name__)
 CORS(app)
-# Load Whisper model
-model = whisper.load_model("base")
 @app.route('/transcribe', methods=['POST'])
 def transcribe_audio():
     try:
-        # Check if audio file is in the request
-        if 'audio' not in request.files:
             return jsonify({'error': 'No audio file provided'}), 400
-        audio_file = request.files['audio']
-        # Save audio file temporarily
-        temp_dir = tempfile.mkdtemp()
-        temp_path = os.path.join(temp_dir, audio_file.filename)
-        audio_file.save(temp_path)
-        # Transcribe audio with word-level timestamps
-        result = model.transcribe(
-            temp_path,
-            word_timestamps=True,
-            language="en"  # Adjust based on your needs
-        )
-        # Format word-level transcription with timestamps
-        word_level_transcription = []
-        for segment in result['segments']:
-            for word in segment['words']:
-                word_level_transcription.append({
-                    'word': word['word'],
-                    'start': word['start'],
-                    'end': word['end'],
-                    'confidence': word['probability']
-                })
-        # Clean up temporary file
-        os.remove(temp_path)
-        os.rmdir(temp_dir)
-        # Return transcription results
-        return jsonify({
-            'transcription': word_level_transcription,
-            'full_text': result['text']
-        }), 200
-    except Exception as e:
-        return jsonify({'error': str(e)}), 500
 if __name__ == '__main__':
     app.run(debug=True, host='0.0.0.0', port=7860)

 from flask import Flask, request, jsonify
 import tempfile
 import os
+from werkzeug.utils import secure_filename
+import logging
+from datetime import datetime
 from flask_cors import CORS
+# Configure logging: INFO level on the root logger, plus a module-level logger
+# used by every function in this file
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 app = Flask(__name__)
 CORS(app)
+# Configuration
+# Upload size cap; the 413 handler (too_large) reports this limit to clients
+app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024  # 100MB max file size
+# Lower-case extensions accepted by allowed_file()
+ALLOWED_EXTENSIONS = {'wav', 'mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'webm', 'flac'}
+# Initialize model variable; load_whisper_model() rebinds it on success,
+# so None means "no model available"
+model = None
+MODEL_SIZE = "base"  # Change this to your preferred model size
+def load_whisper_model():
+    """
+    Import openai-whisper and load the MODEL_SIZE model into the global `model`.
+    Returns True when the model was loaded, False when the import or the load
+    failed (the reason is written to the log; nothing is raised).
+    """
+    global model
+    loaded_ok = False
+    try:
+        # Imported lazily so a missing or wrong package is reported, not fatal
+        import whisper
+        logger.info(f"Loading Whisper model: {MODEL_SIZE}")
+        model = whisper.load_model(MODEL_SIZE)
+        logger.info("Whisper model loaded successfully")
+        loaded_ok = True
+    except ImportError:
+        logger.error("OpenAI Whisper not installed. Install with: pip install openai-whisper")
+    except AttributeError as e:
+        # Typically a different package named "whisper" lacking load_model
+        hints = (
+            f"Whisper import error: {e}",
+            "Make sure you have the correct whisper package installed:",
+            "pip uninstall whisper",
+            "pip install openai-whisper",
+        )
+        for hint in hints:
+            logger.error(hint)
+    except Exception as e:
+        logger.error(f"Error loading Whisper model: {e}")
+    return loaded_ok
+# Try to load the model once at import time. model_loaded records the outcome so
+# the endpoints can report or refuse service instead of failing on a None model.
+model_loaded = load_whisper_model()
+def allowed_file(filename):
+    """Return True when filename has an extension listed in ALLOWED_EXTENSIONS"""
+    # rpartition yields an empty separator when the name contains no dot at all
+    _, dot, extension = filename.rpartition('.')
+    if not dot:
+        return False
+    return extension.lower() in ALLOWED_EXTENSIONS
+def format_timestamp(seconds):
+    """
+    Convert a duration in seconds to HH:MM:SS.mmm format.
+    The value is rounded to whole milliseconds before it is split into fields,
+    so a carry propagates correctly (59.9996 -> 00:01:00.000, never 00:00:60.000).
+    Negative input is clamped to zero.
+    """
+    # Work in integer milliseconds so rounding happens exactly once
+    total_ms = max(0, int(round(seconds * 1000)))
+    hours, remainder = divmod(total_ms, 3600 * 1000)
+    minutes, remainder = divmod(remainder, 60 * 1000)
+    secs, millis = divmod(remainder, 1000)
+    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"
+@app.route('/', methods=['GET'])
+def health_check():
+    """Report whether the API is up and whether the Whisper model is loaded"""
+    if model_loaded:
+        status = "healthy"
+        message = "Whisper Transcription API is running"
+        active_model = MODEL_SIZE
+    else:
+        status = "unhealthy"
+        message = "Whisper model failed to load"
+        active_model = "none"
+    payload = {
+        "status": status,
+        "message": message,
+        "model": active_model,
+        "model_loaded": model_loaded,
+        "timestamp": datetime.now().isoformat()
+    }
+    return jsonify(payload)
 @app.route('/transcribe', methods=['POST'])
 def transcribe_audio():
+    """
+    Transcribe an uploaded audio file and return word-level timestamps
+    Expected form data:
+    - audio_file: The audio file to transcribe (extension must be in ALLOWED_EXTENSIONS)
+    - language (optional): Language code (e.g., 'en', 'es', 'fr'); a missing or
+      blank value is passed to the model as None
+    - task (optional): 'transcribe' or 'translate' (default: transcribe)
+    Responses (JSON):
+    - 200: text, raw segments and per-word timing data
+    - 400: missing file, empty filename, disallowed extension or invalid task
+    - 503: the Whisper model is not loaded
+    - 500: saving the upload or transcribing it failed
+    """
     try:
+        # Check if model is loaded
+        if not model_loaded or model is None:
+            return jsonify({
+                'error': 'Whisper model not loaded. Please check server logs and ensure openai-whisper is installed correctly.'
+            }), 503
+        # Check if audio file is present
+        if 'audio_file' not in request.files:
             return jsonify({'error': 'No audio file provided'}), 400
+        file = request.files['audio_file']
+        if file.filename == '':
+            return jsonify({'error': 'No file selected'}), 400
+        if not allowed_file(file.filename):
+            # sorted() keeps the message stable; set iteration order is arbitrary
+            return jsonify({
+                'error': f'File type not allowed. Supported formats: {", ".join(sorted(ALLOWED_EXTENSIONS))}'
+            }), 400
+        # Get optional parameters; a blank language field means "not specified"
+        language = request.form.get('language') or None
+        task = request.form.get('task', 'transcribe')
+        if task not in ('transcribe', 'translate'):
+            return jsonify({'error': 'Task must be either "transcribe" or "translate"'}), 400
+        # Reserve a temp path that keeps the upload's extension
+        # (allowed_file() above guarantees the filename contains a dot)
+        extension = file.filename.rsplit('.', 1)[1].lower()
+        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{extension}") as tmp_file:
+            temp_path = tmp_file.name
+        try:
+            # Saving happens inside this try so a failed save cannot leak the temp file;
+            # a save error still reaches the outer handler as a "Server error"
+            file.save(temp_path)
+            logger.info(f"Processing file: {file.filename}")
+            try:
+                # Transcribe with word-level timestamps
+                result = model.transcribe(
+                    temp_path,
+                    language=language,
+                    task=task,
+                    word_timestamps=True,
+                    verbose=False
+                )
+                segments = result.get("segments") or []
+                # Extract word-level data
+                word_segments = []
+                for segment in segments:
+                    for word_data in segment.get("words") or []:
+                        start = word_data.get("start", 0)
+                        end = word_data.get("end", 0)
+                        word_segments.append({
+                            "word": word_data.get("word", "").strip(),
+                            "start": start,
+                            "end": end,
+                            "start_formatted": format_timestamp(start),
+                            "end_formatted": format_timestamp(end),
+                            "confidence": word_data.get("probability", 0)
+                        })
+                # Prepare response; duration is the end time of the last segment
+                response_data = {
+                    "success": True,
+                    "filename": secure_filename(file.filename),
+                    "language": result.get("language", "unknown"),
+                    "task": task,
+                    "duration": segments[-1].get("end", 0) if segments else 0,
+                    "text": result.get("text", ""),
+                    "word_count": len(word_segments),
+                    "segments": segments,
+                    "words": word_segments,
+                    "model_used": MODEL_SIZE,
+                    "processing_time": None  # You can add timing if needed
+                }
+                logger.info(f"Successfully transcribed {len(word_segments)} words from {file.filename}")
+                return jsonify(response_data)
+            except Exception as e:
+                # logger.exception keeps the traceback in the server log
+                logger.exception(f"Transcription error: {str(e)}")
+                return jsonify({'error': f'Transcription failed: {str(e)}'}), 500
+        finally:
+            # Clean up temporary file on every path (success, save error, model error)
+            if os.path.exists(temp_path):
+                os.unlink(temp_path)
+    except Exception as e:
+        logger.exception(f"API error: {str(e)}")
+        return jsonify({'error': f'Server error: {str(e)}'}), 500
+@app.route('/models', methods=['GET'])
+def available_models():
+    """Describe the active model, the model sizes, language codes and install hints"""
+    # (name, size, speed, accuracy) rows for the model table in the response
+    model_rows = (
+        ("tiny", "~39 MB", "~32x", "lowest"),
+        ("base", "~74 MB", "~16x", "low"),
+        ("small", "~244 MB", "~6x", "medium"),
+        ("medium", "~769 MB", "~2x", "high"),
+        ("large", "~1550 MB", "~1x", "highest"),
+    )
+    model_table = {
+        name: {"size": size, "speed": speed, "accuracy": accuracy}
+        for name, size, speed, accuracy in model_rows
+    }
+    # Language codes, kept in the same order as the original literal list
+    language_codes = (
+        "en zh de es ru ko fr ja pt tr pl ca nl "
+        "ar sv it id hi fi vi he uk el ms cs ro "
+        "da hu ta no th ur hr bg lt la mi ml cy "
+        "sk te fa lv bn sr az sl kn et mk br eu "
+        "is hy ne mn bs kk sq sw gl mr pa si km "
+        "sn yo so af oc ka be tg sd gu am yi lo "
+        "uz fo ht ps tk nn mt sa lb my bo tl mg "
+        "as tt haw ln ha ba jw su"
+    ).split()
+    install_help = {
+        "error": None if model_loaded else "Whisper model not loaded",
+        "install_command": "pip install openai-whisper torch torchaudio",
+        "uninstall_conflicts": "pip uninstall whisper (if you have conflicting whisper package)"
+    }
+    return jsonify({
+        "current_model": MODEL_SIZE if model_loaded else "none",
+        "model_loaded": model_loaded,
+        "available_models": model_table,
+        "supported_languages": language_codes,
+        "installation_help": install_help
+    })
+@app.errorhandler(413)
+def too_large(e):
+    """Return a JSON 413 response naming the configured upload limit"""
+    # Read the limit from the app config so the message cannot drift from MAX_CONTENT_LENGTH
+    limit_bytes = app.config.get('MAX_CONTENT_LENGTH')
+    if limit_bytes:
+        message = f'File too large. Maximum size is {limit_bytes // (1024 * 1024)}MB'
+    else:
+        message = 'File too large'
+    return jsonify({'error': message}), 413
+@app.errorhandler(404)
+def not_found(e):
+    """Return a JSON body for unknown routes instead of Flask's HTML page"""
+    body = {'error': 'Endpoint not found'}
+    return jsonify(body), 404
+@app.errorhandler(500)
+def internal_error(e):
+    """Return a generic JSON body for unhandled server errors"""
+    body = {'error': 'Internal server error'}
+    return jsonify(body), 500
 if __name__ == '__main__':
+    # Print a startup banner: troubleshooting steps when the model failed to
+    # load, otherwise a summary of the endpoints and limits
+    if not model_loaded:
+        print(f"""
+        ⚠️  WHISPER MODEL LOADING FAILED ⚠️
+        ===================================
+        The Whisper model could not be loaded. Please check:
+        1. Install the correct package:
+           pip install openai-whisper torch torchaudio
+        2. If you have conflicts, uninstall the wrong whisper package:
+           pip uninstall whisper
+           pip install openai-whisper
+        3. Make sure you have sufficient disk space for the model
+        The server will start but transcription will not work until the model is loaded.
+        """)
+    else:
+        print(f"""
+        Whisper Transcription API Server
+        ================================
+        Model: {MODEL_SIZE} ✅
+        Status: Ready
+        Endpoints:
+        - GET  /           : Health check
+        - POST /transcribe : Transcribe audio file
+        - GET  /models     : Available models info
+        Supported formats: {', '.join(ALLOWED_EXTENSIONS)}
+        Max file size: 100MB
+        """)
+    # The server binds to all interfaces, so the Werkzeug debugger (an interactive
+    # console) must stay off unless explicitly requested through FLASK_DEBUG
+    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes')
+    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)