Spaces:

diabolic6045
/

tts-api

Sleeping

App Files Files Community

Avinyaa committed on May 31

Commit

5efbc82

1 Parent(s): 9acb9c3

u

Browse files

Files changed (7) hide show

Dockerfile +12 -2
README.md +40 -0
app.py +12 -3
app_config.py +53 -0
startup.py +132 -0
test.py +13 -1
test_kokoro_install.py +93 -1

Dockerfile CHANGED Viewed

@@ -2,9 +2,13 @@ FROM python:3.11
 WORKDIR /app
-# Set environment variables to fix Numba caching issues
 ENV NUMBA_CACHE_DIR=/tmp/numba_cache
 ENV NUMBA_DISABLE_JIT=1
 # Install git, git-lfs, and espeak-ng for Kokoro TTS
 RUN apt-get update && apt-get install -y git git-lfs espeak-ng && rm -rf /var/lib/apt/lists/*
@@ -12,6 +16,10 @@ RUN apt-get update && apt-get install -y git git-lfs espeak-ng && rm -rf /var/li
 # Initialize git lfs
 RUN git lfs install
 COPY requirements.txt .
 RUN pip install uv
@@ -22,4 +30,6 @@ COPY . .
 # Expose the port
 EXPOSE 7860
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

 WORKDIR /app
+# Set environment variables to fix Numba caching issues and configure HF cache
 ENV NUMBA_CACHE_DIR=/tmp/numba_cache
 ENV NUMBA_DISABLE_JIT=1
+ENV HF_HOME=/tmp/hf_cache
+ENV TRANSFORMERS_CACHE=/tmp/hf_cache
+ENV HF_HUB_CACHE=/tmp/hf_cache
+ENV TORCH_HOME=/tmp/torch_cache
 # Install git, git-lfs, and espeak-ng for Kokoro TTS
 RUN apt-get update && apt-get install -y git git-lfs espeak-ng && rm -rf /var/lib/apt/lists/*
 # Initialize git lfs
 RUN git lfs install
+# Create cache directories with proper permissions
+RUN mkdir -p /tmp/hf_cache /tmp/torch_cache /tmp/numba_cache && \
+    chmod -R 777 /tmp/hf_cache /tmp/torch_cache /tmp/numba_cache
 COPY requirements.txt .
 RUN pip install uv
 # Expose the port
 EXPOSE 7860
+# Default command - use startup.py for debugging if needed
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+# Alternative for debugging: CMD ["python", "startup.py"]

README.md CHANGED Viewed

@@ -20,6 +20,7 @@ A FastAPI-based Text-to-Speech API using Kokoro, an open-weight TTS model with 8
 - Docker support
 - Lightweight and fast processing
 - Apache-licensed weights
 ## About Kokoro
@@ -27,6 +28,22 @@ A FastAPI-based Text-to-Speech API using Kokoro, an open-weight TTS model with 8
 ## Setup
 ### Local Development
 1. Install system dependencies:
@@ -200,5 +217,28 @@ Run the standalone test:
 python test.py
 ```
 This will generate audio files demonstrating Kokoro's capabilities.

 - Docker support
 - Lightweight and fast processing
 - Apache-licensed weights
+- Optimized for Hugging Face Spaces deployment
 ## About Kokoro
 ## Setup
+### Hugging Face Spaces Deployment
+This API is optimized for Hugging Face Spaces deployment. The Docker configuration automatically handles:
+- Cache directory setup with proper permissions
+- Environment variable configuration
+- Model downloading and caching
+Simply deploy to Hugging Face Spaces using the Docker SDK.
+#### Troubleshooting on HF Spaces
+If you encounter permission errors, you can use the diagnostic startup script:
+1. Change the Dockerfile CMD to: `CMD ["python", "startup.py"]`
+2. This will run diagnostics and show detailed information about the environment
 ### Local Development
 1. Install system dependencies:
 python test.py
 ```
+Run the installation test:
+```bash
+python test_kokoro_install.py
+```
+For debugging on Hugging Face Spaces:
+```bash
+python startup.py
+```
 This will generate audio files demonstrating Kokoro's capabilities.
+## Environment Variables
+The following environment variables are automatically configured:
+- `HF_HOME=/tmp/hf_cache` - Hugging Face cache directory
+- `TRANSFORMERS_CACHE=/tmp/hf_cache` - Transformers cache
+- `HF_HUB_CACHE=/tmp/hf_cache` - HF Hub cache
+- `TORCH_HOME=/tmp/torch_cache` - PyTorch cache
+- `NUMBA_CACHE_DIR=/tmp/numba_cache` - Numba cache
+- `NUMBA_DISABLE_JIT=1` - Disable Numba JIT compilation
+These are set automatically by the application for optimal performance on Hugging Face Spaces.

app.py CHANGED Viewed

@@ -1,3 +1,6 @@
 from fastapi import FastAPI, HTTPException, Form
 from fastapi.responses import FileResponse
 from pydantic import BaseModel
@@ -26,9 +29,14 @@ class KokoroTTSService:
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         logger.info(f"Using device: {self.device}")
         try:
             # Initialize Kokoro pipeline with default language
-            self.pipeline = KPipeline(lang_code='a')
             logger.info("Kokoro TTS pipeline loaded successfully")
         except Exception as e:
             logger.error(f"Failed to load Kokoro TTS pipeline: {e}")
@@ -39,11 +47,12 @@ class KokoroTTSService:
         try:
             # Create a unique filename for the output
             output_filename = f"kokoro_output_{uuid.uuid4().hex}.wav"
-            output_path = os.path.join(tempfile.gettempdir(), output_filename)
             # Update pipeline language if different
             if self.pipeline.lang_code != lang_code:
-                self.pipeline = KPipeline(lang_code=lang_code)
             # Generate speech using Kokoro
             generator = self.pipeline(text, voice=voice)

+# Import configuration first to setup cache directories
+import app_config
 from fastapi import FastAPI, HTTPException, Form
 from fastapi.responses import FileResponse
 from pydantic import BaseModel
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         logger.info(f"Using device: {self.device}")
+        if app_config.is_hf_spaces():
+            logger.info("Running on Hugging Face Spaces")
         try:
             # Initialize Kokoro pipeline with default language
+            # Explicitly specify the repo_id to avoid warnings
+            logger.info("Initializing Kokoro TTS pipeline...")
+            self.pipeline = KPipeline(lang_code='a', repo_id='hexgrad/Kokoro-82M')
             logger.info("Kokoro TTS pipeline loaded successfully")
         except Exception as e:
             logger.error(f"Failed to load Kokoro TTS pipeline: {e}")
         try:
             # Create a unique filename for the output
             output_filename = f"kokoro_output_{uuid.uuid4().hex}.wav"
+            output_path = os.path.join(app_config.get_temp_dir(), output_filename)
             # Update pipeline language if different
             if self.pipeline.lang_code != lang_code:
+                logger.info(f"Switching language from {self.pipeline.lang_code} to {lang_code}")
+                self.pipeline = KPipeline(lang_code=lang_code, repo_id='hexgrad/Kokoro-82M')
             # Generate speech using Kokoro
             generator = self.pipeline(text, voice=voice)

app_config.py ADDED Viewed

	@@ -0,0 +1,53 @@

+"""
+Configuration for Kokoro TTS API, especially for Hugging Face Spaces deployment.
+"""
+import os
+import tempfile
+import logging
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+def setup_hf_cache():
+    """Redirect the Hugging Face, PyTorch and Numba caches to /tmp.
+    Exports the cache-related environment variables, makes sure every target
+    directory exists with mode 0o777, and finally sets NUMBA_DISABLE_JIT and
+    HF_HUB_DISABLE_TELEMETRY. A directory that cannot be created or chmod-ed
+    only produces a warning; the function does not raise for that.
+    """
+    hf_root = '/tmp/hf_cache'
+    env_to_path = {
+        'HF_HOME': hf_root,
+        'TRANSFORMERS_CACHE': hf_root,
+        'HF_HUB_CACHE': hf_root,
+        'TORCH_HOME': '/tmp/torch_cache',
+        'NUMBA_CACHE_DIR': '/tmp/numba_cache',
+    }
+    # Export each variable and log the assignment as it happens.
+    for var_name in env_to_path:
+        target = env_to_path[var_name]
+        os.environ[var_name] = target
+        logger.info(f"Set {var_name} to {target}")
+    # Several variables share one path, so de-duplicate before touching disk.
+    for path in set(env_to_path.values()):
+        try:
+            os.makedirs(path, exist_ok=True)
+            # Ensure write permissions
+            os.chmod(path, 0o777)
+            logger.info(f"Created cache directory: {path}")
+        except Exception as exc:
+            logger.warning(f"Could not create/modify {path}: {exc}")
+    # Additional switches: Numba JIT off, Hub telemetry off.
+    os.environ.update(NUMBA_DISABLE_JIT='1', HF_HUB_DISABLE_TELEMETRY='1')
+    logger.info("Cache directories setup completed")
+def get_temp_dir():
+    """Return the default temporary directory chosen by ``tempfile.gettempdir()``."""
+    scratch_dir = tempfile.gettempdir()
+    return scratch_dir
+def is_hf_spaces():
+    """Report whether the SPACE_ID environment variable is present."""
+    return 'SPACE_ID' in os.environ
+# Initialize cache setup
+setup_hf_cache()

startup.py ADDED Viewed

	@@ -0,0 +1,132 @@

+#!/usr/bin/env python3
+"""
+Startup script for Kokoro TTS API on Hugging Face Spaces
+"""
+import os
+import sys
+import logging
+import subprocess
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+def check_environment():
+    """Log diagnostics about the runtime environment.
+    Reports whether SPACE_ID is set, the Python version, the output of
+    ``df -h /tmp`` and whether /tmp, /app and the current directory accept
+    new files. Every probe is best-effort: a failure is logged as a warning
+    and never raised.
+    """
+    import tempfile  # local import: only needed for the write probe below
+    logger.info("=== Environment Check ===")
+    # Check if running on HF Spaces
+    space_id = os.environ.get('SPACE_ID')
+    if space_id:
+        logger.info(f"Running on Hugging Face Spaces: {space_id}")
+    else:
+        logger.info("Not running on Hugging Face Spaces")
+    # Check Python version
+    logger.info(f"Python version: {sys.version}")
+    # Check available disk space
+    try:
+        result = subprocess.run(['df', '-h', '/tmp'], capture_output=True, text=True)
+        logger.info(f"Disk space in /tmp:\n{result.stdout}")
+    except Exception as e:
+        logger.warning(f"Could not check disk space: {e}")
+    # Check write permissions with a uniquely named temporary file, so the
+    # probe can never overwrite or delete a real file that happens to share
+    # its name; the context manager removes the probe file on exit.
+    for test_dir in ('/tmp', '/app', '.'):
+        try:
+            with tempfile.NamedTemporaryFile(mode='w', dir=test_dir, suffix='.tmp') as probe:
+                probe.write('test')
+            logger.info(f"✅ Write permission OK: {test_dir}")
+        except Exception as e:
+            logger.warning(f"❌ Write permission failed: {test_dir} - {e}")
+def setup_cache_dirs():
+    """Create the three /tmp cache directories and chmod each to 0o777.
+    A failure for one directory is logged as an error and the remaining
+    directories are still processed.
+    """
+    logger.info("=== Setting up cache directories ===")
+    for path in ('/tmp/hf_cache', '/tmp/torch_cache', '/tmp/numba_cache'):
+        try:
+            os.makedirs(path, exist_ok=True)
+            os.chmod(path, 0o777)
+            logger.info(f"✅ Created cache directory: {path}")
+        except Exception as err:
+            logger.error(f"❌ Failed to create {path}: {err}")
+def check_dependencies():
+    """Log, for each required package, whether it can be imported.
+    This is a diagnostic step: it never raises. A package that is missing
+    and a package that is present but fails while importing are reported
+    with different messages so the two cases can be told apart in the logs.
+    """
+    import importlib  # local import keeps this block self-contained
+    logger.info("=== Checking dependencies ===")
+    required_packages = ('kokoro', 'soundfile', 'torch', 'fastapi', 'uvicorn')
+    for package in required_packages:
+        try:
+            importlib.import_module(package)
+        except ImportError:
+            logger.error(f"❌ {package} is not available")
+        except Exception as e:
+            # An import can fail with something other than ImportError
+            # (for example when a native library cannot be loaded); keep
+            # checking the remaining packages instead of crashing.
+            logger.error(f"❌ {package} is installed but failed to import: {e}")
+        else:
+            logger.info(f"✅ {package} is available")
+def test_kokoro():
+    """Smoke-test Kokoro by building a pipeline and generating one segment.
+    Returns:
+        True when the pipeline is created and yields at least one audio
+        segment; False when anything raises or no segment is produced.
+        All outcomes are logged; nothing is raised to the caller.
+    """
+    logger.info("=== Testing Kokoro TTS ===")
+    try:
+        # Import after setting up environment
+        import app_config  # noqa: F401 - imported for its side effect (sets up cache dirs)
+        from kokoro import KPipeline
+        logger.info("Initializing Kokoro pipeline...")
+        pipeline = KPipeline(lang_code='a', repo_id='hexgrad/Kokoro-82M')
+        logger.info("✅ Kokoro pipeline initialized successfully")
+        # Test generation
+        logger.info("Testing speech generation...")
+        text = "Hello, this is a test."
+        # Only the first segment is needed; an empty generator means nothing
+        # was synthesised and must not be reported as a success.
+        first_segment = next(iter(pipeline(text, voice='af_heart')), None)
+        if first_segment is None:
+            logger.error("❌ Kokoro TTS test failed: pipeline produced no audio segments")
+            return False
+        gs, ps, audio = first_segment
+        logger.info(f"✅ Generated audio segment 0: gs={gs}, ps={ps}, audio shape: {audio.shape}")
+        logger.info("✅ Kokoro TTS test completed successfully")
+        return True
+    except Exception as e:
+        # logger.exception logs at ERROR level and appends the traceback.
+        logger.exception(f"❌ Kokoro TTS test failed: {e}")
+        return False
+def main():
+    """Run the startup diagnostics, then serve the API or exit with status 1."""
+    logger.info("🚀 Starting Kokoro TTS API setup...")
+    check_environment()
+    setup_cache_dirs()
+    check_dependencies()
+    # Guard clause: stop here when the Kokoro smoke test does not pass.
+    if not test_kokoro():
+        logger.error("❌ Setup failed. Please check the logs above.")
+        sys.exit(1)
+    logger.info("🎉 All checks passed! Starting the API...")
+    # uvicorn is imported only once the checks have succeeded.
+    import uvicorn
+    uvicorn.run("app:app", host="0.0.0.0", port=7860, log_level="info")
+if __name__ == "__main__":
+    main()

test.py CHANGED Viewed

@@ -1,9 +1,21 @@
 from kokoro import KPipeline
 import soundfile as sf
 import torch
 # Initialize Kokoro pipeline
-pipeline = KPipeline(lang_code='a')
 # Text to convert to speech
 text = '''

+import os
+# Configure cache directories for Hugging Face Spaces
+os.environ['HF_HOME'] = '/tmp/hf_cache'
+os.environ['TRANSFORMERS_CACHE'] = '/tmp/hf_cache'
+os.environ['HF_HUB_CACHE'] = '/tmp/hf_cache'
+os.environ['TORCH_HOME'] = '/tmp/torch_cache'
+# Create cache directories
+os.makedirs('/tmp/hf_cache', exist_ok=True)
+os.makedirs('/tmp/torch_cache', exist_ok=True)
 from kokoro import KPipeline
 import soundfile as sf
 import torch
 # Initialize Kokoro pipeline
+pipeline = KPipeline(lang_code='a', repo_id='hexgrad/Kokoro-82M')
 # Text to convert to speech
 text = '''

test_kokoro_install.py CHANGED Viewed

	@@ -1 +1,93 @@
1	-

+#!/usr/bin/env python3
+"""
+Simple test script to verify Kokoro TTS installation and functionality.
+"""
+import os
+# Configure cache directories for Hugging Face Spaces
+os.environ['HF_HOME'] = '/tmp/hf_cache'
+os.environ['TRANSFORMERS_CACHE'] = '/tmp/hf_cache'
+os.environ['HF_HUB_CACHE'] = '/tmp/hf_cache'
+os.environ['TORCH_HOME'] = '/tmp/torch_cache'
+# Create cache directories
+os.makedirs('/tmp/hf_cache', exist_ok=True)
+os.makedirs('/tmp/torch_cache', exist_ok=True)
+def test_kokoro_import():
+    """Check that kokoro, soundfile and torch can all be imported.
+    Prints the outcome and returns True on success, False on ImportError.
+    """
+    try:
+        from kokoro import KPipeline  # noqa: F401 - import probe only
+        import soundfile as sf  # noqa: F401
+        import torch  # noqa: F401
+    except ImportError as exc:
+        print(f"❌ Import error: {exc}")
+        return False
+    print("✅ All required packages imported successfully!")
+    return True
+def test_kokoro_pipeline():
+    """Check that a Kokoro pipeline can be constructed.
+    Prints the outcome and returns True on success, False if anything raises.
+    """
+    succeeded = False
+    try:
+        from kokoro import KPipeline
+        KPipeline(lang_code='a', repo_id='hexgrad/Kokoro-82M')
+        print("✅ Kokoro pipeline initialized successfully!")
+        succeeded = True
+    except Exception as exc:
+        print(f"❌ Pipeline initialization error: {exc}")
+    return succeeded
+def test_kokoro_generation():
+    """Test if Kokoro can generate speech.
+    Builds a pipeline, synthesises a short sentence and writes the first
+    audio segment to 'test_kokoro.wav' in the current directory.
+    Returns:
+        True only when a segment was produced and saved; False when the
+        pipeline yields nothing or anything raises. Outcomes are printed.
+    """
+    try:
+        from kokoro import KPipeline
+        import soundfile as sf
+        pipeline = KPipeline(lang_code='a', repo_id='hexgrad/Kokoro-82M')
+        text = "Hello, this is a test of Kokoro TTS."
+        # Just test the first segment; an empty generator is a failure, not
+        # a pass, because no audio was generated or saved.
+        first_segment = next(iter(pipeline(text, voice='af_heart')), None)
+        if first_segment is None:
+            print("❌ Speech generation error: pipeline produced no audio segments")
+            return False
+        gs, ps, audio = first_segment
+        print(f"✅ Generated audio segment 0: gs={gs}, ps={ps}")
+        # Save test audio
+        sf.write('test_kokoro.wav', audio, 24000)
+        print("✅ Test audio saved as 'test_kokoro.wav'")
+        return True
+    except Exception as e:
+        print(f"❌ Speech generation error: {e}")
+        return False
+def main():
+    """Run all tests, print a summary and signal failure via the exit status.
+    Every test is run even if an earlier one fails. When at least one test
+    fails the process exits with status 1 (raises SystemExit) so shells, CI
+    jobs and Docker builds can detect the failure; on success it returns
+    normally.
+    """
+    import sys  # local import: only needed to set the exit status
+    print("🎤 Testing Kokoro TTS Installation")
+    print("=" * 40)
+    tests = [
+        ("Import Test", test_kokoro_import),
+        ("Pipeline Test", test_kokoro_pipeline),
+        ("Generation Test", test_kokoro_generation),
+    ]
+    passed = 0
+    total = len(tests)
+    for test_name, test_func in tests:
+        print(f"\n🔍 Running {test_name}...")
+        if test_func():
+            passed += 1
+        else:
+            print(f"❌ {test_name} failed!")
+    print(f"\n📊 Results: {passed}/{total} tests passed")
+    if passed == total:
+        print("🎉 All tests passed! Kokoro TTS is ready to use.")
+    else:
+        print("⚠️  Some tests failed. Please check the installation.")
+        # Previously the script exited 0 regardless of the results.
+        sys.exit(1)
+if __name__ == "__main__":
+    main()