Avinyaa committed on
Commit
5efbc82
·
1 Parent(s): 9acb9c3
Files changed (7) hide show
  1. Dockerfile +12 -2
  2. README.md +40 -0
  3. app.py +12 -3
  4. app_config.py +53 -0
  5. startup.py +132 -0
  6. test.py +13 -1
  7. test_kokoro_install.py +93 -1
Dockerfile CHANGED
@@ -2,9 +2,13 @@ FROM python:3.11
2
 
3
  WORKDIR /app
4
 
5
- # Set environment variables to fix Numba caching issues
6
  ENV NUMBA_CACHE_DIR=/tmp/numba_cache
7
  ENV NUMBA_DISABLE_JIT=1
 
 
 
 
8
 
9
  # Install git, git-lfs, and espeak-ng for Kokoro TTS
10
  RUN apt-get update && apt-get install -y git git-lfs espeak-ng && rm -rf /var/lib/apt/lists/*
@@ -12,6 +16,10 @@ RUN apt-get update && apt-get install -y git git-lfs espeak-ng && rm -rf /var/li
12
  # Initialize git lfs
13
  RUN git lfs install
14
 
 
 
 
 
15
  COPY requirements.txt .
16
 
17
  RUN pip install uv
@@ -22,4 +30,6 @@ COPY . .
22
  # Expose the port
23
  EXPOSE 7860
24
 
25
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
2
 
3
  WORKDIR /app
4
 
5
+ # Set environment variables to fix Numba caching issues and configure HF cache
6
  ENV NUMBA_CACHE_DIR=/tmp/numba_cache
7
  ENV NUMBA_DISABLE_JIT=1
8
+ ENV HF_HOME=/tmp/hf_cache
9
+ ENV TRANSFORMERS_CACHE=/tmp/hf_cache
10
+ ENV HF_HUB_CACHE=/tmp/hf_cache
11
+ ENV TORCH_HOME=/tmp/torch_cache
12
 
13
  # Install git, git-lfs, and espeak-ng for Kokoro TTS
14
  RUN apt-get update && apt-get install -y git git-lfs espeak-ng && rm -rf /var/lib/apt/lists/*
 
16
  # Initialize git lfs
17
  RUN git lfs install
18
 
19
+ # Create cache directories with proper permissions
20
+ RUN mkdir -p /tmp/hf_cache /tmp/torch_cache /tmp/numba_cache && \
21
+ chmod -R 777 /tmp/hf_cache /tmp/torch_cache /tmp/numba_cache
22
+
23
  COPY requirements.txt .
24
 
25
  RUN pip install uv
 
30
  # Expose the port
31
  EXPOSE 7860
32
 
33
# Default command: serve the FastAPI app directly with uvicorn.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
# Alternative for debugging (startup.py runs diagnostics, then starts the API):
# CMD ["python", "startup.py"]
README.md CHANGED
@@ -20,6 +20,7 @@ A FastAPI-based Text-to-Speech API using Kokoro, an open-weight TTS model with 8
20
  - Docker support
21
  - Lightweight and fast processing
22
  - Apache-licensed weights
 
23
 
24
  ## About Kokoro
25
 
@@ -27,6 +28,22 @@ A FastAPI-based Text-to-Speech API using Kokoro, an open-weight TTS model with 8
27
 
28
  ## Setup
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  ### Local Development
31
 
32
  1. Install system dependencies:
@@ -200,5 +217,28 @@ Run the standalone test:
200
  python test.py
201
  ```
202
 
 
 
 
 
 
 
 
 
 
 
203
  This will generate audio files demonstrating Kokoro's capabilities.
204
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  - Docker support
21
  - Lightweight and fast processing
22
  - Apache-licensed weights
23
+ - Optimized for Hugging Face Spaces deployment
24
 
25
  ## About Kokoro
26
 
 
28
 
29
  ## Setup
30
 
31
+ ### Hugging Face Spaces Deployment
32
+
33
+ This API is optimized for Hugging Face Spaces deployment. The Docker configuration automatically handles:
34
+ - Cache directory setup with proper permissions
35
+ - Environment variable configuration
36
+ - Model downloading and caching
37
+
38
+ Simply deploy to Hugging Face Spaces using the Docker SDK.
39
+
40
+ #### Troubleshooting on HF Spaces
41
+
42
+ If you encounter permission errors, you can use the diagnostic startup script:
43
+
44
+ 1. Change the Dockerfile CMD to: `CMD ["python", "startup.py"]`
45
+ 2. This will run diagnostics and show detailed information about the environment
46
+
47
  ### Local Development
48
 
49
  1. Install system dependencies:
 
217
  python test.py
218
  ```
219
 
220
+ Run the installation test:
221
+ ```bash
222
+ python test_kokoro_install.py
223
+ ```
224
+
225
+ For debugging on Hugging Face Spaces:
226
+ ```bash
227
+ python startup.py
228
+ ```
229
+
230
  This will generate audio files demonstrating Kokoro's capabilities.
231
 
232
+ ## Environment Variables
233
+
234
+ The following environment variables are automatically configured:
235
+
236
+ - `HF_HOME=/tmp/hf_cache` - Hugging Face cache directory
237
+ - `TRANSFORMERS_CACHE=/tmp/hf_cache` - Transformers cache
238
+ - `HF_HUB_CACHE=/tmp/hf_cache` - HF Hub cache
239
+ - `TORCH_HOME=/tmp/torch_cache` - PyTorch cache
240
+ - `NUMBA_CACHE_DIR=/tmp/numba_cache` - Numba cache
241
+ - `NUMBA_DISABLE_JIT=1` - Disable Numba JIT compilation
242
+
243
+ These are set automatically by the application for optimal performance on Hugging Face Spaces.
244
+
app.py CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  from fastapi import FastAPI, HTTPException, Form
2
  from fastapi.responses import FileResponse
3
  from pydantic import BaseModel
@@ -26,9 +29,14 @@ class KokoroTTSService:
26
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
27
  logger.info(f"Using device: {self.device}")
28
 
 
 
 
29
  try:
30
  # Initialize Kokoro pipeline with default language
31
- self.pipeline = KPipeline(lang_code='a')
 
 
32
  logger.info("Kokoro TTS pipeline loaded successfully")
33
  except Exception as e:
34
  logger.error(f"Failed to load Kokoro TTS pipeline: {e}")
@@ -39,11 +47,12 @@ class KokoroTTSService:
39
  try:
40
  # Create a unique filename for the output
41
  output_filename = f"kokoro_output_{uuid.uuid4().hex}.wav"
42
- output_path = os.path.join(tempfile.gettempdir(), output_filename)
43
 
44
  # Update pipeline language if different
45
  if self.pipeline.lang_code != lang_code:
46
- self.pipeline = KPipeline(lang_code=lang_code)
 
47
 
48
  # Generate speech using Kokoro
49
  generator = self.pipeline(text, voice=voice)
 
1
+ # Import configuration first to setup cache directories
2
+ import app_config
3
+
4
  from fastapi import FastAPI, HTTPException, Form
5
  from fastapi.responses import FileResponse
6
  from pydantic import BaseModel
 
29
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
30
  logger.info(f"Using device: {self.device}")
31
 
32
+ if app_config.is_hf_spaces():
33
+ logger.info("Running on Hugging Face Spaces")
34
+
35
  try:
36
  # Initialize Kokoro pipeline with default language
37
+ # Explicitly specify the repo_id to avoid warnings
38
+ logger.info("Initializing Kokoro TTS pipeline...")
39
+ self.pipeline = KPipeline(lang_code='a', repo_id='hexgrad/Kokoro-82M')
40
  logger.info("Kokoro TTS pipeline loaded successfully")
41
  except Exception as e:
42
  logger.error(f"Failed to load Kokoro TTS pipeline: {e}")
 
47
  try:
48
  # Create a unique filename for the output
49
  output_filename = f"kokoro_output_{uuid.uuid4().hex}.wav"
50
+ output_path = os.path.join(app_config.get_temp_dir(), output_filename)
51
 
52
  # Update pipeline language if different
53
  if self.pipeline.lang_code != lang_code:
54
+ logger.info(f"Switching language from {self.pipeline.lang_code} to {lang_code}")
55
+ self.pipeline = KPipeline(lang_code=lang_code, repo_id='hexgrad/Kokoro-82M')
56
 
57
  # Generate speech using Kokoro
58
  generator = self.pipeline(text, voice=voice)
app_config.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration for Kokoro TTS API, especially for Hugging Face Spaces deployment.
3
+ """
4
+
5
+ import os
6
+ import tempfile
7
+ import logging
8
+
9
+ # Configure logging
10
+ logging.basicConfig(level=logging.INFO)
11
+ logger = logging.getLogger(__name__)
12
+
13
def setup_hf_cache():
    """Point the Hugging Face, Torch and Numba caches at paths under /tmp.

    Exports each cache environment variable (overwriting any existing
    value), creates the backing directories and makes them world-writable,
    then sets the Numba JIT and HF telemetry switches. A directory that
    cannot be created or chmod-ed is logged as a warning, not raised.
    """
    hub_cache = '/tmp/hf_cache'
    env_to_path = {
        'HF_HOME': hub_cache,
        'TRANSFORMERS_CACHE': hub_cache,
        'HF_HUB_CACHE': hub_cache,
        'TORCH_HOME': '/tmp/torch_cache',
        'NUMBA_CACHE_DIR': '/tmp/numba_cache',
    }

    # Export the variables first so libraries imported later pick them up.
    for env_name in env_to_path:
        target = env_to_path[env_name]
        os.environ[env_name] = target
        logger.info(f"Set {env_name} to {target}")

    # Several variables share one path, so de-duplicate before creating.
    unique_dirs = set(env_to_path.values())
    for cache_dir in unique_dirs:
        try:
            os.makedirs(cache_dir, exist_ok=True)
            # World-writable so any runtime user can write to the cache.
            os.chmod(cache_dir, 0o777)
            logger.info(f"Created cache directory: {cache_dir}")
        except Exception as e:
            logger.warning(f"Could not create/modify {cache_dir}: {e}")

    # Extra switches that are not directory-backed.
    os.environ.update({
        'NUMBA_DISABLE_JIT': '1',
        'HF_HUB_DISABLE_TELEMETRY': '1',
    })

    logger.info("Cache directories setup completed")
43
+
44
def get_temp_dir():
    """Return the directory that ``tempfile`` uses for temporary files."""
    temp_root = tempfile.gettempdir()
    return temp_root
47
+
48
def is_hf_spaces():
    """Return True when the ``SPACE_ID`` environment variable is set.

    The variable only has to exist; an empty value still counts.
    """
    return 'SPACE_ID' in os.environ
51
+
52
+ # Initialize cache setup
53
+ setup_hf_cache()
startup.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Startup script for Kokoro TTS API on Hugging Face Spaces
4
+ """
5
+
6
+ import os
7
+ import sys
8
+ import logging
9
+ import subprocess
10
+
11
+ # Configure logging
12
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
13
+ logger = logging.getLogger(__name__)
14
+
15
def check_environment():
    """Log diagnostics about the runtime environment and write permissions.

    Reports whether ``SPACE_ID`` is set (used here as the Hugging Face
    Spaces marker), the Python version, the ``df -h /tmp`` output, and
    whether ``/tmp``, ``/app`` and the current directory are writable.
    Every failure is logged as a warning; nothing is raised.
    """
    import tempfile  # local import: only needed for the write probe below

    logger.info("=== Environment Check ===")

    # Check if running on HF Spaces
    space_id = os.environ.get('SPACE_ID')
    if space_id:
        logger.info(f"Running on Hugging Face Spaces: {space_id}")
    else:
        logger.info("Not running on Hugging Face Spaces")

    logger.info(f"Python version: {sys.version}")

    # Check available disk space
    try:
        # Bounded wait so a hung mount cannot stall startup indefinitely.
        result = subprocess.run(
            ['df', '-h', '/tmp'], capture_output=True, text=True, timeout=10
        )
        logger.info(f"Disk space in /tmp:\n{result.stdout}")
    except (OSError, subprocess.SubprocessError) as e:
        logger.warning(f"Could not check disk space: {e}")

    # Check write permissions
    for test_dir in ('/tmp', '/app', '.'):
        try:
            # A uniquely named temp file cannot clobber an existing file and
            # is removed on close even when the write itself fails.
            with tempfile.NamedTemporaryFile(
                mode='w', dir=test_dir, prefix='test_write_', suffix='.tmp'
            ) as probe:
                probe.write('test')
            logger.info(f"✅ Write permission OK: {test_dir}")
        except OSError as e:
            logger.warning(f"❌ Write permission failed: {test_dir} - {e}")
47
+
48
def setup_cache_dirs():
    """Create the cache directories under /tmp and make them world-writable.

    Each directory is created if missing and chmod-ed to 0o777. A failure
    for one directory is logged as an error and the remaining directories
    are still attempted; nothing is raised.
    """
    logger.info("=== Setting up cache directories ===")

    for cache_dir in ('/tmp/hf_cache', '/tmp/torch_cache', '/tmp/numba_cache'):
        try:
            os.makedirs(cache_dir, exist_ok=True)
            os.chmod(cache_dir, 0o777)
        except OSError as e:
            # makedirs/chmod report filesystem problems as OSError.
            logger.error(f"❌ Failed to create {cache_dir}: {e}")
        else:
            logger.info(f"✅ Created cache directory: {cache_dir}")
65
+
66
def check_dependencies():
    """Log, for each required package, whether it can be imported.

    Purely diagnostic: results are only logged and nothing is raised, so
    one broken package does not hide the status of the others.
    """
    import importlib  # local import: only this diagnostic needs it

    logger.info("=== Checking dependencies ===")

    for package in ('kokoro', 'soundfile', 'torch', 'fastapi', 'uvicorn'):
        try:
            importlib.import_module(package)
        except ImportError:
            logger.error(f"❌ {package} is not available")
        except Exception as e:
            # Installed packages can still fail at import time with other
            # exception types (for example when a native library is
            # missing); report that instead of aborting the checks.
            logger.error(f"❌ {package} failed to import: {e}")
        else:
            logger.info(f"✅ {package} is available")
84
+
85
def test_kokoro():
    """Smoke-test Kokoro by building a pipeline and synthesizing one segment.

    Returns:
        bool: True when the pipeline initializes and yields at least one
        audio segment; False on any exception or when nothing is generated.
    """
    logger.info("=== Testing Kokoro TTS ===")

    try:
        # Imported here rather than at module level: importing app_config
        # runs its cache setup, which is meant to happen before kokoro loads.
        import app_config  # noqa: F401
        from kokoro import KPipeline

        logger.info("Initializing Kokoro pipeline...")
        pipeline = KPipeline(lang_code='a', repo_id='hexgrad/Kokoro-82M')
        logger.info("✅ Kokoro pipeline initialized successfully")

        # Test generation
        logger.info("Testing speech generation...")
        text = "Hello, this is a test."
        generator = pipeline(text, voice='af_heart')

        # Only the first segment is needed; an empty generator is a failure,
        # not a silent success.
        first_segment = next(iter(generator), None)
        if first_segment is None:
            logger.error("❌ Kokoro TTS test failed: no audio segments were generated")
            return False

        gs, ps, audio = first_segment
        logger.info(f"✅ Generated audio segment 0: gs={gs}, ps={ps}, audio shape: {audio.shape}")

        logger.info("✅ Kokoro TTS test completed successfully")
        return True

    except Exception as e:
        logger.error(f"❌ Kokoro TTS test failed: {e}")
        return False
113
+
114
def main():
    """Run the startup diagnostics, then serve the API if Kokoro works.

    Runs the environment, cache-directory and dependency checks followed by
    the Kokoro smoke test. When the smoke test fails the process exits with
    status 1; otherwise uvicorn serves ``app:app`` on 0.0.0.0:7860.
    """
    logger.info("🚀 Starting Kokoro TTS API setup...")

    check_environment()
    setup_cache_dirs()
    check_dependencies()

    if not test_kokoro():
        logger.error("❌ Setup failed. Please check the logs above.")
        sys.exit(1)

    logger.info("🎉 All checks passed! Starting the API...")
    # Deferred import: uvicorn is only needed once the checks have passed.
    import uvicorn
    uvicorn.run("app:app", host="0.0.0.0", port=7860, log_level="info")
130
+
131
+ if __name__ == "__main__":
132
+ main()
test.py CHANGED
@@ -1,9 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
  from kokoro import KPipeline
2
  import soundfile as sf
3
  import torch
4
 
5
  # Initialize Kokoro pipeline
6
- pipeline = KPipeline(lang_code='a')
7
 
8
  # Text to convert to speech
9
  text = '''
 
1
+ import os
2
+
3
+ # Configure cache directories for Hugging Face Spaces
4
+ os.environ['HF_HOME'] = '/tmp/hf_cache'
5
+ os.environ['TRANSFORMERS_CACHE'] = '/tmp/hf_cache'
6
+ os.environ['HF_HUB_CACHE'] = '/tmp/hf_cache'
7
+ os.environ['TORCH_HOME'] = '/tmp/torch_cache'
8
+
9
+ # Create cache directories
10
+ os.makedirs('/tmp/hf_cache', exist_ok=True)
11
+ os.makedirs('/tmp/torch_cache', exist_ok=True)
12
+
13
  from kokoro import KPipeline
14
  import soundfile as sf
15
  import torch
16
 
17
  # Initialize Kokoro pipeline
18
+ pipeline = KPipeline(lang_code='a', repo_id='hexgrad/Kokoro-82M')
19
 
20
  # Text to convert to speech
21
  text = '''
test_kokoro_install.py CHANGED
@@ -1 +1,93 @@
1
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Simple test script to verify Kokoro TTS installation and functionality.
4
+ """
5
+
6
+ import os
7
+
8
+ # Configure cache directories for Hugging Face Spaces
9
+ os.environ['HF_HOME'] = '/tmp/hf_cache'
10
+ os.environ['TRANSFORMERS_CACHE'] = '/tmp/hf_cache'
11
+ os.environ['HF_HUB_CACHE'] = '/tmp/hf_cache'
12
+ os.environ['TORCH_HOME'] = '/tmp/torch_cache'
13
+
14
+ # Create cache directories
15
+ os.makedirs('/tmp/hf_cache', exist_ok=True)
16
+ os.makedirs('/tmp/torch_cache', exist_ok=True)
17
+
18
def test_kokoro_import():
    """Check that kokoro, soundfile and torch can all be imported.

    Returns:
        bool: True if every import succeeds; False, after printing the
        error, if any of them raises ImportError.
    """
    try:
        # The imported names are unused on purpose; only importability matters.
        from kokoro import KPipeline  # noqa: F401
        import soundfile as sf  # noqa: F401
        import torch  # noqa: F401
    except ImportError as e:
        print(f"❌ Import error: {e}")
        return False

    print("✅ All required packages imported successfully!")
    return True
29
+
30
def test_kokoro_pipeline():
    """Check that a Kokoro pipeline can be constructed.

    Returns:
        bool: True if ``KPipeline`` is created without raising; False,
        after printing the error, otherwise.
    """
    try:
        from kokoro import KPipeline

        # Construction alone is the check; the instance is not needed.
        KPipeline(lang_code='a', repo_id='hexgrad/Kokoro-82M')
    except Exception as e:
        print(f"❌ Pipeline initialization error: {e}")
        return False

    print("✅ Kokoro pipeline initialized successfully!")
    return True
40
+
41
def test_kokoro_generation():
    """Check that Kokoro can synthesize speech and save it to a WAV file.

    Generates the first audio segment for a short sentence and writes it to
    ``test_kokoro.wav`` in the current directory.

    Returns:
        bool: True if a segment was generated and saved; False, after
        printing the error, on any exception or when nothing was generated.
    """
    try:
        from kokoro import KPipeline
        import soundfile as sf

        pipeline = KPipeline(lang_code='a', repo_id='hexgrad/Kokoro-82M')
        text = "Hello, this is a test of Kokoro TTS."

        generator = pipeline(text, voice='af_heart')

        # Just test the first segment; an empty generator means no audio was
        # produced, which must not be reported as a pass.
        first_segment = next(iter(generator), None)
        if first_segment is None:
            print("❌ Speech generation error: no audio segments were generated")
            return False

        gs, ps, audio = first_segment
        print(f"✅ Generated audio segment 0: gs={gs}, ps={ps}")
        # 24000 is the sample rate handed to soundfile; presumably Kokoro's
        # native output rate - confirm against the Kokoro documentation.
        sf.write('test_kokoro.wav', audio, 24000)
        print("✅ Test audio saved as 'test_kokoro.wav'")

        return True
    except Exception as e:
        print(f"❌ Speech generation error: {e}")
        return False
63
+
64
def main():
    """Run every installation check in order and print a summary.

    Returns:
        int: 0 when all checks pass, 1 otherwise. Used as the process exit
        status when the script is run directly, so callers such as CI or
        shell scripts can detect a failed installation.
    """
    print("🎀 Testing Kokoro TTS Installation")
    print("=" * 40)

    tests = [
        ("Import Test", test_kokoro_import),
        ("Pipeline Test", test_kokoro_pipeline),
        ("Generation Test", test_kokoro_generation),
    ]

    passed = 0
    for test_name, test_func in tests:
        print(f"\n🔍 Running {test_name}...")
        if test_func():
            passed += 1
        else:
            print(f"❌ {test_name} failed!")

    total = len(tests)
    print(f"\n📊 Results: {passed}/{total} tests passed")

    if passed == total:
        print("🎉 All tests passed! Kokoro TTS is ready to use.")
        return 0

    print("⚠️ Some tests failed. Please check the installation.")
    return 1


if __name__ == "__main__":
    raise SystemExit(main())