Spaces:

sematech
/

sema-api

Sleeping

App Files Files Community

kamau1 committed on Jun 21

Commit

5aa0409

1 Parent(s): 937c29e

fix: switch to HF default cache, remove MODELS_DIR and unused import, update CTranslate2 download to include all required files

Browse files

Files changed (1) hide show

sema_translation_api.py +22 -17

sema_translation_api.py CHANGED Viewed

@@ -13,7 +13,7 @@ from typing import Optional
 from fastapi import FastAPI, HTTPException, Request
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel, Field
-from huggingface_hub import hf_hub_download, snapshot_download
 import ctranslate2
 import sentencepiece as spm
 import fasttext
@@ -49,7 +49,6 @@ app.add_middleware(
 # --- Global Variables ---
 REPO_ID = "sematech/sema-utils"
-MODELS_DIR = "hf_models"
 beam_size = 1
 device = "cpu"
@@ -71,38 +70,44 @@ def get_nairobi_time():
     return full_date, curr_time
 def download_models():
-    """Download models from HuggingFace Hub"""
     print("🔄 Downloading models from sematech/sema-utils...")
-    # Ensure models directory exists
-    os.makedirs(MODELS_DIR, exist_ok=True)
     try:
-        # Download individual files from root
         print("📥 Downloading SentencePiece model...")
         spm_path = hf_hub_download(
             repo_id=REPO_ID,
-            filename="spm.model",
-            local_dir=MODELS_DIR
         )
         print("📥 Downloading language detection model...")
         ft_path = hf_hub_download(
             repo_id=REPO_ID,
-            filename="lid218e.bin",
-            local_dir=MODELS_DIR
         )
-        # Download translation model (3.3B) from subfolder
         print("📥 Downloading translation model (3.3B)...")
-        ct_model_path = snapshot_download(
             repo_id=REPO_ID,
-            allow_patterns="translation_models/sematrans-3.3B/*",
-            local_dir=MODELS_DIR
         )
-        # Construct paths
-        ct_model_full_path = os.path.join(MODELS_DIR, "translation_models", "sematrans-3.3B")
         return spm_path, ft_path, ct_model_full_path

 from fastapi import FastAPI, HTTPException, Request
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel, Field
+from huggingface_hub import hf_hub_download
 import ctranslate2
 import sentencepiece as spm
 import fasttext
 # --- Global Variables ---
 REPO_ID = "sematech/sema-utils"
 beam_size = 1
 device = "cpu"
     return full_date, curr_time
 def download_models():
+    """Download models from HuggingFace Hub using default cache"""
     print("🔄 Downloading models from sematech/sema-utils...")
     try:
+        # Download individual files from root (using default HF cache)
         print("📥 Downloading SentencePiece model...")
         spm_path = hf_hub_download(
             repo_id=REPO_ID,
+            filename="spm.model"
         )
         print("📥 Downloading language detection model...")
         ft_path = hf_hub_download(
             repo_id=REPO_ID,
+            filename="lid218e.bin"
         )
+        # Download translation model files individually
         print("📥 Downloading translation model (3.3B)...")
+        # Download all necessary CTranslate2 files
+        model_bin_path = hf_hub_download(
+            repo_id=REPO_ID,
+            filename="translation_models/sematrans-3.3B/model.bin"
+        )
+        hf_hub_download(
+            repo_id=REPO_ID,
+            filename="translation_models/sematrans-3.3B/config.json"
+        )
+        hf_hub_download(
             repo_id=REPO_ID,
+            filename="translation_models/sematrans-3.3B/shared_vocabulary.txt"
         )
+        # The model directory is the parent of the model.bin file
+        ct_model_full_path = os.path.dirname(model_bin_path)
         return spm_path, ft_path, ct_model_full_path