File size: 1,311 Bytes
4a9476d d292c5b 3b0b65b ce35779 12cbf91 d292c5b 4a9476d 3b0b65b 4a9476d 450a1f3 90fe9f1 4a9476d 3b0b65b d329f57 4a9476d fd4271f 4a9476d 90fe9f1 4a9476d ce35779 bc5d040 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import os
import streamlit as st
CACHE_DIR = "/tmp/pretrained_models"
import torch
import gc
import time
from speechbrain.pretrained.interfaces import foreign_class
from faster_whisper import WhisperModel
# Ensure the model cache folder exists before any model is loaded (this creates it; it does not verify write access)
os.makedirs(CACHE_DIR, exist_ok=True)
# -------------------------------
# Load Model (Cached)
# -------------------------------
@st.cache_resource(show_spinner="Loading model...")  # load the model only once per app instance
def load_accent_model():
    """Load the custom accent-classification model.

    The result is cached by ``st.cache_resource``, so the download and
    construction happen once and later calls reuse the same object.

    The ``HF_TOKEN`` environment variable must be set. If it is missing, or
    if loading raises, an error message is shown in the Streamlit UI and the
    current script run is halted with ``st.stop()``.

    Returns:
        The object built by ``foreign_class`` for the
        ``CustomEncoderWav2vec2Classifier`` interface.
    """
    # NOTE(review): the token is only checked for presence; it is not passed
    # to foreign_class here -- confirm it is picked up from the environment.
    if not os.getenv("HF_TOKEN"):
        st.error("Hugging Face token not found.")
        st.stop()

    try:
        return foreign_class(
            source="Jzuluaga/accent-id-commonaccent_xlsr-en-english",
            pymodule_file="custom_interface.py",
            classname="CustomEncoderWav2vec2Classifier",
            # Fetch model files into the cache directory this module creates
            # at import time, instead of a path relative to the working
            # directory.
            savedir=CACHE_DIR,
        )
    except Exception as e:
        # UI boundary: surface any loading failure to the user and stop the
        # run rather than letting a traceback escape.
        st.error(f"Error loading model: {e}")
        st.stop()
@st.cache_resource(show_spinner="Loading Whisper...")
def load_whisper():
    """Build the faster-whisper ``"tiny"`` model for CPU inference.

    Wrapped in ``st.cache_resource`` so the model object is constructed once
    and reused on later calls.

    Returns:
        A ``WhisperModel`` created with ``device="cpu"`` and
        ``compute_type="int8_float32"``.
    """
    whisper_options = {
        "device": "cpu",
        "compute_type": "int8_float32",
    }
    return WhisperModel("tiny", **whisper_options)
def unload_model(model, settle_seconds=5):
    """Release a model reference and ask Python and CUDA to reclaim memory.

    Only this function's own reference to ``model`` is dropped. The object
    is actually freed only once the caller (and any cache) no longer holds
    a reference to it.

    Args:
        model: The model object to release.
        settle_seconds: How long to pause after cleanup so the system can
            finish reclaiming memory. Defaults to 5; a value of 0 or less
            skips the pause.

    Returns:
        None.
    """
    del model  # drops only the local reference

    # Collect garbage first so that tensors kept alive solely by reference
    # cycles are released before the CUDA caching allocator is emptied.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    if settle_seconds > 0:
        time.sleep(settle_seconds)  # give the system time to clean up before moving on
|