Gregniuki committed (verified)
Commit 774f8ef · Parent(s): 4c21e38

Update app.py

Files changed (1): app.py +44 -35
app.py CHANGED
@@ -68,29 +68,43 @@ speed = 1
fix_duration = None


-def load_model(page_name, repo_name, exp_name, model_cls, model_cfg, ckpt_step):
-    ckpt_path = str(cached_path(f"hf://{page_name}/{repo_name}/{exp_name}/model_{ckpt_step}.pt"))
-    # ckpt_path = f"ckpts/{exp_name}/model_{ckpt_step}.pt" # .pt | .safetensors
-    vocab_char_map, vocab_size = get_tokenizer("Emilia_ZH_EN", "pinyin")
-    model = CFM(
-        transformer=model_cls(
-            **model_cfg, text_num_embeds=vocab_size, mel_dim=n_mel_channels
-        ),
-        mel_spec_kwargs=dict(
-            target_sample_rate=target_sample_rate,
-            n_mel_channels=n_mel_channels,
-            hop_length=hop_length,
-        ),
-        odeint_kwargs=dict(
-            method=ode_method,
-        ),
-        vocab_char_map=vocab_char_map,
-    ).to(device)
-    dtype = None
-
-    model = load_checkpoint(model, ckpt_path, device, dtype=dtype, use_ema = True)
-
-    return model
+DEFAULT_TTS_MODEL = "F5-TTS"
+tts_model_choice = DEFAULT_TTS_MODEL
+
+
+# load models
+
+vocoder = load_vocoder()
+
+
+def load_f5tts(ckpt_path=str(cached_path("hf://SWivid/F5-TTS/F5TTS_Base/model_1200000.safetensors"))):
+    F5TTS_model_cfg = dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)
+    return load_model(DiT, F5TTS_model_cfg, ckpt_path)
+
+
+def load_e2tts(ckpt_path=str(cached_path("hf://SWivid/E2-TTS/E2TTS_Base/model_1200000.safetensors"))):
+    E2TTS_model_cfg = dict(dim=1024, depth=24, heads=16, ff_mult=4)
+    return load_model(UNetT, E2TTS_model_cfg, ckpt_path)
+
+
+def load_custom(ckpt_path: str, vocab_path="", model_cfg=None):
+    ckpt_path, vocab_path = ckpt_path.strip(), vocab_path.strip()
+    if ckpt_path.startswith("hf://"):
+        ckpt_path = str(cached_path(ckpt_path))
+    if vocab_path.startswith("hf://"):
+        vocab_path = str(cached_path(vocab_path))
+    if model_cfg is None:
+        model_cfg = dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)
+    return load_model(DiT, model_cfg, ckpt_path, vocab_file=vocab_path)
+
+
+F2TTS_ema_model3 = load_f5tts()
+E2TTS_ema_model4 = load_e2tts() if USING_SPACES else None
+custom_ema_model, pre_custom_path = None, ""
+
+chat_model_state = None
+chat_tokenizer_state = None
+


# load models
@@ -99,21 +113,16 @@ F5TTS_model_cfg = dict(
)
E2TTS_model_cfg = dict(dim=1024, depth=24, heads=16, ff_mult=4)

-F5TTS_ema_model = load_model(
-    "Gregniuki", "F5-tts_English_German_Polish", "English", DiT, F5TTS_model_cfg, 222600
+F5TTS_ema_model = load_custom(
+    "https://huggingface.co/Gregniuki/F5-tts_English_German_Polish/resolve/main/English/model_222600.pt", "", F5TTS_model_cfg
)
-E2TTS_ema_model = load_model(
-    "Gregniuki", "F5-tts_English_German_Polish", "Polish2", DiT, F5TTS_model_cfg, 1200000
+E2TTS_ema_model = load_custom(
+    "https://huggingface.co/Gregniuki/F5-tts_English_German_Polish/resolve/main/Polish2/model_1200000.pt", "", F5TTS_model_cfg
)
-E2TTS_ema_model2 = load_model(
-    "Gregniuki", "F5-tts_English_German_Polish", "Polish", DiT, F5TTS_model_cfg, 500000
-)
-E2TTS_ema_model3 = load_model(
-    "SWivid", "F5-TTS", "F5TTS_Base", DiT, F5TTS_model_cfg, 1200000
-)
-E2TTS_ema_model4 = load_model(
-    "SWivid", "E2-TTS", "E2TTS_Base", UNetT, E2TTS_model_cfg, 1200000
+E2TTS_ema_model2 = load_custom(
+    "https://huggingface.co/Gregniuki/F5-tts_English_German_Polish/resolve/main/Polish/model_500000.pt", "", F5TTS_model_cfg
)
+
def chunk_text(text, max_chars=135):
    """
    Splits the input text into chunks, each with a maximum number of characters.
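
Below is a minimal usage sketch, not part of the commit: it shows how the load_custom() helper introduced above could be called for one more checkpoint. It assumes the snippet runs after app.py's own definitions (load_custom, plus the existing imports of load_model, DiT, and cached_path); the hf:// URI simply mirrors the path format the removed load_model() wrapper used.

# Hypothetical example: load an extra Gregniuki checkpoint via load_custom().
# hf:// URIs are resolved to a cached local file by cached_path(); any other
# string (an https URL or a local path) is passed on to load_model() unchanged,
# just like the three load_custom() calls in the diff above.
extra_model = load_custom(
    "hf://Gregniuki/F5-tts_English_German_Polish/English/model_222600.pt",
    vocab_path="",   # empty string: keep whatever default vocab load_model() applies
    model_cfg=None,  # None: fall back to the default DiT config (dim=1024, depth=22, ...)
)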