File size: 6,254 Bytes
caa0b3d
 
c91fbe6
caa0b3d
c91fbe6
caa0b3d
 
c91fbe6
caa0b3d
c91fbe6
caa0b3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c91fbe6
caa0b3d
 
 
 
 
 
 
 
 
 
 
 
c91fbe6
caa0b3d
 
 
 
 
 
 
 
 
c91fbe6
caa0b3d
c91fbe6
 
caa0b3d
 
 
 
c91fbe6
caa0b3d
c91fbe6
caa0b3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c91fbe6
 
caa0b3d
 
c91fbe6
caa0b3d
 
 
 
 
 
 
c91fbe6
 
caa0b3d
c91fbe6
caa0b3d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# --- START OF FILE inference_cli.py ---

import argparse
import shutil
import soundfile as sf
import os # For path manipulation if needed
import sys # To potentially add app.py directory to path

# Import app.py, which must live in the same directory or on the Python path.
# Importing a module runs its top-level code, so failures other than a plain
# ImportError are possible; they are reported separately below.
try:
    # If app.py is not directly importable, you might need to add its directory to the path
    # Example: sys.path.append(os.path.dirname(os.path.abspath(__file__))) # Add current dir
    import app
    from app import infer # Import the main inference function
except ImportError as e:
    # Diagnostics go to stderr so they never mix with regular stdout output.
    print("Error: Could not import 'app.py'. Make sure it's in the Python path.", file=sys.stderr)
    print(f"Details: {e}", file=sys.stderr)
    sys.exit(1)
except Exception as e:
    # Top-level boundary: anything app.py raises while being imported ends the CLI.
    print(f"An unexpected error occurred during 'app.py' import: {e}", file=sys.stderr)
    sys.exit(1)


def _build_parser():
    """Create the argument parser describing every CLI option."""
    parser = argparse.ArgumentParser(description="F5 TTS - Simplified CLI Interface using app.py")

    # --- Input Arguments ---
    parser.add_argument("--ref_audio", required=True, help="Path to the reference audio file (wav, mp3, etc.)")
    parser.add_argument("--ref_text", default="", help="Reference text. If empty, audio transcription will be performed by app.py's infer function.")
    parser.add_argument("--gen_text", required=True, help="Text to generate")

    # --- Model & Generation Parameters ---
    # Note: app.py seems hardcoded to load the "Multi" model at the top level.
    # This argument might not change the loaded model unless app.py's infer logic uses it internally.
    parser.add_argument("--exp_name", default="Multi", help="Experiment name / model selection (default: Multi - effectiveness depends on app.py)")
    parser.add_argument("--language", default="en-us", help="Synthesized language code (e.g., en-us, pl, de) (default: en-us)")
    parser.add_argument("--ref_language", default="en-us", help="Reference language code (e.g., en-us, pl, de) (default: en-us)")
    parser.add_argument("--speed", type=float, default=1.0, help="Audio speed factor (default: 1.0)")

    # --- Postprocessing ---
    parser.add_argument("--remove_silence", action="store_true", help="Remove silence from the output audio (uses app.py logic)")
    parser.add_argument("--cross_fade_duration", type=float, default=0.15, help="Cross-fade duration between batches (s)")

    # --- Output Arguments ---
    parser.add_argument("--output_audio", default="output.wav", help="Path to save the output WAV file")
    parser.add_argument("--output_spectrogram", default="spectrogram.png", help="Path to save the spectrogram image (PNG)")

    return parser


def _preview(text, limit=100):
    """Return text cut to limit characters, adding '...' only when something was cut."""
    if len(text) <= limit:
        return text
    return f"{text[:limit]}..."


def _ensure_parent_dir(path):
    """Create the directory that will contain path if it does not exist yet."""
    # abspath() always yields a non-empty directory part, so no fallback is needed.
    os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)


def main():
    """Run the command-line interface: parse options, synthesize, save results.

    The function reads its options from ``sys.argv``, pushes the language and
    speed settings into the imported ``app`` module, calls ``infer`` and then
    writes the audio file and copies the spectrogram image to the requested
    output paths.

    Exit codes (raised through ``SystemExit``):
        2 -- invalid command line (argparse error, missing reference audio
             file, non-positive speed, negative cross-fade duration).
        1 -- inference failed or the output files could not be written.
    """
    parser = _build_parser()
    args = parser.parse_args()

    # Reject unusable values up front instead of failing deep inside inference.
    # "not x > 0" (rather than "x <= 0") also rejects NaN.
    if not args.speed > 0:
        parser.error(f"--speed must be greater than 0 (got {args.speed})")
    if not args.cross_fade_duration >= 0:
        parser.error(f"--cross_fade_duration must not be negative (got {args.cross_fade_duration})")
    if not os.path.isfile(args.ref_audio):
        parser.error(f"--ref_audio file not found: {args.ref_audio}")

    print("--- Configuration ---")
    print(f"Reference Audio: {args.ref_audio}")
    print(f"Reference Text: '{args.ref_text if args.ref_text else '<Automatic Transcription>'}'")
    print(f"Generation Text: '{_preview(args.gen_text)}'")
    print(f"Model (exp_name): {args.exp_name}")
    print(f"Synth Language: {args.language}")
    print(f"Ref Language: {args.ref_language}")
    print(f"Speed: {args.speed}")
    print(f"Remove Silence: {args.remove_silence}")
    print(f"Cross-Fade: {args.cross_fade_duration}s")
    print(f"Output Audio: {args.output_audio}")
    print(f"Output Spectrogram: {args.output_spectrogram}")
    print("--------------------")

    # --- Set Global Variables in app.py ---
    # The 'infer' function in app.py relies on these globals being set.
    # Assigning a module attribute always succeeds (it creates the name when
    # it is missing), so there is no failure to catch here.
    for global_name in ("language", "ref_language", "speed"):
        value = getattr(args, global_name)
        print(f"Setting {global_name} in app module to: {value}")
        setattr(app, global_name, value)

    # --- Run Inference ---
    print("\nStarting inference process (will load models if not already loaded)...")
    try:
        # Call the infer function directly from the imported app module
        (sr, audio_data), temp_spectrogram_path = infer(
            ref_audio_orig=args.ref_audio,
            ref_text=args.ref_text,
            gen_text=args.gen_text,
            exp_name=args.exp_name,
            remove_silence=args.remove_silence,
            cross_fade_duration=args.cross_fade_duration,
            # Note: language, ref_language, speed are used globally within app.py's functions
        )
        print("Inference completed.")
    except Exception as e:
        # Top-level boundary: report the failure with its traceback and stop.
        import traceback

        print(f"\nError during inference: {e}", file=sys.stderr)
        traceback.print_exc()  # Detailed traceback (written to stderr)
        sys.exit(1)

    # --- Save Outputs ---
    try:
        print(f"Saving audio to: {args.output_audio}")
        _ensure_parent_dir(args.output_audio)
        # soundfile writes float32/int16/int32 arrays as they are. Integer PCM
        # must NOT be cast to float32: the cast does not rescale to [-1, 1] and
        # the written audio would clip. Every other dtype is converted to float32.
        if str(audio_data.dtype) not in ("float32", "int16", "int32"):
            audio_data = audio_data.astype("float32")
        sf.write(args.output_audio, audio_data, sr)

        # Copy spectrogram from the temporary path returned by infer
        print(f"Copying spectrogram from {temp_spectrogram_path} to: {args.output_spectrogram}")
        _ensure_parent_dir(args.output_spectrogram)
        shutil.copy(temp_spectrogram_path, args.output_spectrogram)

        print("\n--- Success ---")
        print(f"Audio saved in: {args.output_audio}")
        print(f"Spectrogram saved in: {args.output_spectrogram}")
        print("---------------")
    except Exception as e:
        print(f"\nError saving output files: {e}", file=sys.stderr)
        sys.exit(1)

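    # Optional: Clean up the temporary spectrogram file if needed.
    # Whether it lingers depends on how app.py created it: a NamedTemporaryFile
    # opened with delete=False is never removed automatically, while one opened
    # with delete=True is removed as soon as it is closed.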
    # try:
    #     if os.path.exists(temp_spectrogram_path):
    #         os.remove(temp_spectrogram_path)
    # except Exception as e:
    #     print(f"Warning: Could not clean up temporary spectrogram file {temp_spectrogram_path}: {e}")

# Run the CLI only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

# --- END OF FILE inference_cli.py ---