drewThomasson committed on
Commit
1df0182
·
verified ·
1 Parent(s): e35df2e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -10
app.py CHANGED
@@ -77,7 +77,7 @@ parser.add_argument("--length_penalty", type=float, default=1.0, help="A length
77
  parser.add_argument("--repetition_penalty", type=float, default=2.0, help="A penalty that prevents the autoregressive decoder from repeating itself. Defaults to 2.0.")
78
  parser.add_argument("--top_k", type=int, default=50, help="Top-k sampling. Lower values mean more likely outputs. Defaults to 50.")
79
  parser.add_argument("--top_p", type=float, default=0.8, help="Top-p sampling. Lower values mean more likely outputs. Defaults to 0.8.")
80
- parser.add_argument("--speed", type=float, default=1.0, help="Speed factor for the speech generation. Defaults to 1.0.")
81
  parser.add_argument("--enable_text_splitting", type=bool, default=False, help="Enable splitting text into sentences. Defaults to True.")
82
 
83
  args = parser.parse_args()
@@ -110,10 +110,10 @@ import socket
110
  #nltk.download('punkt_tab')
111
 
112
  # Import the locally stored Xtts default model
113
- import import_locally_stored_tts_model_files
114
 
115
  # Add the app directory's nltk_data folder to NLTK's data search path
116
- nltk.data.path.append('/home/user/app/nltk_data')
117
 
118
  # Download UniDic if it's not already installed
119
  #unidic.download()
@@ -893,7 +893,7 @@ def run_gradio_interface():
893
  temperature = gr.Slider(
894
  label="Temperature",
895
  minimum=0.1,
896
- maximum=2.0,
897
  step=0.1,
898
  value=0.65,
899
  info="Higher values lead to more creative, unpredictable outputs. Lower values make it more monotone."
@@ -901,7 +901,7 @@ def run_gradio_interface():
901
  length_penalty = gr.Slider(
902
  label="Length Penalty",
903
  minimum=0.5,
904
- maximum=3.0,
905
  step=0.1,
906
  value=1.0,
907
  info="Penalize longer sequences. Higher values produce shorter outputs."
@@ -909,7 +909,7 @@ def run_gradio_interface():
909
  repetition_penalty = gr.Slider(
910
  label="Repetition Penalty",
911
  minimum=1.0,
912
- maximum=5.0,
913
  step=0.1,
914
  value=2.0,
915
  info="Penalizes repeated phrases. Higher values reduce repetition."
@@ -926,17 +926,17 @@ def run_gradio_interface():
926
  label="Top-p Sampling",
927
  minimum=0.1,
928
  maximum=1.0,
929
- step=0.1,
930
  value=0.8,
931
  info="Controls cumulative probability for word selection. Lower values make the output more predictable."
932
  )
933
  speed = gr.Slider(
934
  label="Speed",
935
  minimum=0.5,
936
- maximum=4.0,
937
  step=0.1,
938
  value=1.0,
939
- info="Adjusts the playback speed of the generated audio."
940
  )
941
  enable_text_splitting = gr.Checkbox(
942
  label="Enable Text Splitting",
@@ -951,7 +951,16 @@ def run_gradio_interface():
951
  download_files = gr.File(label="Download Files", interactive=False)
952
 
953
  convert_btn.click(
954
- convert_ebook_to_audio,
 
 
 
 
 
 
 
 
 
955
  inputs=[
956
  ebook_file, target_voice_file, language, use_custom_model, custom_model_file, custom_config_file,
957
  custom_vocab_file, temperature, length_penalty, repetition_penalty,
@@ -959,6 +968,8 @@ def run_gradio_interface():
959
  ],
960
  outputs=[output, audio_player]
961
  )
 
 
962
  use_custom_model.change(
963
  lambda x: [gr.update(visible=x)] * 4,
964
  inputs=[use_custom_model],
 
77
  parser.add_argument("--repetition_penalty", type=float, default=2.0, help="A penalty that prevents the autoregressive decoder from repeating itself. Defaults to 2.0.")
78
  parser.add_argument("--top_k", type=int, default=50, help="Top-k sampling. Lower values mean more likely outputs. Defaults to 50.")
79
  parser.add_argument("--top_p", type=float, default=0.8, help="Top-p sampling. Lower values mean more likely outputs. Defaults to 0.8.")
80
+ parser.add_argument("--speed", type=float, default=1.0, help="Speed factor for the speech generation. IE: How fast the Narrerator will speak. Defaults to 1.0.")
81
  parser.add_argument("--enable_text_splitting", type=bool, default=False, help="Enable splitting text into sentences. Defaults to True.")
82
 
83
  args = parser.parse_args()
 
110
  #nltk.download('punkt_tab')
111
 
112
  # Import the locally stored Xtts default model
113
+ #import import_locally_stored_tts_model_files
114
 
115
  # Add the app directory's nltk_data folder to NLTK's data search path
116
+ #nltk.data.path.append('/home/user/app/nltk_data')
117
 
118
  # Download UniDic if it's not already installed
119
  #unidic.download()
 
893
  temperature = gr.Slider(
894
  label="Temperature",
895
  minimum=0.1,
896
+ maximum=10.0,
897
  step=0.1,
898
  value=0.65,
899
  info="Higher values lead to more creative, unpredictable outputs. Lower values make it more monotone."
 
901
  length_penalty = gr.Slider(
902
  label="Length Penalty",
903
  minimum=0.5,
904
+ maximum=10.0,
905
  step=0.1,
906
  value=1.0,
907
  info="Penalize longer sequences. Higher values produce shorter outputs."
 
909
  repetition_penalty = gr.Slider(
910
  label="Repetition Penalty",
911
  minimum=1.0,
912
+ maximum=10.0,
913
  step=0.1,
914
  value=2.0,
915
  info="Penalizes repeated phrases. Higher values reduce repetition."
 
926
  label="Top-p Sampling",
927
  minimum=0.1,
928
  maximum=1.0,
929
+ step=.01,
930
  value=0.8,
931
  info="Controls cumulative probability for word selection. Lower values make the output more predictable."
932
  )
933
  speed = gr.Slider(
934
  label="Speed",
935
  minimum=0.5,
936
+ maximum=3.0,
937
  step=0.1,
938
  value=1.0,
939
+ info="Adjusts How fast the narrator will speak."
940
  )
941
  enable_text_splitting = gr.Checkbox(
942
  label="Enable Text Splitting",
 
951
  download_files = gr.File(label="Download Files", interactive=False)
952
 
953
  convert_btn.click(
954
+ lambda *args: convert_ebook_to_audio(
955
+ *args[:7],
956
+ float(args[7]), # Ensure temperature is float
957
+ float(args[8]), # Ensure length_penalty is float
958
+ float(args[9]), # Ensure repetition_penalty is float
959
+ int(args[10]), # Ensure top_k is int
960
+ float(args[11]), # Ensure top_p is float
961
+ float(args[12]), # Ensure speed is float
962
+ *args[13:]
963
+ ),
964
  inputs=[
965
  ebook_file, target_voice_file, language, use_custom_model, custom_model_file, custom_config_file,
966
  custom_vocab_file, temperature, length_penalty, repetition_penalty,
 
968
  ],
969
  outputs=[output, audio_player]
970
  )
971
+
972
+
973
  use_custom_model.change(
974
  lambda x: [gr.update(visible=x)] * 4,
975
  inputs=[use_custom_model],