NeuralFalcon committed on
Commit
c366e2b
·
verified ·
1 Parent(s): 20b42c1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -21
app.py CHANGED
@@ -1,8 +1,4 @@
1
 
2
- # import os
3
- # os.system("pip install --no-cache-dir --upgrade --force-reinstall -r requirements.txt")
4
-
5
-
6
  # Initialize a pipeline
7
  from kokoro import KPipeline
8
  # from IPython.display import display, Audio
@@ -177,6 +173,7 @@ def remove_silence_function(file_path,minimum_silence=50):
177
  combined += chunk
178
  combined.export(output_path, format=audio_format)
179
  return output_path
 
180
  def generate_and_save_audio(text, Language="American English",voice="af_bella", speed=1,remove_silence=False,keep_silence_up_to=0.05):
181
  text=clean_text(text)
182
  update_pipeline(Language)
@@ -205,6 +202,8 @@ def generate_and_save_audio(text, Language="American English",voice="af_bella",
205
  audio_int16 = (audio_np * 32767).astype(np.int16) # Scale to 16-bit range
206
  audio_bytes = audio_int16.tobytes() # Convert to bytes
207
  # Write the audio chunk to the WAV file
 
 
208
  wav_file.writeframes(audio_bytes)
209
  if remove_silence:
210
  keep_silence = int(keep_silence_up_to * 1000)
@@ -212,39 +211,44 @@ def generate_and_save_audio(text, Language="American English",voice="af_bella",
212
  return new_wave_file,timestamps
213
  return save_path,timestamps
214
 
 
 
215
  def adjust_timestamps(timestamp_dict):
216
  adjusted_timestamps = []
217
- last_end_time = 0 # Tracks the last word's end time
218
 
219
  for segment_id in sorted(timestamp_dict.keys()):
220
  segment = timestamp_dict[segment_id]
221
  words = segment["words"]
 
222
 
223
- for word_entry in words:
224
- # Skip word entries with start or end time as None or 0
225
- if word_entry["start"] in [None, 0] and word_entry["end"] in [None, 0]:
226
- continue
 
227
 
228
- # Fill in None values with the last valid timestamp or use 0 as default
229
- word_start = word_entry["start"] if word_entry["start"] is not None else last_end_time
230
- word_end = word_entry["end"] if word_entry["end"] is not None else word_start # Use word_start if end is None
231
 
232
- new_start = word_start + last_end_time
233
- new_end = word_end + last_end_time
 
234
 
235
  adjusted_timestamps.append({
236
- "word": word_entry["word"],
237
- "start": round(new_start, 3),
238
- "end": round(new_end, 3)
239
  })
240
 
241
- # Update last_end_time to the last word's end time in this segment
242
- if words:
243
- last_end_time = adjusted_timestamps[-1]["end"]
244
 
245
  return adjusted_timestamps
246
 
247
 
 
248
  import string
249
 
250
  def write_word_srt(word_level_timestamps, output_file="word.srt", skip_punctuation=True):
@@ -278,6 +282,30 @@ def write_word_srt(word_level_timestamps, output_file="word.srt", skip_punctuati
278
 
279
  import string
280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  def write_sentence_srt(word_level_timestamps, output_file="subtitles.srt", max_words=8, min_pause=0.1):
282
  subtitles = [] # Stores subtitle blocks
283
  subtitle_words = [] # Temporary list for words in the current subtitle
@@ -343,6 +371,7 @@ def write_sentence_srt(word_level_timestamps, output_file="subtitles.srt", max_w
343
  # Write subtitles to SRT file
344
  with open(output_file, "w", encoding="utf-8") as f:
345
  for i, (start, end, text) in enumerate(subtitles, start=1):
 
346
  f.write(f"{i}\n{format_srt_time(start)} --> {format_srt_time(end)}\n{text}\n\n")
347
 
348
  # print(f"SRT file '{output_file}' created successfully!")
@@ -591,6 +620,7 @@ import click
591
  @click.option("--debug", is_flag=True, default=False, help="Enable debug mode.")
592
  @click.option("--share", is_flag=True, default=False, help="Enable sharing of the interface.")
593
  def main(debug, share):
 
594
  demo1 = ui()
595
  demo2 = tutorial()
596
  demo = gr.TabbedInterface([demo1, demo2],["Multilingual TTS","VoicePack Explanation"],title="Kokoro TTS")#,theme='JohnSmith9982/small_and_pretty')
@@ -608,4 +638,4 @@ last_used_language = "a"
608
  pipeline = KPipeline(lang_code=last_used_language)
609
  temp_folder = create_audio_dir()
610
  if __name__ == "__main__":
611
- main()
 
1
 
 
 
 
 
2
  # Initialize a pipeline
3
  from kokoro import KPipeline
4
  # from IPython.display import display, Audio
 
173
  combined += chunk
174
  combined.export(output_path, format=audio_format)
175
  return output_path
176
+
177
  def generate_and_save_audio(text, Language="American English",voice="af_bella", speed=1,remove_silence=False,keep_silence_up_to=0.05):
178
  text=clean_text(text)
179
  update_pipeline(Language)
 
202
  audio_int16 = (audio_np * 32767).astype(np.int16) # Scale to 16-bit range
203
  audio_bytes = audio_int16.tobytes() # Convert to bytes
204
  # Write the audio chunk to the WAV file
205
+ duration_sec = len(audio_np) / 24000
206
+ timestamps[i]["duration"] = duration_sec
207
  wav_file.writeframes(audio_bytes)
208
  if remove_silence:
209
  keep_silence = int(keep_silence_up_to * 1000)
 
211
  return new_wave_file,timestamps
212
  return save_path,timestamps
213
 
214
+
215
+
216
  def adjust_timestamps(timestamp_dict):
217
  adjusted_timestamps = []
218
+ last_global_end = 0 # Cumulative audio timeline
219
 
220
  for segment_id in sorted(timestamp_dict.keys()):
221
  segment = timestamp_dict[segment_id]
222
  words = segment["words"]
223
+ chunk_duration = segment["duration"]
224
 
225
+ # If there are valid words, get last word end
226
+ last_word_end_in_chunk = (
227
+ max(w["end"] for w in words if w["end"] not in [None, 0])
228
+ if words else 0
229
+ )
230
 
231
+ silence_gap = chunk_duration - last_word_end_in_chunk
232
+ if silence_gap < 0: # In rare cases where end > duration (due to rounding)
233
+ silence_gap = 0
234
 
235
+ for word in words:
236
+ start = word["start"] or 0
237
+ end = word["end"] or start
238
 
239
  adjusted_timestamps.append({
240
+ "word": word["word"],
241
+ "start": round(last_global_end + start, 3),
242
+ "end": round(last_global_end + end, 3)
243
  })
244
 
245
+ # Add entire chunk duration to global end
246
+ last_global_end += chunk_duration
 
247
 
248
  return adjusted_timestamps
249
 
250
 
251
+
252
  import string
253
 
254
  def write_word_srt(word_level_timestamps, output_file="word.srt", skip_punctuation=True):
 
282
 
283
  import string
284
 
285
+
286
+ def split_line_by_char_limit(text, max_chars=30):
287
+ words = text.split()
288
+ lines = []
289
+ current_line = ""
290
+
291
+ for word in words:
292
+ if len(current_line + " " + word) <= max_chars:
293
+ current_line = (current_line + " " + word).strip()
294
+ else:
295
+ lines.append(current_line)
296
+ current_line = word
297
+
298
+ if current_line:
299
+ # Check if last line is a single word and there is a previous line
300
+ if len(current_line.split()) == 1 and len(lines) > 0:
301
+ # Append single word to previous line
302
+ lines[-1] += " " + current_line
303
+ else:
304
+ lines.append(current_line)
305
+
306
+ return "\n".join(lines)
307
+
308
+
309
  def write_sentence_srt(word_level_timestamps, output_file="subtitles.srt", max_words=8, min_pause=0.1):
310
  subtitles = [] # Stores subtitle blocks
311
  subtitle_words = [] # Temporary list for words in the current subtitle
 
371
  # Write subtitles to SRT file
372
  with open(output_file, "w", encoding="utf-8") as f:
373
  for i, (start, end, text) in enumerate(subtitles, start=1):
374
+ text=split_line_by_char_limit(text, max_chars=30)
375
  f.write(f"{i}\n{format_srt_time(start)} --> {format_srt_time(end)}\n{text}\n\n")
376
 
377
  # print(f"SRT file '{output_file}' created successfully!")
 
620
  @click.option("--debug", is_flag=True, default=False, help="Enable debug mode.")
621
  @click.option("--share", is_flag=True, default=False, help="Enable sharing of the interface.")
622
  def main(debug, share):
623
+ # def main(debug=True, share=True):
624
  demo1 = ui()
625
  demo2 = tutorial()
626
  demo = gr.TabbedInterface([demo1, demo2],["Multilingual TTS","VoicePack Explanation"],title="Kokoro TTS")#,theme='JohnSmith9982/small_and_pretty')
 
638
  pipeline = KPipeline(lang_code=last_used_language)
639
  temp_folder = create_audio_dir()
640
  if __name__ == "__main__":
641
+ main()