alexnasa committed on
Commit
d672cc3
·
verified ·
1 Parent(s): d67610b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -38
app.py CHANGED
@@ -561,14 +561,16 @@ def get_duration(image_path, audio_path, text, orientation_state, num_steps, ses
561
  return 0
562
 
563
 
564
- audio_chunks = inferpipe.get_times(
565
- prompt=text,
566
- image_path=image_path,
567
- audio_path=audio_path,
568
- orientation_state= orientation_state,
569
- seq_len=args.seq_len,
570
- num_steps=num_steps
571
- )
 
 
572
 
573
  warmup_s = 30
574
  duration_s = (20 * num_steps) + warmup_s
@@ -635,13 +637,15 @@ def infer_scene(image_path, audio_path, text, orientation_state, num_steps, sess
635
  if session_id is None:
636
  session_id = uuid.uuid4().hex
637
 
 
 
638
  result = None
639
 
640
  try:
641
- result = infer(image_path, audio_path, text, orientation_state, num_steps, session_id, progress)
642
  except Exception as e:
643
  err = str(e).lower()
644
- print(f"{session_id} failed due to {err}")
645
  raise
646
 
647
  return result
@@ -864,7 +868,7 @@ with gr.Blocks(css=css) as demo:
864
  infer_btn = gr.Button("🦜 Avatar Me", variant='primary', elem_classes="button-gradient")
865
  with gr.Accordion("Advanced Settings", open=False):
866
  raw_img_text = gr.Text(show_label=False, label="", value='', visible=False)
867
- limit_on = gr.Checkbox(label="Limit Audio files to 5 seconds", value=True)
868
  adaptive_text = gr.Checkbox(label="Adaptive Video Prompt", value=True)
869
  text_input = gr.Textbox(show_label=False, lines=6, elem_classes=["stateful"], interactive=False, value= ADAPTIVE_PROMPT_TEMPLATES[1])
870
 
@@ -872,28 +876,29 @@ with gr.Blocks(css=css) as demo:
872
 
873
  cached_examples = gr.Examples(
874
  examples=[
875
-
876
- [
877
- "examples/images/creature-001.png",
878
- "examples/audios/keen.wav",
879
- ADAPTIVE_PROMPT_TEMPLATES[2],
880
- 20,
881
- ''
882
- ],
883
-
884
  [
885
  "examples/images/female-001.png",
886
  "examples/audios/script.wav",
887
- ADAPTIVE_PROMPT_TEMPLATES[2],
888
- 14,
889
  ''
890
  ],
891
 
 
892
  [
893
  "examples/images/male-001.png",
894
  "examples/audios/denial.wav",
895
- ADAPTIVE_PROMPT_TEMPLATES[2],
896
- 12,
 
 
 
 
 
 
 
 
897
  ''
898
  ],
899
 
@@ -968,14 +973,6 @@ with gr.Blocks(css=css) as demo:
968
  fn=speak_to_me,
969
  inputs=[session_state],
970
  outputs=[audio_input]
971
- ).then(
972
- fn=apply_audio,
973
- inputs=[audio_input],
974
- outputs=[audio_input]
975
- ).then(
976
- fn=preprocess_audio_first_5s_librosa,
977
- inputs=[audio_input, limit_on, session_state],
978
- outputs=[audio_input],
979
  )
980
  image_input.orientation(fn=orientation_changed, inputs=[session_state], outputs=[orientation_state]).then(fn=preprocess_img, inputs=[image_input, raw_img_text, orientation_state, session_state], outputs=[image_input, raw_img_text])
981
  image_input.clear(fn=clear_raw_image, outputs=[raw_img_text])
@@ -984,12 +981,7 @@ with gr.Blocks(css=css) as demo:
984
  audio_input.change(fn=update_generate_button, inputs=[image_input, audio_input, orientation_state, text_input, num_steps, session_state], outputs=[time_required])
985
  num_steps.change(fn=slider_value_change, inputs=[image_input, audio_input, orientation_state, text_input, num_steps, session_state, adaptive_text], outputs=[time_required, text_input])
986
  adaptive_text.change(fn=check_box_clicked, inputs=[adaptive_text], outputs=[text_input])
987
- audio_input.upload(fn=apply_audio, inputs=[audio_input], outputs=[audio_input]
988
- ).then(
989
- fn=preprocess_audio_first_5s_librosa,
990
- inputs=[audio_input, limit_on, session_state],
991
- outputs=[audio_input],
992
- )
993
 
994
  if __name__ == "__main__":
995
  demo.unload(cleanup)
 
561
  return 0
562
 
563
 
564
+ # audio_chunks = inferpipe.get_times(
565
+ # prompt=text,
566
+ # image_path=image_path,
567
+ # audio_path=audio_path,
568
+ # orientation_state= orientation_state,
569
+ # seq_len=args.seq_len,
570
+ # num_steps=num_steps
571
+ # )
572
+
573
+ audio_chunks = 1
574
 
575
  warmup_s = 30
576
  duration_s = (20 * num_steps) + warmup_s
 
637
  if session_id is None:
638
  session_id = uuid.uuid4().hex
639
 
640
+ limited_audio_path = preprocess_audio_first_5s_librosa(audio_path, True, session_id)
641
+
642
  result = None
643
 
644
  try:
645
+ result = infer(image_path, limited_audio_path, text, orientation_state, num_steps, session_id, progress)
646
  except Exception as e:
647
  err = str(e).lower()
648
+ print(f"{session_id} failed due to {err}")
649
  raise
650
 
651
  return result
 
868
  infer_btn = gr.Button("🦜 Avatar Me", variant='primary', elem_classes="button-gradient")
869
  with gr.Accordion("Advanced Settings", open=False):
870
  raw_img_text = gr.Text(show_label=False, label="", value='', visible=False)
871
+ limit_on = gr.Checkbox(label="Limit Audio files to 5 seconds", value=True, visible=False)
872
  adaptive_text = gr.Checkbox(label="Adaptive Video Prompt", value=True)
873
  text_input = gr.Textbox(show_label=False, lines=6, elem_classes=["stateful"], interactive=False, value= ADAPTIVE_PROMPT_TEMPLATES[1])
874
 
 
876
 
877
  cached_examples = gr.Examples(
878
  examples=[
879
+
 
 
 
 
 
 
 
 
880
  [
881
  "examples/images/female-001.png",
882
  "examples/audios/script.wav",
883
+ ADAPTIVE_PROMPT_TEMPLATES[1],
884
+ 8,
885
  ''
886
  ],
887
 
888
+
889
  [
890
  "examples/images/male-001.png",
891
  "examples/audios/denial.wav",
892
+ ADAPTIVE_PROMPT_TEMPLATES[1],
893
+ 8,
894
+ ''
895
+ ],
896
+
897
+ [
898
+ "examples/images/female-003.png",
899
+ "examples/audios/matcha.wav",
900
+ ADAPTIVE_PROMPT_TEMPLATES[1],
901
+ 8,
902
  ''
903
  ],
904
 
 
973
  fn=speak_to_me,
974
  inputs=[session_state],
975
  outputs=[audio_input]
 
 
 
 
 
 
 
 
976
  )
977
  image_input.orientation(fn=orientation_changed, inputs=[session_state], outputs=[orientation_state]).then(fn=preprocess_img, inputs=[image_input, raw_img_text, orientation_state, session_state], outputs=[image_input, raw_img_text])
978
  image_input.clear(fn=clear_raw_image, outputs=[raw_img_text])
 
981
  audio_input.change(fn=update_generate_button, inputs=[image_input, audio_input, orientation_state, text_input, num_steps, session_state], outputs=[time_required])
982
  num_steps.change(fn=slider_value_change, inputs=[image_input, audio_input, orientation_state, text_input, num_steps, session_state, adaptive_text], outputs=[time_required, text_input])
983
  adaptive_text.change(fn=check_box_clicked, inputs=[adaptive_text], outputs=[text_input])
984
+
 
 
 
 
 
985
 
986
  if __name__ == "__main__":
987
  demo.unload(cleanup)