Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -561,14 +561,16 @@ def get_duration(image_path, audio_path, text, orientation_state, num_steps, ses
|
|
| 561 |
return 0
|
| 562 |
|
| 563 |
|
| 564 |
-
audio_chunks = inferpipe.get_times(
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
|
|
|
|
|
|
| 572 |
|
| 573 |
warmup_s = 30
|
| 574 |
duration_s = (20 * num_steps) + warmup_s
|
|
@@ -635,13 +637,15 @@ def infer_scene(image_path, audio_path, text, orientation_state, num_steps, sess
|
|
| 635 |
if session_id is None:
|
| 636 |
session_id = uuid.uuid4().hex
|
| 637 |
|
|
|
|
|
|
|
| 638 |
result = None
|
| 639 |
|
| 640 |
try:
|
| 641 |
-
result = infer(image_path,
|
| 642 |
except Exception as e:
|
| 643 |
err = str(e).lower()
|
| 644 |
-
print(f"{session_id} failed due to {err}")
|
| 645 |
raise
|
| 646 |
|
| 647 |
return result
|
|
@@ -864,7 +868,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 864 |
infer_btn = gr.Button("🦜 Avatar Me", variant='primary', elem_classes="button-gradient")
|
| 865 |
with gr.Accordion("Advanced Settings", open=False):
|
| 866 |
raw_img_text = gr.Text(show_label=False, label="", value='', visible=False)
|
| 867 |
-
limit_on = gr.Checkbox(label="Limit Audio files to 5 seconds", value=True)
|
| 868 |
adaptive_text = gr.Checkbox(label="Adaptive Video Prompt", value=True)
|
| 869 |
text_input = gr.Textbox(show_label=False, lines=6, elem_classes=["stateful"], interactive=False, value= ADAPTIVE_PROMPT_TEMPLATES[1])
|
| 870 |
|
|
@@ -872,28 +876,29 @@ with gr.Blocks(css=css) as demo:
|
|
| 872 |
|
| 873 |
cached_examples = gr.Examples(
|
| 874 |
examples=[
|
| 875 |
-
|
| 876 |
-
[
|
| 877 |
-
"examples/images/creature-001.png",
|
| 878 |
-
"examples/audios/keen.wav",
|
| 879 |
-
ADAPTIVE_PROMPT_TEMPLATES[2],
|
| 880 |
-
20,
|
| 881 |
-
''
|
| 882 |
-
],
|
| 883 |
-
|
| 884 |
[
|
| 885 |
"examples/images/female-001.png",
|
| 886 |
"examples/audios/script.wav",
|
| 887 |
-
ADAPTIVE_PROMPT_TEMPLATES[
|
| 888 |
-
|
| 889 |
''
|
| 890 |
],
|
| 891 |
|
|
|
|
| 892 |
[
|
| 893 |
"examples/images/male-001.png",
|
| 894 |
"examples/audios/denial.wav",
|
| 895 |
-
ADAPTIVE_PROMPT_TEMPLATES[
|
| 896 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 897 |
''
|
| 898 |
],
|
| 899 |
|
|
@@ -968,14 +973,6 @@ with gr.Blocks(css=css) as demo:
|
|
| 968 |
fn=speak_to_me,
|
| 969 |
inputs=[session_state],
|
| 970 |
outputs=[audio_input]
|
| 971 |
-
).then(
|
| 972 |
-
fn=apply_audio,
|
| 973 |
-
inputs=[audio_input],
|
| 974 |
-
outputs=[audio_input]
|
| 975 |
-
).then(
|
| 976 |
-
fn=preprocess_audio_first_5s_librosa,
|
| 977 |
-
inputs=[audio_input, limit_on, session_state],
|
| 978 |
-
outputs=[audio_input],
|
| 979 |
)
|
| 980 |
image_input.orientation(fn=orientation_changed, inputs=[session_state], outputs=[orientation_state]).then(fn=preprocess_img, inputs=[image_input, raw_img_text, orientation_state, session_state], outputs=[image_input, raw_img_text])
|
| 981 |
image_input.clear(fn=clear_raw_image, outputs=[raw_img_text])
|
|
@@ -984,12 +981,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 984 |
audio_input.change(fn=update_generate_button, inputs=[image_input, audio_input, orientation_state, text_input, num_steps, session_state], outputs=[time_required])
|
| 985 |
num_steps.change(fn=slider_value_change, inputs=[image_input, audio_input, orientation_state, text_input, num_steps, session_state, adaptive_text], outputs=[time_required, text_input])
|
| 986 |
adaptive_text.change(fn=check_box_clicked, inputs=[adaptive_text], outputs=[text_input])
|
| 987 |
-
|
| 988 |
-
).then(
|
| 989 |
-
fn=preprocess_audio_first_5s_librosa,
|
| 990 |
-
inputs=[audio_input, limit_on, session_state],
|
| 991 |
-
outputs=[audio_input],
|
| 992 |
-
)
|
| 993 |
|
| 994 |
if __name__ == "__main__":
|
| 995 |
demo.unload(cleanup)
|
|
|
|
| 561 |
return 0
|
| 562 |
|
| 563 |
|
| 564 |
+
# audio_chunks = inferpipe.get_times(
|
| 565 |
+
# prompt=text,
|
| 566 |
+
# image_path=image_path,
|
| 567 |
+
# audio_path=audio_path,
|
| 568 |
+
# orientation_state= orientation_state,
|
| 569 |
+
# seq_len=args.seq_len,
|
| 570 |
+
# num_steps=num_steps
|
| 571 |
+
# )
|
| 572 |
+
|
| 573 |
+
audio_chunks = 1
|
| 574 |
|
| 575 |
warmup_s = 30
|
| 576 |
duration_s = (20 * num_steps) + warmup_s
|
|
|
|
| 637 |
if session_id is None:
|
| 638 |
session_id = uuid.uuid4().hex
|
| 639 |
|
| 640 |
+
limited_audio_path = preprocess_audio_first_5s_librosa(audio_path, True, session_id)
|
| 641 |
+
|
| 642 |
result = None
|
| 643 |
|
| 644 |
try:
|
| 645 |
+
result = infer(image_path, limited_audio_path, text, orientation_state, num_steps, session_id, progress)
|
| 646 |
except Exception as e:
|
| 647 |
err = str(e).lower()
|
| 648 |
+
print(f"{session_id} failed due to {err}")
|
| 649 |
raise
|
| 650 |
|
| 651 |
return result
|
|
|
|
| 868 |
infer_btn = gr.Button("🦜 Avatar Me", variant='primary', elem_classes="button-gradient")
|
| 869 |
with gr.Accordion("Advanced Settings", open=False):
|
| 870 |
raw_img_text = gr.Text(show_label=False, label="", value='', visible=False)
|
| 871 |
+
limit_on = gr.Checkbox(label="Limit Audio files to 5 seconds", value=True, visible=False)
|
| 872 |
adaptive_text = gr.Checkbox(label="Adaptive Video Prompt", value=True)
|
| 873 |
text_input = gr.Textbox(show_label=False, lines=6, elem_classes=["stateful"], interactive=False, value= ADAPTIVE_PROMPT_TEMPLATES[1])
|
| 874 |
|
|
|
|
| 876 |
|
| 877 |
cached_examples = gr.Examples(
|
| 878 |
examples=[
|
| 879 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 880 |
[
|
| 881 |
"examples/images/female-001.png",
|
| 882 |
"examples/audios/script.wav",
|
| 883 |
+
ADAPTIVE_PROMPT_TEMPLATES[1],
|
| 884 |
+
8,
|
| 885 |
''
|
| 886 |
],
|
| 887 |
|
| 888 |
+
|
| 889 |
[
|
| 890 |
"examples/images/male-001.png",
|
| 891 |
"examples/audios/denial.wav",
|
| 892 |
+
ADAPTIVE_PROMPT_TEMPLATES[1],
|
| 893 |
+
8,
|
| 894 |
+
''
|
| 895 |
+
],
|
| 896 |
+
|
| 897 |
+
[
|
| 898 |
+
"examples/images/female-003.png",
|
| 899 |
+
"examples/audios/matcha.wav",
|
| 900 |
+
ADAPTIVE_PROMPT_TEMPLATES[1],
|
| 901 |
+
8,
|
| 902 |
''
|
| 903 |
],
|
| 904 |
|
|
|
|
| 973 |
fn=speak_to_me,
|
| 974 |
inputs=[session_state],
|
| 975 |
outputs=[audio_input]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 976 |
)
|
| 977 |
image_input.orientation(fn=orientation_changed, inputs=[session_state], outputs=[orientation_state]).then(fn=preprocess_img, inputs=[image_input, raw_img_text, orientation_state, session_state], outputs=[image_input, raw_img_text])
|
| 978 |
image_input.clear(fn=clear_raw_image, outputs=[raw_img_text])
|
|
|
|
| 981 |
audio_input.change(fn=update_generate_button, inputs=[image_input, audio_input, orientation_state, text_input, num_steps, session_state], outputs=[time_required])
|
| 982 |
num_steps.change(fn=slider_value_change, inputs=[image_input, audio_input, orientation_state, text_input, num_steps, session_state, adaptive_text], outputs=[time_required, text_input])
|
| 983 |
adaptive_text.change(fn=check_box_clicked, inputs=[adaptive_text], outputs=[text_input])
|
| 984 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 985 |
|
| 986 |
if __name__ == "__main__":
|
| 987 |
demo.unload(cleanup)
|