Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os, asyncio, json, tempfile, websockets, pdfplumber
|
2 |
import gradio as gr
|
3 |
import openai
|
4 |
from dotenv import load_dotenv
|
5 |
import numpy as np
|
6 |
import wave
|
|
|
|
|
7 |
|
8 |
# โโโ 0. ์ด๊ธฐํ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
9 |
load_dotenv()
|
@@ -11,6 +25,19 @@ openai.api_key = os.getenv("OPENAI_API_KEY")
|
|
11 |
if not openai.api_key:
|
12 |
raise RuntimeError("OPENAI_API_KEY ๊ฐ .env ์ ์์ต๋๋ค!")
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
LANG = ["Korean","English","Japanese","Chinese",
|
15 |
"Thai","Russian","Vietnamese","Spanish","French"]
|
16 |
VOICE = {l: ("nova" if l in ["Korean","Japanese","Chinese"] else "alloy")
|
@@ -63,27 +90,121 @@ def translate_pdf(file, src, tgt):
|
|
63 |
return text, asyncio.run(gpt_translate(text, src, tgt))
|
64 |
|
65 |
# โโโ 2-1. ์ค๋์ค ๋ฒ์ญ (ํญ1์ฉ) โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
async def translate_audio_async(file, src, tgt):
|
67 |
-
if not file: return "โ ๏ธ
|
68 |
|
69 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
# STT: Whisper API ์ฌ์ฉ
|
|
|
71 |
client = get_client()
|
72 |
-
with open(
|
73 |
transcript = await client.audio.transcriptions.create(
|
74 |
model="whisper-1",
|
75 |
file=audio_file,
|
76 |
language=src[:2].lower() # ์ธ์ด ์ฝ๋ ๊ฐ์ํ
|
77 |
)
|
78 |
|
|
|
|
|
|
|
|
|
79 |
orig_text = transcript.text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
trans_text = await gpt_translate(orig_text, src, tgt)
|
|
|
|
|
|
|
81 |
audio_path = await gpt_tts(trans_text, tgt)
|
82 |
|
83 |
return orig_text, trans_text, audio_path
|
84 |
except Exception as e:
|
85 |
print(f"์ค๋์ค ๋ฒ์ญ ์ค๋ฅ: {e}")
|
86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
def translate_audio(file, src, tgt):
    """Blocking wrapper that runs ``translate_audio_async`` to completion and returns its result."""
    pending = translate_audio_async(file, src, tgt)
    return asyncio.run(pending)
|
@@ -306,19 +427,83 @@ def realtime_four_sync(audio, src, state):
|
|
306 |
state["Thai"], state["Russian"], state)
|
307 |
|
308 |
# โโโ 5. UI โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
309 |
-
with gr.Blocks(title="SMARTok Demo") as demo:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
310 |
with gr.Tabs():
|
311 |
# ํญ 1 โ ์ค๋์ค ๋ฒ์ญ
|
312 |
-
with gr.TabItem("๐๏ธ
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
320 |
|
321 |
-
btn1.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
322 |
|
323 |
# ํญ 2 โ PDF ๋ฒ์ญ
|
324 |
with gr.TabItem("๐ PDF"):
|
|
|
1 |
+
# SMARTok Demo - 실시간 다국어 번역 시스템
|
2 |
+
#
|
3 |
+
# 필수 패키지:
|
4 |
+
# pip install gradio openai python-dotenv pdfplumber numpy websockets
|
5 |
+
#
|
6 |
+
# 선택 패키지 (비디오 처리):
|
7 |
+
# - ffmpeg 설치: sudo apt-get install ffmpeg (Linux) / brew install ffmpeg (Mac)
|
8 |
+
# - 또는 pip install moviepy
|
9 |
+
#
|
10 |
+
# 환경 변수:
|
11 |
+
# .env 파일에 OPENAI_API_KEY 설정 필수
|
12 |
+
|
13 |
import os, asyncio, json, tempfile, websockets, pdfplumber
|
14 |
import gradio as gr
|
15 |
import openai
|
16 |
from dotenv import load_dotenv
|
17 |
import numpy as np
|
18 |
import wave
|
19 |
+
import subprocess
|
20 |
+
import mimetypes
|
21 |
|
22 |
# โโโ 0. ์ด๊ธฐํ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
23 |
load_dotenv()
|
|
|
25 |
if not openai.api_key:
|
26 |
raise RuntimeError("OPENAI_API_KEY ๊ฐ .env ์ ์์ต๋๋ค!")
|
27 |
|
28 |
+
# ffmpeg 설치 확인
|
29 |
+
def check_ffmpeg():
    """Report whether the ``ffmpeg`` command-line tool can be executed.

    Runs ``ffmpeg -version`` with its output captured and treats a zero
    exit status as success.

    Returns:
        bool: True if the command ran and exited with status 0; False if
        the executable could not be launched or exited with an error.
    """
    try:
        subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
    except (OSError, subprocess.SubprocessError):
        # OSError: the executable is missing or not runnable.
        # SubprocessError: includes CalledProcessError raised by check=True.
        # A bare ``except`` would also swallow KeyboardInterrupt/SystemExit.
        return False
    return True
|
35 |
+
|
36 |
+
HAS_FFMPEG = check_ffmpeg()
|
37 |
+
if not HAS_FFMPEG:
|
38 |
+
print("โ ๏ธ ffmpeg๊ฐ ์ค์น๋์ด ์์ง ์์ต๋๋ค. ๋น๋์ค ์ฒ๋ฆฌ๊ฐ ์ ํ๋ ์ ์์ต๋๋ค.")
|
39 |
+
print("์ค์น ๋ฐฉ๋ฒ: sudo apt-get install ffmpeg (Linux) / brew install ffmpeg (Mac)")
|
40 |
+
|
41 |
LANG = ["Korean","English","Japanese","Chinese",
|
42 |
"Thai","Russian","Vietnamese","Spanish","French"]
|
43 |
VOICE = {l: ("nova" if l in ["Korean","Japanese","Chinese"] else "alloy")
|
|
|
90 |
return text, asyncio.run(gpt_translate(text, src, tgt))
|
91 |
|
92 |
# โโโ 2-1. ์ค๋์ค ๋ฒ์ญ (ํญ1์ฉ) โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
93 |
+
def extract_audio_from_video(video_path):
|
94 |
+
"""MP4 ๋ฑ ๋น๋์ค ํ์ผ์์ ์ค๋์ค ์ถ์ถ"""
|
95 |
+
audio_output = None
|
96 |
+
try:
|
97 |
+
# ์์ ์ค๋์ค ํ์ผ ์์ฑ
|
98 |
+
audio_output = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
|
99 |
+
audio_output.close()
|
100 |
+
|
101 |
+
# ๋ฐฉ๋ฒ 1: ffmpeg ์ฌ์ฉ ์๋
|
102 |
+
if HAS_FFMPEG:
|
103 |
+
cmd = [
|
104 |
+
'ffmpeg',
|
105 |
+
'-i', video_path,
|
106 |
+
'-vn', # ๋น๋์ค ์คํธ๋ฆผ ์ ๊ฑฐ
|
107 |
+
'-acodec', 'pcm_s16le', # WAV ํฌ๋งท
|
108 |
+
'-ar', '16000', # 16kHz ์ํ๋ง
|
109 |
+
'-ac', '1', # ๋ชจ๋
ธ
|
110 |
+
'-y', # ๋ฎ์ด์ฐ๊ธฐ
|
111 |
+
audio_output.name
|
112 |
+
]
|
113 |
+
|
114 |
+
result = subprocess.run(cmd, capture_output=True, text=True)
|
115 |
+
|
116 |
+
if result.returncode == 0:
|
117 |
+
return audio_output.name
|
118 |
+
else:
|
119 |
+
print(f"ffmpeg ์ค๋ฅ: {result.stderr}")
|
120 |
+
|
121 |
+
# ๋ฐฉ๋ฒ 2: moviepy ์ฌ์ฉ ์๋
|
122 |
+
try:
|
123 |
+
from moviepy.editor import VideoFileClip
|
124 |
+
print("moviepy๋ฅผ ์ฌ์ฉํ์ฌ ์ค๋์ค ์ถ์ถ ์ค...")
|
125 |
+
video = VideoFileClip(video_path)
|
126 |
+
video.audio.write_audiofile(
|
127 |
+
audio_output.name,
|
128 |
+
fps=16000,
|
129 |
+
nbytes=2,
|
130 |
+
codec='pcm_s16le',
|
131 |
+
verbose=False,
|
132 |
+
logger=None
|
133 |
+
)
|
134 |
+
video.close()
|
135 |
+
return audio_output.name
|
136 |
+
except ImportError:
|
137 |
+
raise Exception(
|
138 |
+
"๋น๋์ค ์ฒ๋ฆฌ๋ฅผ ์ํด ffmpeg ๋๋ moviepy๊ฐ ํ์ํฉ๋๋ค.\n"
|
139 |
+
"์ค์น: pip install moviepy ๋๋ ffmpeg ์ค์น"
|
140 |
+
)
|
141 |
+
except Exception as e:
|
142 |
+
raise Exception(f"moviepy ์ค๋ฅ: {str(e)}")
|
143 |
+
|
144 |
+
except Exception as e:
|
145 |
+
# ์ค๋ฅ ์ ์์ ํ์ผ ์ ๋ฆฌ
|
146 |
+
if audio_output and os.path.exists(audio_output.name):
|
147 |
+
os.unlink(audio_output.name)
|
148 |
+
raise e
|
149 |
+
|
150 |
# ISO-639-1 codes for the UI language names. The first two letters of the
# English name are not always the right code (Chinese -> zh, Spanish -> es).
_WHISPER_LANGUAGE_CODES = {
    "Korean": "ko",
    "English": "en",
    "Japanese": "ja",
    "Chinese": "zh",
    "Thai": "th",
    "Russian": "ru",
    "Vietnamese": "vi",
    "Spanish": "es",
    "French": "fr",
}


def _whisper_language_code(language_name):
    """Return the language code passed to the transcription call for *language_name*."""
    # Names missing from the table keep the earlier behaviour: first two letters, lower-cased.
    return _WHISPER_LANGUAGE_CODES.get(language_name, language_name[:2].lower())


async def translate_audio_async(file, src, tgt):
    """Transcribe an audio/video file, translate the transcript and synthesize speech.

    Video inputs (detected from the file name's MIME type) first have their
    audio extracted to a temporary WAV file, which is always removed before
    this coroutine returns.

    Args:
        file: Path of the uploaded or recorded file; falsy when nothing was provided.
        src: Source language name (e.g. "Korean").
        tgt: Target language name.

    Returns:
        tuple: ``(original_text, translated_text, tts_audio_path)`` on success.
        When there is no input, no speech is recognised, or an error occurs,
        returns ``(message, detail, None)`` instead of raising.
    """
    if not file:
        return "⚠️ 오디오/비디오 업로드 필요", "", None

    temp_audio_path = None
    try:
        # Decide by file name whether the audio must be extracted first.
        mime_type, _ = mimetypes.guess_type(file)
        audio_file_path = file

        if mime_type and mime_type.startswith('video/'):
            print(f"비디오 파일 감지: {mime_type}")
            print(f"파일 크기: {os.path.getsize(file) / 1024 / 1024:.1f} MB")
            print("비디오에서 오디오 추출 중... (시간이 걸릴 수 있습니다)")
            temp_audio_path = extract_audio_from_video(file)
            audio_file_path = temp_audio_path
            print("오디오 추출 완료!")

        # STT via the Whisper API
        print("음성 인식 중...")
        client = get_client()
        with open(audio_file_path, 'rb') as audio_file:
            transcript = await client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                language=_whisper_language_code(src),
            )

        orig_text = transcript.text
        if not orig_text.strip():
            return "⚠️ 음성이 감지되지 않았습니다", "", None

        print(f"인식된 텍스트: {orig_text[:50]}...")

        # Translation
        print(f"{src} → {tgt} 번역 중...")
        trans_text = await gpt_translate(orig_text, src, tgt)

        # TTS
        print("음성 합성 중...")
        audio_path = await gpt_tts(trans_text, tgt)

        return orig_text, trans_text, audio_path
    except Exception as e:
        print(f"오디오 번역 오류: {e}")

        error_msg = str(e)
        if "ffmpeg" in error_msg.lower():
            error_msg += "\n\n💡 해결 방법:\n1. ffmpeg 설치: sudo apt-get install ffmpeg\n2. 또는 pip install moviepy"

        return "⚠️ 번역 중 오류 발생", error_msg, None
    finally:
        # Single cleanup point for the extracted WAV, on every exit path.
        if temp_audio_path and os.path.exists(temp_audio_path):
            os.unlink(temp_audio_path)
|
208 |
|
209 |
def translate_audio(file, src, tgt):
    """Blocking wrapper that runs ``translate_audio_async`` to completion and returns its result."""
    pending = translate_audio_async(file, src, tgt)
    return asyncio.run(pending)
|
|
|
427 |
state["Thai"], state["Russian"], state)
|
428 |
|
429 |
# ─── 5. UI ──────────────────────────────────────────────────
|
430 |
+
with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as demo:
|
431 |
+
gr.Markdown(
|
432 |
+
"""
|
433 |
+
# ๐ SMARTok ์ค์๊ฐ ๋ฒ์ญ ์์คํ
|
434 |
+
|
435 |
+
๋ค๊ตญ์ด ์ค์๊ฐ ๋ฒ์ญ์ ์ง์ํ๋ ํตํฉ ๋ฒ์ญ ํ๋ซํผ
|
436 |
+
"""
|
437 |
+
)
|
438 |
+
|
439 |
with gr.Tabs():
|
440 |
# ํญ 1 โ ์ค๋์ค ๋ฒ์ญ
|
441 |
+
with gr.TabItem("๐๏ธ ์ค๋์ค/๋น๋์ค"):
|
442 |
+
gr.Markdown("### ๐ ์ค๋์ค/๋น๋์ค ํ์ผ ๋ฒ์ญ")
|
443 |
+
|
444 |
+
with gr.Row():
|
445 |
+
src1 = gr.Dropdown(LANG, value="Korean", label="์
๋ ฅ ์ธ์ด")
|
446 |
+
tgt1 = gr.Dropdown(LANG, value="English", label="์ถ๋ ฅ ์ธ์ด")
|
447 |
+
|
448 |
+
with gr.Tabs():
|
449 |
+
with gr.TabItem("๐ ํ์ผ ์
๋ก๋"):
|
450 |
+
# ํ์ผ ์
๋ก๋ - ์ค๋์ค์ ๋น๋์ค ๋ชจ๋ ์ง์
|
451 |
+
aud1_file = gr.File(
|
452 |
+
label="์ค๋์ค/๋น๋์ค ํ์ผ ์
๋ก๋",
|
453 |
+
file_types=[".mp3", ".wav", ".m4a", ".flac", ".ogg", ".opus",
|
454 |
+
".mp4", ".avi", ".mov", ".mkv", ".webm", ".flv"],
|
455 |
+
type="filepath"
|
456 |
+
)
|
457 |
+
gr.Markdown(
|
458 |
+
"๐ **์ง์ ํ์**\n"
|
459 |
+
"- ์ค๋์ค: MP3, WAV, M4A, FLAC, OGG, OPUS\n"
|
460 |
+
"- ๋น๋์ค: MP4, AVI, MOV, MKV, WebM, FLV\n\n"
|
461 |
+
"โ ๏ธ **์ฃผ์์ฌํญ**\n"
|
462 |
+
"- ๋น๋์ค ํ์ผ์ ์ค๋์ค ์ถ์ถ ์๊ฐ์ด ํ์ํฉ๋๋ค\n"
|
463 |
+
"- ๋์ฉ๋ ํ์ผ์ ์ฒ๋ฆฌ ์๊ฐ์ด ์ค๋ ๊ฑธ๋ฆด ์ ์์ต๋๋ค"
|
464 |
+
)
|
465 |
+
|
466 |
+
with gr.TabItem("๐ค ๋ง์ดํฌ ๋
น์"):
|
467 |
+
aud1_mic = gr.Audio(
|
468 |
+
sources=["microphone"],
|
469 |
+
type="filepath",
|
470 |
+
label="๋ง์ดํฌ ๋
น์"
|
471 |
+
)
|
472 |
+
gr.Markdown("๐ก **ํ**: ๋
น์ ํ '์ ์ง' ๋ฒํผ์ ๋๋ฌ์ฃผ์ธ์")
|
473 |
|
474 |
+
btn1 = gr.Button("๐ ๋ฒ์ญ ์์", variant="primary", size="lg")
|
475 |
+
|
476 |
+
# ์งํ ์ํ ํ์
|
477 |
+
status1 = gr.Textbox(label="์งํ ์ํ", value="๋๊ธฐ ์ค...", interactive=False)
|
478 |
+
|
479 |
+
with gr.Row():
|
480 |
+
with gr.Column():
|
481 |
+
o1 = gr.Textbox(label="๐ ์๋ฌธ", lines=6)
|
482 |
+
with gr.Column():
|
483 |
+
t1 = gr.Textbox(label="๐ ๋ฒ์ญ", lines=6)
|
484 |
+
|
485 |
+
a1 = gr.Audio(label="๐ ๋ฒ์ญ๋ ์์ฑ (TTS)", type="filepath", autoplay=True)
|
486 |
+
|
487 |
+
# ํ์ผ๏ฟฝ๏ฟฝ๏ฟฝ๋ ๋ง์ดํฌ ์ค ํ์ฑํ๋ ์
๋ ฅ ์ฌ์ฉ
|
488 |
+
def translate_with_status(file_input, mic_input, src, tgt):
    """Translate whichever input is set, preferring the uploaded file over the microphone recording.

    Args:
        file_input: Path from the file-upload component, or a falsy value.
        mic_input: Path from the microphone component, or a falsy value.
        src: Source language name.
        tgt: Target language name.

    Returns:
        The 3-tuple produced by ``translate_audio``; when neither input is
        set, a prompt message, an empty string and ``None``.
    """
    active_input = file_input or mic_input
    if not active_input:
        return "⚠️ 파일을 업로드하거나 녹음을 해주세요", "", None

    # The status textbox is not touched here; this only delegates to the synchronous translator.
    return translate_audio(active_input, src, tgt)
|
495 |
+
|
496 |
+
btn1.click(
|
497 |
+
lambda: "์ฒ๋ฆฌ ์ค... ์ ์๋ง ๊ธฐ๋ค๋ ค์ฃผ์ธ์ โณ",
|
498 |
+
outputs=status1
|
499 |
+
).then(
|
500 |
+
translate_with_status,
|
501 |
+
[aud1_file, aud1_mic, src1, tgt1],
|
502 |
+
[o1, t1, a1]
|
503 |
+
).then(
|
504 |
+
lambda: "โ
์๋ฃ!",
|
505 |
+
outputs=status1
|
506 |
+
)
|
507 |
|
508 |
# ํญ 2 โ PDF ๋ฒ์ญ
|
509 |
with gr.TabItem("๐ PDF"):
|