Spaces:

VIDraft
/

voice-trans

Running on Zero

App Files Files Community

openfree committed 17 days ago

Commit

e1fe24a

verified ·

1 Parent(s): 2b6f990

Update app.py

Browse files

Files changed (1) hide show

app.py +198 -13

app.py CHANGED Viewed

@@ -1,9 +1,23 @@
 import os, asyncio, json, tempfile, websockets, pdfplumber
 import gradio as gr
 import openai
 from dotenv import load_dotenv
 import numpy as np
 import wave
 # ─── 0. 초기화 ───────────────────────────────────────────────
 load_dotenv()
@@ -11,6 +25,19 @@ openai.api_key = os.getenv("OPENAI_API_KEY")
 if not openai.api_key:
     raise RuntimeError("OPENAI_API_KEY 가 .env 에 없습니다!")
 LANG = ["Korean","English","Japanese","Chinese",
         "Thai","Russian","Vietnamese","Spanish","French"]
 VOICE = {l: ("nova" if l in ["Korean","Japanese","Chinese"] else "alloy")
@@ -63,27 +90,121 @@ def translate_pdf(file, src, tgt):
     return text, asyncio.run(gpt_translate(text, src, tgt))
 # ─── 2-1. 오디오 번역 (탭1용) ────────────────────────────────
 async def translate_audio_async(file, src, tgt):
     """Transcribe an audio file, translate the text and synthesize speech.

     Args:
         file: Path of the audio file to transcribe; falsy when nothing was
             uploaded.
         src: Source language name (e.g. "Korean").
         tgt: Target language name.

     Returns:
         A 3-tuple ``(original_text, translated_text, tts_audio_path)``.
         When no file is given or an error occurs, the first two items carry
         a warning / error message and the third is ``None``.
     """
     if not file:
         return "⚠️ 오디오 업로드 필요", "", None
     # Whisper expects ISO-639-1 codes. Taking the first two letters of the
     # English language name is wrong for Chinese ("ch") and Spanish ("sp").
     whisper_codes = {
         "Korean": "ko", "English": "en", "Japanese": "ja", "Chinese": "zh",
         "Thai": "th", "Russian": "ru", "Vietnamese": "vi", "Spanish": "es",
         "French": "fr",
     }
     try:
         # STT via the Whisper API.
         client = get_client()
         with open(file, 'rb') as audio_file:
             transcript = await client.audio.transcriptions.create(
                 model="whisper-1",
                 file=audio_file,
                 # Unknown names keep the previous two-letter fallback.
                 language=whisper_codes.get(src, src[:2].lower()),
             )
         orig_text = transcript.text
         trans_text = await gpt_translate(orig_text, src, tgt)
         audio_path = await gpt_tts(trans_text, tgt)
         return orig_text, trans_text, audio_path
     except Exception as e:
         # UI boundary: report the failure in the output boxes instead of raising.
         print(f"오디오 번역 오류: {e}")
         return "⚠️ 번역 중 오류 발생", str(e), None
 def translate_audio(file, src, tgt):
     """Blocking wrapper that runs ``translate_audio_async`` to completion.

     The coroutine is executed with ``asyncio.run`` and its result is
     returned unchanged.
     """
     pending = translate_audio_async(file, src, tgt)
     return asyncio.run(pending)
@@ -306,19 +427,83 @@ def realtime_four_sync(audio, src, state):
             state["Thai"], state["Russian"], state)
 # ─── 5. UI ──────────────────────────────────────────────────
-with gr.Blocks(title="SMARTok Demo") as demo:
     with gr.Tabs():
         # 탭 1 – 오디오 번역
-        with gr.TabItem("🎙️ 오디오"):
-            src1 = gr.Dropdown(LANG, value="Korean", label="입력 언어")
-            tgt1 = gr.Dropdown(LANG, value="English", label="출력 언어")
-            aud1 = gr.Audio(sources=["microphone", "upload"], type="filepath")
-            btn1 = gr.Button("번역")
-            o1 = gr.Textbox(label="원문")
-            t1 = gr.Textbox(label="번역")
-            a1 = gr.Audio(label="TTS", type="filepath", autoplay=True)
-            btn1.click(translate_audio, [aud1, src1, tgt1], [o1, t1, a1])
         # 탭 2 – PDF 번역
         with gr.TabItem("📄 PDF"):

+# SMARTok Demo - 실시간 다국어 번역 시스템
+#
+# 필수 패키지:
+# pip install gradio openai python-dotenv pdfplumber numpy websockets
+#
+# 선택 패키지 (비디오 처리):
+# - ffmpeg 설치: sudo apt-get install ffmpeg (Linux) / brew install ffmpeg (Mac)
+# - 또는 pip install moviepy
+#
+# 환경 변수:
+# .env 파일에 OPENAI_API_KEY 설정 필요
 import os, asyncio, json, tempfile, websockets, pdfplumber
 import gradio as gr
 import openai
 from dotenv import load_dotenv
 import numpy as np
 import wave
+import subprocess
+import mimetypes
 # ─── 0. 초기화 ───────────────────────────────────────────────
 load_dotenv()
 if not openai.api_key:
     raise RuntimeError("OPENAI_API_KEY 가 .env 에 없습니다!")
# Check whether ffmpeg is installed.
def check_ffmpeg():
    """Return True if ``ffmpeg -version`` can be run successfully.

    A missing or non-executable binary, or a non-zero exit status, is
    reported as ``False`` rather than raised.
    """
    try:
        subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
    except (OSError, subprocess.SubprocessError):
        # OSError: the binary is missing or cannot be launched.
        # SubprocessError: ffmpeg started but exited with a non-zero status.
        # Deliberately not a bare ``except`` so Ctrl-C / SystemExit propagate.
        return False
    return True


HAS_FFMPEG = check_ffmpeg()
if not HAS_FFMPEG:
    print("⚠️ ffmpeg가 설치되어 있지 않습니다. 비디오 처리가 제한될 수 있습니다.")
    print("설치 방법: sudo apt-get install ffmpeg (Linux) / brew install ffmpeg (Mac)")
 LANG = ["Korean","English","Japanese","Chinese",
         "Thai","Russian","Vietnamese","Spanish","French"]
 VOICE = {l: ("nova" if l in ["Korean","Japanese","Chinese"] else "alloy")
     return text, asyncio.run(gpt_translate(text, src, tgt))
 # ─── 2-1. 오디오 번역 (탭1용) ────────────────────────────────
def extract_audio_from_video(video_path):
    """Extract the audio track of a video file into a temporary WAV file.

    ffmpeg is tried first when ``HAS_FFMPEG`` is true (16 kHz, mono, 16-bit
    PCM). If ffmpeg is not available or exits with a non-zero status,
    moviepy is tried as a fallback.

    Args:
        video_path: Path of the input video file (MP4 etc.).

    Returns:
        Path of the newly created ``.wav`` file. The caller is responsible
        for deleting it.

    Raises:
        RuntimeError: If moviepy is not importable or fails while extracting.
            The temporary file is removed before the error propagates.
    """
    # Only the path is needed; the handle is closed right away so the external
    # tool can (over)write the file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        wav_path = tmp.name
    try:
        # Method 1: ffmpeg.
        if HAS_FFMPEG:
            cmd = [
                'ffmpeg',
                '-i', video_path,
                '-vn',  # drop the video stream
                '-acodec', 'pcm_s16le',  # WAV (16-bit PCM)
                '-ar', '16000',  # 16 kHz sample rate
                '-ac', '1',  # mono
                '-y',  # overwrite the pre-created temp file
                wav_path,
            ]
            # errors="replace": undecodable bytes in ffmpeg's output must not
            # turn a successful extraction into a UnicodeDecodeError.
            result = subprocess.run(
                cmd, capture_output=True, text=True, errors="replace"
            )
            if result.returncode == 0:
                return wav_path
            print(f"ffmpeg 오류: {result.stderr}")
        # Method 2: moviepy.
        try:
            from moviepy.editor import VideoFileClip
            print("moviepy를 사용하여 오디오 추출 중...")
            video = VideoFileClip(video_path)
            try:
                if video.audio is None:
                    # Fail with a clear message instead of an AttributeError.
                    raise ValueError("비디오에 오디오 트랙이 없습니다")
                video.audio.write_audiofile(
                    wav_path,
                    fps=16000,
                    nbytes=2,
                    codec='pcm_s16le',
                    verbose=False,
                    logger=None
                )
            finally:
                # Always release the clip, even when writing fails.
                video.close()
            return wav_path
        except ImportError as exc:
            raise RuntimeError(
                "비디오 처리를 위해 ffmpeg 또는 moviepy가 필요합니다.\n"
                "설치: pip install moviepy 또는 ffmpeg 설치"
            ) from exc
        except Exception as exc:
            raise RuntimeError(f"moviepy 오류: {str(exc)}") from exc
    except BaseException:
        # Remove the temp file on any failure (including interrupts), then
        # re-raise with the original traceback.
        if os.path.exists(wav_path):
            os.unlink(wav_path)
        raise
 # Whisper expects ISO-639-1 codes. Taking the first two letters of the
 # English language name is wrong for Chinese ("ch") and Spanish ("sp").
 _WHISPER_LANG_CODES = {
     "Korean": "ko", "English": "en", "Japanese": "ja", "Chinese": "zh",
     "Thai": "th", "Russian": "ru", "Vietnamese": "vi", "Spanish": "es",
     "French": "fr",
 }
 # Video extensions accepted by the upload widget; used as a fallback because
 # mimetypes may not have a mapping for every one of them.
 _VIDEO_EXTENSIONS = frozenset({".mp4", ".avi", ".mov", ".mkv", ".webm", ".flv"})
 async def translate_audio_async(file, src, tgt):
     """Transcribe an audio/video file, translate the text and synthesize speech.

     Video inputs are first converted to a temporary WAV file with
     ``extract_audio_from_video``; that file is removed as soon as the
     transcription step finishes, whether it succeeded or not.

     Args:
         file: Path of the uploaded/recorded file; falsy when nothing was given.
         src: Source language name (e.g. "Korean").
         tgt: Target language name.

     Returns:
         A 3-tuple ``(original_text, translated_text, tts_audio_path)``.
         When there is no input, no speech is recognised, or an error occurs,
         the first two items carry a warning / error message and the third
         is ``None``.
     """
     if not file:
         return "⚠️ 오디오/비디오 업로드 필요", "", None
     temp_audio_path = None
     try:
         try:
             # Decide whether the input is a video: MIME type first, then
             # the file extension as a fallback.
             mime_type, _ = mimetypes.guess_type(file)
             extension = os.path.splitext(file)[1].lower()
             is_video = bool(mime_type and mime_type.startswith('video/'))
             is_video = is_video or extension in _VIDEO_EXTENSIONS
             audio_file_path = file
             if is_video:
                 print(f"비디오 파일 감지: {mime_type or extension}")
                 print(f"파일 크기: {os.path.getsize(file) / 1024 / 1024:.1f} MB")
                 print("비디오에서 오디오 추출 중... (시간이 걸릴 수 있습니다)")
                 temp_audio_path = extract_audio_from_video(file)
                 audio_file_path = temp_audio_path
                 print("오디오 추출 완료!")
             # STT via the Whisper API.
             print("음성 인식 중...")
             client = get_client()
             with open(audio_file_path, 'rb') as audio_file:
                 transcript = await client.audio.transcriptions.create(
                     model="whisper-1",
                     file=audio_file,
                     # Unknown names keep the previous two-letter fallback.
                     language=_WHISPER_LANG_CODES.get(src, src[:2].lower()),
                 )
         finally:
             # Single cleanup point for the extracted WAV file.
             if temp_audio_path and os.path.exists(temp_audio_path):
                 os.unlink(temp_audio_path)
         orig_text = transcript.text
         if not orig_text.strip():
             return "⚠️ 음성이 감지되지 않았습니다", "", None
         print(f"인식된 텍스트: {orig_text[:50]}...")
         # Translation.
         print(f"{src} → {tgt} 번역 중...")
         trans_text = await gpt_translate(orig_text, src, tgt)
         # TTS.
         print("음성 합성 중...")
         audio_path = await gpt_tts(trans_text, tgt)
         return orig_text, trans_text, audio_path
     except Exception as e:
         # UI boundary: report the failure in the output boxes instead of raising.
         print(f"오디오 번역 오류: {e}")
         error_msg = str(e)
         if "ffmpeg" in error_msg.lower():
             error_msg += "\n\n💡 해결 방법:\n1. ffmpeg 설치: sudo apt-get install ffmpeg\n2. 또는 pip install moviepy"
         return "⚠️ 번역 중 오류 발생", error_msg, None
 def translate_audio(file, src, tgt):
     """Synchronous entry point for ``translate_audio_async``.

     Builds the coroutine, drives it with ``asyncio.run`` and hands back its
     result unchanged.
     """
     coroutine = translate_audio_async(file, src, tgt)
     outcome = asyncio.run(coroutine)
     return outcome
             state["Thai"], state["Russian"], state)
 # ─── 5. UI ──────────────────────────────────────────────────
+with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as demo:
+    gr.Markdown(
+        """
+        # 🌍 SMARTok 실시간 번역 시스템
+        다국어 실시간 번역을 지원하는 통합 번역 플랫폼
+        """
+    )
     with gr.Tabs():
         # 탭 1 – 오디오 번역
+        with gr.TabItem("🎙️ 오디오/비디오"):
+            gr.Markdown("### 🌐 오디오/비디오 파일 번역")
+            with gr.Row():
+                src1 = gr.Dropdown(LANG, value="Korean", label="입력 언어")
+                tgt1 = gr.Dropdown(LANG, value="English", label="출력 언어")
+            with gr.Tabs():
+                with gr.TabItem("📁 파일 업로드"):
+                    # 파일 업로드 - 오디오와 비디오 모두 지원
+                    aud1_file = gr.File(
+                        label="오디오/비디오 파일 업로드",
+                        file_types=[".mp3", ".wav", ".m4a", ".flac", ".ogg", ".opus",
+                                   ".mp4", ".avi", ".mov", ".mkv", ".webm", ".flv"],
+                        type="filepath"
+                    )
+                    gr.Markdown(
+                        "📌 **지원 형식**\n"
+                        "- 오디오: MP3, WAV, M4A, FLAC, OGG, OPUS\n"
+                        "- 비디오: MP4, AVI, MOV, MKV, WebM, FLV\n\n"
+                        "⚠️ **주의사항**\n"
+                        "- 비디오 파일은 오디오 추출 시간이 필요합니다\n"
+                        "- 대용량 파일은 처리 시간이 오래 걸릴 수 있습니다"
+                    )
+                with gr.TabItem("🎤 마이크 녹음"):
+                    aud1_mic = gr.Audio(
+                        sources=["microphone"],
+                        type="filepath",
+                        label="마이크 녹음"
+                    )
+                    gr.Markdown("💡 **팁**: 녹음 후 '정지' 버튼을 눌러주세요")
+            btn1 = gr.Button("🔄 번역 시작", variant="primary", size="lg")
+            # 진행 상태 표시
+            status1 = gr.Textbox(label="진행 상태", value="대기 중...", interactive=False)
+            with gr.Row():
+                with gr.Column():
+                    o1 = gr.Textbox(label="📝 원문", lines=6)
+                with gr.Column():
+                    t1 = gr.Textbox(label="📝 번역", lines=6)
+            a1 = gr.Audio(label="🔊 번역된 음성 (TTS)", type="filepath", autoplay=True)
+            # 파일이나 마이크 중 활성화된 입력 사용
+            def translate_with_status(file_input, mic_input, src, tgt):
+                active_input = file_input if file_input else mic_input
+                if not active_input:
+                    return "⚠️ 파일을 업로드하거나 녹음을 해주세요", "", None
+                # 상태 업데이트는 동기 함수에서 처리
+                return translate_audio(active_input, src, tgt)
+            btn1.click(
+                lambda: "처리 중... 잠시만 기다려주세요 ⏳",
+                outputs=status1
+            ).then(
+                translate_with_status,
+                [aud1_file, aud1_mic, src1, tgt1],
+                [o1, t1, a1]
+            ).then(
+                lambda: "✅ 완료!",
+                outputs=status1
+            )
         # 탭 2 – PDF 번역
         with gr.TabItem("📄 PDF"):