abdullah0101 committed
Commit 3859913 · Parent(s): 9fe6f54

Add full application code and deps

Files changed (9)
  1. Dockerfile +15 -0
  2. app.py +4 -0
  3. app/__init__.py +0 -0
  4. app/accent_classifier.py +32 -0
  5. app/config.py +10 -0
  6. app/main.py +43 -0
  7. app/utils.py +70 -0
  8. requirements.txt +13 -0
  9. ui/demo.py +54 -0
Dockerfile ADDED
@@ -0,0 +1,15 @@
+ FROM python:3.10-slim
+
+ RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg git && rm -rf /var/lib/apt/lists/*
+
+ WORKDIR /app
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY . .
+
+ ENV PYTHONUNBUFFERED=1 \
+     TRANSFORMERS_CACHE=/app/.cache/hf
+
+ # default: FastAPI, but you can override CMD in HF Spaces Runtime
+ CMD ["uvicorn", "app.main:api", "--host", "0.0.0.0", "--port", "8000"]
app.py ADDED
@@ -0,0 +1,4 @@
+ """Entry-point for Hugging Face Spaces (Gradio SDK)."""
+ from ui.demo import demo
+
+ demo.launch(server_name="0.0.0.0", server_port=7860)
app/__init__.py ADDED
File without changes
app/accent_classifier.py ADDED
@@ -0,0 +1,32 @@
+ """Lightweight Wav2Vec-based English accent ID."""
+ from functools import lru_cache
+ from typing import Dict
+
+ import torch
+ import torchaudio
+ from transformers import AutoProcessor, AutoModelForAudioClassification
+ import numpy as np
+
+ from .config import BASE_MODEL_ID, SEGMENT_SECONDS
+
+ class AccentClassifier:
+     def __init__(self, device: str | None = None):
+         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
+         self.processor = AutoProcessor.from_pretrained(BASE_MODEL_ID)
+         self.model = AutoModelForAudioClassification.from_pretrained(BASE_MODEL_ID).to(self.device)
+         self.labels = self.model.config.id2label
+
+     @torch.inference_mode()
+     def classify(self, wav: str) -> Dict[str, str | int]:
+         wav_arr, sr = torchaudio.load(wav)
+         wav_arr = wav_arr.mean(dim=0)  # down-mix to mono (uploads may be stereo)
+         if sr != 16000:  # the model expects 16 kHz input
+             wav_arr = torchaudio.functional.resample(wav_arr, sr, 16000)
+             sr = 16000
+         wav_arr = wav_arr.numpy()[: sr * SEGMENT_SECONDS]
+         inp = self.processor(wav_arr, sampling_rate=sr, return_tensors="pt")
+         inp = {k: v.to(self.device) for k, v in inp.items()}
+         logits = self.model(**inp).logits[0]
+         probs = torch.softmax(logits, dim=-1).cpu().numpy()
+         idx = int(np.argmax(probs))
+         return {"accent": self.labels[idx], "confidence": int(probs[idx] * 100)}
+
+ @lru_cache(maxsize=1)
+ def get_classifier() -> AccentClassifier:
+     return AccentClassifier()
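A minimal local smoke test for the classifier (a sketch; `sample.wav` is a hypothetical speech file, and the first call downloads the model from the Hub):

    # run from the repo root so the `app` package is importable
    from app.accent_classifier import get_classifier

    clf = get_classifier()             # lru_cache makes this a process-wide singleton
    print(clf.classify("sample.wav"))  # e.g. {'accent': ..., 'confidence': ...}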
app/config.py ADDED
@@ -0,0 +1,10 @@
+ """Centralised paths & constants."""
+ from pathlib import Path
+
+ ROOT = Path(__file__).resolve().parent.parent
+ MODELS_DIR = ROOT / "models"
+ AUDIO_CACHE = ROOT / "cache" / "audio"
+ AUDIO_CACHE.mkdir(parents=True, exist_ok=True)
+
+ BASE_MODEL_ID = "dima806/english_accents_classification"  # HF model
+ SEGMENT_SECONDS = 30  # audio length fed to the classifier
app/main.py ADDED
@@ -0,0 +1,43 @@
+ from fastapi import FastAPI, UploadFile, File, HTTPException
+ from pydantic import BaseModel
+ from pathlib import Path
+ import tempfile
+
+ from .utils import download_video, extract_audio, trim_silence
+ from .accent_classifier import get_classifier
+
+ api = FastAPI()
+
+ class URLBody(BaseModel):
+     url: str
+
+ @api.post("/analyze/url")
+ async def analyze_url(body: URLBody):
+     with tempfile.TemporaryDirectory() as td:
+         tdir = Path(td)
+         video = await download_video(body.url, tdir)
+         wav = tdir / "aud.wav"
+         await extract_audio(video, wav)
+         wav = trim_silence(wav)
+         return get_classifier().classify(str(wav))
+
+ @api.post("/analyze/upload")
+ async def analyze_upload(file: UploadFile = File(...)):
+     if not (file.content_type or "").startswith(("audio", "video")):
+         raise HTTPException(400, "Unsupported file type")
+     # keep only the extension as the tempfile suffix
+     with tempfile.NamedTemporaryFile(delete=False, suffix=Path(file.filename).suffix) as tmp:
+         tmp.write(await file.read())
+         tmp.flush()
+         path = Path(tmp.name)
+     if file.content_type.startswith("video"):
+         wav = path.with_suffix(".wav")
+         await extract_audio(path, wav)
+     else:
+         wav = path
+     wav = trim_silence(wav)
+     return get_classifier().classify(str(wav))
+
+ @api.get("/healthz")
+ async def health():
+     return {"status": "ok"}
app/utils.py ADDED
@@ -0,0 +1,70 @@
+ import asyncio
+ import subprocess
+ from pathlib import Path
+ from typing import List
+
+ import torch
+ import torchaudio
+ from yt_dlp import YoutubeDL
+ import webrtcvad
+
+ from .config import AUDIO_CACHE
+
+ # ---------------------------------------------------------------------------
+ # ffmpeg helpers
+ # ---------------------------------------------------------------------------
+
+ def _run(cmd: List[str]):
+     proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+     if proc.returncode != 0:
+         raise RuntimeError(proc.stderr.decode())
+
+ # ---------------------------------------------------------------------------
+ # Video → Audio
+ # ---------------------------------------------------------------------------
+ async def download_video(url: str, out_dir: Path) -> Path:
+     """Async wrapper around yt-dlp to pull remote video assets."""
+     ydl_opts = {
+         "quiet": True,
+         "no_warnings": True,
+         "outtmpl": str(out_dir / "download.%(ext)s"),
+         "format": "bestvideo+bestaudio/best",
+     }
+     loop = asyncio.get_running_loop()
+
+     def _job():
+         with YoutubeDL(ydl_opts) as ydl:
+             ydl.download([url])
+
+     await loop.run_in_executor(None, _job)
+     return next(out_dir.glob("download.*"))
+
+ async def extract_audio(video_path: Path, wav_path: Path, sr: int = 16000):
+     cmd = [
+         "ffmpeg", "-y", "-i", str(video_path),
+         "-vn", "-ac", "1", "-ar", str(sr), str(wav_path)
+     ]
+     loop = asyncio.get_running_loop()
+     await loop.run_in_executor(None, _run, cmd)
+
+ # ---------------------------------------------------------------------------
+ # VAD trimming (WebRTC)
+ # ---------------------------------------------------------------------------
+
+ def _frame_gen(frame_ms, pcm16, sr):
+     n = int(sr * (frame_ms / 1000.0) * 2)  # bytes per frame: 16-bit mono samples
+     for i in range(0, len(pcm16) - n + 1, n):  # skip the trailing partial frame, which webrtcvad rejects
+         yield pcm16[i : i + n]
+
+ def trim_silence(wav_path: Path, aggressiveness: int = 3) -> Path:
+     sig, sr = torchaudio.load(str(wav_path))
+     sig = sig.squeeze(0).numpy()
+     vad = webrtcvad.Vad(aggressiveness)
+     frames = list(_frame_gen(30, (sig * 32767).astype("int16").tobytes(), sr))
+     voiced = [vad.is_speech(f, sr) for f in frames]
+     if not any(voiced):
+         return wav_path
+     first, last = voiced.index(True), len(voiced) - 1 - voiced[::-1].index(True)
+     spf = int(sr * 0.03)  # samples per 30 ms frame (480 at 16 kHz)
+     kept = sig[first * spf : (last + 1) * spf]
+     out = wav_path.with_name(wav_path.stem + "_trim.wav")
+     torchaudio.save(str(out), torch.from_numpy(kept).unsqueeze(0), sr)
+     return out
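These helpers chain into an offline pipeline without the API or UI; a sketch assuming a local clip.mp4 and ffmpeg on PATH:

    import asyncio
    from pathlib import Path
    from app.utils import extract_audio, trim_silence

    async def main():
        wav = Path("clip.wav")
        await extract_audio(Path("clip.mp4"), wav)  # mono 16 kHz WAV via ffmpeg
        print(trim_silence(wav))                    # path of the VAD-trimmed copy

    asyncio.run(main())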
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ fastapi==0.111.0  # only needed if you use /app/main.py via uvicorn
+ uvicorn[standard]==0.30.1
+ yt-dlp==2024.05.27
+ ffmpeg-python==0.2.0
+ webrtcvad==2.0.10
+ transformers==4.41.1
+ accelerate==0.30.1
+ torch==2.3.0  # torchaudio 2.3.0 pins torch 2.3.0
+ scikit-learn==1.5.0
+ pydantic==2.7.1
+ torchaudio==2.3.0
+ gradio==4.34.0
+ aiohttp==3.9.5
ui/demo.py ADDED
@@ -0,0 +1,54 @@
+ import asyncio
+ import gradio as gr
+ import tempfile
+ from pathlib import Path
+
+ from app.utils import download_video, extract_audio, trim_silence
+ from app.accent_classifier import get_classifier
+
+ clf = get_classifier()
+
+ async def _url_pipeline(url: str):
+     with tempfile.TemporaryDirectory() as td:
+         tdir = Path(td)
+         video = await download_video(url, tdir)
+         wav = tdir / "aud.wav"
+         await extract_audio(video, wav)
+         wav = trim_silence(wav)
+         return clf.classify(str(wav))
+
+ def analyze_url(url: str):
+     return asyncio.run(_url_pipeline(url))
+
+ def analyze_file(file):
+     # gr.File hands back a filepath string in Gradio 4; older versions pass a tempfile wrapper
+     path = Path(file if isinstance(file, str) else file.name)
+     if path.suffix.lower() in {".mp4", ".mov", ".mkv"}:
+         wav = path.with_suffix(".wav")
+         asyncio.run(extract_audio(path, wav))
+     else:
+         wav = path
+     wav = trim_silence(wav)
+     return clf.classify(str(wav))
+
+ def fmt(res):
+     if not res:
+         return "Analysis failed."
+     return f"**Accent:** {res['accent']}\n\n**Confidence:** {res['confidence']}%"
+
+ with gr.Blocks(title="English Accent Detector") as demo:
+     gr.Markdown("## REM Waste – Accent Screening Tool")
+     with gr.Tab("From URL"):
+         url_in = gr.Text(label="Public video URL (Loom, MP4, YouTube, …)")
+         btn = gr.Button("Analyze")
+         out = gr.Markdown()
+         btn.click(lambda u: fmt(analyze_url(u)), inputs=url_in, outputs=out)
+     with gr.Tab("Upload File"):
+         file_in = gr.File()
+         btn2 = gr.Button("Analyze")
+         out2 = gr.Markdown()
+         btn2.click(lambda f: fmt(analyze_file(f)), inputs=file_in, outputs=out2)
+
+ if __name__ == "__main__":
+     demo.launch()