# File size: 3,617 Bytes
# 4d98d92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import html

import gradio as gr
from transformers import pipeline

# Build the audio-classification pipeline once, at import time, so every
# request handled by classify_accent reuses the same loaded model.
ACCENT_MODEL_ID = "dima806/english_accents_classification"
pipe = pipeline(
    "audio-classification",
    model=ACCENT_MODEL_ID,
)

# Define the inference function with styled, color-coded output
def classify_accent(audio):
    """Classify the accent of an audio clip and render the result as HTML.

    Args:
        audio: Input forwarded unchanged to the module-level ``pipe``
            audio-classification pipeline. ``None`` (no clip supplied) is
            rejected up front with the "Invalid input" message.

    Returns:
        str: An HTML fragment. On success, a heading naming the first
        prediction followed by a table listing every returned label with
        its score as a percentage (first row highlighted; the pipeline is
        expected to return predictions best-first). On failure, a red
        ``<p>`` element describing the problem. Any ``Exception`` raised
        while classifying is converted into such an error paragraph
        instead of being propagated.
    """
    invalid_input_html = "<p style='color: red; font-weight: bold;'>⚠️ Error: Invalid input.<br> Please end the recording then press submit.</p>"

    # Reject a missing clip explicitly rather than depending on the exact
    # wording of the exception the pipeline raises for it.
    if audio is None:
        return invalid_input_html

    try:
        result = pipe(audio)
        if not result:
            return "<p style='color: red; font-weight: bold;'>⚠️ No prediction returned. Please try a different audio file.</p>"

        # Start HTML table with styling
        table_head = """

        <table style="

            width: 100%; 

            border-collapse: collapse; 

            font-family: Arial, sans-serif; 

            margin-top: 1em;

        ">

            <thead>

                <tr style="border-bottom: 2px solid #4CAF50; background-color: #f2f2f2;">

                    <th style="text-align:left; padding: 8px; font-size: 1.1em; color: #333;">Accent</th>

                    <th style="text-align:left; padding: 8px; font-size: 1.1em; color: #333;">Confidence</th>

                </tr>

            </thead>

            <tbody>

        """

        # Collect the rows and join once instead of growing a string with +=.
        rows = []
        for i, r in enumerate(result):
            # Labels are interpolated into markup, so escape them first.
            label = html.escape(r['label'].capitalize())
            score = f"{r['score'] * 100:.2f}%"

            if i == 0:
                # Highlight top accent with green background and bold text
                row = f"""

                <tr style="background-color:#d4edda; font-weight: bold; color: #155724;">

                    <td style="padding: 8px; border-bottom: 1px solid #c3e6cb;">{label}</td>

                    <td style="padding: 8px; border-bottom: 1px solid #c3e6cb;">{score}</td>

                </tr>

                """
            else:
                row = f"""

                <tr style="color: #333;">

                    <td style="padding: 8px; border-bottom: 1px solid #ddd;">{label}</td>

                    <td style="padding: 8px; border-bottom: 1px solid #ddd;">{score}</td>

                </tr>

                """
            rows.append(row)

        table = table_head + "".join(rows) + "</tbody></table>"

        top_label = html.escape(result[0]['label'].capitalize())
        return f"""

        <h3 style='color: #2E7D32; font-family: Arial, sans-serif;'>

            🎤 Predicted Accent: <span style='font-weight:bold'>{top_label}</span>

        </h3>

        {table}

        """

    except Exception as e:
        error_message = str(e)
        if "numpy ndarray" in error_message.lower():
            return invalid_input_html
        # Exception text can contain characters such as '<' or quotes (for
        # example a type repr); escape it so it is shown verbatim instead of
        # being parsed as markup.
        return f"<p style='color: red; font-weight: bold;'>⚠️ Unexpected Error: {html.escape(error_message)}<br>Please try again with a different audio file.</p>"

# Markdown blurb shown under the title of the app
app_description = (
    "Upload or record an English audio sample to detect the speaker's accent.\n\n"
    "**Supported accents:** American, British, Indian, African, Australian.\n"
    "Audio Classification Model:\n"
    "[dima806/english_accents_classification](https://huggingface.co/dima806/english_accents_classification)\n"
    "Dataset: https://www.kaggle.com/code/dima806/common-voice-accent-classification\n"
)

# Assemble the Gradio interface: audio is handed to classify_accent as a file
# path, and the returned markup is rendered through an HTML component.
demo = gr.Interface(
    fn=classify_accent,
    inputs=gr.Audio(type="filepath", label="🎙 Record or Upload English Audio"),
    outputs=gr.HTML(),
    title="🌍 English Accent Classifier",
    description=app_description,
    flagging_mode="never",
    theme="default",
)

# Start the server and request a public share link
demo.launch(share=True)