Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
- README.md +2 -2
- audio_redteam_demo.py +35 -18
- audio_transform_demo.py +5 -4
- requirements.txt +1 -0
README.md
CHANGED
@@ -2,5 +2,5 @@
|
|
2 |
title: Audio-RedTeaming-Demo
|
3 |
app_file: audio_redteam_demo.py
|
4 |
sdk: gradio
|
5 |
-
sdk_version:
|
6 |
-
---
|
|
|
2 |
title: Audio-RedTeaming-Demo
|
3 |
app_file: audio_redteam_demo.py
|
4 |
sdk: gradio
|
5 |
+
sdk_version: 5.31.0
|
6 |
+
---
|
audio_redteam_demo.py
CHANGED
@@ -8,15 +8,24 @@ TARGET_MODELS = [
|
|
8 |
"gpt-4o-audio-preview",
|
9 |
"gemini-2.5-pro-preview-05-06",
|
10 |
"gemini-2.0-flash",
|
|
|
11 |
]
|
12 |
TTS_PROVIDERS = ["kokoro", "smallestai"]
|
|
|
|
|
|
|
|
|
|
|
13 |
VOICE_IDS = [
|
14 |
-
"af_heart (Kokoro)",
|
15 |
-
"karen (
|
16 |
-
"rebecca (
|
17 |
-
"chetan (
|
18 |
-
"george (
|
19 |
-
"
|
|
|
|
|
|
|
20 |
]
|
21 |
TRANSFORM_TYPES = ["none", "speed", "pitch", "echo", "reverb", "noise"]
|
22 |
|
@@ -67,7 +76,7 @@ def create_transform_ui():
|
|
67 |
minimum=50,
|
68 |
maximum=1000,
|
69 |
value=250,
|
70 |
-
step=
|
71 |
label="Echo Delay (ms)",
|
72 |
info="Delay in milliseconds for the echo effect",
|
73 |
visible=False,
|
@@ -157,6 +166,7 @@ def create_redteam_demo():
|
|
157 |
prompt,
|
158 |
target_model,
|
159 |
tts_provider,
|
|
|
160 |
voice_id,
|
161 |
transform_type,
|
162 |
speed_rate: Optional[float] = None,
|
@@ -198,7 +208,10 @@ def create_redteam_demo():
|
|
198 |
print("Voice ID: ", voice_id)
|
199 |
# Initialize the orchestrator with selected parameters
|
200 |
orchestrator = AudioRedTeamOrchestrator(
|
201 |
-
tts_provider=tts_provider,
|
|
|
|
|
|
|
202 |
)
|
203 |
# print("CCCC")
|
204 |
# Create a temporary directory for saving files
|
@@ -241,6 +254,9 @@ def create_redteam_demo():
|
|
241 |
tts_provider = gr.Dropdown(
|
242 |
choices=TTS_PROVIDERS, label="TTS Provider", value=TTS_PROVIDERS[0]
|
243 |
)
|
|
|
|
|
|
|
244 |
voice_id = gr.Dropdown(choices=VOICE_IDS, label="Voice ID", value=VOICE_IDS[0])
|
245 |
transform_type = gr.Dropdown(
|
246 |
choices=TRANSFORM_TYPES,
|
@@ -283,7 +299,7 @@ def create_redteam_demo():
|
|
283 |
minimum=50,
|
284 |
maximum=1000,
|
285 |
value=250,
|
286 |
-
step=
|
287 |
label="Echo Delay (ms)",
|
288 |
interactive=True,
|
289 |
info="Delay in milliseconds for the echo effect",
|
@@ -333,15 +349,15 @@ def create_redteam_demo():
|
|
333 |
# Function to update visible components based on transform type
|
334 |
def update_transform_ui(transform_type):
|
335 |
return [
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
]
|
346 |
|
347 |
# print("FFFF")
|
@@ -374,6 +390,7 @@ def create_redteam_demo():
|
|
374 |
prompt,
|
375 |
target_model,
|
376 |
tts_provider,
|
|
|
377 |
voice_id,
|
378 |
transform_type,
|
379 |
speed_rate,
|
|
|
8 |
"gpt-4o-audio-preview",
|
9 |
"gemini-2.5-pro-preview-05-06",
|
10 |
"gemini-2.0-flash",
|
11 |
+
"gemini-2.5-flash-preview-04-17",
|
12 |
]
|
13 |
TTS_PROVIDERS = ["kokoro", "smallestai"]
|
14 |
+
TTS_MODELS = [
|
15 |
+
"Kokoro-82M",
|
16 |
+
"lightning",
|
17 |
+
"lightning-large",
|
18 |
+
]
|
19 |
VOICE_IDS = [
|
20 |
+
"af_heart (American, F, Kokoro-82M)",
|
21 |
+
"karen (British, F, lightning)",
|
22 |
+
"rebecca (American, F, lightning)",
|
23 |
+
"chetan (Indian, M, lightning)",
|
24 |
+
"george (American, M, lightning)",
|
25 |
+
"solomon (British, M, lightning-large)",
|
26 |
+
"saina (Indian, F, lightning)",
|
27 |
+
"angela (British, F, lightning-large)",
|
28 |
+
"nyah (Australian, F, lightning-large)",
|
29 |
]
|
30 |
TRANSFORM_TYPES = ["none", "speed", "pitch", "echo", "reverb", "noise"]
|
31 |
|
|
|
76 |
minimum=50,
|
77 |
maximum=1000,
|
78 |
value=250,
|
79 |
+
step=1,
|
80 |
label="Echo Delay (ms)",
|
81 |
info="Delay in milliseconds for the echo effect",
|
82 |
visible=False,
|
|
|
166 |
prompt,
|
167 |
target_model,
|
168 |
tts_provider,
|
169 |
+
tts_model,
|
170 |
voice_id,
|
171 |
transform_type,
|
172 |
speed_rate: Optional[float] = None,
|
|
|
208 |
print("Voice ID: ", voice_id)
|
209 |
# Initialize the orchestrator with selected parameters
|
210 |
orchestrator = AudioRedTeamOrchestrator(
|
211 |
+
tts_provider=tts_provider,
|
212 |
+
model_name=target_model,
|
213 |
+
voice_id=voice_id,
|
214 |
+
tts_model=tts_model,
|
215 |
)
|
216 |
# print("CCCC")
|
217 |
# Create a temporary directory for saving files
|
|
|
254 |
tts_provider = gr.Dropdown(
|
255 |
choices=TTS_PROVIDERS, label="TTS Provider", value=TTS_PROVIDERS[0]
|
256 |
)
|
257 |
+
tts_model = gr.Dropdown(
|
258 |
+
choices=TTS_MODELS, label="TTS Model", value=TTS_MODELS[0]
|
259 |
+
)
|
260 |
voice_id = gr.Dropdown(choices=VOICE_IDS, label="Voice ID", value=VOICE_IDS[0])
|
261 |
transform_type = gr.Dropdown(
|
262 |
choices=TRANSFORM_TYPES,
|
|
|
299 |
minimum=50,
|
300 |
maximum=1000,
|
301 |
value=250,
|
302 |
+
step=1,
|
303 |
label="Echo Delay (ms)",
|
304 |
interactive=True,
|
305 |
info="Delay in milliseconds for the echo effect",
|
|
|
349 |
# Function to update visible components based on transform type
|
350 |
def update_transform_ui(transform_type):
|
351 |
return [
|
352 |
+
gr.update(visible=transform_type == "speed"),
|
353 |
+
gr.update(visible=transform_type == "pitch"),
|
354 |
+
gr.update(visible=transform_type == "pitch"),
|
355 |
+
gr.update(visible=transform_type == "echo"),
|
356 |
+
gr.update(visible=transform_type == "echo"),
|
357 |
+
gr.update(visible=transform_type == "reverb"),
|
358 |
+
gr.update(visible=transform_type == "reverb"),
|
359 |
+
gr.update(visible=transform_type == "noise"),
|
360 |
+
gr.update(visible=transform_type == "noise"),
|
361 |
]
|
362 |
|
363 |
# print("FFFF")
|
|
|
390 |
prompt,
|
391 |
target_model,
|
392 |
tts_provider,
|
393 |
+
tts_model,
|
394 |
voice_id,
|
395 |
transform_type,
|
396 |
speed_rate,
|
audio_transform_demo.py
CHANGED
@@ -429,10 +429,11 @@ class AudioRedTeamOrchestrator:
|
|
429 |
def __init__(
|
430 |
self,
|
431 |
tts_provider: Any = "kokoro",
|
|
|
432 |
model_name: str = "gpt-4o-audio-preview",
|
433 |
voice_id: str = "af_heart",
|
434 |
):
|
435 |
-
self.tts_client = self.load_tts(tts_provider, voice_id)
|
436 |
|
437 |
# Initialize target model to None
|
438 |
self.generate_client = self.load_model(model_name)
|
@@ -441,10 +442,10 @@ class AudioRedTeamOrchestrator:
|
|
441 |
self.evaluate_client = OpenAI(api_key=openai_api_key)
|
442 |
self.waveform_transform = WaveformTransform()
|
443 |
|
444 |
-
def load_tts(self, tts_provider: str, voice_id: str = "af_heart"):
|
445 |
if tts_provider == "smallestai":
|
446 |
return SmallestAITTS(
|
447 |
-
model_name=
|
448 |
api_key=smallest_api_key,
|
449 |
provider=tts_provider,
|
450 |
endpoint_url="https://waves-api.smallest.ai/api/v1/",
|
@@ -452,7 +453,7 @@ class AudioRedTeamOrchestrator:
|
|
452 |
)
|
453 |
elif tts_provider == "kokoro":
|
454 |
return KokoroTTS(
|
455 |
-
model_name=
|
456 |
voice_id=voice_id,
|
457 |
)
|
458 |
else:
|
|
|
429 |
def __init__(
|
430 |
self,
|
431 |
tts_provider: Any = "kokoro",
|
432 |
+
tts_model: str = "Kokoro-82M",
|
433 |
model_name: str = "gpt-4o-audio-preview",
|
434 |
voice_id: str = "af_heart",
|
435 |
):
|
436 |
+
self.tts_client = self.load_tts(tts_provider, tts_model, voice_id)
|
437 |
|
438 |
# Initialize target model to None
|
439 |
self.generate_client = self.load_model(model_name)
|
|
|
442 |
self.evaluate_client = OpenAI(api_key=openai_api_key)
|
443 |
self.waveform_transform = WaveformTransform()
|
444 |
|
445 |
+
def load_tts(self, tts_provider: str, tts_model: str, voice_id: str = "af_heart"):
|
446 |
if tts_provider == "smallestai":
|
447 |
return SmallestAITTS(
|
448 |
+
model_name=tts_model,
|
449 |
api_key=smallest_api_key,
|
450 |
provider=tts_provider,
|
451 |
endpoint_url="https://waves-api.smallest.ai/api/v1/",
|
|
|
453 |
)
|
454 |
elif tts_provider == "kokoro":
|
455 |
return KokoroTTS(
|
456 |
+
model_name=tts_model,
|
457 |
voice_id=voice_id,
|
458 |
)
|
459 |
else:
|
requirements.txt
CHANGED
@@ -38,3 +38,4 @@ pydub==0.25.1
|
|
38 |
websockets==13.0.0
|
39 |
torch==2.3.0
|
40 |
torchaudio==2.3.0
|
|
|
|
38 |
websockets==13.0.0
|
39 |
torch==2.3.0
|
40 |
torchaudio==2.3.0
|
41 |
+
kokoro==0.9.4
|