jena-shreyas committed on
Commit
232ff56
·
verified ·
1 Parent(s): ce2ed27

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -2,5 +2,5 @@
2
  title: Audio-RedTeaming-Demo
3
  app_file: audio_redteam_demo.py
4
  sdk: gradio
5
- sdk_version: 3.50.2
6
- ---
 
2
  title: Audio-RedTeaming-Demo
3
  app_file: audio_redteam_demo.py
4
  sdk: gradio
5
+ sdk_version: 5.31.0
6
+ ---
audio_redteam_demo.py CHANGED
@@ -8,15 +8,24 @@ TARGET_MODELS = [
8
  "gpt-4o-audio-preview",
9
  "gemini-2.5-pro-preview-05-06",
10
  "gemini-2.0-flash",
 
11
  ]
12
  TTS_PROVIDERS = ["kokoro", "smallestai"]
 
 
 
 
 
13
  VOICE_IDS = [
14
- "af_heart (Kokoro)",
15
- "karen (SmallestAI)",
16
- "rebecca (SmallestAI)",
17
- "chetan (SmallestAI)",
18
- "george (SmallestAI)",
19
- "saina (SmallestAI)",
 
 
 
20
  ]
21
  TRANSFORM_TYPES = ["none", "speed", "pitch", "echo", "reverb", "noise"]
22
 
@@ -67,7 +76,7 @@ def create_transform_ui():
67
  minimum=50,
68
  maximum=1000,
69
  value=250,
70
- step=10,
71
  label="Echo Delay (ms)",
72
  info="Delay in milliseconds for the echo effect",
73
  visible=False,
@@ -157,6 +166,7 @@ def create_redteam_demo():
157
  prompt,
158
  target_model,
159
  tts_provider,
 
160
  voice_id,
161
  transform_type,
162
  speed_rate: Optional[float] = None,
@@ -198,7 +208,10 @@ def create_redteam_demo():
198
  print("Voice ID: ", voice_id)
199
  # Initialize the orchestrator with selected parameters
200
  orchestrator = AudioRedTeamOrchestrator(
201
- tts_provider=tts_provider, model_name=target_model, voice_id=voice_id
 
 
 
202
  )
203
  # print("CCCC")
204
  # Create a temporary directory for saving files
@@ -241,6 +254,9 @@ def create_redteam_demo():
241
  tts_provider = gr.Dropdown(
242
  choices=TTS_PROVIDERS, label="TTS Provider", value=TTS_PROVIDERS[0]
243
  )
 
 
 
244
  voice_id = gr.Dropdown(choices=VOICE_IDS, label="Voice ID", value=VOICE_IDS[0])
245
  transform_type = gr.Dropdown(
246
  choices=TRANSFORM_TYPES,
@@ -283,7 +299,7 @@ def create_redteam_demo():
283
  minimum=50,
284
  maximum=1000,
285
  value=250,
286
- step=10,
287
  label="Echo Delay (ms)",
288
  interactive=True,
289
  info="Delay in milliseconds for the echo effect",
@@ -333,15 +349,15 @@ def create_redteam_demo():
333
  # Function to update visible components based on transform type
334
  def update_transform_ui(transform_type):
335
  return [
336
- speed_rate.update(visible=transform_type == "speed"),
337
- pitch_steps.update(visible=transform_type == "pitch"),
338
- pitch_bins.update(visible=transform_type == "pitch"),
339
- echo_delay.update(visible=transform_type == "echo"),
340
- echo_volume.update(visible=transform_type == "echo"),
341
- reverb_rir.update(visible=transform_type == "reverb"),
342
- reverb_method.update(visible=transform_type == "reverb"),
343
- noise_file.update(visible=transform_type == "noise"),
344
- noise_volume.update(visible=transform_type == "noise"),
345
  ]
346
 
347
  # print("FFFF")
@@ -374,6 +390,7 @@ def create_redteam_demo():
374
  prompt,
375
  target_model,
376
  tts_provider,
 
377
  voice_id,
378
  transform_type,
379
  speed_rate,
 
8
  "gpt-4o-audio-preview",
9
  "gemini-2.5-pro-preview-05-06",
10
  "gemini-2.0-flash",
11
+ "gemini-2.5-flash-preview-04-17",
12
  ]
13
  TTS_PROVIDERS = ["kokoro", "smallestai"]
14
+ TTS_MODELS = [
15
+ "Kokoro-82M",
16
+ "lightning",
17
+ "lightning-large",
18
+ ]
19
  VOICE_IDS = [
20
+ "af_heart (American, F, Kokoro-82M)",
21
+ "karen (British, F, lightning)",
22
+ "rebecca (American, F, lightning)",
23
+ "chetan (Indian, M, lightning)",
24
+ "george (American, M, lightning)",
25
+ "solomon (British, M, lightning-large)",
26
+ "saina (Indian, F, lightning)",
27
+ "angela (British, F, lightning-large)",
28
+ "nyah (Australian, F, lightning-large)",
29
  ]
30
  TRANSFORM_TYPES = ["none", "speed", "pitch", "echo", "reverb", "noise"]
31
 
 
76
  minimum=50,
77
  maximum=1000,
78
  value=250,
79
+ step=1,
80
  label="Echo Delay (ms)",
81
  info="Delay in milliseconds for the echo effect",
82
  visible=False,
 
166
  prompt,
167
  target_model,
168
  tts_provider,
169
+ tts_model,
170
  voice_id,
171
  transform_type,
172
  speed_rate: Optional[float] = None,
 
208
  print("Voice ID: ", voice_id)
209
  # Initialize the orchestrator with selected parameters
210
  orchestrator = AudioRedTeamOrchestrator(
211
+ tts_provider=tts_provider,
212
+ model_name=target_model,
213
+ voice_id=voice_id,
214
+ tts_model=tts_model,
215
  )
216
  # print("CCCC")
217
  # Create a temporary directory for saving files
 
254
  tts_provider = gr.Dropdown(
255
  choices=TTS_PROVIDERS, label="TTS Provider", value=TTS_PROVIDERS[0]
256
  )
257
+ tts_model = gr.Dropdown(
258
+ choices=TTS_MODELS, label="TTS Model", value=TTS_MODELS[0]
259
+ )
260
  voice_id = gr.Dropdown(choices=VOICE_IDS, label="Voice ID", value=VOICE_IDS[0])
261
  transform_type = gr.Dropdown(
262
  choices=TRANSFORM_TYPES,
 
299
  minimum=50,
300
  maximum=1000,
301
  value=250,
302
+ step=1,
303
  label="Echo Delay (ms)",
304
  interactive=True,
305
  info="Delay in milliseconds for the echo effect",
 
349
  # Function to update visible components based on transform type
350
  def update_transform_ui(transform_type):
351
  return [
352
+ gr.update(visible=transform_type == "speed"),
353
+ gr.update(visible=transform_type == "pitch"),
354
+ gr.update(visible=transform_type == "pitch"),
355
+ gr.update(visible=transform_type == "echo"),
356
+ gr.update(visible=transform_type == "echo"),
357
+ gr.update(visible=transform_type == "reverb"),
358
+ gr.update(visible=transform_type == "reverb"),
359
+ gr.update(visible=transform_type == "noise"),
360
+ gr.update(visible=transform_type == "noise"),
361
  ]
362
 
363
  # print("FFFF")
 
390
  prompt,
391
  target_model,
392
  tts_provider,
393
+ tts_model,
394
  voice_id,
395
  transform_type,
396
  speed_rate,
audio_transform_demo.py CHANGED
@@ -429,10 +429,11 @@ class AudioRedTeamOrchestrator:
429
  def __init__(
430
  self,
431
  tts_provider: Any = "kokoro",
 
432
  model_name: str = "gpt-4o-audio-preview",
433
  voice_id: str = "af_heart",
434
  ):
435
- self.tts_client = self.load_tts(tts_provider, voice_id)
436
 
437
  # Initialize target model to None
438
  self.generate_client = self.load_model(model_name)
@@ -441,10 +442,10 @@ class AudioRedTeamOrchestrator:
441
  self.evaluate_client = OpenAI(api_key=openai_api_key)
442
  self.waveform_transform = WaveformTransform()
443
 
444
- def load_tts(self, tts_provider: str, voice_id: str = "af_heart"):
445
  if tts_provider == "smallestai":
446
  return SmallestAITTS(
447
- model_name="lightning",
448
  api_key=smallest_api_key,
449
  provider=tts_provider,
450
  endpoint_url="https://waves-api.smallest.ai/api/v1/",
@@ -452,7 +453,7 @@ class AudioRedTeamOrchestrator:
452
  )
453
  elif tts_provider == "kokoro":
454
  return KokoroTTS(
455
- model_name="Kokoro-82M",
456
  voice_id=voice_id,
457
  )
458
  else:
 
429
  def __init__(
430
  self,
431
  tts_provider: Any = "kokoro",
432
+ tts_model: str = "Kokoro-82M",
433
  model_name: str = "gpt-4o-audio-preview",
434
  voice_id: str = "af_heart",
435
  ):
436
+ self.tts_client = self.load_tts(tts_provider, tts_model, voice_id)
437
 
438
  # Initialize target model to None
439
  self.generate_client = self.load_model(model_name)
 
442
  self.evaluate_client = OpenAI(api_key=openai_api_key)
443
  self.waveform_transform = WaveformTransform()
444
 
445
+ def load_tts(self, tts_provider: str, tts_model: str, voice_id: str = "af_heart"):
446
  if tts_provider == "smallestai":
447
  return SmallestAITTS(
448
+ model_name=tts_model,
449
  api_key=smallest_api_key,
450
  provider=tts_provider,
451
  endpoint_url="https://waves-api.smallest.ai/api/v1/",
 
453
  )
454
  elif tts_provider == "kokoro":
455
  return KokoroTTS(
456
+ model_name=tts_model,
457
  voice_id=voice_id,
458
  )
459
  else:
requirements.txt CHANGED
@@ -38,3 +38,4 @@ pydub==0.25.1
38
  websockets==13.0.0
39
  torch==2.3.0
40
  torchaudio==2.3.0
 
 
38
  websockets==13.0.0
39
  torch==2.3.0
40
  torchaudio==2.3.0
41
+ kokoro==0.9.4