Kit-Lemonfoot committed on
Commit
918ef2d
1 Parent(s): 827b7fd

Added some more indies, moved Dramatubers to their own tab, a few fixes

Browse files
Files changed (1) hide show
  1. app.py +22 -41
app.py CHANGED
@@ -35,6 +35,9 @@ limitation = os.getenv("SYSTEM") == "spaces"
35
  #limitation=True
36
  language_dict = tts_order_voice
37
 
 
 
 
38
  audio_mode = []
39
  f0method_mode = []
40
  if limitation is True:
@@ -60,8 +63,10 @@ vcArr.append(VC(48000, config))
60
  def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_voice, f0_up_key, f0_method, index_rate, filter_radius, resample_sr, rms_mix_rate, protect, record_button):
61
  try:
62
  #Setup audio
 
63
  if vc_audio_mode == "Input path" or "Youtube" and vc_input != "":
64
  audio, sr = librosa.load(vc_input, sr=16000, mono=True)
 
65
  elif vc_audio_mode == "Upload audio":
66
  if vc_upload is None:
67
  return "Please upload an audio file.", None
@@ -81,7 +86,11 @@ def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_v
81
  if tts_text is None or tts_voice is None or tts_text=="":
82
  return "You need to enter text and select a voice.", None
83
  voice = language_dict[tts_voice]
84
- asyncio.run(edge_tts.Communicate(tts_text, voice).save("tts.mp3"))
 
 
 
 
85
  try:
86
  audio, sr = librosa.load("tts.mp3", sr=16000, mono=True)
87
  except:
@@ -103,6 +112,13 @@ def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_v
103
  if sampling_rate != 16000:
104
  audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
105
  tts_text = "Recorded Audio"
 
 
 
 
 
 
 
106
  times = [0, 0, 0]
107
  f0_up_key = int(f0_up_key)
108
 
@@ -187,6 +203,8 @@ def load_model():
187
  model_index = f"weights/{category_folder}/{character_name}/{info['feature_retrieval_library']}"
188
  if info['feature_retrieval_library'] == "None":
189
  model_index = None
 
 
190
  model_path = f"weights/{category_folder}/{character_name}/{model_name}"
191
  cpt = torch.load(f"weights/{category_folder}/{character_name}/{model_name}", map_location="cpu")
192
  model_version = cpt.get("version", "v1")
@@ -282,9 +300,6 @@ def change_audio_mode(vc_audio_mode):
282
  gr.Audio.update(visible=False),
283
  gr.Audio.update(visible=False),
284
  gr.Audio.update(visible=False),
285
- gr.Slider.update(visible=False),
286
- gr.Audio.update(visible=False),
287
- gr.Button.update(visible=False),
288
  # EdgeTTS
289
  gr.Textbox.update(visible=False),
290
  gr.Dropdown.update(visible=False),
@@ -304,9 +319,6 @@ def change_audio_mode(vc_audio_mode):
304
  gr.Audio.update(visible=False),
305
  gr.Audio.update(visible=False),
306
  gr.Audio.update(visible=False),
307
- gr.Slider.update(visible=False),
308
- gr.Audio.update(visible=False),
309
- gr.Button.update(visible=False),
310
  # EdgeTTS
311
  gr.Textbox.update(visible=False),
312
  gr.Dropdown.update(visible=False),
@@ -326,9 +338,6 @@ def change_audio_mode(vc_audio_mode):
326
  gr.Audio.update(visible=True),
327
  gr.Audio.update(visible=True),
328
  gr.Audio.update(visible=True),
329
- gr.Slider.update(visible=True),
330
- gr.Audio.update(visible=True),
331
- gr.Button.update(visible=True),
332
  # TTS
333
  gr.Textbox.update(visible=False),
334
  gr.Dropdown.update(visible=False),
@@ -348,9 +357,6 @@ def change_audio_mode(vc_audio_mode):
348
  gr.Audio.update(visible=False),
349
  gr.Audio.update(visible=False),
350
  gr.Audio.update(visible=False),
351
- gr.Slider.update(visible=False),
352
- gr.Audio.update(visible=False),
353
- gr.Button.update(visible=False),
354
  # TTS
355
  gr.Textbox.update(visible=True),
356
  gr.Dropdown.update(visible=True),
@@ -370,9 +376,6 @@ def change_audio_mode(vc_audio_mode):
370
  gr.Audio.update(visible=False),
371
  gr.Audio.update(visible=False),
372
  gr.Audio.update(visible=False),
373
- gr.Slider.update(visible=False),
374
- gr.Audio.update(visible=False),
375
- gr.Button.update(visible=False),
376
  # TTS
377
  gr.Textbox.update(visible=False),
378
  gr.Dropdown.update(visible=False),
@@ -392,9 +395,6 @@ def change_audio_mode(vc_audio_mode):
392
  gr.Audio.update(visible=False),
393
  gr.Audio.update(visible=False),
394
  gr.Audio.update(visible=False),
395
- gr.Slider.update(visible=False),
396
- gr.Audio.update(visible=False),
397
- gr.Button.update(visible=False),
398
  # TTS
399
  gr.Textbox.update(visible=False, interactive=True),
400
  gr.Dropdown.update(visible=False, interactive=True),
@@ -535,19 +535,6 @@ if __name__ == '__main__':
535
  ],
536
  outputs=[vc_log, vc_output]
537
  )
538
-
539
- vc_volume = gr.Slider(
540
- minimum=0,
541
- maximum=10,
542
- label="Vocal volume",
543
- value=4,
544
- interactive=True,
545
- step=1,
546
- info="Adjust vocal volume (Default: 4}",
547
- visible=False
548
- )
549
- vc_combined_output = gr.Audio(label="Output Combined Audio", visible=False)
550
- vc_combine = gr.Button("Combine",variant="primary", visible=False)
551
 
552
  with gr.Row():
553
  with gr.Column():
@@ -582,11 +569,6 @@ if __name__ == '__main__':
582
  inputs=[vc_link, vc_download_audio, vc_split_model],
583
  outputs=[vc_vocal_preview, vc_inst_preview, vc_audio_preview, vc_input]
584
  )
585
- vc_combine.click(
586
- fn=combine_vocal_and_inst,
587
- inputs=[vc_output, vc_volume, vc_split_model],
588
- outputs=[vc_combined_output]
589
- )
590
  vc_audio_mode.change(
591
  fn=change_audio_mode,
592
  inputs=[vc_audio_mode],
@@ -600,20 +582,19 @@ if __name__ == '__main__':
600
  vc_vocal_preview,
601
  vc_inst_preview,
602
  vc_audio_preview,
603
- vc_volume,
604
- vc_combined_output,
605
- vc_combine,
606
  tts_text,
607
  tts_voice,
608
  record_button
609
  ]
610
  )
 
611
  gr.Markdown(
612
  "## <center>Credit to:\n"
613
  "#### <center>Original devs:\n"
614
  "<center>the RVC Project, lj1995, zomehwh, sysf\n\n"
615
  "#### <center>Model creators:\n"
616
- "<center>dacoolkid44, Hijack, Maki Ligon, megaaziib, KitLemonfoot, yeey5, Sui, MahdeenSky, Itaxhix, Acato, Kyuubical, Listra92, IshimaIshimsky, ZomballTH, Jotape91, RigidSpinner, RandomAssBettel, Mimizukari, Oida, Shu-Kun, Nhat Minh, Ardha27, Legitdark, TempoHawk, 0x3e9, Kaiaya, Skeetawn, Sonphantrung, Pianissimo, RavenCutie21, HinaBl, Brazurl, PetroOne, Rubinlord, Gloomwastragic, Sunesu, Aimbo, Act8113, Blyxeen\n"
617
  )
618
  if limitation is True:
619
  app.queue(concurrency_count=1, max_size=20, api_open=config.api).launch(share=config.colab)
 
35
  #limitation=True
36
  language_dict = tts_order_voice
37
 
38
+ authors = ["dacoolkid44", "Hijack", "Maki Ligon", "megaaziib", "KitLemonfoot", "yeey5", "Sui", "MahdeenSky"]
39
+ authorskip = ["dacoolkid44 & Hijack", "dacoolkid44 & Hijack & Maki Ligon", "Kit Lemonfoot / NSHFB"]
40
+
41
  audio_mode = []
42
  f0method_mode = []
43
  if limitation is True:
 
63
  def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_voice, f0_up_key, f0_method, index_rate, filter_radius, resample_sr, rms_mix_rate, protect, record_button):
64
  try:
65
  #Setup audio
66
+ audio=None
67
  if vc_audio_mode == "Input path" or "Youtube" and vc_input != "":
68
  audio, sr = librosa.load(vc_input, sr=16000, mono=True)
69
+ tts_text = "YouTube Audio"
70
  elif vc_audio_mode == "Upload audio":
71
  if vc_upload is None:
72
  return "Please upload an audio file.", None
 
86
  if tts_text is None or tts_voice is None or tts_text=="":
87
  return "You need to enter text and select a voice.", None
88
  voice = language_dict[tts_voice]
89
+ try:
90
+ asyncio.run(edge_tts.Communicate(tts_text, voice).save("tts.mp3"))
91
+ except:
92
+ print("Failed to get E-TTS handle. A restart may be needed soon.")
93
+ return "ERROR: Failed to communicate with Edge-TTS. The Edge-TTS service may be down or cannot communicate. Please try another method or try again later.", None
94
  try:
95
  audio, sr = librosa.load("tts.mp3", sr=16000, mono=True)
96
  except:
 
112
  if sampling_rate != 16000:
113
  audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
114
  tts_text = "Recorded Audio"
115
+
116
+ if audio is None:
117
+ if vc_audio_mode == "Edge-TTS":
118
+ print("Failed to get E-TTS handle. A restart may be needed soon.")
119
+ return "ERROR: Failed to obtain a correct response from Edge-TTS. The Edge-TTS service may be down or unable to communicate. Please try another method or try again later.", None
120
+ return "ERROR: Unknown audio error. Please try again.", None
121
+
122
  times = [0, 0, 0]
123
  f0_up_key = int(f0_up_key)
124
 
 
203
  model_index = f"weights/{category_folder}/{character_name}/{info['feature_retrieval_library']}"
204
  if info['feature_retrieval_library'] == "None":
205
  model_index = None
206
+ if not (model_author in authors or model_author in authorskip):
207
+ authors.append(model_author)
208
  model_path = f"weights/{category_folder}/{character_name}/{model_name}"
209
  cpt = torch.load(f"weights/{category_folder}/{character_name}/{model_name}", map_location="cpu")
210
  model_version = cpt.get("version", "v1")
 
300
  gr.Audio.update(visible=False),
301
  gr.Audio.update(visible=False),
302
  gr.Audio.update(visible=False),
 
 
 
303
  # EdgeTTS
304
  gr.Textbox.update(visible=False),
305
  gr.Dropdown.update(visible=False),
 
319
  gr.Audio.update(visible=False),
320
  gr.Audio.update(visible=False),
321
  gr.Audio.update(visible=False),
 
 
 
322
  # EdgeTTS
323
  gr.Textbox.update(visible=False),
324
  gr.Dropdown.update(visible=False),
 
338
  gr.Audio.update(visible=True),
339
  gr.Audio.update(visible=True),
340
  gr.Audio.update(visible=True),
 
 
 
341
  # TTS
342
  gr.Textbox.update(visible=False),
343
  gr.Dropdown.update(visible=False),
 
357
  gr.Audio.update(visible=False),
358
  gr.Audio.update(visible=False),
359
  gr.Audio.update(visible=False),
 
 
 
360
  # TTS
361
  gr.Textbox.update(visible=True),
362
  gr.Dropdown.update(visible=True),
 
376
  gr.Audio.update(visible=False),
377
  gr.Audio.update(visible=False),
378
  gr.Audio.update(visible=False),
 
 
 
379
  # TTS
380
  gr.Textbox.update(visible=False),
381
  gr.Dropdown.update(visible=False),
 
395
  gr.Audio.update(visible=False),
396
  gr.Audio.update(visible=False),
397
  gr.Audio.update(visible=False),
 
 
 
398
  # TTS
399
  gr.Textbox.update(visible=False, interactive=True),
400
  gr.Dropdown.update(visible=False, interactive=True),
 
535
  ],
536
  outputs=[vc_log, vc_output]
537
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
538
 
539
  with gr.Row():
540
  with gr.Column():
 
569
  inputs=[vc_link, vc_download_audio, vc_split_model],
570
  outputs=[vc_vocal_preview, vc_inst_preview, vc_audio_preview, vc_input]
571
  )
 
 
 
 
 
572
  vc_audio_mode.change(
573
  fn=change_audio_mode,
574
  inputs=[vc_audio_mode],
 
582
  vc_vocal_preview,
583
  vc_inst_preview,
584
  vc_audio_preview,
585
+
 
 
586
  tts_text,
587
  tts_voice,
588
  record_button
589
  ]
590
  )
591
+ authStr=", ".join(authors)
592
  gr.Markdown(
593
  "## <center>Credit to:\n"
594
  "#### <center>Original devs:\n"
595
  "<center>the RVC Project, lj1995, zomehwh, sysf\n\n"
596
  "#### <center>Model creators:\n"
597
+ f"<center>{authStr}\n"
598
  )
599
  if limitation is True:
600
  app.queue(concurrency_count=1, max_size=20, api_open=config.api).launch(share=config.colab)