Kevin676 committed on
Commit
563b74b
1 Parent(s): a17c2a0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -3
app.py CHANGED
@@ -2,10 +2,26 @@ import torch
2
  from peft import PeftModel
3
  import transformers
4
  import gradio as gr
 
 
 
 
 
 
 
5
 
6
  import whisper
7
  model1 = whisper.load_model("small")
8
 
 
 
 
 
 
 
 
 
 
9
  assert (
10
  "LlamaTokenizer" in transformers._import_structure["models.llama"]
11
  ), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
@@ -135,7 +151,22 @@ def evaluate(
135
  )
136
  s = generation_output.sequences[0]
137
  output = tokenizer.decode(s)
138
- return output.split("### Response:")[1].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
 
141
  g = gr.Interface(
@@ -154,8 +185,13 @@ g = gr.Interface(
154
  outputs=[
155
  gr.inputs.Textbox(
156
  lines=5,
157
- label="Output",
158
- )
 
 
 
 
 
159
  ],
160
  title="🦙🌲 Alpaca-LoRA",
161
  description="Alpaca-LoRA is a 7B-parameter LLaMA model finetuned to follow instructions. It is trained on the [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset and makes use of the Huggingface LLaMA implementation. For more information, please visit [the project's website](https://github.com/tloen/alpaca-lora).",
 
2
  from peft import PeftModel
3
  import transformers
4
  import gradio as gr
5
+ import os
6
+ os.system('pip install voicefixer --upgrade')
7
+ from voicefixer import VoiceFixer
8
+ voicefixer = VoiceFixer()
9
+
10
+ from TTS.api import TTS
11
+ tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True)
12
 
13
  import whisper
14
  model1 = whisper.load_model("small")
15
 
16
+ import torchaudio
17
+ from speechbrain.pretrained import SpectralMaskEnhancement
18
+
19
+ enhance_model = SpectralMaskEnhancement.from_hparams(
20
+ source="speechbrain/metricgan-plus-voicebank",
21
+ savedir="pretrained_models/metricgan-plus-voicebank",
22
+ run_opts={"device":"cuda"},
23
+ )
24
+
25
  assert (
26
  "LlamaTokenizer" in transformers._import_structure["models.llama"]
27
  ), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
 
151
  )
152
  s = generation_output.sequences[0]
153
  output = tokenizer.decode(s)
154
+
155
+ tts.tts_to_file(output.split("### Response:")[1].strip(), speaker_wav = upload, language="en", file_path="output.wav")
156
+
157
+ voicefixer.restore(input="output.wav", # input wav file path
158
+ output="audio1.wav", # output wav file path
159
+ cuda=True, # whether to use gpu acceleration
160
+ mode = 0) # You can try out mode 0, 1, or 2 to find out the best result
161
+
162
+ noisy = enhance_model.load_audio(
163
+ "audio1.wav"
164
+ ).unsqueeze(0)
165
+
166
+ enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
167
+ torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
168
+
169
+ return [result.text, output.split("### Response:")[1].strip(), "enhanced.wav"]
170
 
171
 
172
  g = gr.Interface(
 
185
  outputs=[
186
  gr.inputs.Textbox(
187
  lines=5,
188
+ label="Speech to Text",
189
+ ),
190
+ gr.inputs.Textbox(
191
+ lines=5,
192
+ label="Alpaca Output",
193
+ ),
194
+ gr.Audio(label="Audio with Custom Voice"),
195
  ],
196
  title="🦙🌲 Alpaca-LoRA",
197
  description="Alpaca-LoRA is a 7B-parameter LLaMA model finetuned to follow instructions. It is trained on the [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset and makes use of the Huggingface LLaMA implementation. For more information, please visit [the project's website](https://github.com/tloen/alpaca-lora).",