hjconstas NealCaren committed on
Commit
e15a07e
0 Parent(s):

Duplicate from NealCaren/TranscribeX

Browse files

Co-authored-by: Neal Caren <NealCaren@users.noreply.huggingface.co>

Files changed (4) hide show
  1. .gitattributes +35 -0
  2. README.md +13 -0
  3. app.py +71 -0
  4. requirements.txt +3 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: TranscribeX
3
+ emoji: 👀
4
+ colorFrom: yellow
5
+ colorTo: green
6
+ sdk: streamlit
7
+ sdk_version: 1.21.0
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: NealCaren/TranscribeX
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisperx
2
+ import streamlit as st
3
+ import torch
4
+ import tempfile
5
+ import subprocess
6
+
7
+
8
def transcribe(audio_file):
    """Transcribe, align and diarize an audio file, showing each stage in Streamlit.

    Args:
        audio_file: Path to the audio file to process. If nothing usable exists
            at this path, the raw bytes of the module-level ``uploaded_file``
            (the Streamlit upload) are written there first.

    Returns:
        None. Results are rendered with ``st.write``.

    The Hugging Face token needed by the diarization pipeline is read from the
    ``HF_TOKEN`` environment variable (configure it as a secret of the Space).
    Without it, transcription and alignment still run and diarization is skipped.
    """
    import os  # local import: the file's import block does not provide os

    # torch and whisperx name the GPU backend "cuda"; "gpu" is not a valid device.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    batch_size = 16  # reduce if low on GPU mem
    # "int8" keeps memory use low; "float16" is an option when more GPU memory
    # is available.
    compute_type = "int8"
    # SECURITY: access tokens must never be committed to source control. Any
    # token that was previously hard-coded here should be revoked.
    hf_token = os.environ.get("HF_TOKEN")

    # The caller's conversion step may not have produced a file; fall back to
    # the raw upload so the audio loader below has something to decode.
    if not os.path.isfile(audio_file) or os.path.getsize(audio_file) == 0:
        with open(audio_file, 'wb') as f:
            f.write(uploaded_file.getvalue())

    # 1. Transcribe with original whisper (batched)
    model = whisperx.load_model("tiny", device=device, compute_type=compute_type)

    audio = whisperx.load_audio(audio_file)
    result = model.transcribe(audio, batch_size=batch_size)
    st.write("Transcribed! Here's what we have so far:")
    st.write(result["segments"])  # before alignment

    # 2. Align whisper output
    model_a, metadata = whisperx.load_align_model(
        language_code=result["language"], device=device
    )
    result = whisperx.align(
        result["segments"],
        model_a,
        metadata,
        audio,
        device,
        return_char_alignments=False,
    )
    st.write("Aligned! Here's what we have so far:")
    st.write(result["segments"])  # after alignment

    # 3. Assign speaker labels
    if not hf_token:
        st.warning(
            "Speaker diarization skipped: set the HF_TOKEN environment "
            "variable to a Hugging Face access token to enable it."
        )
        return

    diarize_model = whisperx.DiarizationPipeline(use_auth_token=hf_token, device=device)

    # min_speakers / max_speakers can be passed here when they are known
    diarize_segments = diarize_model(audio_file)

    result = whisperx.assign_word_speakers(diarize_segments, result)
    st.write(diarize_segments)
    st.write(result["segments"])  # segments are now assigned speaker IDs
54
+
55
+
56
# --- Streamlit page: upload form and submit handler --------------------------
st.title("Automated Transcription")

form = st.form(key='my_form')
uploaded_file = form.file_uploader("Choose a file")

submit = form.form_submit_button("Transcribe!")


if submit:
    if uploaded_file is None:
        # Submitting the form without a file would otherwise fail later on.
        st.warning("Please choose an audio file before pressing Transcribe!")
    else:
        # The context manager removes the scratch directory once processing ends.
        with tempfile.TemporaryDirectory() as tmp_dir:
            # The upload is an in-memory object, not a path, so it has to be
            # written to disk before ffmpeg can read it.
            source_file = tmp_dir + '/upload'
            with open(source_file, 'wb') as f:
                f.write(uploaded_file.getvalue())

            # 16 kHz mono 16-bit PCM wav
            temp_file = tmp_dir + '/mono.wav'
            # Argument list without a shell: nothing derived from the upload is
            # ever interpreted by a shell.
            cmd = [
                "ffmpeg", "-y", "-i", source_file,
                "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1",
                temp_file,
            ]
            try:
                converted = subprocess.run(cmd, check=False).returncode == 0
            except OSError:
                # ffmpeg could not be started (e.g. it is not installed)
                converted = False

            if not converted:
                # Best effort: hand the raw upload to the transcriber, which
                # decodes the audio itself.
                with open(temp_file, 'wb') as f:
                    f.write(uploaded_file.getvalue())

            transcribe(temp_file)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ git+https://github.com/m-bain/whisperx.git
2
+ streamlit
3
+ pandas