游雁 committed on
Commit
72963f9
1 Parent(s): 7470a36
Files changed (3) hide show
  1. README.md +5 -2
  2. README_zh.md +5 -2
  3. demo.py +27 -0
README.md CHANGED
@@ -11,8 +11,7 @@ SenseVoice is a speech foundation model with multiple speech understanding capab
11
 
12
  <div align="center">
13
  <h4>
14
- <a href="https://www.modelscope.cn/studios/iic/SenseVoice"> Online Demo </a>
15
- |<a href="https://fun-audio-llm.github.io/"> Homepage </a>
16
  |<a href="#What's News"> What's News </a>
17
  |<a href="#Benchmarks"> Benchmarks </a>
18
  |<a href="#Install"> Install </a>
@@ -23,6 +22,10 @@ SenseVoice is a speech foundation model with multiple speech understanding capab
23
  Model Zoo:
24
  [modelscope](https://www.modelscope.cn/models/iic/SenseVoiceSmall), [huggingface](https://huggingface.co/FunAudioLLM/SenseVoiceSmall)
25
 
 
 
 
 
26
  </div>
27
 
28
 
 
11
 
12
  <div align="center">
13
  <h4>
14
+ <a href="https://fun-audio-llm.github.io/"> Homepage </a>
 
15
  |<a href="#What's News"> What's News </a>
16
  |<a href="#Benchmarks"> Benchmarks </a>
17
  |<a href="#Install"> Install </a>
 
22
  Model Zoo:
23
  [modelscope](https://www.modelscope.cn/models/iic/SenseVoiceSmall), [huggingface](https://huggingface.co/FunAudioLLM/SenseVoiceSmall)
24
 
25
+ Online Demo:
26
+ [modelscope demo](https://www.modelscope.cn/studios/iic/SenseVoice), [huggingface space](https://huggingface.co/spaces/FunAudioLLM/SenseVoice)
27
+
28
+
29
  </div>
30
 
31
 
README_zh.md CHANGED
@@ -10,8 +10,7 @@ SenseVoice是具有音频理解能力的音频基础模型,包括语音识别
10
  [//]: # (<div align="center"><img src="image/sensevoice2.png" width="700"/> </div>)
11
 
12
  <h4>
13
- <a href="https://www.modelscope.cn/studios/iic/SenseVoice"> 在线体验 </a>
14
- |<a href="#What's New"> 文档主页 </a>
15
  |<a href="#核心功能"> 核心功能 </a>
16
  </h4>
17
  <h4>
@@ -23,6 +22,10 @@ SenseVoice是具有音频理解能力的音频基础模型,包括语音识别
23
  </h4>
24
 
25
  模型仓库:中国大陆用户推荐 [modelscope](https://www.modelscope.cn/models/iic/SenseVoiceSmall),海外用户推荐 [huggingface](https://huggingface.co/FunAudioLLM/SenseVoiceSmall)
 
 
 
 
26
  </div>
27
 
28
  <a name="核心功能"></a>
 
10
  [//]: # (<div align="center"><img src="image/sensevoice2.png" width="700"/> </div>)
11
 
12
  <h4>
13
+ <a href="#What's New"> 文档主页 </a>
 
14
  |<a href="#核心功能"> 核心功能 </a>
15
  </h4>
16
  <h4>
 
22
  </h4>
23
 
24
  模型仓库:中国大陆用户推荐 [modelscope](https://www.modelscope.cn/models/iic/SenseVoiceSmall),海外用户推荐 [huggingface](https://huggingface.co/FunAudioLLM/SenseVoiceSmall)
25
+
26
+ 在线体验:
27
+ [modelscope demo](https://www.modelscope.cn/studios/iic/SenseVoice), [huggingface space](https://huggingface.co/spaces/FunAudioLLM/SenseVoice)
28
+
29
  </div>
30
 
31
  <a name="核心功能"></a>
demo.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from funasr import AutoModel
2
+ from funasr.utils.postprocess_utils import rich_transcription_postprocess
3
+
4
+ model_dir = "FunAudioLLM/SenseVoiceSmall"
5
+
6
+
7
+ model = AutoModel(
8
+ model=model_dir,
9
+ vad_model="fsmn-vad",
10
+ vad_kwargs={"max_single_segment_time": 30000},
11
+ device="cuda:0",
12
+ hub="hf",
13
+ )
14
+
15
+ # en
16
+ res = model.generate(
17
+ input=f"{model.model_path}/example/en.mp3",
18
+ cache={},
19
+ language="auto", # "zh", "en", "yue", "ja", "ko", "nospeech"
20
+ use_itn=True,
21
+ batch_size_s=60,
22
+ merge_vad=True, # merge short VAD segments, up to merge_length_s seconds each
23
+ merge_length_s=15,
24
+ )
25
+ text = rich_transcription_postprocess(res[0]["text"])
26
+ print(text)
27
+