csukuangfj committed on
Commit
ab2e12a
1 Parent(s): e9d0f12

add korean

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. model.py +44 -0
app.py CHANGED
@@ -138,7 +138,7 @@ def process(language: str, repo_id: str, add_punctuation: str, in_filename: str)
138
  recognizer = get_pretrained_model(repo_id)
139
  vad = get_vad()
140
 
141
- if "whisper" in repo_id:
142
  add_punctuation = "No"
143
 
144
  if add_punctuation == "Yes":
 
138
  recognizer = get_pretrained_model(repo_id)
139
  vad = get_vad()
140
 
141
+ if "whisper" in repo_id or "korean" in repo_id or "vosk-model" in repo_id:
142
  add_punctuation = "No"
143
 
144
  if add_punctuation == "Yes":
model.py CHANGED
@@ -240,6 +240,8 @@ def get_pretrained_model(repo_id: str) -> sherpa_onnx.OfflineRecognizer:
240
  return chinese_english_mixed_models[repo_id](repo_id)
241
  elif repo_id in russian_models:
242
  return russian_models[repo_id](repo_id)
 
 
243
  else:
244
  raise ValueError(f"Unsupported repo_id: {repo_id}")
245
 
@@ -360,6 +362,43 @@ def _get_english_model(repo_id: str) -> sherpa_onnx.OfflineRecognizer:
360
  return recognizer
361
 
362
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
  chinese_dialect_models = {
364
  "csukuangfj/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04": _get_chinese_dialect_models,
365
  }
@@ -384,6 +423,10 @@ chinese_english_mixed_models = {
384
  "csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28": _get_paraformer_zh_pre_trained_model,
385
  }
386
 
 
 
 
 
387
  russian_models = {
388
  "alphacep/vosk-model-ru": _get_russian_pre_trained_model,
389
  "alphacep/vosk-model-small-ru": _get_russian_pre_trained_model,
@@ -395,4 +438,5 @@ language_to_models = {
395
  "Chinese": list(chinese_models.keys()),
396
  "English": list(english_models.keys()),
397
  "Russian": list(russian_models.keys()),
 
398
  }
 
240
  return chinese_english_mixed_models[repo_id](repo_id)
241
  elif repo_id in russian_models:
242
  return russian_models[repo_id](repo_id)
243
+ elif repo_id in korean_models:
244
+ return korean_models[repo_id](repo_id)
245
  else:
246
  raise ValueError(f"Unsupported repo_id: {repo_id}")
247
 
 
362
  return recognizer
363
 
364
 
365
+ @lru_cache(maxsize=10)
366
+ def _get_korean_pre_trained_model(repo_id: str) -> sherpa_onnx.OfflineRecognizer:
367
+ assert repo_id in ("k2-fsa/sherpa-onnx-zipformer-korean-2024-06-24",), repo_id
368
+
369
+ encoder_model = _get_nn_model_filename(
370
+ repo_id=repo_id,
371
+ filename="encoder-epoch-99-avg-1.int8.onnx",
372
+ subfolder=".",
373
+ )
374
+
375
+ decoder_model = _get_nn_model_filename(
376
+ repo_id=repo_id,
377
+ filename="decoder-epoch-99-avg-1.onnx",
378
+ subfolder=".",
379
+ )
380
+
381
+ joiner_model = _get_nn_model_filename(
382
+ repo_id=repo_id,
383
+ filename="joiner-epoch-99-avg-1.onnx",
384
+ subfolder=".",
385
+ )
386
+
387
+ tokens = _get_token_filename(repo_id=repo_id, subfolder=".")
388
+
389
+ recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
390
+ tokens=tokens,
391
+ encoder=encoder_model,
392
+ decoder=decoder_model,
393
+ joiner=joiner_model,
394
+ num_threads=2,
395
+ sample_rate=16000,
396
+ feature_dim=80,
397
+ )
398
+
399
+ return recognizer
400
+
401
+
402
  chinese_dialect_models = {
403
  "csukuangfj/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04": _get_chinese_dialect_models,
404
  }
 
423
  "csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28": _get_paraformer_zh_pre_trained_model,
424
  }
425
 
426
+ korean_models = {
427
+ "k2-fsa/sherpa-onnx-zipformer-korean-2024-06-24": _get_korean_pre_trained_model,
428
+ }
429
+
430
  russian_models = {
431
  "alphacep/vosk-model-ru": _get_russian_pre_trained_model,
432
  "alphacep/vosk-model-small-ru": _get_russian_pre_trained_model,
 
438
  "Chinese": list(chinese_models.keys()),
439
  "English": list(english_models.keys()),
440
  "Russian": list(russian_models.keys()),
441
+ "Korean": list(korean_models.keys()),
442
  }