kevinwang676 commited on
Commit
c9c04ba
1 Parent(s): 899e20a

Create config_se.json

Browse files
Files changed (1) hide show
  1. config_se.json +119 -0
config_se.json ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "speaker_encoder",
3
+ "run_name": "speaker_encoder",
4
+ "run_description": "resnet speaker encoder trained with commonvoice all languages dev and train, Voxceleb 1 dev and Voxceleb 2 dev",
5
+ "epochs": 100000,
6
+ "batch_size": null,
7
+ "eval_batch_size": null,
8
+ "mixed_precision": false,
9
+ "run_eval": true,
10
+ "test_delay_epochs": 0,
11
+ "print_eval": false,
12
+ "print_step": 50,
13
+ "tb_plot_step": 100,
14
+ "tb_model_param_stats": false,
15
+ "save_step": 1000,
16
+ "checkpoint": true,
17
+ "keep_all_best": false,
18
+ "keep_after": 10000,
19
+ "num_loader_workers": 8,
20
+ "num_val_loader_workers": 0,
21
+ "use_noise_augment": false,
22
+ "output_path": "../checkpoints/speaker_encoder/language_balanced/normalized/angleproto-4-samples-by-speakers/",
23
+ "distributed_backend": "nccl",
24
+ "distributed_url": "tcp://localhost:54321",
25
+ "audio": {
26
+ "fft_size": 512,
27
+ "win_length": 400,
28
+ "hop_length": 160,
29
+ "frame_shift_ms": null,
30
+ "frame_length_ms": null,
31
+ "stft_pad_mode": "reflect",
32
+ "sample_rate": 16000,
33
+ "resample": false,
34
+ "preemphasis": 0.97,
35
+ "ref_level_db": 20,
36
+ "do_sound_norm": false,
37
+ "do_trim_silence": false,
38
+ "trim_db": 60,
39
+ "power": 1.5,
40
+ "griffin_lim_iters": 60,
41
+ "num_mels": 64,
42
+ "mel_fmin": 0.0,
43
+ "mel_fmax": 8000.0,
44
+ "spec_gain": 20,
45
+ "signal_norm": false,
46
+ "min_level_db": -100,
47
+ "symmetric_norm": false,
48
+ "max_norm": 4.0,
49
+ "clip_norm": false,
50
+ "stats_path": null
51
+ },
52
+ "datasets": [
53
+ {
54
+ "name": "voxceleb2",
55
+ "path": "/workspace/scratch/ecasanova/datasets/VoxCeleb/vox2_dev_aac/",
56
+ "meta_file_train": null,
57
+ "ununsed_speakers": null,
58
+ "meta_file_val": null,
59
+ "meta_file_attn_mask": "",
60
+ "language": "voxceleb"
61
+ }
62
+ ],
63
+ "model_params": {
64
+ "model_name": "resnet",
65
+ "input_dim": 64,
66
+ "use_torch_spec": true,
67
+ "log_input": true,
68
+ "proj_dim": 512
69
+ },
70
+ "audio_augmentation": {
71
+ "p": 0.5,
72
+ "rir": {
73
+ "rir_path": "/workspace/store/ecasanova/ComParE/RIRS_NOISES/simulated_rirs/",
74
+ "conv_mode": "full"
75
+ },
76
+ "additive": {
77
+ "sounds_path": "/workspace/store/ecasanova/ComParE/musan/",
78
+ "speech": {
79
+ "min_snr_in_db": 13,
80
+ "max_snr_in_db": 20,
81
+ "min_num_noises": 1,
82
+ "max_num_noises": 1
83
+ },
84
+ "noise": {
85
+ "min_snr_in_db": 0,
86
+ "max_snr_in_db": 15,
87
+ "min_num_noises": 1,
88
+ "max_num_noises": 1
89
+ },
90
+ "music": {
91
+ "min_snr_in_db": 5,
92
+ "max_snr_in_db": 15,
93
+ "min_num_noises": 1,
94
+ "max_num_noises": 1
95
+ }
96
+ },
97
+ "gaussian": {
98
+ "p": 0.0,
99
+ "min_amplitude": 0.0,
100
+ "max_amplitude": 1e-05
101
+ }
102
+ },
103
+ "storage": {
104
+ "sample_from_storage_p": 0.5,
105
+ "storage_size": 40
106
+ },
107
+ "max_train_step": 1000000,
108
+ "loss": "angleproto",
109
+ "grad_clip": 3.0,
110
+ "lr": 0.0001,
111
+ "lr_decay": false,
112
+ "warmup_steps": 4000,
113
+ "wd": 1e-06,
114
+ "steps_plot_stats": 100,
115
+ "num_speakers_in_batch": 100,
116
+ "num_utters_per_speaker": 4,
117
+ "skip_speakers": true,
118
+ "voice_len": 2.0
119
+ }