tuanio committed on
Commit
56d5be0
1 Parent(s): 7fc4277

Upload config.json

Browse files
Files changed (1) hide show
  1. config.json +4 -50
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/home/users/astar/ares/stunvat/scratch/checkpoints/moe-llava-qwen1.5-1.8b-ft-mergedlora",
3
  "architectures": [
4
  "LlavaQwen1_5ForCausalLM"
5
  ],
@@ -13,63 +13,17 @@
13
  "image_projector_type": "mlp2x_gelu",
14
  "initializer_range": 0.02,
15
  "intermediate_size": 5504,
16
- "lora": {},
17
  "max_position_embeddings": 32768,
18
  "max_window_layers": 21,
19
  "mm_hidden_size": 768,
20
  "mm_image_tower": "google/siglip-base-patch16-256-multilingual",
21
- "mm_projector_lr": null,
22
  "mm_use_im_patch_token": false,
23
  "mm_use_im_start_end": false,
24
  "mm_video_tower": null,
25
  "mm_vision_select_feature": "patch",
26
  "mm_vision_select_layer": -2,
27
- "model_type": "moe_llava_qwen1_5",
28
- "moe": {
29
- "capacity_factor": 1.5,
30
- "ep_size": 1,
31
- "eval_capacity_factor": 2.0,
32
- "min_capacity": 0,
33
- "moe_enable": true,
34
- "moe_layers_idx": [
35
- 0,
36
- 2,
37
- 4,
38
- 6,
39
- 8,
40
- 10,
41
- 12,
42
- 14,
43
- 16,
44
- 18,
45
- 20,
46
- 22
47
- ],
48
- "moe_mode": "sparse",
49
- "num_experts": [
50
- 4,
51
- 4,
52
- 4,
53
- 4,
54
- 4,
55
- 4,
56
- 4,
57
- 4,
58
- 4,
59
- 4,
60
- 4,
61
- 4
62
- ],
63
- "router_aux_loss_coef": 0.01,
64
- "top_k_experts": 2,
65
- "train_modules": [
66
- "mlp.gate_proj",
67
- "mlp.up_proj",
68
- "mlp.down_proj",
69
- "wg"
70
- ],
71
- "use_residual": false
72
- },
73
  "num_attention_heads": 16,
74
  "num_hidden_layers": 24,
75
  "num_key_value_heads": 16,
@@ -80,7 +34,7 @@
80
  "tie_word_embeddings": false,
81
  "tokenizer_padding_side": "right",
82
  "torch_dtype": "float16",
83
- "transformers_version": "4.37.0",
84
  "tune_mm_mlp_adapter": false,
85
  "use_cache": true,
86
  "use_mm_proj": true,
 
1
  {
2
+ "_name_or_path": "Qwen/Qwen1.5-1.8B",
3
  "architectures": [
4
  "LlavaQwen1_5ForCausalLM"
5
  ],
 
13
  "image_projector_type": "mlp2x_gelu",
14
  "initializer_range": 0.02,
15
  "intermediate_size": 5504,
 
16
  "max_position_embeddings": 32768,
17
  "max_window_layers": 21,
18
  "mm_hidden_size": 768,
19
  "mm_image_tower": "google/siglip-base-patch16-256-multilingual",
20
+ "mm_projector_lr": 1.25e-06,
21
  "mm_use_im_patch_token": false,
22
  "mm_use_im_start_end": false,
23
  "mm_video_tower": null,
24
  "mm_vision_select_feature": "patch",
25
  "mm_vision_select_layer": -2,
26
+ "model_type": "llava_qwen1_5",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  "num_attention_heads": 16,
28
  "num_hidden_layers": 24,
29
  "num_key_value_heads": 16,
 
34
  "tie_word_embeddings": false,
35
  "tokenizer_padding_side": "right",
36
  "torch_dtype": "float16",
37
+ "transformers_version": "4.42.4",
38
  "tune_mm_mlp_adapter": false,
39
  "use_cache": true,
40
  "use_mm_proj": true,