sirr committed
Commit: 446e645
Parent: fcfc3e8

Model save

README.md CHANGED
@@ -1,6 +1,6 @@
 ---
 license: apache-2.0
-base_model: openlm-research/open_llama_7b
+base_model: openlm-research/open_llama_7b_v2
 tags:
 - generated_from_trainer
 model-index:
@@ -13,10 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # devchat-llama-7b
 
-This model is a test - fine-tuned version of [openlm-research/open_llama_7b](https://huggingface.co/openlm-research/open_llama_7b)
-The full code example to easily launch the model will be provided in future.
-
-# w.i.p
+This model is a fine-tuned version of [openlm-research/open_llama_7b_v2](https://huggingface.co/openlm-research/open_llama_7b_v2) on an unknown dataset.
 
 ## Model description
 
@@ -35,16 +32,14 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate: 0.001
-- train_batch_size: 1
-- eval_batch_size: 1
+- learning_rate: 2e-05
+- train_batch_size: 2
+- eval_batch_size: 2
 - seed: 42
-- gradient_accumulation_steps: 2
-- total_train_batch_size: 2
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: constant
 - lr_scheduler_warmup_ratio: 0.1
-- num_epochs: 3.0
+- training_steps: 100
 
 ### Training results
 
@@ -52,7 +47,7 @@ The following hyperparameters were used during training:
 
 ### Framework versions
 
-- Transformers 4.33.0
+- Transformers 4.33.1
 - Pytorch 2.0.1+cu118
-- Datasets 2.14.4
+- Datasets 2.14.5
 - Tokenizers 0.13.3
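For reference, the updated hyperparameters correspond to a standard transformers Trainer setup. Below is a minimal sketch of the matching TrainingArguments, assuming the stock Trainer was used; the output_dir is hypothetical, and the Adam betas/epsilon listed above are the library defaults:

```python
from transformers import TrainingArguments

# Sketch of TrainingArguments matching the updated README values.
# output_dir is an assumed path, not taken from the commit.
training_args = TrainingArguments(
    output_dir="devchat-llama-7b",   # hypothetical output path
    learning_rate=2e-5,              # learning_rate: 2e-05
    per_device_train_batch_size=2,   # train_batch_size: 2
    per_device_eval_batch_size=2,    # eval_batch_size: 2
    seed=42,
    lr_scheduler_type="constant",
    warmup_ratio=0.1,                # lr_scheduler_warmup_ratio: 0.1
    max_steps=100,                   # training_steps: 100 (replaces num_epochs: 3.0)
)
# Adam with betas=(0.9, 0.999) and epsilon=1e-08 is the Trainer's default optimizer.
```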
adapter_config.json CHANGED
@@ -1,6 +1,6 @@
 {
   "auto_mapping": null,
-  "base_model_name_or_path": "openlm-research/open_llama_7b",
+  "base_model_name_or_path": "openlm-research/open_llama_7b_v2",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
@@ -17,7 +17,10 @@
     "q_proj",
     "v_proj",
     "k_proj",
-    "o_proj"
+    "o_proj",
+    "gate_proj",
+    "up_proj",
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM"
 }
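The target_modules change extends LoRA beyond the attention projections (q/k/v/o) to the MLP projections (gate/up/down). A minimal peft sketch of the new configuration; the rank, alpha, and dropout values are illustrative placeholders, since this diff does not show them:

```python
from peft import LoraConfig

# Sketch of the updated adapter config. r, lora_alpha, and lora_dropout are
# placeholders; the diff only shows base model, bias, targets, and task type.
config = LoraConfig(
    base_model_name_or_path="openlm-research/open_llama_7b_v2",
    bias="none",
    fan_in_fan_out=False,
    target_modules=[
        "q_proj", "v_proj", "k_proj", "o_proj",  # attention projections (as before)
        "gate_proj", "up_proj", "down_proj",     # MLP projections (newly added)
    ],
    task_type="CAUSAL_LM",
    r=16,               # placeholder
    lora_alpha=32,      # placeholder
    lora_dropout=0.05,  # placeholder
)
```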
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2c2a520d580c52fc8312303b1446190488b5031f5aa28c26900f795dcb6920ad
-size 16811144
+oid sha256:7421a8f90dfd28fa615fe6b473c9b4ae96e07d31c38b71aee977c3088e7520fb
+size 40036040
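The adapter roughly doubles in size (about 16.8 MB to 40.0 MB), consistent with LoRA now covering seven projection matrices per layer instead of four. A hedged loading sketch with peft; the repo id sirr/devchat-llama-7b is inferred from this page, not confirmed by the commit:

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the v2 base model, then attach the LoRA adapter from this repo.
# "sirr/devchat-llama-7b" is an assumed repo id, not confirmed by the commit.
base = AutoModelForCausalLM.from_pretrained(
    "openlm-research/open_llama_7b_v2", torch_dtype=torch.float16
)
tokenizer = AutoTokenizer.from_pretrained("openlm-research/open_llama_7b_v2")
model = PeftModel.from_pretrained(base, "sirr/devchat-llama-7b")
```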
tokenizer.model CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab1b681ec7fc02fed5edd3026687d7a692a918c4dd8e150ca2e3994a6229843b
-size 534194
+oid sha256:91b289e85fa20fd375d8b33dc12f77616f18abc6359804471d1fafcb425fecb8
+size 511574
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e2562c2fc10a976832d0ec9c920dc2901bc73ee0591fd2549b5d9b59ac5ba00
-size 4027
+oid sha256:e260e040297d784fd2f4c28731614c59e40275b7ca5660a425c5ffdad038e56f
+size 4091
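training_args.bin is the pickled TrainingArguments object the Trainer saves alongside checkpoints, so the small size change (4027 to 4091 bytes) tracks the edited fields. One way to inspect it, assuming transformers is importable (unpickling needs the class):

```python
import torch

# training_args.bin is a pickled transformers.TrainingArguments.
# On newer PyTorch versions you may need torch.load(..., weights_only=False).
args = torch.load("training_args.bin")
print(args.learning_rate, args.max_steps, args.lr_scheduler_type)
```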