venetis
/

llama3-8b-hermes-sandals-sample-10k

venetis committed on May 24

Commit

372e3b6

•

1 Parent(s): a1aad76

End of training

Files changed (2) hide show

README.md CHANGED Viewed

@@ -88,8 +88,16 @@ deepspeed:
 weight_decay: 0.0
 fsdp:
 fsdp_config:
 special_tokens:
-   pad_token: <|end_of_text|>
 ```
@@ -99,7 +107,7 @@ special_tokens:
 This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.7611
 ## Model description
@@ -133,22 +141,22 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 0.918         | 0.0102 | 1    | 0.9709          |
-| 0.7146        | 0.2538 | 25   | 0.7678          |
-| 0.728         | 0.5076 | 50   | 0.7419          |
-| 0.6843        | 0.7614 | 75   | 0.7328          |
-| 0.6819        | 1.0152 | 100  | 0.7259          |
-| 0.6342        | 1.2487 | 125  | 0.7269          |
-| 0.616         | 1.5025 | 150  | 0.7298          |
-| 0.7092        | 1.7563 | 175  | 0.7250          |
-| 0.6453        | 2.0102 | 200  | 0.7224          |
-| 0.5267        | 2.2411 | 225  | 0.7425          |
-| 0.5702        | 2.4949 | 250  | 0.7424          |
-| 0.5459        | 2.7487 | 275  | 0.7421          |
-| 0.6327        | 3.0025 | 300  | 0.7428          |
-| 0.5649        | 3.2335 | 325  | 0.7573          |
-| 0.4318        | 3.4873 | 350  | 0.7617          |
-| 0.5523        | 3.7411 | 375  | 0.7611          |
 ### Framework versions

 weight_decay: 0.0
 fsdp:
 fsdp_config:
+#UPDATES mk2 - added special tokens
 special_tokens:
+  eos_token: "<|im_end|>"
+  pad_token: "<|end_of_text|>"
+tokens:
+  - "<|im_start|>"
+  - "<|im_end|>"
+lora_modules_to_save:
+  - embed_tokens
+  - lm_head
 ```
 This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on the None dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.8933
 ## Model description
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
+| 0.9791        | 0.0102 | 1    | 1.0349          |
+| 0.7725        | 0.2538 | 25   | 0.8235          |
+| 0.8046        | 0.5076 | 50   | 0.8169          |
+| 0.7678        | 0.7614 | 75   | 0.8099          |
+| 0.7324        | 1.0152 | 100  | 0.7924          |
+| 0.4486        | 1.2487 | 125  | 0.8461          |
+| 0.4419        | 1.5025 | 150  | 0.8462          |
+| 0.4992        | 1.7563 | 175  | 0.8350          |
+| 0.4671        | 2.0102 | 200  | 0.8272          |
+| 0.2618        | 2.2411 | 225  | 0.8615          |
+| 0.275         | 2.4949 | 250  | 0.8697          |
+| 0.2583        | 2.7487 | 275  | 0.8672          |
+| 0.3158        | 3.0025 | 300  | 0.8639          |
+| 0.2073        | 3.2335 | 325  | 0.8940          |
+| 0.1602        | 3.4873 | 350  | 0.8931          |
+| 0.1904        | 3.7411 | 375  | 0.8933          |
 ### Framework versions

adapter_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6c9a9bdcf948f6f268d7a14bd0697a518cc1abca7a3bf101fd30eb3657b1a4c8
-size 167934026

 version https://git-lfs.github.com/spec/v1
+oid sha256:11818ca0411c345a582973e166a7ea0a185370f4a65a9a42478da155eee042dd
+size 4370694462