danielhanchen committed on
Commit
2bc2f21
1 Parent(s): 89e4fd4

Update tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +47 -1
tokenizer_config.json CHANGED
@@ -1,3 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  {
2
  "added_tokens_decoder": {
3
  "128000": {
@@ -2052,7 +2097,7 @@
2052
  "bos_token": "<|begin_of_text|>",
2053
  "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
2054
  "clean_up_tokenization_spaces": true,
2055
- "eos_token": "<|end_of_text|>",
2056
  "model_input_names": [
2057
  "input_ids",
2058
  "attention_mask"
@@ -2062,3 +2107,4 @@
2062
  "padding_side": "left",
2063
  "tokenizer_class": "PreTrainedTokenizerFast"
2064
  }
 
 
1
+
2
+ Hugging Face's logo Hugging Face
3
+
4
+ Models
5
+ Datasets
6
+ Spaces
7
+ Posts
8
+ Docs
9
+ Pricing
10
+
11
+ meta-llama
12
+ /
13
+ Meta-Llama-3-8B-Instruct
14
+ Text Generation
15
+ Transformers
16
+ Safetensors
17
+ PyTorch
18
+ English
19
+ llama
20
+ facebook
21
+ meta
22
+ llama-3
23
+ conversational
24
+ Inference Endpoints
25
+ text-generation-inference
26
+ Model card
27
+ Files and versions
28
+ Community
29
+ 108
30
+ Meta-Llama-3-8B-Instruct
31
+ / tokenizer_config.json
32
+ ArthurZ's picture
33
+ ArthurZ
34
+ HF staff
35
+ Update tokenizer_config.json (#60)
36
+ a897769
37
+ verified
38
+ 5 days ago
39
+ raw
40
+ history
41
+ blame
42
+ contribute
43
+ delete
44
+ No virus
45
+ 51 kB
46
  {
47
  "added_tokens_decoder": {
48
  "128000": {
 
2097
  "bos_token": "<|begin_of_text|>",
2098
  "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
2099
  "clean_up_tokenization_spaces": true,
2100
+ "eos_token": "<|eot_id|>",
2101
  "model_input_names": [
2102
  "input_ids",
2103
  "attention_mask"
 
2107
  "padding_side": "left",
2108
  "tokenizer_class": "PreTrainedTokenizerFast"
2109
  }
2110
+