bjoernp committed on
Commit
8bc376c
1 Parent(s): 57e3994

Upload tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +7 -1
  2. tokenizer_config.json +4 -2
special_tokens_map.json CHANGED
@@ -2,9 +2,15 @@
2
  "additional_special_tokens": [
3
  "<unk>",
4
  "<s>",
5
- "</s>"
 
 
6
  ],
7
  "bos_token": "<s>",
 
8
  "eos_token": "</s>",
 
 
 
9
  "unk_token": "<unk>"
10
  }
 
2
  "additional_special_tokens": [
3
  "<unk>",
4
  "<s>",
5
+ "</s>",
6
+ "<|im_start|>",
7
+ "<|im_end|>"
8
  ],
9
  "bos_token": "<s>",
10
+ "cls_token": "</s>",
11
  "eos_token": "</s>",
12
+ "mask_token": "</s>",
13
+ "pad_token": "</s>",
14
+ "sep_token": "</s>",
15
  "unk_token": "<unk>"
16
  }
tokenizer_config.json CHANGED
@@ -44,7 +44,9 @@
44
  "additional_special_tokens": [
45
  "<unk>",
46
  "<s>",
47
- "</s>"
 
 
48
  ],
49
  "bos_token": "<s>",
50
  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
@@ -52,7 +54,7 @@
52
  "eos_token": "</s>",
53
  "legacy": true,
54
  "model_max_length": 1000000000000000019884624838656,
55
- "pad_token": null,
56
  "sp_model_kwargs": {},
57
  "spaces_between_special_tokens": false,
58
  "tokenizer_class": "LlamaTokenizer",
 
44
  "additional_special_tokens": [
45
  "<unk>",
46
  "<s>",
47
+ "</s>",
48
+ "<|im_start|>",
49
+ "<|im_end|>"
50
  ],
51
  "bos_token": "<s>",
52
  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
 
54
  "eos_token": "</s>",
55
  "legacy": true,
56
  "model_max_length": 1000000000000000019884624838656,
57
+ "pad_token": "</s>",
58
  "sp_model_kwargs": {},
59
  "spaces_between_special_tokens": false,
60
  "tokenizer_class": "LlamaTokenizer",