xray1111 committed on
Commit
25cbe4a
1 Parent(s): 07ed6b7

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.model +2 -2
  2. tokenizer_config.json +9 -4
tokenizer.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
- size 499723
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4be54af290d93c113bcbf421115ae9eed9d6340408f564898f1e966dc738ef01
3
+ size 1136699
tokenizer_config.json CHANGED
@@ -1,6 +1,12 @@
1
  {
2
- "add_bos_token": true,
3
  "add_eos_token": false,
 
 
 
 
 
 
4
  "bos_token": {
5
  "__type": "AddedToken",
6
  "content": "<s>",
@@ -18,11 +24,10 @@
18
  "rstrip": false,
19
  "single_word": false
20
  },
21
- "model_max_length": 2048,
22
  "pad_token": null,
23
- "padding_side": "right",
24
  "sp_model_kwargs": {},
25
- "tokenizer_class": "LlamaTokenizer",
26
  "unk_token": {
27
  "__type": "AddedToken",
28
  "content": "<unk>",
 
1
  {
2
+ "add_bos_token": false,
3
  "add_eos_token": false,
4
+ "auto_map": {
5
+ "AutoTokenizer": [
6
+ "baichuan-inc/baichuan-7B--tokenization_baichuan.BaiChuanTokenizer",
7
+ null
8
+ ]
9
+ },
10
  "bos_token": {
11
  "__type": "AddedToken",
12
  "content": "<s>",
 
24
  "rstrip": false,
25
  "single_word": false
26
  },
27
+ "model_max_length": 1000000000000000019884624838656,
28
  "pad_token": null,
 
29
  "sp_model_kwargs": {},
30
+ "tokenizer_class": "BaiChuanTokenizer",
31
  "unk_token": {
32
  "__type": "AddedToken",
33
  "content": "<unk>",