modelo-scad-book / tokenizer.json
SantiagoCorley's picture
Upload tokenizer
17ed3ae
raw
history blame contribute delete
No virus
12.9 kB
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<|endoftext|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "ByteLevel",
"add_prefix_space": false,
"trim_offsets": true,
"use_regex": true
},
"post_processor": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": false,
"use_regex": true
},
"decoder": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": true,
"use_regex": true
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": null,
"continuing_subword_prefix": "",
"end_of_word_suffix": "",
"fuse_unk": false,
"byte_fallback": false,
"vocab": {
"<|endoftext|>": 0,
"!": 1,
"\"": 2,
"#": 3,
"$": 4,
"%": 5,
"&": 6,
"'": 7,
"(": 8,
")": 9,
"*": 10,
"+": 11,
",": 12,
"-": 13,
".": 14,
"/": 15,
"0": 16,
"1": 17,
"2": 18,
"3": 19,
"4": 20,
"5": 21,
"6": 22,
"7": 23,
"8": 24,
"9": 25,
":": 26,
";": 27,
"<": 28,
"=": 29,
">": 30,
"?": 31,
"@": 32,
"A": 33,
"B": 34,
"C": 35,
"D": 36,
"E": 37,
"F": 38,
"G": 39,
"H": 40,
"I": 41,
"J": 42,
"K": 43,
"L": 44,
"M": 45,
"N": 46,
"O": 47,
"P": 48,
"Q": 49,
"R": 50,
"S": 51,
"T": 52,
"U": 53,
"V": 54,
"W": 55,
"X": 56,
"Y": 57,
"Z": 58,
"[": 59,
"\\": 60,
"]": 61,
"^": 62,
"_": 63,
"`": 64,
"a": 65,
"b": 66,
"c": 67,
"d": 68,
"e": 69,
"f": 70,
"g": 71,
"h": 72,
"i": 73,
"j": 74,
"k": 75,
"l": 76,
"m": 77,
"n": 78,
"o": 79,
"p": 80,
"q": 81,
"r": 82,
"s": 83,
"t": 84,
"u": 85,
"v": 86,
"w": 87,
"x": 88,
"y": 89,
"z": 90,
"{": 91,
"|": 92,
"}": 93,
"~": 94,
"¡": 95,
"¢": 96,
"£": 97,
"¤": 98,
"¥": 99,
"¦": 100,
"§": 101,
"¨": 102,
"©": 103,
"ª": 104,
"«": 105,
"¬": 106,
"®": 107,
"¯": 108,
"°": 109,
"±": 110,
"²": 111,
"³": 112,
"´": 113,
"µ": 114,
"¶": 115,
"·": 116,
"¸": 117,
"¹": 118,
"º": 119,
"»": 120,
"¼": 121,
"½": 122,
"¾": 123,
"¿": 124,
"À": 125,
"Á": 126,
"Â": 127,
"Ã": 128,
"Ä": 129,
"Å": 130,
"Æ": 131,
"Ç": 132,
"È": 133,
"É": 134,
"Ê": 135,
"Ë": 136,
"Ì": 137,
"Í": 138,
"Î": 139,
"Ï": 140,
"Ð": 141,
"Ñ": 142,
"Ò": 143,
"Ó": 144,
"Ô": 145,
"Õ": 146,
"Ö": 147,
"×": 148,
"Ø": 149,
"Ù": 150,
"Ú": 151,
"Û": 152,
"Ü": 153,
"Ý": 154,
"Þ": 155,
"ß": 156,
"à": 157,
"á": 158,
"â": 159,
"ã": 160,
"ä": 161,
"å": 162,
"æ": 163,
"ç": 164,
"è": 165,
"é": 166,
"ê": 167,
"ë": 168,
"ì": 169,
"í": 170,
"î": 171,
"ï": 172,
"ð": 173,
"ñ": 174,
"ò": 175,
"ó": 176,
"ô": 177,
"õ": 178,
"ö": 179,
"÷": 180,
"ø": 181,
"ù": 182,
"ú": 183,
"û": 184,
"ü": 185,
"ý": 186,
"þ": 187,
"ÿ": 188,
"Ā": 189,
"ā": 190,
"Ă": 191,
"ă": 192,
"Ą": 193,
"ą": 194,
"Ć": 195,
"ć": 196,
"Ĉ": 197,
"ĉ": 198,
"Ċ": 199,
"ċ": 200,
"Č": 201,
"č": 202,
"Ď": 203,
"ď": 204,
"Đ": 205,
"đ": 206,
"Ē": 207,
"ē": 208,
"Ĕ": 209,
"ĕ": 210,
"Ė": 211,
"ė": 212,
"Ę": 213,
"ę": 214,
"Ě": 215,
"ě": 216,
"Ĝ": 217,
"ĝ": 218,
"Ğ": 219,
"ğ": 220,
"Ġ": 221,
"ġ": 222,
"Ģ": 223,
"ģ": 224,
"Ĥ": 225,
"ĥ": 226,
"Ħ": 227,
"ħ": 228,
"Ĩ": 229,
"ĩ": 230,
"Ī": 231,
"ī": 232,
"Ĭ": 233,
"ĭ": 234,
"Į": 235,
"į": 236,
"İ": 237,
"ı": 238,
"Ĳ": 239,
"ĳ": 240,
"Ĵ": 241,
"ĵ": 242,
"Ķ": 243,
"ķ": 244,
"ĸ": 245,
"Ĺ": 246,
"ĺ": 247,
"Ļ": 248,
"ļ": 249,
"Ľ": 250,
"ľ": 251,
"Ŀ": 252,
"ŀ": 253,
"Ł": 254,
"ł": 255,
"Ń": 256,
"Ġa": 257,
"nd": 258,
"Ġs": 259,
");": 260,
"Ġand": 261,
"re": 262,
"Ġ=": 263,
"of": 264,
"Ġof": 265,
"ci": 266,
"he": 267,
"iz": 268,
"Ġsiz": 269,
"Ġsize": 270,
"are": 271,
"qu": 272,
"quare": 273,
"er": 274,
"ind": 275,
"lind": 276,
"linder": 277,
"Ġci": 278,
"ad": 279,
"io": 280,
"rad": 281,
"Ġrad": 282,
"Ġradio": 283,
"cl": 284,
"rcl": 285,
"rcle": 286,
"phe": 287,
"phere": 288,
"be": 289,
"cu": 290,
"cube": 291,
"square": 292,
"Ġsquare": 293,
"cy": 294,
"gh": 295,
"igh": 296,
"Ġh": 297,
"Ġhe": 298,
"Ġcilinder": 299,
"cylinder": 300,
"ight": 301,
"Ġheight": 302,
"circle": 303,
"Ġcircle": 304,
"sphere": 305,
"Ġsphere": 306,
"Ġcube": 307,
"Ġ2": 308,
"Ġ5": 309,
"Ġ3": 310,
"Ġ4": 311,
"Ġ8": 312,
"Ġ6": 313,
"Ġ7": 314,
"Ġ9": 315,
"Ġ1": 316,
"ĠĊ": 317,
"Ġ54": 318,
"Ġ33": 319,
"Ġ15": 320,
"Ġ31": 321,
"Ġ17": 322,
"Ġ21": 323,
"Ġ65": 324,
"Ġ29": 325,
"Ġ59": 326,
"Ġ26": 327,
"Ġ27": 328,
"Ġ61": 329,
"Ġ62": 330,
"Ġ48": 331,
"Ġ45": 332,
"Ġ86": 333,
"Ġ70": 334,
"Ġ84": 335,
"Ġ28": 336,
"Ġ71": 337,
"Ġ93": 338,
"Ġ46": 339,
"Ġ22": 340,
"Ġ78": 341,
"Ġ88": 342,
"Ġ51": 343,
"Ġ42": 344,
"Ġ47": 345,
"Ġ56": 346,
"Ġ37": 347,
"Ġ83": 348,
"Ġ19": 349,
"Ġ32": 350,
"Ġ87": 351,
"Ġ41": 352,
"Ġ10": 353,
"Ġ58": 354,
"Ġ36": 355,
"Ġ60": 356,
"Ġ94": 357,
"Ġ96": 358,
"Ġ23": 359,
"Ġ38": 360,
"Ġ44": 361,
"Ġ81": 362,
"Ġ79": 363,
"Ġ35": 364,
"Ġ52": 365,
"Ġ64": 366,
"Ġ82": 367,
"Ġ76": 368,
"Ġ53": 369,
"Ġ97": 370,
"Ġ34": 371,
"Ġ69": 372,
"Ġ63": 373,
"Ġ66": 374,
"Ġ98": 375,
"Ġ85": 376,
"Ġ18": 377,
"Ġ77": 378,
"Ġ90": 379,
"Ġ73": 380,
"Ġ14": 381,
"Ġ57": 382,
"Ġ74": 383,
"Ġ20": 384,
"Ġ50": 385,
"Ġ75": 386,
"Ġ91": 387,
"Ġ92": 388,
"Ġ89": 389,
"Ġ12": 390,
"Ġ24": 391,
"Ġ25": 392,
"Ġ99": 393,
"Ġ39": 394,
"Ġ40": 395,
"Ġ55": 396,
"Ġ16": 397,
"Ġ11": 398,
"Ġ43": 399,
"Ġ95": 400,
"Ġ80": 401,
"Ġ30": 402,
"Ġ67": 403,
"Ġ49": 404,
"Ġ72": 405,
"Ġ68": 406,
"Ġ13": 407,
"26": 408,
"17": 409,
"29": 410,
"59": 411,
"70": 412,
"60": 413,
"15": 414,
"54": 415,
"88": 416,
"31": 417,
"84": 418,
"19": 419,
"44": 420,
"62": 421,
"65": 422,
"48": 423,
"33": 424,
"86": 425,
"41": 426,
"85": 427,
"42": 428,
"76": 429,
"82": 430,
"10": 431,
"71": 432,
"36": 433,
"21": 434,
"46": 435,
"66": 436,
"98": 437,
"11": 438,
"28": 439,
"38": 440,
"56": 441,
"61": 442,
"99": 443,
"96": 444,
"22": 445,
"55": 446,
"79": 447,
"37": 448,
"64": 449,
"97": 450,
"69": 451,
"94": 452,
"16": 453,
"74": 454,
"78": 455,
"32": 456,
"35": 457,
"27": 458,
"18": 459,
"93": 460,
"47": 461,
"14": 462,
"23": 463,
"52": 464,
"51": 465,
"83": 466,
"90": 467,
"34": 468,
"63": 469,
"89": 470,
"45": 471,
"92": 472,
"24": 473,
"77": 474,
"25": 475,
"58": 476,
"87": 477,
"50": 478,
"95": 479,
"39": 480,
"40": 481,
"80": 482,
"43": 483,
"91": 484,
"30": 485,
"73": 486,
"67": 487,
"12": 488,
"75": 489,
"81": 490,
"57": 491,
"49": 492,
"20": 493,
"53": 494,
"13": 495,
"68": 496,
"72": 497
},
"merges": [
"Ġ a",
"n d",
"Ġ s",
") ;",
"Ġa nd",
"r e",
"Ġ =",
"o f",
"Ġ of",
"c i",
"h e",
"i z",
"Ġs iz",
"Ġsiz e",
"a re",
"q u",
"qu are",
"e r",
"i nd",
"l ind",
"lind er",
"Ġ ci",
"a d",
"i o",
"r ad",
"Ġ rad",
"Ġrad io",
"c l",
"r cl",
"rcl e",
"p he",
"phe re",
"b e",
"c u",
"cu be",
"s quare",
"Ġs quare",
"c y",
"g h",
"i gh",
"Ġ h",
"Ġ he",
"Ġci linder",
"cy linder",
"igh t",
"Ġhe ight",
"ci rcle",
"Ġci rcle",
"s phere",
"Ġs phere",
"Ġ cube",
"Ġ 2",
"Ġ 5",
"Ġ 3",
"Ġ 4",
"Ġ 8",
"Ġ 6",
"Ġ 7",
"Ġ 9",
"Ġ 1",
"Ġ Ċ",
"Ġ5 4",
"Ġ3 3",
"Ġ1 5",
"Ġ3 1",
"Ġ1 7",
"Ġ2 1",
"Ġ6 5",
"Ġ2 9",
"Ġ5 9",
"Ġ2 6",
"Ġ2 7",
"Ġ6 1",
"Ġ6 2",
"Ġ4 8",
"Ġ4 5",
"Ġ8 6",
"Ġ7 0",
"Ġ8 4",
"Ġ2 8",
"Ġ7 1",
"Ġ9 3",
"Ġ4 6",
"Ġ2 2",
"Ġ7 8",
"Ġ8 8",
"Ġ5 1",
"Ġ4 2",
"Ġ4 7",
"Ġ5 6",
"Ġ3 7",
"Ġ8 3",
"Ġ1 9",
"Ġ3 2",
"Ġ8 7",
"Ġ4 1",
"Ġ1 0",
"Ġ5 8",
"Ġ3 6",
"Ġ6 0",
"Ġ9 4",
"Ġ9 6",
"Ġ2 3",
"Ġ3 8",
"Ġ4 4",
"Ġ8 1",
"Ġ7 9",
"Ġ3 5",
"Ġ5 2",
"Ġ6 4",
"Ġ8 2",
"Ġ7 6",
"Ġ5 3",
"Ġ9 7",
"Ġ3 4",
"Ġ6 9",
"Ġ6 3",
"Ġ6 6",
"Ġ9 8",
"Ġ8 5",
"Ġ1 8",
"Ġ7 7",
"Ġ9 0",
"Ġ7 3",
"Ġ1 4",
"Ġ5 7",
"Ġ7 4",
"Ġ2 0",
"Ġ5 0",
"Ġ7 5",
"Ġ9 1",
"Ġ9 2",
"Ġ8 9",
"Ġ1 2",
"Ġ2 4",
"Ġ2 5",
"Ġ9 9",
"Ġ3 9",
"Ġ4 0",
"Ġ5 5",
"Ġ1 6",
"Ġ1 1",
"Ġ4 3",
"Ġ9 5",
"Ġ8 0",
"Ġ3 0",
"Ġ6 7",
"Ġ4 9",
"Ġ7 2",
"Ġ6 8",
"Ġ1 3",
"2 6",
"1 7",
"2 9",
"5 9",
"7 0",
"6 0",
"1 5",
"5 4",
"8 8",
"3 1",
"8 4",
"1 9",
"4 4",
"6 2",
"6 5",
"4 8",
"3 3",
"8 6",
"4 1",
"8 5",
"4 2",
"7 6",
"8 2",
"1 0",
"7 1",
"3 6",
"2 1",
"4 6",
"6 6",
"9 8",
"1 1",
"2 8",
"3 8",
"5 6",
"6 1",
"9 9",
"9 6",
"2 2",
"5 5",
"7 9",
"3 7",
"6 4",
"9 7",
"6 9",
"9 4",
"1 6",
"7 4",
"7 8",
"3 2",
"3 5",
"2 7",
"1 8",
"9 3",
"4 7",
"1 4",
"2 3",
"5 2",
"5 1",
"8 3",
"9 0",
"3 4",
"6 3",
"8 9",
"4 5",
"9 2",
"2 4",
"7 7",
"2 5",
"5 8",
"8 7",
"5 0",
"9 5",
"3 9",
"4 0",
"8 0",
"4 3",
"9 1",
"3 0",
"7 3",
"6 7",
"1 2",
"7 5",
"8 1",
"5 7",
"4 9",
"2 0",
"5 3",
"1 3",
"6 8",
"7 2"
]
}
}