beast / tokenizer.json
Ryan Brooks
Upload 3 files
960db59 verified
raw
history blame
No virus
19 kB
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<|void|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "Sequence",
"pretokenizers": [
{
"type": "Digits",
"individual_digits": true
},
{
"type": "ByteLevel",
"add_prefix_space": false,
"trim_offsets": true,
"use_regex": true
}
]
},
"post_processor": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": true,
"use_regex": true
},
"decoder": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": true,
"use_regex": true
},
"model": {
"type": "BPE",
"dropout": 0.9,
"unk_token": "<|void|>",
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": true,
"byte_fallback": false,
"ignore_merges": false,
"vocab": {
"<|void|>": 0,
"!": 1,
"\"": 2,
"#": 3,
"$": 4,
"%": 5,
"&": 6,
"'": 7,
"(": 8,
")": 9,
"*": 10,
"+": 11,
",": 12,
"-": 13,
".": 14,
"/": 15,
"0": 16,
"1": 17,
"2": 18,
"3": 19,
"4": 20,
"5": 21,
"6": 22,
"7": 23,
"8": 24,
"9": 25,
":": 26,
";": 27,
"<": 28,
"=": 29,
">": 30,
"?": 31,
"@": 32,
"A": 33,
"B": 34,
"C": 35,
"D": 36,
"E": 37,
"F": 38,
"G": 39,
"H": 40,
"I": 41,
"J": 42,
"K": 43,
"L": 44,
"M": 45,
"N": 46,
"O": 47,
"P": 48,
"Q": 49,
"R": 50,
"S": 51,
"T": 52,
"U": 53,
"V": 54,
"W": 55,
"X": 56,
"Y": 57,
"Z": 58,
"[": 59,
"\\": 60,
"]": 61,
"^": 62,
"_": 63,
"`": 64,
"a": 65,
"b": 66,
"c": 67,
"d": 68,
"e": 69,
"f": 70,
"g": 71,
"h": 72,
"i": 73,
"j": 74,
"k": 75,
"l": 76,
"m": 77,
"n": 78,
"o": 79,
"p": 80,
"q": 81,
"r": 82,
"s": 83,
"t": 84,
"u": 85,
"v": 86,
"w": 87,
"x": 88,
"y": 89,
"z": 90,
"{": 91,
"|": 92,
"}": 93,
"~": 94,
"¡": 95,
"¢": 96,
"£": 97,
"¤": 98,
"¥": 99,
"¦": 100,
"§": 101,
"¨": 102,
"©": 103,
"ª": 104,
"«": 105,
"¬": 106,
"®": 107,
"¯": 108,
"°": 109,
"±": 110,
"²": 111,
"³": 112,
"´": 113,
"µ": 114,
"¶": 115,
"·": 116,
"¸": 117,
"¹": 118,
"º": 119,
"»": 120,
"¼": 121,
"½": 122,
"¾": 123,
"¿": 124,
"À": 125,
"Á": 126,
"Â": 127,
"Ã": 128,
"Ä": 129,
"Å": 130,
"Æ": 131,
"Ç": 132,
"È": 133,
"É": 134,
"Ê": 135,
"Ë": 136,
"Ì": 137,
"Í": 138,
"Î": 139,
"Ï": 140,
"Ð": 141,
"Ñ": 142,
"Ò": 143,
"Ó": 144,
"Ô": 145,
"Õ": 146,
"Ö": 147,
"×": 148,
"Ø": 149,
"Ù": 150,
"Ú": 151,
"Û": 152,
"Ü": 153,
"Ý": 154,
"Þ": 155,
"ß": 156,
"à": 157,
"á": 158,
"â": 159,
"ã": 160,
"ä": 161,
"å": 162,
"æ": 163,
"ç": 164,
"è": 165,
"é": 166,
"ê": 167,
"ë": 168,
"ì": 169,
"í": 170,
"î": 171,
"ï": 172,
"ð": 173,
"ñ": 174,
"ò": 175,
"ó": 176,
"ô": 177,
"õ": 178,
"ö": 179,
"÷": 180,
"ø": 181,
"ù": 182,
"ú": 183,
"û": 184,
"ü": 185,
"ý": 186,
"þ": 187,
"ÿ": 188,
"Ā": 189,
"ā": 190,
"Ă": 191,
"ă": 192,
"Ą": 193,
"ą": 194,
"Ć": 195,
"ć": 196,
"Ĉ": 197,
"ĉ": 198,
"Ċ": 199,
"ċ": 200,
"Č": 201,
"č": 202,
"Ď": 203,
"ď": 204,
"Đ": 205,
"đ": 206,
"Ē": 207,
"ē": 208,
"Ĕ": 209,
"ĕ": 210,
"Ė": 211,
"ė": 212,
"Ę": 213,
"ę": 214,
"Ě": 215,
"ě": 216,
"Ĝ": 217,
"ĝ": 218,
"Ğ": 219,
"ğ": 220,
"Ġ": 221,
"ġ": 222,
"Ģ": 223,
"ģ": 224,
"Ĥ": 225,
"ĥ": 226,
"Ħ": 227,
"ħ": 228,
"Ĩ": 229,
"ĩ": 230,
"Ī": 231,
"ī": 232,
"Ĭ": 233,
"ĭ": 234,
"Į": 235,
"į": 236,
"İ": 237,
"ı": 238,
"IJ": 239,
"ij": 240,
"Ĵ": 241,
"ĵ": 242,
"Ķ": 243,
"ķ": 244,
"ĸ": 245,
"Ĺ": 246,
"ĺ": 247,
"Ļ": 248,
"ļ": 249,
"Ľ": 250,
"ľ": 251,
"Ŀ": 252,
"ŀ": 253,
"Ł": 254,
"ł": 255,
"Ń": 256,
"Ġt": 257,
"Ġa": 258,
"in": 259,
"he": 260,
"on": 261,
"ti": 262,
"re": 263,
"Ġthe": 264,
"er": 265,
"Ġc": 266,
"nd": 267,
"es": 268,
"Ġo": 269,
"Ġs": 270,
"tion": 271,
"en": 272,
"al": 273,
"or": 274,
"is": 275,
"at": 276,
"ing": 277,
"Ġand": 278,
"ĠĠ": 279,
"Ġp": 280,
"an": 281,
"Ġin": 282,
"le": 283,
"it": 284,
"Ġof": 285,
"Ġf": 286,
"ro": 287,
"Ġd": 288,
"Ġm": 289,
"ic": 290,
"Ġw": 291,
"Ġto": 292,
"ec": 293,
"ar": 294,
"ed": 295,
"Ġb": 296,
"us": 297,
"Ġe": 298,
"ation": 299,
"ent": 300,
"mp": 301,
"Ġre": 302,
"lo": 303,
"as": 304,
"ac": 305,
"ve": 306,
"Ġis": 307,
"Ġco": 308,
"st": 309,
"Ġth": 310,
"et": 311,
"ou": 312,
"##": 313,
"ĠT": 314,
"Ġpro": 315,
"de": 316,
"Ġn": 317,
"ment": 318,
"Ġcon": 319,
"uc": 320,
"ul": 321,
"il": 322,
"Ġh": 323,
"ol": 324,
"ce": 325,
"ig": 326,
"Ġus": 327,
"ut": 328,
"Ġfor": 329,
"Ġst": 330,
"Ġcan": 331,
"``": 332,
"ter": 333,
"Ġex": 334,
"un": 335,
"ĠĠĠ": 336,
"am": 337,
"ate": 338,
"ur": 339,
"qu": 340,
"Ġv": 341,
"Ġ`": 342,
"Ġi": 343,
"Ġas": 344,
"ab": 345,
"Ġal": 346,
"ly": 347,
"Ġbe": 348,
"Ġthat": 349,
"se": 350,
"ĠS": 351,
"si": 352,
"ith": 353,
"In": 354,
"ĠC": 355,
"Ġl": 356,
"ge": 357,
"em": 358,
"ity": 359,
"Ġan": 360,
"ess": 361,
"vi": 362,
"Ġde": 363,
"ts": 364,
"res": 365,
"ri": 366,
"pp": 367,
"Ġmo": 368,
"Ġy": 369,
"Ġg": 370,
"Ġare": 371,
"ers": 372,
"ff": 373,
"Ġit": 374,
"and": 375,
"um": 376,
"om": 377,
"ations": 378,
"ot": 379,
"ap": 380,
"tic": 381,
"mple": 382,
"ĠThe": 383,
"Ġwe": 384,
"Ġyou": 385,
"ĠA": 386,
"Ġon": 387,
"Ġ-": 388,
"Ġor": 389,
"ver": 390,
"ain": 391,
"Ġwith": 392,
"im": 393,
"ir": 394,
"ction": 395,
"ection": 396,
"ow": 397,
"Ġ[": 398,
"ign": 399,
"ay": 400,
"ad": 401,
"ure": 402,
"yst": 403,
"iz": 404,
"ect": 405,
"Ġen": 406,
"ĠE": 407,
"her": 408,
"per": 409,
"ystem": 410,
"el": 411,
"```": 412,
"ine": 413,
"ort": 414,
"his": 415,
"low": 416,
"](": 417,
"od": 418,
"](#": 419,
"ĠP": 420,
"Ġch": 421,
"Ġ=": 422,
"ĠI": 423,
"ĠĠĠĠ": 424,
"ant": 425,
"lic": 426,
"ial": 427,
"Ġimp": 428,
"ren": 429,
"ĠThis": 430,
"for": 431,
"ill": 432,
"Ġapp": 433,
"Ġdi": 434,
"ch": 435,
"unction": 436,
"th": 437,
"cess": 438,
"age": 439,
"ak": 440,
"ĠD": 441,
"Ġsystem": 442,
"ari": 443,
"duc": 444,
"Ġcont": 445,
"ech": 446,
"ata": 447,
"cl": 448,
"Ġint": 449,
"ance": 450,
"ise": 451,
"ub": 452,
"Ġcomp": 453,
"if": 454,
"Ġu": 455,
"able": 456,
"form": 457,
"Ġcom": 458,
"Ġthis": 459,
"ĠM": 460,
"tive": 461,
"ruc": 462,
"Ġr": 463,
"so": 464,
"ave": 465,
"Ġfunction": 466,
"Ġ(": 467,
"ĠF": 468,
"ud": 469,
"Ġused": 470,
"ĠR": 471,
"Ġad": 472,
"gr": 473,
"ĠIn": 474,
"Ġres": 475,
"Ġsuc": 476,
"Ġwill": 477,
"Ġby": 478,
"erc": 479,
"id": 480,
"alu": 481,
"Ġle": 482,
"act": 483,
"Ġne": 484,
"ist": 485,
"Ġwh": 486,
"ĠB": 487,
"'s": 488,
"ct": 489,
"ĠW": 490,
"all": 491,
"xerc": 492,
"ical": 493,
"nder": 494,
"Ġtr": 495,
"ion": 496,
"enti": 497,
"Ġexa": 498,
"Ġdata": 499,
"igh": 500,
"Ġvari": 501,
"yp": 502,
"ater": 503,
"Ġuse": 504,
"ous": 505,
"Ġsuch": 506,
"Ġhave": 507,
"rin": 508,
"Ġcomm": 509,
"ble": 510,
"Ġdes": 511,
"ution": 512,
"Ġsp": 513,
"echn": 514,
"ach": 515,
"Ġac": 516,
"ip": 517,
"rom": 518,
"###": 519,
"ments": 520,
"Ġalso": 521,
"ges": 522,
"xercise": 523,
"red": 524,
"Ġexample": 525,
"tim": 526,
"ors": 527,
"ics": 528,
"Ġexp": 529,
"eren": 530,
"les": 531,
"di": 532,
"Ġse": 533,
"ile": 534,
"ass": 535,
"eth": 536,
"xt": 537,
"log": 538,
"velo": 539,
"ethod": 540,
"Ġyour": 541,
"The": 542,
"ork": 543,
"ies": 544,
"apter": 545,
"Ġtheir": 546,
"hen": 547,
"Ġeff": 548,
"ust": 549,
"est": 550,
"ĠO": 551,
"ame": 552,
"Ġprocess": 553,
"Ġdiff": 554,
"stand": 555,
"Ġvalu": 556,
"Ġunder": 557,
"op": 558,
"Ġop": 559,
"ĠH": 560,
"velop": 561,
"del": 562,
"ties": 563,
"Ġprovi": 564,
"Ġdis": 565,
"ms": 566,
"ther": 567,
"Ġcode": 568,
"Ġpar": 569,
"ron": 570,
"Ġtechn": 571,
"ich": 572,
"Ġk": 573,
"os": 574,
"ack": 575,
"Ġfrom": 576,
"Ġ'": 577,
"sion": 578,
"Ġusing": 579,
"yn": 580,
"ans": 581,
"Ġmethod": 582,
"te": 583,
"Ġhe": 584,
"Ġ\"": 585,
"etw": 586,
"put": 587,
"umb": 588,
"Ġele": 589,
"Ġwhich": 590,
"ase": 591,
"ĠĠĠĠĠ": 592,
"ated": 593,
"iqu": 594,
"der": 595,
"Ġcl": 596,
"ility": 597,
"olution": 598,
"ĠL": 599,
"ĠIt": 600,
"Ġat": 601,
"lications": 602,
"Ġdifferen": 603,
"Ġdesign": 604,
":-": 605,
"ces": 606,
"Ġro": 607,
"Ġits": 608,
"ult": 609,
"oc": 610,
"Ġallow": 611,
"ĠĠĠĠĠĠĠ": 612,
"are": 613,
"Ġsystems": 614,
"aly": 615,
"Ġper": 616,
"tions": 617,
"Ġsh": 618,
"Ġinter": 619,
"Ġhas": 620,
"Ġar": 621,
"ves": 622,
"gor": 623,
"ating": 624,
"Ġtyp": 625,
"Ġunderstand": 626,
"cep": 627,
"Ġcre": 628,
"Ġpl": 629,
"ence": 630,
"ld": 631,
"gram": 632,
"Ġdevelop": 633,
"####": 634,
"ory": 635,
"our": 636,
"Ġmore": 637,
"ject": 638,
"Ġman": 639,
"Ġsc": 640,
"Ġincl": 641,
"',": 642,
"ization": 643,
"ruct": 644,
"gorith": 645,
"Ġimport": 646,
"ll": 647,
"ace": 648,
"Ġqu": 649,
"Ġinto": 650,
"ĠG": 651,
"Ġthese": 652,
"ific": 653,
"rint": 654,
"ug": 655,
"ary": 656,
"rib": 657,
"ential": 658,
"ents": 659,
"ual": 660,
"Ġmak": 661,
"Ġmodel": 662,
"Ġdifferent": 663,
"ures": 664,
"Ġob": 665
},
"merges": [
"Ġ t",
"Ġ a",
"i n",
"h e",
"o n",
"t i",
"r e",
"Ġt he",
"e r",
"Ġ c",
"n d",
"e s",
"Ġ o",
"Ġ s",
"ti on",
"e n",
"a l",
"o r",
"i s",
"a t",
"in g",
"Ġa nd",
"Ġ Ġ",
"Ġ p",
"a n",
"Ġ in",
"l e",
"i t",
"Ġo f",
"Ġ f",
"r o",
"Ġ d",
"Ġ m",
"i c",
"Ġ w",
"Ġt o",
"e c",
"a r",
"e d",
"Ġ b",
"u s",
"Ġ e",
"a tion",
"en t",
"m p",
"Ġ re",
"l o",
"a s",
"a c",
"v e",
"Ġ is",
"Ġc o",
"s t",
"Ġt h",
"e t",
"o u",
"# #",
"Ġ T",
"Ġp ro",
"d e",
"Ġ n",
"m ent",
"Ġc on",
"u c",
"u l",
"i l",
"Ġ h",
"o l",
"c e",
"i g",
"Ġ us",
"u t",
"Ġf or",
"Ġs t",
"Ġc an",
"` `",
"t er",
"Ġe x",
"u n",
"ĠĠ Ġ",
"a m",
"at e",
"u r",
"q u",
"Ġ v",
"Ġ `",
"Ġ i",
"Ġa s",
"a b",
"Ġa l",
"l y",
"Ġb e",
"Ġth at",
"s e",
"Ġ S",
"s i",
"it h",
"I n",
"Ġ C",
"Ġ l",
"g e",
"e m",
"it y",
"Ġa n",
"es s",
"v i",
"Ġd e",
"t s",
"re s",
"r i",
"p p",
"Ġm o",
"Ġ y",
"Ġ g",
"Ġa re",
"er s",
"f f",
"Ġ it",
"a nd",
"u m",
"o m",
"ation s",
"o t",
"a p",
"ti c",
"mp le",
"ĠT he",
"Ġw e",
"Ġy ou",
"Ġ A",
"Ġ on",
"Ġ -",
"Ġo r",
"v er",
"a in",
"Ġw ith",
"i m",
"i r",
"c tion",
"ec tion",
"o w",
"Ġ [",
"ig n",
"a y",
"a d",
"u re",
"y st",
"i z",
"ec t",
"Ġ en",
"Ġ E",
"he r",
"p er",
"yst em",
"e l",
"`` `",
"in e",
"or t",
"h is",
"lo w",
"] (",
"o d",
"]( #",
"Ġ P",
"Ġc h",
"Ġ =",
"Ġ I",
"ĠĠ ĠĠ",
"an t",
"l ic",
"i al",
"Ġi mp",
"re n",
"ĠT his",
"f or",
"il l",
"Ġa pp",
"Ġd i",
"c h",
"un ction",
"t h",
"c ess",
"a ge",
"a k",
"Ġ D",
"Ġs ystem",
"ar i",
"d uc",
"Ġcon t",
"ec h",
"at a",
"c l",
"Ġin t",
"an ce",
"is e",
"u b",
"Ġco mp",
"i f",
"Ġ u",
"ab le",
"for m",
"Ġco m",
"Ġth is",
"Ġ M",
"ti ve",
"r uc",
"Ġ r",
"s o",
"a ve",
"Ġf unction",
"Ġ (",
"Ġ F",
"u d",
"Ġus ed",
"Ġ R",
"Ġa d",
"g r",
"Ġ In",
"Ġre s",
"Ġs uc",
"Ġw ill",
"Ġb y",
"er c",
"i d",
"al u",
"Ġ le",
"ac t",
"Ġn e",
"is t",
"Ġw h",
"Ġ B",
"' s",
"c t",
"Ġ W",
"al l",
"x erc",
"ic al",
"nd er",
"Ġt r",
"i on",
"en ti",
"Ġex a",
"Ġd ata",
"ig h",
"Ġv ari",
"y p",
"at er",
"Ġus e",
"o us",
"Ġsuc h",
"Ġh ave",
"r in",
"Ġcom m",
"b le",
"Ġd es",
"u tion",
"Ġs p",
"ech n",
"ac h",
"Ġa c",
"i p",
"ro m",
"## #",
"ment s",
"Ġal so",
"g es",
"xerc ise",
"re d",
"Ġexa mple",
"ti m",
"or s",
"ic s",
"Ġex p",
"e ren",
"l es",
"d i",
"Ġs e",
"i le",
"as s",
"et h",
"x t",
"lo g",
"ve lo",
"eth od",
"Ġyou r",
"T he",
"or k",
"i es",
"ap ter",
"Ġthe ir",
"he n",
"Ġe ff",
"us t",
"es t",
"Ġ O",
"am e",
"Ġpro cess",
"Ġdi ff",
"st and",
"Ġv alu",
"Ġu nder",
"o p",
"Ġo p",
"Ġ H",
"velo p",
"de l",
"ti es",
"Ġpro vi",
"Ġd is",
"m s",
"t her",
"Ġco de",
"Ġp ar",
"r on",
"Ġt echn",
"ic h",
"Ġ k",
"o s",
"ac k",
"Ġf rom",
"Ġ '",
"si on",
"Ġus ing",
"y n",
"an s",
"Ġm ethod",
"t e",
"Ġ he",
"Ġ \"",
"et w",
"p ut",
"um b",
"Ġe le",
"Ġwh ich",
"as e",
"ĠĠ ĠĠĠ",
"at ed",
"i qu",
"d er",
"Ġc l",
"il ity",
"ol ution",
"Ġ L",
"ĠI t",
"Ġa t",
"lic ations",
"Ġdiff eren",
"Ġdes ign",
": -",
"c es",
"Ġ ro",
"Ġit s",
"ul t",
"o c",
"Ġal low",
"ĠĠĠĠ ĠĠĠ",
"a re",
"Ġsystem s",
"al y",
"Ġp er",
"tion s",
"Ġs h",
"Ġin ter",
"Ġh as",
"Ġa r",
"v es",
"g or",
"at ing",
"Ġt yp",
"Ġunder stand",
"ce p",
"Ġc re",
"Ġp l",
"en ce",
"l d",
"gr am",
"Ġde velop",
"## ##",
"or y",
"ou r",
"Ġmo re",
"j ect",
"Ġm an",
"Ġs c",
"Ġin cl",
"' ,",
"iz ation",
"ruc t",
"gor ith",
"Ġimp ort",
"l l",
"ac e",
"Ġ qu",
"Ġint o",
"Ġ G",
"Ġthe se",
"if ic",
"rin t",
"u g",
"ar y",
"ri b",
"enti al",
"ent s",
"u al",
"Ġm ak",
"Ġmo del",
"Ġdifferen t",
"u res",
"Ġo b"
]
}
}