diff --git a/flax_model-00001-of-00072.msgpack b/flax_model-00001-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..fbf4a4bf8d5d5a8539ff4b2d1c45ee3cd75faa57 --- /dev/null +++ b/flax_model-00001-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bb30ce7465c65a39063f77b0f72b112da7688644883a3e076bc885288d0149c +size 7193289083 diff --git a/flax_model-00002-of-00072.msgpack b/flax_model-00002-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..d085ad9ce9e8f2651d53632d1d347772411fdea4 --- /dev/null +++ b/flax_model-00002-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd8f0616874656c9108578599cb34d1ffce632d1eba0f4fd3823e0bca463c0fd +size 4932874987 diff --git a/flax_model-00003-of-00072.msgpack b/flax_model-00003-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..b40bfa2620b3fb5bf0d0a12ea869b1672cd4d8db --- /dev/null +++ b/flax_model-00003-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:280838d43eec588ab564c359b10928a3324121033cea777e688ade2b0af82ad6 +size 4932874987 diff --git a/flax_model-00004-of-00072.msgpack b/flax_model-00004-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..f4f17d88d06fb24393ea656a1ad43d78b43dbd08 --- /dev/null +++ b/flax_model-00004-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ed6285b27e30cb1f7a3ced2a0a138164c6d313d464f2cc281306558b85413e +size 4932874987 diff --git a/flax_model-00005-of-00072.msgpack b/flax_model-00005-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..f3da055c3fbb411efb8b4b1dcacb93ae70dd68ec --- /dev/null +++ b/flax_model-00005-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f47cc204d125ab740c0cfe94a1967658c9546b8735ca8960fc9b8f95fb4d515 +size 4932874987 diff --git a/flax_model-00006-of-00072.msgpack b/flax_model-00006-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..6b1d75781dd0ace500451b14f4d4f4e00aebe299 --- /dev/null +++ b/flax_model-00006-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e27cbeef861dbd78edd7dae3c6c83eee2312f40165e1ca48e78f446310a5baa +size 4932874987 diff --git a/flax_model-00007-of-00072.msgpack b/flax_model-00007-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..81b93757b066c1597731c836d172e0ab630ce70e --- /dev/null +++ b/flax_model-00007-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ad9d0895c5dd75bba62dc403149b7b531e242f2c713d4178663cecd288a94b0 +size 4932874987 diff --git a/flax_model-00008-of-00072.msgpack b/flax_model-00008-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..ad6b4a554c2ccf973cdc5ead9ad4e0638e97d824 --- /dev/null +++ b/flax_model-00008-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff1aa0ea5f29f88b73ff94ad1c720e94ed672ec6193e29660c709bd5fdca109e +size 4932874987 diff --git a/flax_model-00009-of-00072.msgpack b/flax_model-00009-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..008499f302f2fa34f14431989b554e921de0df6a --- /dev/null +++ b/flax_model-00009-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e60adb4de5d6b1295b0f6c4520fd582b46cc824f090fcb5079fa708c69a257f +size 4932874987 diff --git a/flax_model-00010-of-00072.msgpack b/flax_model-00010-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..1b747cca41eb3d1c04b6e778e101ef6ea148ed47 --- /dev/null +++ b/flax_model-00010-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edc35d861fe81460bf8e3be30f9e5053d066c7b9099b3826c7171bef6c472dd1 +size 4932874987 diff --git a/flax_model-00011-of-00072.msgpack b/flax_model-00011-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..8993458fdf969118b0200f135a3a4b6ade36c9a6 --- /dev/null +++ b/flax_model-00011-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:796a08b927940d7ef517e8b7071250e5a967274f9004ba570c2945a84d8904fe +size 4932874987 diff --git a/flax_model-00012-of-00072.msgpack b/flax_model-00012-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..46c1afc54df843c60fa159b2c32e57044264e995 --- /dev/null +++ b/flax_model-00012-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50fff61bccddef04d51f6c270250a328552e09343f18eb5b9e36ce118caa047b +size 4932874988 diff --git a/flax_model-00013-of-00072.msgpack b/flax_model-00013-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..f397209f8ef98df149630acf3f0d1b587972cdf7 --- /dev/null +++ b/flax_model-00013-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec467f74b912b4f27d315c2cc886350bdbee9082694995562410abbb74411124 +size 4932874988 diff --git a/flax_model-00014-of-00072.msgpack b/flax_model-00014-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..f664b0b6488560cfe1bc1662f9c5552b7d3ec33a --- /dev/null +++ b/flax_model-00014-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f288cd3da318ee2df8dd78b5666d096e5a90fb44008afc180c7f2465ff9c7608 +size 4932874988 diff --git a/flax_model-00015-of-00072.msgpack b/flax_model-00015-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..16284c276b228807139deee803ab615374d9a59a --- /dev/null +++ b/flax_model-00015-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:932d4b99d667b3fd68e940e4305d38c8da1e062b9df06042226042ad752cdbeb +size 4932874988 diff --git a/flax_model-00016-of-00072.msgpack b/flax_model-00016-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..ccae39564dcf440a769b6d361016945802ac83e5 --- /dev/null +++ b/flax_model-00016-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cebb3a69e418c730b8eebd3d9d9804e0114efa3bf1fc435d049981e57c1362b +size 4932874988 diff --git a/flax_model-00017-of-00072.msgpack b/flax_model-00017-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..b558ff267d2e586120ad74c2af4e5ae5e71fe66b --- /dev/null +++ b/flax_model-00017-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2102e69c8fb924021332f0c362ddc26250297237effdbb3ae3e3d455b0ab88c7 +size 4932874988 diff --git a/flax_model-00018-of-00072.msgpack b/flax_model-00018-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..2e2bcfad0cce1d45da080366a9ce05fdcac01267 --- /dev/null +++ b/flax_model-00018-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bfb541d59acfca0d989bf3e66c825d683385412cc38b0bf5af645714d17f697 +size 4932874988 diff --git a/flax_model-00019-of-00072.msgpack b/flax_model-00019-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..bc3594aa69f8bccaf23103918b6a202e3b192703 --- /dev/null +++ b/flax_model-00019-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a328f1f3be883996e96cd2533ae27a24779c3bcf7119b1f4de82bac876eac7ae +size 4932874988 diff --git a/flax_model-00020-of-00072.msgpack b/flax_model-00020-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..897822b33cf46e4fcddc2b3ca87fe582e6ef7360 --- /dev/null +++ b/flax_model-00020-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c984fde0b831e772b07085584395af7347edcc3073c69efabf1df6c84215835f +size 4932874988 diff --git a/flax_model-00021-of-00072.msgpack b/flax_model-00021-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..02ce19a099e562aac0a184995a3cdfbadde65295 --- /dev/null +++ b/flax_model-00021-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bdb71a5bcb2e00364266377d7c43bb25e5d2cd91b5a85622d35c128ba303680 +size 4932874988 diff --git a/flax_model-00022-of-00072.msgpack b/flax_model-00022-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..ba71d4044bf834eaa8d33ed98db923bdeebaf785 --- /dev/null +++ b/flax_model-00022-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb61fa837115f472e396e6fe8aad056c1e09819b307034c7134ee5b5280f90cd +size 4932874988 diff --git a/flax_model-00023-of-00072.msgpack b/flax_model-00023-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..34ac6abe67a3adc0dc08c8d14a3872f4fb262212 --- /dev/null +++ b/flax_model-00023-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d764c3ba42402c3bb25aadce240351ade21bc0102929a9467d81df6bdde2af90 +size 4932874988 diff --git a/flax_model-00024-of-00072.msgpack b/flax_model-00024-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..3ce56adc79675524bcb9fa672babfa0bdc90ccc4 --- /dev/null +++ b/flax_model-00024-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b203c3d2a6cd708b375d597621f54b02fe2d251f70f974db5a6550dbfb7a5d03 +size 4932874988 diff --git a/flax_model-00025-of-00072.msgpack b/flax_model-00025-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..08ecab1efa88833d7998731b1fbe2a61047092d4 --- /dev/null +++ b/flax_model-00025-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48864e3b840257cd15623d850a8ab6b0af682ce71637cf5470de9c86c395f0b4 +size 4932874988 diff --git a/flax_model-00026-of-00072.msgpack b/flax_model-00026-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..44f164e05d51be182652d7736c5f97676b7bcdbc --- /dev/null +++ b/flax_model-00026-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28ef8e010db162c61a146e44fcbe67871a3535cf02bef2fe47432fe39c3ecf26 +size 4932874988 diff --git a/flax_model-00027-of-00072.msgpack b/flax_model-00027-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..3d9e65a750359072338404288d430f19d257a62c --- /dev/null +++ b/flax_model-00027-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae3e9023af432dbf29bde1ae424b9db7197ce23d0cdd1401b207f62f36b0754f +size 4932874988 diff --git a/flax_model-00028-of-00072.msgpack b/flax_model-00028-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..690a674d61f0e0145f28ec7398dca7efb58b9b2d --- /dev/null +++ b/flax_model-00028-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fbad073c701c8579f6239fe3c7c791b985dd104dd1d6dc76efff7d7ff12d6a7 +size 4932874988 diff --git a/flax_model-00029-of-00072.msgpack b/flax_model-00029-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..77be5a5ab98a7a5d9534d9300e15a5f8c979df95 --- /dev/null +++ b/flax_model-00029-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea6b4403b3f9125f9023e9b797743371463224cae27494aae219d299db0c7b88 +size 4932874988 diff --git a/flax_model-00030-of-00072.msgpack b/flax_model-00030-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..8501a792b0b9b1b9d982c3ba71678565c10c7668 --- /dev/null +++ b/flax_model-00030-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:515ea8fbac667f2aa7cca85d918ed83a74b339c28336f57c8e8a3d75bd35244e +size 4932874988 diff --git a/flax_model-00031-of-00072.msgpack b/flax_model-00031-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..d514462c1eb6b693c994b765c28cbccc5f3379c6 --- /dev/null +++ b/flax_model-00031-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f3b5b1fde0cb12e82d25423acfa1f7495f7d3c13cbeca2d65fcb99cf95d59d9 +size 4932874988 diff --git a/flax_model-00032-of-00072.msgpack b/flax_model-00032-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..7ab81174c9d34befc960653ca93f333ba8093195 --- /dev/null +++ b/flax_model-00032-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89283e0ae6586d922d82a3b17b1f760e7d9ca14508bab0b8f262ab35fa255672 +size 4932874988 diff --git a/flax_model-00033-of-00072.msgpack b/flax_model-00033-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..18a06f079543a9b026eb1463a5feba2f2dc7afec --- /dev/null +++ b/flax_model-00033-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3abee4ebbde1c84de7942e4b476c57e7ab7217fef78501f2a9817789e6844363 +size 4932874988 diff --git a/flax_model-00034-of-00072.msgpack b/flax_model-00034-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..cd576c5eb6ae2a5bbaa77ff3c3a3366fa09202f1 --- /dev/null +++ b/flax_model-00034-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:549bd9970d30a0255e40be44e3b93c9861641c8092d042378fd734a8ba94efd7 +size 4932874988 diff --git a/flax_model-00035-of-00072.msgpack b/flax_model-00035-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..0b4d0897179685b17983c18b9d02ac202fc7cb73 --- /dev/null +++ b/flax_model-00035-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bba3369500f8d4d2c26b2f8ac6624e1345ee1ac4eaa80b5c6f546dc5bc824e6 +size 4932874988 diff --git a/flax_model-00036-of-00072.msgpack b/flax_model-00036-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..af9819c9c8f2e441fb4fbbf5762e1a0925f4f858 --- /dev/null +++ b/flax_model-00036-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be2c4708baa095ecba5c10aebbef58dc49029e3b768fae80f3c458afc7fbecdc +size 4932874988 diff --git a/flax_model-00037-of-00072.msgpack b/flax_model-00037-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..082bed15e9dd1a16f88a5efe7120010d7007344f --- /dev/null +++ b/flax_model-00037-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02cface56cd2bdaa22387e5e3facc7fb3bd2364c7f27d9a2cd41167cec8d096a +size 4932874988 diff --git a/flax_model-00038-of-00072.msgpack b/flax_model-00038-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..52cebdacbb4b09c65d32031dddddb60881976df6 --- /dev/null +++ b/flax_model-00038-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b697aa482c6a021aa7f46090f4bd89531cfd8edb19dbe50a269b49029b3777fc +size 4932874988 diff --git a/flax_model-00039-of-00072.msgpack b/flax_model-00039-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..108a87f9912ce1b9f06367f6a57eededa64d5a71 --- /dev/null +++ b/flax_model-00039-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44a0ed58fbe7f88a326c2f1419accefee0e11ec220cb36ae0758a6e515b22865 +size 4932874988 diff --git a/flax_model-00040-of-00072.msgpack b/flax_model-00040-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..d4aecbef7f62830f52c8abe3c3d3cfb8e631a9d1 --- /dev/null +++ b/flax_model-00040-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25ba69bae1f8a86e17dabe8b26c2e3b6b4c4a4cc30e00ab4bc72ddd6d283c0f7 +size 4932874988 diff --git a/flax_model-00041-of-00072.msgpack b/flax_model-00041-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..7ed1162cb75e2fb55826616c52a81d4ad5c789e4 --- /dev/null +++ b/flax_model-00041-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6b56648b80db5bdfc56d517232ccf1193cd262c95015d3f3cbc46134ffc2767 +size 4932874988 diff --git a/flax_model-00042-of-00072.msgpack b/flax_model-00042-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..04077dbd370856b3132f48ff7a01b5acd5d2f8ce --- /dev/null +++ b/flax_model-00042-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afd75cd3541850f48c96625e3cd02cdb92017b540a2a2229baa91def0706018f +size 4932874988 diff --git a/flax_model-00043-of-00072.msgpack b/flax_model-00043-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..541a61239d8e04a63b55ae68e92de9a343e0573a --- /dev/null +++ b/flax_model-00043-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9588902e20d771dfd54fb23a71b5c227e895d42741823a5c3750b18ad557670d +size 4932874988 diff --git a/flax_model-00044-of-00072.msgpack b/flax_model-00044-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..c52a9c239dd20db3baa072b71733c8b8c7735d54 --- /dev/null +++ b/flax_model-00044-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84092e227bacb65a64553274f73475d98aeb2b94fa9fb54314eb9fec98c45cae +size 4932874988 diff --git a/flax_model-00045-of-00072.msgpack b/flax_model-00045-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..5e64abb80a842cb43d746e98d5f39ab8c887b7c2 --- /dev/null +++ b/flax_model-00045-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b54e1a273fff705edab7ebebf77b6954f247ae53afe4fb4667e8de5a14b1a22d +size 4932874988 diff --git a/flax_model-00046-of-00072.msgpack b/flax_model-00046-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..b7c310e64d3f99f0fdf114cd7e014f5745965549 --- /dev/null +++ b/flax_model-00046-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb65d06df8f5515c4b222453138f868ad7b81a267937bb99593c7b47778e439 +size 4932874988 diff --git a/flax_model-00047-of-00072.msgpack b/flax_model-00047-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..8733bcd2d0b5c0a45c86c7d94688e6d195793f99 --- /dev/null +++ b/flax_model-00047-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d727c860368a0f9b600203e44f65d2a7e855898008c797c9869461b8e598d21b +size 4932874988 diff --git a/flax_model-00048-of-00072.msgpack b/flax_model-00048-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..e503fdaff30e5a0f8552e284f5a2e0b7aa9df5a4 --- /dev/null +++ b/flax_model-00048-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c52c50af0e2e2a0d348f43dbe019f01f852a175e7f1c7701537a6d9575bed397 +size 4932874988 diff --git a/flax_model-00049-of-00072.msgpack b/flax_model-00049-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..8c650dc84bece94ad29a96ca412d49771e5a139c --- /dev/null +++ b/flax_model-00049-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3361925f0e9511fdd3367cf59c4729102b7fe13f1e90f770703f09ddafa0b01 +size 4932874988 diff --git a/flax_model-00050-of-00072.msgpack b/flax_model-00050-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..6fdf62942a052a62dc220d4c612a32f9e53175d8 --- /dev/null +++ b/flax_model-00050-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2de7ec730278fae72d6522e13b1c6024384bac4bb8b26d111ec369ed4f695d1f +size 4932874988 diff --git a/flax_model-00051-of-00072.msgpack b/flax_model-00051-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..4b746734effe4f986d077283fafee9d250877b3f --- /dev/null +++ b/flax_model-00051-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ff85ea7d78e92a501a0d0675e078cc486752235f0991eaf32aceaca79f417fa +size 4932874988 diff --git a/flax_model-00052-of-00072.msgpack b/flax_model-00052-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..fd5b4c3cbe46b84b6dc3f5f63c89b7fa4bc6af8f --- /dev/null +++ b/flax_model-00052-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b43bbb46cc42d968b622e692c63943344238c4835c01444ef05ceb2f6874094 +size 4932874988 diff --git a/flax_model-00053-of-00072.msgpack b/flax_model-00053-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..884a6fa8e77269a3f3feca18f5b8e4cb2608ce9e --- /dev/null +++ b/flax_model-00053-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73fc4e3aab84e15d2ca8bd7c9b64a92a38a6e63d6fd435a108ab8a32b78428ec +size 4932874988 diff --git a/flax_model-00054-of-00072.msgpack b/flax_model-00054-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..f19e6d0a89f2cab16c1c2ecb3b3f25d8fb2e2d7e --- /dev/null +++ b/flax_model-00054-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d9281cd445e730b82393095c2d2d2e20d2f7ee5d968aecd1c67482d3fec65e7 +size 4932874988 diff --git a/flax_model-00055-of-00072.msgpack b/flax_model-00055-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..dde0f940a5cd9b8ea2dcc7ee7a4f4a9ea5484cd9 --- /dev/null +++ b/flax_model-00055-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3609193c66507e8aea1c17db5de5c588992b22cdc0d89a2598562f338181dd2 +size 4932874988 diff --git a/flax_model-00056-of-00072.msgpack b/flax_model-00056-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..4ad1ff18ce2ad015f32913e12fae9c4feac008bd --- /dev/null +++ b/flax_model-00056-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8be705203c88ad3d4f13218adcd97cd371dd7abc42f6a2c94cc18c5566b7d2f3 +size 4932874988 diff --git a/flax_model-00057-of-00072.msgpack b/flax_model-00057-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..99866c2ed4df36d0650594fcae715cc750e76bda --- /dev/null +++ b/flax_model-00057-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e52268f571d4eeed8c07c28103853584a584301ca8901e6dad8f2153eb334d11 +size 4932874988 diff --git a/flax_model-00058-of-00072.msgpack b/flax_model-00058-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..aab78d5915a9d7800ec88025890333863c70ff67 --- /dev/null +++ b/flax_model-00058-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae31511fd44fb4efe49702ce657be0d13e71237218f014daef44d52038113ecb +size 4932874988 diff --git a/flax_model-00059-of-00072.msgpack b/flax_model-00059-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..bab9411b59db42f01f291c323f757246a483aa77 --- /dev/null +++ b/flax_model-00059-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f9baf875f2e4322ab4dd297208895238211192feccaea01438c85273e0521cc +size 4932874988 diff --git a/flax_model-00060-of-00072.msgpack b/flax_model-00060-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..76de2c1c7bbe7319b3779cf6e9d2c6c123b029cc --- /dev/null +++ b/flax_model-00060-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bff8c8ba21ec800f285da363e44c677a193e4cdd69eb44a6df8b14c05009a52 +size 4932874988 diff --git a/flax_model-00061-of-00072.msgpack b/flax_model-00061-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..a505054372fe0c228a0c89ef2b362350d3331676 --- /dev/null +++ b/flax_model-00061-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfa57e5cbb80fb57bb88ac267fc9ad51e74172a083f77da01476efe78e941c76 +size 4932874988 diff --git a/flax_model-00062-of-00072.msgpack b/flax_model-00062-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..eba56caf4e8aa474742fd2b98593bbd7920c43ab --- /dev/null +++ b/flax_model-00062-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5771f6d10b5e087755ec4ea4d4adbac87b7fb320be8a6c049d29bf0e86398341 +size 4932874988 diff --git a/flax_model-00063-of-00072.msgpack b/flax_model-00063-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..45017725574294274efb31678763926ced277272 --- /dev/null +++ b/flax_model-00063-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:325297b31297eae372cbbb05392a763d5b967eb239d80bf151df0eb55f666a11 +size 4932874988 diff --git a/flax_model-00064-of-00072.msgpack b/flax_model-00064-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..27ac752ebb49443ce6d25bc170ee920adf3f75e2 --- /dev/null +++ b/flax_model-00064-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:082b05bd9116490e295d74eb8458600ec7a0c21f6d553698605ade1f46679ea0 +size 4932874988 diff --git a/flax_model-00065-of-00072.msgpack b/flax_model-00065-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..d772bd0b4152e49d101d5b4e5f3714123249e29a --- /dev/null +++ b/flax_model-00065-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf536b7393b3ef4c4f51beb40e79941b5906d9ba4f396da7b95492e5cb70d4fe +size 4932874988 diff --git a/flax_model-00066-of-00072.msgpack b/flax_model-00066-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..5cea717175a0c7e6ff1dc4c4186ea7a3c3a27496 --- /dev/null +++ b/flax_model-00066-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01b823761879edf1e9415ce14e03a5d39fe6f39e105433e56ee2999bf27de397 +size 4932874988 diff --git a/flax_model-00067-of-00072.msgpack b/flax_model-00067-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..e5c588cea8a622051a86d126476795fa80c256e0 --- /dev/null +++ b/flax_model-00067-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98acd1f74187fdfa936b1dcc830d5bfd8b3607fa3aaea6901a373ce7a0e002ac +size 4932874988 diff --git a/flax_model-00068-of-00072.msgpack b/flax_model-00068-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..8cd1de1e77c70b03ec70cd3787ebfd96e2a2d1f5 --- /dev/null +++ b/flax_model-00068-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2600505885f57d6d14afa551964f67a4eeb2b1eea164ef201cb9d0f0d03385b +size 4932874988 diff --git a/flax_model-00069-of-00072.msgpack b/flax_model-00069-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..1ac7003a8ad72eae5c704c397c1ce54d202689c7 --- /dev/null +++ b/flax_model-00069-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73d721f8fbac28fc52d01b04eb8857b5e41186498c4139f7528a130428e3fb34 +size 4932874988 diff --git a/flax_model-00070-of-00072.msgpack b/flax_model-00070-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..3d745eafd5a4b54fa097fcba3dc6ec0590eb9398 --- /dev/null +++ b/flax_model-00070-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:183dd98c856c7c85176a4083da3ce4fa6257e181fc47676af102e67ddaa1d826 +size 4932874988 diff --git a/flax_model-00071-of-00072.msgpack b/flax_model-00071-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..3a7af4082b0afcd08c129c75108dc2eed62970af --- /dev/null +++ b/flax_model-00071-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b9b01eaca6dec188135d2f0ea946cbfb9ea334bdb77dfd89167df84b8c840b6 +size 4932874988 diff --git a/flax_model-00072-of-00072.msgpack b/flax_model-00072-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..43e3df0d276f6306eaa944a2663716b13831c1c2 --- /dev/null +++ b/flax_model-00072-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42f8a18b3c4e8c0cb55b8958eab73e4359a1f62bf9b97cef788c2f60efc0072e +size 57417 diff --git a/flax_model.msgpack.index.json b/flax_model.msgpack.index.json new file mode 100644 index 0000000000000000000000000000000000000000..7cc3ed312606474d0ff993e04eaaa06392872ffd --- /dev/null +++ b/flax_model.msgpack.index.json @@ -0,0 +1,852 @@ +{ + "metadata": { + "total_size": 352494542848 + }, + "weight_map": { + "transformer/h/0/input_layernorm/bias": "flax_model-00002-of-00072.msgpack", + "transformer/h/0/input_layernorm/kernel": "flax_model-00002-of-00072.msgpack", + "transformer/h/0/mlp/dense_4h_to_h/bias": "flax_model-00002-of-00072.msgpack", + "transformer/h/0/mlp/dense_4h_to_h/kernel": "flax_model-00002-of-00072.msgpack", + "transformer/h/0/mlp/dense_h_to_4h/bias": "flax_model-00002-of-00072.msgpack", + "transformer/h/0/mlp/dense_h_to_4h/kernel": "flax_model-00002-of-00072.msgpack", + "transformer/h/0/post_attention_layernorm/bias": "flax_model-00002-of-00072.msgpack", + "transformer/h/0/post_attention_layernorm/kernel": "flax_model-00002-of-00072.msgpack", + "transformer/h/0/self_attention/dense/bias": "flax_model-00002-of-00072.msgpack", + "transformer/h/0/self_attention/dense/kernel": "flax_model-00002-of-00072.msgpack", + "transformer/h/0/self_attention/query_key_value/bias": "flax_model-00002-of-00072.msgpack", + "transformer/h/0/self_attention/query_key_value/kernel": "flax_model-00002-of-00072.msgpack", + "transformer/h/1/input_layernorm/bias": "flax_model-00003-of-00072.msgpack", + "transformer/h/1/input_layernorm/kernel": "flax_model-00003-of-00072.msgpack", + "transformer/h/1/mlp/dense_4h_to_h/bias": "flax_model-00003-of-00072.msgpack", + "transformer/h/1/mlp/dense_4h_to_h/kernel": "flax_model-00003-of-00072.msgpack", + "transformer/h/1/mlp/dense_h_to_4h/bias": "flax_model-00003-of-00072.msgpack", + "transformer/h/1/mlp/dense_h_to_4h/kernel": "flax_model-00003-of-00072.msgpack", + "transformer/h/1/post_attention_layernorm/bias": "flax_model-00003-of-00072.msgpack", + "transformer/h/1/post_attention_layernorm/kernel": "flax_model-00003-of-00072.msgpack", + "transformer/h/1/self_attention/dense/bias": "flax_model-00003-of-00072.msgpack", + "transformer/h/1/self_attention/dense/kernel": "flax_model-00003-of-00072.msgpack", + "transformer/h/1/self_attention/query_key_value/bias": "flax_model-00003-of-00072.msgpack", + "transformer/h/1/self_attention/query_key_value/kernel": "flax_model-00003-of-00072.msgpack", + "transformer/h/10/input_layernorm/bias": "flax_model-00012-of-00072.msgpack", + "transformer/h/10/input_layernorm/kernel": "flax_model-00012-of-00072.msgpack", + "transformer/h/10/mlp/dense_4h_to_h/bias": "flax_model-00012-of-00072.msgpack", + "transformer/h/10/mlp/dense_4h_to_h/kernel": "flax_model-00012-of-00072.msgpack", + "transformer/h/10/mlp/dense_h_to_4h/bias": "flax_model-00012-of-00072.msgpack", + "transformer/h/10/mlp/dense_h_to_4h/kernel": "flax_model-00012-of-00072.msgpack", + "transformer/h/10/post_attention_layernorm/bias": "flax_model-00012-of-00072.msgpack", + "transformer/h/10/post_attention_layernorm/kernel": "flax_model-00012-of-00072.msgpack", + "transformer/h/10/self_attention/dense/bias": "flax_model-00012-of-00072.msgpack", + "transformer/h/10/self_attention/dense/kernel": "flax_model-00012-of-00072.msgpack", + "transformer/h/10/self_attention/query_key_value/bias": "flax_model-00012-of-00072.msgpack", + "transformer/h/10/self_attention/query_key_value/kernel": "flax_model-00012-of-00072.msgpack", + "transformer/h/11/input_layernorm/bias": "flax_model-00013-of-00072.msgpack", + "transformer/h/11/input_layernorm/kernel": "flax_model-00013-of-00072.msgpack", + "transformer/h/11/mlp/dense_4h_to_h/bias": "flax_model-00013-of-00072.msgpack", + "transformer/h/11/mlp/dense_4h_to_h/kernel": "flax_model-00013-of-00072.msgpack", + "transformer/h/11/mlp/dense_h_to_4h/bias": "flax_model-00013-of-00072.msgpack", + "transformer/h/11/mlp/dense_h_to_4h/kernel": "flax_model-00013-of-00072.msgpack", + "transformer/h/11/post_attention_layernorm/bias": "flax_model-00013-of-00072.msgpack", + "transformer/h/11/post_attention_layernorm/kernel": "flax_model-00013-of-00072.msgpack", + "transformer/h/11/self_attention/dense/bias": "flax_model-00013-of-00072.msgpack", + "transformer/h/11/self_attention/dense/kernel": "flax_model-00013-of-00072.msgpack", + "transformer/h/11/self_attention/query_key_value/bias": "flax_model-00013-of-00072.msgpack", + "transformer/h/11/self_attention/query_key_value/kernel": "flax_model-00013-of-00072.msgpack", + "transformer/h/12/input_layernorm/bias": "flax_model-00014-of-00072.msgpack", + "transformer/h/12/input_layernorm/kernel": "flax_model-00014-of-00072.msgpack", + "transformer/h/12/mlp/dense_4h_to_h/bias": "flax_model-00014-of-00072.msgpack", + "transformer/h/12/mlp/dense_4h_to_h/kernel": "flax_model-00014-of-00072.msgpack", + "transformer/h/12/mlp/dense_h_to_4h/bias": "flax_model-00014-of-00072.msgpack", + "transformer/h/12/mlp/dense_h_to_4h/kernel": "flax_model-00014-of-00072.msgpack", + "transformer/h/12/post_attention_layernorm/bias": "flax_model-00014-of-00072.msgpack", + "transformer/h/12/post_attention_layernorm/kernel": "flax_model-00014-of-00072.msgpack", + "transformer/h/12/self_attention/dense/bias": "flax_model-00014-of-00072.msgpack", + "transformer/h/12/self_attention/dense/kernel": "flax_model-00014-of-00072.msgpack", + "transformer/h/12/self_attention/query_key_value/bias": "flax_model-00014-of-00072.msgpack", + "transformer/h/12/self_attention/query_key_value/kernel": "flax_model-00014-of-00072.msgpack", + "transformer/h/13/input_layernorm/bias": "flax_model-00015-of-00072.msgpack", + "transformer/h/13/input_layernorm/kernel": "flax_model-00015-of-00072.msgpack", + "transformer/h/13/mlp/dense_4h_to_h/bias": "flax_model-00015-of-00072.msgpack", + "transformer/h/13/mlp/dense_4h_to_h/kernel": "flax_model-00015-of-00072.msgpack", + "transformer/h/13/mlp/dense_h_to_4h/bias": "flax_model-00015-of-00072.msgpack", + "transformer/h/13/mlp/dense_h_to_4h/kernel": "flax_model-00015-of-00072.msgpack", + "transformer/h/13/post_attention_layernorm/bias": "flax_model-00015-of-00072.msgpack", + "transformer/h/13/post_attention_layernorm/kernel": "flax_model-00015-of-00072.msgpack", + "transformer/h/13/self_attention/dense/bias": "flax_model-00015-of-00072.msgpack", + "transformer/h/13/self_attention/dense/kernel": "flax_model-00015-of-00072.msgpack", + "transformer/h/13/self_attention/query_key_value/bias": "flax_model-00015-of-00072.msgpack", + "transformer/h/13/self_attention/query_key_value/kernel": "flax_model-00015-of-00072.msgpack", + "transformer/h/14/input_layernorm/bias": "flax_model-00016-of-00072.msgpack", + "transformer/h/14/input_layernorm/kernel": "flax_model-00016-of-00072.msgpack", + "transformer/h/14/mlp/dense_4h_to_h/bias": "flax_model-00016-of-00072.msgpack", + "transformer/h/14/mlp/dense_4h_to_h/kernel": "flax_model-00016-of-00072.msgpack", + "transformer/h/14/mlp/dense_h_to_4h/bias": "flax_model-00016-of-00072.msgpack", + "transformer/h/14/mlp/dense_h_to_4h/kernel": "flax_model-00016-of-00072.msgpack", + "transformer/h/14/post_attention_layernorm/bias": "flax_model-00016-of-00072.msgpack", + "transformer/h/14/post_attention_layernorm/kernel": "flax_model-00016-of-00072.msgpack", + "transformer/h/14/self_attention/dense/bias": "flax_model-00016-of-00072.msgpack", + "transformer/h/14/self_attention/dense/kernel": "flax_model-00016-of-00072.msgpack", + "transformer/h/14/self_attention/query_key_value/bias": "flax_model-00016-of-00072.msgpack", + "transformer/h/14/self_attention/query_key_value/kernel": "flax_model-00016-of-00072.msgpack", + "transformer/h/15/input_layernorm/bias": "flax_model-00017-of-00072.msgpack", + "transformer/h/15/input_layernorm/kernel": "flax_model-00017-of-00072.msgpack", + "transformer/h/15/mlp/dense_4h_to_h/bias": "flax_model-00017-of-00072.msgpack", + "transformer/h/15/mlp/dense_4h_to_h/kernel": "flax_model-00017-of-00072.msgpack", + "transformer/h/15/mlp/dense_h_to_4h/bias": "flax_model-00017-of-00072.msgpack", + "transformer/h/15/mlp/dense_h_to_4h/kernel": "flax_model-00017-of-00072.msgpack", + "transformer/h/15/post_attention_layernorm/bias": "flax_model-00017-of-00072.msgpack", + "transformer/h/15/post_attention_layernorm/kernel": "flax_model-00017-of-00072.msgpack", + "transformer/h/15/self_attention/dense/bias": "flax_model-00017-of-00072.msgpack", + "transformer/h/15/self_attention/dense/kernel": "flax_model-00017-of-00072.msgpack", + "transformer/h/15/self_attention/query_key_value/bias": "flax_model-00017-of-00072.msgpack", + "transformer/h/15/self_attention/query_key_value/kernel": "flax_model-00017-of-00072.msgpack", + "transformer/h/16/input_layernorm/bias": "flax_model-00018-of-00072.msgpack", + "transformer/h/16/input_layernorm/kernel": "flax_model-00018-of-00072.msgpack", + "transformer/h/16/mlp/dense_4h_to_h/bias": "flax_model-00018-of-00072.msgpack", + "transformer/h/16/mlp/dense_4h_to_h/kernel": "flax_model-00018-of-00072.msgpack", + "transformer/h/16/mlp/dense_h_to_4h/bias": "flax_model-00018-of-00072.msgpack", + "transformer/h/16/mlp/dense_h_to_4h/kernel": "flax_model-00018-of-00072.msgpack", + "transformer/h/16/post_attention_layernorm/bias": "flax_model-00018-of-00072.msgpack", + "transformer/h/16/post_attention_layernorm/kernel": "flax_model-00018-of-00072.msgpack", + "transformer/h/16/self_attention/dense/bias": "flax_model-00018-of-00072.msgpack", + "transformer/h/16/self_attention/dense/kernel": "flax_model-00018-of-00072.msgpack", + "transformer/h/16/self_attention/query_key_value/bias": "flax_model-00018-of-00072.msgpack", + "transformer/h/16/self_attention/query_key_value/kernel": "flax_model-00018-of-00072.msgpack", + "transformer/h/17/input_layernorm/bias": "flax_model-00019-of-00072.msgpack", + "transformer/h/17/input_layernorm/kernel": "flax_model-00019-of-00072.msgpack", + "transformer/h/17/mlp/dense_4h_to_h/bias": "flax_model-00019-of-00072.msgpack", + "transformer/h/17/mlp/dense_4h_to_h/kernel": "flax_model-00019-of-00072.msgpack", + "transformer/h/17/mlp/dense_h_to_4h/bias": "flax_model-00019-of-00072.msgpack", + "transformer/h/17/mlp/dense_h_to_4h/kernel": "flax_model-00019-of-00072.msgpack", + "transformer/h/17/post_attention_layernorm/bias": "flax_model-00019-of-00072.msgpack", + "transformer/h/17/post_attention_layernorm/kernel": "flax_model-00019-of-00072.msgpack", + "transformer/h/17/self_attention/dense/bias": "flax_model-00019-of-00072.msgpack", + "transformer/h/17/self_attention/dense/kernel": "flax_model-00019-of-00072.msgpack", + "transformer/h/17/self_attention/query_key_value/bias": "flax_model-00019-of-00072.msgpack", + "transformer/h/17/self_attention/query_key_value/kernel": "flax_model-00019-of-00072.msgpack", + "transformer/h/18/input_layernorm/bias": "flax_model-00020-of-00072.msgpack", + "transformer/h/18/input_layernorm/kernel": "flax_model-00020-of-00072.msgpack", + "transformer/h/18/mlp/dense_4h_to_h/bias": "flax_model-00020-of-00072.msgpack", + "transformer/h/18/mlp/dense_4h_to_h/kernel": "flax_model-00020-of-00072.msgpack", + "transformer/h/18/mlp/dense_h_to_4h/bias": "flax_model-00020-of-00072.msgpack", + "transformer/h/18/mlp/dense_h_to_4h/kernel": "flax_model-00020-of-00072.msgpack", + "transformer/h/18/post_attention_layernorm/bias": "flax_model-00020-of-00072.msgpack", + "transformer/h/18/post_attention_layernorm/kernel": "flax_model-00020-of-00072.msgpack", + "transformer/h/18/self_attention/dense/bias": "flax_model-00020-of-00072.msgpack", + "transformer/h/18/self_attention/dense/kernel": "flax_model-00020-of-00072.msgpack", + "transformer/h/18/self_attention/query_key_value/bias": "flax_model-00020-of-00072.msgpack", + "transformer/h/18/self_attention/query_key_value/kernel": "flax_model-00020-of-00072.msgpack", + "transformer/h/19/input_layernorm/bias": "flax_model-00021-of-00072.msgpack", + "transformer/h/19/input_layernorm/kernel": "flax_model-00021-of-00072.msgpack", + "transformer/h/19/mlp/dense_4h_to_h/bias": "flax_model-00021-of-00072.msgpack", + "transformer/h/19/mlp/dense_4h_to_h/kernel": "flax_model-00021-of-00072.msgpack", + "transformer/h/19/mlp/dense_h_to_4h/bias": "flax_model-00021-of-00072.msgpack", + "transformer/h/19/mlp/dense_h_to_4h/kernel": "flax_model-00021-of-00072.msgpack", + "transformer/h/19/post_attention_layernorm/bias": "flax_model-00021-of-00072.msgpack", + "transformer/h/19/post_attention_layernorm/kernel": "flax_model-00021-of-00072.msgpack", + "transformer/h/19/self_attention/dense/bias": "flax_model-00021-of-00072.msgpack", + "transformer/h/19/self_attention/dense/kernel": "flax_model-00021-of-00072.msgpack", + "transformer/h/19/self_attention/query_key_value/bias": "flax_model-00021-of-00072.msgpack", + "transformer/h/19/self_attention/query_key_value/kernel": "flax_model-00021-of-00072.msgpack", + "transformer/h/2/input_layernorm/bias": "flax_model-00004-of-00072.msgpack", + "transformer/h/2/input_layernorm/kernel": "flax_model-00004-of-00072.msgpack", + "transformer/h/2/mlp/dense_4h_to_h/bias": "flax_model-00004-of-00072.msgpack", + "transformer/h/2/mlp/dense_4h_to_h/kernel": "flax_model-00004-of-00072.msgpack", + "transformer/h/2/mlp/dense_h_to_4h/bias": "flax_model-00004-of-00072.msgpack", + "transformer/h/2/mlp/dense_h_to_4h/kernel": "flax_model-00004-of-00072.msgpack", + "transformer/h/2/post_attention_layernorm/bias": "flax_model-00004-of-00072.msgpack", + "transformer/h/2/post_attention_layernorm/kernel": "flax_model-00004-of-00072.msgpack", + "transformer/h/2/self_attention/dense/bias": "flax_model-00004-of-00072.msgpack", + "transformer/h/2/self_attention/dense/kernel": "flax_model-00004-of-00072.msgpack", + "transformer/h/2/self_attention/query_key_value/bias": "flax_model-00004-of-00072.msgpack", + "transformer/h/2/self_attention/query_key_value/kernel": "flax_model-00004-of-00072.msgpack", + "transformer/h/20/input_layernorm/bias": "flax_model-00022-of-00072.msgpack", + "transformer/h/20/input_layernorm/kernel": "flax_model-00022-of-00072.msgpack", + "transformer/h/20/mlp/dense_4h_to_h/bias": "flax_model-00022-of-00072.msgpack", + "transformer/h/20/mlp/dense_4h_to_h/kernel": "flax_model-00022-of-00072.msgpack", + "transformer/h/20/mlp/dense_h_to_4h/bias": "flax_model-00022-of-00072.msgpack", + "transformer/h/20/mlp/dense_h_to_4h/kernel": "flax_model-00022-of-00072.msgpack", + "transformer/h/20/post_attention_layernorm/bias": "flax_model-00022-of-00072.msgpack", + "transformer/h/20/post_attention_layernorm/kernel": "flax_model-00022-of-00072.msgpack", + "transformer/h/20/self_attention/dense/bias": "flax_model-00022-of-00072.msgpack", + "transformer/h/20/self_attention/dense/kernel": "flax_model-00022-of-00072.msgpack", + "transformer/h/20/self_attention/query_key_value/bias": "flax_model-00022-of-00072.msgpack", + "transformer/h/20/self_attention/query_key_value/kernel": "flax_model-00022-of-00072.msgpack", + "transformer/h/21/input_layernorm/bias": "flax_model-00023-of-00072.msgpack", + "transformer/h/21/input_layernorm/kernel": "flax_model-00023-of-00072.msgpack", + "transformer/h/21/mlp/dense_4h_to_h/bias": "flax_model-00023-of-00072.msgpack", + "transformer/h/21/mlp/dense_4h_to_h/kernel": "flax_model-00023-of-00072.msgpack", + "transformer/h/21/mlp/dense_h_to_4h/bias": "flax_model-00023-of-00072.msgpack", + "transformer/h/21/mlp/dense_h_to_4h/kernel": "flax_model-00023-of-00072.msgpack", + "transformer/h/21/post_attention_layernorm/bias": "flax_model-00023-of-00072.msgpack", + "transformer/h/21/post_attention_layernorm/kernel": "flax_model-00023-of-00072.msgpack", + "transformer/h/21/self_attention/dense/bias": "flax_model-00023-of-00072.msgpack", + "transformer/h/21/self_attention/dense/kernel": "flax_model-00023-of-00072.msgpack", + "transformer/h/21/self_attention/query_key_value/bias": "flax_model-00023-of-00072.msgpack", + "transformer/h/21/self_attention/query_key_value/kernel": "flax_model-00023-of-00072.msgpack", + "transformer/h/22/input_layernorm/bias": "flax_model-00024-of-00072.msgpack", + "transformer/h/22/input_layernorm/kernel": "flax_model-00024-of-00072.msgpack", + "transformer/h/22/mlp/dense_4h_to_h/bias": "flax_model-00024-of-00072.msgpack", + "transformer/h/22/mlp/dense_4h_to_h/kernel": "flax_model-00024-of-00072.msgpack", + "transformer/h/22/mlp/dense_h_to_4h/bias": "flax_model-00024-of-00072.msgpack", + "transformer/h/22/mlp/dense_h_to_4h/kernel": "flax_model-00024-of-00072.msgpack", + "transformer/h/22/post_attention_layernorm/bias": "flax_model-00024-of-00072.msgpack", + "transformer/h/22/post_attention_layernorm/kernel": "flax_model-00024-of-00072.msgpack", + "transformer/h/22/self_attention/dense/bias": "flax_model-00024-of-00072.msgpack", + "transformer/h/22/self_attention/dense/kernel": "flax_model-00024-of-00072.msgpack", + "transformer/h/22/self_attention/query_key_value/bias": "flax_model-00024-of-00072.msgpack", + "transformer/h/22/self_attention/query_key_value/kernel": "flax_model-00024-of-00072.msgpack", + "transformer/h/23/input_layernorm/bias": "flax_model-00025-of-00072.msgpack", + "transformer/h/23/input_layernorm/kernel": "flax_model-00025-of-00072.msgpack", + "transformer/h/23/mlp/dense_4h_to_h/bias": "flax_model-00025-of-00072.msgpack", + "transformer/h/23/mlp/dense_4h_to_h/kernel": "flax_model-00025-of-00072.msgpack", + "transformer/h/23/mlp/dense_h_to_4h/bias": "flax_model-00025-of-00072.msgpack", + "transformer/h/23/mlp/dense_h_to_4h/kernel": "flax_model-00025-of-00072.msgpack", + "transformer/h/23/post_attention_layernorm/bias": "flax_model-00025-of-00072.msgpack", + "transformer/h/23/post_attention_layernorm/kernel": "flax_model-00025-of-00072.msgpack", + "transformer/h/23/self_attention/dense/bias": "flax_model-00025-of-00072.msgpack", + "transformer/h/23/self_attention/dense/kernel": "flax_model-00025-of-00072.msgpack", + "transformer/h/23/self_attention/query_key_value/bias": "flax_model-00025-of-00072.msgpack", + "transformer/h/23/self_attention/query_key_value/kernel": "flax_model-00025-of-00072.msgpack", + "transformer/h/24/input_layernorm/bias": "flax_model-00026-of-00072.msgpack", + "transformer/h/24/input_layernorm/kernel": "flax_model-00026-of-00072.msgpack", + "transformer/h/24/mlp/dense_4h_to_h/bias": "flax_model-00026-of-00072.msgpack", + "transformer/h/24/mlp/dense_4h_to_h/kernel": "flax_model-00026-of-00072.msgpack", + "transformer/h/24/mlp/dense_h_to_4h/bias": "flax_model-00026-of-00072.msgpack", + "transformer/h/24/mlp/dense_h_to_4h/kernel": "flax_model-00026-of-00072.msgpack", + "transformer/h/24/post_attention_layernorm/bias": "flax_model-00026-of-00072.msgpack", + "transformer/h/24/post_attention_layernorm/kernel": "flax_model-00026-of-00072.msgpack", + "transformer/h/24/self_attention/dense/bias": "flax_model-00026-of-00072.msgpack", + "transformer/h/24/self_attention/dense/kernel": "flax_model-00026-of-00072.msgpack", + "transformer/h/24/self_attention/query_key_value/bias": "flax_model-00026-of-00072.msgpack", + "transformer/h/24/self_attention/query_key_value/kernel": "flax_model-00026-of-00072.msgpack", + "transformer/h/25/input_layernorm/bias": "flax_model-00027-of-00072.msgpack", + "transformer/h/25/input_layernorm/kernel": "flax_model-00027-of-00072.msgpack", + "transformer/h/25/mlp/dense_4h_to_h/bias": "flax_model-00027-of-00072.msgpack", + "transformer/h/25/mlp/dense_4h_to_h/kernel": "flax_model-00027-of-00072.msgpack", + "transformer/h/25/mlp/dense_h_to_4h/bias": "flax_model-00027-of-00072.msgpack", + "transformer/h/25/mlp/dense_h_to_4h/kernel": "flax_model-00027-of-00072.msgpack", + "transformer/h/25/post_attention_layernorm/bias": "flax_model-00027-of-00072.msgpack", + "transformer/h/25/post_attention_layernorm/kernel": "flax_model-00027-of-00072.msgpack", + "transformer/h/25/self_attention/dense/bias": "flax_model-00027-of-00072.msgpack", + "transformer/h/25/self_attention/dense/kernel": "flax_model-00027-of-00072.msgpack", + "transformer/h/25/self_attention/query_key_value/bias": "flax_model-00027-of-00072.msgpack", + "transformer/h/25/self_attention/query_key_value/kernel": "flax_model-00027-of-00072.msgpack", + "transformer/h/26/input_layernorm/bias": "flax_model-00028-of-00072.msgpack", + "transformer/h/26/input_layernorm/kernel": "flax_model-00028-of-00072.msgpack", + "transformer/h/26/mlp/dense_4h_to_h/bias": "flax_model-00028-of-00072.msgpack", + "transformer/h/26/mlp/dense_4h_to_h/kernel": "flax_model-00028-of-00072.msgpack", + "transformer/h/26/mlp/dense_h_to_4h/bias": "flax_model-00028-of-00072.msgpack", + "transformer/h/26/mlp/dense_h_to_4h/kernel": "flax_model-00028-of-00072.msgpack", + "transformer/h/26/post_attention_layernorm/bias": "flax_model-00028-of-00072.msgpack", + "transformer/h/26/post_attention_layernorm/kernel": "flax_model-00028-of-00072.msgpack", + "transformer/h/26/self_attention/dense/bias": "flax_model-00028-of-00072.msgpack", + "transformer/h/26/self_attention/dense/kernel": "flax_model-00028-of-00072.msgpack", + "transformer/h/26/self_attention/query_key_value/bias": "flax_model-00028-of-00072.msgpack", + "transformer/h/26/self_attention/query_key_value/kernel": "flax_model-00028-of-00072.msgpack", + "transformer/h/27/input_layernorm/bias": "flax_model-00029-of-00072.msgpack", + "transformer/h/27/input_layernorm/kernel": "flax_model-00029-of-00072.msgpack", + "transformer/h/27/mlp/dense_4h_to_h/bias": "flax_model-00029-of-00072.msgpack", + "transformer/h/27/mlp/dense_4h_to_h/kernel": "flax_model-00029-of-00072.msgpack", + "transformer/h/27/mlp/dense_h_to_4h/bias": "flax_model-00029-of-00072.msgpack", + "transformer/h/27/mlp/dense_h_to_4h/kernel": "flax_model-00029-of-00072.msgpack", + "transformer/h/27/post_attention_layernorm/bias": "flax_model-00029-of-00072.msgpack", + "transformer/h/27/post_attention_layernorm/kernel": "flax_model-00029-of-00072.msgpack", + "transformer/h/27/self_attention/dense/bias": "flax_model-00029-of-00072.msgpack", + "transformer/h/27/self_attention/dense/kernel": "flax_model-00029-of-00072.msgpack", + "transformer/h/27/self_attention/query_key_value/bias": "flax_model-00029-of-00072.msgpack", + "transformer/h/27/self_attention/query_key_value/kernel": "flax_model-00029-of-00072.msgpack", + "transformer/h/28/input_layernorm/bias": "flax_model-00030-of-00072.msgpack", + "transformer/h/28/input_layernorm/kernel": "flax_model-00030-of-00072.msgpack", + "transformer/h/28/mlp/dense_4h_to_h/bias": "flax_model-00030-of-00072.msgpack", + "transformer/h/28/mlp/dense_4h_to_h/kernel": "flax_model-00030-of-00072.msgpack", + "transformer/h/28/mlp/dense_h_to_4h/bias": "flax_model-00030-of-00072.msgpack", + "transformer/h/28/mlp/dense_h_to_4h/kernel": "flax_model-00030-of-00072.msgpack", + "transformer/h/28/post_attention_layernorm/bias": "flax_model-00030-of-00072.msgpack", + "transformer/h/28/post_attention_layernorm/kernel": "flax_model-00030-of-00072.msgpack", + "transformer/h/28/self_attention/dense/bias": "flax_model-00030-of-00072.msgpack", + "transformer/h/28/self_attention/dense/kernel": "flax_model-00030-of-00072.msgpack", + "transformer/h/28/self_attention/query_key_value/bias": "flax_model-00030-of-00072.msgpack", + "transformer/h/28/self_attention/query_key_value/kernel": "flax_model-00030-of-00072.msgpack", + "transformer/h/29/input_layernorm/bias": "flax_model-00031-of-00072.msgpack", + "transformer/h/29/input_layernorm/kernel": "flax_model-00031-of-00072.msgpack", + "transformer/h/29/mlp/dense_4h_to_h/bias": "flax_model-00031-of-00072.msgpack", + "transformer/h/29/mlp/dense_4h_to_h/kernel": "flax_model-00031-of-00072.msgpack", + "transformer/h/29/mlp/dense_h_to_4h/bias": "flax_model-00031-of-00072.msgpack", + "transformer/h/29/mlp/dense_h_to_4h/kernel": "flax_model-00031-of-00072.msgpack", + "transformer/h/29/post_attention_layernorm/bias": "flax_model-00031-of-00072.msgpack", + "transformer/h/29/post_attention_layernorm/kernel": "flax_model-00031-of-00072.msgpack", + "transformer/h/29/self_attention/dense/bias": "flax_model-00031-of-00072.msgpack", + "transformer/h/29/self_attention/dense/kernel": "flax_model-00031-of-00072.msgpack", + "transformer/h/29/self_attention/query_key_value/bias": "flax_model-00031-of-00072.msgpack", + "transformer/h/29/self_attention/query_key_value/kernel": "flax_model-00031-of-00072.msgpack", + "transformer/h/3/input_layernorm/bias": "flax_model-00005-of-00072.msgpack", + "transformer/h/3/input_layernorm/kernel": "flax_model-00005-of-00072.msgpack", + "transformer/h/3/mlp/dense_4h_to_h/bias": "flax_model-00005-of-00072.msgpack", + "transformer/h/3/mlp/dense_4h_to_h/kernel": "flax_model-00005-of-00072.msgpack", + "transformer/h/3/mlp/dense_h_to_4h/bias": "flax_model-00005-of-00072.msgpack", + "transformer/h/3/mlp/dense_h_to_4h/kernel": "flax_model-00005-of-00072.msgpack", + "transformer/h/3/post_attention_layernorm/bias": "flax_model-00005-of-00072.msgpack", + "transformer/h/3/post_attention_layernorm/kernel": "flax_model-00005-of-00072.msgpack", + "transformer/h/3/self_attention/dense/bias": "flax_model-00005-of-00072.msgpack", + "transformer/h/3/self_attention/dense/kernel": "flax_model-00005-of-00072.msgpack", + "transformer/h/3/self_attention/query_key_value/bias": "flax_model-00005-of-00072.msgpack", + "transformer/h/3/self_attention/query_key_value/kernel": "flax_model-00005-of-00072.msgpack", + "transformer/h/30/input_layernorm/bias": "flax_model-00032-of-00072.msgpack", + "transformer/h/30/input_layernorm/kernel": "flax_model-00032-of-00072.msgpack", + "transformer/h/30/mlp/dense_4h_to_h/bias": "flax_model-00032-of-00072.msgpack", + "transformer/h/30/mlp/dense_4h_to_h/kernel": "flax_model-00032-of-00072.msgpack", + "transformer/h/30/mlp/dense_h_to_4h/bias": "flax_model-00032-of-00072.msgpack", + "transformer/h/30/mlp/dense_h_to_4h/kernel": "flax_model-00032-of-00072.msgpack", + "transformer/h/30/post_attention_layernorm/bias": "flax_model-00032-of-00072.msgpack", + "transformer/h/30/post_attention_layernorm/kernel": "flax_model-00032-of-00072.msgpack", + "transformer/h/30/self_attention/dense/bias": "flax_model-00032-of-00072.msgpack", + "transformer/h/30/self_attention/dense/kernel": "flax_model-00032-of-00072.msgpack", + "transformer/h/30/self_attention/query_key_value/bias": "flax_model-00032-of-00072.msgpack", + "transformer/h/30/self_attention/query_key_value/kernel": "flax_model-00032-of-00072.msgpack", + "transformer/h/31/input_layernorm/bias": "flax_model-00033-of-00072.msgpack", + "transformer/h/31/input_layernorm/kernel": "flax_model-00033-of-00072.msgpack", + "transformer/h/31/mlp/dense_4h_to_h/bias": "flax_model-00033-of-00072.msgpack", + "transformer/h/31/mlp/dense_4h_to_h/kernel": "flax_model-00033-of-00072.msgpack", + "transformer/h/31/mlp/dense_h_to_4h/bias": "flax_model-00033-of-00072.msgpack", + "transformer/h/31/mlp/dense_h_to_4h/kernel": "flax_model-00033-of-00072.msgpack", + "transformer/h/31/post_attention_layernorm/bias": "flax_model-00033-of-00072.msgpack", + "transformer/h/31/post_attention_layernorm/kernel": "flax_model-00033-of-00072.msgpack", + "transformer/h/31/self_attention/dense/bias": "flax_model-00033-of-00072.msgpack", + "transformer/h/31/self_attention/dense/kernel": "flax_model-00033-of-00072.msgpack", + "transformer/h/31/self_attention/query_key_value/bias": "flax_model-00033-of-00072.msgpack", + "transformer/h/31/self_attention/query_key_value/kernel": "flax_model-00033-of-00072.msgpack", + "transformer/h/32/input_layernorm/bias": "flax_model-00034-of-00072.msgpack", + "transformer/h/32/input_layernorm/kernel": "flax_model-00034-of-00072.msgpack", + "transformer/h/32/mlp/dense_4h_to_h/bias": "flax_model-00034-of-00072.msgpack", + "transformer/h/32/mlp/dense_4h_to_h/kernel": "flax_model-00034-of-00072.msgpack", + "transformer/h/32/mlp/dense_h_to_4h/bias": "flax_model-00034-of-00072.msgpack", + "transformer/h/32/mlp/dense_h_to_4h/kernel": "flax_model-00034-of-00072.msgpack", + "transformer/h/32/post_attention_layernorm/bias": "flax_model-00034-of-00072.msgpack", + "transformer/h/32/post_attention_layernorm/kernel": "flax_model-00034-of-00072.msgpack", + "transformer/h/32/self_attention/dense/bias": "flax_model-00034-of-00072.msgpack", + "transformer/h/32/self_attention/dense/kernel": "flax_model-00034-of-00072.msgpack", + "transformer/h/32/self_attention/query_key_value/bias": "flax_model-00034-of-00072.msgpack", + "transformer/h/32/self_attention/query_key_value/kernel": "flax_model-00034-of-00072.msgpack", + "transformer/h/33/input_layernorm/bias": "flax_model-00035-of-00072.msgpack", + "transformer/h/33/input_layernorm/kernel": "flax_model-00035-of-00072.msgpack", + "transformer/h/33/mlp/dense_4h_to_h/bias": "flax_model-00035-of-00072.msgpack", + "transformer/h/33/mlp/dense_4h_to_h/kernel": "flax_model-00035-of-00072.msgpack", + "transformer/h/33/mlp/dense_h_to_4h/bias": "flax_model-00035-of-00072.msgpack", + "transformer/h/33/mlp/dense_h_to_4h/kernel": "flax_model-00035-of-00072.msgpack", + "transformer/h/33/post_attention_layernorm/bias": "flax_model-00035-of-00072.msgpack", + "transformer/h/33/post_attention_layernorm/kernel": "flax_model-00035-of-00072.msgpack", + "transformer/h/33/self_attention/dense/bias": "flax_model-00035-of-00072.msgpack", + "transformer/h/33/self_attention/dense/kernel": "flax_model-00035-of-00072.msgpack", + "transformer/h/33/self_attention/query_key_value/bias": "flax_model-00035-of-00072.msgpack", + "transformer/h/33/self_attention/query_key_value/kernel": "flax_model-00035-of-00072.msgpack", + "transformer/h/34/input_layernorm/bias": "flax_model-00036-of-00072.msgpack", + "transformer/h/34/input_layernorm/kernel": "flax_model-00036-of-00072.msgpack", + "transformer/h/34/mlp/dense_4h_to_h/bias": "flax_model-00036-of-00072.msgpack", + "transformer/h/34/mlp/dense_4h_to_h/kernel": "flax_model-00036-of-00072.msgpack", + "transformer/h/34/mlp/dense_h_to_4h/bias": "flax_model-00036-of-00072.msgpack", + "transformer/h/34/mlp/dense_h_to_4h/kernel": "flax_model-00036-of-00072.msgpack", + "transformer/h/34/post_attention_layernorm/bias": "flax_model-00036-of-00072.msgpack", + "transformer/h/34/post_attention_layernorm/kernel": "flax_model-00036-of-00072.msgpack", + "transformer/h/34/self_attention/dense/bias": "flax_model-00036-of-00072.msgpack", + "transformer/h/34/self_attention/dense/kernel": "flax_model-00036-of-00072.msgpack", + "transformer/h/34/self_attention/query_key_value/bias": "flax_model-00036-of-00072.msgpack", + "transformer/h/34/self_attention/query_key_value/kernel": "flax_model-00036-of-00072.msgpack", + "transformer/h/35/input_layernorm/bias": "flax_model-00037-of-00072.msgpack", + "transformer/h/35/input_layernorm/kernel": "flax_model-00037-of-00072.msgpack", + "transformer/h/35/mlp/dense_4h_to_h/bias": "flax_model-00037-of-00072.msgpack", + "transformer/h/35/mlp/dense_4h_to_h/kernel": "flax_model-00037-of-00072.msgpack", + "transformer/h/35/mlp/dense_h_to_4h/bias": "flax_model-00037-of-00072.msgpack", + "transformer/h/35/mlp/dense_h_to_4h/kernel": "flax_model-00037-of-00072.msgpack", + "transformer/h/35/post_attention_layernorm/bias": "flax_model-00037-of-00072.msgpack", + "transformer/h/35/post_attention_layernorm/kernel": "flax_model-00037-of-00072.msgpack", + "transformer/h/35/self_attention/dense/bias": "flax_model-00037-of-00072.msgpack", + "transformer/h/35/self_attention/dense/kernel": "flax_model-00037-of-00072.msgpack", + "transformer/h/35/self_attention/query_key_value/bias": "flax_model-00037-of-00072.msgpack", + "transformer/h/35/self_attention/query_key_value/kernel": "flax_model-00037-of-00072.msgpack", + "transformer/h/36/input_layernorm/bias": "flax_model-00038-of-00072.msgpack", + "transformer/h/36/input_layernorm/kernel": "flax_model-00038-of-00072.msgpack", + "transformer/h/36/mlp/dense_4h_to_h/bias": "flax_model-00038-of-00072.msgpack", + "transformer/h/36/mlp/dense_4h_to_h/kernel": "flax_model-00038-of-00072.msgpack", + "transformer/h/36/mlp/dense_h_to_4h/bias": "flax_model-00038-of-00072.msgpack", + "transformer/h/36/mlp/dense_h_to_4h/kernel": "flax_model-00038-of-00072.msgpack", + "transformer/h/36/post_attention_layernorm/bias": "flax_model-00038-of-00072.msgpack", + "transformer/h/36/post_attention_layernorm/kernel": "flax_model-00038-of-00072.msgpack", + "transformer/h/36/self_attention/dense/bias": "flax_model-00038-of-00072.msgpack", + "transformer/h/36/self_attention/dense/kernel": "flax_model-00038-of-00072.msgpack", + "transformer/h/36/self_attention/query_key_value/bias": "flax_model-00038-of-00072.msgpack", + "transformer/h/36/self_attention/query_key_value/kernel": "flax_model-00038-of-00072.msgpack", + "transformer/h/37/input_layernorm/bias": "flax_model-00039-of-00072.msgpack", + "transformer/h/37/input_layernorm/kernel": "flax_model-00039-of-00072.msgpack", + "transformer/h/37/mlp/dense_4h_to_h/bias": "flax_model-00039-of-00072.msgpack", + "transformer/h/37/mlp/dense_4h_to_h/kernel": "flax_model-00039-of-00072.msgpack", + "transformer/h/37/mlp/dense_h_to_4h/bias": "flax_model-00039-of-00072.msgpack", + "transformer/h/37/mlp/dense_h_to_4h/kernel": "flax_model-00039-of-00072.msgpack", + "transformer/h/37/post_attention_layernorm/bias": "flax_model-00039-of-00072.msgpack", + "transformer/h/37/post_attention_layernorm/kernel": "flax_model-00039-of-00072.msgpack", + "transformer/h/37/self_attention/dense/bias": "flax_model-00039-of-00072.msgpack", + "transformer/h/37/self_attention/dense/kernel": "flax_model-00039-of-00072.msgpack", + "transformer/h/37/self_attention/query_key_value/bias": "flax_model-00039-of-00072.msgpack", + "transformer/h/37/self_attention/query_key_value/kernel": "flax_model-00039-of-00072.msgpack", + "transformer/h/38/input_layernorm/bias": "flax_model-00040-of-00072.msgpack", + "transformer/h/38/input_layernorm/kernel": "flax_model-00040-of-00072.msgpack", + "transformer/h/38/mlp/dense_4h_to_h/bias": "flax_model-00040-of-00072.msgpack", + "transformer/h/38/mlp/dense_4h_to_h/kernel": "flax_model-00040-of-00072.msgpack", + "transformer/h/38/mlp/dense_h_to_4h/bias": "flax_model-00040-of-00072.msgpack", + "transformer/h/38/mlp/dense_h_to_4h/kernel": "flax_model-00040-of-00072.msgpack", + "transformer/h/38/post_attention_layernorm/bias": "flax_model-00040-of-00072.msgpack", + "transformer/h/38/post_attention_layernorm/kernel": "flax_model-00040-of-00072.msgpack", + "transformer/h/38/self_attention/dense/bias": "flax_model-00040-of-00072.msgpack", + "transformer/h/38/self_attention/dense/kernel": "flax_model-00040-of-00072.msgpack", + "transformer/h/38/self_attention/query_key_value/bias": "flax_model-00040-of-00072.msgpack", + "transformer/h/38/self_attention/query_key_value/kernel": "flax_model-00040-of-00072.msgpack", + "transformer/h/39/input_layernorm/bias": "flax_model-00041-of-00072.msgpack", + "transformer/h/39/input_layernorm/kernel": "flax_model-00041-of-00072.msgpack", + "transformer/h/39/mlp/dense_4h_to_h/bias": "flax_model-00041-of-00072.msgpack", + "transformer/h/39/mlp/dense_4h_to_h/kernel": "flax_model-00041-of-00072.msgpack", + "transformer/h/39/mlp/dense_h_to_4h/bias": "flax_model-00041-of-00072.msgpack", + "transformer/h/39/mlp/dense_h_to_4h/kernel": "flax_model-00041-of-00072.msgpack", + "transformer/h/39/post_attention_layernorm/bias": "flax_model-00041-of-00072.msgpack", + "transformer/h/39/post_attention_layernorm/kernel": "flax_model-00041-of-00072.msgpack", + "transformer/h/39/self_attention/dense/bias": "flax_model-00041-of-00072.msgpack", + "transformer/h/39/self_attention/dense/kernel": "flax_model-00041-of-00072.msgpack", + "transformer/h/39/self_attention/query_key_value/bias": "flax_model-00041-of-00072.msgpack", + "transformer/h/39/self_attention/query_key_value/kernel": "flax_model-00041-of-00072.msgpack", + "transformer/h/4/input_layernorm/bias": "flax_model-00006-of-00072.msgpack", + "transformer/h/4/input_layernorm/kernel": "flax_model-00006-of-00072.msgpack", + "transformer/h/4/mlp/dense_4h_to_h/bias": "flax_model-00006-of-00072.msgpack", + "transformer/h/4/mlp/dense_4h_to_h/kernel": "flax_model-00006-of-00072.msgpack", + "transformer/h/4/mlp/dense_h_to_4h/bias": "flax_model-00006-of-00072.msgpack", + "transformer/h/4/mlp/dense_h_to_4h/kernel": "flax_model-00006-of-00072.msgpack", + "transformer/h/4/post_attention_layernorm/bias": "flax_model-00006-of-00072.msgpack", + "transformer/h/4/post_attention_layernorm/kernel": "flax_model-00006-of-00072.msgpack", + "transformer/h/4/self_attention/dense/bias": "flax_model-00006-of-00072.msgpack", + "transformer/h/4/self_attention/dense/kernel": "flax_model-00006-of-00072.msgpack", + "transformer/h/4/self_attention/query_key_value/bias": "flax_model-00006-of-00072.msgpack", + "transformer/h/4/self_attention/query_key_value/kernel": "flax_model-00006-of-00072.msgpack", + "transformer/h/40/input_layernorm/bias": "flax_model-00042-of-00072.msgpack", + "transformer/h/40/input_layernorm/kernel": "flax_model-00042-of-00072.msgpack", + "transformer/h/40/mlp/dense_4h_to_h/bias": "flax_model-00042-of-00072.msgpack", + "transformer/h/40/mlp/dense_4h_to_h/kernel": "flax_model-00042-of-00072.msgpack", + "transformer/h/40/mlp/dense_h_to_4h/bias": "flax_model-00042-of-00072.msgpack", + "transformer/h/40/mlp/dense_h_to_4h/kernel": "flax_model-00042-of-00072.msgpack", + "transformer/h/40/post_attention_layernorm/bias": "flax_model-00042-of-00072.msgpack", + "transformer/h/40/post_attention_layernorm/kernel": "flax_model-00042-of-00072.msgpack", + "transformer/h/40/self_attention/dense/bias": "flax_model-00042-of-00072.msgpack", + "transformer/h/40/self_attention/dense/kernel": "flax_model-00042-of-00072.msgpack", + "transformer/h/40/self_attention/query_key_value/bias": "flax_model-00042-of-00072.msgpack", + "transformer/h/40/self_attention/query_key_value/kernel": "flax_model-00042-of-00072.msgpack", + "transformer/h/41/input_layernorm/bias": "flax_model-00043-of-00072.msgpack", + "transformer/h/41/input_layernorm/kernel": "flax_model-00043-of-00072.msgpack", + "transformer/h/41/mlp/dense_4h_to_h/bias": "flax_model-00043-of-00072.msgpack", + "transformer/h/41/mlp/dense_4h_to_h/kernel": "flax_model-00043-of-00072.msgpack", + "transformer/h/41/mlp/dense_h_to_4h/bias": "flax_model-00043-of-00072.msgpack", + "transformer/h/41/mlp/dense_h_to_4h/kernel": "flax_model-00043-of-00072.msgpack", + "transformer/h/41/post_attention_layernorm/bias": "flax_model-00043-of-00072.msgpack", + "transformer/h/41/post_attention_layernorm/kernel": "flax_model-00043-of-00072.msgpack", + "transformer/h/41/self_attention/dense/bias": "flax_model-00043-of-00072.msgpack", + "transformer/h/41/self_attention/dense/kernel": "flax_model-00043-of-00072.msgpack", + "transformer/h/41/self_attention/query_key_value/bias": "flax_model-00043-of-00072.msgpack", + "transformer/h/41/self_attention/query_key_value/kernel": "flax_model-00043-of-00072.msgpack", + "transformer/h/42/input_layernorm/bias": "flax_model-00044-of-00072.msgpack", + "transformer/h/42/input_layernorm/kernel": "flax_model-00044-of-00072.msgpack", + "transformer/h/42/mlp/dense_4h_to_h/bias": "flax_model-00044-of-00072.msgpack", + "transformer/h/42/mlp/dense_4h_to_h/kernel": "flax_model-00044-of-00072.msgpack", + "transformer/h/42/mlp/dense_h_to_4h/bias": "flax_model-00044-of-00072.msgpack", + "transformer/h/42/mlp/dense_h_to_4h/kernel": "flax_model-00044-of-00072.msgpack", + "transformer/h/42/post_attention_layernorm/bias": "flax_model-00044-of-00072.msgpack", + "transformer/h/42/post_attention_layernorm/kernel": "flax_model-00044-of-00072.msgpack", + "transformer/h/42/self_attention/dense/bias": "flax_model-00044-of-00072.msgpack", + "transformer/h/42/self_attention/dense/kernel": "flax_model-00044-of-00072.msgpack", + "transformer/h/42/self_attention/query_key_value/bias": "flax_model-00044-of-00072.msgpack", + "transformer/h/42/self_attention/query_key_value/kernel": "flax_model-00044-of-00072.msgpack", + "transformer/h/43/input_layernorm/bias": "flax_model-00045-of-00072.msgpack", + "transformer/h/43/input_layernorm/kernel": "flax_model-00045-of-00072.msgpack", + "transformer/h/43/mlp/dense_4h_to_h/bias": "flax_model-00045-of-00072.msgpack", + "transformer/h/43/mlp/dense_4h_to_h/kernel": "flax_model-00045-of-00072.msgpack", + "transformer/h/43/mlp/dense_h_to_4h/bias": "flax_model-00045-of-00072.msgpack", + "transformer/h/43/mlp/dense_h_to_4h/kernel": "flax_model-00045-of-00072.msgpack", + "transformer/h/43/post_attention_layernorm/bias": "flax_model-00045-of-00072.msgpack", + "transformer/h/43/post_attention_layernorm/kernel": "flax_model-00045-of-00072.msgpack", + "transformer/h/43/self_attention/dense/bias": "flax_model-00045-of-00072.msgpack", + "transformer/h/43/self_attention/dense/kernel": "flax_model-00045-of-00072.msgpack", + "transformer/h/43/self_attention/query_key_value/bias": "flax_model-00045-of-00072.msgpack", + "transformer/h/43/self_attention/query_key_value/kernel": "flax_model-00045-of-00072.msgpack", + "transformer/h/44/input_layernorm/bias": "flax_model-00046-of-00072.msgpack", + "transformer/h/44/input_layernorm/kernel": "flax_model-00046-of-00072.msgpack", + "transformer/h/44/mlp/dense_4h_to_h/bias": "flax_model-00046-of-00072.msgpack", + "transformer/h/44/mlp/dense_4h_to_h/kernel": "flax_model-00046-of-00072.msgpack", + "transformer/h/44/mlp/dense_h_to_4h/bias": "flax_model-00046-of-00072.msgpack", + "transformer/h/44/mlp/dense_h_to_4h/kernel": "flax_model-00046-of-00072.msgpack", + "transformer/h/44/post_attention_layernorm/bias": "flax_model-00046-of-00072.msgpack", + "transformer/h/44/post_attention_layernorm/kernel": "flax_model-00046-of-00072.msgpack", + "transformer/h/44/self_attention/dense/bias": "flax_model-00046-of-00072.msgpack", + "transformer/h/44/self_attention/dense/kernel": "flax_model-00046-of-00072.msgpack", + "transformer/h/44/self_attention/query_key_value/bias": "flax_model-00046-of-00072.msgpack", + "transformer/h/44/self_attention/query_key_value/kernel": "flax_model-00046-of-00072.msgpack", + "transformer/h/45/input_layernorm/bias": "flax_model-00047-of-00072.msgpack", + "transformer/h/45/input_layernorm/kernel": "flax_model-00047-of-00072.msgpack", + "transformer/h/45/mlp/dense_4h_to_h/bias": "flax_model-00047-of-00072.msgpack", + "transformer/h/45/mlp/dense_4h_to_h/kernel": "flax_model-00047-of-00072.msgpack", + "transformer/h/45/mlp/dense_h_to_4h/bias": "flax_model-00047-of-00072.msgpack", + "transformer/h/45/mlp/dense_h_to_4h/kernel": "flax_model-00047-of-00072.msgpack", + "transformer/h/45/post_attention_layernorm/bias": "flax_model-00047-of-00072.msgpack", + "transformer/h/45/post_attention_layernorm/kernel": "flax_model-00047-of-00072.msgpack", + "transformer/h/45/self_attention/dense/bias": "flax_model-00047-of-00072.msgpack", + "transformer/h/45/self_attention/dense/kernel": "flax_model-00047-of-00072.msgpack", + "transformer/h/45/self_attention/query_key_value/bias": "flax_model-00047-of-00072.msgpack", + "transformer/h/45/self_attention/query_key_value/kernel": "flax_model-00047-of-00072.msgpack", + "transformer/h/46/input_layernorm/bias": "flax_model-00048-of-00072.msgpack", + "transformer/h/46/input_layernorm/kernel": "flax_model-00048-of-00072.msgpack", + "transformer/h/46/mlp/dense_4h_to_h/bias": "flax_model-00048-of-00072.msgpack", + "transformer/h/46/mlp/dense_4h_to_h/kernel": "flax_model-00048-of-00072.msgpack", + "transformer/h/46/mlp/dense_h_to_4h/bias": "flax_model-00048-of-00072.msgpack", + "transformer/h/46/mlp/dense_h_to_4h/kernel": "flax_model-00048-of-00072.msgpack", + "transformer/h/46/post_attention_layernorm/bias": "flax_model-00048-of-00072.msgpack", + "transformer/h/46/post_attention_layernorm/kernel": "flax_model-00048-of-00072.msgpack", + "transformer/h/46/self_attention/dense/bias": "flax_model-00048-of-00072.msgpack", + "transformer/h/46/self_attention/dense/kernel": "flax_model-00048-of-00072.msgpack", + "transformer/h/46/self_attention/query_key_value/bias": "flax_model-00048-of-00072.msgpack", + "transformer/h/46/self_attention/query_key_value/kernel": "flax_model-00048-of-00072.msgpack", + "transformer/h/47/input_layernorm/bias": "flax_model-00049-of-00072.msgpack", + "transformer/h/47/input_layernorm/kernel": "flax_model-00049-of-00072.msgpack", + "transformer/h/47/mlp/dense_4h_to_h/bias": "flax_model-00049-of-00072.msgpack", + "transformer/h/47/mlp/dense_4h_to_h/kernel": "flax_model-00049-of-00072.msgpack", + "transformer/h/47/mlp/dense_h_to_4h/bias": "flax_model-00049-of-00072.msgpack", + "transformer/h/47/mlp/dense_h_to_4h/kernel": "flax_model-00049-of-00072.msgpack", + "transformer/h/47/post_attention_layernorm/bias": "flax_model-00049-of-00072.msgpack", + "transformer/h/47/post_attention_layernorm/kernel": "flax_model-00049-of-00072.msgpack", + "transformer/h/47/self_attention/dense/bias": "flax_model-00049-of-00072.msgpack", + "transformer/h/47/self_attention/dense/kernel": "flax_model-00049-of-00072.msgpack", + "transformer/h/47/self_attention/query_key_value/bias": "flax_model-00049-of-00072.msgpack", + "transformer/h/47/self_attention/query_key_value/kernel": "flax_model-00049-of-00072.msgpack", + "transformer/h/48/input_layernorm/bias": "flax_model-00050-of-00072.msgpack", + "transformer/h/48/input_layernorm/kernel": "flax_model-00050-of-00072.msgpack", + "transformer/h/48/mlp/dense_4h_to_h/bias": "flax_model-00050-of-00072.msgpack", + "transformer/h/48/mlp/dense_4h_to_h/kernel": "flax_model-00050-of-00072.msgpack", + "transformer/h/48/mlp/dense_h_to_4h/bias": "flax_model-00050-of-00072.msgpack", + "transformer/h/48/mlp/dense_h_to_4h/kernel": "flax_model-00050-of-00072.msgpack", + "transformer/h/48/post_attention_layernorm/bias": "flax_model-00050-of-00072.msgpack", + "transformer/h/48/post_attention_layernorm/kernel": "flax_model-00050-of-00072.msgpack", + "transformer/h/48/self_attention/dense/bias": "flax_model-00050-of-00072.msgpack", + "transformer/h/48/self_attention/dense/kernel": "flax_model-00050-of-00072.msgpack", + "transformer/h/48/self_attention/query_key_value/bias": "flax_model-00050-of-00072.msgpack", + "transformer/h/48/self_attention/query_key_value/kernel": "flax_model-00050-of-00072.msgpack", + "transformer/h/49/input_layernorm/bias": "flax_model-00051-of-00072.msgpack", + "transformer/h/49/input_layernorm/kernel": "flax_model-00051-of-00072.msgpack", + "transformer/h/49/mlp/dense_4h_to_h/bias": "flax_model-00051-of-00072.msgpack", + "transformer/h/49/mlp/dense_4h_to_h/kernel": "flax_model-00051-of-00072.msgpack", + "transformer/h/49/mlp/dense_h_to_4h/bias": "flax_model-00051-of-00072.msgpack", + "transformer/h/49/mlp/dense_h_to_4h/kernel": "flax_model-00051-of-00072.msgpack", + "transformer/h/49/post_attention_layernorm/bias": "flax_model-00051-of-00072.msgpack", + "transformer/h/49/post_attention_layernorm/kernel": "flax_model-00051-of-00072.msgpack", + "transformer/h/49/self_attention/dense/bias": "flax_model-00051-of-00072.msgpack", + "transformer/h/49/self_attention/dense/kernel": "flax_model-00051-of-00072.msgpack", + "transformer/h/49/self_attention/query_key_value/bias": "flax_model-00051-of-00072.msgpack", + "transformer/h/49/self_attention/query_key_value/kernel": "flax_model-00051-of-00072.msgpack", + "transformer/h/5/input_layernorm/bias": "flax_model-00007-of-00072.msgpack", + "transformer/h/5/input_layernorm/kernel": "flax_model-00007-of-00072.msgpack", + "transformer/h/5/mlp/dense_4h_to_h/bias": "flax_model-00007-of-00072.msgpack", + "transformer/h/5/mlp/dense_4h_to_h/kernel": "flax_model-00007-of-00072.msgpack", + "transformer/h/5/mlp/dense_h_to_4h/bias": "flax_model-00007-of-00072.msgpack", + "transformer/h/5/mlp/dense_h_to_4h/kernel": "flax_model-00007-of-00072.msgpack", + "transformer/h/5/post_attention_layernorm/bias": "flax_model-00007-of-00072.msgpack", + "transformer/h/5/post_attention_layernorm/kernel": "flax_model-00007-of-00072.msgpack", + "transformer/h/5/self_attention/dense/bias": "flax_model-00007-of-00072.msgpack", + "transformer/h/5/self_attention/dense/kernel": "flax_model-00007-of-00072.msgpack", + "transformer/h/5/self_attention/query_key_value/bias": "flax_model-00007-of-00072.msgpack", + "transformer/h/5/self_attention/query_key_value/kernel": "flax_model-00007-of-00072.msgpack", + "transformer/h/50/input_layernorm/bias": "flax_model-00052-of-00072.msgpack", + "transformer/h/50/input_layernorm/kernel": "flax_model-00052-of-00072.msgpack", + "transformer/h/50/mlp/dense_4h_to_h/bias": "flax_model-00052-of-00072.msgpack", + "transformer/h/50/mlp/dense_4h_to_h/kernel": "flax_model-00052-of-00072.msgpack", + "transformer/h/50/mlp/dense_h_to_4h/bias": "flax_model-00052-of-00072.msgpack", + "transformer/h/50/mlp/dense_h_to_4h/kernel": "flax_model-00052-of-00072.msgpack", + "transformer/h/50/post_attention_layernorm/bias": "flax_model-00052-of-00072.msgpack", + "transformer/h/50/post_attention_layernorm/kernel": "flax_model-00052-of-00072.msgpack", + "transformer/h/50/self_attention/dense/bias": "flax_model-00052-of-00072.msgpack", + "transformer/h/50/self_attention/dense/kernel": "flax_model-00052-of-00072.msgpack", + "transformer/h/50/self_attention/query_key_value/bias": "flax_model-00052-of-00072.msgpack", + "transformer/h/50/self_attention/query_key_value/kernel": "flax_model-00052-of-00072.msgpack", + "transformer/h/51/input_layernorm/bias": "flax_model-00053-of-00072.msgpack", + "transformer/h/51/input_layernorm/kernel": "flax_model-00053-of-00072.msgpack", + "transformer/h/51/mlp/dense_4h_to_h/bias": "flax_model-00053-of-00072.msgpack", + "transformer/h/51/mlp/dense_4h_to_h/kernel": "flax_model-00053-of-00072.msgpack", + "transformer/h/51/mlp/dense_h_to_4h/bias": "flax_model-00053-of-00072.msgpack", + "transformer/h/51/mlp/dense_h_to_4h/kernel": "flax_model-00053-of-00072.msgpack", + "transformer/h/51/post_attention_layernorm/bias": "flax_model-00053-of-00072.msgpack", + "transformer/h/51/post_attention_layernorm/kernel": "flax_model-00053-of-00072.msgpack", + "transformer/h/51/self_attention/dense/bias": "flax_model-00053-of-00072.msgpack", + "transformer/h/51/self_attention/dense/kernel": "flax_model-00053-of-00072.msgpack", + "transformer/h/51/self_attention/query_key_value/bias": "flax_model-00053-of-00072.msgpack", + "transformer/h/51/self_attention/query_key_value/kernel": "flax_model-00053-of-00072.msgpack", + "transformer/h/52/input_layernorm/bias": "flax_model-00054-of-00072.msgpack", + "transformer/h/52/input_layernorm/kernel": "flax_model-00054-of-00072.msgpack", + "transformer/h/52/mlp/dense_4h_to_h/bias": "flax_model-00054-of-00072.msgpack", + "transformer/h/52/mlp/dense_4h_to_h/kernel": "flax_model-00054-of-00072.msgpack", + "transformer/h/52/mlp/dense_h_to_4h/bias": "flax_model-00054-of-00072.msgpack", + "transformer/h/52/mlp/dense_h_to_4h/kernel": "flax_model-00054-of-00072.msgpack", + "transformer/h/52/post_attention_layernorm/bias": "flax_model-00054-of-00072.msgpack", + "transformer/h/52/post_attention_layernorm/kernel": "flax_model-00054-of-00072.msgpack", + "transformer/h/52/self_attention/dense/bias": "flax_model-00054-of-00072.msgpack", + "transformer/h/52/self_attention/dense/kernel": "flax_model-00054-of-00072.msgpack", + "transformer/h/52/self_attention/query_key_value/bias": "flax_model-00054-of-00072.msgpack", + "transformer/h/52/self_attention/query_key_value/kernel": "flax_model-00054-of-00072.msgpack", + "transformer/h/53/input_layernorm/bias": "flax_model-00055-of-00072.msgpack", + "transformer/h/53/input_layernorm/kernel": "flax_model-00055-of-00072.msgpack", + "transformer/h/53/mlp/dense_4h_to_h/bias": "flax_model-00055-of-00072.msgpack", + "transformer/h/53/mlp/dense_4h_to_h/kernel": "flax_model-00055-of-00072.msgpack", + "transformer/h/53/mlp/dense_h_to_4h/bias": "flax_model-00055-of-00072.msgpack", + "transformer/h/53/mlp/dense_h_to_4h/kernel": "flax_model-00055-of-00072.msgpack", + "transformer/h/53/post_attention_layernorm/bias": "flax_model-00055-of-00072.msgpack", + "transformer/h/53/post_attention_layernorm/kernel": "flax_model-00055-of-00072.msgpack", + "transformer/h/53/self_attention/dense/bias": "flax_model-00055-of-00072.msgpack", + "transformer/h/53/self_attention/dense/kernel": "flax_model-00055-of-00072.msgpack", + "transformer/h/53/self_attention/query_key_value/bias": "flax_model-00055-of-00072.msgpack", + "transformer/h/53/self_attention/query_key_value/kernel": "flax_model-00055-of-00072.msgpack", + "transformer/h/54/input_layernorm/bias": "flax_model-00056-of-00072.msgpack", + "transformer/h/54/input_layernorm/kernel": "flax_model-00056-of-00072.msgpack", + "transformer/h/54/mlp/dense_4h_to_h/bias": "flax_model-00056-of-00072.msgpack", + "transformer/h/54/mlp/dense_4h_to_h/kernel": "flax_model-00056-of-00072.msgpack", + "transformer/h/54/mlp/dense_h_to_4h/bias": "flax_model-00056-of-00072.msgpack", + "transformer/h/54/mlp/dense_h_to_4h/kernel": "flax_model-00056-of-00072.msgpack", + "transformer/h/54/post_attention_layernorm/bias": "flax_model-00056-of-00072.msgpack", + "transformer/h/54/post_attention_layernorm/kernel": "flax_model-00056-of-00072.msgpack", + "transformer/h/54/self_attention/dense/bias": "flax_model-00056-of-00072.msgpack", + "transformer/h/54/self_attention/dense/kernel": "flax_model-00056-of-00072.msgpack", + "transformer/h/54/self_attention/query_key_value/bias": "flax_model-00056-of-00072.msgpack", + "transformer/h/54/self_attention/query_key_value/kernel": "flax_model-00056-of-00072.msgpack", + "transformer/h/55/input_layernorm/bias": "flax_model-00057-of-00072.msgpack", + "transformer/h/55/input_layernorm/kernel": "flax_model-00057-of-00072.msgpack", + "transformer/h/55/mlp/dense_4h_to_h/bias": "flax_model-00057-of-00072.msgpack", + "transformer/h/55/mlp/dense_4h_to_h/kernel": "flax_model-00057-of-00072.msgpack", + "transformer/h/55/mlp/dense_h_to_4h/bias": "flax_model-00057-of-00072.msgpack", + "transformer/h/55/mlp/dense_h_to_4h/kernel": "flax_model-00057-of-00072.msgpack", + "transformer/h/55/post_attention_layernorm/bias": "flax_model-00057-of-00072.msgpack", + "transformer/h/55/post_attention_layernorm/kernel": "flax_model-00057-of-00072.msgpack", + "transformer/h/55/self_attention/dense/bias": "flax_model-00057-of-00072.msgpack", + "transformer/h/55/self_attention/dense/kernel": "flax_model-00057-of-00072.msgpack", + "transformer/h/55/self_attention/query_key_value/bias": "flax_model-00057-of-00072.msgpack", + "transformer/h/55/self_attention/query_key_value/kernel": "flax_model-00057-of-00072.msgpack", + "transformer/h/56/input_layernorm/bias": "flax_model-00058-of-00072.msgpack", + "transformer/h/56/input_layernorm/kernel": "flax_model-00058-of-00072.msgpack", + "transformer/h/56/mlp/dense_4h_to_h/bias": "flax_model-00058-of-00072.msgpack", + "transformer/h/56/mlp/dense_4h_to_h/kernel": "flax_model-00058-of-00072.msgpack", + "transformer/h/56/mlp/dense_h_to_4h/bias": "flax_model-00058-of-00072.msgpack", + "transformer/h/56/mlp/dense_h_to_4h/kernel": "flax_model-00058-of-00072.msgpack", + "transformer/h/56/post_attention_layernorm/bias": "flax_model-00058-of-00072.msgpack", + "transformer/h/56/post_attention_layernorm/kernel": "flax_model-00058-of-00072.msgpack", + "transformer/h/56/self_attention/dense/bias": "flax_model-00058-of-00072.msgpack", + "transformer/h/56/self_attention/dense/kernel": "flax_model-00058-of-00072.msgpack", + "transformer/h/56/self_attention/query_key_value/bias": "flax_model-00058-of-00072.msgpack", + "transformer/h/56/self_attention/query_key_value/kernel": "flax_model-00058-of-00072.msgpack", + "transformer/h/57/input_layernorm/bias": "flax_model-00059-of-00072.msgpack", + "transformer/h/57/input_layernorm/kernel": "flax_model-00059-of-00072.msgpack", + "transformer/h/57/mlp/dense_4h_to_h/bias": "flax_model-00059-of-00072.msgpack", + "transformer/h/57/mlp/dense_4h_to_h/kernel": "flax_model-00059-of-00072.msgpack", + "transformer/h/57/mlp/dense_h_to_4h/bias": "flax_model-00059-of-00072.msgpack", + "transformer/h/57/mlp/dense_h_to_4h/kernel": "flax_model-00059-of-00072.msgpack", + "transformer/h/57/post_attention_layernorm/bias": "flax_model-00059-of-00072.msgpack", + "transformer/h/57/post_attention_layernorm/kernel": "flax_model-00059-of-00072.msgpack", + "transformer/h/57/self_attention/dense/bias": "flax_model-00059-of-00072.msgpack", + "transformer/h/57/self_attention/dense/kernel": "flax_model-00059-of-00072.msgpack", + "transformer/h/57/self_attention/query_key_value/bias": "flax_model-00059-of-00072.msgpack", + "transformer/h/57/self_attention/query_key_value/kernel": "flax_model-00059-of-00072.msgpack", + "transformer/h/58/input_layernorm/bias": "flax_model-00060-of-00072.msgpack", + "transformer/h/58/input_layernorm/kernel": "flax_model-00060-of-00072.msgpack", + "transformer/h/58/mlp/dense_4h_to_h/bias": "flax_model-00060-of-00072.msgpack", + "transformer/h/58/mlp/dense_4h_to_h/kernel": "flax_model-00060-of-00072.msgpack", + "transformer/h/58/mlp/dense_h_to_4h/bias": "flax_model-00060-of-00072.msgpack", + "transformer/h/58/mlp/dense_h_to_4h/kernel": "flax_model-00060-of-00072.msgpack", + "transformer/h/58/post_attention_layernorm/bias": "flax_model-00060-of-00072.msgpack", + "transformer/h/58/post_attention_layernorm/kernel": "flax_model-00060-of-00072.msgpack", + "transformer/h/58/self_attention/dense/bias": "flax_model-00060-of-00072.msgpack", + "transformer/h/58/self_attention/dense/kernel": "flax_model-00060-of-00072.msgpack", + "transformer/h/58/self_attention/query_key_value/bias": "flax_model-00060-of-00072.msgpack", + "transformer/h/58/self_attention/query_key_value/kernel": "flax_model-00060-of-00072.msgpack", + "transformer/h/59/input_layernorm/bias": "flax_model-00061-of-00072.msgpack", + "transformer/h/59/input_layernorm/kernel": "flax_model-00061-of-00072.msgpack", + "transformer/h/59/mlp/dense_4h_to_h/bias": "flax_model-00061-of-00072.msgpack", + "transformer/h/59/mlp/dense_4h_to_h/kernel": "flax_model-00061-of-00072.msgpack", + "transformer/h/59/mlp/dense_h_to_4h/bias": "flax_model-00061-of-00072.msgpack", + "transformer/h/59/mlp/dense_h_to_4h/kernel": "flax_model-00061-of-00072.msgpack", + "transformer/h/59/post_attention_layernorm/bias": "flax_model-00061-of-00072.msgpack", + "transformer/h/59/post_attention_layernorm/kernel": "flax_model-00061-of-00072.msgpack", + "transformer/h/59/self_attention/dense/bias": "flax_model-00061-of-00072.msgpack", + "transformer/h/59/self_attention/dense/kernel": "flax_model-00061-of-00072.msgpack", + "transformer/h/59/self_attention/query_key_value/bias": "flax_model-00061-of-00072.msgpack", + "transformer/h/59/self_attention/query_key_value/kernel": "flax_model-00061-of-00072.msgpack", + "transformer/h/6/input_layernorm/bias": "flax_model-00008-of-00072.msgpack", + "transformer/h/6/input_layernorm/kernel": "flax_model-00008-of-00072.msgpack", + "transformer/h/6/mlp/dense_4h_to_h/bias": "flax_model-00008-of-00072.msgpack", + "transformer/h/6/mlp/dense_4h_to_h/kernel": "flax_model-00008-of-00072.msgpack", + "transformer/h/6/mlp/dense_h_to_4h/bias": "flax_model-00008-of-00072.msgpack", + "transformer/h/6/mlp/dense_h_to_4h/kernel": "flax_model-00008-of-00072.msgpack", + "transformer/h/6/post_attention_layernorm/bias": "flax_model-00008-of-00072.msgpack", + "transformer/h/6/post_attention_layernorm/kernel": "flax_model-00008-of-00072.msgpack", + "transformer/h/6/self_attention/dense/bias": "flax_model-00008-of-00072.msgpack", + "transformer/h/6/self_attention/dense/kernel": "flax_model-00008-of-00072.msgpack", + "transformer/h/6/self_attention/query_key_value/bias": "flax_model-00008-of-00072.msgpack", + "transformer/h/6/self_attention/query_key_value/kernel": "flax_model-00008-of-00072.msgpack", + "transformer/h/60/input_layernorm/bias": "flax_model-00062-of-00072.msgpack", + "transformer/h/60/input_layernorm/kernel": "flax_model-00062-of-00072.msgpack", + "transformer/h/60/mlp/dense_4h_to_h/bias": "flax_model-00062-of-00072.msgpack", + "transformer/h/60/mlp/dense_4h_to_h/kernel": "flax_model-00062-of-00072.msgpack", + "transformer/h/60/mlp/dense_h_to_4h/bias": "flax_model-00062-of-00072.msgpack", + "transformer/h/60/mlp/dense_h_to_4h/kernel": "flax_model-00062-of-00072.msgpack", + "transformer/h/60/post_attention_layernorm/bias": "flax_model-00062-of-00072.msgpack", + "transformer/h/60/post_attention_layernorm/kernel": "flax_model-00062-of-00072.msgpack", + "transformer/h/60/self_attention/dense/bias": "flax_model-00062-of-00072.msgpack", + "transformer/h/60/self_attention/dense/kernel": "flax_model-00062-of-00072.msgpack", + "transformer/h/60/self_attention/query_key_value/bias": "flax_model-00062-of-00072.msgpack", + "transformer/h/60/self_attention/query_key_value/kernel": "flax_model-00062-of-00072.msgpack", + "transformer/h/61/input_layernorm/bias": "flax_model-00063-of-00072.msgpack", + "transformer/h/61/input_layernorm/kernel": "flax_model-00063-of-00072.msgpack", + "transformer/h/61/mlp/dense_4h_to_h/bias": "flax_model-00063-of-00072.msgpack", + "transformer/h/61/mlp/dense_4h_to_h/kernel": "flax_model-00063-of-00072.msgpack", + "transformer/h/61/mlp/dense_h_to_4h/bias": "flax_model-00063-of-00072.msgpack", + "transformer/h/61/mlp/dense_h_to_4h/kernel": "flax_model-00063-of-00072.msgpack", + "transformer/h/61/post_attention_layernorm/bias": "flax_model-00063-of-00072.msgpack", + "transformer/h/61/post_attention_layernorm/kernel": "flax_model-00063-of-00072.msgpack", + "transformer/h/61/self_attention/dense/bias": "flax_model-00063-of-00072.msgpack", + "transformer/h/61/self_attention/dense/kernel": "flax_model-00063-of-00072.msgpack", + "transformer/h/61/self_attention/query_key_value/bias": "flax_model-00063-of-00072.msgpack", + "transformer/h/61/self_attention/query_key_value/kernel": "flax_model-00063-of-00072.msgpack", + "transformer/h/62/input_layernorm/bias": "flax_model-00064-of-00072.msgpack", + "transformer/h/62/input_layernorm/kernel": "flax_model-00064-of-00072.msgpack", + "transformer/h/62/mlp/dense_4h_to_h/bias": "flax_model-00064-of-00072.msgpack", + "transformer/h/62/mlp/dense_4h_to_h/kernel": "flax_model-00064-of-00072.msgpack", + "transformer/h/62/mlp/dense_h_to_4h/bias": "flax_model-00064-of-00072.msgpack", + "transformer/h/62/mlp/dense_h_to_4h/kernel": "flax_model-00064-of-00072.msgpack", + "transformer/h/62/post_attention_layernorm/bias": "flax_model-00064-of-00072.msgpack", + "transformer/h/62/post_attention_layernorm/kernel": "flax_model-00064-of-00072.msgpack", + "transformer/h/62/self_attention/dense/bias": "flax_model-00064-of-00072.msgpack", + "transformer/h/62/self_attention/dense/kernel": "flax_model-00064-of-00072.msgpack", + "transformer/h/62/self_attention/query_key_value/bias": "flax_model-00064-of-00072.msgpack", + "transformer/h/62/self_attention/query_key_value/kernel": "flax_model-00064-of-00072.msgpack", + "transformer/h/63/input_layernorm/bias": "flax_model-00065-of-00072.msgpack", + "transformer/h/63/input_layernorm/kernel": "flax_model-00065-of-00072.msgpack", + "transformer/h/63/mlp/dense_4h_to_h/bias": "flax_model-00065-of-00072.msgpack", + "transformer/h/63/mlp/dense_4h_to_h/kernel": "flax_model-00065-of-00072.msgpack", + "transformer/h/63/mlp/dense_h_to_4h/bias": "flax_model-00065-of-00072.msgpack", + "transformer/h/63/mlp/dense_h_to_4h/kernel": "flax_model-00065-of-00072.msgpack", + "transformer/h/63/post_attention_layernorm/bias": "flax_model-00065-of-00072.msgpack", + "transformer/h/63/post_attention_layernorm/kernel": "flax_model-00065-of-00072.msgpack", + "transformer/h/63/self_attention/dense/bias": "flax_model-00065-of-00072.msgpack", + "transformer/h/63/self_attention/dense/kernel": "flax_model-00065-of-00072.msgpack", + "transformer/h/63/self_attention/query_key_value/bias": "flax_model-00065-of-00072.msgpack", + "transformer/h/63/self_attention/query_key_value/kernel": "flax_model-00065-of-00072.msgpack", + "transformer/h/64/input_layernorm/bias": "flax_model-00066-of-00072.msgpack", + "transformer/h/64/input_layernorm/kernel": "flax_model-00066-of-00072.msgpack", + "transformer/h/64/mlp/dense_4h_to_h/bias": "flax_model-00066-of-00072.msgpack", + "transformer/h/64/mlp/dense_4h_to_h/kernel": "flax_model-00066-of-00072.msgpack", + "transformer/h/64/mlp/dense_h_to_4h/bias": "flax_model-00066-of-00072.msgpack", + "transformer/h/64/mlp/dense_h_to_4h/kernel": "flax_model-00066-of-00072.msgpack", + "transformer/h/64/post_attention_layernorm/bias": "flax_model-00066-of-00072.msgpack", + "transformer/h/64/post_attention_layernorm/kernel": "flax_model-00066-of-00072.msgpack", + "transformer/h/64/self_attention/dense/bias": "flax_model-00066-of-00072.msgpack", + "transformer/h/64/self_attention/dense/kernel": "flax_model-00066-of-00072.msgpack", + "transformer/h/64/self_attention/query_key_value/bias": "flax_model-00066-of-00072.msgpack", + "transformer/h/64/self_attention/query_key_value/kernel": "flax_model-00066-of-00072.msgpack", + "transformer/h/65/input_layernorm/bias": "flax_model-00067-of-00072.msgpack", + "transformer/h/65/input_layernorm/kernel": "flax_model-00067-of-00072.msgpack", + "transformer/h/65/mlp/dense_4h_to_h/bias": "flax_model-00067-of-00072.msgpack", + "transformer/h/65/mlp/dense_4h_to_h/kernel": "flax_model-00067-of-00072.msgpack", + "transformer/h/65/mlp/dense_h_to_4h/bias": "flax_model-00067-of-00072.msgpack", + "transformer/h/65/mlp/dense_h_to_4h/kernel": "flax_model-00067-of-00072.msgpack", + "transformer/h/65/post_attention_layernorm/bias": "flax_model-00067-of-00072.msgpack", + "transformer/h/65/post_attention_layernorm/kernel": "flax_model-00067-of-00072.msgpack", + "transformer/h/65/self_attention/dense/bias": "flax_model-00067-of-00072.msgpack", + "transformer/h/65/self_attention/dense/kernel": "flax_model-00067-of-00072.msgpack", + "transformer/h/65/self_attention/query_key_value/bias": "flax_model-00067-of-00072.msgpack", + "transformer/h/65/self_attention/query_key_value/kernel": "flax_model-00067-of-00072.msgpack", + "transformer/h/66/input_layernorm/bias": "flax_model-00068-of-00072.msgpack", + "transformer/h/66/input_layernorm/kernel": "flax_model-00068-of-00072.msgpack", + "transformer/h/66/mlp/dense_4h_to_h/bias": "flax_model-00068-of-00072.msgpack", + "transformer/h/66/mlp/dense_4h_to_h/kernel": "flax_model-00068-of-00072.msgpack", + "transformer/h/66/mlp/dense_h_to_4h/bias": "flax_model-00068-of-00072.msgpack", + "transformer/h/66/mlp/dense_h_to_4h/kernel": "flax_model-00068-of-00072.msgpack", + "transformer/h/66/post_attention_layernorm/bias": "flax_model-00068-of-00072.msgpack", + "transformer/h/66/post_attention_layernorm/kernel": "flax_model-00068-of-00072.msgpack", + "transformer/h/66/self_attention/dense/bias": "flax_model-00068-of-00072.msgpack", + "transformer/h/66/self_attention/dense/kernel": "flax_model-00068-of-00072.msgpack", + "transformer/h/66/self_attention/query_key_value/bias": "flax_model-00068-of-00072.msgpack", + "transformer/h/66/self_attention/query_key_value/kernel": "flax_model-00068-of-00072.msgpack", + "transformer/h/67/input_layernorm/bias": "flax_model-00069-of-00072.msgpack", + "transformer/h/67/input_layernorm/kernel": "flax_model-00069-of-00072.msgpack", + "transformer/h/67/mlp/dense_4h_to_h/bias": "flax_model-00069-of-00072.msgpack", + "transformer/h/67/mlp/dense_4h_to_h/kernel": "flax_model-00069-of-00072.msgpack", + "transformer/h/67/mlp/dense_h_to_4h/bias": "flax_model-00069-of-00072.msgpack", + "transformer/h/67/mlp/dense_h_to_4h/kernel": "flax_model-00069-of-00072.msgpack", + "transformer/h/67/post_attention_layernorm/bias": "flax_model-00069-of-00072.msgpack", + "transformer/h/67/post_attention_layernorm/kernel": "flax_model-00069-of-00072.msgpack", + "transformer/h/67/self_attention/dense/bias": "flax_model-00069-of-00072.msgpack", + "transformer/h/67/self_attention/dense/kernel": "flax_model-00069-of-00072.msgpack", + "transformer/h/67/self_attention/query_key_value/bias": "flax_model-00069-of-00072.msgpack", + "transformer/h/67/self_attention/query_key_value/kernel": "flax_model-00069-of-00072.msgpack", + "transformer/h/68/input_layernorm/bias": "flax_model-00070-of-00072.msgpack", + "transformer/h/68/input_layernorm/kernel": "flax_model-00070-of-00072.msgpack", + "transformer/h/68/mlp/dense_4h_to_h/bias": "flax_model-00070-of-00072.msgpack", + "transformer/h/68/mlp/dense_4h_to_h/kernel": "flax_model-00070-of-00072.msgpack", + "transformer/h/68/mlp/dense_h_to_4h/bias": "flax_model-00070-of-00072.msgpack", + "transformer/h/68/mlp/dense_h_to_4h/kernel": "flax_model-00070-of-00072.msgpack", + "transformer/h/68/post_attention_layernorm/bias": "flax_model-00070-of-00072.msgpack", + "transformer/h/68/post_attention_layernorm/kernel": "flax_model-00070-of-00072.msgpack", + "transformer/h/68/self_attention/dense/bias": "flax_model-00070-of-00072.msgpack", + "transformer/h/68/self_attention/dense/kernel": "flax_model-00070-of-00072.msgpack", + "transformer/h/68/self_attention/query_key_value/bias": "flax_model-00070-of-00072.msgpack", + "transformer/h/68/self_attention/query_key_value/kernel": "flax_model-00070-of-00072.msgpack", + "transformer/h/69/input_layernorm/bias": "flax_model-00071-of-00072.msgpack", + "transformer/h/69/input_layernorm/kernel": "flax_model-00071-of-00072.msgpack", + "transformer/h/69/mlp/dense_4h_to_h/bias": "flax_model-00071-of-00072.msgpack", + "transformer/h/69/mlp/dense_4h_to_h/kernel": "flax_model-00071-of-00072.msgpack", + "transformer/h/69/mlp/dense_h_to_4h/bias": "flax_model-00071-of-00072.msgpack", + "transformer/h/69/mlp/dense_h_to_4h/kernel": "flax_model-00071-of-00072.msgpack", + "transformer/h/69/post_attention_layernorm/bias": "flax_model-00071-of-00072.msgpack", + "transformer/h/69/post_attention_layernorm/kernel": "flax_model-00071-of-00072.msgpack", + "transformer/h/69/self_attention/dense/bias": "flax_model-00071-of-00072.msgpack", + "transformer/h/69/self_attention/dense/kernel": "flax_model-00071-of-00072.msgpack", + "transformer/h/69/self_attention/query_key_value/bias": "flax_model-00071-of-00072.msgpack", + "transformer/h/69/self_attention/query_key_value/kernel": "flax_model-00071-of-00072.msgpack", + "transformer/h/7/input_layernorm/bias": "flax_model-00009-of-00072.msgpack", + "transformer/h/7/input_layernorm/kernel": "flax_model-00009-of-00072.msgpack", + "transformer/h/7/mlp/dense_4h_to_h/bias": "flax_model-00009-of-00072.msgpack", + "transformer/h/7/mlp/dense_4h_to_h/kernel": "flax_model-00009-of-00072.msgpack", + "transformer/h/7/mlp/dense_h_to_4h/bias": "flax_model-00009-of-00072.msgpack", + "transformer/h/7/mlp/dense_h_to_4h/kernel": "flax_model-00009-of-00072.msgpack", + "transformer/h/7/post_attention_layernorm/bias": "flax_model-00009-of-00072.msgpack", + "transformer/h/7/post_attention_layernorm/kernel": "flax_model-00009-of-00072.msgpack", + "transformer/h/7/self_attention/dense/bias": "flax_model-00009-of-00072.msgpack", + "transformer/h/7/self_attention/dense/kernel": "flax_model-00009-of-00072.msgpack", + "transformer/h/7/self_attention/query_key_value/bias": "flax_model-00009-of-00072.msgpack", + "transformer/h/7/self_attention/query_key_value/kernel": "flax_model-00009-of-00072.msgpack", + "transformer/h/8/input_layernorm/bias": "flax_model-00010-of-00072.msgpack", + "transformer/h/8/input_layernorm/kernel": "flax_model-00010-of-00072.msgpack", + "transformer/h/8/mlp/dense_4h_to_h/bias": "flax_model-00010-of-00072.msgpack", + "transformer/h/8/mlp/dense_4h_to_h/kernel": "flax_model-00010-of-00072.msgpack", + "transformer/h/8/mlp/dense_h_to_4h/bias": "flax_model-00010-of-00072.msgpack", + "transformer/h/8/mlp/dense_h_to_4h/kernel": "flax_model-00010-of-00072.msgpack", + "transformer/h/8/post_attention_layernorm/bias": "flax_model-00010-of-00072.msgpack", + "transformer/h/8/post_attention_layernorm/kernel": "flax_model-00010-of-00072.msgpack", + "transformer/h/8/self_attention/dense/bias": "flax_model-00010-of-00072.msgpack", + "transformer/h/8/self_attention/dense/kernel": "flax_model-00010-of-00072.msgpack", + "transformer/h/8/self_attention/query_key_value/bias": "flax_model-00010-of-00072.msgpack", + "transformer/h/8/self_attention/query_key_value/kernel": "flax_model-00010-of-00072.msgpack", + "transformer/h/9/input_layernorm/bias": "flax_model-00011-of-00072.msgpack", + "transformer/h/9/input_layernorm/kernel": "flax_model-00011-of-00072.msgpack", + "transformer/h/9/mlp/dense_4h_to_h/bias": "flax_model-00011-of-00072.msgpack", + "transformer/h/9/mlp/dense_4h_to_h/kernel": "flax_model-00011-of-00072.msgpack", + "transformer/h/9/mlp/dense_h_to_4h/bias": "flax_model-00011-of-00072.msgpack", + "transformer/h/9/mlp/dense_h_to_4h/kernel": "flax_model-00011-of-00072.msgpack", + "transformer/h/9/post_attention_layernorm/bias": "flax_model-00011-of-00072.msgpack", + "transformer/h/9/post_attention_layernorm/kernel": "flax_model-00011-of-00072.msgpack", + "transformer/h/9/self_attention/dense/bias": "flax_model-00011-of-00072.msgpack", + "transformer/h/9/self_attention/dense/kernel": "flax_model-00011-of-00072.msgpack", + "transformer/h/9/self_attention/query_key_value/bias": "flax_model-00011-of-00072.msgpack", + "transformer/h/9/self_attention/query_key_value/kernel": "flax_model-00011-of-00072.msgpack", + "transformer/ln_f/bias": "flax_model-00072-of-00072.msgpack", + "transformer/ln_f/scale": "flax_model-00072-of-00072.msgpack", + "transformer/word_embeddings/embedding": "flax_model-00001-of-00072.msgpack", + "transformer/word_embeddings_layernorm/bias": "flax_model-00001-of-00072.msgpack", + "transformer/word_embeddings_layernorm/scale": "flax_model-00001-of-00072.msgpack" + } +}