japanese-spoken-language-bert / download_wikipedia_bert.py
Katsumata420's picture
Upload scripts
bff929a
raw
history blame contribute delete
No virus
694 Bytes
from transformers import BertJapaneseTokenizer
from transformers import BertConfig
from transformers import BertForPreTraining
# Hub identifier of the pretrained model whose tokenizer and config are reused.
PRETRAINED_MODEL_NAME = 'cl-tohoku/bert-base-japanese-whole-word-masking'

# Local model directories that each receive a copy of the tokenizer files and
# the config. Order matches the order in which the directories are written.
OUTPUT_DIRS = (
    'models/1-6_layer-wise',
    'models/tapt512_60K',
    'models/dapt128-tapt512',
)

# `from_pretrained` is a classmethod, so it is called on the class directly;
# there is no need to build a default `BertConfig()` instance first.
tokenizer = BertJapaneseTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)
config = BertConfig.from_pretrained(PRETRAINED_MODEL_NAME)

# Write the same tokenizer and config into every model directory.
# Only tokenizer files and the config are saved here; this script does not
# write any model weights.
for output_dir in OUTPUT_DIRS:
    tokenizer.save_pretrained(output_dir)
    config.save_pretrained(output_dir)

# Example (not executed): loading a model from one of the directories above.
# NOTE(review): presumably the weights are placed in the directory separately;
# confirm before relying on this.
# model = BertForPreTraining.from_pretrained('models/dapt128-tapt512')