import os
import sys

# Make the repo root importable so yuan_moe_hf_model can be found.
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)))

import torch
from transformers import LlamaTokenizer

from yuan_moe_hf_model import YuanForCausalLM


print("Creat tokenizer...")

# Yuan uses a LLaMA-style tokenizer with '<eod>' as the end-of-document token.
tokenizer = LlamaTokenizer.from_pretrained(
    '/temp_data/LLM_test/MOE/moe_yuan',
    add_eos_token=False, add_bos_token=False, eos_token='<eod>')
# Register Yuan's extra special tokens (separators, fill-in-the-middle markers,
# commit markers, and Jupyter-notebook markers).
tokenizer.add_tokens(
    ['<sep>', '<pad>', '<mask>', '<predict>',
     '<FIM_SUFFIX>', '<FIM_PREFIX>', '<FIM_MIDDLE>',
     '<commit_before>', '<commit_msg>', '<commit_after>',
     '<jupyter_start>', '<jupyter_text>', '<jupyter_code>',
     '<jupyter_output>', '<empty_output>'],
    special_tokens=True)
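# Note (assumption): the Yuan-MoE checkpoint is expected to already reserve
# embedding rows for these tokens; with a checkpoint that does not, the model
# would need model.resize_token_embeddings(len(tokenizer)) after loading.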

print("Creat model...")
# Load the MoE checkpoint in bfloat16 on a single GPU.
model = YuanForCausalLM.from_pretrained(
    '/temp_data/LLM_test/MOE/moe_yuan',
    torch_dtype=torch.bfloat16, trust_remote_code=True).to("cuda:0")
# Alternative: shard the checkpoint across all visible GPUs instead.
# model = YuanForCausalLM.from_pretrained('/mnt/beegfs/sunzeyu/32experts_bin_196800_lmhead',
#                                         device_map='auto', torch_dtype=torch.bfloat16, trust_remote_code=True)

# With do_sample=False below, decoding is greedy; top_k/top_p/temperature are
# kept to mirror that (top_k=1 is equivalent to greedy) but have no effect.
generation_params = {
    "max_length": 1024,
    "top_k": 1,
    "top_p": 0.0,
    "temperature": 1.0,
}

# Prompt (Chinese): "If you were an algorithm engineer asked to write a plan
# for large-model work, how would you write it?"
inputs = tokenizer("如果你是一个算法工程师,让你写一个大模型相关的规划,你应该怎么写?",
                   return_tensors="pt")["input_ids"].to("cuda:0")
outputs = model.generate(inputs, do_sample=False, **generation_params)
print(tokenizer.decode(outputs[0]))
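# To strip '<eod>' and the other registered special tokens from the printed
# text, tokenizer.decode(outputs[0], skip_special_tokens=True) could be used.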