cloudyu committed
Commit 483359d
Parent: a5e4fad

Create README.md

Files changed (1): README.md (+64 −0)

README.md ADDED
---
license: cc-by-nc-4.0
---

# Yi-based MoE 2x34B with Mixtral architecture

This is an English & Chinese MoE model, slightly different from cloudyu/Mixtral_34Bx2_MoE_60B, and also based on:
* [jondurbin/bagel-dpo-34b-v0.2](https://huggingface.co/jondurbin/bagel-dpo-34b-v0.2)
* [SUSTech/SUS-Chat-34B](https://huggingface.co/SUSTech/SUS-Chat-34B)
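
If you only want to confirm the Mixtral-style MoE layout before downloading the full weights, inspecting the repo's config is enough. A minimal sketch, assuming the config exposes the usual `MixtralConfig` fields:

```
from transformers import AutoConfig

# fetch only the config, not the weights
config = AutoConfig.from_pretrained("cloudyu/Yi-34Bx2-MoE-60B")
print(config.model_type)           # "mixtral" for a Mixtral-architecture MoE
print(config.num_local_experts)    # experts per MoE layer (2 here, one per source model)
print(config.num_experts_per_tok)  # experts routed per token
```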

GPU code example

```
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# v2 models
model_path = "cloudyu/Yi-34Bx2-MoE-60B"

tokenizer = AutoTokenizer.from_pretrained(model_path, use_default_system_prompt=False)
# load in 4-bit so the ~60B-parameter model fits on common GPU setups (requires bitsandbytes)
model = AutoModelForCausalLM.from_pretrained(
    model_path, torch_dtype=torch.float32, device_map='auto', local_files_only=False, load_in_4bit=True
)
print(model)
# simple REPL: an empty prompt exits the loop
prompt = input("please input prompt:")
while len(prompt) > 0:
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to("cuda")
    generation_output = model.generate(
        input_ids=input_ids, max_new_tokens=500, repetition_penalty=1.2
    )
    print(tokenizer.decode(generation_output[0]))
    prompt = input("please input prompt:")
```
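
Recent transformers releases prefer passing quantization through an explicit `BitsAndBytesConfig` rather than the bare `load_in_4bit=True` argument. A sketch of an equivalent 4-bit load, assuming bitsandbytes is installed:

```
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# same 4-bit quantized load, with the compute dtype made explicit
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
    "cloudyu/Yi-34Bx2-MoE-60B", device_map='auto', quantization_config=bnb_config
)
```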

CPU example

```
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# v2 models
model_path = "cloudyu/Yi-34Bx2-MoE-60B"

tokenizer = AutoTokenizer.from_pretrained(model_path, use_default_system_prompt=False)
# full bfloat16 weights on CPU: about 2 bytes per parameter, so a ~60B model needs roughly 120 GB of RAM
model = AutoModelForCausalLM.from_pretrained(
    model_path, torch_dtype=torch.bfloat16, device_map='cpu'
)
print(model)
# simple REPL: an empty prompt exits the loop
prompt = input("please input prompt:")
while len(prompt) > 0:
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    generation_output = model.generate(
        input_ids=input_ids, max_new_tokens=500, repetition_penalty=1.2
    )
    print(tokenizer.decode(generation_output[0]))
    prompt = input("please input prompt:")
```
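
With either setup, nothing prints until the whole completion is finished. To stream tokens as they are generated, transformers' `TextStreamer` drops into the same loop; a minimal sketch reusing the `model` and `tokenizer` from the examples above (the prompt string is just an illustration):

```
from transformers import TextStreamer

# echo newly generated tokens to stdout, skipping the prompt itself
streamer = TextStreamer(tokenizer, skip_prompt=True)
input_ids = tokenizer("please introduce yourself", return_tensors="pt").input_ids.to(model.device)
model.generate(input_ids=input_ids, streamer=streamer, max_new_tokens=500, repetition_penalty=1.2)
```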