sub314xxl IlyaGusev committed on
Commit 9d48898
0 Parent(s):

Duplicate from IlyaGusev/saiga2_13b_ggml


Co-authored-by: Ilya Gusev <IlyaGusev@users.noreply.huggingface.co>

Files changed (4)
  1. .gitattributes +35 -0
  2. README.md +11 -0
  3. app.py +210 -0
  4. requirements.txt +3 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
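
Every rule above routes a binary artifact type through Git LFS rather than plain Git storage; the quantized model file `ggml-model-q4_1.bin` that `app.py` downloads below is covered by the `*.bin` rule. As a rough illustration of what these patterns select (a minimal sketch only: Python's `fnmatch` merely approximates gitattributes glob semantics, e.g. for `saved_model/**/*`):

# Minimal sketch: which filenames the LFS rules above would capture.
# fnmatch only approximates gitattributes matching; this is illustrative,
# not how Git itself resolves attributes.
from fnmatch import fnmatch

LFS_PATTERNS = ["*.7z", "*.bin", "*.safetensors", "*.zip"]  # subset of the rules above

def is_lfs_tracked(path: str) -> bool:
    """Return True if any LFS pattern matches the given path."""
    return any(fnmatch(path, pattern) for pattern in LFS_PATTERNS)

print(is_lfs_tracked("ggml-model-q4_1.bin"))  # True: model weights go through LFS
print(is_lfs_tracked("app.py"))               # False: source files stay in plain Git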
README.md ADDED
@@ -0,0 +1,11 @@
+ ---
+ title: Saiga2 13b GGML Q4_1
+ emoji: 🔥
+ colorFrom: pink
+ colorTo: yellow
+ sdk: gradio
+ sdk_version: 3.32.0
+ app_file: app.py
+ pinned: false
+ duplicated_from: IlyaGusev/saiga2_13b_ggml
+ ---
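
This front matter is the Hugging Face Spaces configuration: `sdk: gradio` and `sdk_version: 3.32.0` pick the runtime, `app_file: app.py` names the entry point, and `duplicated_from` records the source repository. A purely illustrative sketch of reading it back, assuming PyYAML is available (Spaces parse this block themselves):

# Illustrative only: parse the Space front matter shown above with PyYAML.
import yaml

front_matter = """
title: Saiga2 13b GGML Q4_1
sdk: gradio
sdk_version: 3.32.0
app_file: app.py
duplicated_from: IlyaGusev/saiga2_13b_ggml
"""
config = yaml.safe_load(front_matter)
assert config["app_file"] == "app.py"      # the Space launches this file
assert config["sdk_version"] == "3.32.0"   # matches gradio==3.32.0 in requirements.txt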
app.py ADDED
@@ -0,0 +1,210 @@
+ import gradio as gr
+
+ import copy
+ import random
+ import os
+ import requests
+ import time
+ import sys
+
+ from huggingface_hub import snapshot_download
+ from llama_cpp import Llama
+
+
+ SYSTEM_PROMPT = "Ты — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."  # "You are Saiga, a Russian-language automated assistant. You talk to people and help them."
+ SYSTEM_TOKEN = 1788  # role-marker token ids hard-coded for the Saiga prompt format
+ USER_TOKEN = 1404
+ BOT_TOKEN = 9225
+ LINEBREAK_TOKEN = 13
+
+
+ ROLE_TOKENS = {
+     "user": USER_TOKEN,
+     "bot": BOT_TOKEN,
+     "system": SYSTEM_TOKEN
+ }
+
+
+ def get_message_tokens(model, role, content):
+     message_tokens = model.tokenize(content.encode("utf-8"))
+     message_tokens.insert(1, ROLE_TOKENS[role])
+     message_tokens.insert(2, LINEBREAK_TOKEN)
+     message_tokens.append(model.token_eos())
+     return message_tokens
+
+
+ def get_system_tokens(model):
+     system_message = {"role": "system", "content": SYSTEM_PROMPT}
+     return get_message_tokens(model, **system_message)
+
+
+ repo_name = "IlyaGusev/saiga2_13b_ggml"
+ model_name = "ggml-model-q4_1.bin"
+
+ snapshot_download(repo_id=repo_name, local_dir=".", allow_patterns=model_name)
+
+ model = Llama(
+     model_path=model_name,
+     n_ctx=2000,
+     n_parts=1,
+ )
+
+ max_new_tokens = 1500
+
+ def user(message, history):
+     new_history = history + [[message, None]]
+     return "", new_history
+
+
+ def bot(
+     history,
+     system_prompt,
+     top_p,
+     top_k,
+     temp
+ ):
+     tokens = get_system_tokens(model)[:]
+     tokens.append(LINEBREAK_TOKEN)
+
+     for user_message, bot_message in history[:-1]:
+         message_tokens = get_message_tokens(model=model, role="user", content=user_message)
+         tokens.extend(message_tokens)
+         if bot_message:
+             message_tokens = get_message_tokens(model=model, role="bot", content=bot_message)
+             tokens.extend(message_tokens)
+
+     last_user_message = history[-1][0]
+     message_tokens = get_message_tokens(model=model, role="user", content=last_user_message)
+     tokens.extend(message_tokens)
+
+     role_tokens = [model.token_bos(), BOT_TOKEN, LINEBREAK_TOKEN]
+     tokens.extend(role_tokens)
+     generator = model.generate(
+         tokens,
+         top_k=top_k,
+         top_p=top_p,
+         temp=temp
+     )
+
+     partial_text = ""
+     for i, token in enumerate(generator):
+         if token == model.token_eos() or (max_new_tokens is not None and i >= max_new_tokens):
+             break
+         partial_text += model.detokenize([token]).decode("utf-8", "ignore")
+         history[-1][1] = partial_text
+         yield history
+
+
+ with gr.Blocks(
+     theme=gr.themes.Soft()
+ ) as demo:
+     favicon = '<img src="https://cdn.midjourney.com/b88e5beb-6324-4820-8504-a1a37a9ba36d/0_1.png" width="48px" style="display: inline">'
+     gr.Markdown(
+         f"""<h1><center>{favicon}Saiga2 13B GGML Q4_1</center></h1>
+
+         This is a demo of a **Russian**-speaking LLaMA2-based model. If you are interested in other languages, please check other models, such as [MPT-7B-Chat](https://huggingface.co/spaces/mosaicml/mpt-7b-chat).
+
+         Это демонстрационная версия [квантованной Сайги-2 с 13 миллиардами параметров](https://huggingface.co/IlyaGusev/saiga2_13b_ggml), работающая на CPU.
+
+         Сайга-2 — это разговорная языковая модель, которая основана на [LLaMA-2](https://ai.meta.com/llama/) и дообучена на корпусах, сгенерированных ChatGPT, таких как [ru_turbo_alpaca](https://huggingface.co/datasets/IlyaGusev/ru_turbo_alpaca), [ru_turbo_saiga](https://huggingface.co/datasets/IlyaGusev/ru_turbo_saiga) и [gpt_roleplay_realm](https://huggingface.co/datasets/IlyaGusev/gpt_roleplay_realm).
+         """
+     )
+     with gr.Row():
+         with gr.Column(scale=5):
+             system_prompt = gr.Textbox(label="Системный промпт", placeholder="", value=SYSTEM_PROMPT, interactive=False)
+             chatbot = gr.Chatbot(label="Диалог").style(height=400)
+         with gr.Column(min_width=80, scale=1):
+             with gr.Tab(label="Параметры генерации"):
+                 top_p = gr.Slider(
+                     minimum=0.0,
+                     maximum=1.0,
+                     value=0.9,
+                     step=0.05,
+                     interactive=True,
+                     label="Top-p",
+                 )
+                 top_k = gr.Slider(
+                     minimum=10,
+                     maximum=100,
+                     value=30,
+                     step=5,
+                     interactive=True,
+                     label="Top-k",
+                 )
+                 temp = gr.Slider(
+                     minimum=0.0,
+                     maximum=2.0,
+                     value=0.1,
+                     step=0.1,
+                     interactive=True,
+                     label="Temp"
+                 )
+     with gr.Row():
+         with gr.Column():
+             msg = gr.Textbox(
+                 label="Отправить сообщение",
+                 placeholder="Отправить сообщение",
+                 show_label=False,
+             ).style(container=False)
+         with gr.Column():
+             with gr.Row():
+                 submit = gr.Button("Отправить")
+                 stop = gr.Button("Остановить")
+                 clear = gr.Button("Очистить")
+     with gr.Row():
+         gr.Markdown(
+             """ПРЕДУПРЕЖДЕНИЕ: Модель может генерировать фактически или этически некорректные тексты. Мы не несём за это ответственность."""
+         )
+
+     # Pressing Enter
+     submit_event = msg.submit(
+         fn=user,
+         inputs=[msg, chatbot],
+         outputs=[msg, chatbot],
+         queue=False,
+     ).success(
+         fn=bot,
+         inputs=[
+             chatbot,
+             system_prompt,
+             top_p,
+             top_k,
+             temp
+         ],
+         outputs=chatbot,
+         queue=True,
+     )
+
+     # Pressing the button
+     submit_click_event = submit.click(
+         fn=user,
+         inputs=[msg, chatbot],
+         outputs=[msg, chatbot],
+         queue=False,
+     ).success(
+         fn=bot,
+         inputs=[
+             chatbot,
+             system_prompt,
+             top_p,
+             top_k,
+             temp
+         ],
+         outputs=chatbot,
+         queue=True,
+     )
+
+     # Stop generation
+     stop.click(
+         fn=None,
+         inputs=None,
+         outputs=None,
+         cancels=[submit_event, submit_click_event],
+         queue=False,
+     )
+
+     # Clear history
+     clear.click(lambda: None, None, chatbot, queue=False)
+
+ demo.queue(max_size=128, concurrency_count=1)
+ demo.launch()
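
The prompt format assembled above is `<s>{role}\n{content}</s>` per turn: `model.tokenize` prepends BOS, `get_message_tokens` splices the role token and a linebreak in right after it and appends EOS, and `bot` then primes generation with `<s>bot\n` so the model continues as the assistant. A minimal sketch of querying the model with the same helpers outside Gradio (it reuses `model`, the constants, and the functions defined above and mirrors the logic of `bot`; the `answer` helper itself is hypothetical, not part of the Space):

# Hypothetical helper reusing app.py's functions and constants; mirrors bot()
# without the Gradio streaming. Sampling defaults match the UI sliders above.
def answer(model, user_message, top_k=30, top_p=0.9, temp=0.1):
    tokens = get_system_tokens(model) + [LINEBREAK_TOKEN]        # <s>system\n...</s>\n
    tokens += get_message_tokens(model=model, role="user", content=user_message)
    tokens += [model.token_bos(), BOT_TOKEN, LINEBREAK_TOKEN]    # prime the bot turn
    reply = ""
    for i, token in enumerate(model.generate(tokens, top_k=top_k, top_p=top_p, temp=temp)):
        if token == model.token_eos() or i >= max_new_tokens:
            break
        reply += model.detokenize([token]).decode("utf-8", "ignore")
    return reply

# print(answer(model, "Привет! Кто ты?"))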
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ llama-cpp-python==0.1.77
+ huggingface-hub==0.14.1
+ gradio==3.32.0
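
The pins are load-bearing: `llama-cpp-python==0.1.77` is a GGML-era release of the bindings (later releases expect GGUF files rather than the `*.bin` GGML weights this Space downloads), and `gradio==3.32.0` matches `sdk_version` in README.md. A hypothetical smoke check of the installed environment:

# Hypothetical smoke check that the pinned versions are installed.
from importlib.metadata import version

for package, expected in [
    ("llama-cpp-python", "0.1.77"),   # GGML-era bindings; later releases expect GGUF weights
    ("huggingface-hub", "0.14.1"),
    ("gradio", "3.32.0"),             # matches sdk_version in README.md
]:
    assert version(package) == expected, f"{package} is not pinned to {expected}"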