muryshev committed on
Commit
938c23c
1 Parent(s): aec9a86

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -8
app.py CHANGED
@@ -22,7 +22,7 @@ ROLE_TOKENS = {
22
  }
23
 
24
  CONTEXT_SIZE = 2000
25
- ENABLE_GPU = True
26
  GPU_LAYERS = 70
27
 
28
  # Create a lock object
@@ -39,8 +39,8 @@ app.logger.setLevel(logging.DEBUG) # Set the desired logging level
39
  #repo_name = "IlyaGusev/saiga2_13b_gguf"
40
  #model_name = "model-q4_K.gguf"
41
 
42
- repo_name = "IlyaGusev/saiga2_70b_gguf"
43
- model_name = "ggml-model-q4_1.gguf"
44
 
45
  #repo_name = "IlyaGusev/saiga2_7b_gguf"
46
  #model_name = "model-q4_K.gguf"
@@ -98,8 +98,8 @@ def init_model(context_size, enable_gpu=False, gpu_layer_number=35):
98
  logits_all=True,
99
  #n_threads=12,
100
  verbose=True,
101
- n_gpu_layers=gpu_layer_number,
102
- n_gqa=8 #must be set for 70b models
103
  )
104
  return model
105
  else:
@@ -110,8 +110,8 @@ def init_model(context_size, enable_gpu=False, gpu_layer_number=35):
110
  #n_batch=100,
111
  logits_all=True,
112
  #n_threads=12,
113
- verbose=True,
114
- n_gqa=8 #must be set for 70b models
115
  )
116
  return model
117
 
@@ -236,7 +236,7 @@ def generate_and_log_tokens(user_request, model, generator):
236
  global response_tokens
237
  for token in generate_tokens(model, generator):
238
  if token == b'': # or (max_new_tokens is not None and i >= max_new_tokens):
239
- log(user_request, response_tokens.decode("utf-8", errors="ignore"))
240
  response_tokens = bytearray()
241
  break
242
  response_tokens.extend(token)
@@ -271,6 +271,8 @@ def generate_response():
271
  for message in messages:
272
  if message.get("from") == "assistant":
273
  message_tokens = get_message_tokens(model=model, role="bot", content=message.get("content", ""))
 
 
274
  else:
275
  message_tokens = get_message_tokens(model=model, role="user", content=message.get("content", ""))
276
 
 
22
  }
23
 
24
  CONTEXT_SIZE = 2000
25
+ ENABLE_GPU = False
26
  GPU_LAYERS = 70
27
 
28
  # Create a lock object
 
39
  #repo_name = "IlyaGusev/saiga2_13b_gguf"
40
  #model_name = "model-q4_K.gguf"
41
 
42
+ repo_name = "IlyaGusev/saiga2_13b_gguf"
43
+ model_name = "model-q8_0.gguf"
44
 
45
  #repo_name = "IlyaGusev/saiga2_7b_gguf"
46
  #model_name = "model-q4_K.gguf"
 
98
  logits_all=True,
99
  #n_threads=12,
100
  verbose=True,
101
+ n_gpu_layers=gpu_layer_number#,
102
+ #n_gqa=8 #must be set for 70b models
103
  )
104
  return model
105
  else:
 
110
  #n_batch=100,
111
  logits_all=True,
112
  #n_threads=12,
113
+ verbose=True#,
114
+ #n_gqa=8 #must be set for 70b models
115
  )
116
  return model
117
 
 
236
  global response_tokens
237
  for token in generate_tokens(model, generator):
238
  if token == b'': # or (max_new_tokens is not None and i >= max_new_tokens):
239
+ #log(user_request, response_tokens.decode("utf-8", errors="ignore"))
240
  response_tokens = bytearray()
241
  break
242
  response_tokens.extend(token)
 
271
  for message in messages:
272
  if message.get("from") == "assistant":
273
  message_tokens = get_message_tokens(model=model, role="bot", content=message.get("content", ""))
274
+ elif message.get("from") == "system":
275
+ message_tokens = get_message_tokens(model=model, role="system", content=message.get("content", ""))
276
  else:
277
  message_tokens = get_message_tokens(model=model, role="user", content=message.get("content", ""))
278