CyberNative committed on
Commit
b1e1493
1 Parent(s): e655844

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -9
app.py CHANGED
@@ -1,8 +1,8 @@
1
  import gradio as gr
2
  import os
3
  import spaces
4
- from transformers import GemmaTokenizer, AutoModelForCausalLM
5
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
6
  from threading import Thread
7
 
8
  # Set an environment variable
@@ -46,13 +46,8 @@ h1 {
46
  """
47
 
48
  # Load the tokenizer and model
49
- tokenizer = AutoTokenizer.from_pretrained("CyberNative-AI/Colibri_8b_v0.1")
50
- #model = AutoModelForCausalLM.from_pretrained("CyberNative-AI/Colibri_8b_v0.1", load_in_4bit=True, load_in_8bit=False, device_map="auto")
51
-
52
- from transformers import BitsAndBytesConfig
53
-
54
- nf4_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4")
55
- model = AutoModelForCausalLM.from_pretrained("CyberNative-AI/Colibri_8b_v0.1", quantization_config=nf4_config)
56
 
57
  terminators = [
58
  tokenizer.eos_token_id,
 
1
  import gradio as gr
2
  import os
3
  import spaces
4
+ from transformers import AutoTokenizer
5
+ from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
6
  from threading import Thread
7
 
8
  # Set an environment variable
 
46
  """
47
 
48
  # Load the tokenizer and model
49
+ tokenizer = AutoTokenizer.from_pretrained("CyberNative-AI/Colibri_8b_v0.1_gptq_128_4bit")
50
+ model = AutoGPTQForCausalLM.from_quantized("CyberNative-AI/Colibri_8b_v0.1_gptq_128_4bit", device="cpu")
 
 
 
 
 
51
 
52
  terminators = [
53
  tokenizer.eos_token_id,