cllatMTK committed
Commit 163d2e6
1 Parent(s): 5885208

Update app.py

Files changed (1)
  1. app.py +44 -33
app.py CHANGED
@@ -45,7 +45,7 @@ HEADERS = {
 MAX_SEC = 30
 MAX_INPUT_LENGTH = 5000
 
-tokenizer = AutoTokenizer.from_pretrained("MediaTek-Research/Breexe-8x7B-Instruct-v0_1", use_auth_token=os.environ.get("HF_TOKEN"))
+tokenizer = AutoTokenizer.from_pretrained("MediaTek-Research/Breeze-7B-Instruct-v0_1")
 
 def insert_to_db(prompt, response, temperature, top_p):
     try:
@@ -95,12 +95,30 @@ def refusal_condition(query):
 with gr.Blocks() as demo:
     gr.Markdown(DESCRIPTION)
 
-    system_prompt = gr.Textbox(label='System prompt',
-                               value=DEFAULT_SYSTEM_PROMPT,
-                               lines=1)
-
-    with gr.Accordion(label='Advanced options', open=False):
+    chatbot = gr.Chatbot()
+    with gr.Row():
+        msg = gr.Textbox(
+            container=False,
+            show_label=False,
+            placeholder='Type a message...',
+            scale=10,
+        )
+        submit_button = gr.Button('Submit',
+                                  variant='primary',
+                                  scale=1,
+                                  min_width=0)
 
+    with gr.Row():
+        retry_button = gr.Button('🔄 Retry', variant='secondary')
+        undo_button = gr.Button('↩️ Undo', variant='secondary')
+        clear = gr.Button('🗑️ Clear', variant='secondary')
+
+    saved_input = gr.State()
+
+    with gr.Accordion(label='Advanced options', open=False):
+        system_prompt = gr.Textbox(label='System prompt',
+                                   value=DEFAULT_SYSTEM_PROMPT,
+                                   lines=6)
         max_new_tokens = gr.Slider(
             label='Max new tokens',
             minimum=32,
@@ -111,7 +129,7 @@ with gr.Blocks() as demo:
         temperature = gr.Slider(
             label='Temperature',
             minimum=0.01,
-            maximum=0.5,
+            maximum=1.0,
             step=0.01,
             value=0.01,
         )
@@ -122,29 +140,13 @@ with gr.Blocks() as demo:
             step=0.01,
             value=0.01,
         )
-
-    chatbot = gr.Chatbot()
-    with gr.Row():
-        msg = gr.Textbox(
-            container=False,
-            show_label=False,
-            placeholder='Type a message...',
-            scale=10,
-            lines=6
+        repetition_penalty = gr.Slider(
+            label='Repetition Penalty',
+            minimum=0.1,
+            maximum=2,
+            step=0.01,
+            value=1.1,
         )
-        submit_button = gr.Button('Submit',
-                                  variant='primary',
-                                  scale=1,
-                                  min_width=0)
-
-    with gr.Row():
-        retry_button = gr.Button('🔄 Retry', variant='secondary')
-        undo_button = gr.Button('↩️ Undo', variant='secondary')
-        clear = gr.Button('🗑️ Clear', variant='secondary')
-
-    saved_input = gr.State()
-
-
 
 
     def user(user_message, history):
@@ -184,7 +186,7 @@ with gr.Blocks() as demo:
         # start_time = time.time()
 
 
-    def bot(history, max_new_tokens, temperature, top_p, system_prompt):
+    def bot(history, max_new_tokens, temperature, top_p, system_prompt, repetition_penalty):
         chat_data = []
         system_prompt = system_prompt.strip()
         if system_prompt:
@@ -206,13 +208,19 @@ with gr.Blocks() as demo:
             yield history
         else:
             data = {
-                "model_type": "breexe-8x7b-instruct-v01",
+                "model_type": "breeze-7b-instruct-v10",
                 "prompt": str(message),
                 "parameters": {
                     "temperature": float(temperature),
                     "top_p": float(top_p),
                     "max_new_tokens": int(max_new_tokens),
-                    "repetition_penalty": 1.1
+                    "repetition_penalty": float(repetition_penalty),
+
+                    "num_beams": 1,  # w/o beam search
+                    "typical_p": 0.99,
+                    "top_k": 0,  # w/o top_k
+                    "do_sample": True,
+                    "min_length": 1,
                 }
             }
 
@@ -248,6 +256,7 @@ with gr.Blocks() as demo:
             temperature,
             top_p,
             system_prompt,
+            repetition_penalty,
         ],
         outputs=chatbot
     )
@@ -261,6 +270,7 @@ with gr.Blocks() as demo:
             temperature,
             top_p,
             system_prompt,
+            repetition_penalty,
         ],
        outputs=chatbot
     )
@@ -300,6 +310,7 @@ with gr.Blocks() as demo:
             temperature,
             top_p,
             system_prompt,
+            repetition_penalty,
         ],
         outputs=chatbot,
     )
@@ -322,5 +333,5 @@ with gr.Blocks() as demo:
 
     gr.Markdown(LICENSE)
 
-demo.queue(concurrency_count=4, max_size=128)
+demo.queue(concurrency_count=2, max_size=128)
 demo.launch()
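
As a rough, standalone illustration (not part of the commit, and not the Space's actual handler), the sketch below shows how a value from the new Repetition Penalty slider reaches a Gradio event handler when the slider is listed in `inputs`, which is the same mechanism the commit relies on when it appends `repetition_penalty` to the three click/submit wirings and then forwards it into `data["parameters"]`. The echo handler and minimal layout here are placeholders.

import gradio as gr

def bot(message, history, repetition_penalty):
    # In app.py this value is forwarded to the model endpoint as
    # data["parameters"]["repetition_penalty"]; here we just echo it
    # back to demonstrate the plumbing.
    history = history + [(message, f"repetition_penalty={float(repetition_penalty)}")]
    return history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder='Type a message...')
    with gr.Accordion(label='Advanced options', open=False):
        repetition_penalty = gr.Slider(label='Repetition Penalty',
                                       minimum=0.1, maximum=2, step=0.01, value=1.1)
    submit_button = gr.Button('Submit', variant='primary')
    # Listing the slider in `inputs` makes its current value arrive as an
    # extra positional argument of bot(), mirroring the wiring in this commit.
    submit_button.click(bot, inputs=[msg, chatbot, repetition_penalty], outputs=chatbot)

demo.queue(max_size=128)
demo.launch()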