Breeze

Runtime error

File size: 6,376 Bytes

7d1962a
9aa8f5f
ab13bd6
 
55029ff
ef219f6
9aa8f5f
 
 
 
 
 
 
 
 
 
 
4d31b4c
b62d3e2
4d31b4c
 
 
 
 
b62d3e2
ab13bd6
05eef7a
ca877b2
ab13bd6
ec98626
 
4d31b4c
ec98626
ab13bd6
835f9a2
9aa8f5f
ab13bd6
 
4d31b4c
 
ab13bd6
4d31b4c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9b61b41
4d31b4c
 
 
 
 
 
9b61b41
4d31b4c
 
 
 
 
 
 
 
ab13bd6
ca877b2
 
ab13bd6
4d31b4c
 
55029ff
05eef7a
4d31b4c
05eef7a
 
 
 
ef219f6
 
 
4d31b4c
ab13bd6
ca877b2
05eef7a
 
4d31b4c
 
 
 
9b61b41
05eef7a
 
 
 
 
 
9aa8f5f
 
 
 
 
 
 
 
 
 
 
 
4d31b4c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05eef7a
 
4d31b4c
 
 
 
 
 
 
ab13bd6
05eef7a
4d31b4c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca877b2
4d31b4c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca877b2
ab13bd6
4d31b4c
 
7a3d937
9aa8f5f

import logging
import os
from urllib.parse import quote_plus

import gradio as gr
from text_generation import Client
from conversation import get_conv_template
from transformers import AutoTokenizer
from pymongo import MongoClient
from pymongo.errors import PyMongoError

# MongoDB connection used to store finished conversations.  All settings come
# from the environment; only the database name has a default.
DB_NAME = os.getenv("MONGO_DBNAME", "taiwan-llm")
USER = os.getenv("MONGO_USER")
PASSWORD = os.getenv("MONGO_PASSWORD")

# PyMongo requires the username and password embedded in a connection string
# to be percent-escaped: a raw '@', ':', '/' or '%' in either value would
# otherwise yield an invalid or mis-parsed URI.  str() keeps an unset variable
# rendering as the literal "None", exactly as plain f-string interpolation does.
# NOTE(review): DB_NAME is also used as the cluster host prefix - confirm that
# the database and the cluster are really meant to share one name.
uri = (
    f"mongodb+srv://{quote_plus(str(USER))}:{quote_plus(str(PASSWORD))}"
    f"@{DB_NAME}.kvwjiok.mongodb.net/?retryWrites=true&w=majority"
)
mongo_client = MongoClient(uri)
db = mongo_client[DB_NAME]
conversations_collection = db['conversations']

# Markdown rendered at the top of the page.
DESCRIPTION = """
# Breeze
"""

# Markdown rendered at the bottom of the page (currently just a newline).
LICENSE = """
"""

# Initial value of the "System prompt" textbox in the advanced options.
DEFAULT_SYSTEM_PROMPT = "You are a helpful AI assistant built by MediaTek Research. The user you are helping speaks Traditional Chinese and comes from Taiwan."

# Client for the text-generation endpoint; ENDPOINT_URL overrides the local
# default address.
endpoint_url = os.environ.get("ENDPOINT_URL", "http://127.0.0.1:8080")
client = Client(endpoint_url, timeout=120)
# Not referenced anywhere else in the visible part of this file.
eos_token = "</s>"
# Upper bound and initial value of the "Max new tokens" slider.
MAX_MAX_NEW_TOKENS = 4096
DEFAULT_MAX_NEW_TOKENS = 1536

# Largest prompt, in tokens, that is sent to the endpoint without truncation.
# NOTE(review): 8192 is presumably the model's context window and 10 a safety
# margin - confirm against the deployed model.
max_prompt_length = 8192 - MAX_MAX_NEW_TOKENS - 10

# The tokenizer is only used to measure and truncate the prompt; model_name is
# also recorded with every stored conversation.
# NOTE(review): the page title and system prompt refer to "Breeze" / MediaTek
# Research while this names a Taiwan-LLM checkpoint - confirm the tokenizer
# matches the model actually served at ENDPOINT_URL.
model_name = "yentinglin/Taiwan-LLM-7B-v2.0-chat"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Page layout.  Components appear in the order they are created below.
with gr.Blocks() as demo:
    gr.Markdown(DESCRIPTION)

    # Conversation view plus the message input row.
    chatbot = gr.Chatbot()
    with gr.Row():
        msg = gr.Textbox(
            container=False,
            show_label=False,
            placeholder='Type a message...',
            scale=10,
        )
        submit_button = gr.Button('Submit',
                                  variant='primary',
                                  scale=1,
                                  min_width=0)

    # Conversation controls; their click handlers are attached further down.
    with gr.Row():
        retry_button = gr.Button('🔄  Retry', variant='secondary')
        undo_button = gr.Button('↩️ Undo', variant='secondary')
        clear = gr.Button('🗑️  Clear', variant='secondary')

    # Holds the user message removed by Retry/Undo between chained steps.
    saved_input = gr.State()

    # Generation settings; each value is passed to `bot` on every request.
    with gr.Accordion(label='Advanced options', open=False):
        system_prompt = gr.Textbox(label='System prompt',
                                   value=DEFAULT_SYSTEM_PROMPT,
                                   lines=6)
        max_new_tokens = gr.Slider(
            label='Max new tokens',
            minimum=1,
            maximum=MAX_MAX_NEW_TOKENS,
            step=1,
            value=DEFAULT_MAX_NEW_TOKENS,
        )
        temperature = gr.Slider(
            label='Temperature',
            minimum=0.1,
            maximum=1.0,
            step=0.1,
            value=0.3,
        )
        top_p = gr.Slider(
            label='Top-p (nucleus sampling)',
            minimum=0.05,
            maximum=1.0,
            step=0.05,
            value=0.95,
        )
        top_k = gr.Slider(
            label='Top-k',
            minimum=1,
            maximum=1000,
            step=1,
            value=50,
        )

    def user(user_message, history):
        """Queue the submitted text as a new chat turn and empty the input box.

        Returns a pair: the new textbox value (always an empty string) and a
        new history list ending with ``[user_message, None]``.  The ``None``
        slot is where the assistant reply is written later.  ``history``
        itself is not mutated.
        """
        pending_turn = [user_message, None]
        updated_history = history + [pending_turn]
        return "", updated_history


    def bot(history, max_new_tokens, temperature, top_p, top_k, system_prompt):
        """Stream the assistant reply for the last turn of ``history``.

        Builds a "twllm_v2" prompt from the system prompt and every turn in
        ``history``, streams tokens from the text-generation endpoint into
        ``history[-1][1]`` and yields ``history`` after each visible token.
        When the stream ends, the conversation and the sampling settings are
        stored in MongoDB on a best-effort basis.

        Args:
            history: Chat turns as mutable ``[user_text, assistant_text]``
                pairs; the reply slot of the last pair is overwritten.
            max_new_tokens: Generation length limit passed to the endpoint.
            temperature: Sampling temperature passed to the endpoint.
            top_p: Nucleus-sampling threshold.  Values of 1.0 or more are sent
                as "unset" because the client rejects them.
            top_k: Top-k cutoff passed to the endpoint.
            system_prompt: System message placed in the conversation template.

        Yields:
            The updated ``history`` after each non-special token.
        """
        if not history:
            # Nothing to answer: hand the empty chat back instead of failing
            # on history[-1] below.
            yield history
            return

        conv = get_conv_template("twllm_v2").copy()
        # roles[0] is the human/USER side, roles[1] the gpt/ASSISTANT side.
        user_role, assistant_role = conv.roles[0], conv.roles[1]
        conv.system = system_prompt
        # Loop names deliberately avoid `user`, `bot` and `msg`, which are the
        # enclosing scope's handler functions and textbox.
        for user_text, assistant_text in history:
            conv.append_message(user_role, user_text)
            conv.append_message(assistant_role, assistant_text)

        prompt = conv.get_prompt()
        prompt_tokens = tokenizer.encode(prompt)
        if len(prompt_tokens) > max_prompt_length:
            # Keep only the most recent tokens so the prompt fits the budget.
            prompt = tokenizer.decode(prompt_tokens[-max_prompt_length + 1:])

        # The text-generation client only accepts 0 < top_p < 1, yet the UI
        # slider goes up to 1.0.  top_p == 1.0 means "no nucleus filtering",
        # which is what leaving the parameter unset requests.
        effective_top_p = top_p if top_p is not None and top_p < 1.0 else None

        history[-1][1] = ""
        for response in client.generate_stream(
                prompt,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=effective_top_p,
                top_k=top_k,
                repetition_penalty=1.1,
        ):
            if response.token.special:
                continue
            history[-1][1] += response.token.text
            yield history

        # After generating the response, store the conversation history in
        # MongoDB.  The settings are recorded as the user chose them.
        conversation_document = {
            "model_name": model_name,
            "history": history,
            "system_prompt": system_prompt,
            "max_new_tokens": max_new_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k,
        }
        try:
            conversations_collection.insert_one(conversation_document)
        except PyMongoError:
            # The reply has already been shown to the user; a storage failure
            # should be recorded, not turned into a chat error.
            logging.getLogger(__name__).exception(
                "Failed to store conversation in MongoDB")

    # Pressing Enter in the textbox and clicking "Submit" run the same chain:
    # `user` records the message (not queued), then `bot` streams the reply
    # into the chatbot using the current advanced-option values.
    for send_event in (msg.submit, submit_button.click):
        send_event(
            user, [msg, chatbot], [msg, chatbot], queue=False
        ).then(
            fn=bot,
            inputs=[
                chatbot,
                max_new_tokens,
                temperature,
                top_p,
                top_k,
                system_prompt,
            ],
            outputs=chatbot,
        )


    def delete_prev_fn(
            history: list[tuple[str, str]]) -> tuple[list[tuple[str, str]], str]:
        """Remove the last turn from ``history`` and return its user message.

        ``history`` is modified in place.  The second item of the result is
        the removed turn's user text, or ``''`` when the history was empty or
        that text was falsy.
        """
        try:
            last_turn = history.pop()
        except IndexError:
            # Empty chat: nothing to remove.
            return history, ''
        user_text, _reply = last_turn
        return history, user_text if user_text else ''


    def display_input(message: str,
                      history: list[tuple[str, str]]) -> list[tuple[str, str]]:
        """Show ``message`` in the chat as a turn that has no reply yet.

        Appends ``(message, '')`` to ``history`` in place and returns that
        same list.
        """
        placeholder_turn = (message, '')
        history.append(placeholder_turn)
        return history

    # Retry: drop the last turn (keeping its user text in `saved_input`),
    # put that text back as an unanswered turn, then generate a fresh reply.
    retry_step = retry_button.click(
        fn=delete_prev_fn,
        inputs=chatbot,
        outputs=[chatbot, saved_input],
        api_name=False,
        queue=False,
    )
    retry_step = retry_step.then(
        fn=display_input,
        inputs=[saved_input, chatbot],
        outputs=chatbot,
        api_name=False,
        queue=False,
    )
    retry_step.then(
        fn=bot,
        inputs=[
            chatbot,
            max_new_tokens,
            temperature,
            top_p,
            top_k,
            system_prompt,
        ],
        outputs=chatbot,
    )

    # Undo: drop the last turn and place its user text back in the textbox.
    undo_step = undo_button.click(
        fn=delete_prev_fn,
        inputs=chatbot,
        outputs=[chatbot, saved_input],
        api_name=False,
        queue=False,
    )
    undo_step.then(
        fn=lambda restored: restored,
        inputs=[saved_input],
        outputs=msg,
        api_name=False,
        queue=False,
    )

    # Clear: reset the chatbot by sending it None.
    clear.click(lambda: None, None, chatbot, queue=False)

    # Footer text.
    gr.Markdown(LICENSE)

# Turn on the request queue (concurrency_count=4, at most 128 waiting
# requests) and start the app.
# NOTE(review): `concurrency_count` is a Gradio 3.x argument; newer major
# versions are believed to reject it - confirm against the pinned Gradio
# version.
demo.queue(concurrency_count=4, max_size=128)
demo.launch()