Quardo committed
Commit 3655a2f
1 Parent(s): 177e4c2

Updated Space

Files changed (3)
  1. app.py +115 -41
  2. index.html +47 -22
  3. requirements.txt +0 -1
app.py CHANGED
@@ -1,6 +1,8 @@
-from sse_starlette.sse import EventSourceResponse
 from starlette.responses import JSONResponse, FileResponse
+from gradio.data_classes import FileData, GradioModel
+from sse_starlette.sse import EventSourceResponse
 from fastapi import FastAPI, Request
+from typing import (List, Tuple, Optional)
 import gradio as gr
 import requests
 import argparse
@@ -16,9 +18,10 @@ import os
 # --- === CONFIG === ---
 
 IMAGE_HANDLE = "url"# or "base64"
-API_BASE = "openai"# or "env"
+API_BASE = "env"# or "env"
 api_key = os.environ['OPENAI_API_KEY']
 base_url = os.environ.get('OPENAI_BASE_URL', "https://api.openai.com/v1")
+def_models = '["gpt-3.5-turbo", "gpt-3.5-turbo-0125", "gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k", "gpt-4", "gpt-4-0125-preview", "gpt-4-0314", "gpt-4-0613", "gpt-4-1106-preview", "gpt-4-1106-vision-preview", "gpt-4-32k-0314", "gpt-4-turbo", "gpt-4-turbo-2024-04-09", "gpt-4-turbo-preview", "gpt-4-vision-preview", "gpt-4o", "gpt-4o-2024-05-13", "gpt-4o-mini", "gpt-4o-mini-2024-07-18"]'
 
 # --- === CONFIG === ---
 
@@ -29,11 +32,57 @@ if API_BASE == "env":
         models = response.json()
         if not ('data' in models):
             base_url = "https://api.openai.com/v1"
+            print("no models?")
     except Exception as e:
         print(f"Error testing API endpoint: {e}")
 else:
     base_url = "https://api.openai.com/v1"
 
+try:
+    models = json.loads(os.environ.get('OPENAI_API_MODELS', def_models))
+except json.JSONDecodeError:
+    models = json.loads(def_models)
+
+models = sorted(models)
+
+modelList = {
+    "object": "list",
+    "data": []
+}
+
+for i, v in enumerate(models):
+    modelList["data"].append({"id": v, "object": "model", "created": 0, "owned_by": "system"})
+
+def encodeChat(messages):
+    output = []
+    for message in messages:
+        role = message['role']
+        name = f" [{message['name']}]" if 'name' in message else ''
+        content = message['content']
+        formatted_message = f"<|im_start|>{role}{name}\n{content}<|end_of_text|>"
+        output.append(formatted_message)
+    return "\n".join(output)
+
+def moderate(messages):
+    try:
+        response = requests.post(
+            f"{base_url}/moderations",
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {api_key}"
+            },
+            json={"input": encodeChat(messages)}
+        )
+        response.raise_for_status()
+        moderation_result = response.json()
+        try:
+            return any(result["flagged"] for result in moderation_result["results"])
+        except KeyError:
+            return moderation_result["flagged"]
+    except requests.exceptions.RequestException as e:
+        print(f"Error during moderation request: {e}")
+        return False
+
 async def streamChat(params):
     async with aiohttp.ClientSession() as session:
         async with session.post(f"{base_url}/chat/completions", headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}, json=params) as r:
@@ -55,20 +104,27 @@ def rnd(length=8):
     letters = string.ascii_letters + string.digits
     return ''.join(random.choice(letters) for i in range(length))
 
-def getModels():
-    response = requests.get(f"{base_url}/models", headers={"Authorization": f"Bearer {api_key}",})
-    response.raise_for_status()
-    models = response.json()
-    return sorted([
-        model['id'] for model in models['data']
-        if 'gpt' in model['id'] and model['id'] not in {"gpt-3.5-turbo-instruct", "gpt-3.5-turbo-instruct-0914"}
-    ])
-
 def handleMultimodalData(model, role, data):
-    if type(data) == str:
-        return {"role": role, "content": str(data)}
+    if isinstance(data, tuple):
+        data = data[0]
+
+    if isinstance(data, FileData):
+        if data.mime_type.startswith("image/"):
+            if IMAGE_HANDLE == "base64":
+                with open(data.path, "rb") as image_file:
+                    b64image = base64.b64encode(image_file.read()).decode('utf-8')
+                    image_file.close()
+                return {"role": role, "content": [{"type": "image_url", "image_url": {"url": "data:" + data.mime_type + ";base64," + b64image}}]}
+            else:
+                return {"role": role, "content": [{"type": "image_url", "image_url": {"url": data.url}}]}
+        elif data.mime_type.startswith("text/") or data.mime_type.startswith("application/"):
+            try:
+                with open(data.path, "rb") as data_file:
+                    return {"role": role, "content": "[System: This message contains file.]\n\n<|file_start|>" + data.orig_name + "\n" + data_file.read().decode('utf-8') + "<|file_end|>"}
+            except UnicodeDecodeError:
+                pass
    elif isinstance(data, str):
-        return {"role": role, "content": data.text}
+        return {"role": role, "content": data}
    elif hasattr(data, 'files') and data.files and len(data.files) > 0 and model in {"gpt-4-1106-vision-preview", "gpt-4-vision-preview", "gpt-4-turbo", "gpt-4o", "gpt-4o-2024-05-13", "gpt-4o-mini", "gpt-4o-mini-2024-07-18"}:
        result, handler, hasFoundFile = [], ["[System: This message contains files; the system will be splitting it.]"], False
        for file in data.files:
@@ -81,8 +137,11 @@ def handleMultimodalData(model, role, data):
                 result.append({"type": "image_url", "image_url": {"url": file.url}})
             if file.mime_type.startswith("text/") or file.mime_type.startswith("application/"):
                 hasFoundFile = True
-                with open(file.path, "rb") as data_file:
-                    handler.append("<|file_start|>" + file.orig_name + "\n" + data_file.read().decode('utf-8') + "<|file_end|>")
+                try:
+                    with open(file.path, "rb") as data_file:
+                        handler.append("<|file_start|>" + file.orig_name + "\n" + data_file.read().decode('utf-8') + "<|file_end|>")
+                except UnicodeDecodeError:
+                    continue
         if hasFoundFile:
             handler.append(data.text)
         return {"role": role, "content": [{"type": "text", "text": "\n\n".join(handler)}] + result}
@@ -93,21 +152,30 @@ def handleMultimodalData(model, role, data):
        for file in data.files:
            if file.mime_type.startswith("text/") or file.mime_type.startswith("application/"):
                hasFoundFile = True
-                with open(file.path, "rb") as data_file:
-                    handler.append("<|file_start|>" + file.orig_name + "\n" + data_file.read().decode('utf-8') + "<|file_end|>")
-        if hasFoundFile:
-            handler.append(data.text)
-            return {"role": role, "content": "\n\n".join(handler)}
-        else:
-            return {"role": role, "content": data.text}
+                try:
+                    with open(file.path, "rb") as data_file:
+                        return {"role": role, "content": "<|file_start|>" + file.orig_name + "\n" + data_file.read().decode('utf-8') + "<|file_end|>"}
+                except UnicodeDecodeError:
+                    continue
    else:
        if isinstance(data, tuple):
            return {"role": role, "content": str(data)}
        return {"role": role, "content": getattr(data, 'text', str(data))}
 
+class FileMessage(GradioModel):
+    file: FileData
+    alt_text: Optional[str] = None
+
+class MultimodalMessage(GradioModel):
+    text: Optional[str] = None
+    files: Optional[List[FileMessage]]
+
 async def respond(
     message,
-    history: list[tuple[str, str]],
+    history: List[Tuple[
+        Optional[MultimodalMessage],
+        Optional[MultimodalMessage],
+    ]],
     system_message,
     model_name,
     max_tokens,
@@ -116,16 +184,27 @@ async def respond(
     seed,
     random_seed
 ):
-    messages = [{"role": "system", "content": "If user submits any file that file will be visible only that turn. This is not due to privacy related things but rather due to developer's lazyness; Ask user to upload the file again if they ask a follow-up question without the data."}, {"role": "system", "content": system_message}]
+    messages = [{"role": "system", "content": system_message}]
 
     for val in history:
-        if val[0]:
-            messages.append(handleMultimodalData(model_name,"user",val[0]))
-        if val[1]:
-            messages.append(handleMultimodalData(model_name,"assistant",val[1]))
-
-    messages.append(handleMultimodalData(model_name,"user",message))
+        if val[0] is not None:
+            user_message = handleMultimodalData(model_name, "user", val[0])
+            if user_message:
+                messages.append(user_message)
+        if val[1] is not None:
+            assistant_message = handleMultimodalData(model_name, "assistant", val[1])
+            if assistant_message:
+                messages.append(assistant_message)
 
+    user_message = handleMultimodalData(model_name, "user", message)
+    if user_message:
+        messages.append(user_message)
+
+    if moderate(messages):
+        response = "[MODERATION] I'm sorry, but I can't assist with that."
+        yield response
+        return
+
     response = ""
 
     completion = streamChat({
@@ -146,11 +225,11 @@ async def respond(
 demo = gr.ChatInterface(
     respond,
     title="GPT-4O-mini",
-    description="A simple proxy to OpenAI!<br/>You can use this space as a proxy! click [here](/api/v1/docs) to view the documents.<br/>Also you can only submit images to vision/4o models but can submit txt/code/etc. files to all models.<br/>###### Also the file queries are only shown to model for 1 round cuz gradio.",
+    description="A simple proxy to OpenAI!<br/>You can use this space as a proxy! click [here](/api/v1/docs) to view the documents. <strong>[last update: Fixed file problems.]</strong><br/>Also you can only submit images to vision/4o models but can submit txt/code/etc. files to all models.",
     multimodal=True,
     additional_inputs=[
         gr.Textbox(value="You are a helpful assistant.", label="System message"),
-        gr.Dropdown(choices=getModels(), value="gpt-4o-mini-2024-07-18", label="Model"),
+        gr.Dropdown(choices=models, value="gpt-4o-mini-2024-07-18", label="Model"),
         gr.Slider(minimum=1, maximum=4096, value=4096, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.05, label="Temperature"),
         gr.Slider(
@@ -173,14 +252,7 @@ def html():
 
 @app.get("/api/v1/models")
 async def test_endpoint():
-    response = requests.get(f"{base_url}/models", headers={"Authorization": f"Bearer {api_key}"})
-    response.raise_for_status()
-    models = response.json()
-    models['data'] = sorted(
-        [model for model in models['data'] if 'gpt' in model['id'] and model['id'] not in {"gpt-3.5-turbo-instruct", "gpt-3.5-turbo-instruct-0914"}],
-        key=lambda x: x['id']
-    )
-    return JSONResponse(content=models)
+    return JSONResponse(content=modelList)
 
 @app.post("/api/v1/chat/completions")
 async def chat_completion(request: Request):
@@ -189,6 +261,8 @@ async def chat_completion(request: Request):
 
     if not body.get("messages") or not body.get("model"):
         return JSONResponse(content={"error": { "code": "MISSING_VALUE", "message": "Both 'messages' and 'model' are required fields."}}, status_code=400)
+    if not body.get("model") in models:
+        return JSONResponse(content={"error": { "code": "INVALID_MODEL", "message": "The model name provided in the request does not exists in predefined list of models."}}, status_code=400)
 
     params = {
         key: value for key, value in {
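Note: this commit replaces the old getModels() upstream lookup with a static list read from the OPENAI_API_MODELS environment variable, falling back to def_models when the variable is absent or malformed. A minimal sketch of that parsing behavior; the override value and the shortened default list here are hypothetical stand-ins:

    import json, os

    def_models = '["gpt-4o-mini", "gpt-4o"]'  # shortened stand-in for the full default list
    os.environ['OPENAI_API_MODELS'] = '["gpt-4o-mini-2024-07-18", "gpt-4o"]'  # hypothetical override

    try:
        models = json.loads(os.environ.get('OPENAI_API_MODELS', def_models))
    except json.JSONDecodeError:
        models = json.loads(def_models)  # malformed JSON falls back to the defaults

    models = sorted(models)
    modelList = {"object": "list", "data": [
        {"id": m, "object": "model", "created": 0, "owned_by": "system"} for m in models
    ]}
    print([m["id"] for m in modelList["data"]])  # ['gpt-4o', 'gpt-4o-mini-2024-07-18']

This modelList is the payload the rewritten /api/v1/models endpoint now serves directly instead of querying upstream.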
 
 
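The new moderate() gate runs before any completion is streamed: the conversation is flattened by encodeChat() into a single string and sent to the /moderations endpoint. A rough sketch of that serialization; the example messages, including the optional name field, are made up:

    # Mirrors encodeChat(); the example messages are hypothetical.
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "name": "quardo", "content": "Say this is a test"},
    ]
    encoded = "\n".join(
        f"<|im_start|>{m['role']}{' [' + m['name'] + ']' if 'name' in m else ''}\n{m['content']}<|end_of_text|>"
        for m in messages
    )
    # moderate() POSTs {"input": encoded}; if any result comes back flagged,
    # respond() yields "[MODERATION] I'm sorry, but I can't assist with that." and stops.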
index.html CHANGED
@@ -36,9 +36,27 @@
       <h1>Documentation of the API/updates</h1>
       <hr/>
       <h2>Updates</h2>
-      <p><strong id="w" alt="yes i love fear"> * </strong>yippeeee</p>
-      <script>let q=25;setInterval(_=>{q+=.001;w.style.fontSize=`${q}px`},100);</script>
-      <label style="font-size: 2px;">The message above is edited by a script every 100ms. you can check it via view-source.</label>
+      <div>
+        <div>
+          <strong> - 3. Update.</strong>
+          <p> * Managed to fix the file being non-visible to the AI model after one round.</p>
+          <p> * And fix the div error on this page.</p>
+        </div><hr/>
+        <div>
+          <strong> - 2. Update.</strong>
+          <p> * Added light moderation. [Might add more strict detection later]</p>
+        </div><hr/>
+        <div>
+          <strong> - 1. Update.</strong>
+          <p> * Fixed a bug where file encoding could trash the entire chat.</p>
+          <p> * Made models allowed set by the maintainer. [OPENAI_API_MODELS variable]</p>
+        </div><hr/>
+        <div>
+          <p><strong id="w" alt="yes i love fear"> * </strong>yippeeee</p>
+          <script>let q=25;setInterval(_=>{q+=.001;w.style.fontSize=`${q}px`},100);</script>
+          <label style="font-size: 2px;">The message above is edited by a script every 100ms. you can check it via view-source.</label>
+        </div>
+      </div>
       <hr/>
       <h2>API Endpoints</h2>
       <p>Here are some example codes to interact with the API:</p>
@@ -46,21 +64,21 @@
 from openai import OpenAI
 
 client = OpenAI(
-  # Or use the `OPENAI_BASE_URL` env var
-  base_url="https://quardo-gpt-4o-mini.hf.space/api/v1",
-
-  # No key is needed cause this is a proxy
-  api_key="none"
+  # Or use the `OPENAI_BASE_URL` env var
+  base_url="https://quardo-gpt-4o-mini.hf.space/api/v1",
+
+  # No key is needed cause this is a proxy
+  api_key="none"
 )
 
 completion = client.chat.completions.create( # or openai.ChatCompletion.create (idk)
-  model="gpt-4o-mini",
-  messages=[
-    {
-      "role": "user",
-      "content": "Say this is a test",
-    }
-  ],
+  model="gpt-4o-mini",
+  messages=[
+    {
+      "role": "user",
+      "content": "Say this is a test",
+    }
+  ],
 )
 
 print(completion.choices[0].message.content)</textarea>
@@ -72,13 +90,13 @@ openai.api_key = 'none'
 openai.base_url = "https://quardo-gpt-4o-mini.hf.space/api/v1"
 
 completion = openai.chat.completions.create( # or openai.ChatCompletion.create (idk)
-  model="gpt-4o-mini",
-  messages=[
-    {
-      "role": "user",
-      "content": "Say this is a test",
-    },
-  ],
+  model="gpt-4o-mini",
+  messages=[
+    {
+      "role": "user",
+      "content": "Say this is a test",
+    },
+  ],
 )
 
 print(completion.choices[0].message.content)</textarea>
@@ -87,6 +105,13 @@ print(completion.choices[0].message.content)</textarea>
 # I am too lazy to look into openai's docs
 # so please kindly take a look at github there is probably examples there.</textarea>
       <hr/>
+      <div>
+        <h3>Actual endpoints:</h3>
+        <p><strong>1. [GET]</strong> <a href="https://quardo-gpt-4o-mini.hf.space/api/v1/models">/api/v1/models</a></p>
+        <label>Simply shows you the available models</label>
+        <p><strong>3. [POST]</strong> <a href="https://quardo-gpt-4o-mini.hf.space/api/v1/chat/completions">/api/v1/chat/completions</a></p>
+        <label>Generates a chat completion based on the provided messages and model</label>
+      </div>
     </div>
     <script>
       CodeMirror.fromTextArea(document.getElementById("example1"), { lineNumbers: true, mode: "python", theme: "monokai", readOnly: true });
 
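For reference, a quick way to exercise the two endpoints documented above, assuming the Space is reachable (no API key is required since it is a proxy; a model outside the predefined list gets the INVALID_MODEL error added in app.py):

    import requests

    base = "https://quardo-gpt-4o-mini.hf.space/api/v1"

    # GET /models returns the static modelList built from OPENAI_API_MODELS.
    print(requests.get(f"{base}/models").json())

    # POST /chat/completions forwards the body upstream; the response shape
    # depends on the parameters sent (e.g. whether streaming is requested).
    r = requests.post(f"{base}/chat/completions", json={
        "model": "gpt-4o-mini",
        "messages": [{"role": "user", "content": "Say this is a test"}],
    })
    print(r.status_code, r.text)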
requirements.txt CHANGED
@@ -1,6 +1,5 @@
 uvicorn==0.27.1
 starlette==0.37.2
 sse-starlette==2.1.2
-gradio==3.1.4
 requests==2.31.0
 aiohttp==3.8.5
 