Quardo committed
Commit bf38597
1 parent: df28b53

Update space

Files changed (2)
  1. app.py +126 -36
  2. requirements.txt +4 -2
app.py CHANGED
@@ -1,27 +1,111 @@
 from sse_starlette.sse import EventSourceResponse
-from starlette.responses import JSONResponse
+from starlette.responses import JSONResponse, FileResponse
 from fastapi import FastAPI, Request
 import gradio as gr
+import requests
 import argparse
+import aiohttp
 import uvicorn
-import openai
 import random
 import string
+import base64
 import json
 import sys
+import os
+
+# --- === CONFIG === ---
+
+IMAGE_HANDLE = "url"  # or "base64"
+API_BASE = "openai"  # or "env"
+api_key = os.environ['OPENAI_API_KEY']
+base_url = os.environ.get('OPENAI_BASE_URL', "https://api.openai.com/v1")
+
+# --- === CONFIG === ---
+
+if API_BASE == "env":
+    try:
+        response = requests.get(f"{base_url}/models", headers={"Authorization": f"Bearer {api_key}"})
+        response.raise_for_status()
+        models = response.json()
+        if not ('data' in models):
+            base_url = "https://api.openai.com/v1"
+    except Exception as e:
+        print(f"Error testing API endpoint: {e}")
+else:
+    base_url = "https://api.openai.com/v1"
+
+async def streamChat(params):
+    async with aiohttp.ClientSession() as session:
+        async with session.post(f"{base_url}/chat/completions", headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}, json=params) as r:
+            r.raise_for_status()
+            async for line in r.content:
+                if line:
+                    line_str = line.decode('utf-8')
+                    if line_str.startswith("data: "):
+                        line_str = line_str[6:].strip()
+                        if line_str == "[DONE]":
+                            continue
+                        try:
+                            message = json.loads(line_str)
+                            yield message
+                        except json.JSONDecodeError:
+                            continue
 
 def rnd(length=8):
     letters = string.ascii_letters + string.digits
     return ''.join(random.choice(letters) for i in range(length))
 
 def getModels():
-    models = openai.Model.list()
+    response = requests.get(f"{base_url}/models", headers={"Authorization": f"Bearer {api_key}"})
+    response.raise_for_status()
+    models = response.json()
     return sorted([
         model['id'] for model in models['data']
         if 'gpt' in model['id'] and model['id'] not in {"gpt-3.5-turbo-instruct", "gpt-3.5-turbo-instruct-0914"}
     ])
 
-def respond(
+def handleMultimodalData(model, role, data):
+    if type(data) == str:
+        return {"role": role, "content": str(data)}
+    elif isinstance(data, str):
+        return {"role": role, "content": data.text}
+    elif hasattr(data, 'files') and data.files and len(data.files) > 0 and model in {"gpt-4-1106-vision-preview", "gpt-4-vision-preview", "gpt-4-turbo", "gpt-4o", "gpt-4o-2024-05-13", "gpt-4o-mini", "gpt-4o-mini-2024-07-18"}:
+        result, handler, hasFoundFile = [], ["[System: This message contains files; the system will be splitting it.]"], False
+        for file in data.files:
+            if file.mime_type.startswith("image/"):
+                if IMAGE_HANDLE == "base64":
+                    with open(file.path, "rb") as image_file:
+                        result.append({"type": "image_url", "image_url": {"url": "data:" + file.mime_type + ";base64," + base64.b64encode(image_file.read()).decode('utf-8')}})
+                        image_file.close()
+                else:
+                    result.append({"type": "image_url", "image_url": {"url": file.url}})
+            if file.mime_type.startswith("text/") or file.mime_type.startswith("application/"):
+                hasFoundFile = True
+                with open(file.path, "rb") as data_file:
+                    handler.append("<|file_start|>" + file.orig_name + "\n" + data_file.read().decode('utf-8') + "<|file_end|>")
+        if hasFoundFile:
+            handler.append(data.text)
+            return {"role": role, "content": [{"type": "text", "text": "\n\n".join(handler)}] + result}
+        else:
+            return {"role": role, "content": [{"type": "text", "text": data.text}] + result}
+    elif hasattr(data, 'files') and data.files and len(data.files) > 0 and not (model in {"gpt-4-1106-vision-preview", "gpt-4-vision-preview", "gpt-4-turbo", "gpt-4o", "gpt-4o-2024-05-13", "gpt-4o-mini", "gpt-4o-mini-2024-07-18"}):
+        handler, hasFoundFile = ["[System: This message contains files; the system will be splitting it.]"], False
+        for file in data.files:
+            if file.mime_type.startswith("text/") or file.mime_type.startswith("application/"):
+                hasFoundFile = True
+                with open(file.path, "rb") as data_file:
+                    handler.append("<|file_start|>" + file.orig_name + "\n" + data_file.read().decode('utf-8') + "<|file_end|>")
+        if hasFoundFile:
+            handler.append(data.text)
+            return {"role": role, "content": "\n\n".join(handler)}
+        else:
+            return {"role": role, "content": data.text}
+    else:
+        if isinstance(data, tuple):
+            return {"role": role, "content": str(data)}
+        return {"role": role, "content": getattr(data, 'text', str(data))}
+
+async def respond(
     message,
     history: list[tuple[str, str]],
     system_message,
@@ -30,71 +114,75 @@ def respond(
     temperature,
     top_p,
     seed,
+    random_seed
 ):
-    messages = [{"role": "system", "content": system_message}]
+    messages = [{"role": "system", "content": "If user submits any file that file will be visible only that turn. This is not due to privacy related things but rather due to developer's lazyness; Ask user to upload the file again if they ask a follow-up question without the data."}, {"role": "system", "content": system_message}]
 
     for val in history:
         if val[0]:
-            messages.append({"role": "user", "content": val[0]})
+            messages.append(handleMultimodalData(model_name, "user", val[0]))
         if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
+            messages.append(handleMultimodalData(model_name, "assistant", val[1]))
 
-    messages.append({"role": "user", "content": message})
+    messages.append(handleMultimodalData(model_name, "user", message))
 
     response = ""
 
-    completion = openai.ChatCompletion.create(
-        model=model_name,
-        messages=messages,
-        max_tokens=max_tokens,
-        temperature=temperature,
-        top_p=top_p,
-        seed=seed,
-        user=rnd(),
-        stream=True,
-    )
-
-    for message in completion:
-        token = message.choices[0].delta.get("content", "")
-
-        response += token
+    completion = streamChat({
+        "model": model_name,
+        "messages": messages,
+        "max_tokens": max_tokens,
+        "temperature": temperature,
+        "top_p": top_p,
+        "seed": (random.randint(0, 2**32) if random_seed else seed),
+        "user": rnd(),
+        "stream": True
+    })
+
+    async for token in completion:
+        response += token['choices'][0]['delta'].get("content", "")
         yield response
 
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
     respond,
     title="GPT-4O-mini",
-    description="A simple proxy to OpenAI!\nYou can use this space as a proxy! click [here](https://quardo-gpt-4o-mini.hf.space/v1/models) to see the available models. [only chat endpoint is open.]",
+    description="A simple proxy to OpenAI!<br/>You can use this space as a proxy! click [here](/api/v1/docs) to view the documents.<br/>Also you can only submit images to vision/4o models but can submit txt/code/etc. files to all models.<br/>###### Also the file queries are only shown to model for 1 round cuz gradio.",
+    multimodal=True,
    additional_inputs=[
         gr.Textbox(value="You are a helpful assistant.", label="System message"),
         gr.Dropdown(choices=getModels(), value="gpt-4o-mini-2024-07-18", label="Model"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=1, maximum=4096, value=4096, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.05, label="Temperature"),
         gr.Slider(
-            minimum=0.1,
+            minimum=0.05,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
        gr.Slider(minimum=0, maximum=2**32, value=0, step=1, label="Seed"),
+        gr.Checkbox(label="Randomize Seed", value=True),
    ],
 )
 
 app = FastAPI()
 
-@app.get("/v1/models")
+@app.get("/api/v1/docs")
+def html():
+    return FileResponse("index.html")
+
+@app.get("/api/v1/models")
 async def test_endpoint():
-    models = openai.Model.list()
+    response = requests.get(f"{base_url}/models", headers={"Authorization": f"Bearer {api_key}"})
+    response.raise_for_status()
+    models = response.json()
     models['data'] = sorted(
         [model for model in models['data'] if 'gpt' in model['id'] and model['id'] not in {"gpt-3.5-turbo-instruct", "gpt-3.5-turbo-instruct-0914"}],
         key=lambda x: x['id']
     )
     return JSONResponse(content=models)
 
-@app.post("/v1/chat/completions")
+@app.post("/api/v1/chat/completions")
 async def chat_completion(request: Request):
     try:
         body = await request.json()
@@ -129,12 +217,14 @@ async def chat_completion(request: Request):
         }
 
         if body.get("stream"):
-            def event_generator():
-                for event in openai.ChatCompletion.create(**params):
+            async def event_generator():
+                async for event in streamChat(params):
                     yield json.dumps(event)
             return EventSourceResponse(event_generator())
         else:
-            completion = openai.ChatCompletion.create(**params)
+            response = requests.post(f"{base_url}/chat/completions", headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}, json=params)
+            response.raise_for_status()
+            completion = response.json()
             return JSONResponse(content=completion)
     except Exception as e:
         return JSONResponse(content={"error": {"code": "SERVER_ERROR", "message": str(e)}}, status_code=400)
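For reference, a minimal client-side sketch (not part of the commit) of how the new streaming endpoint could be consumed. SPACE_URL is a hypothetical placeholder, and the payload assumes the handler accepts OpenAI-style chat fields, which the elided middle of chat_completion appears to assemble into params:

# Hypothetical client for the new /api/v1/chat/completions SSE route.
# SPACE_URL is a placeholder; the body mirrors the OpenAI chat schema.
import json
import requests

SPACE_URL = "https://example-space.hf.space"  # hypothetical

payload = {
    "model": "gpt-4o-mini-2024-07-18",
    "messages": [{"role": "user", "content": "Hello!"}],
    "stream": True,
}

with requests.post(f"{SPACE_URL}/api/v1/chat/completions", json=payload, stream=True) as r:
    r.raise_for_status()
    for raw in r.iter_lines():
        if not raw:
            continue  # skip SSE keep-alive blank lines
        line = raw.decode("utf-8")
        if not line.startswith("data: "):
            continue
        event = json.loads(line[6:])  # each event is the upstream chunk, re-serialized by event_generator
        print(event["choices"][0]["delta"].get("content", ""), end="", flush=True)

Because the server's streamChat already swallows the upstream "[DONE]" sentinel, the client only ever sees JSON chunk events.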
requirements.txt CHANGED
@@ -1,4 +1,6 @@
-openai==0.28.1
 uvicorn==0.27.1
 starlette==0.37.2
 sse-starlette==2.1.2
+gradio==3.1.4
+requests==2.31.0
+aiohttp==3.8.5
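As a quick smoke test of the updated dependency set and the relocated routes, a sketch (again with a hypothetical SPACE_URL) that lists the filtered models the Space now exposes under the new /api/v1 prefix:

# Hypothetical check against the new /api/v1/models route.
import requests

SPACE_URL = "https://example-space.hf.space"  # hypothetical placeholder

resp = requests.get(f"{SPACE_URL}/api/v1/models")
resp.raise_for_status()
for model in resp.json()["data"]:
    print(model["id"])  # 'gpt*' models only; the instruct variants are filtered out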