Commit: Update apis/chat_api.py
File changed: apis/chat_api.py (+20 −20)
@@ -187,26 +187,26 @@ class ChatAPIApp:
|
|
187 |
return data_response
|
188 |
|
189 |
async def embedding(request: QueryRequest):
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
|
211 |
def setup_routes(self):
|
212 |
for prefix in ["", "/v1", "/api", "/api/v1"]:
|
|
|
187 |
return data_response
|
188 |
|
189 |
async def embedding(request: QueryRequest):
    """Create embeddings for ``request.texts`` via the Hugging Face API.

    Retries up to 3 times when the upstream call fails with a
    ``RuntimeError`` (presumably the model is still loading — TODO confirm
    against ``send_request_to_hugging_face``), sleeping 10 s between
    attempts.

    Returns:
        An OpenAI-compatible embedding response dict
        (``{"object": "list", "data": [...], "model": ..., "usage": ...}``).

    Raises:
        HTTPException: 503 when all retries fail with RuntimeError,
            500 for any other unexpected error.
    """
    try:
        last_error = None
        for attempt in range(3):  # retry while the model warms up
            try:
                embeddings = await send_request_to_hugging_face(
                    request.texts, request.model_name, request.api_key
                )
                data = [
                    {"object": "embedding", "index": i, "embedding": embedding}
                    for i, embedding in enumerate(embeddings)
                ]
                return {
                    "object": "list",
                    "data": data,
                    "model": request.model_name,
                    "usage": {
                        "prompt_tokens": len(request.texts),
                        "total_tokens": len(request.texts),
                    },
                }
            except RuntimeError as e:
                # BUG FIX: the original raised HTTPException(503) immediately
                # after sleeping, so attempts 2 and 3 could never run. Only
                # give up once the final attempt has failed.
                last_error = e
                if attempt < 2:  # don't sleep after the last attempt
                    await asyncio.sleep(10)  # give the model time to load
        raise HTTPException(
            status_code=503,
            detail="The model is currently loading, please try again later.",
        ) from last_error
    except HTTPException:
        # BUG FIX: HTTPException subclasses Exception, so the original's
        # blanket handler downgraded the intentional 503 to a generic 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
210 |
|
211 |
def setup_routes(self):
|
212 |
for prefix in ["", "/v1", "/api", "/api/v1"]:
|