Spaces:

ka1kuk
/

LLM-api

Sleeping

App Files Files Community

ka1kuk committed on Mar 17

Commit

ffe180f

•

1 Parent(s): e6fd60f

Update apis/chat_api.py

Browse files

Files changed (1) hide show

apis/chat_api.py +16 -21

apis/chat_api.py CHANGED Viewed

@@ -188,35 +188,30 @@ class ChatAPIApp:
             data_response = streamer.chat_return_dict(stream_response)
             return data_response
-    async def embedding(request: QueryRequest, api_key: str = Depends(extract_api_key)):
         api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{request.model}"
         headers = {"Authorization": f"Bearer {api_key}"}
-        try:
-            response = requests.post(api_url, headers=headers, json={"inputs": request.input})
-            result = response.json()
-            if "error" in result:
-                logging.error(f"Error from Hugging Face API: {result.get('error', 'No detailed error message provided.')}")
-                raise HTTPException(status_code=503, detail="The model is currently loading, please re-run the query.")
-            if not (isinstance(result, list) and len(result) > 0 and isinstance(result[0], list)):
-                logging.error(f"Unexpected response format: {result}")
-                raise HTTPException(status_code=500, detail="Unexpected response format.")
-            # Assuming each embedding is a list of lists of floats, flatten it
-            flattened_embeddings = [sum(embedding, []) for embedding in result]
             data = [{"object": "embedding", "index": i, "embedding": embedding} for i, embedding in enumerate(flattened_embeddings)]
             return EmbeddingResponse(
                 object="list",
                 data=data,
-                model=request.model_name,
-                usage={"prompt_tokens": len(request.input_text), "total_tokens": len(request.input_text)}
             )
-        except Exception as e:
-            logging.error(f"An error occurred: {str(e)}")
-            raise HTTPException(status_code=500, detail=str(e))
     def setup_routes(self):
         for prefix in ["", "/v1", "/api", "/api/v1"]:

             data_response = streamer.chat_return_dict(stream_response)
             return data_response
+    async def embedding(self, request: QueryRequest, api_key: str = Depends(extract_api_key)):
         api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{request.model}"
         headers = {"Authorization": f"Bearer {api_key}"}
+        response = requests.post(api_url, headers=headers, json={"inputs": request.input})
+        result = response.json()
+        if "error" in result:
+            logging.error(f"Error from Hugging Face API: {result['error']}")
+            error_detail = result.get('error', 'No detailed error message provided.')
+            raise HTTPException(status_code=503, detail=f"The model is currently loading, please re-run the query. Detail: {error_detail}")
+        if isinstance(result, list) and len(result) > 0 and isinstance(result[0], list):
+            flattened_embeddings = [item for sublist in result for item in sublist]  # Flatten list of lists
             data = [{"object": "embedding", "index": i, "embedding": embedding} for i, embedding in enumerate(flattened_embeddings)]
             return EmbeddingResponse(
                 object="list",
                 data=data,
+                model=request.model,
+                usage={"prompt_tokens": len(request.input), "total_tokens": len(request.input)}
             )
+        else:
+            logging.error(f"Unexpected response format: {result}")
+            raise HTTPException(status_code=500, detail="Unexpected response format.")
     def setup_routes(self):
         for prefix in ["", "/v1", "/api", "/api/v1"]: