jpohhhh
/

msmarco-MiniLM-L-6-v3_onnx

sentence-embeddings

endpoints-template

Inference Endpoints

Model card Files Files and versions Community

jpohhhh committed on Jun 21, 2023

Commit

89b609f

•

1 Parent(s): 0f61114

Update handler.py

Files changed (1) hide show

handler.py +6 -5

handler.py CHANGED Viewed

@@ -15,6 +15,7 @@ class EndpointHandler():
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.model = ORTModelForCustomTasks.from_pretrained("optimum/sbert-all-MiniLM-L6-with-pooler")
         self.tokenizer = AutoTokenizer.from_pretrained("optimum/sbert-all-MiniLM-L6-with-pooler")
         # self.model.to(self.device)
         # print("model will run on ", self.device)
@@ -27,9 +28,9 @@ class EndpointHandler():
             A :obj:`list` | `dict`: will be serialized and returned
         """
         sentences = data.pop("inputs",data)
-        inputs = tokenizer("I love burritos!", return_tensors="pt")
-        pred = self.model(**encoded_input)
         # Perform pooling. In this case, max pooling.
-        sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
-        return sentence_embeddings.tolist()

         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.model = ORTModelForCustomTasks.from_pretrained("optimum/sbert-all-MiniLM-L6-with-pooler")
         self.tokenizer = AutoTokenizer.from_pretrained("optimum/sbert-all-MiniLM-L6-with-pooler")
+        self.onnx_extractor = pipeline("feature-extraction", model=model, tokenizer=tokenizer)
         # self.model.to(self.device)
         # print("model will run on ", self.device)
             A :obj:`list` | `dict`: will be serialized and returned
         """
         sentences = data.pop("inputs",data)
+        # inputs = tokenizer("I love burritos!", return_tensors="pt")
+        pred = onnx_extractor(sentences)
+        return pred
         # Perform pooling. In this case, max pooling.
+        # sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
+        # return sentence_embeddings.tolist()