jpohhhh
/

msmarco-MiniLM-L-6-v3_onnx

sentence-embeddings

endpoints-template

Inference Endpoints

Model card Files and versions Community

jpohhhh committed on Jun 21, 2023

Commit

96355e1

•

1 Parent(s): 89b609f

Update handler.py

Files changed (1) hide show

handler.py +4 -5

handler.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from typing import Dict, List, Any
 from transformers import AutoTokenizer, AutoModel
-from optimum.onnxruntime import ORTModelForCustomTasks
 import torch
@@ -12,10 +12,9 @@ def mean_pooling(model_output, attention_mask):
 class EndpointHandler():
     def __init__(self, path=""):
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.model = ORTModelForCustomTasks.from_pretrained("optimum/sbert-all-MiniLM-L6-with-pooler")
-        self.tokenizer = AutoTokenizer.from_pretrained("optimum/sbert-all-MiniLM-L6-with-pooler")
-        self.onnx_extractor = pipeline("feature-extraction", model=model, tokenizer=tokenizer)
         # self.model.to(self.device)
         # print("model will run on ", self.device)

 from typing import Dict, List, Any
 from transformers import AutoTokenizer, AutoModel
+from optimum.pipelines import pipeline
 import torch
 class EndpointHandler():
     def __init__(self, path=""):
+        # self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        # self.tokenizer = AutoTokenizer.from_pretrained("optimum/sbert-all-MiniLM-L6-with-pooler")
+        self.onnx_extractor = pipeline("feature-extraction", model="optimum/sbert-all-MiniLM-L6-with-pooler", accelerator="ort")
         # self.model.to(self.device)
         # print("model will run on ", self.device)