File size: 1,403 Bytes
0c66d0d
11f026a
 
0c66d0d
11f026a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0c66d0d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import gradio as gr
import pandas as pd
import tiktoken

import time
from sentence_transformers import SentenceTransformer


import os
import torch

from openai.embeddings_utils import get_embedding, cosine_similarity


# Table searched by ``search``; that function reads its ``embedding``,
# ``name``, ``description``, ``rating`` and ``text`` columns.
# NOTE(review): ``read_pickle`` unpickles arbitrary objects, so this file must
# only ever come from a trusted source.
df = pd.read_pickle('entire_data.pkl')
# Sentence-embedding model used by ``search`` to encode the query.
# NOTE(review): presumably the stored ``embedding`` vectors were produced by
# this same model — confirm before swapping it for another one.
embedder = SentenceTransformer('all-mpnet-base-v2')

def search(query, n=15, max_reviews=3):
    """Return the items whose rows are most similar to ``query``.

    The query is encoded with the module-level ``embedder`` and compared by
    cosine similarity against every vector in ``df.embedding``. The ``n``
    best-scoring rows are grouped by ``name`` so that each item appears once,
    ordered by its best-scoring row.

    Args:
        query: Free-text search string.
        n: Number of top-scoring rows kept before grouping (default 15).
        max_reviews: Maximum number of ``text`` entries returned per item
            (default 3).

    Returns:
        A list of dicts, one per distinct ``name`` among the top rows, with
        the keys ``"name"``, ``"description"`` (taken from the item's
        best-scoring row), ``"relevance score"`` (highest similarity among
        the item's rows), ``"rating"`` (mean ``rating`` over the item's rows
        that made the top ``n``) and ``"relevant_reviews"`` (``text`` of the
        item's best-scoring rows, at most ``max_reviews``).
    """
    # Column vector of whatever length the embedder emits, so the embedding
    # dimensionality is not hard-coded here.
    query_vector = embedder.encode(query).reshape(-1, 1)

    # Keep the scores in a local Series instead of writing a column onto the
    # shared module-level ``df``: overlapping requests must not overwrite each
    # other's similarities. ``.item()`` collapses the size-1 result of the dot
    # product into a plain float so sorting and JSON output are well defined.
    scores = df.embedding.apply(
        lambda emb: cosine_similarity(emb, query_vector).item()
    )
    best = scores.sort_values(ascending=False).head(n)
    results = df.loc[best.index].assign(similarity=best)

    resultlist = []
    # sort=False yields groups in order of first appearance, i.e. ranked by
    # each item's best score, and rows inside a group stay in ranked order.
    for name, group in results.groupby("name", sort=False):
        resultlist.append(
            {
                "name": name,
                "description": group["description"].iloc[0],
                # Aggregate over the whole group, not over a single row.
                "relevance score": float(group["similarity"].max()),
                "rating": float(group["rating"].mean()),
                # Cap by row count (the old check looked at column count).
                "relevant_reviews": group["text"].head(max_reviews).tolist(),
            }
        )
    return resultlist

def greet(query):
    """Run ``search`` on ``query`` and hand its result back unchanged."""
    return search(query)

# Expose ``greet`` through a Gradio interface: one text input, JSON output.
iface = gr.Interface(fn=greet, inputs="text", outputs="json")

# Start the app. There is no ``__main__`` guard, so this runs whenever the
# module is executed or imported.
iface.launch()