import pickle
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.neighbors import NearestNeighbors
import gradio as gr

# Load the embeddings from the file
with open('embeddings.pkl', 'rb') as f:
    embeddings = pickle.load(f)
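# NOTE: embeddings.pkl is assumed to be a 2-D array of sentence embeddings,
# one row per row of quran_hadith.csv, built with the same SentenceTransformer
# model that is loaded below. A minimal sketch of how such a file could be
# generated (this is an assumption about provenance, not part of this script):
#
#     corpus = pd.read_csv('quran_hadith.csv')['text'].tolist()
#     corpus_embeddings = SentenceTransformer('all-MiniLM-L6-v2').encode(corpus)
#     with open('embeddings.pkl', 'wb') as f:
#         pickle.dump(corpus_embeddings, f)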

# Fit a Nearest Neighbors index over the embeddings using cosine distance
nbrs = NearestNeighbors(n_neighbors=20, metric='cosine').fit(embeddings)

# Load the dataset
df = pd.read_csv('quran_hadith.csv')
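# The dataframe is assumed to have a 'text' column whose row order matches the
# row order of the embeddings array, so neighbor index i maps to df['text'].iloc[i].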

# Initialize the SentenceTransformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

def semantic_search(query, model, embeddings, nbrs, k=10):
    # Encode the query into the same embedding space as the corpus
    query_embedding = model.encode([query])[0]

    # Find the k nearest neighbors (cosine distance, smaller is closer)
    distances, indices = nbrs.kneighbors([query_embedding], n_neighbors=k)

    # Convert cosine distances to similarity percentages, rounded to two decimals
    similarities = [round((1 - dist) * 100, 2) for dist in distances[0]]

    # Return the k most similar sentences together with their similarity scores
    similar_sentences = [(df['text'].iloc[idx], sim) for idx, sim in zip(indices[0], similarities)]
    return similar_sentences

# Gradio function
def search_interface(query):
    similar_sentences = semantic_search(query, model, embeddings, nbrs, k=10)
    results = [{"sentence": sentence, "similarity": f"{similarity}%"} for sentence, similarity in similar_sentences]
    return results

# Create Gradio interface
iface = gr.Interface(
    fn=search_interface,
    inputs=gr.Textbox(lines=2, placeholder="Enter your query here..."),
    outputs=gr.JSON(label="Similar Sentences")
)

# Launch the interface (share=True also creates a temporary public link)
iface.launch(share=True)