mhdhrubo's picture
adding files
7c5cc2a verified
raw
history blame
No virus
1.75 kB
import pickle
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.neighbors import NearestNeighbors
import gradio as gr
# Precomputed sentence embeddings are read from a pickle on disk.
# NOTE(review): unpickling can execute arbitrary code, so embeddings.pkl
# must only ever come from a trusted source.
with open('embeddings.pkl', 'rb') as f:
    embeddings = pickle.load(f)

# Cosine-distance neighbour index over the stored embeddings; it is
# configured first and then fitted in a separate step.
nbrs = NearestNeighbors(n_neighbors=20, metric='cosine')
nbrs.fit(embeddings)

# Text corpus; semantic_search reads its 'text' column.
df = pd.read_csv('quran_hadith.csv')

# Sentence encoder used to embed incoming queries.
model = SentenceTransformer('all-MiniLM-L6-v2')
def semantic_search(query, model, embeddings, nbrs, k=10, texts=None):
    """Return the ``k`` corpus sentences nearest to ``query``.

    Args:
        query: Free-text search string.
        model: Encoder exposing ``encode(list_of_str)``; the first row of
            its result is used as the query vector.
        embeddings: Not used by this function; kept in the signature so
            existing callers keep working.
        nbrs: Fitted neighbour index exposing
            ``kneighbors(X, n_neighbors=...)`` and returning
            ``(distances, indices)`` with one row per query. The
            similarity conversion assumes it reports cosine distance.
        k: Number of neighbours to request from ``nbrs``.
        texts: Optional collection of sentences indexable by integer
            position (e.g. a list); objects exposing ``.iloc`` such as a
            pandas Series are read through it. Defaults to the
            module-level ``df['text']`` column.

    Returns:
        A list of ``(sentence, similarity)`` tuples in the order the index
        returned them, where ``similarity`` is ``(1 - distance) * 100``
        rounded to two decimal places.
    """
    # Encode the single query and take its one embedding row.
    query_embedding = model.encode([query])[0]

    # Ask for exactly k neighbours; without n_neighbors the index falls
    # back to whatever default it was constructed with.
    distances, indices = nbrs.kneighbors([query_embedding], n_neighbors=k)

    corpus = df['text'] if texts is None else texts
    # Prefer positional access (.iloc) when the container offers it so a
    # Series with a non-default index is still addressed by row number.
    lookup = getattr(corpus, 'iloc', corpus)

    # Both results are 2-D with one row per query; only row 0 exists here.
    # Pairing indices[0] with the whole `distances` array (instead of
    # distances[0]) would yield a single, malformed result.
    return [
        (lookup[int(idx)], round((1 - float(dist)) * 100, 2))
        for idx, dist in zip(indices[0], distances[0])
    ]
# Gradio function
def search_interface(query):
    """Gradio callback: run a semantic search and shape it for JSON output.

    Args:
        query: Text entered by the user.

    Returns:
        A list of dicts with keys ``"sentence"`` and ``"similarity"``; the
        latter is the score from ``semantic_search`` followed by ``%``.
        An empty list is returned when ``query`` is empty or contains only
        whitespace.
    """
    # A blank query has nothing meaningful to match, so skip the encoder
    # and the index lookup altogether.
    if not query or not query.strip():
        return []

    matches = semantic_search(query, model, embeddings, nbrs, k=10)
    return [
        {"sentence": sentence, "similarity": f"{distance}%"}
        for sentence, distance in matches
    ]
# Build the UI pieces first: a two-line query box and a JSON results pane.
query_box = gr.Textbox(lines=2, placeholder="Enter your query here...")
results_view = gr.JSON(label="Similar Sentences")

# Wire the search callback between the input and output components.
iface = gr.Interface(fn=search_interface, inputs=query_box, outputs=results_view)

# Start the app, requesting a shareable link.
iface.launch(share=True)