Spaces:
Sleeping
Sleeping
import pandas as pd | |
from sklearn.metrics import jaccard_score | |
import numpy as np | |
from load_data import items | |
# --- Item catalogue ---------------------------------------------------------
# Column order follows first appearance in `items`, so it is reproducible from
# run to run (iterating a set of strings is not, because of hash
# randomisation). `unique_items` is kept as a set for fast membership tests.
unique_items_list = list(
    dict.fromkeys(item for transaction in items for item in transaction)
)
unique_items = set(unique_items_list)


def _build_count_matrix(transactions, columns):
    """Return a transactions-by-items DataFrame of occurrence counts.

    Row ``r`` / column ``c`` holds how many times item ``c`` appears in the
    ``r``-th transaction. Counts are accumulated in a NumPy array and wrapped
    once, instead of issuing one ``DataFrame.loc`` write per occurrence.
    """
    column_of = {item: position for position, item in enumerate(columns)}
    counts = np.zeros((len(transactions), len(columns)), dtype=np.int64)
    for row, transaction in enumerate(transactions):
        for item in transaction:
            counts[row, column_of[item]] += 1
    return pd.DataFrame(counts, index=range(len(transactions)), columns=columns)


def _pairwise_jaccard(binary_matrix):
    """Return the item-by-item Jaccard similarity of a 0/1 DataFrame.

    For columns A and B the score is |A and B| / |A or B|, i.e. the number of
    rows where both are 1 divided by the number of rows where at least one
    is 1. A pair with an empty union scores 0.0.
    """
    presence = binary_matrix.to_numpy()
    # (items x rows) @ (rows x items): entry [a, b] counts the rows that
    # contain both item a and item b.
    intersection = presence.T @ presence
    support = presence.sum(axis=0)
    union = support[:, None] + support[None, :] - intersection
    scores = np.divide(
        intersection,
        union,
        out=np.zeros(union.shape, dtype=float),
        where=union > 0,  # leave 0.0 where the union is empty
    )
    return pd.DataFrame(
        scores, index=binary_matrix.columns, columns=binary_matrix.columns
    )


# Create and populate the product-item matrix (one row per transaction)
product_item_matrix = _build_count_matrix(items, unique_items_list)

# Convert the DataFrame to a binary matrix. A plain comparison is used rather
# than DataFrame.map, which is only available in pandas >= 2.1.
product_item_matrix_binary = (product_item_matrix > 0).astype(np.int64)

# Calculate Jaccard similarity between every pair of items
similarity_matrix_jaccard = _pairwise_jaccard(product_item_matrix_binary)
# Function to get collaborative recommendations based on a product | |
def collaborative_recommendations(product_name, similarity_matrix=similarity_matrix_jaccard, threshold=0.01, top_n=5):
    """Return the products most similar to ``product_name``.

    Args:
        product_name: Label of the product to find neighbours for. It must be
            one of the columns of ``similarity_matrix``.
        similarity_matrix: DataFrame of pairwise similarity scores whose
            columns (and index) are product labels.
        threshold: Only products scoring strictly above this value are
            returned.
        top_n: Maximum number of products to return (default 5).

    Returns:
        A list of at most ``top_n`` product labels ordered from most to least
        similar, or a "not found" message string when ``product_name`` is not
        a column of ``similarity_matrix``.
    """
    # Validate against the matrix actually being queried, so a caller-supplied
    # matrix is neither rejected wrongly nor allowed to raise a KeyError.
    if product_name not in similarity_matrix.columns:
        return f"Product '{product_name}' not found in the database."

    # A product must not be recommended as a neighbour of itself; without this
    # its own score would pass the threshold and take up one of the slots.
    scores = similarity_matrix[product_name].drop(labels=product_name, errors="ignore")

    # mergesort is stable, so products with equal scores keep a deterministic
    # relative order instead of depending on an unstable sort.
    ranked = scores[scores > threshold].sort_values(ascending=False, kind="mergesort")
    return ranked.index[:max(top_n, 0)].tolist()