Recommendation System#

Two approaches to building a recommendation system, which share some common steps, are:

  1. Classification Algorithms

  2. Clustering Algorithms

Using Clustering algorithm#

from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder
import pandas as pd

# Sample data: one row per item. Each row holds four categorical attributes,
# in the order color, category, size, type (the same order as the column
# names given when the rows are loaded into a DataFrame below).
data = [
    ["green", "nature", "thumbnail", "landscape"],
    ["blue", "architecture", "medium", "portrait"],
    ["blue", "people", "medium", "landscape"],
    ["yellow", "nature", "medium", "portrait"],
    ["green", "nature", "thumbnail", "landscape"],
    ["blue", "people", "medium", "landscape"],
    ["blue", "nature", "thumbnail", "portrait"],
    ["yellow", "architecture", "thumbnail", "landscape"],
    ["blue", "people", "medium", "portrait"],
    ["yellow", "nature", "medium", "landscape"],
    ["yellow", "people", "thumbnail", "portrait"],
    ["blue", "people", "medium", "landscape"],
    ["red", "architecture", "thumbnail", "landscape"],
]

# Encode categorical features: fit one LabelEncoder per attribute column and
# turn each column's string values into integer codes. The fitted encoders are
# kept so that new items can be encoded the same way later.
label_encoders = [LabelEncoder() for _ in data[0]]
encoded_data = [
    encoder.fit_transform(column)
    for encoder, column in zip(label_encoders, zip(*data))
]

# Transpose the per-column codes back into one feature tuple per item.
X = list(zip(*encoded_data))  # Features

# Clustering
k = 2  # Number of clusters
# Seed the estimator: k-means starts from randomly chosen centroids, so without
# a fixed random_state the cluster assignments (and therefore the
# recommendations derived from them) can differ from run to run.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
kmeans.fit(X)
clusters = kmeans.labels_  # Cluster index assigned to each row of X

# Load the original (un-encoded) items into a DataFrame and attach each item's
# cluster label as an extra "Cluster" column.
data_with_clusters = pd.DataFrame(
    data, columns=["Color", "Category", "Size", "Type"]
).assign(Cluster=clusters)

# Recommendation function
def recommend_items(cluster, data_with_clusters, n=3):
    """Return up to ``n`` randomly sampled items that belong to ``cluster``.

    Args:
        cluster: Cluster label to recommend from; compared against the
            "Cluster" column.
        data_with_clusters: DataFrame of items that has a "Cluster" column.
        n: Maximum number of items to return (default 3).

    Returns:
        A DataFrame with ``min(n, cluster size)`` rows drawn without
        replacement; empty if no item carries the given cluster label.
    """
    items_in_cluster = data_with_clusters[data_with_clusters["Cluster"] == cluster]
    # DataFrame.sample raises ValueError when asked for more rows than exist,
    # so cap the request at the number of items actually in the cluster.
    sample_size = min(n, len(items_in_cluster))
    return items_in_cluster.sample(n=sample_size)

# Example usage: encode the item the user interacted with using the fitted
# encoders, look up which cluster it falls into, and recommend items from
# that same cluster.
user_interaction = ["green", "nature", "thumbnail", "landscape"]  # Assuming user interacted with this item
encoded_interaction = [
    encoder.transform([value])[0]
    for encoder, value in zip(label_encoders, user_interaction)
]
cluster = kmeans.predict([encoded_interaction])[0]
recommendations = recommend_items(cluster, data_with_clusters)
print("Recommended items:")
print(recommendations)
Recommended items:
   Color Category       Size       Type  Cluster
8   blue   people     medium   portrait        0
0  green   nature  thumbnail  landscape        0
6   blue   nature  thumbnail   portrait        0

Using classification algorithm#

from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Sample data: one row per item, each with four categorical attributes
# (color, category, size, type).
data = [
    ["green", "nature", "thumbnail", "landscape"],
    ["blue", "architecture", "medium", "portrait"],
    ["blue", "people", "medium", "landscape"],
    ["yellow", "nature", "medium", "portrait"],
    ["green", "nature", "thumbnail", "landscape"],
    ["blue", "people", "medium", "landscape"],
    ["blue", "nature", "thumbnail", "portrait"],
    ["yellow", "architecture", "thumbnail", "landscape"],
    ["blue", "people", "medium", "portrait"],
    ["yellow", "nature", "medium", "landscape"],
    ["yellow", "people", "thumbnail", "portrait"],
    ["blue", "people", "medium", "landscape"],
    ["red", "architecture", "thumbnail", "landscape"],
]
# Target label for each row of `data`, aligned by position
# (result[i] is the label of data[i]).
result = [
    "Favorite",
    "NotFavorite",
    "Favorite",
    "Favorite",
    "Favorite",
    "Favorite",
    "Favorite",
    "NotFavorite",
    "NotFavorite",
    "Favorite",
    "Favorite",
    "NotFavorite",
    "NotFavorite",
]

# Encode categorical features: fit one LabelEncoder per attribute column and
# turn each column's string values into integer codes. The fitted encoders are
# kept so that new items can be encoded the same way later.
label_encoders = [LabelEncoder() for _ in data[0]]
encoded_data = [
    encoder.fit_transform(column)
    for encoder, column in zip(label_encoders, zip(*data))
]

# Transpose the per-column codes back into one feature tuple per item.
X = list(zip(*encoded_data))  # Features
y = result  # Labels (passed through as strings; only the features are encoded)

# Hold out 20% of the items for testing; the fixed random_state keeps the
# split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train a support vector classifier (SVC with its default settings)
classifier = svm.SVC().fit(X_train, y_train)

# Predict labels for the held-out items
y_pred = classifier.predict(X_test)

# Evaluate the classifier by comparing predictions with the true labels
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

# Sample prediction: encode a single item with the fitted encoders and ask the
# trained classifier for its label.
sample_item = ["green", "nature", "thumbnail", "landscape"]  # Sample item attributes
encoded_item = [
    encoder.transform([value])[0]
    for encoder, value in zip(label_encoders, sample_item)
]
prediction = classifier.predict([encoded_item])[0]
print(f"Prediction for the sample item: {prediction}")
Accuracy: 0.6666666666666666
Prediction for the sample item: Favorite