Source code for libreco.algorithms.user_cf

"""Implementation of UserCF."""
from collections import defaultdict
from itertools import islice
from operator import itemgetter

import numpy as np
from tqdm import tqdm

from ..bases import CfBase
from ..recommendation import popular_recommendations
from ..utils.misc import colorize


[docs]class UserCF(CfBase): """*User Collaborative Filtering* algorithm. Parameters ---------- task : {'rating', 'ranking'} Recommendation task. See :ref:`Task`. data_info : :class:`~libreco.data.DataInfo` object Object that contains useful information for training and inference. sim_type : {'cosine', 'pearson', 'jaccard'}, default: 'cosine' Types for computing similarities. k_sim : int, default: 20 Number of similar items to use. store_top_k : bool, default: True Whether to store top k similar users after training. block_size : int or None, default: None Block size for computing similarity matrix. Large block size makes computation faster, but may cause memory issue. num_threads : int, default: 1 Number of threads to use. min_common : int, default: 1 Number of minimum common users to consider when computing similarities. mode : {'forward', 'invert'}, default: 'invert' Whether to use forward index or invert index. seed : int, default: 42 Random seed. lower_upper_bound : tuple or None, default: None Lower and upper score bound for `rating` task. """ def __init__( self, task, data_info, sim_type="cosine", k_sim=20, store_top_k=True, block_size=None, num_threads=1, min_common=1, mode="invert", seed=42, lower_upper_bound=None, ): super().__init__( task, data_info, "user_cf", sim_type, k_sim, store_top_k, block_size, num_threads, min_common, mode, seed, lower_upper_bound, ) self.all_args = locals()
[docs] def predict(self, user, item, cold_start="popular", inner_id=False): """Make prediction(s) on given user(s) and item(s). Parameters ---------- user : int or str or array_like User id or batch of user ids. item : int or str or array_like Item id or batch of item ids. cold_start : {'popular'}, default: 'popular' Cold start strategy, ItemCF can only use 'popular' strategy. inner_id : bool, default: False Whether to use inner_id defined in `libreco`. For library users inner_id may never be used. Returns ------- prediction : float or numpy.ndarray Predicted scores for each user-item pair. """ user_arr, item_arr = self.pre_predict_check(user, item, inner_id, cold_start) preds = [] sim_matrix = self.sim_matrix interaction = self.item_interaction for u, i in zip(user_arr, item_arr): if u == self.n_users or i == self.n_items: preds.append(self.default_pred) continue user_slice = slice(sim_matrix.indptr[u], sim_matrix.indptr[u + 1]) sim_users = sim_matrix.indices[user_slice] sim_values = sim_matrix.data[user_slice] item_slice = slice(interaction.indptr[i], interaction.indptr[i + 1]) item_interacted_u = interaction.indices[item_slice] item_interacted_values = interaction.data[item_slice] common_users, indices_in_u, indices_in_i = np.intersect1d( sim_users, item_interacted_u, assume_unique=True, return_indices=True ) common_sims = sim_values[indices_in_u] common_labels = item_interacted_values[indices_in_i] pred = self.compute_pred( u, i, common_users.size, common_sims, common_labels ) preds.append(pred) return preds[0] if len(user_arr) == 1 else preds
def recommend_one(self, user_id, n_rec, filter_consumed, random_rec): user_slice = slice( self.sim_matrix.indptr[user_id], self.sim_matrix.indptr[user_id + 1] ) sim_users = self.sim_matrix.indices[user_slice] sim_values = self.sim_matrix.data[user_slice] if sim_users.size == 0 or np.all(sim_values <= 0): # pragma: no cover self.print_count += 1 no_str = ( f"no similar neighbor for user {user_id}, " f"return default recommendation" ) if self.print_count < 7: print(f"{colorize(no_str, 'red')}") return popular_recommendations(self.data_info, inner_id=True, n_rec=n_rec) all_item_indices = self.user_interaction.indices all_item_indptr = self.user_interaction.indptr all_item_values = self.user_interaction.data if self.topk_sim is not None: k_nbs_and_sims = self.topk_sim[user_id] else: k_nbs_and_sims = islice( sorted(zip(sim_users, sim_values), key=itemgetter(1), reverse=True), self.k_sim, ) item_sims = defaultdict(lambda: 0.0) item_scores = defaultdict(lambda: 0.0) for v, u_v_sim in k_nbs_and_sims: item_slices = slice(all_item_indptr[v], all_item_indptr[v + 1]) v_interacted_items = all_item_indices[item_slices] v_interacted_values = all_item_values[item_slices] for i, v_i_score in zip(v_interacted_items, v_interacted_values): item_sims[i] += u_v_sim item_scores[i] += u_v_sim * v_i_score ids = np.array(list(item_sims)) preds = np.array([item_scores[i] / item_sims[i] for i in ids]) return self.rank_recommendations( user_id, ids, preds, n_rec, self.user_consumed[user_id], filter_consumed, random_rec, ) def compute_top_k(self): top_k = dict() for u in tqdm(range(self.n_users), desc="top_k"): user_slice = slice(self.sim_matrix.indptr[u], self.sim_matrix.indptr[u + 1]) sim_users = self.sim_matrix.indices[user_slice].tolist() sim_values = self.sim_matrix.data[user_slice].tolist() top_k[u] = sorted( zip(sim_users, sim_values), key=itemgetter(1), reverse=True )[: self.k_sim] self.topk_sim = top_k def rebuild_model(self, path, model_name, **kwargs): raise NotImplementedError("`UserCF` doesn't support model retraining")