# Source code for libreco.algorithms.ncf

"""Implementation of NCF."""
from ..bases import ModelMeta, TfBase
from ..prediction import normalize_prediction
from ..tfops import dense_nn, dropout_config, reg_config, tf, tf_dense
from ..torchops import hidden_units_config
from ..utils.validate import check_unknown, convert_id


class NCF(TfBase, metaclass=ModelMeta):
    """*Neural Collaborative Filtering* algorithm.

    The model combines two branches over separate user/item embeddings:
    a GMF branch (element-wise product of the embeddings) and an MLP branch
    (dense network over the concatenated embeddings). Both branch outputs are
    concatenated and projected to a single score per user-item pair.

    Parameters
    ----------
    task : {'rating', 'ranking'}
        Recommendation task. See :ref:`Task`.
    data_info : :class:`~libreco.data.DataInfo` object
        Object that contains useful information for training and inference.
    loss_type : {'cross_entropy', 'focal'}, default: 'cross_entropy'
        Loss for model training.
    embed_size: int, default: 16
        Vector size of embeddings.
    n_epochs: int, default: 20
        Number of epochs for training.
    lr : float, default 0.01
        Learning rate for training.
    lr_decay : bool, default: False
        Whether to use learning rate decay.
    epsilon : float, default: 1e-5
        A small constant added to the denominator to improve numerical stability in
        Adam optimizer.
        According to the `official comment
        <https://github.com/tensorflow/tensorflow/blob/v1.15.0/tensorflow/python/training/adam.py#L64>`_,
        default value of `1e-8` for `epsilon` is generally not good, so here we choose
        `1e-5`. Users can try tuning this hyperparameter if the training is unstable.
    reg : float or None, default: None
        Regularization parameter, must be non-negative or None.
    batch_size : int, default: 256
        Batch size for training.
    num_neg : int, default: 1
        Number of negative samples for each positive sample, only used in
        `ranking` task.
    use_bn : bool, default: True
        Whether to use batch normalization.
    dropout_rate : float or None, default: None
        Probability of an element to be zeroed. If it is None, dropout is not used.
    hidden_units : int, list of int or tuple of (int,), default: (128, 64, 32)
        Number of layers and corresponding layer size in MLP.

        .. versionchanged:: 1.0.0
           Accept type of ``int``, ``list`` or ``tuple``, instead of ``str``.

    seed : int, default: 42
        Random seed.
    lower_upper_bound : tuple or None, default: None
        Lower and upper score bound for `rating` task.
    tf_sess_config : dict or None, default: None
        Optional TensorFlow session config, see `ConfigProto options
        <https://github.com/tensorflow/tensorflow/blob/v2.10.0/tensorflow/core/protobuf/config.proto#L431>`_.

    References
    ----------
    *Xiangnan He et al.* `Neural Collaborative Filtering
    <https://arxiv.org/pdf/1708.05031.pdf>`_.
    """

    # Names of the user-side / item-side embedding variables created in
    # ``build_model``; the strings match the ``tf.get_variable`` names there.
    # NOTE(review): presumably consumed by the base class / metaclass to locate
    # the embedding tables -- confirm against ``TfBase``.
    user_variables = ["user_gmf", "user_mlp"]
    item_variables = ["item_gmf", "item_mlp"]

    def __init__(
        self,
        task,
        data_info,
        loss_type="cross_entropy",
        embed_size=16,
        n_epochs=20,
        lr=0.01,
        lr_decay=False,
        epsilon=1e-5,
        reg=None,
        batch_size=256,
        num_neg=1,
        use_bn=True,
        dropout_rate=None,
        hidden_units=(128, 64, 32),
        seed=42,
        lower_upper_bound=None,
        tf_sess_config=None,
    ):
        """Store the hyperparameters; see the class docstring for their meaning."""
        super().__init__(task, data_info, lower_upper_bound, tf_sess_config)

        # Capture the constructor's local namespace (the arguments as passed).
        # This must stay before any new local is introduced in this method,
        # otherwise the snapshot would contain more than the arguments.
        self.all_args = locals()
        self.loss_type = loss_type
        self.embed_size = embed_size
        self.n_epochs = n_epochs
        self.lr = lr
        self.lr_decay = lr_decay
        self.epsilon = epsilon
        # ``reg``, ``dropout_rate`` and ``hidden_units`` are passed through the
        # project's ``*_config`` helpers rather than stored raw.
        self.reg = reg_config(reg)
        self.batch_size = batch_size
        self.num_neg = num_neg
        self.use_bn = use_bn
        self.dropout_rate = dropout_config(dropout_rate)
        self.hidden_units = hidden_units_config(hidden_units)
        self.seed = seed

    def build_model(self):
        """Build the TensorFlow graph: placeholders, embeddings and output.

        Sets ``self.user_indices``, ``self.item_indices``, ``self.labels``,
        ``self.is_training`` (placeholders) and ``self.output`` (a rank-1
        tensor holding one score per user-item pair).
        """
        self.user_indices = tf.placeholder(tf.int32, shape=[None])
        self.item_indices = tf.placeholder(tf.int32, shape=[None])
        self.labels = tf.placeholder(tf.float32, shape=[None])
        # Defaults to inference mode unless a feed explicitly enables training.
        self.is_training = tf.placeholder_with_default(False, shape=[])

        # Separate embedding tables for the GMF and the MLP branch.
        # NOTE(review): tables have one row more than ``n_users`` / ``n_items``;
        # presumably the extra row serves unknown (cold-start) ids -- confirm.
        user_gmf = tf.get_variable(
            name="user_gmf",
            shape=[self.n_users + 1, self.embed_size],
            initializer=tf.glorot_uniform_initializer(),
            regularizer=self.reg,
        )
        item_gmf = tf.get_variable(
            name="item_gmf",
            shape=[self.n_items + 1, self.embed_size],
            initializer=tf.glorot_uniform_initializer(),
            regularizer=self.reg,
        )
        user_mlp = tf.get_variable(
            name="user_mlp",
            shape=[self.n_users + 1, self.embed_size],
            initializer=tf.glorot_uniform_initializer(),
            regularizer=self.reg,
        )
        item_mlp = tf.get_variable(
            name="item_mlp",
            shape=[self.n_items + 1, self.embed_size],
            initializer=tf.glorot_uniform_initializer(),
            regularizer=self.reg,
        )

        user_gmf_embed = tf.nn.embedding_lookup(user_gmf, self.user_indices)
        item_gmf_embed = tf.nn.embedding_lookup(item_gmf, self.item_indices)
        user_mlp_embed = tf.nn.embedding_lookup(user_mlp, self.user_indices)
        item_mlp_embed = tf.nn.embedding_lookup(item_mlp, self.item_indices)

        # GMF branch: element-wise product of user and item embeddings.
        gmf_layer = tf.multiply(user_gmf_embed, item_gmf_embed)
        # MLP branch: dense network over the concatenated embeddings.
        mlp_input = tf.concat([user_mlp_embed, item_mlp_embed], axis=1)
        mlp_layer = dense_nn(
            mlp_input,
            self.hidden_units,
            use_bn=self.use_bn,
            dropout_rate=self.dropout_rate,
            is_training=self.is_training,
        )
        # Fuse both branches and project to one score, flattened to rank 1.
        concat_layer = tf.concat([gmf_layer, mlp_layer], axis=1)
        self.output = tf.reshape(tf_dense(units=1)(concat_layer), [-1])

    def predict(self, user, item, feats=None, cold_start="average", inner_id=False):
        """Make prediction(s) on given user(s) and item(s).

        Parameters
        ----------
        user : int or str or array_like
            User id or batch of user ids.
        item : int or str or array_like
            Item id or batch of item ids.
        feats : None, default: None
            NCF can't use features.
        cold_start : {'popular', 'average'}, default: 'average'
            Cold start strategy.

            - 'popular' will sample from popular items.
            - 'average' will use the average of all the user/item embeddings as the
              representation of the cold-start user/item.

        inner_id : bool, default: False
            Whether to use inner_id defined in `libreco`. For library users inner_id
            may never be used.

        Returns
        -------
        prediction : float or array_like
            Predicted scores for each user-item pair.

        Raises
        ------
        AssertionError
            If `feats` is not None.
        """
        # Kept as ``assert`` so callers still see ``AssertionError``; note that
        # the check is skipped when Python runs with ``-O``.
        assert feats is None, "NCF can't use features."

        user, item = convert_id(self, user, item, inner_id)
        unknown_num, unknown_index, user, item = check_unknown(self, user, item)
        preds = self.sess.run(
            self.output,
            feed_dict={
                self.user_indices: user,
                self.item_indices: item,
                self.is_training: False,
            },
        )
        return normalize_prediction(preds, self, cold_start, unknown_num, unknown_index)

    def recommend_user(
        self,
        user,
        n_rec,
        user_feats=None,
        item_data=None,
        cold_start="average",
        inner_id=False,
        filter_consumed=True,
        random_rec=False,
    ):
        """Recommend a list of items for given user(s).

        Parameters
        ----------
        user : int or str or array_like
            User id or batch of user ids to recommend.
        n_rec : int
            Number of recommendations to return.
        user_feats : None, default: None
            NCF can't use features.
        item_data : None, default: None
            NCF can't use features.
        cold_start : {'popular', 'average'}, default: 'average'
            Cold start strategy.

            - 'popular' will sample from popular items.
            - 'average' will use the average of all the user/item embeddings as the
              representation of the cold-start user/item.

        inner_id : bool, default: False
            Whether to use inner_id defined in `libreco`. For library users inner_id
            may never be used.
        filter_consumed : bool, default: True
            Whether to filter out items that a user has previously consumed.
        random_rec : bool, default: False
            Whether to choose items for recommendation based on their prediction scores.

        Returns
        -------
        recommendation : dict
            Recommendation result with user ids as keys
            and array_like recommended items as values.

        Raises
        ------
        AssertionError
            If `user_feats` or `item_data` is not None.
        """
        # Kept as ``assert`` so callers still see ``AssertionError``; note that
        # the check is skipped when Python runs with ``-O``.
        assert user_feats is None and item_data is None, "NCF can't use features."

        return super().recommend_user(
            user,
            n_rec,
            user_feats,
            item_data,
            cold_start,
            inner_id,
            filter_consumed,
            random_rec,
        )