Source code for libreco.algorithms.wide_deep

"""Implementation of Wide & Deep."""
from ..bases import ModelMeta, TfBase
from ..feature.multi_sparse import true_sparse_field_size
from ..layers import dense_nn, embedding_lookup, tf_dense
from ..tfops import dropout_config, reg_config, tf
from ..tfops.features import compute_dense_feats, compute_sparse_feats
from ..torchops import hidden_units_config
from ..utils.misc import count_params
from ..utils.validate import (
    check_dense_values,
    check_multi_sparse,
    check_sparse_indices,
    dense_field_size,
    sparse_feat_size,
    sparse_field_size,
)


class WideDeep(TfBase, metaclass=ModelMeta):
    """*Wide & Deep* algorithm.

    Parameters
    ----------
    task : {'rating', 'ranking'}
        Recommendation task. See :ref:`Task`.
    data_info : :class:`~libreco.data.DataInfo` object
        Object that contains useful information for training and inference.
    loss_type : {'cross_entropy', 'focal'}, default: 'cross_entropy'
        Loss for model training.
    embed_size : int, default: 16
        Vector size of embeddings.
    n_epochs : int, default: 20
        Number of epochs for training.
    lr : dict, default: {"wide": 0.01, "deep": 1e-4}
        Learning rate for training. The parameter should be a dict that contains
        the learning rates of the wide and deep parts.
    lr_decay : bool, default: False
        Whether to use learning rate decay.
    epsilon : float, default: 1e-5
        A small constant added to the denominator to improve numerical stability in
        the Adam optimizer. According to the `official comment
        <https://github.com/tensorflow/tensorflow/blob/v1.15.0/tensorflow/python/training/adam.py#L64>`_,
        the default value of `1e-8` for `epsilon` is generally not good, so we choose
        `1e-5` here. Users can try tuning this hyperparameter if the training is unstable.
    reg : float or None, default: None
        Regularization parameter, must be non-negative or None.
    batch_size : int, default: 256
        Batch size for training.
    sampler : {'random', 'unconsumed', 'popular'}, default: 'random'
        Negative sampling strategy.

        - ``'random'`` means random sampling.
        - ``'unconsumed'`` samples items that the target user did not consume before.
        - ``'popular'`` has a higher probability to sample popular items as negative samples.

        .. versionadded:: 1.1.0

    num_neg : int, default: 1
        Number of negative samples for each positive sample, only used in `ranking` task.
    use_bn : bool, default: True
        Whether to use batch normalization.
    dropout_rate : float or None, default: None
        Probability of an element to be zeroed. If it is None, dropout is not used.
    hidden_units : int, list of int or tuple of (int,), default: (128, 64, 32)
        Number of layers and corresponding layer sizes in the MLP.

        .. versionchanged:: 1.0.0
           Accept type of ``int``, ``list`` or ``tuple``, instead of ``str``.

    multi_sparse_combiner : {'normal', 'mean', 'sum', 'sqrtn'}, default: 'sqrtn'
        Options for combining `multi_sparse` features.
    seed : int, default: 42
        Random seed.
    lower_upper_bound : tuple or None, default: None
        Lower and upper score bound for `rating` task.
    tf_sess_config : dict or None, default: None
        Optional TensorFlow session config, see `ConfigProto options
        <https://github.com/tensorflow/tensorflow/blob/v2.10.0/tensorflow/core/protobuf/config.proto#L431>`_.

    Notes
    -----
    According to the original paper, the Wide part uses FTRL with L1 regularization
    as the optimizer, so we adopt it here as well. Note that this may not be suitable
    for your specific task.

    References
    ----------
    *Heng-Tze Cheng et al.* `Wide & Deep Learning for Recommender Systems
    <https://arxiv.org/pdf/1606.07792.pdf>`_.
    """
""" user_variables = ("embedding/user_wide_var", "embedding/user_deep_var") item_variables = ("embedding/item_wide_var", "embedding/item_deep_var") sparse_variables = ("embedding/sparse_wide_var", "embedding/sparse_deep_var") dense_variables = ("embedding/dense_wide_var", "embedding/dense_deep_var") def __init__( self, task, data_info=None, loss_type="cross_entropy", embed_size=16, n_epochs=20, lr=None, lr_decay=False, epsilon=1e-5, reg=None, batch_size=256, sampler="random", num_neg=1, use_bn=True, dropout_rate=None, hidden_units=(128, 64, 32), multi_sparse_combiner="sqrtn", seed=42, lower_upper_bound=None, tf_sess_config=None, ): super().__init__(task, data_info, lower_upper_bound, tf_sess_config) self.all_args = locals() self.loss_type = loss_type self.embed_size = embed_size self.n_epochs = n_epochs self.lr = self.check_lr(lr) self.lr_decay = lr_decay self.epsilon = epsilon self.reg = reg_config(reg) self.batch_size = batch_size self.sampler = sampler self.num_neg = num_neg self.use_bn = use_bn self.dropout_rate = dropout_config(dropout_rate) self.hidden_units = hidden_units_config(hidden_units) self.seed = seed self.sparse = check_sparse_indices(data_info) self.dense = check_dense_values(data_info) if self.sparse: self.sparse_feature_size = sparse_feat_size(data_info) self.sparse_field_size = sparse_field_size(data_info) self.multi_sparse_combiner = check_multi_sparse( data_info, multi_sparse_combiner ) self.true_sparse_field_size = true_sparse_field_size( data_info, self.sparse_field_size, self.multi_sparse_combiner ) if self.dense: self.dense_field_size = dense_field_size(data_info) def build_model(self): tf.set_random_seed(self.seed) self.labels = tf.placeholder(tf.float32, shape=[None]) self.is_training = tf.placeholder_with_default(False, shape=[]) self.wide_embed, self.deep_embed = [], [] self._build_user_item() if self.sparse: self._build_sparse() if self.dense: self._build_dense() wide_embed = tf.concat(self.wide_embed, axis=1) wide_term = tf_dense(units=1, name="wide_term")(wide_embed) deep_embed = tf.concat(self.deep_embed, axis=1) deep_layer = dense_nn( deep_embed, self.hidden_units, use_bn=self.use_bn, dropout_rate=self.dropout_rate, is_training=self.is_training, name="deep", ) deep_term = tf_dense(units=1, name="deep_term")(deep_layer) self.output = tf.squeeze(tf.add(wide_term, deep_term)) self.serving_topk = self.build_topk(self.output) count_params() def _build_user_item(self): self.user_indices = tf.placeholder(tf.int32, shape=[None]) self.item_indices = tf.placeholder(tf.int32, shape=[None]) wide_user_embed = embedding_lookup( indices=self.user_indices, var_name="user_wide_var", var_shape=(self.n_users + 1, 1), initializer=tf.glorot_uniform_initializer(), regularizer=self.reg, ) wide_item_embed = embedding_lookup( indices=self.item_indices, var_name="item_wide_var", var_shape=(self.n_items + 1, 1), initializer=tf.glorot_uniform_initializer(), regularizer=self.reg, ) deep_user_embed = embedding_lookup( indices=self.user_indices, var_name="user_deep_var", var_shape=(self.n_users + 1, self.embed_size), initializer=tf.glorot_uniform_initializer(), regularizer=self.reg, ) deep_item_embed = embedding_lookup( indices=self.item_indices, var_name="item_deep_var", var_shape=(self.n_items + 1, self.embed_size), initializer=tf.glorot_uniform_initializer(), regularizer=self.reg, ) self.wide_embed.extend([wide_user_embed, wide_item_embed]) self.deep_embed.extend([deep_user_embed, deep_item_embed]) def _build_sparse(self): self.sparse_indices = tf.placeholder( tf.int32, shape=[None, 
self.sparse_field_size] ) wide_sparse_embed = compute_sparse_feats( self.data_info, self.multi_sparse_combiner, self.sparse_indices, var_name="sparse_wide_var", var_shape=[self.sparse_feature_size], initializer=tf.glorot_uniform_initializer(), regularizer=self.reg, ) deep_sparse_embed = compute_sparse_feats( self.data_info, self.multi_sparse_combiner, self.sparse_indices, var_name="sparse_deep_var", var_shape=(self.sparse_feature_size, self.embed_size), initializer=tf.glorot_uniform_initializer(), regularizer=self.reg, flatten=True, ) self.wide_embed.append(wide_sparse_embed) self.deep_embed.append(deep_sparse_embed) def _build_dense(self): self.dense_values = tf.placeholder( tf.float32, shape=[None, self.dense_field_size] ) wide_dense_embed = compute_dense_feats( self.dense_values, var_name="dense_wide_var", var_shape=[self.dense_field_size], initializer=tf.glorot_uniform_initializer(), regularizer=self.reg, ) deep_dense_embed = compute_dense_feats( self.dense_values, var_name="dense_deep_var", var_shape=(self.dense_field_size, self.embed_size), initializer=tf.glorot_uniform_initializer(), regularizer=self.reg, flatten=True, ) self.wide_embed.append(wide_dense_embed) self.deep_embed.append(deep_dense_embed) @staticmethod def check_lr(lr): if not lr: return {"wide": 0.01, "deep": 1e-4} else: assert isinstance(lr, dict) and "wide" in lr and "deep" in lr, ( "`lr` should be a dict that contains learning rate of " "wide and deep parts, e.g. {'wide': 0.01, 'deep': 1e-4}" ) return lr
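
The snippet below is a minimal usage sketch, not part of the library source: it shows how the class above might be constructed and trained. It assumes that ``data_info``, ``train_data`` and ``eval_data`` were prepared beforehand with ``libreco.data.DatasetFeat``, and the exact ``fit`` arguments may differ between library versions.

# Hypothetical usage example (assumes data prepared via libreco.data.DatasetFeat;
# `data_info`, `train_data`, `eval_data` are placeholders, not defined here).
from libreco.algorithms import WideDeep

model = WideDeep(
    task="ranking",
    data_info=data_info,
    loss_type="cross_entropy",
    embed_size=16,
    n_epochs=20,
    # `lr` must be a dict with both parts, as enforced by `check_lr` above.
    lr={"wide": 0.01, "deep": 1e-4},
    batch_size=256,
    sampler="random",
    num_neg=1,
    use_bn=True,
    hidden_units=(128, 64, 32),
)
# Train with negative sampling for the ranking task and evaluate on held-out data.
model.fit(train_data, neg_sampling=True, verbose=2, eval_data=eval_data)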