Source code for libreco.bases.tf_base

"""TF model base class."""
import abc
import os

import numpy as np

from ..prediction import predict_tf_feat
from ..recommendation import cold_start_rec, construct_rec, recommend_tf_feat
from ..tfops import modify_variable_names, sess_config, tf
from ..training import get_trainer
from ..utils.save_load import (
    load_tf_model,
    load_tf_variables,
    save_default_recs,
    save_params,
    save_tf_model,
    save_tf_variables,
)
from ..utils.validate import check_unknown_user
from .base import Base


class TfBase(Base):
    """Base class for TF models.

    Models that rely on TensorFlow graph for inference. Although some models such as
    `RNN4Rec`, `SVD` etc., are trained using TensorFlow, they don't belong to this
    base class since their inference only uses embeddings.

    Parameters
    ----------
    task : {'rating', 'ranking'}
        Recommendation task. See :ref:`Task`.
    data_info : :class:`~libreco.data.DataInfo` object
        Object that contains useful information for training and inference.
    lower_upper_bound : tuple or None
        Lower and upper score bound for `rating` task.
    tf_sess_config : dict or None
        Optional TensorFlow session config, see `ConfigProto options
        <https://github.com/tensorflow/tensorflow/blob/v2.10.0/tensorflow/core/protobuf/config.proto#L431>`_.
    """

    def __init__(self, task, data_info, lower_upper_bound=None, tf_sess_config=None):
        super().__init__(task, data_info, lower_upper_bound)
        # Session object returned by `sess_config`; used to run the OOV update ops
        # and handed to the save helpers.
        self.sess = sess_config(tf_sess_config)
        # Set to True by `fit` once `build_model` has been called.
        self.model_built = False
        # Created lazily by `fit` through `get_trainer`.
        self.trainer = None
        # `fit` refuses to run when this is True.
        # NOTE(review): presumably set by the load helpers -- confirm.
        self.loaded = False

    @abc.abstractmethod
    def build_model(self):
        """Build the model.

        Subclasses must override this method. :py:func:`fit` calls it once, before
        the first training run.
        """
        raise NotImplementedError

    def fit(
        self,
        train_data,
        verbose=1,
        shuffle=True,
        eval_data=None,
        metrics=None,
        k=10,
        eval_batch_size=8192,
        eval_user_num=None,
    ):
        """Fit TF model on the training data.

        After training, the out-of-vocabulary rows of the variables are assigned
        (see :py:func:`assign_tf_variables_oov`) and default recommendations are
        computed for the user id ``self.n_users`` and stored in ``self.default_recs``.

        Parameters
        ----------
        train_data : :class:`~libreco.data.TransformedSet` object
            Data object used for training.
        verbose : int, default: 1
            Print verbosity. If `eval_data` is provided, setting it to higher than 1
            will print evaluation metrics during training.
        shuffle : bool, default: True
            Whether to shuffle the training data.
        eval_data : :class:`~libreco.data.TransformedSet` object, default: None
            Data object used for evaluating.
        metrics : list or None, default: None
            List of metrics for evaluating.
        k : int, default: 10
            Parameter of metrics, e.g. recall at k, ndcg at k
        eval_batch_size : int, default: 8192
            Batch size for evaluating.
        eval_user_num : int or None, default: None
            Number of users for evaluating. Setting it to a positive number will
            sample users randomly from eval data.

        Raises
        ------
        RuntimeError
            If :py:func:`fit` is called from a loaded model(:py:func:`load`).
        ValueError
            If `eval_data` is provided and `k` exceeds the number of items.
        """
        if self.loaded:
            raise RuntimeError(
                "Loaded model doesn't support retraining, use `rebuild_model` instead. "
                "Or constructing a new model from scratch."
            )
        if eval_data is not None and k > self.n_items:
            raise ValueError(f"eval `k` {k} exceeds num of items {self.n_items}")

        self.show_start_time()
        # Build the model and the trainer only once so that repeated `fit` calls
        # reuse them.
        if not self.model_built:
            self.build_model()
            self.model_built = True
        if self.trainer is None:
            self.trainer = get_trainer(self)
        self.trainer.run(
            train_data,
            verbose,
            shuffle,
            eval_data,
            metrics,
            k,
            eval_batch_size,
            eval_user_num,
        )
        self.assign_tf_variables_oov()
        # `self.n_users` is the row that `assign_tf_variables_oov` fills with the
        # mean user representation, so these recs serve as the cold-start fallback
        # passed to `cold_start_rec` in `recommend_user`.
        self.default_recs = recommend_tf_feat(
            model=self,
            user_ids=[self.n_users],
            n_rec=min(2000, self.n_items),
            user_feats=None,
            item_data=None,
            filter_consumed=False,
            random_rec=False,
        ).flatten()

    def predict(self, user, item, feats=None, cold_start="average", inner_id=False):
        """Make prediction(s) on given user(s) and item(s).

        Parameters
        ----------
        user : int or str or array_like
            User id or batch of user ids.
        item : int or str or array_like
            Item id or batch of item ids.
        feats : dict or pandas.Series or None, default: None
            Extra features used in prediction.
        cold_start : {'popular', 'average'}, default: 'average'
            Cold start strategy.

            - 'popular' will sample from popular items.
            - 'average' will use the average of all the user/item embeddings as the
              representation of the cold-start user/item.

        inner_id : bool, default: False
            Whether to use inner_id defined in `libreco`. For library users inner_id
            may never be used.

        Returns
        -------
        prediction : float or numpy.ndarray
            Predicted scores for each user-item pair.
        """
        return predict_tf_feat(self, user, item, feats, cold_start, inner_id)

    def recommend_user(
        self,
        user,
        n_rec,
        user_feats=None,
        item_data=None,
        cold_start="average",
        inner_id=False,
        filter_consumed=True,
        random_rec=False,
    ):
        """Recommend a list of items for given user(s).

        Parameters
        ----------
        user : int or str or array_like
            User id or batch of user ids to recommend.
        n_rec : int
            Number of recommendations to return.
        user_feats : dict or pandas.Series or None, default: None
            Extra user features for recommendation.
        item_data : pandas.DataFrame or None, default: None
            Extra item features for recommendation.
        cold_start : {'popular', 'average'}, default: 'average'
            Cold start strategy.

            - 'popular' will sample from popular items.
            - 'average' will use the average of all the user/item embeddings as the
              representation of the cold-start user/item.

        inner_id : bool, default: False
            Whether to use inner_id defined in `libreco`. For library users inner_id
            may never be used.
        filter_consumed : bool, default: True
            Whether to filter out items that a user has previously consumed.
        random_rec : bool, default: False
            Whether to choose items for recommendation based on their prediction
            scores.

        Returns
        -------
        recommendation : dict of {Union[int, str, array_like] : numpy.ndarray}
            Recommendation result with user ids as keys and array_like recommended
            items as values.

        Raises
        ------
        ValueError
            If `user_feats` or `item_data` is given together with more than one user.
        """
        has_extra_feats = user_feats is not None or item_data is not None
        if has_extra_feats and not np.isscalar(user) and len(user) > 1:
            raise ValueError(
                f"Batch recommend doesn't support assigning arbitrary features: {user}"
            )

        result_recs = {}
        user_ids, unknown_users = check_unknown_user(self.data_info, user, inner_id)
        # Unknown users are served from the cold-start helper using the default
        # recommendations.
        if unknown_users:
            cold_recs = cold_start_rec(
                self.data_info,
                self.default_recs,
                cold_start,
                unknown_users,
                n_rec,
                inner_id,
            )
            result_recs.update(cold_recs)
        # Known users are scored by the model itself.
        if user_ids:
            computed_recs = recommend_tf_feat(
                self,
                user_ids,
                n_rec,
                user_feats,
                item_data,
                filter_consumed,
                random_rec,
            )
            user_recs = construct_rec(self.data_info, user_ids, computed_recs, inner_id)
            result_recs.update(user_recs)
        return result_recs

    @staticmethod
    def _mean_row_update(variable, n_rows):
        """Return an op writing the mean of rows ``[0, n_rows)`` into row ``n_rows``."""
        mean_slice = tf.IndexedSlices(
            tf.reduce_mean(
                tf.gather(variable, tf.range(n_rows)), axis=0, keepdims=True
            ),
            [n_rows],
        )
        return variable.scatter_update(mean_slice)

    def assign_tf_variables_oov(self):
        """Fill the out-of-vocabulary rows of the trainable variables with means.

        For every trainable variable whose name is listed by `modify_variable_names`:

        - user variables: row ``n_users`` receives the mean of rows ``[0, n_users)``;
        - item variables: row ``n_items`` receives the mean of rows ``[0, n_items)``;
        - sparse variables: for each index ``oov`` in ``data_info.sparse_oov``, row
          ``oov`` receives the mean of rows ``[start, oov)``, where ``start`` is one
          past the previously processed OOV index (initially 0).

        All update ops are executed in a single ``self.sess.run`` call.
        """
        (
            user_variables,
            item_variables,
            sparse_variables,
            _,
            _,
        ) = modify_variable_names(self, trainable=True)

        update_ops = []
        for v in tf.trainable_variables():
            if user_variables is not None and v.name in user_variables:
                update_ops.append(self._mean_row_update(v, self.n_users))
            if item_variables is not None and v.name in item_variables:
                update_ops.append(self._mean_row_update(v, self.n_items))
            if sparse_variables is not None and v.name in sparse_variables:
                start = 0
                for oov in self.data_info.sparse_oov:
                    # multi_sparse case: an OOV index that is not past `start`
                    # leaves no rows to average, so it is skipped.
                    if start >= oov:
                        continue
                    mean_tensor = tf.reduce_mean(
                        tf.gather(v, tf.range(start, oov)), axis=0, keepdims=True
                    )
                    update_ops.append(v.scatter_nd_update([[oov]], mean_tensor))
                    start = oov + 1

        self.sess.run(update_ops)

    def save(self, path, model_name, manual=True, inference_only=False):
        """Save TF model for inference or retraining.

        Parameters
        ----------
        path : str
            File folder path to save model.
        model_name : str
            Name of the saved model file.
        manual : bool, default: True
            Whether to save model variables using numpy.
        inference_only : bool, default: False
            Whether to save model variables only for inference.

        See Also
        --------
        load
        """
        if not os.path.isdir(path):
            print(f"file folder {path} doesn't exists, creating a new one...")
            # `exist_ok=True` avoids a FileExistsError if the folder is created
            # by someone else between the check above and this call.
            os.makedirs(path, exist_ok=True)
        save_params(self, path, model_name)
        save_default_recs(self, path, model_name)
        if manual:
            save_tf_variables(self.sess, path, model_name, inference_only)
        else:
            save_tf_model(self.sess, path, model_name)

    @classmethod
    def load(cls, path, model_name, data_info, manual=True):
        """Load saved TF model for inference.

        Parameters
        ----------
        path : str
            File folder path to load the model from.
        model_name : str
            Name of the saved model file.
        data_info : :class:`~libreco.data.DataInfo` object
            Object that contains some useful information.
        manual : bool, default: True
            Whether to load model variables using numpy. If you save the model using
            `manual`, you should also load the model using `manual`.

        Returns
        -------
        model : type(cls)
            Loaded TF model.

        See Also
        --------
        save
        """
        if manual:
            return load_tf_variables(cls, path, model_name, data_info)
        return load_tf_model(cls, path, model_name, data_info)