"""TF model base class."""
import abc
import os
import numpy as np
from .base import Base
from ..prediction import predict_tf_feat
from ..recommendation import (
check_dynamic_rec_feats,
cold_start_rec,
construct_rec,
recommend_tf_feat,
)
from ..tfops import modify_variable_names, sess_config, tf
from ..training.dispatch import get_trainer
from ..utils.save_load import (
load_tf_model,
load_tf_variables,
save_default_recs,
save_params,
save_tf_model,
save_tf_variables,
)
from ..utils.validate import check_fitting, check_unknown_user
class TfBase(Base):
    """Base class for TF models.

    Models that rely on TensorFlow graph for inference. Although some models such as
    `RNN4Rec`, `SVD` etc., are trained using TensorFlow, they don't inherit from this
    base class since their inference only uses embeddings.

    Parameters
    ----------
    task : {'rating', 'ranking'}
        Recommendation task. See :ref:`Task`.
    data_info : :class:`~libreco.data.DataInfo` object
        Object that contains useful information for training and inference.
    lower_upper_bound : tuple or None
        Lower and upper score bound for `rating` task.
    tf_sess_config : dict or None
        Optional TensorFlow session config, see `ConfigProto options
        <https://github.com/tensorflow/tensorflow/blob/v2.10.0/tensorflow/core/protobuf/config.proto#L431>`_.
    """

    def __init__(self, task, data_info, lower_upper_bound=None, tf_sess_config=None):
        super().__init__(task, data_info, lower_upper_bound)
        # Session object used by `assign_tf_variables_oov` and `save`.
        self.sess = sess_config(tf_sess_config)
        # Flipped to True the first time `fit` builds the graph.
        self.model_built = False
        # Created lazily in `fit` through `get_trainer`.
        self.trainer = None
        # NOTE(review): presumably set to True by the loading helpers and
        # inspected by `check_fitting` -- confirm against those functions.
        self.loaded = False

    @abc.abstractmethod
    def build_model(self):
        """Build the model graph. Must be implemented by subclasses."""
        raise NotImplementedError

    def fit(
        self,
        train_data,
        neg_sampling,
        verbose=1,
        shuffle=True,
        eval_data=None,
        metrics=None,
        k=10,
        eval_batch_size=8192,
        eval_user_num=None,
        num_workers=0,
    ):
        """Fit TF model on the training data.

        Parameters
        ----------
        train_data : :class:`~libreco.data.TransformedSet` object
            Data object used for training.
        neg_sampling : bool
            Whether to perform negative sampling for training or evaluating data.

            .. versionadded:: 1.1.0

            .. NOTE::
               Negative sampling is needed if your data is implicit(i.e., `task` is ranking)
               and ONLY contains positive labels. Otherwise, it should be False.

        verbose : int, default: 1
            Print verbosity.

            - ``verbose <= 0``: Print nothing.
            - ``verbose == 1``: Print progress bar and training time.
            - ``verbose > 1`` : Print evaluation metrics if ``eval_data`` is provided.

        shuffle : bool, default: True
            Whether to shuffle the training data.
        eval_data : :class:`~libreco.data.TransformedSet` object, default: None
            Data object used for evaluating.
        metrics : list or None, default: None
            List of metrics for evaluating.
        k : int, default: 10
            Parameter of metrics, e.g. recall at k, ndcg at k
        eval_batch_size : int, default: 8192
            Batch size for evaluating.
        eval_user_num : int or None, default: None
            Number of users for evaluating. Setting it to a positive number will sample
            users randomly from eval data.
        num_workers : int, default: 0
            How many subprocesses to use for training data loading.
            0 means that the data will be loaded in the main process,
            which is slower than multiprocessing.

            .. versionadded:: 1.1.0

            .. CAUTION::
               Using multiprocessing(``num_workers`` > 0) may consume more memory than
               single processing. See `Multi-process data loading <https://pytorch.org/docs/stable/data.html#multi-process-data-loading>`_.

        Raises
        ------
        RuntimeError
            If :py:func:`fit` is called from a loaded model(:py:func:`load`).
        AssertionError
            If ``neg_sampling`` parameter is not bool type.
        """
        check_fitting(self, train_data, eval_data, neg_sampling, k)
        self.show_start_time()
        # Build the graph and create the trainer only once, so that repeated
        # calls to `fit` reuse them.
        if not self.model_built:
            self.build_model()
            self.model_built = True
        if self.trainer is None:
            self.trainer = get_trainer(self)
        self.trainer.run(
            train_data,
            neg_sampling,
            verbose,
            shuffle,
            eval_data,
            metrics,
            k,
            eval_batch_size,
            eval_user_num,
            num_workers,
        )
        self.assign_tf_variables_oov()
        # Pre-compute recommendations for the padding user id (`n_users`);
        # `recommend_user` hands them to `cold_start_rec` for unknown users.
        self.default_recs = recommend_tf_feat(
            model=self,
            user_ids=[self.n_users],
            n_rec=min(2000, self.n_items),
            user_feats=None,
            seq=None,
            filter_consumed=False,
            random_rec=False,
        ).flatten()

    def predict(self, user, item, feats=None, cold_start="average", inner_id=False):
        """Make prediction(s) on given user(s) and item(s).

        Parameters
        ----------
        user : int or str or array_like
            User id or batch of user ids.
        item : int or str or array_like
            Item id or batch of item ids.
        feats : dict or pandas.Series or None, default: None
            Extra features used in prediction.
        cold_start : {'popular', 'average'}, default: 'average'
            Cold start strategy.

            - 'popular' will sample from popular items.
            - 'average' will use the average of all the user/item embeddings as the
              representation of the cold-start user/item.

        inner_id : bool, default: False
            Whether to use inner_id defined in `libreco`. For library users inner_id
            may never be used.

        Returns
        -------
        prediction : float or numpy.ndarray
            Predicted scores for each user-item pair.

        Raises
        ------
        ValueError
            If the model is `NCF` and ``feats`` is provided.
        """
        if self.model_name == "NCF" and feats is not None:
            raise ValueError("NCF can't use features.")
        return predict_tf_feat(self, user, item, feats, cold_start, inner_id)

    def recommend_user(
        self,
        user,
        n_rec,
        user_feats=None,
        seq=None,
        cold_start="average",
        inner_id=False,
        filter_consumed=True,
        random_rec=False,
    ):
        """Recommend a list of items for given user(s).

        If both ``user_feats`` and ``seq`` are ``None``, the model will use the stored features
        for recommendation, and the ``cold_start`` strategy will be used for unknown users.

        If either ``user_feats`` or ``seq`` is provided, the model will use them for recommendation.
        In this case, if the ``user`` is unknown, it will be set to padding id, which means
        the ``cold_start`` strategy will not be applied.
        This situation is common when one wants to recommend for an unknown user based on
        user features or behavior sequence.

        Parameters
        ----------
        user : int or str or array_like
            User id or batch of user ids to recommend.
        n_rec : int
            Number of recommendations to return.
        user_feats : dict or None, default: None
            Extra user features for recommendation.
        seq : list or numpy.ndarray or None, default: None
            Extra item sequence for recommendation. If the sequence length is larger than
            `recent_num` hyperparameter specified in the model, it will be truncated.
            If smaller, it will be padded.

            .. versionadded:: 1.1.0

        cold_start : {'popular', 'average'}, default: 'average'
            Cold start strategy.

            - 'popular' will sample from popular items.
            - 'average' will use the average of all the user/item embeddings as the
              representation of the cold-start user/item.

        inner_id : bool, default: False
            Whether to use inner_id defined in `libreco`. For library users inner_id
            may never be used.
        filter_consumed : bool, default: True
            Whether to filter out items that a user has previously consumed.
        random_rec : bool, default: False
            Whether to choose items for recommendation based on their prediction scores.

        Returns
        -------
        recommendation : dict of {Union[int, str, array_like] : numpy.ndarray}
            Recommendation result with user ids as keys and array_like recommended items as values.

        Raises
        ------
        ValueError
            If the model is `NCF` and ``user_feats`` is provided.
        """
        if self.model_name == "NCF" and user_feats is not None:
            raise ValueError("`NCF` can't use features.")

        if user_feats is None and seq is None:
            result_recs = dict()
            user_ids, unknown_users = check_unknown_user(self.data_info, user, inner_id)
            # Unknown users get recommendations from the cold-start strategy.
            if unknown_users:
                cold_recs = cold_start_rec(
                    self.data_info,
                    self.default_recs,
                    cold_start,
                    unknown_users,
                    n_rec,
                    inner_id,
                )
                result_recs.update(cold_recs)
            # Known users are scored by the model itself.
            if user_ids:
                computed_recs = recommend_tf_feat(
                    self,
                    user_ids,
                    n_rec,
                    user_feats,
                    seq,
                    filter_consumed,
                    random_rec,
                    inner_id,
                )
                user_recs = construct_rec(
                    self.data_info, user_ids, computed_recs, inner_id
                )
                result_recs.update(user_recs)
        else:
            # must be a single user if `user_feats` or `seq` is provided
            check_dynamic_rec_feats(self.model_name, user, user_feats, seq)
            user_id = self._convert_id(user, inner_id)
            computed_recs = recommend_tf_feat(
                self,
                [user_id],
                n_rec,
                user_feats,
                seq,
                filter_consumed,
                random_rec,
                inner_id,
            )
            # Map inner item ids back to the original item ids unless the
            # caller asked for inner ids.
            rec_items = (
                computed_recs[0]
                if inner_id
                else np.array([self.data_info.id2item[i] for i in computed_recs[0]])
            )
            # The result is keyed by the user value exactly as it was passed in.
            result_recs = {user: rec_items}
        return result_recs

    def _convert_id(self, user, inner_id):
        """Convert a single user to inner user id.

        If the user doesn't exist, it will be converted to padding id.

        Parameters
        ----------
        user : int or str
            A single (scalar) user id.
        inner_id : bool
            Whether ``user`` is already an inner id.

        Returns
        -------
        int
            The inner user id, or ``self.n_users`` (the padding id) when the user
            is out of range or missing from ``data_info.user2id``.

        Raises
        ------
        AssertionError
            If ``user`` is not a scalar.
        ValueError
            If ``inner_id`` is True and ``user`` is not an integer.
        """
        assert np.isscalar(user), f"User to convert must be scalar, got: {user}"
        if inner_id:
            if not isinstance(user, (int, np.integer)):
                raise ValueError(f"`inner id` user must be int, got {user}")
            return user if 0 <= user < self.n_users else self.n_users
        else:
            return self.data_info.user2id.get(user, self.n_users)

    def assign_tf_variables_oov(self):
        """Fill the out-of-vocabulary rows of the variables with mean values.

        For every trainable variable whose name is listed by
        ``modify_variable_names``:

        - user variables: row ``n_users`` is set to the mean of rows ``[0, n_users)``;
        - item variables: row ``n_items`` is set to the mean of rows ``[0, n_items)``;
        - sparse variables: for each index ``oov`` in ``data_info.sparse_oov``,
          row ``oov`` is set to the mean of the rows between the previous oov
          index (exclusive) and ``oov`` (exclusive).

        All the update ops are collected and executed in a single ``sess.run`` call.
        """
        (
            user_variables,
            item_variables,
            sparse_variables,
            _,
            _,
        ) = modify_variable_names(self, trainable=True)

        update_ops = []
        for v in tf.trainable_variables():
            if user_variables is not None and v.name in user_variables:
                mean_op = tf.IndexedSlices(
                    tf.reduce_mean(
                        tf.gather(v, tf.range(self.n_users)), axis=0, keepdims=True
                    ),
                    [self.n_users],
                )
                update_ops.append(v.scatter_update(mean_op))

            if item_variables is not None and v.name in item_variables:
                mean_op = tf.IndexedSlices(
                    tf.reduce_mean(
                        tf.gather(v, tf.range(self.n_items)), axis=0, keepdims=True
                    ),
                    [self.n_items],
                )
                update_ops.append(v.scatter_update(mean_op))

            if sparse_variables is not None and v.name in sparse_variables:
                sparse_oovs = self.data_info.sparse_oov
                # `start` is the first row of the range currently being averaged.
                start = 0
                for oov in sparse_oovs:
                    # multi_sparse case
                    if start >= oov:
                        continue
                    mean_tensor = tf.reduce_mean(
                        tf.gather(v, tf.range(start, oov)), axis=0, keepdims=True
                    )
                    update_ops.append(v.scatter_nd_update([[oov]], mean_tensor))
                    # The next range starts right after this oov row.
                    start = oov + 1

        self.sess.run(update_ops)

    def build_topk(self, outputs):
        """Create the top-k op over ``outputs``.

        A scalar int32 placeholder is stored in ``self.k`` so that the number of
        items can be fed at run time.

        Parameters
        ----------
        outputs : tensor
            Tensor passed to ``tf.math.top_k``.

        Returns
        -------
        indices : tensor
            Indices of the ``k`` largest entries, sorted (``sorted=True``).
        """
        self.k = tf.placeholder(tf.int32, shape=())
        _, indices = tf.math.top_k(outputs, self.k, sorted=True)
        return indices

    def save(self, path, model_name, manual=True, inference_only=False):
        """Save TF model for inference or retraining.

        Parameters
        ----------
        path : str
            File folder path to save model.
        model_name : str
            Name of the saved model file.
        manual : bool, default: True
            Whether to save model variables using numpy.
        inference_only : bool, default: False
            Whether to save model variables only for inference.

        See Also
        --------
        load
        """
        if not os.path.isdir(path):
            print(f"file folder {path} doesn't exists, creating a new one...")
            # `exist_ok=True` tolerates the folder being created by someone
            # else between the check above and this call.
            os.makedirs(path, exist_ok=True)
        save_params(self, path, model_name)
        save_default_recs(self, path, model_name)
        if manual:
            save_tf_variables(self.sess, path, model_name, inference_only)
        else:
            save_tf_model(self.sess, path, model_name)

    @classmethod
    def load(cls, path, model_name, data_info, manual=True):
        """Load saved TF model for inference.

        Parameters
        ----------
        path : str
            File folder path to load the model from.
        model_name : str
            Name of the saved model file.
        data_info : :class:`~libreco.data.DataInfo` object
            Object that contains some useful information.
        manual : bool, default: True
            Whether to load model variables using numpy. If you save the model using
            `manual`, you should also load the model using `manual`.

        Returns
        -------
        model : type(cls)
            Loaded TF model.

        See Also
        --------
        save
        """
        if manual:
            return load_tf_variables(cls, path, model_name, data_info)
        else:
            return load_tf_model(cls, path, model_name, data_info)