Source code for libreco.data.transformed

"""Transformed Dataset."""
import warnings

import numpy as np
from scipy.sparse import csr_matrix

from .consumed import interaction_consumed
from ..utils.sampling import NegativeSampling


class TransformedSet:
    """Dataset after transforming.

    Often generated by calling functions in ``DatasetPure`` or ``DatasetFeat``,
    then ``TransformedSet`` will be used in formal training.

    Parameters
    ----------
    user_indices : numpy.ndarray
        All user rows in data, represented in inner id.
    item_indices : numpy.ndarray
        All item rows in data, represented in inner id.
    labels : numpy.ndarray
        All labels in data.
    sparse_indices : numpy.ndarray or None, default: None
        All sparse rows in data, represented in inner id.
    dense_values : numpy.ndarray or None, default: None
        All dense rows in data.
    train : bool, default: True
        Whether it is train data.

    Notes
    -----
    Only one of the two derived attributes is created, depending on ``train``:

    * ``train=True``: the user-item ``csr_matrix`` behind
      :attr:`sparse_interaction` is built; ``user_consumed`` is not set.
    * ``train=False``: ``user_consumed`` is computed with
      ``interaction_consumed``; accessing :attr:`sparse_interaction` raises
      ``AttributeError``.

    See Also
    --------
    :class:`~libreco.data.dataset.DatasetPure`
    :class:`~libreco.data.dataset.DatasetFeat`
    """

    def __init__(
        self,
        user_indices=None,
        item_indices=None,
        labels=None,
        sparse_indices=None,
        dense_values=None,
        train=True,
    ):
        self._user_indices = user_indices
        self._item_indices = item_indices
        self._labels = labels
        self._sparse_indices = sparse_indices
        self._dense_values = dense_values
        self.has_sampled = False
        if train:
            # Labels become the matrix values at (user, item) coordinates.
            self._sparse_interaction = csr_matrix(
                (labels, (user_indices, item_indices)), dtype=np.float32
            )
        else:
            # Only the first element returned by `interaction_consumed` is kept.
            self.user_consumed, _ = interaction_consumed(user_indices, item_indices)

        # Snapshots of the data taken by `build_negative_samples` before the
        # working arrays are replaced with the sampled ones.
        self.user_indices_orig = None
        self.item_indices_orig = None
        self.labels_orig = None
        self.sparse_indices_orig = None
        self.dense_values_orig = None

    def build_negative_samples(
        self, data_info, num_neg=1, item_gen_mode="random", seed=42
    ):
        """Perform negative sampling on all the data contained.

        .. deprecated:: 1.1.0
           Use ``neg_sampling`` parameter instead of explicitly calling this
           method for negative sampling. See :ref:`Negative Sampling`.

        Parameters
        ----------
        data_info : DataInfo
            Object contains data information.
        num_neg : int, default: 1
            Number of negative samples for each positive sample.
        item_gen_mode : str, default: 'random'
            Sampling strategy, currently only 'random' is supported.
        seed : int, default: 42
            Random seed.
        """
        warnings.warn(
            "`build_negative_samples` is deprecated, and it will be removed in the future. "
            "Use `neg_sampling` parameter instead",
            DeprecationWarning,
            stacklevel=2,
        )
        self.has_sampled = True
        # Keep references to the current arrays before `_sampling_impl`
        # rebinds the working attributes to the sampled data.
        self.user_indices_orig = self._user_indices
        self.item_indices_orig = self._item_indices
        self.labels_orig = self._labels
        self.sparse_indices_orig = self._sparse_indices
        self.dense_values_orig = self._dense_values
        self._sampling_impl(data_info, num_neg, item_gen_mode, seed)

    def _sampling_impl(self, data_info, num_neg=1, item_gen_mode="random", seed=42):
        """Replace the stored arrays with the output of ``NegativeSampling``.

        The sparse/dense flags passed to the sampler reflect whether the
        corresponding feature arrays are present on this dataset.
        """
        sparse_part = self.sparse_indices is not None
        dense_part = self.dense_values is not None
        neg = NegativeSampling(
            self, data_info, num_neg, sparse=sparse_part, dense=dense_part
        )
        (
            self._user_indices,
            self._item_indices,
            self._labels,
            self._sparse_indices,
            self._dense_values,
        ) = neg.generate_all(seed=seed, item_gen_mode=item_gen_mode)

    def __len__(self):
        """Return the number of labels currently stored."""
        return len(self.labels)

    def __getitem__(self, index):
        """Get a slice of data."""
        return self.user_indices[index], self.item_indices[index], self.labels[index]

    @property
    def user_indices(self):
        """All user rows in data"""
        return self._user_indices

    @property
    def item_indices(self):
        """All item rows in data"""
        return self._item_indices

    @property
    def sparse_indices(self):
        """All sparse rows in data"""
        return self._sparse_indices

    @property
    def dense_values(self):
        """All dense rows in data"""
        return self._dense_values

    @property
    def labels(self):
        """All labels in data"""
        return self._labels

    @property
    def sparse_interaction(self):
        """User-item interaction data, in :class:`scipy.sparse.csr_matrix` format.

        Only available when the instance was created with ``train=True``;
        otherwise the backing attribute was never set and ``AttributeError``
        is raised.
        """
        return self._sparse_interaction