Source code for libreco.algorithms.deepwalk

"""Implementation of DeepWalk."""
import random
from collections import defaultdict

import numpy as np
from gensim.models import Word2Vec
from tqdm import tqdm

from ..bases import GensimBase


class DeepWalk(GensimBase):
    """*DeepWalk* algorithm.

    Item embeddings are learned by running truncated random walks over an
    item-to-item graph and feeding the walks to a skip-gram ``Word2Vec`` model.

    .. CAUTION::
        DeepWalk can only be used in ``ranking`` task.

    Parameters
    ----------
    task : {'ranking'}
        Recommendation task. See :ref:`Task`.
    data_info : :class:`~libreco.data.DataInfo` object
        Object that contains useful information for training and inference.
    embed_size: int, default: 16
        Vector size of embeddings.
    norm_embed : bool, default: False
        Whether to l2 normalize output embeddings.
    n_walks : int, default: 10
        Number of walks for every item.
    walk_length : int, default: 10
        Length of each walk.
    window_size : int, default: 5
        Maximum item distance within a sequence during training.
    n_epochs: int, default: 10
        Number of epochs for training.
    n_threads : int, default: 0
        Number of threads to use, `0` will use all cores.
    seed : int, default: 42
        Random seed.
    lower_upper_bound : tuple or None, default: None
        Lower and upper score bound for `rating` task.

    Raises
    ------
    AssertionError
        If ``task`` is not ``"ranking"``.

    References
    ----------
    *Bryan Perozzi et al.* `DeepWalk: Online Learning of Social Representations
    <https://arxiv.org/pdf/1403.6652.pdf>`_.
    """

    def __init__(
        self,
        task,
        data_info,
        embed_size=16,
        norm_embed=False,
        n_walks=10,
        walk_length=10,
        window_size=5,
        n_epochs=10,
        n_threads=0,
        seed=42,
        lower_upper_bound=None,
    ):
        super().__init__(
            task,
            data_info,
            embed_size,
            norm_embed,
            window_size,
            n_epochs,
            n_threads,
            seed,
            lower_upper_bound,
        )
        # Raised explicitly (rather than via ``assert``) so the check is not
        # stripped under ``python -O``; the exception type and message are the
        # same ones the ``assert`` statement produced.
        if task != "ranking":
            raise AssertionError("DeepWalk is only suitable for ranking")
        # Must stay free of extra local variables above this line: ``locals()``
        # snapshots every name in scope, i.e. the constructor arguments.
        self.all_args = locals()
        self.n_walks = n_walks
        self.walk_length = walk_length

    def _build_graph(self):
        """Build a directed item graph from consecutive consumed items.

        For every sequence in ``self.user_consumed`` an edge is added from each
        item to the item that directly follows it. Repeated transitions are
        kept as duplicate entries in the adjacency list.

        Returns
        -------
        collections.defaultdict
            Maps an item to the list of items that followed it.
        """
        graph = defaultdict(list)
        # NOTE(review): ``user_consumed`` is presumably set by the base class
        # and maps each user to an indexable item sequence -- confirm.
        for items in self.user_consumed.values():
            for src, dst in zip(items, items[1:]):
                graph[src].append(dst)
        return graph

    def get_data(self):
        """Return the random-walk corpus built on top of the item graph."""
        graph = self._build_graph()
        return _ItemCorpus(
            graph, self.n_items, self.n_walks, self.walk_length, self.seed
        )

    def build_model(self):
        """Create the ``Word2Vec`` model and build its vocabulary.

        The model is only initialized here (vocabulary built from
        ``self.data``); no training call is made in this method.
        """
        model = Word2Vec(
            vector_size=self.embed_size,
            window=self.window_size,
            sg=1,  # skip-gram
            hs=1,  # hierarchical softmax
            seed=self.seed,
            min_count=1,  # keep every item, however rare
            workers=self.workers,
            sorted_vocab=0,  # do not reorder the vocabulary by frequency
        )
        # NOTE(review): ``self.data`` is presumably the corpus returned by
        # ``get_data`` and assigned by the base class -- confirm.
        model.build_vocab(self.data, update=False)
        return model


class _ItemCorpus: def __init__(self, graph, n_items, n_walks, walk_length, seed): self.graph = graph self.n_items = n_items self.n_walks = n_walks self.walk_length = walk_length self.np_rng = np.random.default_rng(seed) self.i = 0 def __iter__(self): for _ in tqdm(range(self.n_walks), desc=f"DeepWalk iter {self.i}"): for node in self.np_rng.permutation(self.n_items): walk = [node] while len(walk) < self.walk_length: neighbors = self.graph[walk[-1]] if len(neighbors) > 0: walk.append(random.choice(neighbors)) else: break yield list(map(str, walk)) self.i += 1