Source code for alpenglow.experiments.ALSFactorExperiment

import alpenglow
import alpenglow.Getter as rs

[docs]class ALSFactorExperiment(alpenglow.OnlineExperiment): """ALSFactorExperiment(dimension=10,begin_min=-0.01,begin_max=0.01,number_of_iterations=15,regularization_lambda=1e-3,alpha=40,implicit=1,clear_before_fit=1,period_length=86400) This class implements an online version of the well-known matrix factorization recommendation model [Koren2009]_ and trains it via Alternating Least Squares in a periodic fashion. The model is able to train on explicit data using traditional ALS, and on implicit data using the iALS algorithm [Hu2008]_. .. [Hu2008] Hu, Yifan, Yehuda Koren, and Chris Volinsky. "Collaborative filtering for implicit feedback datasets." Data Mining, 2008. ICDM'08. Eighth IEEE International Conference on. Ieee, 2008. Parameters ---------- dimension : int The latent factor dimension of the factormodel. begin_min : double The factors are initialized randomly, sampling each element uniformly from the interval (begin_min, begin_max). begin_max : double See begin_min. number_of_iterations : int The number of ALS iterations to perform in each period. regularization_lambda : double The coefficient for the L2 regularization term. See [Hu2008]_. This number is multiplied by the number of non-zero elements of the user-item rating matrix before being used, to achieve similar magnitude to the one used in traditional SGD. alpha : int The weight coefficient for positive samples in the error formula. See [Hu2008]_. implicit : int Valued 1 or 0, indicating whether to run iALS or ALS. clear_before_fit : int Whether to reset the model after each period. period_length : int The period length in seconds. timeframe_length : int The size of historic time interval to iterate over at every batch model retrain. Leave at the default 0 to retrain on everything. """ def _config(self, top_k, seed): model = rs.EigenFactorModel(**self.parameter_defaults( begin_min=-0.01, begin_max=0.01, dimension=10, seed=67439852, )) offline_learner = rs.OfflineEigenFactorModelALSLearner(**self.parameter_defaults( number_of_iterations=15, regularization_lambda=1e-3, alpha=40, implicit=1, clear_before_fit=1, )) offline_learner.set_model(model) online_learner = rs.PeriodicOfflineLearnerWrapper(**self.parameter_defaults( write_model=False, read_model=False, clear_model=False, learn=True, base_out_file_name="", base_in_file_name="", )) online_learner.set_model(model) online_learner.add_offline_learner(offline_learner) data_generator_parameters = self.parameter_defaults( timeframe_length=0, ) if(data_generator_parameters['timeframe_length']==0): data_generator = rs.CompletePastDataGenerator() else: data_generator = rs.TimeframeDataGenerator(**data_generator_parameters) online_learner.set_data_generator(data_generator) period_computer = rs.PeriodComputer(**self.parameter_defaults( period_length=86400, start_time=-1, period_mode="time", )) online_learner.set_period_computer(period_computer) return (model, online_learner, [])