diff --git a/cornac/models/mmnr/__init__.py b/cornac/models/mmnr/__init__.py
new file mode 100644
index 00000000..34e73bed
--- /dev/null
+++ b/cornac/models/mmnr/__init__.py
@@ -0,0 +1,16 @@
+# Copyright 2023 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+from .recom_mmnr import MMNR
diff --git a/cornac/models/mmnr/mmnr.py b/cornac/models/mmnr/mmnr.py
new file mode 100644
index 00000000..308e0e9d
--- /dev/null
+++ b/cornac/models/mmnr/mmnr.py
@@ -0,0 +1,507 @@
+import math
+from collections import Counter
+from itertools import chain
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from scipy.sparse import csr_matrix
+from sklearn.preprocessing import normalize
+from tqdm.auto import trange
+
+OPTIMIZER_DICT = {
+    "sgd": torch.optim.SGD,
+    "adam": torch.optim.Adam,
+    "rmsprop": torch.optim.RMSprop,
+    "adagrad": torch.optim.Adagrad,
+}
+
+
+class Model(nn.Module):
+    def __init__(
+        self,
+        n_items,
+        emb_dim=32,
+        n_aspects=11,
+        padding_idx=None,
+        ctx=3,
+        d1=5,
+        d2=5,
+    ):
+        super(Model, self).__init__()
+
+        self.emb_dim = emb_dim
+        self.n_aspects = n_aspects
+        self.padding_idx = padding_idx if padding_idx is not None else n_items
+        self.ctx = ctx
+        self.d1 = d1
+        self.d2 = d2
+
+        self.item_embedding = nn.Embedding(
+            n_items + 1, self.emb_dim, padding_idx=self.padding_idx
+        )
+
+        # Aspect-specific projection matrices (K different aspects)
+        self.aspProj = nn.Parameter(
+            torch.Tensor(self.n_aspects, self.emb_dim, self.d1), requires_grad=True
+        )
+        self.aspProjSeq = nn.Parameter(
+            torch.Tensor(2 * self.n_aspects, self.d1, self.d2), requires_grad=True
+        )
+        torch.nn.init.xavier_normal_(self.aspProj.data, gain=1)
+        torch.nn.init.xavier_normal_(self.aspProjSeq.data, gain=1)
+
+        # Aspect embeddings and layer norm are registered here rather than
+        # being re-created inside AspectLearning, so that they are part of
+        # model.parameters() and actually get trained.
+        self.aspEmbed = nn.Embedding(self.n_aspects, self.ctx * self.d1)
+        torch.nn.init.xavier_normal_(self.aspEmbed.weight.data, gain=1)
+        self.norm = nn.LayerNorm(self.d1)
+
+        self.out = nn.Linear(self.d2, n_items)
+        self.his_linear_embds = nn.Linear(n_items, self.d2)
+        self.his_nn_embds = nn.Embedding(
+            n_items + 1, self.d2, padding_idx=self.padding_idx
+        )
+        self.gate_his = nn.Linear(self.d2, 1)
+
+        self.asp_h1_h2 = nn.Linear(self.d1, self.d2)
+
+    def forward(self, seq, decay, uHis, iHis, device):
+        batch = seq.shape[0]  # batch
+        self.max_seq = seq.shape[1]  # L
+        self.max_bas = seq.shape[2]  # B
+
+        # Multi-view Embedding
+        uEmbs, iEmbs = self.EmbeddingLayer(
+            batch, seq, uHis, iHis, device
+        )  # [batch, L, B, d]
+
+        # Multi-aspect Representation Learning
+        uEmbsAsp = self.AspectLearning(uEmbs, batch, device)  # [batch, asp, L, h1]
+        iEmbsAsp = self.AspectLearning(iEmbs, batch, device)
+
+        # decay [batch, L, 1] -> [batch, asp, L, 1]
+        decay = decay.unsqueeze(1)  # [batch, 1, L, 1]
+        decay = decay.repeat(1, self.n_aspects, 1, 1)  # [batch, asp, L, 1]
+        uEmbsAspDec = uEmbsAsp * decay  # [batch, asp, L, h1]
+        iEmbsAspDec = iEmbsAsp * decay  # [batch, asp, L, h1]
+
+        uAsp = self.asp_h1_h2(torch.sum(uEmbsAspDec, dim=2) / self.max_seq)
+        iAsp = self.asp_h1_h2(torch.sum(iEmbsAspDec, dim=2) / self.max_seq)
+
+        result, loss_cl = self.PredictionLayer(uAsp, iAsp, uHis)
+
+        return result, loss_cl
+
+    def EmbeddingLayer(self, batch, seq, uHis, iHis, device):
+        """
+        input:
+            seq [batch, L, B] (item indices)
+        output:
+            userEmbs [batch, L, B, d]
+            itemEmbs [batch, L, B, d]
+        """
+        embs = self.item_embedding(seq)
+
+        # [batch * max_seq * max_bas]
+        row = (
+            torch.arange(batch)
+            .repeat(self.max_seq * self.max_bas, 1)
+            .transpose(0, 1)
+            .reshape(-1)
+        )
+        col = seq.reshape(-1)  # flattened [batch * L * B]
+
+        padded = torch.zeros(batch, 1).to(device)  # [batch, 1]
+        userHis = torch.cat((uHis, padded), dim=1)  # [batch, n_items + 1]
+        itemHis = torch.cat((iHis, padded), dim=1)  # [batch, n_items + 1]
+
+        uMatrix = userHis[row, col].reshape(
+            batch, self.max_seq, -1, 1
+        )  # [batch, L, B, 1]
+        iMatrix = itemHis[row, col].reshape(
+            batch, self.max_seq, -1, 1
+        )  # [batch, L, B, 1]
+
+        uEmbs = embs * uMatrix
+        iEmbs = embs * iMatrix
+
+        return uEmbs, iEmbs
+
+    def AspectLearning(self, embs, batch, device):
+        """
+        input:
+            embs [batch, L, B, d] (user- or item-view embeddings)
+        output:
+            basketAsp [batch, asp, L, h1]
+        """
+        # self.aspEmbed and self.norm are registered in __init__ so that their
+        # weights are trained (re-creating them here would reset them on every
+        # forward pass).
+
+        # Loop over all aspects
+        asp_lst = []
+        for a in range(self.n_aspects):
+            # [batch, L, B, d] x [d, h1] = [batch, L, B, h1]
+            aspProj = torch.tanh(torch.matmul(embs, self.norm(self.aspProj[a])))
+
+            # [batch, L, 1] -> [batch, L, 1, ctx*h1]
+            aspEmbed = self.aspEmbed(
+                torch.full((batch, self.max_seq, 1), a, dtype=torch.long, device=device)
+            )
+            aspEmbed = torch.transpose(aspEmbed, 2, 3)  # [batch, L, ctx*h1, 1]
+
+            if self.ctx == 1:
+                # [batch, L, B, (1*h1)] x [batch, L, (1*h1), 1] = [batch, L, B, 1]
+                aspAttn = torch.matmul(aspProj, aspEmbed)
+                aspAttn = F.softmax(aspAttn, dim=2)  # [batch, L, B, 1]
+            else:
+                pad_size = int((self.ctx - 1) / 2)
+
+                # [batch, L, B + 2 * pad_size, h1]
+                aspProj_padded = F.pad(
+                    aspProj, (0, 0, pad_size, pad_size), "constant", 0
+                )
+
+                # [batch, L, B + 2 * pad_size, h1] -> [batch, L, B, h1, ctx]
+                aspProj_padded = aspProj_padded.unfold(2, self.ctx, 1)  # sliding window
+                aspProj_padded = torch.transpose(aspProj_padded, 3, 4)
+                # [batch, L, B, ctx*h1]
+                aspProj_padded = aspProj_padded.contiguous().view(
+                    -1, self.max_seq, self.max_bas, self.ctx * self.d1
+                )
+
+                # Calculate attention: inner product & softmax
+                # [batch, L, B, (ctx*h1)] x [batch, L, (ctx*h1), 1] -> [batch, L, B, 1]
+                aspAttn = torch.matmul(aspProj_padded, aspEmbed)
+                aspAttn = F.softmax(aspAttn, dim=2)  # [batch, L, B, 1]
+
+            # [batch, L, B, h1] x [batch, L, B, 1]
+            aspItem = aspProj * aspAttn.expand_as(aspProj)  # [batch, L, B, h1]
+            batch_asp = torch.sum(aspItem, dim=2)  # [batch, L, h1]
+
+            # [batch, L, h1] -> [batch, 1, L, h1]
+            asp_lst.append(torch.unsqueeze(batch_asp, 1))
+
+        # [batch, asp, L, h1]
+        basketAsp = torch.cat(asp_lst, dim=1)
+
+        return basketAsp
+
+    def PredictionLayer(self, uuAsp, iiAsp, his):
+        intent = []
+        loss_cl = 0
+        # Loop over each aspect
+        for b in range(uuAsp.shape[1]):
+            uInterest = torch.tanh(uuAsp[:, b, :])  # [batch, h2]
+            iInterest = torch.tanh(iiAsp[:, b, :])  # [batch, h2]
+
+            uLoss = self.cl_loss(uInterest, iInterest)  # [batch]
+            iLoss = self.cl_loss(iInterest, uInterest)  # [batch]
+            cLoss = uLoss + iLoss
+
+            Interest = torch.cat(
+                [uInterest.unsqueeze(2), iInterest.unsqueeze(2)], dim=2
+            )  # [batch, h2, 2]
+            Interests = torch.sum(Interest, dim=2)  # [batch, h2]
+            scores_trans = self.out(Interests)  # [batch, h2] -> [batch, n_items]
+            scores_trans = F.softmax(scores_trans, dim=-1)  # [batch, n_items]
+
+            hisEmb = self.his_linear_embds(his)  # [batch, n_items] -> [batch, h2]
+
+            # gate_his: [h2 -> 1]
+            gate = torch.sigmoid(
+                self.gate_his(hisEmb) + self.gate_his(Interests)
+            )  # [batch, 1]
+
+            res = gate * scores_trans + (1 - gate) * his  # [batch, n_items]
+            res = res / math.sqrt(self.emb_dim)
+
+            intent.append(res.unsqueeze(2))
+            loss_cl += cLoss.mean()
+
+        results = torch.cat(intent, dim=2)  # [batch, n_items, asp]
+        result = F.max_pool1d(results, int(results.size(2))).squeeze(
+            2
+        )  # [batch, n_items]
+        loss_cl = loss_cl / self.n_aspects
+
+        return result, loss_cl
+
+    def sim(self, z1: torch.Tensor, z2: torch.Tensor):
+        z1 = F.normalize(z1)
+        z2 = F.normalize(z2)
+        return torch.mm(z1, z2.t())
+
+    def cl_loss(self, z1: torch.Tensor, z2: torch.Tensor):
+        tau = 0.6
+        f = lambda x: torch.exp(x / tau)
+
+        refl_sim = f(self.sim(z1, z1))
+        between_sim = f(self.sim(z1, z2))
+        return -torch.log(
+            between_sim.diag()
+            / (refl_sim.sum(1) + between_sim.sum(1) - refl_sim.diag())
+        )
+
+
+def transform_data(
+    batch_users,
+    batch_basket_items,
+    user_history_matrix,
+    item_history_matrix,
+    total_items,
+    decay,
+    device,
+    is_test=False,
+):
+    padding_idx = total_items
+    if is_test:
+        batch_history_items = [
+            [np.unique(basket).tolist() for basket in basket_items]
+            for basket_items in batch_basket_items
+        ]
+        batch_targets = None
+    else:
+        batch_history_items = [
+            [np.unique(basket).tolist() for basket in basket_items[:-1]]
+            for basket_items in batch_basket_items
+        ]
+        batch_targets = np.zeros((len(batch_basket_items), total_items), dtype="uint8")
+        for inc, basket_items in enumerate(batch_basket_items):
+            batch_targets[inc, basket_items[-1]] = 1
+        batch_targets = torch.tensor(batch_targets, dtype=torch.uint8, device=device)
+
+    batch_lengths = [
+        [len(basket) for basket in history_items]
+        for history_items in batch_history_items
+    ]
+
+    max_sequence_size = max(len(lengths) for lengths in batch_lengths)
+    max_basket_size = max(max(lengths) for lengths in batch_lengths)
+    padded_samples = []
+    padded_decays = []
+    for history_items in batch_history_items:
+        padded_samples.append(
+            [
+                basket + [padding_idx] * (max_basket_size - len(basket))
+                for basket in history_items
+            ]
+            + [[padding_idx] * max_basket_size]
+            * (max_sequence_size - len(history_items))
+        )
+        padded_decays.append(
+            [
+                decay ** (len(history_items) - 1 - inc)
+                for inc, _ in enumerate(history_items)
+            ]
+            + [0] * (max_sequence_size - len(history_items))
+        )
+    padded_samples = (
+        torch.from_numpy(np.asarray(padded_samples, dtype=np.int32))
+        .type(torch.LongTensor)
+        .to(device)
+    )
+    padded_decays = (
+        torch.from_numpy(
+            np.asarray(padded_decays, dtype=np.float32).reshape(
+                len(batch_history_items), -1, 1
+            )
+        )
+        .type(torch.FloatTensor)
+        .to(device)
+    )
+    # .toarray() yields a dense ndarray, which torch.from_numpy accepts
+    userhis = (
+        torch.from_numpy(user_history_matrix[batch_users].toarray())
+        .type(torch.FloatTensor)
+        .to(device)
+    )
+    itemhis = (
+        torch.from_numpy(item_history_matrix[batch_users].toarray())
+        .type(torch.FloatTensor)
+        .to(device)
+    )
+    return padded_samples, padded_decays, userhis, itemhis, batch_targets
+
+
+def build_history_matrix(
+    train_set,
+    val_set,
+    test_set,
+    total_users,
+    total_items,
+    mode="train",
+):
+    counter = Counter()
+    for [user], _, [basket_items] in train_set.ubi_iter(1, shuffle=False):
+        if mode == "train":
+            user_items = chain.from_iterable(basket_items[:-1])
+        else:
+            user_items = chain.from_iterable(basket_items)
+        counter.update((user, item) for item in user_items)
+    if val_set is not None and mode != "train":
+        for [user], _, [basket_items] in val_set.ubi_iter(1, shuffle=False):
+            if mode == "validation":
+                user_items = chain.from_iterable(basket_items[:-1])
+            else:
+                user_items = chain.from_iterable(basket_items)
+            counter.update((user, item) for item in user_items)
+    if test_set is not None and mode == "test":
+        for [user], _, [basket_items] in test_set.ubi_iter(1, shuffle=False):
+            user_items = chain.from_iterable(basket_items[:-1])
+            counter.update((user, item) for item in user_items)
+    users = []
+    items = []
+    counts = []
+    for (user, item), count in counter.items():
+        users.append(user)
+        items.append(item)
+        counts.append(count)
+    users = np.asarray(users, dtype=np.int32)
+    items = np.asarray(items, dtype=np.int32)
+    scores = np.asarray(counts, dtype=np.float32)
+    history_matrix = csr_matrix(
+        (scores, (users, items)), shape=(total_users, total_items)
+    )
+    user_history_matrix = normalize(history_matrix, norm="l1", axis=1)
+    item_history_matrix = normalize(history_matrix, norm="l1", axis=0)
+    return user_history_matrix, item_history_matrix
+
+
+def learn(
+    model,
+    train_set,
+    total_users,
+    total_items,
+    val_set,
+    n_epochs,
+    batch_size,
+    lr,
+    l2,
+    decay,
+    m,
+    n,
+    optimizer,
+    device,
+    verbose=False,
+):
+    model.to(device)
+
+    optimizer = OPTIMIZER_DICT[optimizer](
+        params=model.parameters(),
+        lr=lr,
+        weight_decay=l2,
+    )
+    train_user_history_matrix, train_item_history_matrix = build_history_matrix(
+        train_set=train_set,
+        val_set=val_set,
+        test_set=None,
+        total_users=total_users,
+        total_items=total_items,
+        mode="train",
+    )
+    val_user_history_matrix, val_item_history_matrix = build_history_matrix(
+        train_set=train_set,
+        val_set=val_set,
+        test_set=None,
+        total_users=total_users,
+        total_items=total_items,
+        mode="validation",
+    )
+    progress_bar = trange(1, n_epochs + 1, disable=not verbose)
+    last_val_loss = np.inf
+    last_loss = np.inf
+    for _ in progress_bar:
+        model.train()
+        total_loss = 0.0
+        cnt = 0
+        for inc, (u_batch, _, bi_batch) in enumerate(
+            train_set.ubi_iter(batch_size, shuffle=True)
+        ):
+            (samples, decays, userhis, itemhis, target) = transform_data(
+                u_batch,
+                bi_batch,
+                total_items=total_items,
+                user_history_matrix=train_user_history_matrix,
+                item_history_matrix=train_item_history_matrix,
+                decay=decay,
+                device=device,
+            )
+            scores, loss_cl = model(samples, decays, userhis, itemhis, device)
+            # Weighted binary cross-entropy over all items (m weighs the
+            # positive term, n the negative term); clamp guards against log(0).
+            scores = scores.clamp(min=1e-7, max=1 - 1e-7)
+            loss_ce = (
+                -(
+                    m * target * torch.log(scores)
+                    + n * (1 - target) * torch.log(1 - scores)
+                )
+                .sum(-1)
+                .mean()
+            )
+            loss = loss_ce + loss_cl
+            total_loss += loss.item()
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+
+            cnt += len(bi_batch)
+            last_loss = total_loss / cnt
+            if inc % 10 == 0:
+                progress_bar.set_postfix(loss=last_loss, val_loss=last_val_loss)
+
+        if val_set is not None:
+            model.eval()
+            total_val_loss = 0.0
+            cnt = 0
+            for inc, (u_batch, _, bi_batch) in enumerate(
+                val_set.ubi_iter(batch_size, shuffle=False)
+            ):
+                (samples, decays, userhis, itemhis, target) = transform_data(
+                    u_batch,
+                    bi_batch,
+                    total_items=total_items,
+                    user_history_matrix=val_user_history_matrix,
+                    item_history_matrix=val_item_history_matrix,
+                    decay=decay,
+                    device=device,
+                )
+                with torch.no_grad():  # no gradient tracking for validation
+                    scores, loss_cl = model(samples, decays, userhis, itemhis, device)
+                scores = scores.clamp(min=1e-7, max=1 - 1e-7)
+                loss_ce = (
+                    -(
+                        m * target * torch.log(scores)
+                        + n * (1 - target) * torch.log(1 - scores)
+                    )
+                    .sum(-1)
+                    .mean()
+                )
+                loss = loss_ce + loss_cl
+                total_val_loss += loss.item()
+                cnt += len(bi_batch)
+                last_val_loss = total_val_loss / cnt
+                if inc % 10 == 0:
+                    progress_bar.set_postfix(loss=last_loss, val_loss=last_val_loss)
+
+
+def score(
+    model,
+    user_history_matrix,
+    item_history_matrix,
+    total_items,
+    user_idx,
+    history_baskets,
+    decay,
+    device,
+):
+    model.eval()
+    (samples, decays, userhis, itemhis, _) = transform_data(
+        [user_idx],
+        [history_baskets],
+        total_items=total_items,
+        user_history_matrix=user_history_matrix,
+        item_history_matrix=item_history_matrix,
+        decay=decay,
+        device=device,
+        is_test=True,
+    )
+    with torch.no_grad():
+        scores, _ = model(samples, decays, userhis, itemhis, device)
+    return scores.cpu().detach().numpy().squeeze()
diff --git a/cornac/models/mmnr/recom_mmnr.py b/cornac/models/mmnr/recom_mmnr.py
new file mode 100644
index 00000000..4ac94563
--- /dev/null
+++ b/cornac/models/mmnr/recom_mmnr.py
@@ -0,0 +1,135 @@
+# Copyright 2023 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+from ..recommender import NextBasketRecommender
+
+
+class MMNR(NextBasketRecommender):
+    """Multi-view Multi-aspect Neural Recommendation.
+
+    Parameters
+    ----------
+    name: string, default: 'MMNR'
+        The name of the recommender model.
+
+    emb_dim: int, optional, default: 32
+        Item embedding dimension.
+
+    n_aspects: int, optional, default: 11
+        Number of aspects.
+
+    ctx: int, optional, default: 3
+        Context window size of the sliding attention over baskets.
+
+    d1: int, optional, default: 5
+        Dimension of the aspect-specific projection.
+
+    d2: int, optional, default: 5
+        Dimension of the aspect representation fed into the output layer.
+
+    decay: float, optional, default: 0.6
+        Temporal decay factor applied to past baskets.
+
+    lr: float, optional, default: 0.01
+        Learning rate.
+
+    l2: float, optional, default: 0.001
+        L2 regularization (weight decay) coefficient.
+
+    optimizer: string, optional, default: 'adam'
+        Optimizer, one of 'sgd', 'adam', 'rmsprop', 'adagrad'.
+
+    batch_size: int, optional, default: 100
+        Batch size.
+
+    n_epochs: int, optional, default: 20
+        Number of training epochs.
+
+    m: float, optional, default: 1
+        Weight of the positive term in the cross-entropy loss.
+
+    n: float, optional, default: 0.002
+        Weight of the negative term in the cross-entropy loss.
+
+    device: string, optional, default: 'cpu'
+        Device on which to train the model ('cpu' or 'cuda').
+
+    init_params: dict, optional, default: None
+        Precomputed 'user_history_matrix' and 'item_history_matrix' used for scoring.
+
+    trainable: boolean, optional, default: True
+        When False, the model is not trained.
+
+    verbose: boolean, optional, default: False
+        When True, running logs are displayed.
+
+    seed: int, optional, default: None
+        Random seed.
+
+    References
+    ----------
+    Zhiying Deng, Jianjun Li, Zhiqiang Guo, Wei Liu, Li Zou, and Guohui Li. 2023.
+    Multi-view Multi-aspect Neural Networks for Next-basket Recommendation.
+    In Proceedings of the 46th International ACM SIGIR Conference on Research
+    and Development in Information Retrieval (SIGIR '23). Association for
+    Computing Machinery, New York, NY, USA, 1283–1292.
+    https://doi.org/10.1145/3539618.3591738
+    """
+
+    def __init__(
+        self,
+        name="MMNR",
+        emb_dim=32,
+        n_aspects=11,
+        ctx=3,
+        d1=5,
+        d2=5,
+        decay=0.6,
+        lr=1e-2,
+        l2=1e-3,
+        optimizer="adam",
+        batch_size=100,
+        n_epochs=20,
+        m=1,
+        n=0.002,
+        device="cpu",
+        init_params=None,
+        trainable=True,
+        verbose=False,
+        seed=None,
+    ):
+        super().__init__(name=name, trainable=trainable, verbose=verbose)
+        self.emb_dim = emb_dim
+        self.n_aspects = n_aspects
+        self.seed = seed
+        self.ctx = ctx
+        self.d1 = d1
+        self.d2 = d2
+        self.optimizer = optimizer
+        self.lr = lr
+        self.l2 = l2
+        self.m = m
+        self.n = n
+        self.decay = decay
+        self.device = device
+        self.batch_size = batch_size
+        self.n_epochs = n_epochs
+        self.init_params = init_params if init_params is not None else {}
+
+    def fit(self, train_set, val_set=None):
+        super().fit(train_set=train_set, val_set=val_set)
+        from .mmnr import Model, build_history_matrix, learn
+
+        self.model = Model(
+            self.total_items,
+            emb_dim=self.emb_dim,
+            n_aspects=self.n_aspects,
+            padding_idx=self.total_items,
+            ctx=self.ctx,
+            d1=self.d1,
+            d2=self.d2,
+        )
+        learn(
+            model=self.model,
+            train_set=train_set,
+            total_users=self.total_users,
+            total_items=self.total_items,
+            val_set=val_set,
+            n_epochs=self.n_epochs,
+            batch_size=self.batch_size,
+            lr=self.lr,
+            l2=self.l2,
+            m=self.m,
+            n=self.n,
+            decay=self.decay,
+            optimizer=self.optimizer,
+            device=self.device,
+            verbose=self.verbose,
+        )
+
+        self.user_history_matrix = self.init_params.get("user_history_matrix", None)
+        self.item_history_matrix = self.init_params.get("item_history_matrix", None)
+        if self.user_history_matrix is None or self.item_history_matrix is None:
+            print(
+                "Constructing test history matrices from train_set and val_set "
+                "as they are not provided."
+            )
+            self.user_history_matrix, self.item_history_matrix = build_history_matrix(
+                train_set=train_set,
+                val_set=val_set,
+                test_set=None,
+                total_users=self.total_users,
+                total_items=self.total_items,
+                mode="test",
+            )
+        return self
+
+    def score(self, user_idx, history_baskets, **kwargs):
+        from .mmnr import score
+
+        item_scores = score(
+            self.model,
+            self.user_history_matrix,
+            self.item_history_matrix,
+            self.total_items,
+            user_idx,
+            history_baskets,
+            self.decay,
+            self.device,
+        )
+        return item_scores
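
Usage sketch (PR note, not part of the diff): a minimal example of exercising the new model through Cornac's next-basket pipeline. The data file, the `fmt="UBI"` reader format, and the `NextBasketEvaluation` split arguments are placeholders patterned after the repo's other next-basket models, not part of this change.

```python
import cornac
from cornac.data import Reader
from cornac.eval_methods import NextBasketEvaluation
from cornac.metrics import HitRatio, Recall
from cornac.models import MMNR

# Hypothetical basket data: one (user_id, basket_id, item_id) triplet per line.
data = Reader().read("basket_data.csv", fmt="UBI", sep=",")

next_basket_eval = NextBasketEvaluation(
    data=data, test_size=0.2, val_size=0.08, seed=123, verbose=True
)

# Defaults mirror the hyperparameters exposed by recom_mmnr.py.
model = MMNR(emb_dim=32, n_aspects=11, n_epochs=20, device="cpu", verbose=True)

cornac.Experiment(
    eval_method=next_basket_eval,
    models=[model],
    metrics=[HitRatio(k=10), Recall(k=10)],
).run()
```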