diff --git a/model.py b/model.py index 1a0a046..3835a1f 100644 --- a/model.py +++ b/model.py @@ -19,671 +19,741 @@ from collections import Counter class GCNModelAE(nn.Module): - def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args): - super(GCNModelAE, self).__init__() + def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args): + super(GCNModelAE, self).__init__() - self.args = args - self.gc1 = GraphConvolutionSparse(input_feat_dim, hidden_dim1, dropout, act=torch.relu) - self.gc2 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) - self.dc = InnerProductDecoder(dropout, act=lambda x: x) - # self.dc = InnerDecoder(dropout, act=lambda x: x) + self.args = args + self.gc1 = GraphConvolutionSparse(input_feat_dim, hidden_dim1, dropout, act=torch.relu) + self.gc2 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) + self.dc = InnerProductDecoder(dropout, act=lambda x: x) + # self.dc = InnerDecoder(dropout, act=lambda x: x) - def forward(self, x, adj): - z = self.gc1(x,adj) - z = self.gc2(z,adj) - return self.dc(z),z,None + def forward(self, x, adj): + z = self.gc1(x,adj) + z = self.gc2(z,adj) + return self.dc(z),z,None - def loss(self,pred_adj,labels, n_nodes, n_features, norm, pos_weight,L=1): + def loss(self,pred_adj,labels, n_nodes, n_features, norm, pos_weight,L=1): - cost = norm * F.binary_cross_entropy_with_logits(pred_adj, labels,pos_weight = pos_weight) - return cost, + cost = norm * F.binary_cross_entropy_with_logits(pred_adj, labels,pos_weight = pos_weight) + return cost, - def check_parameters(self): - for name, param in self.named_parameters(): - if param.requires_grad: - print(name, param.data,param.data.shape) + def check_parameters(self): + for name, param in self.named_parameters(): + if param.requires_grad: + print(name, param.data,param.data.shape) class GCNModelVAE(nn.Module): - def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args): - super(GCNModelVAE, self).__init__() + def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args): + super(GCNModelVAE, self).__init__() - self.args = args - self.gc1 = GraphConvolutionSparse(input_feat_dim, hidden_dim1, dropout, act=torch.relu) - self.gc2 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) - self.gc3 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) - self.dc = InnerProductDecoder(dropout, act=lambda x: x) - # self.dc = InnerDecoder(dropout, act=lambda x: x) + self.args = args + self.gc1 = GraphConvolutionSparse(input_feat_dim, hidden_dim1, dropout, act=torch.relu) + self.gc2 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) + self.gc3 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) + self.dc = InnerProductDecoder(dropout, act=lambda x: x) + # self.dc = InnerDecoder(dropout, act=lambda x: x) - def encoder(self, x, adj): - hidden1 = self.gc1(x, adj) - return self.gc2(hidden1, adj), self.gc3(hidden1, adj) + def encoder(self, x, adj): + hidden1 = self.gc1(x, adj) + return self.gc2(hidden1, adj), self.gc3(hidden1, adj) - def decoder(self,mu,logvar): + def decoder(self,mu,logvar): - z_u = self.reparameterize(mu, logvar) + z_u = self.reparameterize(mu, logvar) - return self.dc(z_u) + return self.dc(z_u) - def reparameterize(self, mu, logvar): - std = torch.exp(logvar) - eps = torch.randn_like(std) - return eps.mul(std).add_(mu) + def reparameterize(self, mu, logvar): + std = torch.exp(logvar) + eps = 
torch.randn_like(std) + return eps.mul(std).add_(mu) - # if self.training: - # std = torch.exp(logvar) - # eps = torch.randn_like(std) - # return eps.mul(std).add_(mu) - # else: - # return mu + # if self.training: + # std = torch.exp(logvar) + # eps = torch.randn_like(std) + # return eps.mul(std).add_(mu) + # else: + # return mu - def forward(self, x, adj): + def forward(self, x, adj): - mu, logvar = self.encoder(x, adj) - z_u = self.reparameterize(mu, logvar) - # z_a = self.reparameterize(mu_a,logvar_a) - return self.dc(z_u),mu, logvar + mu, logvar = self.encoder(x, adj) + z_u = self.reparameterize(mu, logvar) + # z_a = self.reparameterize(mu_a,logvar_a) + return self.dc(z_u),mu, logvar - def loss(self,x,adj,labels, n_nodes, n_features, norm, pos_weight,L=1): + def loss(self,x,adj,labels, n_nodes, n_features, norm, pos_weight,L=1): - det=1e-10 - norm_u = norm - pos_weight_u= pos_weight + det=1e-10 + norm_u = norm + pos_weight_u= pos_weight - L_rec_u=0 + L_rec_u=0 - mu, logvar = self.encoder(x, adj) - # z_mu, z_sigma2_log = self.encoder(x) - for l in range(L): + mu, logvar = self.encoder(x, adj) + # z_mu, z_sigma2_log = self.encoder(x) + for l in range(L): - pred_adj = self.decoder(mu,logvar) + pred_adj = self.decoder(mu,logvar) - cost_u = norm * F.binary_cross_entropy_with_logits(pred_adj, labels ,pos_weight = pos_weight) + cost_u = norm * F.binary_cross_entropy_with_logits(pred_adj, labels ,pos_weight = pos_weight) - L_rec_u += cost_u + L_rec_u += cost_u - L_rec_u/=L + L_rec_u/=L - KLD = -0.5 / n_nodes * torch.mean(torch.sum(1 + 2 * logvar - mu.pow(2) - logvar.exp().pow(2),1)) - return L_rec_u, KLD + KLD = -0.5 / n_nodes * torch.mean(torch.sum(1 + 2 * logvar - mu.pow(2) - logvar.exp().pow(2),1)) + return L_rec_u, KLD - def check_parameters(self): - for name, param in self.named_parameters(): - if param.requires_grad: - print(name, param.data,param.data.shape) + def check_parameters(self): + for name, param in self.named_parameters(): + if param.requires_grad: + print(name, param.data,param.data.shape) class GCNModelVAECD(nn.Module): - def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args): - super(GCNModelVAECD, self).__init__() + def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args): + super(GCNModelVAECD, self).__init__() - self.args = args - self.gc1 = GraphConvolutionSparse(input_feat_dim, hidden_dim1, dropout, act=torch.relu) - self.gc2 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) - self.gc3 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) - self.dc = InnerProductDecoder(dropout, act=lambda x: x) - # self.dc = InnerDecoder(dropout, act=lambda x: x) + self.args = args + self.gc1 = GraphConvolutionSparse(input_feat_dim, hidden_dim1, dropout, act=torch.relu) + self.gc2 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) + self.gc3 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) + self.dc = InnerProductDecoder(dropout, act=lambda x: x) + # self.dc = InnerDecoder(dropout, act=lambda x: x) - #for embedding attributes/features - # self.linear_a1= Linear(n_nodes, hidden_dim1, act = torch.tanh,sparse_inputs=True) # the input dim is the number of nodes - # self.linear_a2= Linear(hidden_dim1, hidden_dim2, act = lambda x:x) - # self.linear_a3= Linear(hidden_dim1, hidden_dim2, act = lambda x:x) + #for embedding attributes/features + # self.linear_a1= Linear(n_nodes, hidden_dim1, act = torch.tanh,sparse_inputs=True) # the input dim is the number of 
nodes + # self.linear_a2= Linear(hidden_dim1, hidden_dim2, act = lambda x:x) + # self.linear_a3= Linear(hidden_dim1, hidden_dim2, act = lambda x:x) - self.pi_=nn.Parameter(torch.FloatTensor(args.nClusters,).fill_(1)/args.nClusters,requires_grad=True) - self.mu_c=nn.Parameter(torch.randn(args.nClusters,hidden_dim2),requires_grad=True) - self.log_sigma2_c=nn.Parameter(torch.randn(args.nClusters,hidden_dim2),requires_grad=True) + self.pi_=nn.Parameter(torch.FloatTensor(args.nClusters,).fill_(1)/args.nClusters,requires_grad=True) + self.mu_c=nn.Parameter(torch.randn(args.nClusters,hidden_dim2),requires_grad=True) + self.log_sigma2_c=nn.Parameter(torch.randn(args.nClusters,hidden_dim2),requires_grad=True) - def encoder(self, x, adj): - hidden1 = self.gc1(x, adj) - # hidden_a1 = self.linear_a1(x.t()) # transpose the input feature matrix + def encoder(self, x, adj): + hidden1 = self.gc1(x, adj) + # hidden_a1 = self.linear_a1(x.t()) # transpose the input feature matrix - return self.gc2(hidden1, adj), self.gc3(hidden1, adj) + return self.gc2(hidden1, adj), self.gc3(hidden1, adj) - def decoder(self,mu,logvar): + def decoder(self,mu,logvar): - z_u = self.reparameterize(mu, logvar) - # z_a = self.reparameterize(mu_a,logvar_a) + z_u = self.reparameterize(mu, logvar) + # z_a = self.reparameterize(mu_a,logvar_a) - return self.dc(z_u) + return self.dc(z_u) - def reparameterize(self, mu, logvar): - if self.training: - std = torch.exp(logvar) - eps = torch.randn_like(std) - return eps.mul(std).add_(mu) - else: - return mu + def reparameterize(self, mu, logvar): + if self.training: + std = torch.exp(logvar) + eps = torch.randn_like(std) + return eps.mul(std).add_(mu) + else: + return mu - def forward(self, x, adj): + def forward(self, x, adj): - mu, logvar = self.encoder(x, adj) - z_u = self.reparameterize(mu, logvar) - # z_a = self.reparameterize(mu_a,logvar_a) - return self.dc(z_u),mu, logvar + mu, logvar = self.encoder(x, adj) + z_u = self.reparameterize(mu, logvar) + # z_a = self.reparameterize(mu_a,logvar_a) + return self.dc(z_u),mu, logvar - def loss(self,x,adj,labels, n_nodes, n_features, norm, pos_weight,L=1): + def loss(self,x,adj,labels, n_nodes, n_features, norm, pos_weight,L=1): - det=1e-10 - norm_u = norm - pos_weight_u= pos_weight + det=1e-10 + norm_u = norm + pos_weight_u= pos_weight - L_rec_u=0 + L_rec_u=0 - mu, logvar = self.encoder(x, adj) - hidden_dim2 = mu.shape[1] + mu, logvar = self.encoder(x, adj) + hidden_dim2 = mu.shape[1] - # z_mu, z_sigma2_log = self.encoder(x) - for l in range(L): + # z_mu, z_sigma2_log = self.encoder(x) + for l in range(L): - # z=torch.randn_like(z_mu)*torch.exp(z_sigma2_log/2)+z_mu - pred_adj = self.decoder(mu,logvar) - # L_rec+=F.binary_cross_entropy(x_pro,x) + # z=torch.randn_like(z_mu)*torch.exp(z_sigma2_log/2)+z_mu + pred_adj = self.decoder(mu,logvar) + # L_rec+=F.binary_cross_entropy(x_pro,x) - # cost_u = norm * F.binary_cross_entropy_with_logits(pred_adj, labels_sub_u,pos_weight = pos_weight) - cost_u = norm * F.binary_cross_entropy_with_logits(pred_adj, labels ,pos_weight = pos_weight) - # cost_a = norm_a * F.binary_cross_entropy_with_logits(pred_x, labels_sub_a, pos_weight = pos_weight_a) - # cost_a =torch.Tensor(1).fill_(0) + # cost_u = norm * F.binary_cross_entropy_with_logits(pred_adj, labels_sub_u,pos_weight = pos_weight) + cost_u = norm * F.binary_cross_entropy_with_logits(pred_adj, labels ,pos_weight = pos_weight) + # cost_a = norm_a * F.binary_cross_entropy_with_logits(pred_x, labels_sub_a, pos_weight = pos_weight_a) + # cost_a 
=torch.Tensor(1).fill_(0) - L_rec_u += cost_u - # L_rec_a += cost_a + L_rec_u += cost_u + # L_rec_a += cost_a - L_rec_u/=L - # L_rec_a/=L + L_rec_u/=L + # L_rec_a/=L - # z_a = self.reparameterize(mu_a,logvar_a) - # KLD_a = (0.5 / n_features) * torch.mean(torch.sum(-1 - 2 * logvar_a + mu_a.pow(2) + logvar_a.exp().pow(2), 1)) - # KLD_a =torch.Tensor(1).fill_(0) + # z_a = self.reparameterize(mu_a,logvar_a) + # KLD_a = (0.5 / n_features) * torch.mean(torch.sum(-1 - 2 * logvar_a + mu_a.pow(2) + logvar_a.exp().pow(2), 1)) + # KLD_a =torch.Tensor(1).fill_(0) - # Loss=L_rec*x.size(1) + # Loss=L_rec*x.size(1) - self.pi_.data = (self.pi_/self.pi_.sum()).data - # log_sigma2_c=self.log_sigma2_c - # mu_c=self.mu_c + self.pi_.data = (self.pi_/self.pi_.sum()).data + # log_sigma2_c=self.log_sigma2_c + # mu_c=self.mu_c - # z = torch.randn_like(z_mu) * torch.exp(z_sigma2_log / 2) + z_mu - z = self.reparameterize(mu,logvar) + # z = torch.randn_like(z_mu) * torch.exp(z_sigma2_log / 2) + z_mu + z = self.reparameterize(mu,logvar) - gamma_c=torch.exp(torch.log(self.pi_.unsqueeze(0))+self.gaussian_pdfs_log(z,self.mu_c,self.log_sigma2_c))+det - gamma_c = F.softmax(gamma_c) # is softmax a good way? + gamma_c=torch.exp(torch.log(self.pi_.unsqueeze(0))+self.gaussian_pdfs_log(z,self.mu_c,self.log_sigma2_c))+det + gamma_c = F.softmax(gamma_c) # is softmax a good way? - gamma_c=gamma_c/(gamma_c.sum(1).view(-1,1)) #shape: batch_size*Clusters - self.pi_.data = gamma_c.mean(0).data # prior need to be re-normalized? In GMM, prior is based on gamma_c:https://brilliant.org/wiki/gaussian-mixture-model/ + gamma_c=gamma_c/(gamma_c.sum(1).view(-1,1)) #shape: batch_size*Clusters + self.pi_.data = gamma_c.mean(0).data # prior need to be re-normalized? In GMM, prior is based on gamma_c:https://brilliant.org/wiki/gaussian-mixture-model/ - # KLD_u_c=(0.5 / n_nodes)*torch.mean(torch.sum(gamma_c*torch.sum(self.log_sigma2_c.unsqueeze(0)+\ - # torch.exp(2*logvar.unsqueeze(1)-self.log_sigma2_c.unsqueeze(0))+\ - # (mu.unsqueeze(1)-self.mu_c.unsqueeze(0)).pow(2)/torch.exp(self.log_sigma2_c.unsqueeze(0)),2),1)) + # KLD_u_c=(0.5 / n_nodes)*torch.mean(torch.sum(gamma_c*torch.sum(self.log_sigma2_c.unsqueeze(0)+\ + # torch.exp(2*logvar.unsqueeze(1)-self.log_sigma2_c.unsqueeze(0))+\ + # (mu.unsqueeze(1)-self.mu_c.unsqueeze(0)).pow(2)/torch.exp(self.log_sigma2_c.unsqueeze(0)),2),1)) - # KLD_u_c-= (0.5/n_nodes)*torch.mean(torch.sum(1+2*logvar,1)) - # gamma_loss = (1 / self.args.nClusters) * torch.mean(torch.sum(gamma_c*torch.log(gamma_c/self.pi_.unsqueeze(0)),1)) - (0.5 / hidden_dim2)*torch.mean(torch.sum(1+2*logvar,1)) + # KLD_u_c-= (0.5/n_nodes)*torch.mean(torch.sum(1+2*logvar,1)) + # gamma_loss = (1 / self.args.nClusters) * torch.mean(torch.sum(gamma_c*torch.log(gamma_c/self.pi_.unsqueeze(0)),1)) - (0.5 / hidden_dim2)*torch.mean(torch.sum(1+2*logvar,1)) - KLD_u_c=-(0.5/n_nodes)*torch.mean(torch.sum(gamma_c*torch.sum(-1+self.log_sigma2_c.unsqueeze(0)-2*logvar.unsqueeze(1)+ - torch.exp(2*logvar.unsqueeze(1)-self.log_sigma2_c.unsqueeze(0))+ - (mu.unsqueeze(1)-self.mu_c.unsqueeze(0)).pow(2)/torch.exp(self.log_sigma2_c.unsqueeze(0)),2),1)) + KLD_u_c=-(0.5/n_nodes)*torch.mean(torch.sum(gamma_c*torch.sum(-1+self.log_sigma2_c.unsqueeze(0)-2*logvar.unsqueeze(1)+ + torch.exp(2*logvar.unsqueeze(1)-self.log_sigma2_c.unsqueeze(0))+ + (mu.unsqueeze(1)-self.mu_c.unsqueeze(0)).pow(2)/torch.exp(self.log_sigma2_c.unsqueeze(0)),2),1)) - gamma_loss = -(1 / self.args.nClusters) * torch.mean(torch.sum(gamma_c*torch.log(gamma_c/self.pi_.unsqueeze(0)),1)) + gamma_loss = 
-(1 / self.args.nClusters) * torch.mean(torch.sum(gamma_c*torch.log(gamma_c/self.pi_.unsqueeze(0)),1)) - return L_rec_u,-KLD_u_c,-gamma_loss + return L_rec_u,-KLD_u_c,-gamma_loss - def pre_train(self,x,adj,Y,pre_epoch=50): - ''' - This function is used to initialize cluster paramters: pi_, mu_c, log_sigma2_c. - ------------- - paramters: - x: is the feature matrix of graph G. - adj: is the adjacent matrix of graph G. - Y: is the class label for each node in graph G. - ''' + def pre_train(self,x,adj,Y,pre_epoch=50): + ''' + This function is used to initialize cluster paramters: pi_, mu_c, log_sigma2_c. + ------------- + paramters: + x: is the feature matrix of graph G. + adj: is the adjacent matrix of graph G. + Y: is the class label for each node in graph G. + ''' - if not os.path.exists('./pretrain_model_{}.pk'.format(self.args.dataset)): + if not os.path.exists('./pretrain_model_{}.pk'.format(self.args.dataset)): - Loss=nn.MSELoss() - opti=Adam(self.parameters()) #all paramters in model + Loss=nn.MSELoss() + opti=Adam(self.parameters()) #all paramters in model - print('Pretraining......') - # epoch_bar=tqdm(range(pre_epoch)) - # for _ in epoch_bar: - for _ in range(pre_epoch): + print('Pretraining......') + # epoch_bar=tqdm(range(pre_epoch)) + # for _ in epoch_bar: + for _ in range(pre_epoch): - self.train() - L=0 - mu, logvar = self.encoder(x,adj) - pred_adj = self.decoder(mu,logvar) + self.train() + L=0 + mu, logvar = self.encoder(x,adj) + pred_adj = self.decoder(mu,logvar) - loss= Loss(pred_adj,adj.to_dense()) + loss= Loss(pred_adj,adj.to_dense()) - L+=loss.detach().cpu().numpy() + L+=loss.detach().cpu().numpy() - opti.zero_grad() - loss.backward() - opti.step() + opti.zero_grad() + loss.backward() + opti.step() - # epoch_bar.write('L2={:.4f}'.format(L)) - print('L2={:.4f}'.format(L)) + # epoch_bar.write('L2={:.4f}'.format(L)) + print('L2={:.4f}'.format(L)) - self.gc2.load_state_dict(self.gc3.state_dict()) - # self.linear_a2.load_state_dict(self.linear_a3.state_dict()) + self.gc2.load_state_dict(self.gc3.state_dict()) + # self.linear_a2.load_state_dict(self.linear_a3.state_dict()) - with torch.no_grad(): - mu, logvar = self.encoder(x,adj) - assert F.mse_loss(mu, logvar) == 0 - # assert F.mse_loss(mu_a, logvar_a) == 0 - Z = mu.data.numpy() + with torch.no_grad(): + mu, logvar = self.encoder(x,adj) + assert F.mse_loss(mu, logvar) == 0 + # assert F.mse_loss(mu_a, logvar_a) == 0 + Z = mu.data.numpy() - gmm = GaussianMixture(n_components=self.args.nClusters, covariance_type='diag') + gmm = GaussianMixture(n_components=self.args.nClusters, covariance_type='diag') - pre = gmm.fit_predict(Z) - print('Acc={:.4f}%'.format(cluster_acc(pre, Y)[0] * 100)) + pre = gmm.fit_predict(Z) + print('Acc={:.4f}%'.format(cluster_acc(pre, Y)[0] * 100)) - self.pi_.data = torch.from_numpy(gmm.weights_).float() - self.mu_c.data = torch.from_numpy(gmm.means_).float() - self.log_sigma2_c.data = torch.log(torch.from_numpy(gmm.covariances_).float()) + self.pi_.data = torch.from_numpy(gmm.weights_).float() + self.mu_c.data = torch.from_numpy(gmm.means_).float() + self.log_sigma2_c.data = torch.log(torch.from_numpy(gmm.covariances_).float()) - torch.save(self.state_dict(), './pretrain_model_{}.pk'.format(self.args.dataset)) - else: - self.load_state_dict(torch.load('./pretrain_model_{}.pk'.format(self.args.dataset))) + torch.save(self.state_dict(), './pretrain_model_{}.pk'.format(self.args.dataset)) + else: + self.load_state_dict(torch.load('./pretrain_model_{}.pk'.format(self.args.dataset))) - def predict(self,mu, 
logvar): - # z_mu, z_sigma2_log, z_ma,z_a_sigma2_log = self.encoder(x,adj) - # mu, logvar, mu_a, logvar_a = self.encoder(x,adj) - # z = torch.randn_like(mu) * torch.exp(logvar) + mu - z = self.reparameterize(mu,logvar) - pi = self.pi_ - log_sigma2_c = self.log_sigma2_c - mu_c = self.mu_c - gamma_c = torch.exp(torch.log(pi.unsqueeze(0))+self.gaussian_pdfs_log(z,mu_c,log_sigma2_c)) + def predict(self,mu, logvar): + # z_mu, z_sigma2_log, z_ma,z_a_sigma2_log = self.encoder(x,adj) + # mu, logvar, mu_a, logvar_a = self.encoder(x,adj) + # z = torch.randn_like(mu) * torch.exp(logvar) + mu + z = self.reparameterize(mu,logvar) + pi = self.pi_ + log_sigma2_c = self.log_sigma2_c + mu_c = self.mu_c + gamma_c = torch.exp(torch.log(pi.unsqueeze(0))+self.gaussian_pdfs_log(z,mu_c,log_sigma2_c)) - gamma=gamma_c.detach().cpu().numpy() + gamma=gamma_c.detach().cpu().numpy() - return np.argmax(gamma,axis=1),gamma + return np.argmax(gamma,axis=1),gamma - def gaussian_pdfs_log(self,x,mus,log_sigma2s): - G=[] - for c in range(self.args.nClusters): - G.append(self.gaussian_pdf_log(x,mus[c:c+1,:],log_sigma2s[c:c+1,:]).view(-1,1)) - return torch.cat(G,1) + def gaussian_pdfs_log(self,x,mus,log_sigma2s): + G=[] + for c in range(self.args.nClusters): + G.append(self.gaussian_pdf_log(x,mus[c:c+1,:],log_sigma2s[c:c+1,:]).view(-1,1)) + return torch.cat(G,1) - @staticmethod - def gaussian_pdf_log(x,mu,log_sigma2): - return -0.5*(torch.sum(np.log(np.pi*2)+log_sigma2+(x-mu).pow(2)/torch.exp(log_sigma2),1)) + @staticmethod + def gaussian_pdf_log(x,mu,log_sigma2): + return -0.5*(torch.sum(np.log(np.pi*2)+log_sigma2+(x-mu).pow(2)/torch.exp(log_sigma2),1)) - def check_parameters(self): - for name, param in self.named_parameters(): - if param.requires_grad: - print(name, param.data,param.data.shape) + def check_parameters(self): + for name, param in self.named_parameters(): + if param.requires_grad: + print(name, param.data,param.data.shape) class GCNModelVAECE(nn.Module): - def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args): - super(GCNModelVAECE, self).__init__() + def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args): + super(GCNModelVAECE, self).__init__() + + + self.args = args + self.gc1 = GraphConvolutionSparse(input_feat_dim, hidden_dim1, dropout, act=torch.relu) + self.gc2 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) + self.gc3 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) + # self.dc = InnerProductDecoder(dropout, act=lambda x: x) + self.dc = InnerDecoder(dropout, act=lambda x: x) + #for embedding attributes/features + self.linear_a1= Linear(n_nodes, hidden_dim1, act = torch.tanh,sparse_inputs=True) # the input dim is the number of nodes + self.linear_a2= Linear(hidden_dim1, hidden_dim2, act = lambda x:x) + self.linear_a3= Linear(hidden_dim1, hidden_dim2, act = lambda x:x) + + #modularity layer + # self.modulairty_layer = Linear(hidden_dim2,args.nClusters,act=torch.relu) + # self.cluster_choose= Linear(hidden_dim2,args.nClusters,act=torch.relu) + + + self.pi_=nn.Parameter(torch.FloatTensor(args.nClusters,).fill_(1)/args.nClusters,requires_grad=True) + self.mu_c=nn.Parameter(torch.FloatTensor(args.nClusters,hidden_dim2).fill_(0.00),requires_grad=True) + self.log_sigma2_c=nn.Parameter(torch.FloatTensor(args.nClusters,hidden_dim2).fill_(0.0),requires_grad=False) - self.args = args - self.gc1 = GraphConvolutionSparse(input_feat_dim, hidden_dim1, dropout, act=torch.relu) - self.gc2 = GraphConvolution(hidden_dim1, 
hidden_dim2, dropout, act=lambda x: x) - self.gc3 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) - # self.dc = InnerProductDecoder(dropout, act=lambda x: x) - self.dc = InnerDecoder(dropout, act=lambda x: x) + torch.nn.init.xavier_normal_(self.mu_c) + # torch.nn.init.xavier_normal_(self.log_sigma2_c) - #for embedding attributes/features - self.linear_a1= Linear(n_nodes, hidden_dim1, act = torch.tanh,sparse_inputs=True) # the input dim is the number of nodes - self.linear_a2= Linear(hidden_dim1, hidden_dim2, act = lambda x:x) - self.linear_a3= Linear(hidden_dim1, hidden_dim2, act = lambda x:x) + # calculate mi - #modularity layer - self.modulairty_layer = Linear(hidden_dim2,args.nClusters,act=torch.relu) + # critic_params = {'dim_x': x.shape[1],'dim_y':y.shape[1],'layers': 2,'embed_dim': 32,'hidden_dim': 64,'activation': 'relu',} + # self.critic_structure = ConcatCritic(hidden_dim2,n_nodes,256,3,'relu',rho=None,) + # self.critic_feature = ConcatCritic(hidden_dim2,input_feat_dim,256,3,'relu',rho=None,) + def encoder(self, x, adj): + hidden1 = self.gc1(x, adj) + hidden_a1 = self.linear_a1(x.t()) # transpose the input feature matrix + return self.gc2(hidden1, adj), self.gc3(hidden1, adj), self.linear_a2(hidden_a1),self.linear_a3(hidden_a1) - self.pi_=nn.Parameter(torch.FloatTensor(args.nClusters,).fill_(1)/args.nClusters,requires_grad=True) - self.mu_c=nn.Parameter(torch.FloatTensor(args.nClusters,hidden_dim2).fill_(0.00),requires_grad=True) - self.log_sigma2_c=nn.Parameter(torch.FloatTensor(args.nClusters,hidden_dim2).fill_(0.0),requires_grad=False) - - torch.nn.init.xavier_normal_(self.mu_c) - # torch.nn.init.xavier_normal_(self.log_sigma2_c) + def decoder(self,mu,mu_a,logvar,logvar_a): - # calculate mi + z_u = self.reparameterize(mu, logvar) + z_a = self.reparameterize(mu_a,logvar_a) + return self.dc((z_u,z_a)) - # critic_params = {'dim_x': x.shape[1],'dim_y':y.shape[1],'layers': 2,'embed_dim': 32,'hidden_dim': 64,'activation': 'relu',} - # self.critic_structure = ConcatCritic(hidden_dim2,n_nodes,256,3,'relu',rho=None,) - # self.critic_feature = ConcatCritic(hidden_dim2,input_feat_dim,256,3,'relu',rho=None,) + def reparameterize(self, mu, logvar): + if self.training: + std = torch.exp(logvar) + eps = torch.randn_like(std) + return eps.mul(std).add_(mu) + else: + return mu - def encoder(self, x, adj): - hidden1 = self.gc1(x, adj) - hidden_a1 = self.linear_a1(x.t()) # transpose the input feature matrix - return self.gc2(hidden1, adj), self.gc3(hidden1, adj), self.linear_a2(hidden_a1),self.linear_a3(hidden_a1) + def forward(self, x, adj): - def decoder(self,mu,mu_a,logvar,logvar_a): + mu, logvar, mu_a, logvar_a = self.encoder(x, adj) + z_u = self.reparameterize(mu, logvar) + z_a = self.reparameterize(mu_a,logvar_a) + return self.dc((z_u,z_a)),mu, logvar, mu_a, logvar_a - z_u = self.reparameterize(mu, logvar) - z_a = self.reparameterize(mu_a,logvar_a) - return self.dc((z_u,z_a)) + def modularity_loss(self, z,adj): - def reparameterize(self, mu, logvar): - if self.training: - std = torch.exp(logvar) - eps = torch.randn_like(std) - return eps.mul(std).add_(mu) - else: - return mu + adj = adj.to_dense() + H = self.modulairty_layer(z) + assert H.shape[0]==z.shape[0] - def forward(self, x, adj): + n = torch.tensor(1.0*z.shape[0]) - mu, logvar, mu_a, logvar_a = self.encoder(x, adj) - z_u = self.reparameterize(mu, logvar) - z_a = self.reparameterize(mu_a,logvar_a) - return self.dc((z_u,z_a)),mu, logvar, mu_a, logvar_a + H_norm = n.sqrt()*H.sqrt()/(H.sqrt().sum()) + 
print("H_norm shape",H_norm.shape) + print("H_norm ",H_norm) + m = (adj-torch.eye(adj.shape[0])).sum()/2 + D = (adj-torch.eye(adj.shape[0])).sum(1) # the degree of nodes, adj includes self loop + B = (adj-torch.eye(adj.shape[0]))-torch.matmul(D.view(-1,1),D.view(1,-1))/(2*m) # modularity matrix + mod_loss=torch.trace(torch.matmul(torch.matmul(H_norm.t(),B),H_norm)/(4*m)) + print("mod_loss",mod_loss) - def modularity_loss(self, z,adj): + return mod_loss - adj = adj.to_dense() - H = self.modulairty_layer(z) - assert H.shape[0]==z.shape[0] + def dist(self,x): + # x = x/torch.norm(x,2,dim=1).view(-1,1) + assert len(x.size()) == 2 + norm = (x ** 2).sum(1).view(-1, 1) + dn = (norm + norm.view(1, -1)) - 2.0 * (x @ x.t()) + return torch.sum(torch.relu(dn).sqrt()) - n = torch.tensor(1.0*z.shape[0]) + def mi_loss(self,z,x,a): + # critic_params = {'dim_x': x.shape[1],'dim_y':y.shape[1],'layers': 2,'embed_dim': 32,'hidden_dim': 64,'activation': 'relu',} + # critic = ConcatCritic(rho=None,**critic_params) + indice = torch.randperm(len(z))[0:50] + # mi_x = estimate_mutual_information('dv',z[indice],x[indice],self.critic_structure) + mi_a = estimate_mutual_information('js',z[indice],a[indice],self.critic_feature) + return mi_a - H_norm = n.sqrt()*H.sqrt()/(H.sqrt().sum()) - print("H_norm shape",H_norm.shape) - print("H_norm ",H_norm) - m = (adj-torch.eye(adj.shape[0])).sum()/2 - D = (adj-torch.eye(adj.shape[0])).sum(1) # the degree of nodes, adj includes self loop - B = (adj-torch.eye(adj.shape[0]))-torch.matmul(D.view(-1,1),D.view(1,-1))/(2*m) # modularity matrix - mod_loss=torch.trace(torch.matmul(torch.matmul(H_norm.t(),B),H_norm)/(4*m)) - print("mod_loss",mod_loss) + def change_cluster_grad_false(self): + for name, param in self.named_parameters(): + if name in ['pi_','mu_c','log_sigma2_c']: + param.requires_grad=False - return mod_loss + def change_cluster_grad_true(self): + for name, param in self.named_parameters(): + if name in ['pi_','mu_c','log_sigma2_c']: + param.requires_grad=True - def dist(self,x): - # x = x/torch.norm(x,2,dim=1).view(-1,1) - assert len(x.size()) == 2 - norm = (x ** 2).sum(1).view(-1, 1) - dn = (norm + norm.view(1, -1)) - 2.0 * (x @ x.t()) - return torch.sum(torch.relu(dn).sqrt()) - def mi_loss(self,z,x,a): - # critic_params = {'dim_x': x.shape[1],'dim_y':y.shape[1],'layers': 2,'embed_dim': 32,'hidden_dim': 64,'activation': 'relu',} - # critic = ConcatCritic(rho=None,**critic_params) - indice = torch.randperm(len(z))[0:50] - # mi_x = estimate_mutual_information('dv',z[indice],x[indice],self.critic_structure) - mi_a = estimate_mutual_information('js',z[indice],a[indice],self.critic_feature) - return mi_a + def change_nn_grad_false(self): + for name, param in self.named_parameters(): + if name not in ['pi_','mu_c','log_sigma2_c']: + param.requires_grad=False - def change_cluster_grad_false(self): - for name, param in self.named_parameters(): - if name in ['pi_','mu_c','log_sigma2_c']: - param.requires_grad=False + def change_nn_grad_true(self): + for name, param in self.named_parameters(): + if name not in ['pi_','mu_c','log_sigma2_c']: + param.requires_grad=True - def change_cluster_grad_true(self): - for name, param in self.named_parameters(): - if name in ['pi_','mu_c','log_sigma2_c']: - param.requires_grad=True + def loss(self,x,adj,labels, n_nodes, n_features, norm, pos_weight,L=1): + det=1e-10 + labels_sub_u, labels_sub_a = labels + norm_u, norm_a = norm + pos_weight_u, pos_weight_a = pos_weight - def change_nn_grad_false(self): - for name, param in 
self.named_parameters(): - if name not in ['pi_','mu_c','log_sigma2_c']: - param.requires_grad=False + L_rec_u=0 + L_rec_a=0 - def change_nn_grad_true(self): - for name, param in self.named_parameters(): - if name not in ['pi_','mu_c','log_sigma2_c']: - param.requires_grad=True + mi=0 - def loss(self,x,adj,labels, n_nodes, n_features, norm, pos_weight,L=1): + mu, logvar, mu_a, logvar_a = self.encoder(x, adj) - det=1e-10 - labels_sub_u, labels_sub_a = labels - norm_u, norm_a = norm - pos_weight_u, pos_weight_a = pos_weight + # mutual information loss - L_rec_u=0 - L_rec_a=0 + # z_mu, z_sigma2_log = self.encoder(x) + # mi_a = self.mi_loss(mu,adj.to_dense(),x.to_dense()) + for l in range(L): - mi=0 + # z=torch.randn_like(z_mu)*torch.exp(z_sigma2_log/2)+z_mu + pred_adj, pred_x = self.decoder(mu,mu_a,logvar,logvar_a) + # L_rec+=F.binary_cross_entropy(x_pro,x) - mu, logvar, mu_a, logvar_a = self.encoder(x, adj) + cost_u = norm_u * F.binary_cross_entropy_with_logits(pred_adj, labels_sub_u, pos_weight = pos_weight_u) + cost_a = norm_a * F.binary_cross_entropy_with_logits(pred_x, labels_sub_a, pos_weight = pos_weight_a) + # cost_a =torch.Tensor(1).fill_(0) - # mutual information loss + L_rec_u += cost_u + L_rec_a += cost_a - # z_mu, z_sigma2_log = self.encoder(x) - # mi_a = self.mi_loss(mu,adj.to_dense(),x.to_dense()) - for l in range(L): - # z=torch.randn_like(z_mu)*torch.exp(z_sigma2_log/2)+z_mu - pred_adj, pred_x = self.decoder(mu,mu_a,logvar,logvar_a) - # L_rec+=F.binary_cross_entropy(x_pro,x) + L_rec_u/=L + L_rec_a/=L - cost_u = norm_u * F.binary_cross_entropy_with_logits(pred_adj, labels_sub_u, pos_weight = pos_weight_u) - cost_a = norm_a * F.binary_cross_entropy_with_logits(pred_x, labels_sub_a, pos_weight = pos_weight_a) - # cost_a =torch.Tensor(1).fill_(0) + # z_a = self.reparameterize(mu_a,logvar_a) + # KLD_a = (0.5 / n_features) * torch.mean(torch.sum(-1 - 2 * logvar_a + mu_a.pow(2) + logvar_a.exp().pow(2), 1)) + KLD_a = -(0.5 / n_features) * torch.mean(torch.sum(-1 - 2 * logvar_a + mu_a.pow(2) + logvar_a.exp().pow(2), 1)) + # KLD_a =torch.Tensor(1).fill_(0) - L_rec_u += cost_u - L_rec_a += cost_a + # Loss=L_rec*x.size(1) - L_rec_u/=L - L_rec_a/=L + # log_sigma2_c=self.log_sigma2_c + # mu_c=self.mu_c - # z_a = self.reparameterize(mu_a,logvar_a) - # KLD_a = (0.5 / n_features) * torch.mean(torch.sum(-1 - 2 * logvar_a + mu_a.pow(2) + logvar_a.exp().pow(2), 1)) - KLD_a = -(0.5 / n_features) * torch.mean(torch.sum(-1 - 2 * logvar_a + mu_a.pow(2) + logvar_a.exp().pow(2), 1)) - # KLD_a =torch.Tensor(1).fill_(0) + # z = torch.randn_like(z_mu) * torch.exp(z_sigma2_log / 2) + z_mu + z = self.reparameterize(mu,logvar) - # Loss=L_rec*x.size(1) + # mod_loss=self.modularity_loss(z,adj) + # gamma_c=torch.exp(torch.log(self.pi_.unsqueeze(0))+self.gaussian_pdfs_log(z,self.mu_c,self.log_sigma2_c))+det + gamma_c=torch.exp(self.gaussian_pdfs_log(z,self.mu_c,self.log_sigma2_c))+det + # gamma_c = self.cluster_choose(self.reparameterize(mu,logvar)) + # print('gamma_c:',gamma_c) + gamma_c=gamma_c/(gamma_c.sum(1).view(-1,1))#batch_size*Clusters + gamma_c=F.softmax(gamma_c) + # print('gamma_c normalized:',gamma_c) + # print('gamma_c argmax:',torch.argmax(gamma_c,1)) + print('gamma_c counter:',Counter(torch.argmax(gamma_c,1).tolist())) - # log_sigma2_c=self.log_sigma2_c - # mu_c=self.mu_c + # gamma_c=torch.nn.functional.one_hot(torch.argmax(gamma_c,1),self.args.nClusters) - # z = torch.randn_like(z_mu) * torch.exp(z_sigma2_log / 2) + z_mu - z = self.reparameterize(mu,logvar) + # self.pi_.data = 
(self.pi_/self.pi_.sum()).data # prior need to be re-normalized? In GMM, prior is based on gamma_c:https://brilliant.org/wiki/gaussian-mixture-model/ + # self.pi_.data = gamma_c.mean(0).data # prior need to be re-normalized? In GMM, prior is based on gamma_c:https://brilliant.org/wiki/gaussian-mixture-model/ - # mod_loss=self.modularity_loss(z,adj) - # gamma_c=torch.exp(torch.log(self.pi_.unsqueeze(0))+self.gaussian_pdfs_log(z,self.mu_c,self.log_sigma2_c))+det - gamma_c=torch.exp(self.gaussian_pdfs_log(z,self.mu_c,self.log_sigma2_c))+det - # print('gamma_c:',gamma_c) + KLD_u_c=-(0.5/n_nodes)*torch.mean(torch.sum(gamma_c*torch.sum(-1+self.log_sigma2_c.unsqueeze(0)-2*logvar.unsqueeze(1)+torch.exp(2*logvar.unsqueeze(1)-self.log_sigma2_c.unsqueeze(0))+(mu.unsqueeze(1)-self.mu_c.unsqueeze(0)).pow(2)/torch.exp(self.log_sigma2_c.unsqueeze(0)),2),1)) + # KLD_u_c=-(0.5/n_nodes)*torch.mean(torch.sum(gamma_c*torch.sum(-1-2*logvar.unsqueeze(1)+torch.exp(2*logvar.unsqueeze(1))+(mu.unsqueeze(1)-self.mu_c.unsqueeze(0)).pow(2),2),1)) + # temp_kld=-(0.5/n_nodes)*torch.sum((mu.unsqueeze(1)-self.mu_c.unsqueeze(0)).pow(2),2) - gamma_c=gamma_c/(gamma_c.sum(1).view(-1,1))#batch_size*Clusters - gamma_c=F.softmax(gamma_c) - # print('gamma_c normalized:',gamma_c) - # print('gamma_c argmax:',torch.argmax(gamma_c,1)) - print('gamma_c counter:',Counter(torch.argmax(gamma_c,1).tolist())) + # KLD_u_c_test=-(0.5/n_nodes)*F.mse_loss(mu.unsqueeze(1),self.mu_c.unsqueeze(0),reduction='none') + # print('kld_u_c_test:',KLD_u_c_test.sum(2)) - # self.pi_.data = (self.pi_/self.pi_.sum()).data # prior need to be re-normalized? In GMM, prior is based on gamma_c:https://brilliant.org/wiki/gaussian-mixture-model/ - # self.pi_.data = gamma_c.mean(0).data # prior need to be re-normalized? In GMM, prior is based on gamma_c:https://brilliant.org/wiki/gaussian-mixture-model/ + # KLD_u_c=-(0.5/n_nodes)*F.mse_loss(mu.unsqueeze(1),self.mu_c.unsqueeze(0)) - KLD_u_c=-(0.5/n_nodes)*torch.mean(torch.sum(gamma_c*torch.sum(-1+self.log_sigma2_c.unsqueeze(0)-2*logvar.unsqueeze(1)+torch.exp(2*logvar.unsqueeze(1)-self.log_sigma2_c.unsqueeze(0))+(mu.unsqueeze(1)-self.mu_c.unsqueeze(0)).pow(2)/torch.exp(self.log_sigma2_c.unsqueeze(0)),2),1)) - # KLD_u_c=-(0.5/n_nodes)*torch.mean(torch.sum(gamma_c*torch.sum(-1-2*logvar.unsqueeze(1)+torch.exp(2*logvar.unsqueeze(1))+(mu.unsqueeze(1)-self.mu_c.unsqueeze(0)).pow(2),2),1)) - # temp_kld=-(0.5/n_nodes)*torch.sum((mu.unsqueeze(1)-self.mu_c.unsqueeze(0)).pow(2),2) + # KLD_u_c=(0.5 / n_nodes)*torch.mean(torch.sum(gamma_c*torch.sum(self.log_sigma2_c.unsqueeze(0)+\ + # torch.exp(2*logvar.unsqueeze(1)-self.log_sigma2_c.unsqueeze(0))+\ + # (mu.unsqueeze(1)-self.mu_c.unsqueeze(0)).pow(2)/torch.exp(self.log_sigma2_c.unsqueeze(0)),2),1)) - # KLD_u_c_test=-(0.5/n_nodes)*F.mse_loss(mu.unsqueeze(1),self.mu_c.unsqueeze(0),reduction='none') - # print('kld_u_c_test:',KLD_u_c_test.sum(2)) + mutual_dist = (1/(self.args.nClusters**2))*self.dist(self.mu_c) + # gamma_loss=-(1/self.args.nClusters)*torch.mean(torch.sum(gamma_c*torch.log(gamma_c),1)) + # gamma_loss = (1 / self.args.nClusters) * torch.mean(torch.sum(gamma_c*torch.log(gamma_c),1)) - (0.5 / self.args.hid_dim)*torch.mean(torch.sum(1+2*logvar,1)) + gamma_loss = -(1 / self.args.nClusters) * torch.mean(torch.sum(gamma_c*torch.log(gamma_c/self.pi_.unsqueeze(0)),1)) + # gamma_loss = (1 / self.args.nClusters) * torch.mean(torch.sum(gamma_c*torch.log(gamma_c/self.pi_.unsqueeze(0)),1)) - (0.5 / self.args.hid_dim)*torch.mean(torch.sum(1+2*logvar,1)) - # 
KLD_u_c=-(0.5/n_nodes)*F.mse_loss(mu.unsqueeze(1),self.mu_c.unsqueeze(0)) + #soft assignment + Q = self.getSoftAssignments(z,self.mu_c,self.args.nClusters,self.args.hidden2,n_nodes) + P = self.calculateP(Q) + soft_cluster_loss = self.getKLDivLossExpression(Q,P) - # KLD_u_c=(0.5 / n_nodes)*torch.mean(torch.sum(gamma_c*torch.sum(self.log_sigma2_c.unsqueeze(0)+\ - # torch.exp(2*logvar.unsqueeze(1)-self.log_sigma2_c.unsqueeze(0))+\ - # (mu.unsqueeze(1)-self.mu_c.unsqueeze(0)).pow(2)/torch.exp(self.log_sigma2_c.unsqueeze(0)),2),1)) + # return L_rec_u , L_rec_a , -KLD_u_c ,-KLD_a + return L_rec_u , L_rec_a , -KLD_u_c ,-KLD_a , -gamma_loss, -mutual_dist,soft_cluster_loss + # return L_rec_u , L_rec_a , -KLD_u_c ,-KLD_a , -gamma_loss,-mi_a + # return L_rec_u + L_rec_a + KLD_u_c + KLD_a + gamma_loss - mutual_dist = (1/(self.args.nClusters**2))*self.dist(self.mu_c) - # gamma_loss=-(1/self.args.nClusters)*torch.mean(torch.sum(gamma_c*torch.log(gamma_c),1)) - # gamma_loss = (1 / self.args.nClusters) * torch.mean(torch.sum(gamma_c*torch.log(gamma_c),1)) - (0.5 / self.args.hid_dim)*torch.mean(torch.sum(1+2*logvar,1)) - gamma_loss = -(1 / self.args.nClusters) * torch.mean(torch.sum(gamma_c*torch.log(gamma_c/self.pi_.unsqueeze(0)),1)) - # gamma_loss = (1 / self.args.nClusters) * torch.mean(torch.sum(gamma_c*torch.log(gamma_c/self.pi_.unsqueeze(0)),1)) - (0.5 / self.args.hid_dim)*torch.mean(torch.sum(1+2*logvar,1)) + def pre_train(self,x,adj,Y,pre_epoch=22): + ''' + This function is used to initialize cluster paramters: pi_, mu_c, log_sigma2_c. + ------------- + paramters: + x: is the feature matrix of graph G. + adj: is the adjacent matrix of graph G. + Y: is the class label for each node in graph G. + ''' + if not os.path.exists('./pretrain_model_{}_{}.pk'.format(self.args.dataset,pre_epoch)): - # return L_rec_u , L_rec_a , -KLD_u_c ,-KLD_a - return L_rec_u , 0.1*L_rec_a , -30*KLD_u_c ,-KLD_a , -gamma_loss, -0.05*mutual_dist - # return L_rec_u , L_rec_a , -KLD_u_c ,-KLD_a , -gamma_loss,-mi_a - # return L_rec_u + L_rec_a + KLD_u_c + KLD_a + gamma_loss + Loss=nn.MSELoss() + opti=Adam(self.parameters()) #all paramters in model + print('Pretraining......') + # epoch_bar=tqdm(range(pre_epoch)) + # for _ in epoch_bar: + for _ in range(pre_epoch): - def pre_train(self,x,adj,Y,pre_epoch=22): - ''' - This function is used to initialize cluster paramters: pi_, mu_c, log_sigma2_c. - ------------- - paramters: - x: is the feature matrix of graph G. - adj: is the adjacent matrix of graph G. - Y: is the class label for each node in graph G. 
- ''' + self.train() + L=0 + mu, logvar, mu_a, logvar_a = self.encoder(x,adj) + pred_adj, pred_x = self.decoder(mu,mu_a,logvar,logvar_a) - if not os.path.exists('./pretrain_model_{}_{}.pk'.format(self.args.dataset,pre_epoch)): + loss= Loss(pred_x,x) + Loss(pred_adj,adj) - Loss=nn.MSELoss() - opti=Adam(self.parameters()) #all paramters in model + L+=loss.detach().cpu().numpy() - print('Pretraining......') - # epoch_bar=tqdm(range(pre_epoch)) - # for _ in epoch_bar: - for _ in range(pre_epoch): + opti.zero_grad() + loss.backward() + opti.step() - self.train() - L=0 - mu, logvar, mu_a, logvar_a = self.encoder(x,adj) - pred_adj, pred_x = self.decoder(mu,mu_a,logvar,logvar_a) + # epoch_bar.write('L2={:.4f}'.format(L)) + print('L2={:.4f}'.format(L)) - loss= Loss(pred_x,x) + Loss(pred_adj,adj) + # self.gc2.load_state_dict(self.gc3.state_dict()) + # self.linear_a2.load_state_dict(self.linear_a3.state_dict()) - L+=loss.detach().cpu().numpy() - opti.zero_grad() - loss.backward() - opti.step() + # with torch.no_grad(): + # mu, logvar, mu_a, logvar_a = self.encoder(x,adj) + # assert F.mse_loss(mu, logvar) == 0 + # assert F.mse_loss(mu_a, logvar_a) == 0 + # Z = mu.data.numpy() - # epoch_bar.write('L2={:.4f}'.format(L)) - print('L2={:.4f}'.format(L)) + mu, logvar, mu_a, logvar_a = self.encoder(x,adj) + Z = self.reparameterize(mu,logvar) - # self.gc2.load_state_dict(self.gc3.state_dict()) - # self.linear_a2.load_state_dict(self.linear_a3.state_dict()) + gmm = GaussianMixture(n_components=self.args.nClusters, covariance_type='diag') + pre = gmm.fit_predict(Z.cpu().detach().numpy()) + print('Acc={:.4f}%'.format(cluster_acc(pre, Y)[0] * 100)) - # with torch.no_grad(): - # mu, logvar, mu_a, logvar_a = self.encoder(x,adj) - # assert F.mse_loss(mu, logvar) == 0 - # assert F.mse_loss(mu_a, logvar_a) == 0 - # Z = mu.data.numpy() + self.pi_.data = torch.from_numpy(gmm.weights_).float() + self.mu_c.data = torch.from_numpy(gmm.means_).float() + self.log_sigma2_c.data = torch.log(torch.from_numpy(gmm.covariances_).float()) - mu, logvar, mu_a, logvar_a = self.encoder(x,adj) - Z = self.reparameterize(mu,logvar) + torch.save(self.state_dict(), './pretrain_model_{}_{}.pk'.format(self.args.dataset,pre_epoch)) + else: + self.load_state_dict(torch.load('./pretrain_model_{}_{}.pk'.format(self.args.dataset,pre_epoch))) - gmm = GaussianMixture(n_components=self.args.nClusters, covariance_type='diag') + # def predict_nn(self,mu,logvar): + # z = self.reparameterize(mu,logvar) + # gamma_c = self.cluster_choose(self.reparameterize(mu,logvar)) - pre = gmm.fit_predict(Z.cpu().detach().numpy()) - print('Acc={:.4f}%'.format(cluster_acc(pre, Y)[0] * 100)) + # print('gamma_c,normalized:',gamma_c) + # print('gamma_c argmax:',torch.argmax(gamma_c,1)) + # print('gamma_c argmax counter:',Counter(torch.argmax(gamma_c,1).tolist())) - self.pi_.data = torch.from_numpy(gmm.weights_).float() - self.mu_c.data = torch.from_numpy(gmm.means_).float() - self.log_sigma2_c.data = torch.log(torch.from_numpy(gmm.covariances_).float()) - - torch.save(self.state_dict(), './pretrain_model_{}_{}.pk'.format(self.args.dataset,pre_epoch)) - else: - self.load_state_dict(torch.load('./pretrain_model_{}_{}.pk'.format(self.args.dataset,pre_epoch))) + # gamma=gamma_c.detach().cpu().numpy() - def predict(self,mu, logvar): - # z_mu, z_sigma2_log, z_ma,z_a_sigma2_log = self.encoder(x,adj) - # mu, logvar, mu_a, logvar_a = self.encoder(x,adj) - # z = torch.randn_like(mu) * torch.exp(z_sigma2_log / 2) + z_mu - det=1e-10 - z = self.reparameterize(mu,logvar) - pi = self.pi_ 
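(Editor's aside, not part of the patch: the pre_train/predict logic in this hunk initialises the mixture parameters pi_, mu_c and log_sigma2_c from a fitted sklearn GaussianMixture and then assigns nodes to clusters via Gaussian log-densities. A minimal standalone sketch of that idea follows; the function names and shapes are illustrative assumptions, not identifiers from this repository.)

import numpy as np
import torch
from sklearn.mixture import GaussianMixture

def init_gmm_prior(z, n_clusters):
    # Fit a diagonal-covariance GMM on latent codes to initialise pi_, mu_c, log_sigma2_c.
    gmm = GaussianMixture(n_components=n_clusters, covariance_type='diag')
    labels = gmm.fit_predict(z.detach().cpu().numpy())
    pi = torch.from_numpy(gmm.weights_).float()                           # (K,)
    mu_c = torch.from_numpy(gmm.means_).float()                           # (K, D)
    log_sigma2_c = torch.log(torch.from_numpy(gmm.covariances_).float())  # (K, D)
    return labels, pi, mu_c, log_sigma2_c

def gaussian_log_pdf(z, mu_c, log_sigma2_c):
    # log N(z_i | mu_k, diag(sigma_k^2)) summed over latent dims; z: (N, D) -> result: (N, K).
    return -0.5 * torch.sum(
        np.log(2 * np.pi) + log_sigma2_c.unsqueeze(0)
        + (z.unsqueeze(1) - mu_c.unsqueeze(0)).pow(2) / torch.exp(log_sigma2_c.unsqueeze(0)),
        dim=2)

def responsibilities(z, pi, mu_c, log_sigma2_c, eps=1e-10):
    # gamma_ik proportional to pi_k * N(z_i | mu_k, sigma_k^2), normalised over clusters k.
    gamma = torch.exp(torch.log(pi.unsqueeze(0)) + gaussian_log_pdf(z, mu_c, log_sigma2_c)) + eps
    return gamma / gamma.sum(1, keepdim=True)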
- # log_sigma2_c = self.log_sigma2_c - # mu_c = self.mu_c - # gamma_c = torch.exp(torch.log(pi.unsqueeze(0))+self.gaussian_pdfs_log(z,mu_c,log_sigma2_c)) - gamma_c = torch.exp(self.gaussian_pdfs_log(z,self.mu_c,self.log_sigma2_c))+det - print('gamma_c:',gamma_c) - gamma_c=gamma_c/(gamma_c.sum(1).view(-1,1))#batch_size*Clusters - gamma_c=F.softmax(gamma_c) - print('gamma_c,normalized:',gamma_c) - print('gamma_c argmax:',torch.argmax(gamma_c,1)) - print('gamma_c argmax counter:',Counter(torch.argmax(gamma_c,1).tolist())) - - gamma=gamma_c.detach().cpu().numpy() - return np.argmax(gamma,axis=1),gamma, z - - def predict_dist(self,mu, logvar): - # z_mu, z_sigma2_log, z_ma,z_a_sigma2_log = self.encoder(x,adj) - # mu, logvar, mu_a, logvar_a = self.encoder(x,adj) - # z = torch.randn_like(mu) * torch.exp(z_sigma2_log / 2) + z_mu - z = self.reparameterize(mu,logvar) - pi = self.pi_ - log_sigma2_c = self.log_sigma2_c - mu_c = self.mu_c - # gamma_c = torch.exp(self.gaussian_pdfs_log(z,mu_c,log_sigma2_c)) - - # gamma=gamma_c.detach().cpu().numpy() - - gamma=[] - for e in range(z.shape[0]): - temp_dist=[] - for m in range(mu_c.shape[0]): - temp_dist.append(F.mse_loss(z[e],mu_c[m]).data) - gamma.append(temp_dist) - - return np.argmin(gamma,axis=1),np.array(gamma) - - def plot_tsne(self,dataset,epoch,z,true_label,pred_label): - - tsne = TSNE(n_components=2, init='pca',perplexity=50.0) - data = torch.cat([z,self.mu_c.to('cpu')],dim=0).detach().numpy() - zs_tsne = tsne.fit_transform(data) - - cluster_labels=set(true_label) - print(cluster_labels) - index_group= [np.array(true_label)==y for y in cluster_labels] - colors = cm.tab20(range(len(index_group))) - - fig, ax = plt.subplots() - for index,c in zip(index_group,colors): - ax.scatter(zs_tsne[np.ix_(index), 0], zs_tsne[np.ix_(index), 1],color=c,s=2) - - ax.scatter(zs_tsne[z.shape[0]:, 0], zs_tsne[z.shape[0]:, 1],marker='^',color='b',s=40) - plt.title('true label') - # ax.legend() - plt.savefig("./visualization/{}_{}_tsne_{}.pdf".format(dataset,epoch,'true_label')) - - cluster_labels=set(pred_label) - print(cluster_labels) - index_group= [np.array(pred_label)==y for y in cluster_labels] - colors = cm.tab10(range(len(index_group))) - - fig, ax = plt.subplots() - for index,c in zip(index_group,colors): - ax.scatter(zs_tsne[np.ix_(index), 0], zs_tsne[np.ix_(index), 1],color=c,s=2) - - for index,c in enumerate(colors): - ax.scatter(zs_tsne[z.shape[0]+index:z.shape[0]+index+1, 0], zs_tsne[z.shape[0]+index:z.shape[0]+index+1, 1],marker='^',color=c,s=40) - - plt.title('pred label') - # ax.legend() - plt.savefig("./visualization/{}_{}_tsne_{}.pdf".format(dataset,epoch,'pred_label')) - - def gaussian_pdfs_log(self,x,mus,log_sigma2s): - G=[] - for c in range(self.args.nClusters): - G.append(self.gaussian_pdf_log(x,mus[c:c+1,:],log_sigma2s[c:c+1,:]).view(-1,1)) - return torch.cat(G,1) - - - @staticmethod - def gaussian_pdf_log(x,mu,log_sigma2): - return -0.5*(torch.sum(np.log(np.pi*2)+log_sigma2+(x-mu).pow(2)/torch.exp(log_sigma2),1)) # np.pi*2, not square - - def check_parameters(self): - for name, param in self.named_parameters(): - if param.requires_grad: - print(name, param.data,param.data.shape) - def check_gradient(self): - for name, param in self.named_parameters(): - if param.requires_grad: - print('grad: ',name) - print(param.grad,param.grad.shape) + + # return np.argmax(gamma,axis=1),gamma, z + + + + def predict(self,mu, logvar): + # z_mu, z_sigma2_log, z_ma,z_a_sigma2_log = self.encoder(x,adj) + # mu, logvar, mu_a, logvar_a = self.encoder(x,adj) + # z = 
torch.randn_like(mu) * torch.exp(z_sigma2_log / 2) + z_mu + det=1e-10 + z = self.reparameterize(mu,logvar) + pi = self.pi_ + # log_sigma2_c = self.log_sigma2_c + # mu_c = self.mu_c + # gamma_c = torch.exp(torch.log(pi.unsqueeze(0))+self.gaussian_pdfs_log(z,mu_c,log_sigma2_c)) + gamma_c = torch.exp(self.gaussian_pdfs_log(mu,self.mu_c,self.log_sigma2_c))+det + # gamma_c = torch.exp(self.gaussian_pdfs_log(z,self.mu_c,self.log_sigma2_c))+det + print('gamma_c:',gamma_c) + gamma_c=gamma_c/(gamma_c.sum(1).view(-1,1))#batch_size*Clusters + # gamma_c=F.softmax(gamma_c) + print('gamma_c,normalized:',gamma_c) + print('gamma_c argmax:',torch.argmax(gamma_c,1)) + print('gamma_c argmax counter:',Counter(torch.argmax(gamma_c,1).tolist())) + + gamma=gamma_c.detach().cpu().numpy() + + return np.argmax(gamma,axis=1),gamma, z + + def predict_dist(self,mu, logvar): + # z_mu, z_sigma2_log, z_ma,z_a_sigma2_log = self.encoder(x,adj) + # mu, logvar, mu_a, logvar_a = self.encoder(x,adj) + # z = torch.randn_like(mu) * torch.exp(z_sigma2_log / 2) + z_mu + z = self.reparameterize(mu,logvar) + pi = self.pi_ + log_sigma2_c = self.log_sigma2_c + mu_c = self.mu_c + # gamma_c = torch.exp(self.gaussian_pdfs_log(z,mu_c,log_sigma2_c)) + + # gamma=gamma_c.detach().cpu().numpy() + + gamma=[] + for e in range(z.shape[0]): + temp_dist=[] + for m in range(mu_c.shape[0]): + temp_dist.append(F.mse_loss(z[e],mu_c[m]).data) + gamma.append(temp_dist) + + return np.argmin(gamma,axis=1),np.array(gamma) + + def plot_tsne(self,dataset,epoch,z,true_label,pred_label): + + tsne = TSNE(n_components=2, init='pca',perplexity=50.0) + data = torch.cat([z,self.mu_c.to('cpu')],dim=0).detach().numpy() + zs_tsne = tsne.fit_transform(data) + + cluster_labels=set(true_label) + print(cluster_labels) + index_group= [np.array(true_label)==y for y in cluster_labels] + colors = cm.tab20(range(len(index_group))) + + fig, ax = plt.subplots() + for index,c in zip(index_group,colors): + ax.scatter(zs_tsne[np.ix_(index), 0], zs_tsne[np.ix_(index), 1],color=c,s=2) + + ax.scatter(zs_tsne[z.shape[0]:, 0], zs_tsne[z.shape[0]:, 1],marker='^',color='b',s=40) + plt.title('true label') + # ax.legend() + plt.savefig("./visualization/{}_{}_tsne_{}.pdf".format(dataset,epoch,'true_label')) + + cluster_labels=set(pred_label) + print(cluster_labels) + index_group= [np.array(pred_label)==y for y in cluster_labels] + colors = cm.tab10(range(len(index_group))) + + fig, ax = plt.subplots() + for index,c in zip(index_group,colors): + ax.scatter(zs_tsne[np.ix_(index), 0], zs_tsne[np.ix_(index), 1],color=c,s=2) + + for index,c in enumerate(colors): + ax.scatter(zs_tsne[z.shape[0]+index:z.shape[0]+index+1, 0], zs_tsne[z.shape[0]+index:z.shape[0]+index+1, 1],marker='^',color=c,s=40) + + plt.title('pred label') + # ax.legend() + plt.savefig("./visualization/{}_{}_tsne_{}.pdf".format(dataset,epoch,'pred_label')) + + def gaussian_pdfs_log(self,x,mus,log_sigma2s): + G=[] + for c in range(self.args.nClusters): + G.append(self.gaussian_pdf_log(x,mus[c:c+1,:],log_sigma2s[c:c+1,:]).view(-1,1)) + return torch.cat(G,1) + + + @staticmethod + def gaussian_pdf_log(x,mu,log_sigma2): + return -0.5*(torch.sum(np.log(np.pi*2)+log_sigma2+(x-mu).pow(2)/torch.exp(log_sigma2),1)) # np.pi*2, not square + + def check_parameters(self): + for name, param in self.named_parameters(): + if param.requires_grad: + print(name, param.data,param.data.shape) + def check_gradient(self): + for name, param in self.named_parameters(): + if param.requires_grad: + print('grad: ',name) + print(param.grad,param.grad.shape) + 
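(Editor's aside, not part of the patch: the calculateP/getKLDivLossExpression/getSoftAssignments methods added below implement a DEC-style clustering objective: a Student-t soft assignment Q between embeddings and cluster centres, a sharpened target distribution P, and a KL(P || Q) loss. A compact sketch under assumed shapes, for reference only.)

import torch

def soft_assignments(z, centers):
    # q_ij = (1 + ||z_i - mu_j||^2)^-1, normalised over clusters j; z: (N, D), centers: (K, D) -> (N, K).
    dist_sq = (z.unsqueeze(1) - centers.unsqueeze(0)).pow(2).sum(dim=2)
    q = 1.0 / (1.0 + dist_sq)
    return q / q.sum(dim=1, keepdim=True)

def target_distribution(q):
    # p_ij = (q_ij^2 / f_j) / sum_j'(q_ij'^2 / f_j'), where f_j = sum_i q_ij is the soft cluster frequency.
    weight = q.pow(2) / q.sum(dim=0)
    return weight / weight.sum(dim=1, keepdim=True)

def dec_loss(q, p):
    # KL(P || Q) = sum_ij p_ij * log(p_ij / q_ij).
    return torch.sum(p * torch.log(p / q))

# Usage sketch (names are assumptions): q = soft_assignments(z, model.mu_c);
# p = target_distribution(q).detach(); loss = dec_loss(q, p).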
+ def calculateP(self, Q): + # Function to calculate the desired distribution Q^2, for more details refer to DEC paper + f = Q.sum(dim=0) + pij_numerator = Q * Q + pij_numerator = pij_numerator / f + normalizer_p = pij_numerator.sum(dim=1).reshape((Q.shape[0], 1)) + P = pij_numerator / normalizer_p + return P + + def getKLDivLossExpression(self, Q_expression, P_expression): + # Loss = KL Divergence between the two distributions + log_arg = P_expression / Q_expression + log_exp = torch.log(log_arg) + sum_arg = P_expression * log_exp + loss = torch.sum(sum_arg) + return loss + + def getSoftAssignments(self,latent_space, cluster_centers, num_clusters, latent_space_dim, num_samples): + ''' + Returns cluster membership distribution for each sample + :param latent_space: latent space representation of inputs + :param cluster_centers: the coordinates of cluster centers in latent space + :param num_clusters: total number of clusters + :param latent_space_dim: dimensionality of latent space + :param num_samples: total number of input samples + :return: soft assigment based on the equation qij = (1+|zi - uj|^2)^(-1)/sum_j'((1+|zi - uj'|^2)^(-1)) + ''' + # z_expanded = latent_space.reshape((num_samples, 1, latent_space_dim)) + # z_expanded = T.tile(z_expanded, (1, num_clusters, 1)) + # u_expanded = T.tile(cluster_centers, (num_samples, 1, 1)) + + # distances_from_cluster_centers = (z_expanded - u_expanded).norm(2, axis=2) + # qij_numerator = 1 + distances_from_cluster_centers * distances_from_cluster_centers + # qij_numerator = 1 / qij_numerator + # normalizer_q = qij_numerator.sum(axis=1).reshape((num_samples, 1)) + + # return qij_numerator / normalizer_q + + + distances_from_cluster_centers = (latent_space.unsqueeze(1)- cluster_centers.unsqueeze(0)).norm(2, dim=2) + qij_numerator = 1 + distances_from_cluster_centers * distances_from_cluster_centers + qij_numerator = 1 / qij_numerator + normalizer_q = qij_numerator.sum(dim=1).reshape((num_samples, 1)) + + return qij_numerator / normalizer_q diff --git a/model_bk.py b/model_bk.py new file mode 100644 index 0000000..8f655de --- /dev/null +++ b/model_bk.py @@ -0,0 +1,677 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.optim import Adam +from sklearn.mixture import GaussianMixture +from sklearn.metrics import accuracy_score +from sklearn.manifold import TSNE +import matplotlib.pyplot as plt +import matplotlib.cm as cm +import numpy as np +import os +from tqdm import tqdm + +from layers import GraphConvolution, GraphConvolutionSparse, Linear, InnerDecoder, InnerProductDecoder +from utils import cluster_acc + +from utils_smiles import * +from estimators import estimate_mutual_information +from collections import Counter + +class GCNModelAE(nn.Module): + def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args): + super(GCNModelAE, self).__init__() + + self.args = args + self.gc1 = GraphConvolutionSparse(input_feat_dim, hidden_dim1, dropout, act=torch.relu) + self.gc2 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) + self.dc = InnerProductDecoder(dropout, act=lambda x: x) + # self.dc = InnerDecoder(dropout, act=lambda x: x) + + def forward(self, x, adj): + z = self.gc1(x,adj) + z = self.gc2(z,adj) + return self.dc(z),z,None + + + def loss(self,pred_adj,labels, n_nodes, n_features, norm, pos_weight,L=1): + + cost = norm * F.binary_cross_entropy_with_logits(pred_adj, labels,pos_weight = pos_weight) + return cost, + + def check_parameters(self): + for name, param in 
self.named_parameters(): + if param.requires_grad: + print(name, param.data,param.data.shape) + +class GCNModelVAE(nn.Module): + def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args): + super(GCNModelVAE, self).__init__() + + self.args = args + self.gc1 = GraphConvolutionSparse(input_feat_dim, hidden_dim1, dropout, act=torch.relu) + self.gc2 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) + self.gc3 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) + self.dc = InnerProductDecoder(dropout, act=lambda x: x) + # self.dc = InnerDecoder(dropout, act=lambda x: x) + + + def encoder(self, x, adj): + hidden1 = self.gc1(x, adj) + return self.gc2(hidden1, adj), self.gc3(hidden1, adj) + + def decoder(self,mu,logvar): + + z_u = self.reparameterize(mu, logvar) + + return self.dc(z_u) + + def reparameterize(self, mu, logvar): + std = torch.exp(logvar) + eps = torch.randn_like(std) + return eps.mul(std).add_(mu) + + # if self.training: + # std = torch.exp(logvar) + # eps = torch.randn_like(std) + # return eps.mul(std).add_(mu) + # else: + # return mu + + def forward(self, x, adj): + + mu, logvar = self.encoder(x, adj) + z_u = self.reparameterize(mu, logvar) + # z_a = self.reparameterize(mu_a,logvar_a) + return self.dc(z_u),mu, logvar + + + def loss(self,x,adj,labels, n_nodes, n_features, norm, pos_weight,L=1): + + det=1e-10 + norm_u = norm + pos_weight_u= pos_weight + + L_rec_u=0 + + mu, logvar = self.encoder(x, adj) + # z_mu, z_sigma2_log = self.encoder(x) + for l in range(L): + + pred_adj = self.decoder(mu,logvar) + + cost_u = norm * F.binary_cross_entropy_with_logits(pred_adj, labels ,pos_weight = pos_weight) + + L_rec_u += cost_u + + L_rec_u/=L + + KLD = -0.5 / n_nodes * torch.mean(torch.sum(1 + 2 * logvar - mu.pow(2) - logvar.exp().pow(2),1)) + return L_rec_u, KLD + + + def check_parameters(self): + for name, param in self.named_parameters(): + if param.requires_grad: + print(name, param.data,param.data.shape) + + +class GCNModelVAECD(nn.Module): + def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args): + super(GCNModelVAECD, self).__init__() + + self.args = args + self.gc1 = GraphConvolutionSparse(input_feat_dim, hidden_dim1, dropout, act=torch.relu) + self.gc2 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) + self.gc3 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) + self.dc = InnerProductDecoder(dropout, act=lambda x: x) + # self.dc = InnerDecoder(dropout, act=lambda x: x) + + #for embedding attributes/features + # self.linear_a1= Linear(n_nodes, hidden_dim1, act = torch.tanh,sparse_inputs=True) # the input dim is the number of nodes + # self.linear_a2= Linear(hidden_dim1, hidden_dim2, act = lambda x:x) + # self.linear_a3= Linear(hidden_dim1, hidden_dim2, act = lambda x:x) + + + self.pi_=nn.Parameter(torch.FloatTensor(args.nClusters,).fill_(1)/args.nClusters,requires_grad=True) + self.mu_c=nn.Parameter(torch.randn(args.nClusters,hidden_dim2),requires_grad=True) + self.log_sigma2_c=nn.Parameter(torch.randn(args.nClusters,hidden_dim2),requires_grad=True) + + def encoder(self, x, adj): + hidden1 = self.gc1(x, adj) + # hidden_a1 = self.linear_a1(x.t()) # transpose the input feature matrix + + return self.gc2(hidden1, adj), self.gc3(hidden1, adj) + + def decoder(self,mu,logvar): + + z_u = self.reparameterize(mu, logvar) + # z_a = self.reparameterize(mu_a,logvar_a) + + return self.dc(z_u) + + def reparameterize(self, mu, logvar): + if self.training: + std = 
torch.exp(logvar) + eps = torch.randn_like(std) + return eps.mul(std).add_(mu) + else: + return mu + + def forward(self, x, adj): + + mu, logvar = self.encoder(x, adj) + z_u = self.reparameterize(mu, logvar) + # z_a = self.reparameterize(mu_a,logvar_a) + return self.dc(z_u),mu, logvar + + + def loss(self,x,adj,labels, n_nodes, n_features, norm, pos_weight,L=1): + + det=1e-10 + norm_u = norm + pos_weight_u= pos_weight + + L_rec_u=0 + + mu, logvar = self.encoder(x, adj) + hidden_dim2 = mu.shape[1] + + # z_mu, z_sigma2_log = self.encoder(x) + for l in range(L): + + # z=torch.randn_like(z_mu)*torch.exp(z_sigma2_log/2)+z_mu + pred_adj = self.decoder(mu,logvar) + # L_rec+=F.binary_cross_entropy(x_pro,x) + + # cost_u = norm * F.binary_cross_entropy_with_logits(pred_adj, labels_sub_u,pos_weight = pos_weight) + cost_u = norm * F.binary_cross_entropy_with_logits(pred_adj, labels ,pos_weight = pos_weight) + # cost_a = norm_a * F.binary_cross_entropy_with_logits(pred_x, labels_sub_a, pos_weight = pos_weight_a) + # cost_a =torch.Tensor(1).fill_(0) + + L_rec_u += cost_u + # L_rec_a += cost_a + + L_rec_u/=L + # L_rec_a/=L + + # z_a = self.reparameterize(mu_a,logvar_a) + # KLD_a = (0.5 / n_features) * torch.mean(torch.sum(-1 - 2 * logvar_a + mu_a.pow(2) + logvar_a.exp().pow(2), 1)) + # KLD_a =torch.Tensor(1).fill_(0) + + # Loss=L_rec*x.size(1) + + + self.pi_.data = (self.pi_/self.pi_.sum()).data + # log_sigma2_c=self.log_sigma2_c + # mu_c=self.mu_c + + # z = torch.randn_like(z_mu) * torch.exp(z_sigma2_log / 2) + z_mu + z = self.reparameterize(mu,logvar) + + gamma_c=torch.exp(torch.log(self.pi_.unsqueeze(0))+self.gaussian_pdfs_log(z,self.mu_c,self.log_sigma2_c))+det + gamma_c = F.softmax(gamma_c) # is softmax a good way? + + gamma_c=gamma_c/(gamma_c.sum(1).view(-1,1)) #shape: batch_size*Clusters + self.pi_.data = gamma_c.mean(0).data # prior need to be re-normalized? In GMM, prior is based on gamma_c:https://brilliant.org/wiki/gaussian-mixture-model/ + + # KLD_u_c=(0.5 / n_nodes)*torch.mean(torch.sum(gamma_c*torch.sum(self.log_sigma2_c.unsqueeze(0)+\ + # torch.exp(2*logvar.unsqueeze(1)-self.log_sigma2_c.unsqueeze(0))+\ + # (mu.unsqueeze(1)-self.mu_c.unsqueeze(0)).pow(2)/torch.exp(self.log_sigma2_c.unsqueeze(0)),2),1)) + + # KLD_u_c-= (0.5/n_nodes)*torch.mean(torch.sum(1+2*logvar,1)) + # gamma_loss = (1 / self.args.nClusters) * torch.mean(torch.sum(gamma_c*torch.log(gamma_c/self.pi_.unsqueeze(0)),1)) - (0.5 / hidden_dim2)*torch.mean(torch.sum(1+2*logvar,1)) + + KLD_u_c=-(0.5/n_nodes)*torch.mean(torch.sum(gamma_c*torch.sum(-1+self.log_sigma2_c.unsqueeze(0)-2*logvar.unsqueeze(1)+ + torch.exp(2*logvar.unsqueeze(1)-self.log_sigma2_c.unsqueeze(0))+ + (mu.unsqueeze(1)-self.mu_c.unsqueeze(0)).pow(2)/torch.exp(self.log_sigma2_c.unsqueeze(0)),2),1)) + + gamma_loss = -(1 / self.args.nClusters) * torch.mean(torch.sum(gamma_c*torch.log(gamma_c/self.pi_.unsqueeze(0)),1)) + + return L_rec_u,-KLD_u_c,-gamma_loss + + def pre_train(self,x,adj,Y,pre_epoch=10): + ''' + This function is used to initialize cluster paramters: pi_, mu_c, log_sigma2_c. + ------------- + paramters: + x: is the feature matrix of graph G. + adj: is the adjacent matrix of graph G. + Y: is the class label for each node in graph G. 
+ ''' + + if not os.path.exists('./pretrain_model_{}.pk'.format(self.args.dataset)): + + Loss=nn.MSELoss() + opti=Adam(self.parameters()) #all paramters in model + + print('Pretraining......') + # epoch_bar=tqdm(range(pre_epoch)) + # for _ in epoch_bar: + for _ in range(pre_epoch): + + self.train() + L=0 + mu, logvar = self.encoder(x,adj) + pred_adj = self.decoder(mu,logvar) + + loss= Loss(pred_adj,adj.to_dense()) + + L+=loss.detach().cpu().numpy() + + opti.zero_grad() + loss.backward() + opti.step() + + # epoch_bar.write('L2={:.4f}'.format(L)) + print('L2={:.4f}'.format(L)) + + self.gc2.load_state_dict(self.gc3.state_dict()) + # self.linear_a2.load_state_dict(self.linear_a3.state_dict()) + + with torch.no_grad(): + mu, logvar = self.encoder(x,adj) + assert F.mse_loss(mu, logvar) == 0 + # assert F.mse_loss(mu_a, logvar_a) == 0 + Z = mu.data.numpy() + + + gmm = GaussianMixture(n_components=self.args.nClusters, covariance_type='diag') + + pre = gmm.fit_predict(Z) + print('Acc={:.4f}%'.format(cluster_acc(pre, Y)[0] * 100)) + + self.pi_.data = torch.from_numpy(gmm.weights_).float() + self.mu_c.data = torch.from_numpy(gmm.means_).float() + self.log_sigma2_c.data = torch.log(torch.from_numpy(gmm.covariances_).float()) + + torch.save(self.state_dict(), './pretrain_model_{}.pk'.format(self.args.dataset)) + else: + self.load_state_dict(torch.load('./pretrain_model_{}.pk'.format(self.args.dataset))) + + def predict(self,mu, logvar): + # z_mu, z_sigma2_log, z_ma,z_a_sigma2_log = self.encoder(x,adj) + # mu, logvar, mu_a, logvar_a = self.encoder(x,adj) + # z = torch.randn_like(mu) * torch.exp(logvar) + mu + z = self.reparameterize(mu,logvar) + pi = self.pi_ + log_sigma2_c = self.log_sigma2_c + mu_c = self.mu_c + gamma_c = torch.exp(torch.log(pi.unsqueeze(0))+self.gaussian_pdfs_log(z,mu_c,log_sigma2_c)) + + gamma=gamma_c.detach().cpu().numpy() + + return np.argmax(gamma,axis=1),gamma + + + def gaussian_pdfs_log(self,x,mus,log_sigma2s): + G=[] + for c in range(self.args.nClusters): + G.append(self.gaussian_pdf_log(x,mus[c:c+1,:],log_sigma2s[c:c+1,:]).view(-1,1)) + return torch.cat(G,1) + + + @staticmethod + def gaussian_pdf_log(x,mu,log_sigma2): + return -0.5*(torch.sum(np.log(np.pi*2)+log_sigma2+(x-mu).pow(2)/torch.exp(log_sigma2),1)) + + def check_parameters(self): + for name, param in self.named_parameters(): + if param.requires_grad: + print(name, param.data,param.data.shape) + +class GCNModelVAECE(nn.Module): + def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args): + super(GCNModelVAECE, self).__init__() + + + self.args = args + self.gc1 = GraphConvolutionSparse(input_feat_dim, hidden_dim1, dropout, act=torch.relu) + self.gc2 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) + self.gc3 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) + # self.dc = InnerProductDecoder(dropout, act=lambda x: x) + self.dc = InnerDecoder(dropout, act=lambda x: x) + + #for embedding attributes/features + self.linear_a1= Linear(n_nodes, hidden_dim1, act = torch.tanh,sparse_inputs=True) # the input dim is the number of nodes + self.linear_a2= Linear(hidden_dim1, hidden_dim2, act = lambda x:x) + self.linear_a3= Linear(hidden_dim1, hidden_dim2, act = lambda x:x) + + #modularity layer + self.modulairty_layer = Linear(hidden_dim2,args.nClusters,act=torch.relu) + # cluster choosing + self.cluster_choose= Linear(hidden_dim2,args.nClusters,act=torch.sigmoid) + + + 
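+        # The three parameters that follow define a learnable Gaussian-mixture model over the
+        # node embedding z (used as the clustering prior in the KL term of the loss):
+        #   p(z) = sum_k pi_k * N(z; mu_k, diag(sigma_k^2)),
+        # where pi_ holds the K mixture weights, mu_c the K component means and
+        # log_sigma2_c the K component log-variances (one row per cluster, K = args.nClusters).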
self.pi_=nn.Parameter(torch.FloatTensor(args.nClusters,).fill_(1)/args.nClusters,requires_grad=True) + self.mu_c=nn.Parameter(torch.FloatTensor(args.nClusters,hidden_dim2).fill_(0),requires_grad=True) + self.log_sigma2_c=nn.Parameter(torch.FloatTensor(args.nClusters,hidden_dim2).fill_(1),requires_grad=True) + + # torch.nn.init.xavier_normal_(self.mu_c) + # torch.nn.init.xavier_normal_(self.log_sigma2_c) + + # calculate mi + + # critic_params = {'dim_x': x.shape[1],'dim_y':y.shape[1],'layers': 2,'embed_dim': 32,'hidden_dim': 64,'activation': 'relu',} + # self.critic_structure = ConcatCritic(hidden_dim2,n_nodes,256,3,'relu',rho=None,) + # self.critic_feature = ConcatCritic(hidden_dim2,input_feat_dim,256,3,'relu',rho=None,) + + def encoder(self, x, adj): + hidden1 = self.gc1(x, adj) + hidden_a1 = self.linear_a1(x.t()) # transpose the input feature matrix + return self.gc2(hidden1, adj), self.gc3(hidden1, adj), self.linear_a2(hidden_a1),self.linear_a3(hidden_a1) + + def decoder(self,mu,mu_a,logvar,logvar_a): + + z_u = self.reparameterize(mu, logvar) + z_a = self.reparameterize(mu_a,logvar_a) + return self.dc((z_u,z_a)) + + def reparameterize(self, mu, logvar): + if self.training: + std = torch.exp(logvar) + eps = torch.randn_like(std) + return eps.mul(std).add_(mu) + else: + return mu + + def forward(self, x, adj): + + mu, logvar, mu_a, logvar_a = self.encoder(x, adj) + z_u = self.reparameterize(mu, logvar) + z_a = self.reparameterize(mu_a,logvar_a) + return self.dc((z_u,z_a)),mu, logvar, mu_a, logvar_a + + def modularity_loss(self, z,adj): + + adj = adj.to_dense() + H = self.modulairty_layer(z) + assert H.shape[0]==z.shape[0] + + n = torch.tensor(1.0*z.shape[0]) + + H_norm = n.sqrt()*H.sqrt()/(H.sqrt().sum()) + print("H_norm shape",H_norm.shape) + print("H_norm ",H_norm) + m = (adj-torch.eye(adj.shape[0])).sum()/2 + D = (adj-torch.eye(adj.shape[0])).sum(1) # the degree of nodes, adj includes self loop + B = (adj-torch.eye(adj.shape[0]))-torch.matmul(D.view(-1,1),D.view(1,-1))/(2*m) # modularity matrix + mod_loss=torch.trace(torch.matmul(torch.matmul(H_norm.t(),B),H_norm)/(4*m)) + print("mod_loss",mod_loss) + + return mod_loss + + def dist(self,x): + # x = x/torch.norm(x,2,dim=1).view(-1,1) + assert len(x.size()) == 2 + norm = (x ** 2).sum(1).view(-1, 1) + dn = (norm + norm.view(1, -1)) - 2.0 * (x @ x.t()) + return torch.sum(torch.relu(dn).sqrt()) + + def mi_loss(self,z,x,a): + # critic_params = {'dim_x': x.shape[1],'dim_y':y.shape[1],'layers': 2,'embed_dim': 32,'hidden_dim': 64,'activation': 'relu',} + # critic = ConcatCritic(rho=None,**critic_params) + indice = torch.randperm(len(z))[0:50] + # mi_x = estimate_mutual_information('dv',z[indice],x[indice],self.critic_structure) + mi_a = estimate_mutual_information('js',z[indice],a[indice],self.critic_feature) + return mi_a + + def change_cluster_grad_false(self): + for name, param in self.named_parameters(): + if name in ['pi_','mu_c','log_sigma2_c']: + param.requires_grad=False + + def change_cluster_grad_true(self): + for name, param in self.named_parameters(): + if name in ['pi_','mu_c','log_sigma2_c']: + param.requires_grad=True + + + def change_nn_grad_false(self): + for name, param in self.named_parameters(): + if name not in ['pi_','mu_c','log_sigma2_c']: + param.requires_grad=False + + def change_nn_grad_true(self): + for name, param in self.named_parameters(): + if name not in ['pi_','mu_c','log_sigma2_c']: + param.requires_grad=True + + def loss(self,x,adj,labels, n_nodes, n_features, norm, pos_weight,L=1): + + det=1e-10 + 
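+        # det is a small constant kept for numerical stability. The terms assembled below are
+        # returned separately and summed by the caller:
+        #   L_rec_u             weighted BCE reconstruction of the adjacency matrix
+        #   L_rec_a             weighted BCE reconstruction of the attribute matrix
+        #   -KLD_u_c            KL between q(z|X,A) and the mixture components, weighted by gamma_c
+        #   -KLD_a              KL between the attribute-embedding posterior and N(0, I)
+        #   -gamma_loss         regularizer on the soft cluster assignments gamma_c
+        #   -0.05*mutual_dist   pushes the cluster centres mu_c apart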
labels_sub_u, labels_sub_a = labels + norm_u, norm_a = norm + pos_weight_u, pos_weight_a = pos_weight + + L_rec_u=0 + L_rec_a=0 + + mi=0 + + mu, logvar, mu_a, logvar_a = self.encoder(x, adj) + + # mutual information loss + + # z_mu, z_sigma2_log = self.encoder(x) + # mi_a = self.mi_loss(mu,adj.to_dense(),x.to_dense()) + for l in range(L): + + # z=torch.randn_like(z_mu)*torch.exp(z_sigma2_log/2)+z_mu + pred_adj, pred_x = self.decoder(mu,mu_a,logvar,logvar_a) + # L_rec+=F.binary_cross_entropy(x_pro,x) + + cost_u = norm_u * F.binary_cross_entropy_with_logits(pred_adj, labels_sub_u, pos_weight = pos_weight_u) + cost_a = norm_a * F.binary_cross_entropy_with_logits(pred_x, labels_sub_a, pos_weight = pos_weight_a) + # cost_a =torch.Tensor(1).fill_(0) + + L_rec_u += cost_u + L_rec_a += cost_a + + + L_rec_u/=L + L_rec_a/=L + + # z_a = self.reparameterize(mu_a,logvar_a) + # KLD_a = (0.5 / n_features) * torch.mean(torch.sum(-1 - 2 * logvar_a + mu_a.pow(2) + logvar_a.exp().pow(2), 1)) + KLD_a = -(0.5 / n_features) * torch.mean(torch.sum(-1 - 2 * logvar_a + mu_a.pow(2) + logvar_a.exp().pow(2), 1)) + # KLD_a =torch.Tensor(1).fill_(0) + + # Loss=L_rec*x.size(1) + + + # log_sigma2_c=self.log_sigma2_c + # mu_c=self.mu_c + + # z = torch.randn_like(z_mu) * torch.exp(z_sigma2_log / 2) + z_mu + z = self.reparameterize(mu,logvar) + + # mod_loss=self.modularity_loss(z,adj) + # gamma_c=torch.exp(torch.log(self.pi_.unsqueeze(0))+self.gaussian_pdfs_log(z,self.mu_c,self.log_sigma2_c))+det + # gamma_c=torch.exp(self.gaussian_pdfs_log(z,self.mu_c,self.log_sigma2_c))+det + gamma_c = self.cluster_choose(z) + # print('gamma_c:',gamma_c) + + gamma_c=gamma_c/(gamma_c.sum(1).view(-1,1))#batch_size*Clusters + gamma_c=F.softmax(gamma_c) + # print('gamma_c normalized:',gamma_c) + # print('gamma_c argmax:',torch.argmax(gamma_c,1)) + print('gamma_c counter:',Counter(torch.argmax(gamma_c,1).tolist())) + + + # self.pi_.data = (self.pi_/self.pi_.sum()).data # prior need to be re-normalized? In GMM, prior is based on gamma_c:https://brilliant.org/wiki/gaussian-mixture-model/ + # self.pi_.data = gamma_c.mean(0).data # prior need to be re-normalized? 
In GMM, prior is based on gamma_c:https://brilliant.org/wiki/gaussian-mixture-model/ + + KLD_u_c=-(0.5/n_nodes)*torch.mean(torch.sum(gamma_c*torch.sum(-1+self.log_sigma2_c.unsqueeze(0)-2*logvar.unsqueeze(1)+torch.exp(2*logvar.unsqueeze(1)-self.log_sigma2_c.unsqueeze(0))+(mu.unsqueeze(1)-self.mu_c.unsqueeze(0)).pow(2)/torch.exp(self.log_sigma2_c.unsqueeze(0)),2),1)) + + # KLD_u_c_test=-(0.5/n_nodes)*F.mse_loss(mu.unsqueeze(1),self.mu_c.unsqueeze(0),reduction='none') + # print('kld_u_c_test:',KLD_u_c_test.sum(2)) + + + # KLD_u_c=-(0.5/n_nodes)*F.mse_loss(mu.unsqueeze(1),self.mu_c.unsqueeze(0)) + + # KLD_u_c=(0.5 / n_nodes)*torch.mean(torch.sum(gamma_c*torch.sum(self.log_sigma2_c.unsqueeze(0)+\ + # torch.exp(2*logvar.unsqueeze(1)-self.log_sigma2_c.unsqueeze(0))+\ + # (mu.unsqueeze(1)-self.mu_c.unsqueeze(0)).pow(2)/torch.exp(self.log_sigma2_c.unsqueeze(0)),2),1)) + + mutual_dist = (1/(self.args.nClusters**2))*self.dist(self.mu_c) + + gamma_loss=-(1/self.args.nClusters)*torch.mean(torch.sum(gamma_c*torch.log(gamma_c),1)) + # gamma_loss = (1 / self.args.nClusters) * torch.mean(torch.sum(gamma_c*torch.log(gamma_c),1)) - (0.5 / self.args.hid_dim)*torch.mean(torch.sum(1+2*logvar,1)) + # gamma_loss = -(1 / self.args.nClusters) * torch.mean(torch.sum(gamma_c*torch.log(gamma_c/self.pi_.unsqueeze(0)),1)) + # gamma_loss = (1 / self.args.nClusters) * torch.mean(torch.sum(gamma_c*torch.log(gamma_c/self.pi_.unsqueeze(0)),1)) - (0.5 / self.args.hid_dim)*torch.mean(torch.sum(1+2*logvar,1)) + + + # return L_rec_u , L_rec_a , -KLD_u_c ,-KLD_a + return L_rec_u , L_rec_a , -KLD_u_c ,-KLD_a , -gamma_loss, -0.05*mutual_dist + # return L_rec_u , L_rec_a , -KLD_u_c ,-KLD_a , -gamma_loss,-mi_a + # return L_rec_u + L_rec_a + KLD_u_c + KLD_a + gamma_loss + + + def pre_train(self,x,adj,Y,pre_epoch=20): + ''' + This function is used to initialize cluster paramters: pi_, mu_c, log_sigma2_c. + ------------- + paramters: + x: is the feature matrix of graph G. + adj: is the adjacent matrix of graph G. + Y: is the class label for each node in graph G. 
+ ''' + + if not os.path.exists('./pretrain_model_{}.pk'.format(self.args.dataset)): + + Loss=nn.MSELoss() + opti=Adam(self.parameters()) #all paramters in model + + print('Pretraining......') + # epoch_bar=tqdm(range(pre_epoch)) + # for _ in epoch_bar: + for _ in range(pre_epoch): + + self.train() + L=0 + mu, logvar, mu_a, logvar_a = self.encoder(x,adj) + pred_adj, pred_x = self.decoder(mu,mu_a,logvar,logvar_a) + + loss= Loss(pred_x,x.to_dense()) + Loss(pred_adj,adj.to_dense()) + + L+=loss.detach().cpu().numpy() + + opti.zero_grad() + loss.backward() + opti.step() + + # epoch_bar.write('L2={:.4f}'.format(L)) + print('L2={:.4f}'.format(L)) + + self.gc2.load_state_dict(self.gc3.state_dict()) + self.linear_a2.load_state_dict(self.linear_a3.state_dict()) + + + with torch.no_grad(): + mu, logvar, mu_a, logvar_a = self.encoder(x,adj) + assert F.mse_loss(mu, logvar) == 0 + assert F.mse_loss(mu_a, logvar_a) == 0 + Z = mu.data.numpy() + + + gmm = GaussianMixture(n_components=self.args.nClusters, covariance_type='diag') + + pre = gmm.fit_predict(Z) + print('Acc={:.4f}%'.format(cluster_acc(pre, Y)[0] * 100)) + + self.pi_.data = torch.from_numpy(gmm.weights_).float() + self.mu_c.data = torch.from_numpy(gmm.means_).float() + self.log_sigma2_c.data = torch.log(torch.from_numpy(gmm.covariances_).float()) + + torch.save(self.state_dict(), './pretrain_model_{}.pk'.format(self.args.dataset)) + else: + self.load_state_dict(torch.load('./pretrain_model_{}.pk'.format(self.args.dataset))) + + def predict(self,mu, logvar): + # z_mu, z_sigma2_log, z_ma,z_a_sigma2_log = self.encoder(x,adj) + # mu, logvar, mu_a, logvar_a = self.encoder(x,adj) + # z = torch.randn_like(mu) * torch.exp(z_sigma2_log / 2) + z_mu + det=1e-10 + z = self.reparameterize(mu,logvar) + pi = self.pi_ + # log_sigma2_c = self.log_sigma2_c + # mu_c = self.mu_c + # gamma_c = torch.exp(torch.log(pi.unsqueeze(0))+self.gaussian_pdfs_log(z,mu_c,log_sigma2_c)) + gamma_c = torch.exp(self.gaussian_pdfs_log(z,self.mu_c,self.log_sigma2_c))+det + print('gamma_c:',gamma_c) + gamma_c=gamma_c/(gamma_c.sum(1).view(-1,1))#batch_size*Clusters + gamma_c=F.softmax(gamma_c) + print('gamma_c,normalized:',gamma_c) + print('gamma_c argmax:',torch.argmax(gamma_c,1)) + print('gamma_c argmax counter:',Counter(torch.argmax(gamma_c,1).tolist())) + + gamma=gamma_c.detach().cpu().numpy() + return np.argmax(gamma,axis=1),gamma, z + + def predict_dist(self,mu, logvar): + # z_mu, z_sigma2_log, z_ma,z_a_sigma2_log = self.encoder(x,adj) + # mu, logvar, mu_a, logvar_a = self.encoder(x,adj) + # z = torch.randn_like(mu) * torch.exp(z_sigma2_log / 2) + z_mu + z = self.reparameterize(mu,logvar) + pi = self.pi_ + log_sigma2_c = self.log_sigma2_c + mu_c = self.mu_c + # gamma_c = torch.exp(self.gaussian_pdfs_log(z,mu_c,log_sigma2_c)) + + # gamma=gamma_c.detach().cpu().numpy() + + gamma=[] + for e in range(z.shape[0]): + temp_dist=[] + for m in range(mu_c.shape[0]): + temp_dist.append(F.mse_loss(z[e],mu_c[m]).data) + gamma.append(temp_dist) + + return np.argmin(gamma,axis=1),np.array(gamma) + + def plot_tsne(self,dataset,epoch,z,true_label,desp): + + cluster_labels=set(true_label) + print(cluster_labels) + index_group= [np.array(true_label)==y for y in cluster_labels] + colors = cm.tab20(range(len(index_group))) + + tsne = TSNE(n_components=2, init='pca',perplexity=50.0) + data = torch.cat([z,self.mu_c.to('cpu')],dim=0).detach().numpy() + zs_tsne = tsne.fit_transform(data) + + fig, ax = plt.subplots() + cmap = plt.get_cmap("tab10") + for index,c in zip(index_group,colors): + 
ax.scatter(zs_tsne[np.ix_(index), 0], zs_tsne[np.ix_(index), 1],color=c,s=2) + + if 'predict' in desp.split(): + for index,c in enumerate(colors): + ax.scatter(zs_tsne[z.shape[0]+index:z.shape[0]+index+1, 0], zs_tsne[z.shape[0]+index:z.shape[0]+index+1, 1],marker='^',color=c,s=40) + else: + ax.scatter(zs_tsne[z.shape[0]:, 0], zs_tsne[z.shape[0]:, 1],marker='^',color='b',s=40) + plt.title(desp) + # ax.legend() + plt.savefig("{}_{}_tsne_{}.pdf".format(dataset,epoch,desp)) + + def gaussian_pdfs_log(self,x,mus,log_sigma2s): + G=[] + for c in range(self.args.nClusters): + G.append(self.gaussian_pdf_log(x,mus[c:c+1,:],log_sigma2s[c:c+1,:]).view(-1,1)) + return torch.cat(G,1) + + + @staticmethod + def gaussian_pdf_log(x,mu,log_sigma2): + return -0.5*(torch.sum(np.log(np.pi*2)+log_sigma2+(x-mu).pow(2)/torch.exp(log_sigma2),1)) # np.pi*2, not square + + def check_parameters(self): + for name, param in self.named_parameters(): + if param.requires_grad: + print(name, param.data,param.data.shape) + def check_gradient(self): + for name, param in self.named_parameters(): + if param.requires_grad: + print('grad: ',name) + print(param.grad,param.grad.shape) diff --git a/model_cluster_choose.py b/model_cluster_choose.py new file mode 100644 index 0000000..c6367b6 --- /dev/null +++ b/model_cluster_choose.py @@ -0,0 +1,705 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.optim import Adam +from sklearn.mixture import GaussianMixture +from sklearn.metrics import accuracy_score +from sklearn.manifold import TSNE +import matplotlib.pyplot as plt +import matplotlib.cm as cm +import numpy as np +import os +from tqdm import tqdm + +from layers import GraphConvolution, GraphConvolutionSparse, Linear, InnerDecoder, InnerProductDecoder +from utils import cluster_acc + +from utils_smiles import * +from estimators import estimate_mutual_information +from collections import Counter + +class GCNModelAE(nn.Module): + def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args): + super(GCNModelAE, self).__init__() + + self.args = args + self.gc1 = GraphConvolutionSparse(input_feat_dim, hidden_dim1, dropout, act=torch.relu) + self.gc2 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) + self.dc = InnerProductDecoder(dropout, act=lambda x: x) + # self.dc = InnerDecoder(dropout, act=lambda x: x) + + def forward(self, x, adj): + z = self.gc1(x,adj) + z = self.gc2(z,adj) + return self.dc(z),z,None + + + def loss(self,pred_adj,labels, n_nodes, n_features, norm, pos_weight,L=1): + + cost = norm * F.binary_cross_entropy_with_logits(pred_adj, labels,pos_weight = pos_weight) + return cost, + + def check_parameters(self): + for name, param in self.named_parameters(): + if param.requires_grad: + print(name, param.data,param.data.shape) + +class GCNModelVAE(nn.Module): + def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args): + super(GCNModelVAE, self).__init__() + + self.args = args + self.gc1 = GraphConvolutionSparse(input_feat_dim, hidden_dim1, dropout, act=torch.relu) + self.gc2 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) + self.gc3 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) + self.dc = InnerProductDecoder(dropout, act=lambda x: x) + # self.dc = InnerDecoder(dropout, act=lambda x: x) + + + def encoder(self, x, adj): + hidden1 = self.gc1(x, adj) + return self.gc2(hidden1, adj), self.gc3(hidden1, adj) + + def decoder(self,mu,logvar): + + z_u = self.reparameterize(mu, 
logvar) + + return self.dc(z_u) + + def reparameterize(self, mu, logvar): + std = torch.exp(logvar) + eps = torch.randn_like(std) + return eps.mul(std).add_(mu) + + # if self.training: + # std = torch.exp(logvar) + # eps = torch.randn_like(std) + # return eps.mul(std).add_(mu) + # else: + # return mu + + def forward(self, x, adj): + + mu, logvar = self.encoder(x, adj) + z_u = self.reparameterize(mu, logvar) + # z_a = self.reparameterize(mu_a,logvar_a) + return self.dc(z_u),mu, logvar + + + def loss(self,x,adj,labels, n_nodes, n_features, norm, pos_weight,L=1): + + det=1e-10 + norm_u = norm + pos_weight_u= pos_weight + + L_rec_u=0 + + mu, logvar = self.encoder(x, adj) + # z_mu, z_sigma2_log = self.encoder(x) + for l in range(L): + + pred_adj = self.decoder(mu,logvar) + + cost_u = norm * F.binary_cross_entropy_with_logits(pred_adj, labels ,pos_weight = pos_weight) + + L_rec_u += cost_u + + L_rec_u/=L + + KLD = -0.5 / n_nodes * torch.mean(torch.sum(1 + 2 * logvar - mu.pow(2) - logvar.exp().pow(2),1)) + return L_rec_u, KLD + + + def check_parameters(self): + for name, param in self.named_parameters(): + if param.requires_grad: + print(name, param.data,param.data.shape) + + +class GCNModelVAECD(nn.Module): + def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args): + super(GCNModelVAECD, self).__init__() + + self.args = args + self.gc1 = GraphConvolutionSparse(input_feat_dim, hidden_dim1, dropout, act=torch.relu) + self.gc2 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) + self.gc3 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) + self.dc = InnerProductDecoder(dropout, act=lambda x: x) + # self.dc = InnerDecoder(dropout, act=lambda x: x) + + #for embedding attributes/features + # self.linear_a1= Linear(n_nodes, hidden_dim1, act = torch.tanh,sparse_inputs=True) # the input dim is the number of nodes + # self.linear_a2= Linear(hidden_dim1, hidden_dim2, act = lambda x:x) + # self.linear_a3= Linear(hidden_dim1, hidden_dim2, act = lambda x:x) + + + self.pi_=nn.Parameter(torch.FloatTensor(args.nClusters,).fill_(1)/args.nClusters,requires_grad=True) + self.mu_c=nn.Parameter(torch.randn(args.nClusters,hidden_dim2),requires_grad=True) + self.log_sigma2_c=nn.Parameter(torch.randn(args.nClusters,hidden_dim2),requires_grad=True) + + def encoder(self, x, adj): + hidden1 = self.gc1(x, adj) + # hidden_a1 = self.linear_a1(x.t()) # transpose the input feature matrix + + return self.gc2(hidden1, adj), self.gc3(hidden1, adj) + + def decoder(self,mu,logvar): + + z_u = self.reparameterize(mu, logvar) + # z_a = self.reparameterize(mu_a,logvar_a) + + return self.dc(z_u) + + def reparameterize(self, mu, logvar): + if self.training: + std = torch.exp(logvar) + eps = torch.randn_like(std) + return eps.mul(std).add_(mu) + else: + return mu + + def forward(self, x, adj): + + mu, logvar = self.encoder(x, adj) + z_u = self.reparameterize(mu, logvar) + # z_a = self.reparameterize(mu_a,logvar_a) + return self.dc(z_u),mu, logvar + + + def loss(self,x,adj,labels, n_nodes, n_features, norm, pos_weight,L=1): + + det=1e-10 + norm_u = norm + pos_weight_u= pos_weight + + L_rec_u=0 + + mu, logvar = self.encoder(x, adj) + hidden_dim2 = mu.shape[1] + + # z_mu, z_sigma2_log = self.encoder(x) + for l in range(L): + + # z=torch.randn_like(z_mu)*torch.exp(z_sigma2_log/2)+z_mu + pred_adj = self.decoder(mu,logvar) + # L_rec+=F.binary_cross_entropy(x_pro,x) + + # cost_u = norm * F.binary_cross_entropy_with_logits(pred_adj, labels_sub_u,pos_weight = pos_weight) + cost_u = 
norm * F.binary_cross_entropy_with_logits(pred_adj, labels ,pos_weight = pos_weight) + # cost_a = norm_a * F.binary_cross_entropy_with_logits(pred_x, labels_sub_a, pos_weight = pos_weight_a) + # cost_a =torch.Tensor(1).fill_(0) + + L_rec_u += cost_u + # L_rec_a += cost_a + + L_rec_u/=L + # L_rec_a/=L + + # z_a = self.reparameterize(mu_a,logvar_a) + # KLD_a = (0.5 / n_features) * torch.mean(torch.sum(-1 - 2 * logvar_a + mu_a.pow(2) + logvar_a.exp().pow(2), 1)) + # KLD_a =torch.Tensor(1).fill_(0) + + # Loss=L_rec*x.size(1) + + + self.pi_.data = (self.pi_/self.pi_.sum()).data + # log_sigma2_c=self.log_sigma2_c + # mu_c=self.mu_c + + # z = torch.randn_like(z_mu) * torch.exp(z_sigma2_log / 2) + z_mu + z = self.reparameterize(mu,logvar) + + gamma_c=torch.exp(torch.log(self.pi_.unsqueeze(0))+self.gaussian_pdfs_log(z,self.mu_c,self.log_sigma2_c))+det + gamma_c = F.softmax(gamma_c) # is softmax a good way? + + gamma_c=gamma_c/(gamma_c.sum(1).view(-1,1)) #shape: batch_size*Clusters + self.pi_.data = gamma_c.mean(0).data # prior need to be re-normalized? In GMM, prior is based on gamma_c:https://brilliant.org/wiki/gaussian-mixture-model/ + + # KLD_u_c=(0.5 / n_nodes)*torch.mean(torch.sum(gamma_c*torch.sum(self.log_sigma2_c.unsqueeze(0)+\ + # torch.exp(2*logvar.unsqueeze(1)-self.log_sigma2_c.unsqueeze(0))+\ + # (mu.unsqueeze(1)-self.mu_c.unsqueeze(0)).pow(2)/torch.exp(self.log_sigma2_c.unsqueeze(0)),2),1)) + + # KLD_u_c-= (0.5/n_nodes)*torch.mean(torch.sum(1+2*logvar,1)) + # gamma_loss = (1 / self.args.nClusters) * torch.mean(torch.sum(gamma_c*torch.log(gamma_c/self.pi_.unsqueeze(0)),1)) - (0.5 / hidden_dim2)*torch.mean(torch.sum(1+2*logvar,1)) + + KLD_u_c=-(0.5/n_nodes)*torch.mean(torch.sum(gamma_c*torch.sum(-1+self.log_sigma2_c.unsqueeze(0)-2*logvar.unsqueeze(1)+ + torch.exp(2*logvar.unsqueeze(1)-self.log_sigma2_c.unsqueeze(0))+ + (mu.unsqueeze(1)-self.mu_c.unsqueeze(0)).pow(2)/torch.exp(self.log_sigma2_c.unsqueeze(0)),2),1)) + + gamma_loss = -(1 / self.args.nClusters) * torch.mean(torch.sum(gamma_c*torch.log(gamma_c/self.pi_.unsqueeze(0)),1)) + + return L_rec_u,-KLD_u_c,-gamma_loss + + def pre_train(self,x,adj,Y,pre_epoch=50): + ''' + This function is used to initialize cluster paramters: pi_, mu_c, log_sigma2_c. + ------------- + paramters: + x: is the feature matrix of graph G. + adj: is the adjacent matrix of graph G. + Y: is the class label for each node in graph G. 
+ ''' + + if not os.path.exists('./pretrain_model_{}.pk'.format(self.args.dataset)): + + Loss=nn.MSELoss() + opti=Adam(self.parameters()) #all paramters in model + + print('Pretraining......') + # epoch_bar=tqdm(range(pre_epoch)) + # for _ in epoch_bar: + for _ in range(pre_epoch): + + self.train() + L=0 + mu, logvar = self.encoder(x,adj) + pred_adj = self.decoder(mu,logvar) + + loss= Loss(pred_adj,adj.to_dense()) + + L+=loss.detach().cpu().numpy() + + opti.zero_grad() + loss.backward() + opti.step() + + # epoch_bar.write('L2={:.4f}'.format(L)) + print('L2={:.4f}'.format(L)) + + self.gc2.load_state_dict(self.gc3.state_dict()) + # self.linear_a2.load_state_dict(self.linear_a3.state_dict()) + + with torch.no_grad(): + mu, logvar = self.encoder(x,adj) + assert F.mse_loss(mu, logvar) == 0 + # assert F.mse_loss(mu_a, logvar_a) == 0 + Z = mu.data.numpy() + + + gmm = GaussianMixture(n_components=self.args.nClusters, covariance_type='diag') + + pre = gmm.fit_predict(Z) + print('Acc={:.4f}%'.format(cluster_acc(pre, Y)[0] * 100)) + + self.pi_.data = torch.from_numpy(gmm.weights_).float() + self.mu_c.data = torch.from_numpy(gmm.means_).float() + self.log_sigma2_c.data = torch.log(torch.from_numpy(gmm.covariances_).float()) + + torch.save(self.state_dict(), './pretrain_model_{}.pk'.format(self.args.dataset)) + else: + self.load_state_dict(torch.load('./pretrain_model_{}.pk'.format(self.args.dataset))) + + def predict(self,mu, logvar): + # z_mu, z_sigma2_log, z_ma,z_a_sigma2_log = self.encoder(x,adj) + # mu, logvar, mu_a, logvar_a = self.encoder(x,adj) + # z = torch.randn_like(mu) * torch.exp(logvar) + mu + z = self.reparameterize(mu,logvar) + pi = self.pi_ + log_sigma2_c = self.log_sigma2_c + mu_c = self.mu_c + gamma_c = torch.exp(torch.log(pi.unsqueeze(0))+self.gaussian_pdfs_log(z,mu_c,log_sigma2_c)) + + gamma=gamma_c.detach().cpu().numpy() + + return np.argmax(gamma,axis=1),gamma + + + def gaussian_pdfs_log(self,x,mus,log_sigma2s): + G=[] + for c in range(self.args.nClusters): + G.append(self.gaussian_pdf_log(x,mus[c:c+1,:],log_sigma2s[c:c+1,:]).view(-1,1)) + return torch.cat(G,1) + + + @staticmethod + def gaussian_pdf_log(x,mu,log_sigma2): + return -0.5*(torch.sum(np.log(np.pi*2)+log_sigma2+(x-mu).pow(2)/torch.exp(log_sigma2),1)) + + def check_parameters(self): + for name, param in self.named_parameters(): + if param.requires_grad: + print(name, param.data,param.data.shape) + +class GCNModelVAECE(nn.Module): + def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args): + super(GCNModelVAECE, self).__init__() + + + self.args = args + self.gc1 = GraphConvolutionSparse(input_feat_dim, hidden_dim1, dropout, act=torch.relu) + self.gc2 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) + self.gc3 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x) + # self.dc = InnerProductDecoder(dropout, act=lambda x: x) + self.dc = InnerDecoder(dropout, act=lambda x: x) + + #for embedding attributes/features + self.linear_a1= Linear(n_nodes, hidden_dim1, act = torch.tanh,sparse_inputs=True) # the input dim is the number of nodes + self.linear_a2= Linear(hidden_dim1, hidden_dim2, act = lambda x:x) + self.linear_a3= Linear(hidden_dim1, hidden_dim2, act = lambda x:x) + + #modularity layer + self.modulairty_layer = Linear(hidden_dim2,args.nClusters,act=torch.relu) + self.cluster_choose= Linear(hidden_dim2,args.nClusters,act=torch.relu) + + + self.pi_=nn.Parameter(torch.FloatTensor(args.nClusters,).fill_(1)/args.nClusters,requires_grad=True) + 
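+        # Mixture means and log-variances follow; in this variant mu_c is Xavier-initialized
+        # just below, while log_sigma2_c is created at 0 (unit variance) with
+        # requires_grad=False, so of the three mixture parameters only pi_ and mu_c are trained.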
self.mu_c=nn.Parameter(torch.FloatTensor(args.nClusters,hidden_dim2).fill_(0.00),requires_grad=True) + self.log_sigma2_c=nn.Parameter(torch.FloatTensor(args.nClusters,hidden_dim2).fill_(0.0),requires_grad=False) + + torch.nn.init.xavier_normal_(self.mu_c) + # torch.nn.init.xavier_normal_(self.log_sigma2_c) + + # calculate mi + + # critic_params = {'dim_x': x.shape[1],'dim_y':y.shape[1],'layers': 2,'embed_dim': 32,'hidden_dim': 64,'activation': 'relu',} + # self.critic_structure = ConcatCritic(hidden_dim2,n_nodes,256,3,'relu',rho=None,) + # self.critic_feature = ConcatCritic(hidden_dim2,input_feat_dim,256,3,'relu',rho=None,) + + def encoder(self, x, adj): + hidden1 = self.gc1(x, adj) + hidden_a1 = self.linear_a1(x.t()) # transpose the input feature matrix + return self.gc2(hidden1, adj), self.gc3(hidden1, adj), self.linear_a2(hidden_a1),self.linear_a3(hidden_a1) + + def decoder(self,mu,mu_a,logvar,logvar_a): + + z_u = self.reparameterize(mu, logvar) + z_a = self.reparameterize(mu_a,logvar_a) + return self.dc((z_u,z_a)) + + def reparameterize(self, mu, logvar): + if self.training: + std = torch.exp(logvar) + eps = torch.randn_like(std) + return eps.mul(std).add_(mu) + else: + return mu + + def forward(self, x, adj): + + mu, logvar, mu_a, logvar_a = self.encoder(x, adj) + z_u = self.reparameterize(mu, logvar) + z_a = self.reparameterize(mu_a,logvar_a) + return self.dc((z_u,z_a)),mu, logvar, mu_a, logvar_a + + def modularity_loss(self, z,adj): + + adj = adj.to_dense() + H = self.modulairty_layer(z) + assert H.shape[0]==z.shape[0] + + n = torch.tensor(1.0*z.shape[0]) + + H_norm = n.sqrt()*H.sqrt()/(H.sqrt().sum()) + print("H_norm shape",H_norm.shape) + print("H_norm ",H_norm) + m = (adj-torch.eye(adj.shape[0])).sum()/2 + D = (adj-torch.eye(adj.shape[0])).sum(1) # the degree of nodes, adj includes self loop + B = (adj-torch.eye(adj.shape[0]))-torch.matmul(D.view(-1,1),D.view(1,-1))/(2*m) # modularity matrix + mod_loss=torch.trace(torch.matmul(torch.matmul(H_norm.t(),B),H_norm)/(4*m)) + print("mod_loss",mod_loss) + + return mod_loss + + def dist(self,x): + # x = x/torch.norm(x,2,dim=1).view(-1,1) + assert len(x.size()) == 2 + norm = (x ** 2).sum(1).view(-1, 1) + dn = (norm + norm.view(1, -1)) - 2.0 * (x @ x.t()) + return torch.sum(torch.relu(dn).sqrt()) + + def mi_loss(self,z,x,a): + # critic_params = {'dim_x': x.shape[1],'dim_y':y.shape[1],'layers': 2,'embed_dim': 32,'hidden_dim': 64,'activation': 'relu',} + # critic = ConcatCritic(rho=None,**critic_params) + indice = torch.randperm(len(z))[0:50] + # mi_x = estimate_mutual_information('dv',z[indice],x[indice],self.critic_structure) + mi_a = estimate_mutual_information('js',z[indice],a[indice],self.critic_feature) + return mi_a + + def change_cluster_grad_false(self): + for name, param in self.named_parameters(): + if name in ['pi_','mu_c','log_sigma2_c']: + param.requires_grad=False + + def change_cluster_grad_true(self): + for name, param in self.named_parameters(): + if name in ['pi_','mu_c','log_sigma2_c']: + param.requires_grad=True + + + def change_nn_grad_false(self): + for name, param in self.named_parameters(): + if name not in ['pi_','mu_c','log_sigma2_c']: + param.requires_grad=False + + def change_nn_grad_true(self): + for name, param in self.named_parameters(): + if name not in ['pi_','mu_c','log_sigma2_c']: + param.requires_grad=True + + def loss(self,x,adj,labels, n_nodes, n_features, norm, pos_weight,L=1): + + det=1e-10 + labels_sub_u, labels_sub_a = labels + norm_u, norm_a = norm + pos_weight_u, pos_weight_a = pos_weight + + 
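+        # labels, norm and pos_weight arrive as (structure, attribute) pairs. The loop below
+        # draws L reparameterized samples of the latent codes and averages the two weighted
+        # BCE reconstruction terms over them (L=1 by default, i.e. a single-sample estimate).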
L_rec_u=0 + L_rec_a=0 + + mi=0 + + mu, logvar, mu_a, logvar_a = self.encoder(x, adj) + + # mutual information loss + + # z_mu, z_sigma2_log = self.encoder(x) + # mi_a = self.mi_loss(mu,adj.to_dense(),x.to_dense()) + for l in range(L): + + # z=torch.randn_like(z_mu)*torch.exp(z_sigma2_log/2)+z_mu + pred_adj, pred_x = self.decoder(mu,mu_a,logvar,logvar_a) + # L_rec+=F.binary_cross_entropy(x_pro,x) + + cost_u = norm_u * F.binary_cross_entropy_with_logits(pred_adj, labels_sub_u, pos_weight = pos_weight_u) + cost_a = norm_a * F.binary_cross_entropy_with_logits(pred_x, labels_sub_a, pos_weight = pos_weight_a) + # cost_a =torch.Tensor(1).fill_(0) + + L_rec_u += cost_u + L_rec_a += cost_a + + + L_rec_u/=L + L_rec_a/=L + + # z_a = self.reparameterize(mu_a,logvar_a) + # KLD_a = (0.5 / n_features) * torch.mean(torch.sum(-1 - 2 * logvar_a + mu_a.pow(2) + logvar_a.exp().pow(2), 1)) + KLD_a = -(0.5 / n_features) * torch.mean(torch.sum(-1 - 2 * logvar_a + mu_a.pow(2) + logvar_a.exp().pow(2), 1)) + # KLD_a =torch.Tensor(1).fill_(0) + + # Loss=L_rec*x.size(1) + + + # log_sigma2_c=self.log_sigma2_c + # mu_c=self.mu_c + + # z = torch.randn_like(z_mu) * torch.exp(z_sigma2_log / 2) + z_mu + z = self.reparameterize(mu,logvar) + + # mod_loss=self.modularity_loss(z,adj) + # gamma_c=torch.exp(torch.log(self.pi_.unsqueeze(0))+self.gaussian_pdfs_log(z,self.mu_c,self.log_sigma2_c))+det + # gamma_c=torch.exp(self.gaussian_pdfs_log(z,self.mu_c,self.log_sigma2_c))+det + gamma_c = self.cluster_choose(self.reparameterize(mu,logvar)) + # print('gamma_c:',gamma_c) + + # gamma_c=gamma_c/(gamma_c.sum(1).view(-1,1))#batch_size*Clusters + gamma_c=F.softmax(gamma_c) + # print('gamma_c normalized:',gamma_c) + # print('gamma_c argmax:',torch.argmax(gamma_c,1)) + print('gamma_c counter:',Counter(torch.argmax(gamma_c,1).tolist())) + + + # self.pi_.data = (self.pi_/self.pi_.sum()).data # prior need to be re-normalized? In GMM, prior is based on gamma_c:https://brilliant.org/wiki/gaussian-mixture-model/ + # self.pi_.data = gamma_c.mean(0).data # prior need to be re-normalized? 
In GMM, prior is based on gamma_c:https://brilliant.org/wiki/gaussian-mixture-model/ + + KLD_u_c=-(0.5/n_nodes)*torch.mean(torch.sum(gamma_c*torch.sum(-1+self.log_sigma2_c.unsqueeze(0)-2*logvar.unsqueeze(1)+torch.exp(2*logvar.unsqueeze(1)-self.log_sigma2_c.unsqueeze(0))+(mu.unsqueeze(1)-self.mu_c.unsqueeze(0)).pow(2)/torch.exp(self.log_sigma2_c.unsqueeze(0)),2),1)) + # KLD_u_c=-(0.5/n_nodes)*torch.mean(torch.sum(gamma_c*torch.sum(-1-2*logvar.unsqueeze(1)+torch.exp(2*logvar.unsqueeze(1))+(mu.unsqueeze(1)-self.mu_c.unsqueeze(0)).pow(2),2),1)) + # temp_kld=-(0.5/n_nodes)*torch.sum((mu.unsqueeze(1)-self.mu_c.unsqueeze(0)).pow(2),2) + + # KLD_u_c_test=-(0.5/n_nodes)*F.mse_loss(mu.unsqueeze(1),self.mu_c.unsqueeze(0),reduction='none') + # print('kld_u_c_test:',KLD_u_c_test.sum(2)) + + + # KLD_u_c=-(0.5/n_nodes)*F.mse_loss(mu.unsqueeze(1),self.mu_c.unsqueeze(0)) + + # KLD_u_c=(0.5 / n_nodes)*torch.mean(torch.sum(gamma_c*torch.sum(self.log_sigma2_c.unsqueeze(0)+\ + # torch.exp(2*logvar.unsqueeze(1)-self.log_sigma2_c.unsqueeze(0))+\ + # (mu.unsqueeze(1)-self.mu_c.unsqueeze(0)).pow(2)/torch.exp(self.log_sigma2_c.unsqueeze(0)),2),1)) + + mutual_dist = (1/(self.args.nClusters**2))*self.dist(self.mu_c) + + # gamma_loss=-(1/self.args.nClusters)*torch.mean(torch.sum(gamma_c*torch.log(gamma_c),1)) + # gamma_loss = (1 / self.args.nClusters) * torch.mean(torch.sum(gamma_c*torch.log(gamma_c),1)) - (0.5 / self.args.hid_dim)*torch.mean(torch.sum(1+2*logvar,1)) + gamma_loss = -(1 / self.args.nClusters) * torch.mean(torch.sum(gamma_c*torch.log(gamma_c/self.pi_.unsqueeze(0)),1)) + # gamma_loss = (1 / self.args.nClusters) * torch.mean(torch.sum(gamma_c*torch.log(gamma_c/self.pi_.unsqueeze(0)),1)) - (0.5 / self.args.hid_dim)*torch.mean(torch.sum(1+2*logvar,1)) + + + # return L_rec_u , L_rec_a , -KLD_u_c ,-KLD_a + return L_rec_u , 0.1*L_rec_a , -30*KLD_u_c ,-KLD_a , -gamma_loss, -0.05*mutual_dist + # return L_rec_u , L_rec_a , -KLD_u_c ,-KLD_a , -gamma_loss,-mi_a + # return L_rec_u + L_rec_a + KLD_u_c + KLD_a + gamma_loss + + + def pre_train(self,x,adj,Y,pre_epoch=22): + ''' + This function is used to initialize cluster paramters: pi_, mu_c, log_sigma2_c. + ------------- + paramters: + x: is the feature matrix of graph G. + adj: is the adjacent matrix of graph G. + Y: is the class label for each node in graph G. 
+ ''' + + if not os.path.exists('./pretrain_model_{}_{}.pk'.format(self.args.dataset,pre_epoch)): + + Loss=nn.MSELoss() + opti=Adam(self.parameters()) #all paramters in model + + print('Pretraining......') + # epoch_bar=tqdm(range(pre_epoch)) + # for _ in epoch_bar: + for _ in range(pre_epoch): + + self.train() + L=0 + mu, logvar, mu_a, logvar_a = self.encoder(x,adj) + pred_adj, pred_x = self.decoder(mu,mu_a,logvar,logvar_a) + + loss= Loss(pred_x,x) + Loss(pred_adj,adj) + + L+=loss.detach().cpu().numpy() + + opti.zero_grad() + loss.backward() + opti.step() + + # epoch_bar.write('L2={:.4f}'.format(L)) + print('L2={:.4f}'.format(L)) + + # self.gc2.load_state_dict(self.gc3.state_dict()) + # self.linear_a2.load_state_dict(self.linear_a3.state_dict()) + + + # with torch.no_grad(): + # mu, logvar, mu_a, logvar_a = self.encoder(x,adj) + # assert F.mse_loss(mu, logvar) == 0 + # assert F.mse_loss(mu_a, logvar_a) == 0 + # Z = mu.data.numpy() + + mu, logvar, mu_a, logvar_a = self.encoder(x,adj) + Z = self.reparameterize(mu,logvar) + + gmm = GaussianMixture(n_components=self.args.nClusters, covariance_type='diag') + + pre = gmm.fit_predict(Z.cpu().detach().numpy()) + print('Acc={:.4f}%'.format(cluster_acc(pre, Y)[0] * 100)) + + self.pi_.data = torch.from_numpy(gmm.weights_).float() + self.mu_c.data = torch.from_numpy(gmm.means_).float() + self.log_sigma2_c.data = torch.log(torch.from_numpy(gmm.covariances_).float()) + + torch.save(self.state_dict(), './pretrain_model_{}_{}.pk'.format(self.args.dataset,pre_epoch)) + else: + self.load_state_dict(torch.load('./pretrain_model_{}_{}.pk'.format(self.args.dataset,pre_epoch))) + + def predict_nn(self,mu,logvar): + z = self.reparameterize(mu,logvar) + gamma_c = self.cluster_choose(self.reparameterize(mu,logvar)) + + print('gamma_c,normalized:',gamma_c) + print('gamma_c argmax:',torch.argmax(gamma_c,1)) + print('gamma_c argmax counter:',Counter(torch.argmax(gamma_c,1).tolist())) + + gamma=gamma_c.detach().cpu().numpy() + + + return np.argmax(gamma,axis=1),gamma, z + + + + def predict(self,mu, logvar): + # z_mu, z_sigma2_log, z_ma,z_a_sigma2_log = self.encoder(x,adj) + # mu, logvar, mu_a, logvar_a = self.encoder(x,adj) + # z = torch.randn_like(mu) * torch.exp(z_sigma2_log / 2) + z_mu + det=1e-10 + z = self.reparameterize(mu,logvar) + pi = self.pi_ + # log_sigma2_c = self.log_sigma2_c + # mu_c = self.mu_c + # gamma_c = torch.exp(torch.log(pi.unsqueeze(0))+self.gaussian_pdfs_log(z,mu_c,log_sigma2_c)) + gamma_c = torch.exp(self.gaussian_pdfs_log(z,self.mu_c,self.log_sigma2_c))+det + print('gamma_c:',gamma_c) + gamma_c=gamma_c/(gamma_c.sum(1).view(-1,1))#batch_size*Clusters + gamma_c=F.softmax(gamma_c) + print('gamma_c,normalized:',gamma_c) + print('gamma_c argmax:',torch.argmax(gamma_c,1)) + print('gamma_c argmax counter:',Counter(torch.argmax(gamma_c,1).tolist())) + + gamma=gamma_c.detach().cpu().numpy() + + def predict_dist(self,mu, logvar): + # z_mu, z_sigma2_log, z_ma,z_a_sigma2_log = self.encoder(x,adj) + # mu, logvar, mu_a, logvar_a = self.encoder(x,adj) + # z = torch.randn_like(mu) * torch.exp(z_sigma2_log / 2) + z_mu + z = self.reparameterize(mu,logvar) + pi = self.pi_ + log_sigma2_c = self.log_sigma2_c + mu_c = self.mu_c + # gamma_c = torch.exp(self.gaussian_pdfs_log(z,mu_c,log_sigma2_c)) + + # gamma=gamma_c.detach().cpu().numpy() + + gamma=[] + for e in range(z.shape[0]): + temp_dist=[] + for m in range(mu_c.shape[0]): + temp_dist.append(F.mse_loss(z[e],mu_c[m]).data) + gamma.append(temp_dist) + + return np.argmin(gamma,axis=1),np.array(gamma) + + def 
plot_tsne(self,dataset,epoch,z,true_label,pred_label): + + tsne = TSNE(n_components=2, init='pca',perplexity=50.0) + data = torch.cat([z,self.mu_c.to('cpu')],dim=0).detach().numpy() + zs_tsne = tsne.fit_transform(data) + + cluster_labels=set(true_label) + print(cluster_labels) + index_group= [np.array(true_label)==y for y in cluster_labels] + colors = cm.tab20(range(len(index_group))) + + fig, ax = plt.subplots() + for index,c in zip(index_group,colors): + ax.scatter(zs_tsne[np.ix_(index), 0], zs_tsne[np.ix_(index), 1],color=c,s=2) + + ax.scatter(zs_tsne[z.shape[0]:, 0], zs_tsne[z.shape[0]:, 1],marker='^',color='b',s=40) + plt.title('true label') + # ax.legend() + plt.savefig("./visualization/{}_{}_tsne_{}.pdf".format(dataset,epoch,'true_label')) + + cluster_labels=set(pred_label) + print(cluster_labels) + index_group= [np.array(pred_label)==y for y in cluster_labels] + colors = cm.tab10(range(len(index_group))) + + fig, ax = plt.subplots() + for index,c in zip(index_group,colors): + ax.scatter(zs_tsne[np.ix_(index), 0], zs_tsne[np.ix_(index), 1],color=c,s=2) + + for index,c in enumerate(colors): + ax.scatter(zs_tsne[z.shape[0]+index:z.shape[0]+index+1, 0], zs_tsne[z.shape[0]+index:z.shape[0]+index+1, 1],marker='^',color=c,s=40) + + plt.title('pred label') + # ax.legend() + plt.savefig("./visualization/{}_{}_tsne_{}.pdf".format(dataset,epoch,'pred_label')) + + def gaussian_pdfs_log(self,x,mus,log_sigma2s): + G=[] + for c in range(self.args.nClusters): + G.append(self.gaussian_pdf_log(x,mus[c:c+1,:],log_sigma2s[c:c+1,:]).view(-1,1)) + return torch.cat(G,1) + + + @staticmethod + def gaussian_pdf_log(x,mu,log_sigma2): + return -0.5*(torch.sum(np.log(np.pi*2)+log_sigma2+(x-mu).pow(2)/torch.exp(log_sigma2),1)) # np.pi*2, not square + + def check_parameters(self): + for name, param in self.named_parameters(): + if param.requires_grad: + print(name, param.data,param.data.shape) + def check_gradient(self): + for name, param in self.named_parameters(): + if param.requires_grad: + print('grad: ',name) + print(param.grad,param.grad.shape) diff --git a/train.py b/train.py index 689bf94..7873b33 100644 --- a/train.py +++ b/train.py @@ -52,10 +52,8 @@ def training(args): print("node size:{}, feature size:{}".format(n_nodes,n_features)) - adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(sp.csc_matrix(adj_init)) - print('adj_train sum\n',adj_train.sum()/2) - fea_train, train_feas, val_feas, val_feas_false, test_feas, test_feas_false = mask_test_feas(features) - print('fea_train shape',fea_train.shape) + # adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj_init) + # fea_train, train_feas, val_feas, val_feas_false, test_feas, test_feas_false = mask_test_feas(features) features_orig = features features_label = torch.FloatTensor(features.toarray()) @@ -77,28 +75,20 @@ def training(args): embedding_attr_var_result_file = "result/AGAE_{}_a_sig.emb".format(args.dataset) # Some preprocessing, get the support matrix, D^{-1/2}\hat{A}D^{-1/2} - adj_norm = preprocess_graph(adj_train) + adj_norm = preprocess_graph(adj_init) + print("graph edge number after normalize adjacent matrix:{}".format(adj_init.sum()/2)) - # pos_weight_u = torch.tensor(float(adj_init.shape[0] * adj_init.shape[0] - adj_init.sum()) / adj_init.sum()) #?? - # norm_u = adj_init.shape[0] * adj_init.shape[0] / float((adj_init.shape[0] * adj_init.shape[0] - adj_init.sum()) * 2) #?? 
- # pos_weight_a = torch.tensor(float(features[2][0] * features[2][1] - len(features[1])) / len(features[1])) - # norm_a = features[2][0] * features[2][1] / float((features[2][0] * features[2][1] - len(features[1])) * 2) + pos_weight_u = torch.tensor(float(adj_init.shape[0] * adj_init.shape[0] - adj_init.sum()) / adj_init.sum()) #?? + norm_u = adj_init.shape[0] * adj_init.shape[0] / float((adj_init.shape[0] * adj_init.shape[0] - adj_init.sum()) * 2) #?? + pos_weight_a = torch.tensor(float(features[2][0] * features[2][1] - len(features[1])) / len(features[1])) + norm_a = features[2][0] * features[2][1] / float((features[2][0] * features[2][1] - len(features[1])) * 2) - pos_weight_u = torch.tensor(float(adj_train.shape[0] * adj_train.shape[0] - adj_train.sum()) / adj_train.sum()) #?? - norm_u = adj_train.shape[0] * adj_train.shape[0] / float((adj_train.shape[0] * adj_train.shape[0] - adj_train.sum()) * 2) #?? - pos_weight_a = torch.tensor(float(fea_train.shape[0] * fea_train.shape[1] - (fea_train.sum())) / (fea_train.sum())) - norm_a = fea_train.shape[0] * fea_train.shape[0] / float(fea_train.shape[0] * fea_train.shape[1] - fea_train.sum()) * 2 features_training = sparse_mx_to_torch_sparse_tensor(features_orig) - print('pos_weight_u,norm_u,pos_weight_a,norm_a',pos_weight_u,norm_u,pos_weight_a,norm_a) - - adj_label = torch.FloatTensor(adj_train.toarray()+sp.eye(adj_init.shape[0])) # add the identity matrix to the adj as label - - fea_train = sparse_mx_to_torch_sparse_tensor(fea_train) - adj_train = sparse_mx_to_torch_sparse_tensor(adj_train) # clustering pretraining for GMM paramter initialization # writer=SummaryWriter('./logs') + adj_label = torch.FloatTensor(adj_init.toarray()+sp.eye(adj_init.shape[0])) # add the identity matrix to the adj as label mean_h=[] mean_c=[] @@ -112,11 +102,6 @@ def training(args): if args.cuda: features_training = features_training.to_dense().cuda() - fea_train = fea_train.to_dense().cuda() - print('fea_train\n',fea_train) - adj_train = adj_train.to_dense().cuda() - print('adj_train\n',adj_train) - print('adj_train sum\n',adj_train.sum()) adj_norm = adj_norm.to_dense().cuda() pos_weight_u = pos_weight_u.cuda() pos_weight_a = pos_weight_a.cuda() @@ -194,8 +179,8 @@ def training(args): elif args.model =='gcn_vaece': #gcn with vae for co-embedding of feature and graph - (recovered_u, recovered_a), mu_u, logvar_u, mu_a, logvar_a = model(fea_train,adj_norm) - loss_list = model.loss(fea_train,adj_norm,labels = (adj_train, fea_train), n_nodes = n_nodes, n_features = n_features,norm = (norm_u, norm_a), pos_weight = (pos_weight_u, pos_weight_a)) + (recovered_u, recovered_a), mu_u, logvar_u, mu_a, logvar_a = model(features_training, adj_norm) + loss_list = model.loss(features_training,adj_norm,labels = (adj_label, features_label), n_nodes = n_nodes, n_features = n_features,norm = (norm_u, norm_a), pos_weight = (pos_weight_u, pos_weight_a)) loss =sum(loss_list) if epoch%10 <8: diff --git a/train_bk_2021_1_7.py b/train_bk_2021_1_7.py new file mode 100644 index 0000000..5a24c1f --- /dev/null +++ b/train_bk_2021_1_7.py @@ -0,0 +1,359 @@ +from __future__ import division +from __future__ import print_function + +import argparse +import time +import numpy as np +import scipy.sparse as sp +import torch +from torch import optim +from torch.autograd import Variable +from torch.optim.lr_scheduler import StepLR +from model import GCNModelVAE,GCNModelVAECD,GCNModelAE,GCNModelVAECE +from utils import preprocess_graph, get_roc_score, 
sparse_to_tuple,sparse_mx_to_torch_sparse_tensor,cluster_acc,clustering_evaluation, find_motif +from preprocessing import mask_test_feas,mask_test_edges, load_AN, check_symmetric,load_data +from tqdm import tqdm +from tensorboardX import SummaryWriter +from evaluation import clustering_latent_space +from collections import Counter +import itertools +import random + +import warnings +warnings.simplefilter("ignore") + +def training(args): + + print("Using {} dataset".format(args.dataset)) + # adj_init, features, Y= load_AN(args.dataset) + adj_init, features, labels, idx_train, idx_val, idx_test = load_data(args.dataset) + Y = np.argmax(labels,1) # labels is in one-hot format + + # Store original adjacency matrix (without diagonal entries) for later + adj_init = adj_init- sp.dia_matrix((adj_init.diagonal()[np.newaxis, :], [0]), shape=adj_init.shape) + adj_init.eliminate_zeros() + + assert adj_init.diagonal().sum()==0,"adj diagonal sum:{}, should be 0".format(adj_init.diagonal().sum()) + n_nodes, n_features= features.shape + # assert check_symmetric(adj_init).sum()==n_nodes*n_nodes,"adj should be symmetric" + print("imported graph edge number (without selfloop):{}".format((adj_init-adj_init.diagonal()).sum()/2)) + + # find motif 3 nodes + + # motif_matrix=find_motif(adj_init,args.dataset) + # print("find motif") + + + args.nClusters=len(set(Y)) + # args.nClusters=1 + print("cluster number:{}".format(args.nClusters)) + assert(adj_init.shape[0]==n_nodes) + + print("node size:{}, feature size:{}".format(n_nodes,n_features)) + + + # adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj_init) + # fea_train, train_feas, val_feas, val_feas_false, test_feas, test_feas_false = mask_test_feas(features) + + features_orig = features + features_label = torch.FloatTensor(features.toarray()) + features = sp.lil_matrix(features) + + features = sparse_to_tuple(features.tocoo()) + + features_nonzero = features[1].shape[0] + + print("graph edge number after mask:{}".format(adj_init.sum()/2)) + + + + # save result to files + link_predic_result_file = "result/AGAE_{}.res".format(args.dataset) + embedding_node_mean_result_file = "result/AGAE_{}_n_mu.emb".format(args.dataset) + embedding_attr_mean_result_file = "result/AGAE_{}_a_mu.emb".format(args.dataset) + embedding_node_var_result_file = "result/AGAE_{}_n_sig.emb".format(args.dataset) + embedding_attr_var_result_file = "result/AGAE_{}_a_sig.emb".format(args.dataset) + + # Some preprocessing, get the support matrix, D^{-1/2}\hat{A}D^{-1/2} + adj_norm = preprocess_graph(adj_init) + print("graph edge number after normalize adjacent matrix:{}".format(adj_init.sum()/2)) + + pos_weight_u = torch.tensor(float(adj_init.shape[0] * adj_init.shape[0] - adj_init.sum()) / adj_init.sum()) #?? + norm_u = adj_init.shape[0] * adj_init.shape[0] / float((adj_init.shape[0] * adj_init.shape[0] - adj_init.sum()) * 2) #?? 
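+    # pos_weight_u is (#zero entries)/(#nonzero entries) of the adjacency matrix; it up-weights
+    # the rare positive (edge) class inside binary_cross_entropy_with_logits, and norm_u rescales
+    # the mean loss accordingly (the usual GAE-style weighting for sparse graphs). The analogous
+    # weights for the attribute matrix are computed next.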
+ pos_weight_a = torch.tensor(float(features[2][0] * features[2][1] - len(features[1])) / len(features[1])) + norm_a = features[2][0] * features[2][1] / float((features[2][0] * features[2][1] - len(features[1])) * 2) + + features_training = sparse_mx_to_torch_sparse_tensor(features_orig) + + # clustering pretraining for GMM paramter initialization + # writer=SummaryWriter('./logs') + + adj_label = torch.FloatTensor(adj_init.toarray()+sp.eye(adj_init.shape[0])) # add the identity matrix to the adj as label + + mean_h=[] + mean_c=[] + mean_v=[] + mean_ari=[] + mean_ami=[] + mean_nmi=[] + mean_purity=[] + mean_accuracy=[] + + + if args.cuda: + features_training = features_training.to_dense().cuda() + adj_norm = adj_norm.to_dense().cuda() + pos_weight_u = pos_weight_u.cuda() + pos_weight_a = pos_weight_a.cuda() + adj_label = adj_label.cuda() + features_label = features_label.cuda() + # idx_train = idx_train.cuda() + + # idx_val = idx_val.cuda() + # idx_test = idx_test.cuda() + + features_training, adj_norm = Variable(features_training), Variable(adj_norm) + pos_weight_u = Variable(pos_weight_u) + pos_weight_a = Variable(pos_weight_a) + + for r in range(args.num_run): + + model = None + if args.model == 'gcn_ae': + model = GCNModelAE(n_features,n_nodes, args.hidden1, args.hidden2, args.dropout,args) + elif args.model == 'gcn_vae': + model = GCNModelVAE(n_features,n_nodes, args.hidden1, args.hidden2, args.dropout,args) + elif args.model == 'gcn_vaecd': + model = GCNModelVAECD(n_features,n_nodes, args.hidden1, args.hidden2, args.dropout,args) + elif args.model =='gcn_vaece': #gcn with vae for co-embedding of feature and graph + model = GCNModelVAECE(n_features,n_nodes, args.hidden1, args.hidden2, args.dropout,args) + + # using GMM to pretrain the clustering parameters + + if args.cuda: + model.cuda() + + print([i for i in model.named_parameters()]) + + + if args.model == 'gcn_vaecd': + params1=[model.gc1.parameters(),model.gc2.parameters(),model.gc3.parameters(),model.dc.parameters()] + optimizer1 = optim.Adam(itertools.chain(*params1), lr=args.lr) + elif args.model == 'gcn_vaece': + params1=[model.gc1.parameters(),model.gc2.parameters(),model.gc3.parameters(),model.dc.parameters(),model.linear_a1.parameters(),model.linear_a2.parameters(),model.linear_a3.parameters()] + optimizer1 = optim.Adam(itertools.chain(*params1), lr=args.lr) + + # model.pre_train(features_training,adj_norm,Y,pre_epoch=50) + + optimizer2 = optim.Adam(model.parameters(), lr=args.lr) + + # params2=[model.pi_,model.mu_c,model.log_sigma2_c] + # optimizer2 = optim.Adam(itertools.chain(*params2), lr=args.lr) + + hidden_emb_u = None + hidden_emb_a = None + + cost_val = [] + acc_val = [] + val_roc_score = [] + lr_s=StepLR(optimizer1,step_size=30,gamma=0.95) # it seems that fix leanring rate is better + + loss_list=None + for epoch in range(args.epochs): + t = time.time() + model.train() + + if args.model =='gcn_vaecd': + recovered_u, mu_u, logvar_u = model(features_training, adj_norm) + loss_list = model.loss(features_training,adj_norm,labels = adj_label, n_nodes = n_nodes, n_features = n_features,norm = norm_u, pos_weight = pos_weight_u) + loss =sum(loss_list) + + elif args.model == 'gcn_ae': + recovered_u, mu_u,logvar_u = model(features_training, adj_norm) + loss_list = model.loss(recovered_u,labels = adj_label, n_nodes = n_nodes, n_features = n_features,norm = norm_u, pos_weight = pos_weight_u) + loss =sum(loss_list) + elif args.model == 'gcn_vae': + recovered_u, mu_u, logvar_u = model(features_training, adj_norm) + loss_list = 
model.loss(features_training,adj_norm,labels = adj_label, n_nodes = n_nodes, n_features = n_features,norm = norm_u, pos_weight = pos_weight_u) + loss =sum(loss_list) + elif args.model =='gcn_vaece': #gcn with vae for co-embedding of feature and graph + + + (recovered_u, recovered_a), mu_u, logvar_u, mu_a, logvar_a = model(features_training, adj_norm) + loss_list = model.loss(features_training,adj_norm,labels = (adj_label, features_label), n_nodes = n_nodes, n_features = n_features,norm = (norm_u, norm_a), pos_weight = (pos_weight_u, pos_weight_a)) + loss =sum(loss_list) + + if epoch%10 <8: + model.change_nn_grad_true() + model.change_cluster_grad_false() + optimizer2.zero_grad() + loss.backward() + optimizer2.step() + else: + model.change_nn_grad_false() + model.change_cluster_grad_true() + optimizer2.zero_grad() + loss.backward() + optimizer2.step() + + + + lr_s.step() + + # model.check_gradient() + # model.check_parameters() + + # if (epoch+1)%50==0: + # pre,gamma,z = model.predict(mu_u,logvar_u) + # model.plot_tsne(args.dataset,epoch,z,pre,'predict label') + # model.plot_tsne(args.dataset,epoch,z,Y,'true label') + + + + correct_prediction_u = ((torch.sigmoid(recovered_u.to('cpu'))>=0.5)==adj_label.type(torch.LongTensor)) + # correct_prediction_a = ((torch.sigmoid(recovered_a)>=0.5).type(torch.LongTensor)==features_label.type(torch.LongTensor)).type(torch.FloatTensor) + + accuracy = torch.mean(correct_prediction_u*1.0) + + # hidden_emb_u = mu_u.data.numpy() + # hidden_emb_a = mu_a.data.numpy() + # roc_curr, ap_curr = get_roc_score(np.dot(hidden_emb_u,hidden_emb_u.T), adj, val_edges, val_edges_false) + # roc_curr_a, ap_curr_a = get_roc_score(np.dot(hidden_emb_u,hidden_emb_a.T), features_orig, val_feas, val_feas_false) + + # val_roc_score.append(roc_curr) + + #clustering############# + pre=[] + tru=[] + gamma = None + + + tru=Y + # model.eval() + + # if args.model == 'vgaecd': + # pre=model.predict(mu_u,logvar_u) + + # print("True label:{}".format(tru)) + # print(Counter(tru)) + # print("Predicted label:{}".format(pre)) + # print(Counter(pre)) + + # # mc_ + # print("cluster means") + # print(model.mu_c.data) + + # print("cluster prior") + # print(model.pi_.data) + # else: + # pre=clustering_latent_space(mu_u.detach().numpy(),tru) + + # writer.add_scalar('loss',loss.item(),epoch) + # writer.add_scalar('acc',cluster_acc(pre,tru)[0]*100,epoch) + # writer.add_scalar('lr',lr_s.get_last_lr()[0],epoch) + + # print('Loss={:.4f},Clustering_ACC={:.4f}%,LR={:.4f}'.format(loss.item(),cluster_acc(pre,tru)[0]*100,lr_s.get_last_lr()[0])) + # H, C, V, ari, ami, nmi, purity = clustering_evaluation(tru,pre) + # print('H:{} C:{} V:{} ari:{} ami:{} nmi:{} purity:{}'.format(H, C, V, ari, ami, nmi, purity)) + + ####################### + + + print("Epoch:", '%04d' % (epoch + 1), + "LR={:.4f}".format(lr_s.get_last_lr()[0]), + "train_loss_total=", "{:.5f}".format(loss.item()), + "train_loss_parts=", "{}".format([round(l.item(),4) for l in loss_list]), + # "log_lik=", "{:.5f}".format(cost.item()), + # "KL_u=", "{:.5f}".format(KLD_u.item()), + # "KL_a=", "{:.5f}".format(KLD_a.item()), + # "yita_loss=", "{:.5f}".format(yita_loss.item()), + "link_pred_train_acc=", "{:.5f}".format(accuracy.item()), + # "val_edge_roc=", "{:.5f}".format(val_roc_score[-1]), + # "val_edge_ap=", "{:.5f}".format(ap_curr), + # "val_attr_roc=", "{:.5f}".format(roc_curr_a), + # "val_attr_ap=", "{:.5f}".format(ap_curr_a), + "time=", "{:.5f}".format(time.time() - t)) + + # model.check_parameters() + # z = model.reparameterize(mu_u,logvar_u) + # 
model.plot_tsne(args.dataset,epoch,z,tru,'true label') + print("Optimization Finished!") + + # if args.model == 'gcn_vaece': + # (recovered_u, recovered_a), mu_u, logvar_u, mu_a, logvar_a = model(features_training, adj_norm) + # else: + # recovered_u, mu_u, logvar_u = model(features_training, adj_norm) + + + pre,gamma,z = model.predict(mu_u,logvar_u) + + H, C, V, ari, ami, nmi, purity = clustering_evaluation(tru,pre) + acc = cluster_acc(pre,tru)[0]*100 + mean_h.append(round(H,4)) + mean_c.append(round(C,4)) + mean_v.append(round(V,4)) + mean_ari.append(round(ari,4)) + mean_ami.append(round(ami,4)) + mean_nmi.append(round(nmi,4)) + mean_purity.append(round(purity,4)) + mean_accuracy.append(round(acc,4)) + + if args.model in ['gcn_vaecd','gcn_vaece']: + pre,gamma,z = model.predict(mu_u,logvar_u) + model.plot_tsne(args.dataset,epoch,z.to('cpu'),tru,pre) + else: + pre=clustering_latent_space(mu_u.detach().numpy(),tru) + + # np.save(embedding_node_mean_result_file, mu_u.data.numpy()) + # np.save(embedding_attr_mean_result_file, mu_a.data.numpy()) + # np.save(embedding_node_var_result_file, logvar_u.data.numpy()) + # np.save(embedding_attr_var_result_file, logvar_a.data.numpy()) + + # roc_score, ap_score = get_roc_score(np.dot(hidden_emb_u,hidden_emb_u.T), adj, test_edges, test_edges_false) + # roc_score_a, ap_score_a = get_roc_score(np.dot(hidden_emb_u,hidden_emb_a.T), features_orig, test_feas, test_feas_false) + + # print('Test edge ROC score: ' + str(roc_score)) + # print('Test edge AP score: ' + str(ap_score)) + # print('Test attr ROC score: ' + str(roc_score_a)) + # print('Test attr AP score: ' + str(ap_score_a)) + + + ###### Report Final Results ###### + print('Homogeneity:{}\t mean:{}\t std:{}\n'.format(mean_h,round(np.mean(mean_h),4),round(np.std(mean_h),4))) + print('Completeness:{}\t mean:{}\t std:{}\n'.format(mean_c,round(np.mean(mean_c),4),round(np.std(mean_c),4))) + print('V_measure_score:{}\t mean:{}\t std:{}\n'.format(mean_v,round(np.mean(mean_v),4),round(np.std(mean_v),4))) + print('adjusted Rand Score:{}\t mean:{}\t std:{}\n'.format(mean_ari,round(np.mean(mean_ari),4),round(np.std(mean_ari),4))) + print('adjusted Mutual Information:{}\t mean:{}\t std:{}\n'.format(mean_ami,round(np.mean(mean_ami),4),round(np.std(mean_ami),4))) + print('Normalized Mutual Information:{}\t mean:{}\t std:{}\n'.format(mean_nmi,round(np.mean(mean_nmi),4),round(np.std(mean_nmi),4))) + print('Purity:{}\t mean:{}\t std:{}\n'.format(mean_purity,round(np.mean(mean_purity),4),round(np.std(mean_purity),4))) + print('Accuracy:{}\t mean:{}\t std:{}\n'.format(mean_accuracy,round(np.mean(mean_accuracy),4),round(np.std(mean_accuracy),4))) + print("True label distribution:{}".format(tru)) + print(Counter(tru)) + print("Predicted label distribution:{}".format(pre)) + print(Counter(pre)) + +def parse_args(): + parser = argparse.ArgumentParser(description="Node clustering") + parser.add_argument('--model', type=str, default='gcn_ae', help="models used for clustering: gcn_ae,gcn_vae,gcn_vaecd,gcn_vaece") + parser.add_argument('--seed', type=int, default=42, help='Random seed.') + parser.add_argument('--epochs', type=int, default=300, help='Number of epochs to train.') + parser.add_argument('--hidden1', type=int, default=32, help='Number of units in hidden layer 1.') + parser.add_argument('--hidden2', type=int, default=16, help='Number of units in hidden layer 2.') + parser.add_argument('--lr', type=float, default=0.002, help='Initial aearning rate.') + parser.add_argument('--dropout', type=float, default=0.2, 
help='Dropout rate (1 - keep probability).') + parser.add_argument('--dataset', type=str, default='cora', help='Dataset name.') + parser.add_argument('--nClusters',type=int,default=7) + parser.add_argument('--num_run',type=int,default=1,help='Number of training runs') + parser.add_argument('--cuda', action='store_true', default=False, help='Enable CUDA training.') + args, unknown = parser.parse_known_args() + + return args + +if __name__ == '__main__': + args = parse_args() + if args.cuda: + torch.cuda.set_device(1) + torch.cuda.manual_seed(args.seed) + random.seed(args.seed) + np.random.seed(args.seed) + training(args)
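For reference, a minimal usage sketch for the training entry point added above. It only uses flags that parse_args() already defines; the import assumes the sketch sits next to train_bk_2021_1_7.py, and the chosen values (gcn_vaece, cora, 300 epochs, 1 run) are simply the options and defaults visible in this diff, not new requirements.

import sys
from train_bk_2021_1_7 import parse_args, training

# Equivalent to: python train_bk_2021_1_7.py --model gcn_vaece --dataset cora --epochs 300 --num_run 1
sys.argv = ['train_bk_2021_1_7.py',
            '--model', 'gcn_vaece',   # one of: gcn_ae, gcn_vae, gcn_vaecd, gcn_vaece
            '--dataset', 'cora',
            '--epochs', '300',
            '--num_run', '1']

args = parse_args()   # parse_known_args() silently ignores flags it does not recognise
training(args)        # note: training() resets args.nClusters from the label set of the dataset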