Commit d4d7c71

add synthetic data

goodman1204 committed Apr 19, 2022
1 parent d805b9b commit d4d7c71
Showing 6 changed files with 311 additions and 275 deletions.
44 changes: 26 additions & 18 deletions can.py
@@ -26,13 +26,20 @@

def training(args):

+    if args.cuda>=0:
+        device = torch.device('cuda')
+    else:
+        device = torch.device('cpu')
+
    print("Using {} dataset".format(args.dataset))
    if args.dataset in ['cora','pubmed','citeseer']:
        adj_init, features, labels, idx_train, idx_val, idx_test = load_data(args.dataset)
        Y = np.argmax(labels,1) # labels is in one-hot format
-    else:
-        adj_init, features, Y= load_AN(args.dataset)
+    elif args.dataset in ['Flickr','BlogCatalog']:
+        adj_init, features, Y= load_AN(args.dataset)
+    else:
+        adj_init, features, Y= load_AN("synthetic_{}_{}".format(args.synthetic_num_nodes,args.synthetic_density))
    # print("find motif")
    # motif_matrix=find_motif(adj_init,args.dataset)
@@ -111,15 +118,15 @@ def training(args):


    # adj_norm = drop_edge(adj_norm,Y)
-    if args.cuda:
-        # drop features
-        features_training = features_training.to_dense().cuda()
-        # features_training = drop_feature(features_training,1.0).cuda()
-        adj_norm = adj_norm.to_dense().cuda()
-        pos_weight_u = pos_weight_u.cuda()
-        pos_weight_a = pos_weight_a.cuda()
-        adj_label = adj_label.cuda()
-        features_label = features_label.cuda()
+    # drop features
+    features_training = features_training.to_dense().to(device)
+    # features_training = drop_feature(features_training,1.0).cuda()
+    adj_norm = adj_norm.to_dense().to(device)
+    pos_weight_u = pos_weight_u.to(device)
+    pos_weight_a = pos_weight_a.to(device)
+    adj_label = adj_label.to(device)
+    features_label = features_label.to(device)

    features_training, adj_norm = Variable(features_training), Variable(adj_norm)
    pos_weight_u = Variable(pos_weight_u)
@@ -131,12 +138,11 @@ def training(args):
    # np.random.seed(args.seed)
    # torch.manual_seed(args.seed)

-    model = GCNModelVAECE(n_features,n_nodes, args.hidden1, args.hidden2, args.dropout,args) # CAN will not use the clustering loss of VAECE
+    model = GCNModelVAECE(n_features,n_nodes, args.hidden1, args.hidden2, args.dropout,args,device) # CAN will not use the clustering loss of VAECE

    # using GMM to pretrain the clustering parameters

-    if args.cuda:
-        model.cuda()
+    model.to(device)


    optimizer2 = optim.Adam(model.parameters(), lr=args.lr)
@@ -277,18 +283,20 @@ def parse_args():
    parser.add_argument('--lr', type=float, default=0.002, help='Initial learning rate.')
    parser.add_argument('--dropout', type=float, default=0.0, help='Dropout rate (1 - keep probability).')
    parser.add_argument('--dataset', type=str, default='cora', help='type of dataset.')

+    parser.add_argument('--synthetic_num_nodes',type=int,default=1000)
+    parser.add_argument('--synthetic_density', type=float, default=0.1)
+
    parser.add_argument('--nClusters',type=int,default=7)
    parser.add_argument('--num_run',type=int,default=1,help='Number of running times')
-    parser.add_argument('--cuda', action='store_true', default=False, help='Disables CUDA training.')
+    parser.add_argument('--cuda', type=int, default=0, help='GPU id to train with (a negative value selects CPU).')
    args, unknown = parser.parse_known_args()

    return args

if __name__ == '__main__':
    args = parse_args()
-    if args.cuda:
-        torch.cuda.set_device(0)
-        # torch.cuda.manual_seed(args.seed)
+    # torch.cuda.manual_seed(args.seed)
    # random.seed(args.seed)
    # np.random.seed(args.seed)
    # torch.manual_seed(args.seed)
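With these flags in place, a hypothetical end-to-end run might look as follows (the script names are from this commit; the literal dataset string is arbitrary, since any --dataset value outside cora/pubmed/citeseer/Flickr/BlogCatalog now falls through to the synthetic loader, and the two synthetic flags must match a graph generated beforehand):

    python generate_synthtic_graph.py 1000 0.1
    python can.py --dataset synthetic --synthetic_num_nodes 1000 --synthetic_density 0.1 --cuda -1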
7 changes: 4 additions & 3 deletions generate_synthtic_graph.py
@@ -1,6 +1,7 @@
import networkx as nx
import numpy as np
from networkx.generators import erdos_renyi_graph
+import sys

attribute_size = 1000

@@ -55,9 +56,9 @@ def save_graph(num_nodes,density,G,fea_matrix,label):


if __name__=="__main__":
num_nodes = 1000
density = 0.1
G,fea_matrix,label = generate_graphs(num_nodes,density)
num_nodes = int(sys.argv[1])
density = float(sys.argv[2])
G,fea_matrix,label = generate_graphs(int(num_nodes),float(density))
save_graph(num_nodes,density,G,fea_matrix,label)
print("Done")

52 changes: 28 additions & 24 deletions model.py
@@ -19,10 +19,11 @@
from collections import Counter

class GCNModelAE(nn.Module):
-    def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args):
+    def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args,device=torch.device('cpu')):

        super(GCNModelAE, self).__init__()

+        self.device = device
        self.args = args
        self.gc1 = GraphConvolutionSparse(input_feat_dim, hidden_dim1, dropout, act=torch.relu)
        self.gc2 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x)
@@ -291,10 +292,11 @@ def check_parameters(self):
        print(name, param.data,param.data.shape)

class NEC(nn.Module):
-    def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args):
+    def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args,device):
        super(NEC, self).__init__()

        self.args = args
+        self.device = device
        self.gc1 = GraphConvolutionSparse(input_feat_dim, hidden_dim1, dropout, act=torch.relu)
        self.gc2 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x)
        self.dc = InnerProductDecoder(dropout, act=torch.sigmoid)
@@ -331,9 +333,9 @@ def modularity_loss(self,z,adj):
        H_norm = n.sqrt()*H.sqrt()/(H.sqrt().sum())
        # print("H_norm shape",H_norm.shape)
        # print("H_norm ",H_norm)
-        m = (adj-torch.eye(adj.shape[0]).cuda()).sum()/2
-        D = (adj-torch.eye(adj.shape[0]).cuda()).sum(1) # the degree of nodes, adj includes self loop
-        B = (adj-torch.eye(adj.shape[0]).cuda())-torch.matmul(D.view(-1,1),D.view(1,-1))/(2*m) # modularity matrix
+        m = (adj-torch.eye(adj.shape[0]).to(self.device)).sum()/2
+        D = (adj-torch.eye(adj.shape[0]).to(self.device)).sum(1) # the degree of nodes, adj includes self loop
+        B = (adj-torch.eye(adj.shape[0]).to(self.device))-torch.matmul(D.view(-1,1),D.view(1,-1))/(2*m) # modularity matrix
        mod_loss=torch.trace(torch.matmul(torch.matmul(H_norm.t(),B),H_norm)/(4*m))
        # print("mod_loss",mod_loss)
        return mod_loss
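The change above only swaps .cuda() for .to(self.device); for context, a self-contained sketch of the quantity being computed (standard modularity with a soft assignment H, written independently of this repo's exact code):

    import torch

    def modularity(adj_with_self_loops, H_norm):
        # strip the self loops, matching the torch.eye subtraction above
        A = adj_with_self_loops - torch.eye(adj_with_self_loops.shape[0])
        d = A.sum(1)                           # node degrees
        m = d.sum() / 2                        # number of edges
        B = A - torch.outer(d, d) / (2 * m)    # modularity matrix B = A - d d^T / (2m)
        return torch.trace(H_norm.t() @ B @ H_norm) / (4 * m)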
@@ -410,7 +412,7 @@ def loss(self,x,adj,labels, n_nodes, n_features, norm, pos_weight,L=1):
        mod_loss=self.modularity_loss(z,adj)

        print('z shape mu_c shape',z.shape,self.mu_c.shape)
-        Q = self.getSoftAssignments(z,self.mu_c.cuda(),n_nodes)
+        Q = self.getSoftAssignments(z,self.mu_c.to(self.device),n_nodes)

        P = self.calculateP(Q)

@@ -422,7 +424,7 @@

    def predict_soft_assignment(self,z):

-        Q = self.getSoftAssignments(z,self.mu_c.cuda(),z.shape[0])
+        Q = self.getSoftAssignments(z,self.mu_c.to(self.device),z.shape[0])
        gamma_c = Q
        gamma=gamma_c.detach().cpu().numpy()
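getSoftAssignments and calculateP are not shown in this diff; a sketch of the standard DEC-style pair they appear to implement (Student's-t soft assignment q and sharpened target p; the repo's exact kernel may differ):

    import torch

    def soft_assignments(z, mu):
        # q_ij is proportional to (1 + ||z_i - mu_j||^2)^(-1), rows normalized over clusters
        q = 1.0 / (1.0 + torch.cdist(z, mu) ** 2)
        return q / q.sum(dim=1, keepdim=True)

    def target_distribution(q):
        # p_ij is proportional to q_ij^2 / f_j, with cluster frequency f_j = sum_i q_ij
        w = q ** 2 / q.sum(dim=0)
        return w / w.sum(dim=1, keepdim=True)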

@@ -433,10 +435,11 @@ def init_clustering_params_kmeans(self,km):
        self.mu_c = torch.nn.Parameter(torch.from_numpy(km.cluster_centers_))

class DAEGCE(nn.Module):
-    def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args):
+    def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args,device):
        super(DAEGCE, self).__init__()


+        self.device = device
        self.args = args
        self.gc1 = SpGAT(input_feat_dim,hidden_dim1,hidden_dim1,dropout,alpha=0.2,nheads=4)
        self.gc2 = SpGAT(hidden_dim1,hidden_dim2,hidden_dim2,dropout,alpha=0.2,nheads=4)
@@ -496,7 +499,7 @@ def loss(self,x,adj,labels, n_nodes, n_features, norm, pos_weight,epoch):
        pred_adj = self.decoder(z)
        L_rec = norm_u * F.binary_cross_entropy_with_logits(pred_adj, labels_sub_u, pos_weight = pos_weight_u)

-        self.Q = self.getSoftAssignments(z,self.mu_c.cuda(),n_nodes)
+        self.Q = self.getSoftAssignments(z,self.mu_c.to(self.device),n_nodes)
        self.P = self.calculateP(self.Q)

        if epoch>=200:
@@ -515,7 +518,7 @@ def loss(self,x,adj,labels, n_nodes, n_features, norm, pos_weight,epoch):

    def predict_soft_assignment(self,z):

-        Q = self.getSoftAssignments(z,self.mu_c.cuda(),z.shape[0])
+        Q = self.getSoftAssignments(z,self.mu_c.to(self.device),z.shape[0])
        gamma_c = Q
        gamma=gamma_c.detach().cpu().numpy()

@@ -573,11 +576,12 @@ def init_clustering_params_kmeans(self,km):
        self.mu_c = torch.nn.Parameter(torch.from_numpy(km.cluster_centers_))

class GCNModelVAECE(nn.Module):
-    def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args):
+    def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args,device):
        super(GCNModelVAECE, self).__init__()


        self.args = args
+        self.device = device
        self.gc1 = GraphConvolutionSparse(input_feat_dim, hidden_dim1, dropout, act=torch.relu)
        self.gc2 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x)
        self.gc3 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x)
@@ -643,9 +647,9 @@ def modularity_loss(self, z,adj):
        H_norm = n.sqrt()*H.sqrt()/(H.sqrt().sum())
        # print("H_norm shape",H_norm.shape)
        # print("H_norm ",H_norm)
-        m = (adj-torch.eye(adj.shape[0]).cuda()).sum()/2
-        D = (adj-torch.eye(adj.shape[0]).cuda()).sum(1) # the degree of nodes, adj includes self loop
-        B = (adj-torch.eye(adj.shape[0]).cuda())-torch.matmul(D.view(-1,1),D.view(1,-1))/(2*m) # modularity matrix
+        m = (adj-torch.eye(adj.shape[0]).to(self.device)).sum()/2
+        D = (adj-torch.eye(adj.shape[0]).to(self.device)).sum(1) # the degree of nodes, adj includes self loop
+        B = (adj-torch.eye(adj.shape[0]).to(self.device))-torch.matmul(D.view(-1,1),D.view(1,-1))/(2*m) # modularity matrix
        mod_loss=torch.trace(torch.matmul(torch.matmul(H_norm.t(),B),H_norm)/(4*m))
        # print("mod_loss",mod_loss)

@@ -795,7 +799,7 @@ def loss(self,x,adj,labels, n_nodes, n_features, norm, pos_weight,L=1):

        # print('z shape mu_c shape',z.shape,self.mu_c.shape)

-        Q = self.getSoftAssignments(z,self.mu_c.cuda(),n_nodes)
+        Q = self.getSoftAssignments(z,self.mu_c.to(self.device),n_nodes)

        P = self.calculateP(Q)
        # if epoch ==0:
@@ -906,7 +910,7 @@ def predict_soft_assignment(self, mu, logvar,z):
        # z = torch.randn_like(mu) * torch.exp(z_sigma2_log / 2) + z_mu
        det=1e-10
        # z = self.reparameterize(mu,logvar)
-        Q = self.getSoftAssignments(z,self.mu_c.cuda(),z.shape[0])
+        Q = self.getSoftAssignments(z,self.mu_c.to(self.device),z.shape[0])

        pi = self.pi_
        # log_sigma2_c = self.log_sigma2_c
@@ -1045,14 +1049,14 @@ def init_clustering_params(self,gmm):
        # self.pi_= torch.nn.Parameter(torch.from_numpy(gmm.weights_))
        # self.mu_c = torch.nn.Parameter(torch.from_numpy(gmm.means_))
        # self.log_sigma2_c = torch.nn.Parameter(torch.from_numpy(gmm.covariances_))
-        if self.args.cuda>=0:
-            self.pi_= torch.nn.Parameter(torch.from_numpy(gmm.weights_).float().to('cuda'))
-            self.mu_c = torch.nn.Parameter(torch.from_numpy(gmm.means_).to('cuda'))
-            self.log_sigma2_c = torch.nn.Parameter(torch.from_numpy(gmm.covariances_).to('cuda'))
-        else:
-            self.pi_= torch.nn.Parameter(torch.from_numpy(gmm.weights_),requires_grad=False)
-            self.mu_c = torch.nn.Parameter(torch.from_numpy(gmm.means_))
-            self.log_sigma2_c = torch.nn.Parameter(torch.from_numpy(gmm.covariances_),requires_grad=False)
+        # if self.args.cuda>=0:
+        self.pi_= torch.nn.Parameter(torch.from_numpy(gmm.weights_).float().to(self.device))
+        self.mu_c = torch.nn.Parameter(torch.from_numpy(gmm.means_).to(self.device))
+        self.log_sigma2_c = torch.nn.Parameter(torch.from_numpy(gmm.covariances_).to(self.device))
+        # else:
+        #     self.pi_= torch.nn.Parameter(torch.from_numpy(gmm.weights_),requires_grad=False)
+        #     self.mu_c = torch.nn.Parameter(torch.from_numpy(gmm.means_))
+        #     self.log_sigma2_c = torch.nn.Parameter(torch.from_numpy(gmm.covariances_),requires_grad=False)

        # print(self.mu_c)
        print("check cluster parameter device",'\npi',self.pi_.device,"\nmu_c device",self.mu_c.device,'\nlog_sigma2 device', self.log_sigma2_c.device)