Commit d4d7c71

add synthetic data

goodman1204 committed Apr 19, 2022
1 parent d805b9b commit d4d7c71
Showing 6 changed files with 311 additions and 275 deletions.
44 changes: 26 additions & 18 deletions can.py
@@ -26,13 +26,20 @@

def training(args):

+    if args.cuda>=0:
+        device = torch.device('cuda')
+    else:
+        device = torch.device('cpu')
+
    print("Using {} dataset".format(args.dataset))
    if args.dataset in ['cora','pubmed','citeseer']:
        adj_init, features, labels, idx_train, idx_val, idx_test = load_data(args.dataset)
        Y = np.argmax(labels,1) # labels is in one-hot format
-    else:
-        adj_init, features, Y= load_AN(args.dataset)
+    elif args.dataset in ['Flickr','BlogCatalog']:
+        adj_init, features, Y= load_AN(args.dataset)
+    else:
+        adj_init, features, Y= load_AN("synthetic_{}_{}".format(args.synthetic_num_nodes,args.synthetic_density))
    # print("find motif")
    # motif_matrix=find_motif(adj_init,args.dataset)
@@ -111,15 +118,15 @@ def training(args):


    # adj_norm = drop_edge(adj_norm,Y)
-    if args.cuda:
-        # drop features
-        features_training = features_training.to_dense().cuda()
-        # features_training = drop_feature(features_training,1.0).cuda()
-        adj_norm = adj_norm.to_dense().cuda()
-        pos_weight_u = pos_weight_u.cuda()
-        pos_weight_a = pos_weight_a.cuda()
-        adj_label = adj_label.cuda()
-        features_label = features_label.cuda()
+    # drop features
+    features_training = features_training.to_dense().to(device)
+    # features_training = drop_feature(features_training,1.0).cuda()
+    adj_norm = adj_norm.to_dense().to(device)
+    pos_weight_u = pos_weight_u.to(device)
+    pos_weight_a = pos_weight_a.to(device)
+    adj_label = adj_label.to(device)
+    features_label = features_label.to(device)

    features_training, adj_norm = Variable(features_training), Variable(adj_norm)
    pos_weight_u = Variable(pos_weight_u)
@@ -131,12 +138,11 @@ def training(args):
    # np.random.seed(args.seed)
    # torch.manual_seed(args.seed)

-    model = GCNModelVAECE(n_features,n_nodes, args.hidden1, args.hidden2, args.dropout,args) # CAN will not use the clustering loss of VAECE
+    model = GCNModelVAECE(n_features,n_nodes, args.hidden1, args.hidden2, args.dropout,args,device) # CAN will not use the clustering loss of VAECE

    # using GMM to pretrain the clustering parameters

-    if args.cuda:
-        model.cuda()
+    model.to(device)


    optimizer2 = optim.Adam(model.parameters(), lr=args.lr)
@@ -277,18 +283,20 @@ def parse_args():
    parser.add_argument('--lr', type=float, default=0.002, help='Initial learning rate.')
    parser.add_argument('--dropout', type=float, default=0.0, help='Dropout rate (1 - keep probability).')
    parser.add_argument('--dataset', type=str, default='cora', help='type of dataset.')

+    parser.add_argument('--synthetic_num_nodes',type=int,default=1000)
+    parser.add_argument('--synthetic_density', type=float, default=0.1)
+
    parser.add_argument('--nClusters',type=int,default=7)
    parser.add_argument('--num_run',type=int,default=1,help='Number of running times')
-    parser.add_argument('--cuda', action='store_true', default=False, help='Disables CUDA training.')
+    parser.add_argument('--cuda', type=int, default=0, help='GPU id to train with (a negative value selects CPU).')
    args, unknown = parser.parse_known_args()

    return args

if __name__ == '__main__':
    args = parse_args()
-    if args.cuda:
-        torch.cuda.set_device(0)
-        # torch.cuda.manual_seed(args.seed)
+    # torch.cuda.manual_seed(args.seed)
    # random.seed(args.seed)
    # np.random.seed(args.seed)
    # torch.manual_seed(args.seed)
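With these flags in place, a hypothetical end-to-end run might look as follows (the script names are from this commit; the literal dataset string is arbitrary, since any --dataset value outside cora/pubmed/citeseer/Flickr/BlogCatalog now falls through to the synthetic loader, and the two synthetic flags must match a graph generated beforehand):

    python generate_synthtic_graph.py 1000 0.1
    python can.py --dataset synthetic --synthetic_num_nodes 1000 --synthetic_density 0.1 --cuda -1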
7 changes: 4 additions & 3 deletions generate_synthtic_graph.py
@@ -1,6 +1,7 @@
import networkx as nx
import numpy as np
from networkx.generators import erdos_renyi_graph
+import sys

attribute_size = 1000

@@ -55,9 +56,9 @@ def save_graph(num_nodes,density,G,fea_matrix,label):


if __name__=="__main__":
num_nodes = 1000
density = 0.1
G,fea_matrix,label = generate_graphs(num_nodes,density)
num_nodes = int(sys.argv[1])
density = float(sys.argv[2])
G,fea_matrix,label = generate_graphs(int(num_nodes),float(density))
save_graph(num_nodes,density,G,fea_matrix,label)
print("Done")

52 changes: 28 additions & 24 deletions model.py
@@ -19,10 +19,11 @@
from collections import Counter

class GCNModelAE(nn.Module):
-    def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args):
+    def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args,device=torch.device('cpu')):

        super(GCNModelAE, self).__init__()

+        self.device = device
        self.args = args
        self.gc1 = GraphConvolutionSparse(input_feat_dim, hidden_dim1, dropout, act=torch.relu)
        self.gc2 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x)
@@ -291,10 +292,11 @@ def check_parameters(self):
        print(name, param.data,param.data.shape)

class NEC(nn.Module):
-    def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args):
+    def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args,device):
        super(NEC, self).__init__()

        self.args = args
+        self.device = device
        self.gc1 = GraphConvolutionSparse(input_feat_dim, hidden_dim1, dropout, act=torch.relu)
        self.gc2 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x)
        self.dc = InnerProductDecoder(dropout, act=torch.sigmoid)
@@ -331,9 +333,9 @@ def modularity_loss(self,z,adj):
        H_norm = n.sqrt()*H.sqrt()/(H.sqrt().sum())
        # print("H_norm shape",H_norm.shape)
        # print("H_norm ",H_norm)
-        m = (adj-torch.eye(adj.shape[0]).cuda()).sum()/2
-        D = (adj-torch.eye(adj.shape[0]).cuda()).sum(1) # the degree of nodes, adj includes self loop
-        B = (adj-torch.eye(adj.shape[0]).cuda())-torch.matmul(D.view(-1,1),D.view(1,-1))/(2*m) # modularity matrix
+        m = (adj-torch.eye(adj.shape[0]).to(self.device)).sum()/2
+        D = (adj-torch.eye(adj.shape[0]).to(self.device)).sum(1) # the degree of nodes, adj includes self loop
+        B = (adj-torch.eye(adj.shape[0]).to(self.device))-torch.matmul(D.view(-1,1),D.view(1,-1))/(2*m) # modularity matrix
        mod_loss=torch.trace(torch.matmul(torch.matmul(H_norm.t(),B),H_norm)/(4*m))
        # print("mod_loss",mod_loss)
        return mod_loss
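The change above only swaps .cuda() for .to(self.device); for context, a self-contained sketch of the quantity being computed (standard modularity with a soft assignment H, written independently of this repo's exact code):

    import torch

    def modularity(adj_with_self_loops, H_norm):
        # strip the self loops, matching the torch.eye subtraction above
        A = adj_with_self_loops - torch.eye(adj_with_self_loops.shape[0])
        d = A.sum(1)                           # node degrees
        m = d.sum() / 2                        # number of edges
        B = A - torch.outer(d, d) / (2 * m)    # modularity matrix B = A - d d^T / (2m)
        return torch.trace(H_norm.t() @ B @ H_norm) / (4 * m)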
@@ -410,7 +412,7 @@ def loss(self,x,adj,labels, n_nodes, n_features, norm, pos_weight,L=1):
        mod_loss=self.modularity_loss(z,adj)

        print('z shape mu_c shape',z.shape,self.mu_c.shape)
-        Q = self.getSoftAssignments(z,self.mu_c.cuda(),n_nodes)
+        Q = self.getSoftAssignments(z,self.mu_c.to(self.device),n_nodes)

        P = self.calculateP(Q)

@@ -422,7 +424,7 @@

    def predict_soft_assignment(self,z):

-        Q = self.getSoftAssignments(z,self.mu_c.cuda(),z.shape[0])
+        Q = self.getSoftAssignments(z,self.mu_c.to(self.device),z.shape[0])
        gamma_c = Q
        gamma=gamma_c.detach().cpu().numpy()
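getSoftAssignments and calculateP are not shown in this diff; a sketch of the standard DEC-style pair they appear to implement (Student's-t soft assignment q and sharpened target p; the repo's exact kernel may differ):

    import torch

    def soft_assignments(z, mu):
        # q_ij is proportional to (1 + ||z_i - mu_j||^2)^(-1), rows normalized over clusters
        q = 1.0 / (1.0 + torch.cdist(z, mu) ** 2)
        return q / q.sum(dim=1, keepdim=True)

    def target_distribution(q):
        # p_ij is proportional to q_ij^2 / f_j, with cluster frequency f_j = sum_i q_ij
        w = q ** 2 / q.sum(dim=0)
        return w / w.sum(dim=1, keepdim=True)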

@@ -433,10 +435,11 @@ def init_clustering_params_kmeans(self,km):
        self.mu_c = torch.nn.Parameter(torch.from_numpy(km.cluster_centers_))

class DAEGCE(nn.Module):
-    def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args):
+    def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args,device):
        super(DAEGCE, self).__init__()


+        self.device = device
        self.args = args
        self.gc1 = SpGAT(input_feat_dim,hidden_dim1,hidden_dim1,dropout,alpha=0.2,nheads=4)
        self.gc2 = SpGAT(hidden_dim1,hidden_dim2,hidden_dim2,dropout,alpha=0.2,nheads=4)
@@ -496,7 +499,7 @@ def loss(self,x,adj,labels, n_nodes, n_features, norm, pos_weight,epoch):
        pred_adj = self.decoder(z)
        L_rec = norm_u * F.binary_cross_entropy_with_logits(pred_adj, labels_sub_u, pos_weight = pos_weight_u)

-        self.Q = self.getSoftAssignments(z,self.mu_c.cuda(),n_nodes)
+        self.Q = self.getSoftAssignments(z,self.mu_c.to(self.device),n_nodes)
        self.P = self.calculateP(self.Q)

        if epoch>=200:
@@ -515,7 +518,7 @@ def loss(self,x,adj,labels, n_nodes, n_features, norm, pos_weight,epoch):

    def predict_soft_assignment(self,z):

-        Q = self.getSoftAssignments(z,self.mu_c.cuda(),z.shape[0])
+        Q = self.getSoftAssignments(z,self.mu_c.to(self.device),z.shape[0])
        gamma_c = Q
        gamma=gamma_c.detach().cpu().numpy()

@@ -573,11 +576,12 @@ def init_clustering_params_kmeans(self,km):
        self.mu_c = torch.nn.Parameter(torch.from_numpy(km.cluster_centers_))

class GCNModelVAECE(nn.Module):
-    def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args):
+    def __init__(self, input_feat_dim, n_nodes, hidden_dim1, hidden_dim2, dropout,args,device):
        super(GCNModelVAECE, self).__init__()


        self.args = args
+        self.device = device
        self.gc1 = GraphConvolutionSparse(input_feat_dim, hidden_dim1, dropout, act=torch.relu)
        self.gc2 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x)
        self.gc3 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x)
@@ -643,9 +647,9 @@ def modularity_loss(self, z,adj):
        H_norm = n.sqrt()*H.sqrt()/(H.sqrt().sum())
        # print("H_norm shape",H_norm.shape)
        # print("H_norm ",H_norm)
-        m = (adj-torch.eye(adj.shape[0]).cuda()).sum()/2
-        D = (adj-torch.eye(adj.shape[0]).cuda()).sum(1) # the degree of nodes, adj includes self loop
-        B = (adj-torch.eye(adj.shape[0]).cuda())-torch.matmul(D.view(-1,1),D.view(1,-1))/(2*m) # modularity matrix
+        m = (adj-torch.eye(adj.shape[0]).to(self.device)).sum()/2
+        D = (adj-torch.eye(adj.shape[0]).to(self.device)).sum(1) # the degree of nodes, adj includes self loop
+        B = (adj-torch.eye(adj.shape[0]).to(self.device))-torch.matmul(D.view(-1,1),D.view(1,-1))/(2*m) # modularity matrix
        mod_loss=torch.trace(torch.matmul(torch.matmul(H_norm.t(),B),H_norm)/(4*m))
        # print("mod_loss",mod_loss)

@@ -795,7 +799,7 @@ def loss(self,x,adj,labels, n_nodes, n_features, norm, pos_weight,L=1):

        # print('z shape mu_c shape',z.shape,self.mu_c.shape)

-        Q = self.getSoftAssignments(z,self.mu_c.cuda(),n_nodes)
+        Q = self.getSoftAssignments(z,self.mu_c.to(self.device),n_nodes)

        P = self.calculateP(Q)
        # if epoch ==0:
@@ -906,7 +910,7 @@ def predict_soft_assignment(self, mu, logvar,z):
        # z = torch.randn_like(mu) * torch.exp(z_sigma2_log / 2) + z_mu
        det=1e-10
        # z = self.reparameterize(mu,logvar)
-        Q = self.getSoftAssignments(z,self.mu_c.cuda(),z.shape[0])
+        Q = self.getSoftAssignments(z,self.mu_c.to(self.device),z.shape[0])

        pi = self.pi_
        # log_sigma2_c = self.log_sigma2_c
@@ -1045,14 +1049,14 @@ def init_clustering_params(self,gmm):
        # self.pi_= torch.nn.Parameter(torch.from_numpy(gmm.weights_))
        # self.mu_c = torch.nn.Parameter(torch.from_numpy(gmm.means_))
        # self.log_sigma2_c = torch.nn.Parameter(torch.from_numpy(gmm.covariances_))
-        if self.args.cuda>=0:
-            self.pi_= torch.nn.Parameter(torch.from_numpy(gmm.weights_).float().to('cuda'))
-            self.mu_c = torch.nn.Parameter(torch.from_numpy(gmm.means_).to('cuda'))
-            self.log_sigma2_c = torch.nn.Parameter(torch.from_numpy(gmm.covariances_).to('cuda'))
-        else:
-            self.pi_= torch.nn.Parameter(torch.from_numpy(gmm.weights_),requires_grad=False)
-            self.mu_c = torch.nn.Parameter(torch.from_numpy(gmm.means_))
-            self.log_sigma2_c = torch.nn.Parameter(torch.from_numpy(gmm.covariances_),requires_grad=False)
+        # if self.args.cuda>=0:
+        self.pi_= torch.nn.Parameter(torch.from_numpy(gmm.weights_).float().to(self.device))
+        self.mu_c = torch.nn.Parameter(torch.from_numpy(gmm.means_).to(self.device))
+        self.log_sigma2_c = torch.nn.Parameter(torch.from_numpy(gmm.covariances_).to(self.device))
+        # else:
+        #     self.pi_= torch.nn.Parameter(torch.from_numpy(gmm.weights_),requires_grad=False)
+        #     self.mu_c = torch.nn.Parameter(torch.from_numpy(gmm.means_))
+        #     self.log_sigma2_c = torch.nn.Parameter(torch.from_numpy(gmm.covariances_),requires_grad=False)

        # print(self.mu_c)
        print("check cluster parameter device",'\npi',self.pi_.device,"\nmu_c device",self.mu_c.device,'\nlog_sigma2 device', self.log_sigma2_c.device)