Commit
test on utspred
goodman1204 committed Jan 23, 2021
1 parent df42324 commit db0a280
Showing 4 changed files with 98 additions and 32 deletions.
4 changes: 2 additions & 2 deletions estimators.py
@@ -19,7 +19,7 @@ def logmeanexp_nodiag(x, dim=None, device='cpu'):
dim = (0, 1)

logsumexp = torch.logsumexp(
- x - torch.diag(np.inf * torch.ones(batch_size).to(device)), dim=dim)
+ x - torch.diag(np.inf * torch.ones(batch_size)).to('cuda'), dim=dim)

try:
if len(dim) == 1:
@@ -28,7 +28,7 @@ def logmeanexp_nodiag(x, dim=None, device='cpu'):
num_elem = batch_size * (batch_size - 1.)
except ValueError:
num_elem = batch_size - 1
- return logsumexp - torch.log(torch.tensor(num_elem)).to(device)
+ return logsumexp - torch.log(torch.tensor(num_elem))


def tuba_lower_bound(scores, log_baseline=None):
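As context for the two estimators.py edits: logmeanexp_nodiag averages exp(score) over the off-diagonal entries of a square critic matrix, the negative-pair term used by MI lower bounds such as tuba_lower_bound. A minimal device-agnostic sketch (logmeanexp_nodiag_sketch is a hypothetical name; unlike the edit above, it keeps every tensor on the input's device instead of hardcoding 'cuda'):

import torch

def logmeanexp_nodiag_sketch(x: torch.Tensor) -> torch.Tensor:
    # x: [batch_size, batch_size] critic scores; put -inf on the diagonal
    # so exp(-inf) = 0 removes the positive pairs from the sum.
    batch_size = x.size(0)
    diag_mask = torch.diag(torch.full((batch_size,), float('inf'), device=x.device))
    logsumexp = torch.logsumexp(x - diag_mask, dim=(0, 1))
    num_elem = batch_size * (batch_size - 1)  # number of off-diagonal entries
    return logsumexp - torch.log(torch.tensor(float(num_elem), device=x.device))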
4 changes: 2 additions & 2 deletions layers.py
@@ -54,7 +54,7 @@ def reset_parameters(self):
torch.nn.init.xavier_uniform_(self.weight)

def forward(self, input, adj):
- # input = F.dropout(input, self.dropout, self.training)
+ input = F.dropout(input, self.dropout, self.training)
support = torch.spmm(input, self.weight)
output = torch.spmm(adj, support)
output = self.act(output)
@@ -326,7 +326,7 @@ def __init__(self, nfeat, nhid, nclass, dropout, alpha = 0.2, nheads = 8):
concat=False)

def forward(self, x, adj):
- # x = F.dropout(x, self.dropout, training=self.training)
+ x = F.dropout(x, self.dropout, training=self.training)
x = torch.cat([att(x, adj) for att in self.attentions], dim=1)
# x = F.dropout(x, self.dropout, training=self.training)
x = self.out_att(x, adj)
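Both layers.py edits re-enable input dropout in the forward pass. F.dropout only zeroes entries when its training argument is True, so passing self.training keeps evaluation deterministic; note that with the new --dropout default of 0.0 (see train.py below) the call is a no-op even during training. A self-contained sketch of the pattern, with GraphConvSketch as an illustrative stand-in for the repo's layer:

import torch
import torch.nn.functional as F

class GraphConvSketch(torch.nn.Module):
    def __init__(self, in_dim, out_dim, dropout=0.2, act=torch.relu):
        super().__init__()
        self.weight = torch.nn.Parameter(torch.empty(in_dim, out_dim))
        torch.nn.init.xavier_uniform_(self.weight)
        self.dropout = dropout
        self.act = act

    def forward(self, x, adj):
        # Dropout is applied only while self.training is True.
        x = F.dropout(x, self.dropout, self.training)
        support = x @ self.weight          # dense analogue of torch.spmm(x, W)
        output = torch.spmm(adj, support)  # propagate over the sparse adjacency
        return self.act(output)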
52 changes: 37 additions & 15 deletions train.py
@@ -10,7 +10,7 @@
from torch.autograd import Variable
from torch.optim.lr_scheduler import StepLR
from model import GCNModelVAE,GCNModelVAECD,GCNModelAE,GCNModelVAECE
- from utils import preprocess_graph, get_roc_score, sparse_to_tuple,sparse_mx_to_torch_sparse_tensor,cluster_acc,clustering_evaluation, find_motif,drop_feature, drop_edge
+ from utils import preprocess_graph, get_roc_score, sparse_to_tuple,sparse_mx_to_torch_sparse_tensor,cluster_acc,clustering_evaluation, find_motif,drop_feature, drop_edge,choose_cluster_votes
from preprocessing import mask_test_feas,mask_test_edges, load_AN, check_symmetric,load_data
from tqdm import tqdm
from tensorboardX import SummaryWriter
@@ -29,6 +29,11 @@ def training(args):
adj_init, features, labels, idx_train, idx_val, idx_test = load_data(args.dataset)
Y = np.argmax(labels,1) # labels is in one-hot format

# print("find motif")
# motif_matrix=find_motif(adj_init,args.dataset)

# adj_init=sp.lil_matrix(motif_matrix).multiply(adj_init)

# Store original adjacency matrix (without diagonal entries) for later
adj_init = adj_init- sp.dia_matrix((adj_init.diagonal()[np.newaxis, :], [0]), shape=adj_init.shape)
adj_init.eliminate_zeros()
@@ -40,8 +45,6 @@ def training(args):

# find motif 3 nodes

- # motif_matrix=find_motif(adj_init,args.dataset)
- # print("find motif")


args.nClusters=len(set(Y))
@@ -98,14 +101,16 @@ def training(args):
mean_nmi=[]
mean_purity=[]
mean_accuracy=[]
+ mean_f1=[]
+ mean_precision=[]


+ # adj_norm = drop_edge(adj_norm,Y)
if args.cuda:
# drop features
features_training = features_training.to_dense().cuda()
# features_training = drop_feature(features_training,1.0).cuda()
adj_norm = adj_norm.to_dense().cuda()
- # adj_norm = drop_edge(adj_norm,0.5).cuda()
pos_weight_u = pos_weight_u.cuda()
pos_weight_a = pos_weight_a.cuda()
adj_label = adj_label.cuda()
@@ -121,6 +126,10 @@ def training(args):

for r in range(args.num_run):

+ # random.seed(args.seed)
+ # np.random.seed(args.seed)
+ # torch.manual_seed(args.seed)
+
model = None
if args.model == 'gcn_ae':
model = GCNModelAE(n_features,n_nodes, args.hidden1, args.hidden2, args.dropout,args)
@@ -159,7 +168,7 @@ def training(args):
cost_val = []
acc_val = []
val_roc_score = []
- lr_s=StepLR(optimizer1,step_size=30,gamma=0.95) # it seems that a fixed learning rate is better
+ lr_s=StepLR(optimizer1,step_size=30,gamma=1) # it seems that a fixed learning rate is better

loss_list=None
pretrain_flag = False
@@ -191,8 +200,8 @@ def training(args):

pre,gamma,z = model.predict_soft_assignment(mu_u,logvar_u,z)

- H, C, V, ari, ami, nmi, purity = clustering_evaluation(Y,pre)
- print("purity, NMI:",purity,nmi)
+ H, C, V, ari, ami, nmi, purity, f1_score,precision = clustering_evaluation(Y,pre)
+ print("purity, NMI, f1_score:",purity,nmi,f1_score)
# z = model.reparameterize(mu_u,logvar_u)
# Q = model.getSoftAssignments(z,model.mu_c,args.nClusters,args.hidden2,n_nodes)

@@ -207,7 +216,7 @@ def training(args):
# print("Soft cluster assignment",Counter(torch.argmax(Q,1).tolist()))
# loss_list.append(-0.01*soft_cluster_loss)

- if epoch <=0.2*args.epochs:
+ if epoch <=200:
loss =sum(loss_list[0:-1])
# model.change_nn_grad_true()
model.change_cluster_grad_false()
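During these warm-up epochs only the network weights are optimized while the clustering parameters stay frozen. A sketch of what a change_cluster_grad_false-style helper typically does (pi_c, mu_c, and log_sigma2_c are assumed attribute names; the diff itself only shows model.mu_c):

def freeze_cluster_params_sketch(model):
    # Stop gradient flow into the GMM prior during the warm-up phase.
    for p in (model.pi_c, model.mu_c, model.log_sigma2_c):
        p.requires_grad = False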
@@ -219,7 +228,7 @@ def training(args):
gmm = GaussianMixture(n_components=args.nClusters,covariance_type='diag')
pre = gmm.fit_predict(z.cpu().detach().numpy())
# print('Acc={:.4f}%'.format(cluster_acc(pre, Y)[0] * 100))
- H, C, V, ari, ami, nmi, purity = clustering_evaluation(pre,Y)
+ H, C, V, ari, ami, nmi, purity,f1_score,precision_score = clustering_evaluation(pre,Y)
print("GMM purity, NMI:",purity,nmi)
model.plot_tsne(args.dataset,epoch,z.to('cpu'),Y,pre)
model.init_clustering_params(gmm)
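After the warm-up, the code fits a GaussianMixture on the latent codes and hands it to model.init_clustering_params. A hedged sketch of what such an initialization usually copies (parameter names assumed, matching the helper above):

import torch
from sklearn.mixture import GaussianMixture

def init_cluster_params_sketch(model, z, n_clusters):
    # Fit a diagonal-covariance GMM on detached latent codes, then seed the
    # model's cluster prior with the fitted weights, means, and variances.
    gmm = GaussianMixture(n_components=n_clusters, covariance_type='diag')
    gmm.fit(z.detach().cpu().numpy())
    with torch.no_grad():
        model.pi_c.copy_(torch.from_numpy(gmm.weights_).float())
        model.mu_c.copy_(torch.from_numpy(gmm.means_).float())
        model.log_sigma2_c.copy_(torch.log(torch.from_numpy(gmm.covariances_).float()))
    return gmm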
@@ -354,11 +363,19 @@ def training(args):


pre,gamma_c,z = model.predict_soft_assignment(mu_u,logvar_u,z)
with open("save_prediction.log",'w') as wp:
for label in pre:
wp.write("{}\n".format(label))

print('gamma_c:',gamma_c)
print('gamma_c argmax:',np.argmax(gamma_c,1))
print('gamma_c argmax counter:',Counter(np.argmax(gamma_c,1).tolist()))

- H, C, V, ari, ami, nmi, purity = clustering_evaluation(tru,pre)
+ new_prediction=choose_cluster_votes(adj_label,pre)
+ H, C, V, ari, ami, nmi, purity,f1_score,precision= clustering_evaluation(tru,new_prediction)
+ print("new prediction nmi",nmi)
+
+ H, C, V, ari, ami, nmi, purity,f1_score,precision= clustering_evaluation(tru,pre)
acc = cluster_acc(pre,tru)[0]*100
mean_h.append(round(H,4))
mean_c.append(round(C,4))
@@ -368,6 +385,8 @@ def training(args):
mean_nmi.append(round(nmi,4))
mean_purity.append(round(purity,4))
mean_accuracy.append(round(acc,4))
+ mean_f1.append(round(f1_score,4))
+ mean_precision.append(round(precision,4))

if args.model in ['gcn_vaecd','gcn_vaece']:
# pre,gamma,z = model.predict_soft_assignment(mu_u,logvar_u)
@@ -398,6 +417,8 @@ def training(args):
print('Normalized Mutual Information:{}\t mean:{}\t std:{}\n'.format(mean_nmi,round(np.mean(mean_nmi),4),round(np.std(mean_nmi),4)))
print('Purity:{}\t mean:{}\t std:{}\n'.format(mean_purity,round(np.mean(mean_purity),4),round(np.std(mean_purity),4)))
print('Accuracy:{}\t mean:{}\t std:{}\n'.format(mean_accuracy,round(np.mean(mean_accuracy),4),round(np.std(mean_accuracy),4)))
+ print('F1-score:{}\t mean:{}\t std:{}\n'.format(mean_f1,round(np.mean(mean_f1),4),round(np.std(mean_f1),4)))
+ print('precision_score:{}\t mean:{}\t std:{}\n'.format(mean_precision,round(np.mean(mean_precision),4),round(np.std(mean_precision),4)))
print("True label distribution:{}".format(tru))
print(Counter(tru))
print("Predicted label distribution:{}".format(pre))
@@ -406,12 +427,12 @@ def parse_args():
def parse_args():
parser = argparse.ArgumentParser(description="Node clustering")
parser.add_argument('--model', type=str, default='gcn_ae', help="models used for clustering: gcn_ae,gcn_vae,gcn_vaecd,gcn_vaece")
- parser.add_argument('--seed', type=int, default=42, help='Random seed.')
+ parser.add_argument('--seed', type=int, default=20, help='Random seed.')
parser.add_argument('--epochs', type=int, default=300, help='Number of epochs to train.')
parser.add_argument('--hidden1', type=int, default=32, help='Number of units in hidden layer 1.')
parser.add_argument('--hidden2', type=int, default=16, help='Number of units in hidden layer 2.')
parser.add_argument('--lr', type=float, default=0.002, help='Initial learning rate.')
- parser.add_argument('--dropout', type=float, default=0.2, help='Dropout rate (1 - keep probability).')
+ parser.add_argument('--dropout', type=float, default=0.0, help='Dropout rate (1 - keep probability).')
parser.add_argument('--dataset', type=str, default='cora', help='type of dataset.')
parser.add_argument('--nClusters',type=int,default=7)
parser.add_argument('--num_run',type=int,default=1,help='Number of running times')
@@ -424,7 +445,8 @@ def parse_args():
args = parse_args()
if args.cuda:
torch.cuda.set_device(0)
- torch.cuda.manual_seed(args.seed)
- random.seed(args.seed)
- np.random.seed(args.seed)
+ # torch.cuda.manual_seed(args.seed)
+ # random.seed(args.seed)
+ # np.random.seed(args.seed)
+ # torch.manual_seed(args.seed)
training(args)
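With all RNG seeding commented out in the __main__ block, repeated runs will differ. If reproducibility is wanted, a conventional helper would be (set_seed is illustrative, not part of the repo):

import random
import numpy as np
import torch

def set_seed(seed: int):
    # Seed every RNG the training loop touches: Python, NumPy, and torch
    # on both CPU and (when available) all CUDA devices.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)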
70 changes: 57 additions & 13 deletions utils.py
@@ -4,10 +4,12 @@
import numpy as np
import scipy.sparse as sp
import torch
- from sklearn.metrics import roc_auc_score, average_precision_score
+ from sklearn.metrics import roc_auc_score, average_precision_score,f1_score,precision_score
from sklearn import metrics
import itertools
import os
+ from collections import Counter
+ from munkres import Munkres, print_matrix

def find_motif(adj, dataset_name):

@@ -26,15 +28,19 @@ def find_motif(adj, dataset_name):

N = g.number_of_nodes()
motif_matrix = np.zeros((N,N))
- for sub_nodes in itertools.combinations(g.nodes(),len(target.nodes())):
-     subg = g.subgraph(sub_nodes)
-     if nx.is_connected(subg) and nx.is_isomorphic(subg, target):
-         for e in subg.edges():
-             motif_matrix[e[0]][e[1]]=1
-             motif_matrix[e[1]][e[0]]=1

+ for node in g.nodes():
+     print(node)
+     neighbours = [i for i in g.neighbors(node)]
+     for sub_nodes in itertools.combinations(neighbours,len(target.nodes())):
+         subg = g.subgraph(sub_nodes)
+         if nx.is_connected(subg) and nx.is_isomorphic(subg, target):
+             for e in subg.edges():
+                 motif_matrix[e[0]][e[1]]=1
+                 motif_matrix[e[1]][e[0]]=1

with open(path,'wb') as wp:
- np.save(wp,sp.coo_matrix(motif_matrix))
+ np.save(wp,motif_matrix)

return motif_matrix
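The rewritten find_motif enumerates candidate subgraphs only among each node's neighbours rather than over all node combinations, which shrinks the search space considerably. For the common 3-node case (a triangle), the same edge marking can be done directly by intersecting neighbour sets; a sketch assuming nodes are labeled 0..N-1:

import networkx as nx
import numpy as np

def triangle_motif_matrix_sketch(g: nx.Graph) -> np.ndarray:
    # An edge (u, v) lies on a triangle iff u and v share a common neighbour.
    n = g.number_of_nodes()
    motif = np.zeros((n, n))
    for u, v in g.edges():
        if set(g.neighbors(u)) & set(g.neighbors(v)):
            motif[u][v] = motif[v][u] = 1
    return motif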

@@ -259,7 +265,9 @@ def clustering_evaluation(labels_true, labels):
metrics.adjusted_rand_score(labels_true, labels),\
metrics.adjusted_mutual_info_score(labels_true, labels), \
metrics.normalized_mutual_info_score(labels_true,labels), \
- purity_score(labels_true, labels)
+ purity_score(labels_true, labels),\
+ f1_score(labels_true,labels,average='macro'),\
+ precision_score(labels_true,labels,average='macro')
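Note that macro F1 and precision compare label ids directly, which is only meaningful once cluster ids are aligned to class ids; that is presumably what the new munkres import is for. A sketch of Hungarian alignment before scoring (aligned_f1_sketch is illustrative and assumes the number of clusters equals the number of classes):

import numpy as np
from munkres import Munkres
from sklearn import metrics

def aligned_f1_sketch(y_true, y_pred):
    # Map each predicted cluster to the true class it overlaps most with,
    # then score the remapped prediction.
    classes = np.unique(y_true)
    cost = [[-np.sum((y_pred == k) & (y_true == c)) for k in classes] for c in classes]
    mapping = {classes[col]: classes[row] for row, col in Munkres().compute(cost)}
    y_mapped = np.array([mapping[p] for p in y_pred])
    return metrics.f1_score(y_true, y_mapped, average='macro')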

def drop_feature(feature_matrix,delta):
num_nodes, num_features = feature_matrix.shape
@@ -269,9 +277,45 @@ def drop_feature(feature_matrix,delta):
return feature_matrix_dropped


- def drop_edge(adj,delta):
+ def drop_edge(adj,Y,delta=1):

num_nodes, num_features = adj.shape
- mask = torch.tensor(np.random.binomial(1,delta,[num_nodes,num_features]))

- adj_dropped = adj*mask
- return adj_dropped
+ # mask = torch.tensor(np.random.binomial(1,delta,[num_nodes,num_features]))

+ for row in range(num_nodes):
+     print(row)
+     for col in range(num_nodes):
+         if row!=col and adj[row,col]==1:
+             if Y[row]!=Y[col]:
+                 adj[row,col]=0
+                 adj[col,row]=0
+
+ print("after drop edge: edge number",adj.sum())
+ return adj
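The new drop_edge removes every edge whose endpoints carry different labels, an oracle-style pruning that uses the ground truth Y (delta is currently unused). The O(N^2) Python loop can be collapsed into one vectorized step on a dense adjacency; a sketch under that assumption:

import numpy as np

def drop_inter_class_edges_sketch(adj: np.ndarray, Y: np.ndarray) -> np.ndarray:
    # Keep an edge only when both endpoints share a label.
    same_label = (Y[:, None] == Y[None, :]).astype(adj.dtype)
    return adj * same_label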

+ def choose_cluster_votes(adj,prediction):
+
+     n_nodes = adj.shape[0]
+
+     new_prediction=[]
+     for i in range(n_nodes):
+         labels=prediction[(adj[i]>=1).tolist()]
+         labels_max = Counter(labels)
+
+         max_value = 0
+         candidate_label =0
+         for key,value in labels_max.items():
+             if value > max_value:
+                 candidate_label = key
+                 max_value = value
+         new_prediction.append(candidate_label)
+
+     print("new prediction agreement rate:",np.sum(np.array(new_prediction)==prediction)/len(prediction))
+     return np.array(new_prediction)
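choose_cluster_votes relabels each node with the most common prediction among the nodes selected by its row of adj; in train.py it is called with adj_label, which in VGAE-style pipelines is typically the adjacency plus self-loops, so each node votes together with its neighbours. A toy usage sketch under that self-loop assumption:

import numpy as np

adj = np.array([[1, 1, 1, 0],
                [1, 1, 1, 0],
                [1, 1, 1, 1],
                [0, 0, 1, 1]])
prediction = np.array([0, 0, 1, 1])
# Node 2's neighbourhood votes 0,0,1,1 and node 3's votes 1,1; ties fall to
# the first label encountered, so the smoothed prediction is [0 0 0 1].
print(choose_cluster_votes(adj, prediction))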





