-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
191 lines (145 loc) · 6.39 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
import shutil
from wasabi import msg
from preprocessing import graph2edges
from runner import parallelrunnerGC, runnerGC
from embedding import OptKEmbedding, GetStabilityEmbedding, UsersMiniBatchKMeansEmbedding
from clustering import OptKClustering, GetStabilityClustering, UsersDendrogramClustering
from utils import ColorNetworks, AuditCentroids, GraphletCorrelations, ViewSignature, ViewComposition
from radicli import Radicli, Arg
########################################################################
# Radicli command-line application object. Every `@cli.command(...)` decorator
# below registers one subcommand on this instance.
cli = Radicli()
########################################################################
#Preprocessing
@cli.command(
    "preprocessing",
    inputt=Arg("--input", "-i", help="Input Directory"),
    outputt=Arg("--output", "-o", help="Output Directory"),
)
def preprocessing(inputt: str = "input/", outputt: str = "input/"):
    """Preprocessing of the input data: Convert the graph to edges"""
    # NOTE(review): radicli appears to show this docstring as the command's
    # help description, so its wording is kept as-is.
    converted = graph2edges(inputt, outputt)
    if not converted:
        # graph2edges returned a falsy value: finish without the success banner.
        return
    msg.good("Preprocessing Done")
########################################################################
#PreCompute
@cli.command(
    "precompute",
    inputt=Arg("--input", "-i", help="Input Directory"),
    logss=Arg("--logs", "-l", help="Logs Directory"),
    threadss=Arg("--threads", "-t", help="Number of Threads"),
)
def precompute(inputt: str = "input/", logss: str = "logs/", threadss: int = 4):
    """Precompute the graphlet counts for each node"""
    # Delegates to runner.parallelrunnerGC; the thread count is forwarded
    # through its `threads` keyword.
    finished = parallelrunnerGC(inputt, logss, threads=threadss)
    # The success banner is printed only for a truthy result.
    if finished:
        msg.good("Parallel precompute done")
########################################################################
## First Clustering (Users Characterization)
@cli.command(
    "optimKE",
    inputt=Arg("--input", "-i", help="Input Directory"),
    outputt=Arg("--output", "-o", help="Output Directory"),
)
def optimKE(inputt: str = "input/", outputt: str = "stability/"):
    """Get the optimal number of clusters for the users characterization (Embedding)"""
    # The docstring doubles as user-facing help text, hence the spelling fix
    # ("caracterization" -> "characterization").
    # Delegates to embedding.OptKEmbedding; the success banner is printed only
    # when it returns a truthy value.
    if OptKEmbedding(inputt, outputt):
        msg.good("GAP Users Done")
@cli.command(
    "stabilityE",
    inputt=Arg("--input", "-i", help="Input Directory"),
    outputt=Arg("--output", "-o", help="Output Directory"),
    runss=Arg("--runs", "-r", help="Number of Runs"),
    K=Arg("--K", "-k", help="Number of Clusters"),
)
def stabilityE(
    inputt: str = "input/",
    outputt: str = "stability/",
    runss: int = 50,
    K: int = 5,
):
    """Get the stability of the users characterization (Embedding)"""
    # The docstring doubles as user-facing help text, hence the spelling fix
    # ("caracterization" -> "characterization").
    # Delegates to embedding.GetStabilityEmbedding, forwarding the run count
    # and cluster count as the `runs` and `K` keywords.
    if GetStabilityEmbedding(inputt, outputt, runs=runss, K=K):
        msg.good("Embedding Stability Done")
@cli.command(
    "embedding",
    inputt=Arg("--input", "-i", help="Input Directory"),
    outputt=Arg("--output", "-o", help="Output Directory"),
    K=Arg("--K", "-k", help="Number of Clusters"),
)
def embedding(inputt: str = "input/", outputt: str = "embeddings/", K: int = 5):
    """Get the users characterization (Embedding)"""
    # The docstring doubles as user-facing help text, hence the spelling fix
    # ("caracterization" -> "characterization").
    # Delegates to embedding.UsersMiniBatchKMeansEmbedding with the requested
    # number of clusters; the banner is printed only on a truthy result.
    if UsersMiniBatchKMeansEmbedding(inputt, outputt, K=K):
        msg.good("KMeans Embedding Done")
########################################################################
## Second Clustering (Graph Clustering)
@cli.command(
    "optimKC",
    # The default for --input is a CSV file path, so the help text says
    # "File" rather than "Directory".
    inputt=Arg("--input", "-i", help="Input File"),
    outputt=Arg("--output", "-o", help="Output Directory"),
)
def optimKC(
    inputt: str = "embeddings/5-NormMiniBatchUsersEmbedding.csv",
    outputt: str = "clustering/",
):
    """Get the optimal number of clusters for the graph clustering"""
    # Delegates to clustering.OptKClustering; the success banner is printed
    # only when it returns a truthy value.
    if OptKClustering(inputt, outputt):
        msg.good("GAP Graphs Done")
@cli.command(
    "stabilityC",
    # The default for --input is a CSV file path, so the help text says
    # "File" rather than "Directory".
    inputt=Arg("--input", "-i", help="Input File"),
    outputt=Arg("--output", "-o", help="Output Directory"),
    runss=Arg("--runs", "-r", help="Number of Runs"),
    K=Arg("--K", "-k", help="Number of Clusters"),
)
def stabilityC(
    inputt: str = "embeddings/5-NormMiniBatchUsersEmbedding.csv",
    outputt: str = "clustering/",
    runss: int = 50,
    K: int = 5,
):
    """Get the stability of the graph clustering"""
    # Delegates to clustering.GetStabilityClustering, forwarding the run count
    # and cluster count as the `runs` and `K` keywords.
    if GetStabilityClustering(inputt, outputt, runs=runss, K=K):
        msg.good("NetworkClustering Stability Done")
@cli.command(
    "clustering",
    # The default for --input is a CSV file path, so the help text says
    # "File" rather than "Directory".
    inputt=Arg("--input", "-i", help="Input File"),
    outputt=Arg("--output", "-o", help="Output Directory"),
    K=Arg("--K", "-k", help="Number of Clusters"),
)
def clustering(
    inputt: str = "embeddings/5-NormMiniBatchUsersEmbedding.csv",
    outputt: str = "clustering/",
    K: int = 5,
):
    """Get the graph clustering (Dendrogram)"""
    # Delegates to clustering.UsersDendrogramClustering with the requested
    # number of clusters; the banner is printed only on a truthy result.
    if UsersDendrogramClustering(inputt, outputt, K=K):
        msg.good("Dendrogram Done")
########################################################################
#PostProcessing
@cli.command(
    "correlations",
    inputt=Arg("--input", "-i", help="Input Directory"),
    outputt=Arg("--output", "-o", help="Output Directory"),
)
def correlations(inputt: str = "input/", outputt: str = "./"):
    """Get the correlations between the graphlets"""
    # Thin wrapper: hands both paths to utils.GraphletCorrelations. Its result
    # is ignored and no status message is printed here.
    GraphletCorrelations(inputt, outputt)
@cli.command(
    "audit",
    # The default for --input is a centroids file path, so the help text says
    # "File" rather than "Directory".
    inputt=Arg("--input", "-i", help="Input File"),
    threshold=Arg("--threshold", "-t", help="threshold"),
)
def audit(
    inputt: str = "embeddings/5-CentroidsMiniBatchEmbedding.out",
    threshold: float = 0.05,
):
    """Get the dominant graphlets for each cluster"""
    # Thin wrapper around utils.AuditCentroids. Its result is ignored and no
    # status message is printed here.
    # NOTE(review): the exact meaning of `threshold` is defined by
    # AuditCentroids and is not visible in this file.
    AuditCentroids(inputt, threshold)
@cli.command(
    "colornetworks",
    inputt=Arg("--input", "-i", help="Input Directory"),
    clusterss=Arg("--clusters", "-c", help="Clusters File"),
    outputt=Arg("--output", "-o", help="Output Directory"),
)
def colornetworks(
    inputt: str = "input/",
    clusterss: str = "embeddings/5-CompleteMiniBatchUsers.csv",
    outputt: str = "colored/",
):
    """Color each node of the graph with its cluster color"""
    # The docstring doubles as user-facing help text, hence the grammar fix
    # ("Color the each node" -> "Color each node").
    # Thin wrapper around utils.ColorNetworks. Its result is ignored and no
    # status message is printed here.
    ColorNetworks(inputt, clusterss, outputt)
@cli.command(
    "viewsignature",
    inputt=Arg("--input", "-i", help="Input Directory"),
    outputt=Arg("--output", "-o", help="Output Directory"),
)
def viewsignature(inputt: str = "input/", outputt: str = "input/"):
    """View the signature of the graph"""
    # Thin wrapper: forwards both paths to utils.ViewSignature. Its result is
    # ignored and no status message is printed here.
    ViewSignature(inputt, outputt)
@cli.command(
    "viewcomposition",
    inputt=Arg("--input", "-i", help="Input Directory"),
    outputt=Arg("--output", "-o", help="Output Directory"),
)
def viewcomposition(inputt: str = "embeddings/", outputt: str = "embeddings/"):
    """View the composition of the graph"""
    # Thin wrapper: forwards both paths to utils.ViewComposition. Its result is
    # ignored and no status message is printed here.
    ViewComposition(inputt, outputt)
########################################################################
# Start the radicli application only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    cli.run()
########################################################################