-
Notifications
You must be signed in to change notification settings - Fork 0
/
cmd.py
87 lines (69 loc) · 2.4 KB
/
cmd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/python3
# line encoding :unix: LF
import rawMatGen
import sparseMatGen
import powerIter
import time
import metric
# Norm function handed to the convergence check (pr.isConvergence) in pageRank.
norm = metric.metric.get1Norm
# Passed as the second argument to powerIter.pageRank;
# presumably the PageRank damping factor -- TODO confirm.
beta = 0.8
# Passed to rawMatGen.rawMat / sparseMatGen.sparseMat;
# presumably the number of pages in the graph -- TODO confirm.
N = 100
# Block size passed to powerIter.pageRank and to pr.iter.
rankBlock = 2
# Block size passed to sparseMatGen.sparseMat.
tranMatBlock = 2
# Threshold passed to pr.isConvergence; the loop in pageRank stops once it reports convergence.
epsilon = 0.01
# Number of best pages requested from pr.getkBest and printed at the end.
topK = 10
# NOTE: "block" is the number of pagelinks held in one block; e.g. block=2 means
# each block contains two records. As a special case, block=1 means no blocking.
# NOTE: block=2 may be the best practice.
# If mock is enabled, data is loaded from and stored to memory instead of files.
# A copy of the data is always kept in memory for legacy reasons, but whether
# the in-memory or the in-file data is loaded is decided by the "mock" flag.
sparseMatGen.mock = False
powerIter.mock = False
def getMethodStr(rankBlockSize=None, tranMatBlockSize=None) -> str:
    """Return the label of the iteration method implied by the block sizes.

    Args:
        rankBlockSize: block size of the rank vector. Defaults to the
            module-level ``rankBlock`` when omitted.
        tranMatBlockSize: block size of the transfer matrix. Defaults to the
            module-level ``tranMatBlock`` when omitted.

    Returns:
        ``"block-strip"`` when both sizes differ from 1,
        ``"block-based"`` when only the rank vector is blocked,
        ``"unblock"`` when neither is blocked.

    Raises:
        ValueError: when the rank vector is unblocked but the transfer
            matrix is blocked (no method label exists for that combination).
    """
    # Fall back to the script-wide configuration so existing no-argument
    # callers keep working unchanged.
    if rankBlockSize is None:
        rankBlockSize = rankBlock
    if tranMatBlockSize is None:
        tranMatBlockSize = tranMatBlock

    rankIsBlocked = rankBlockSize != 1
    tranMatIsBlocked = tranMatBlockSize != 1

    if rankIsBlocked and tranMatIsBlocked:
        return "block-strip"
    if rankIsBlocked:
        return "block-based"
    if not tranMatIsBlocked:
        return "unblock"
    # Raising a plain string is itself a TypeError in Python 3 and hides the
    # intended message, so raise a real exception instead.
    raise ValueError(
        "not supported method: "
        f"rankBlock={rankBlockSize}, tranMatBlock={tranMatBlockSize}")
def printSuperParam(delim=""):
    """Print the run configuration framed by two delimiter lines.

    The opening delimiter is prefixed with an ANSI colour escape sequence and
    the closing delimiter is followed by the ANSI reset sequence, so the whole
    banner is coloured on terminals that honour those codes.

    Args:
        delim: string printed before and after the configuration lines.
    """
    opening = "\033[1;31m"+delim
    print(opening)

    # Each line is formatted immediately before it is printed so that output
    # order matches evaluation order.
    methodLine = f"method:{getMethodStr()} norm:{norm.__name__} epsilon:{epsilon} beta:{beta}"
    print(methodLine)

    sizeLine = f"N:{N} rankBlock:{rankBlock} transferMatBlock:{tranMatBlock} topK:{topK}"
    print(sizeLine)

    mockLine = f"in-memory mock: mat:{sparseMatGen.mock} rank:{powerIter.mock}"
    print(mockLine)

    # The delimiter and the reset code are separated by a single space.
    closing = " ".join((delim, "\033[0m"))
    print(closing)
# Print the configuration banner once at start-up, before the run begins.
printSuperParam("~x~ "*10)
@metric.printTimeElapsed
def pageRank(version=3):
    """Run the power iteration to convergence and print the top-K pages.

    The sparse transfer matrix is built by ``sparseMatGen.sparseMat``; for
    versions 2 and 3 it receives ``N`` directly, for version 1 it receives the
    raw matrix produced by ``rawMatGen.rawMat(N)``. The solver returned by
    ``powerIter.pageRank`` is then iterated until ``isConvergence`` reports
    success, printing the loss after every iteration.

    Args:
        version: matrix-generation variant, one of 1, 2 or 3.

    Raises:
        ValueError: if ``version`` is not 1, 2 or 3.
    """
    # Validate the version before doing any work; raising a plain string
    # (as before) is itself a TypeError in Python 3.
    if version in (2, 3):
        matSource = N
    elif version == 1:
        matSource = rawMatGen.rawMat(N)
    else:
        raise ValueError(f"not supported version: {version!r}")

    sparseMat = sparseMatGen.sparseMat(
        matSource, tranMatBlock, version=version)
    pr = powerIter.pageRank(sparseMat, beta, rankBlock)

    cnt = 0
    loss = 0.
    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # wall-clock adjustments.
    beg = time.perf_counter()
    while True:
        cnt += 1
        # use rankBlock to iter, if rankBlock != tranMatBlock, it should be a
        # block-based powerIter
        pr.iter(rankBlock)
        ok, loss = pr.isConvergence(epsilon, norm)
        print(f"\033[1;36miter:{cnt} loss:{loss}\033[0m")
        if ok:
            break
    end = time.perf_counter()
    print(
        f"\033[1;32m[done] iter:{cnt} , loss={loss} , time:{end-beg}s \033[0m")

    pages = pr.getkBest(topK)
    printSuperParam("=*"*20)
    print(f"topK:{topK}", pages)
# Script entry: run with version 2, which is passed through to sparseMatGen.sparseMat
# (the function's own default is 3).
pageRank(version=2)