-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathcalculate_d_patterns_bide.py
95 lines (83 loc) · 2.31 KB
/
calculate_d_patterns_bide.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import os
import subprocess
import threading
import time

import _mysql
import MySQLdb
# Semaphore with two slots. myThread.run() holds one slot for its whole body,
# and the start-up loop at the bottom of the file also takes one around each
# thread start.
threadLimiter = threading.BoundedSemaphore(2)
# Directory whose entries are the work items.
# NOTE(review): the worker command refers to the same files through the
# relative path "data/<entry>", so the script presumably has to be launched
# from the parent of this directory -- confirm.
path = "/home/engineer/htdocs/pankaj/TextMining/data/"
# Names of the entries in `path`; one worker thread is started per entry.
dirlist = os.listdir(path)
# Index of the entry currently being dispatched; passed to each worker as its
# thread name and used by the worker as the suffix of its temp file names.
count = 0
class DP(object):
    """Builds a term -> weight dictionary from a list of patterns."""

    def __init__(self):
        pass

    def compose(self, dp, p):
        """Add the weights of ``p`` onto ``dp`` in place.

        For every key of ``p``: if the key is already in ``dp`` its value is
        increased by the value from ``p``, otherwise the key is inserted with
        the value from ``p``.

        :param dp: dictionary that is updated (mutated) and returned.
        :param p: dictionary whose values are added.
        :returns: the same ``dp`` object that was passed in.
        """
        for term, weight in p.items():
            # Explicit membership test instead of a bare ``except:`` so that
            # real errors (e.g. incompatible value types) are not swallowed.
            if term in dp:
                dp[term] += weight
            else:
                dp[term] = weight
        return dp

    def d_patterns(self, sp):
        """Count, for every term, how many patterns of ``sp`` contain it.

        :param sp: iterable of patterns; each pattern is an iterable of
            hashable terms.
        :returns: dict mapping each term to the number of patterns it occurs
            in (a term repeated inside one pattern is counted once for that
            pattern).
        """
        dp = {}
        for pat in sp:
            # Each distinct term of the pattern contributes exactly 1.
            p = dict.fromkeys(pat, 1)
            dp = self.compose(dp, p)
        return dp
class myThread (threading.Thread):
    """Worker thread that mines one data file and stores the result in MySQL.

    ``run`` launches the SPMF ``BIDE+_with_strings`` algorithm on
    ``data/<d>``, parses the patterns it writes, turns them into normalized
    term weights and writes their string form to the ``dp`` column of the
    ``data`` table for the row whose id is the part of ``d`` before the
    first dot.
    """

    def __init__(self, d, name, dp):
        """
        :param d: file name (relative to ``data/``) to mine.
        :param name: thread name; its string form is also the suffix of the
            temp files ``file/temp_output<name>`` and ``file/temp<name>``.
        :param dp: object providing ``d_patterns(sp)`` (a ``DP`` instance).
        """
        threading.Thread.__init__(self)
        self.d = d
        self.name = name
        self.dp = dp

    def get_normalized_dp(self, dp):
        """Return a copy of ``dp`` with every value divided by the value sum.

        :param dp: dict of term -> numeric weight.
        :returns: new dict of term -> weight / sum(weights); ``{}`` for an
            empty input.
        :raises ZeroDivisionError: if ``dp`` is non-empty and its values sum
            to zero.
        """
        total = float(sum(dp.values()))
        return {term: weight / total for term, weight in dp.items()}

    @staticmethod
    def _parse_patterns(lines):
        """Turn SPMF output lines into lists of tokens.

        For each line: keep the text before the first ``SUP``, drop every
        ``'-1 '`` separator and split on whitespace.
        """
        # NOTE(review): anything else that precedes 'SUP' on the line (for
        # example a '#' if the tool writes '#SUP:') is kept as a token --
        # confirm against the actual SPMF output format.
        return [line.split('SUP')[0].replace('-1 ', '').split()
                for line in lines]

    def _run_spmf(self, output_path, log_path):
        """Run SPMF on ``data/<d>``; stdout is captured in ``log_path``."""
        # Argument list instead of a shell string so that file names with
        # spaces or shell metacharacters are passed through unchanged.
        # '20%' is forwarded verbatim (presumably the minimum support).
        with open(log_path, 'w') as log_file:
            subprocess.call(
                ['java', '-jar', 'spmf.jar', 'run', 'BIDE+_with_strings',
                 'data/' + self.d, output_path, '20%'],
                stdout=log_file)

    def _store_dp(self, dpps, doc_id):
        """Write ``dpps`` to ``data.dp`` for row ``doc_id``; errors are printed."""
        # Parameterized statement: the driver does the quoting, so neither
        # the file-name-derived id nor the term text can alter the SQL.
        query = 'Update data set `dp` = %s where id = %s'
        # Single quotes are still turned into double quotes so the stored
        # text keeps the format it had before.
        params = (dpps.replace('\'', '"'), doc_id)
        try:
            # NOTE(review): credentials are hard-coded in source; consider
            # moving them to configuration.
            connection = MySQLdb.connect('localhost', 'root', 'kgggdkp2692', 'mining')
            self.connection = connection
            try:
                self.cursor = connection.cursor()
                q = self.cursor.execute(query, params)
                print("%s %s [id=%s]" % (q, query, doc_id))
                connection.commit()
            finally:
                # Close even when execute/commit fails.
                connection.close()
        except Exception as e:
            self.connection = None
            print("Unable to write to DB:" + str(e))

    def run(self):
        """Mine ``data/<d>``, normalize the term weights and store them.

        The whole body runs while holding one slot of the module-level
        ``threadLimiter`` semaphore. File and parsing errors propagate;
        database errors are caught and printed.
        """
        with threadLimiter:
            suffix = str(self.name)
            output_path = 'file/temp_output' + suffix
            log_path = 'file/temp' + suffix
            self._run_spmf(output_path, log_path)

            with open(output_path, 'r') as output_file:
                sp = self._parse_patterns(output_file)
            #print sp

            with open(log_path, 'r') as log_file:
                log_file.readline()
                # Fourth token of the second log line. The value is not used
                # afterwards; the read is kept so that a log without such a
                # line still aborts the run before the database write.
                exec_time = log_file.readline().split()[3]

            dpps = str(self.get_normalized_dp(self.dp.d_patterns(sp)))
            doc_id = self.d.split('.')[0]
            self._store_dp(dpps, doc_id)
# Start one worker per directory entry. A semaphore slot is held around each
# start, and the running index is advanced even if starting the thread fails.
for d in dirlist:
    with threadLimiter:
        try:
            worker = myThread(d, count, DP())
            worker.start()
            print("Processing")
            print(count)
        finally:
            count += 1