-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlda.py
40 lines (34 loc) · 1.07 KB
/
lda.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from pymongo import MongoClient
import gensim
import time
import Config
#db = MongoClient(Config.MONGO_CONNECTION_URL)[Config.ACADEMIC_DATASET_DB]
#cleaned_reviews = db[Config.CLEANED_REVIEWS]
# perform LDA
def lda(dictionary, mm):
    """Train an LDA topic model and save it to ``Config.LDA_LOCAL``.

    Progress messages and the elapsed wall-clock time are printed to stdout.

    Args:
        dictionary: Handed to gensim's ``LdaModel`` as ``id2word``.
        mm: Handed to gensim's ``LdaModel`` as ``corpus``.

    Returns:
        The trained ``gensim.models.LdaModel`` (also saved to
        ``Config.LDA_LOCAL``).
    """
    # Original author's note: takes about 70 minutes to run.
    t0 = time.time()
    print("performing LDA")
    # Bound to ``model`` rather than ``lda`` so the local does not shadow
    # this function's own name.
    model = gensim.models.LdaModel(
        corpus=mm, id2word=dictionary, num_topics=Config.TOPIC_NUM)
    model.save(Config.LDA_LOCAL)
    print("Done.")
    # A single pre-formatted argument prints the same text whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print("%s seconds" % (time.time() - t0))
    return model
if __name__ == "__main__":
    # Load the saved dictionary, reporting how long it took.
    t0 = time.time()
    print("Loading Dictionary")
    dictionary = gensim.corpora.Dictionary.load(Config.DICTIONARY_LOCAL)
    print("Done.")
    # A single pre-formatted argument prints the same text whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print("%s seconds" % (time.time() - t0))

    # Open the corpus stored in Blei's LDA-C format.
    t0 = time.time()
    print("Loading Corpus")
    mm = gensim.corpora.BleiCorpus(Config.CORPUS_LOCAL)
    print("Done.")
    print("%s seconds" % (time.time() - t0))

    # Train and save the model. The result is bound to ``model`` so the
    # module-level ``lda`` function is not overwritten by its return value.
    model = lda(dictionary, mm)

    # Print every topic to stdout, one per line.
    topics = model.show_topics(num_topics=Config.TOPIC_NUM)
    for position, topic in enumerate(topics):
        label, text = position, topic
        # NOTE(review): older gensim releases return plain strings here,
        # newer ones return (topic_id, description) pairs -- confirm against
        # the installed version. Both shapes are accepted so the string
        # concatenation below cannot fail on a tuple.
        if isinstance(topic, tuple):
            label, text = topic
        print('#' + str(label) + ': ' + text)