-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathNaiveBayesClassificationUI.py
83 lines (72 loc) · 2.93 KB
/
NaiveBayesClassificationUI.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from NaiveBayesClassification import *
trainSet1 = [
{"goldTag": "-", "text": "just plain boring"},
{"goldTag": "-", "text": "entirely predictable and lacks energy"},
# {"goldTag":"-","text":"predictable"},
# {"goldTag":"-","text":"predictable"},
# {"goldTag":"-","text":"predictable"},
# {"goldTag":"-","text":"predictable"},
# {"goldTag":"-","text":"predictable"},
# {"goldTag":"-","text":"predictable"},
{"goldTag": "-", "text": "no surprises and very few laughs"},
{"goldTag": "-", "text": "i don't like this movie"},
{
"goldTag": "-",
"text": "awful bad bias catastrophe cheat deny envious foul harsh hate",
},
{"goldTag": "+", "text": "very powerful"},
{"goldTag": "+", "text": "the most fun film of the summer"},
{"goldTag": "+", "text": "i really like this movie"},
{"goldTag": "+", "text": "this movie made me cry"},
{
"goldTag": "+",
"text": "admirable beautiful confident dazzling ecstatic favor glee great",
},
]
classification1 = ["-","+"]
trainSet2 = [
{"goldTag": "comedy", "text": "fun, couple, love, love,"},
{"goldTag": "comedy", "text": "couple, fly, fast, fun, fun,"},
{"goldTag": "action", "text": "fast, furious, shoot,"},
{"goldTag": "action", "text": "furious, shoot, shoot, fun,"},
{"goldTag": "action", "text": "fly, fast, shoot, love,"},
]
classification2 = ["comedy","action"]
trainSet3 = [
{"goldTag": "+", "text": "good good good great great great"},
{"goldTag": "+", "text": "poor great great"},
{"goldTag": "-", "text": "good poor poor poor"},
{"goldTag": "-", "text": "good poor poor poor poor poor great great"},
{"goldTag": "-", "text": "poor poor"},
]
classification3 = ["-","+"]
trainingData = TrainBinaryNaiveBayes(
NormalizeDocuments(trainSet3),
classification3,
)
# PrintTrainingData(trainingData)
tests1 = [
{"goldLabel": "-", "text": "predictable with no fun"},
{"goldLabel": "+", "text": "i really like this movie"},
{"goldLabel": "-", "text": "i don't like this movie"},
{"goldLabel": "-", "text": "boring AF"},
{"goldLabel": "-", "text": "the most predictable movie ever"},
{"goldLabel": "+", "text": "this movie is dazzling"},
{"goldLabel": "-", "text": "i hate it"},
]
tests2=[
{"goldLabel": "action", "text": "fast, couple, shoot, fly,"},
]
tests3=[
{"goldLabel": "-", "text": "A good, good plot and great characters, but poor acting."},
]
for t in tests3:
nt = NormalizeForBayes(t["text"])
r = TestNaiveBayes(nt, trainingData, classification3)
# print()
print(f'classification for \033[33m"{t["text"]}"\033[0m is: \033[33m{r}\033[0m')
# print(f'- probability: {"%.8f"%GetProbabilityInClass(nt,"-",trainingData)}')
# print(f'+ probability: {"%.8f"%GetProbabilityInClass(nt,"+",trainingData)}')
tr = PrintConfusionMatrix(tests3, trainingData)
print(GetFMeasure(tr, 1))
#TODO: change functions to support more then two classifications