-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathvoc.py
executable file
·64 lines (50 loc) · 1.68 KB
/
voc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from spacy.lang.en import English
# Instantiate spaCy's English language class (imported above); it is only
# used here as the source of the tokenizer built on the next line.
nlp = English()
# Module-level tokenizer shared by every `voc` instance (see `voc.tokenization`).
# NOTE(review): `Defaults.create_tokenizer` appears to be the spaCy 2.x API —
# confirm the pinned spaCy version before upgrading.
tokenizer = nlp.Defaults.create_tokenizer(nlp)
class voc:
    """Vocabulary and tag registry for a bag-of-words intent classifier.

    Keeps the word -> index and tag <-> index mappings, the raw
    question -> answer pairs and the responses registered per tag, and
    converts questions/tags into fixed-length vectors.
    """

    def __init__(self):
        self.num_words = 1  # 0 is reserved for padding
        self.num_tags = 0
        self.tags = {}  # tag -> index
        self.index2tags = {}  # index -> tag
        self.questions = {}  # question text -> answer
        self.word2index = {}  # token -> index (starting at 1)
        self.response = {}  # tag -> responses

    def addWord(self, word):
        """Register *word* under the next free index if it is not known yet."""
        if word not in self.word2index:
            self.word2index[word] = self.num_words
            self.num_words += 1

    def addTags(self, tag):
        """Register *tag* under the next free tag index if it is not known yet."""
        if tag not in self.tags:
            self.tags[tag] = self.num_tags
            self.index2tags[self.num_tags] = tag
            self.num_tags += 1

    def addQuestion(self, question, answer):
        """Store the question/answer pair and add the question's tokens to the vocabulary."""
        self.questions[question] = answer
        for word in self.tokenization(question):
            self.addWord(word)

    def tokenization(self, ques):
        """Return the ``lemma_`` of every token the module-level tokenizer yields for *ques*."""
        return [token.lemma_ for token in tokenizer(ques)]

    def getIndexOfWord(self, word):
        """Return the index of *word*.

        Raises:
            KeyError: if *word* has never been added to the vocabulary.
        """
        return self.word2index[word]

    def getQuestionInNum(self, ques):
        """Encode *ques* as a binary bag-of-words vector.

        The returned list has ``num_words`` entries; position ``i`` is 1 when
        the token with index ``i`` occurs in *ques*, otherwise 0. Position 0
        (the padding slot) is never set.

        Tokens that are not in the vocabulary are skipped rather than raising
        ``KeyError``, so questions containing unseen words can still be encoded.
        """
        vector = [0] * self.num_words
        for word in self.tokenization(ques):
            index = self.word2index.get(word)
            if index is not None:
                vector[index] = 1
        return vector

    def getTag(self, tag):
        """Return the one-hot float vector (length ``num_tags``) for *tag*.

        Raises:
            KeyError: if *tag* has never been added.
        """
        one_hot = [0.0] * self.num_tags
        one_hot[self.tags[tag]] = 1.0
        return one_hot

    def getVocabSize(self):
        """Return the vocabulary size, counting the reserved padding index 0."""
        return self.num_words

    def getTagSize(self):
        """Return the number of registered tags."""
        return self.num_tags

    def addResponse(self, tag, responses):
        """Associate *responses* with *tag*, replacing any earlier entry."""
        self.response[tag] = responses