tokenizer + several bug fixes
Slash0BZ committed Oct 28, 2018
1 parent d9c5654 commit a9f4a02
Showing 3 changed files with 76 additions and 57 deletions.
38 changes: 32 additions & 6 deletions frontend/index.html
@@ -218,7 +218,23 @@
alert("You must enter a sentence to proceed.");
return;
}
var tokens = sentence.trim().split(" ");
let xhr = new XMLHttpRequest();
xhr.open("POST", SERVER_API + "annotate_token", true);
xhr.setRequestHeader("Content-Type", "application/json");
xhr.onreadystatechange = function () {
if (xhr.readyState === XMLHttpRequest.DONE && xhr.status === 200) {
var json = JSON.parse(xhr.responseText);
continueGenerateTokens(json);
}
};
var data = JSON.stringify({
sentence: sentence,
});
xhr.send(data);
}

function continueGenerateTokens(result) {
var tokens = result["tokens"];
document.getElementById("total-token-num").innerText = String(tokens.length);
for (var i = 0; i < tokens.length; i++) {
var curToken = tokens[i];
@@ -262,6 +278,16 @@
document.getElementById("using-preset-example").innerText = String(-1);
}

function getTokens() {
var parent_div = document.getElementById("token-display");
var i;
var tokens = [];
for (i = 0; i < parent_div.children.length; i++) {
tokens.push(parent_div.children[i].innerHTML);
}
return tokens;
}

function generatePresetMentions() {
var sentence = document.getElementById("sentence-input").value;
var xhr = new XMLHttpRequest();
@@ -274,7 +300,7 @@
}
};
var data = JSON.stringify({
tokens: sentence.trim().split(" "),
tokens: getTokens(),
});
xhr.send(data);
}
@@ -504,7 +530,7 @@
};
var data_vec = JSON.stringify({
index: i,
tokens: sentence.trim().split(" "),
tokens: getTokens(),
mention_starts: [mention_starts[i]],
mention_ends: [mention_ends[i]],
});
@@ -521,7 +547,7 @@
};
var data_simple = JSON.stringify({
index: i,
tokens: sentence.trim().split(" "),
tokens: getTokens(),
mention_starts: [mention_starts[i]],
mention_ends: [mention_ends[i]],
});
@@ -538,7 +564,7 @@
};
var data = JSON.stringify({
index: i,
tokens: sentence.trim().split(" "),
tokens: getTokens(),
mention_starts: [mention_starts[i]],
mention_ends: [mention_ends[i]],
mode: getInferenceMode(),
@@ -634,7 +660,7 @@

function getExampleSentenceMention(id) {
if (id == 1) {
return [[0, 2], [10, 12], [15, 17]];
return [[0, 2], [11, 13], [16, 18]];
}
if (id == 2) {
return [[0, 1], [5, 7], [9, 11], [20, 21]];
93 changes: 43 additions & 50 deletions server.py
@@ -112,53 +112,31 @@ def handle_input(self):
if mode != "figer":
if mode != "custom":
selected_inference_processor = InferenceProcessor(mode, resource_loader=self.runner.inference_processor)
for sentence in sentences:
sentence.set_signature(selected_inference_processor.signature())
cached = self.mem_cache.query_cache(sentence)
if cached is not None:
sentence = cached
else:
self.runner.process_sentence(sentence, selected_inference_processor)
self.mem_cache.insert_cache(sentence)
self.surface_cache.insert_cache(sentence)
predicted_types.append(list(sentence.predicted_types))
predicted_candidates.append(sentence.elmo_candidate_titles)
mentions.append(sentence.get_mention_surface_raw())
selected_candidates.append(sentence.selected_title)
other_possible_types.append(sentence.could_also_be_types)
else:
rules = r["taxonomy"]
mappings = self.parse_custom_rules(rules)
custom_inference_processor = InferenceProcessor(mode, custom_mapping=mappings)
for sentence in sentences:
sentence.set_signature(custom_inference_processor.signature())
cached = self.mem_cache.query_cache(sentence)
if cached is not None:
sentence = cached
else:
self.runner.process_sentence(sentence, custom_inference_processor)
self.mem_cache.insert_cache(sentence)
self.surface_cache.insert_cache(sentence)
predicted_types.append(list(sentence.predicted_types))
predicted_candidates.append(sentence.elmo_candidate_titles)
mentions.append(sentence.get_mention_surface_raw())
selected_candidates.append(sentence.selected_title)
other_possible_types.append(sentence.could_also_be_types)
selected_inference_processor = InferenceProcessor(mode, custom_mapping=mappings)
else:
for sentence in sentences:
sentence.set_signature(self.runner.inference_processor.signature())
cached = self.mem_cache.query_cache(sentence)
if cached is not None:
sentence = cached
else:
self.runner.process_sentence(sentence)
selected_inference_processor = self.runner.inference_processor

for sentence in sentences:
sentence.set_signature(selected_inference_processor.signature())
cached = self.mem_cache.query_cache(sentence)
if cached is not None:
sentence = cached
else:
self.runner.process_sentence(sentence, selected_inference_processor)
try:
self.mem_cache.insert_cache(sentence)
self.surface_cache.insert_cache(sentence)
predicted_types.append(list(sentence.predicted_types))
predicted_candidates.append(sentence.elmo_candidate_titles)
mentions.append(sentence.get_mention_surface_raw())
selected_candidates.append(sentence.selected_title)
other_possible_types.append(sentence.could_also_be_types)
except:
print("Cache insertion exception. Ignored.")
predicted_types.append(list(sentence.predicted_types))
predicted_candidates.append(sentence.elmo_candidate_titles)
mentions.append(sentence.get_mention_surface_raw())
selected_candidates.append(sentence.selected_title)
other_possible_types.append(sentence.could_also_be_types)

elapsed_time = time.time() - start_time
print("Processed mention " + str([x.get_mention_surface() for x in sentences]) + " in mode " + mode + ". TIME: " + str(elapsed_time) + " seconds.")
ret["type"] = predicted_types
@@ -176,6 +154,17 @@ def pipeline_initialize_helper(self, tokens):
doc.get_ner_ontonotes
doc.get_view("MENTION")

def handle_tokenizer_input(self):
r = request.get_json()
ret = {"tokens": []}
if "sentence" not in r:
return json.dumps(ret)
doc = self.pipeline.doc(r["sentence"])
token_view = doc.get_tokens
for cons in token_view:
ret["tokens"].append(str(cons))
return json.dumps(ret)

"""
Handles requests for mention filling
"""
@@ -209,12 +198,12 @@ def handle_mention_input(self):
for cons in additions_view:
add_to_list = True
if additions_view.view_name != "MENTION":
start = cons['start']
end = cons['end']
start = int(cons['start'])
end = int(cons['end'])
else:
start = cons['properties']['EntityHeadStartSpan']
end = cons['properties']['EntityHeadEndSpan']
for i in range(start - 1, end + 1):
start = int(cons['properties']['EntityHeadStartSpan'])
end = int(cons['properties']['EntityHeadEndSpan'])
for i in range(max(start - 1, 0), min(len(tokens), end + 1)):
if i in ret_set:
add_to_list = False
break
@@ -246,10 +235,13 @@ def handle_simple_input(self):
for sentence in sentences:
surface = sentence.get_mention_surface()
cached_types = self.surface_cache.query_cache(surface)
distinct = set()
for t in cached_types:
distinct.add("/" + t.split("/")[1])
types.append(list(distinct))
if cached_types is not None:
distinct = set()
for t in cached_types:
distinct.add("/" + t.split("/")[1])
types.append(list(distinct))
else:
types.append([])
ret["type"] = types
ret["index"] = r["index"]
return json.dumps(ret)
@@ -294,6 +286,7 @@ def start(self, localhost=False, port=80):
self.app.add_url_rule("/", "", self.handle_redirection)
self.app.add_url_rule("/<path:path>", "<path:path>", self.handle_root)
self.app.add_url_rule("/annotate", "annotate", self.handle_input, methods=['POST'])
self.app.add_url_rule("/annotate_token", "annotate_token", self.handle_tokenizer_input, methods=['POST'])
self.app.add_url_rule("/annotate_mention", "annotate_mention", self.handle_mention_input, methods=['POST'])
self.app.add_url_rule("/annotate_cache", "annotate_cache", self.handle_simple_input, methods=['POST'])
self.app.add_url_rule("/annotate_vec", "annotate_vec", self.handle_word2vec_input, methods=['POST'])
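For reference, a minimal client-side sketch of the new /annotate_token route registered above. The request and response shapes follow the diff ({"sentence": ...} in, {"tokens": [...]} out); the local host, port 80 (the default in start()), and the example sentence are assumptions, not part of the commit.

# Sketch of a client call to the new /annotate_token route.
# Assumption: the server from server.py is running locally on its default port 80.
import requests

resp = requests.post(
    "http://localhost:80/annotate_token",
    json={"sentence": "Michael Jordan played for the Chicago Bulls."},
)
resp.raise_for_status()
tokens = resp.json()["tokens"]  # list of token strings produced by the server-side tokenizer
print(tokens)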
2 changes: 1 addition & 1 deletion zoe_utils.py
@@ -277,7 +277,7 @@ def rank_candidates_vec(self, sentence=None, candidates=None):
target_vec = self.word2vec_helper(sentence.get_mention_surface())
if target_vec is None:
print(sentence.get_mention_surface() + " not found in word2vec")
return candidates
return [(x, 0.0) for x in candidates]
assert(len(target_vec) == 300)
results = {}
for candidate in candidates:
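A small sketch of the fallback change above: when the mention surface has no word2vec vector, rank_candidates_vec now returns each candidate paired with a 0.0 score instead of a bare list, so callers that unpack (title, score) tuples keep working. The candidate titles below are hypothetical, for illustration only.

# Illustration only; candidate titles are made up.
candidates = ["Chicago_Bulls", "Chicago"]

# Old fallback returned the bare list, which breaks callers that unpack pairs.
# New fallback gives every candidate a neutral 0.0 score.
ranked = [(x, 0.0) for x in candidates]

for title, score in ranked:  # uniform unpacking regardless of which path produced the result
    print(title, score)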
