diff --git a/frontend/index.html b/frontend/index.html
index 594acb2..96536bd 100644
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -218,7 +218,23 @@
alert("You must enter a sentence to proceed.");
return;
}
- var tokens = sentence.trim().split(" ");
+ let xhr = new XMLHttpRequest();
+ xhr.open("POST", SERVER_API + "annotate_token", true);
+ xhr.setRequestHeader("Content-Type", "application/json");
+ xhr.onreadystatechange = function () {
+ if (xhr.readyState === XMLHttpRequest.DONE && xhr.status === 200) {
+ var json = JSON.parse(xhr.responseText);
+ continueGenerateTokens(json);
+ }
+ };
+ var data = JSON.stringify({
+ sentence: sentence,
+ });
+ xhr.send(data);
+ }
+
+ function continueGenerateTokens(result) {
+ var tokens = result["tokens"];
document.getElementById("total-token-num").innerText = String(tokens.length);
for (var i = 0; i < tokens.length; i++) {
var curToken = tokens[i];
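Note on the hunk above: the removed `split(" ")` is the root of the offset bugs fixed later in this diff, because whitespace splitting keeps punctuation glued to words and so disagrees with the server-side tokenizer. A quick plain-Python illustration (the sentence is made up):

```python
# Naive whitespace splitting vs. real tokenization. Punctuation stays
# attached to the last word, so mention spans counted against these
# tokens drift relative to properly tokenized text.
sentence = "Barack Obama visited Paris."
print(sentence.split(" "))
# ['Barack', 'Obama', 'visited', 'Paris.']
# A proper tokenizer yields ['Barack', 'Obama', 'visited', 'Paris', '.'],
# which is why the preset mention offsets further down shift by one.
```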
@@ -262,6 +278,16 @@
document.getElementById("using-preset-example").innerText = String(-1);
}
+ function getTokens() {
+ var parent_div = document.getElementById("token-display");
+ var i;
+ var tokens = [];
+ for (i = 0; i < parent_div.children.length; i++) {
+ tokens.push(parent_div.children[i].textContent);
+ }
+ return tokens;
+ }
+
function generatePresetMentions() {
var sentence = document.getElementById("sentence-input").value;
var xhr = new XMLHttpRequest();
@@ -274,7 +300,7 @@
}
};
var data = JSON.stringify({
- tokens: sentence.trim().split(" "),
+ tokens: getTokens(),
});
xhr.send(data);
}
@@ -504,7 +530,7 @@
};
var data_vec = JSON.stringify({
index: i,
- tokens: sentence.trim().split(" "),
+ tokens: getTokens(),
mention_starts: [mention_starts[i]],
mention_ends: [mention_ends[i]],
});
@@ -521,7 +547,7 @@
};
var data_simple = JSON.stringify({
index: i,
- tokens: sentence.trim().split(" "),
+ tokens: getTokens(),
mention_starts: [mention_starts[i]],
mention_ends: [mention_ends[i]],
});
@@ -538,7 +564,7 @@
};
var data = JSON.stringify({
index: i,
- tokens: sentence.trim().split(" "),
+ tokens: getTokens(),
mention_starts: [mention_starts[i]],
mention_ends: [mention_ends[i]],
mode: getInferenceMode(),
@@ -634,7 +660,7 @@
function getExampleSentenceMention(id) {
if (id == 1) {
- return [[0, 2], [10, 12], [15, 17]];
+ return [[0, 2], [11, 13], [16, 18]];
}
if (id == 2) {
return [[0, 1], [5, 7], [9, 11], [20, 21]];
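With the frontend changes above, every endpoint now receives the tokens rendered in `#token-display` via `getTokens()` rather than a fresh `split(" ")`, so client and server can no longer disagree about tokenization. A minimal smoke test for the new route, assuming the server from `server.py` below is listening on localhost port 80 (host and port are assumptions, not part of this diff):

```python
# Smoke test for the new /annotate_token route. Point the URL at
# wherever the server actually runs.
import requests

resp = requests.post(
    "http://localhost:80/annotate_token",
    json={"sentence": "Barack Obama visited Paris."},
)
print(resp.json()["tokens"])
# Expected shape: a JSON object with a "tokens" list, e.g.
# ['Barack', 'Obama', 'visited', 'Paris', '.']
```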
diff --git a/server.py b/server.py
index 52f0b93..695d761 100644
--- a/server.py
+++ b/server.py
@@ -112,53 +112,31 @@ def handle_input(self):
if mode != "figer":
if mode != "custom":
selected_inference_processor = InferenceProcessor(mode, resource_loader=self.runner.inference_processor)
- for sentence in sentences:
- sentence.set_signature(selected_inference_processor.signature())
- cached = self.mem_cache.query_cache(sentence)
- if cached is not None:
- sentence = cached
- else:
- self.runner.process_sentence(sentence, selected_inference_processor)
- self.mem_cache.insert_cache(sentence)
- self.surface_cache.insert_cache(sentence)
- predicted_types.append(list(sentence.predicted_types))
- predicted_candidates.append(sentence.elmo_candidate_titles)
- mentions.append(sentence.get_mention_surface_raw())
- selected_candidates.append(sentence.selected_title)
- other_possible_types.append(sentence.could_also_be_types)
else:
rules = r["taxonomy"]
mappings = self.parse_custom_rules(rules)
- custom_inference_processor = InferenceProcessor(mode, custom_mapping=mappings)
- for sentence in sentences:
- sentence.set_signature(custom_inference_processor.signature())
- cached = self.mem_cache.query_cache(sentence)
- if cached is not None:
- sentence = cached
- else:
- self.runner.process_sentence(sentence, custom_inference_processor)
- self.mem_cache.insert_cache(sentence)
- self.surface_cache.insert_cache(sentence)
- predicted_types.append(list(sentence.predicted_types))
- predicted_candidates.append(sentence.elmo_candidate_titles)
- mentions.append(sentence.get_mention_surface_raw())
- selected_candidates.append(sentence.selected_title)
- other_possible_types.append(sentence.could_also_be_types)
+ selected_inference_processor = InferenceProcessor(mode, custom_mapping=mappings)
else:
- for sentence in sentences:
- sentence.set_signature(self.runner.inference_processor.signature())
- cached = self.mem_cache.query_cache(sentence)
- if cached is not None:
- sentence = cached
- else:
- self.runner.process_sentence(sentence)
+ selected_inference_processor = self.runner.inference_processor
+
+ for sentence in sentences:
+ sentence.set_signature(selected_inference_processor.signature())
+ cached = self.mem_cache.query_cache(sentence)
+ if cached is not None:
+ sentence = cached
+ else:
+ self.runner.process_sentence(sentence, selected_inference_processor)
+ try:
self.mem_cache.insert_cache(sentence)
self.surface_cache.insert_cache(sentence)
- predicted_types.append(list(sentence.predicted_types))
- predicted_candidates.append(sentence.elmo_candidate_titles)
- mentions.append(sentence.get_mention_surface_raw())
- selected_candidates.append(sentence.selected_title)
- other_possible_types.append(sentence.could_also_be_types)
+ except Exception as e:
+ print("Cache insertion exception, ignored: " + str(e))
+ predicted_types.append(list(sentence.predicted_types))
+ predicted_candidates.append(sentence.elmo_candidate_titles)
+ mentions.append(sentence.get_mention_surface_raw())
+ selected_candidates.append(sentence.selected_title)
+ other_possible_types.append(sentence.could_also_be_types)
+
elapsed_time = time.time() - start_time
print("Processed mention " + str([x.get_mention_surface() for x in sentences]) + " in mode " + mode + ". TIME: " + str(elapsed_time) + " seconds.")
ret["type"] = predicted_types
@@ -176,6 +154,20 @@ def pipeline_initialize_helper(self, tokens):
doc.get_ner_ontonotes
doc.get_view("MENTION")
+ """
+ Handles requests for tokenization
+ """
+ def handle_tokenizer_input(self):
+ r = request.get_json()
+ ret = {"tokens": []}
+ if "sentence" not in r:
+ return json.dumps(ret)
+ doc = self.pipeline.doc(r["sentence"])
+ token_view = doc.get_tokens
+ for cons in token_view:
+ ret["tokens"].append(str(cons))
+ return json.dumps(ret)
+
"""
Handles requests for mention filling
"""
@@ -209,12 +198,12 @@ def handle_mention_input(self):
for cons in additions_view:
add_to_list = True
if additions_view.view_name != "MENTION":
- start = cons['start']
- end = cons['end']
+ start = int(cons['start'])
+ end = int(cons['end'])
else:
- start = cons['properties']['EntityHeadStartSpan']
- end = cons['properties']['EntityHeadEndSpan']
- for i in range(start - 1, end + 1):
+ start = int(cons['properties']['EntityHeadStartSpan'])
+ end = int(cons['properties']['EntityHeadEndSpan'])
+ for i in range(max(start - 1, 0), min(len(tokens), end + 1)):
if i in ret_set:
add_to_list = False
break
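The clamped range above keeps the one-token overlap window inside the sentence: previously `start - 1` could probe index -1 and `end + 1` could run past the last token. The guard in isolation (a hypothetical helper, not part of the diff):

```python
# The overlap window checked around each candidate mention, clamped
# so it can neither start below 0 nor extend past the token count.
def overlap_window(start, end, n_tokens):
    return range(max(start - 1, 0), min(n_tokens, end + 1))

# A mention starting at token 0 no longer probes index -1:
assert list(overlap_window(0, 1, 5)) == [0, 1]
# A mention ending at the last token stays in bounds:
assert list(overlap_window(3, 5, 5)) == [2, 3, 4]
```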
@@ -246,10 +235,13 @@ def handle_simple_input(self):
for sentence in sentences:
surface = sentence.get_mention_surface()
cached_types = self.surface_cache.query_cache(surface)
- distinct = set()
- for t in cached_types:
- distinct.add("/" + t.split("/")[1])
- types.append(list(distinct))
+ if cached_types is not None:
+ distinct = set()
+ for t in cached_types:
+ distinct.add("/" + t.split("/")[1])
+ types.append(list(distinct))
+ else:
+ types.append([])
ret["type"] = types
ret["index"] = r["index"]
return json.dumps(ret)
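`query_cache` can return None for a surface that was never cached, which the old loop would crash on when iterating; the guard now falls back to an empty type list. The coarse-type reduction it protects, pulled out for illustration (the helper name is hypothetical):

```python
# Reduce cached fine-grained paths like "/person/artist" to their
# distinct top levels, tolerating a cache miss (None) gracefully.
def coarse_types(cached_types):
    if cached_types is None:
        return []  # surface never cached: empty list rather than a crash
    return list({"/" + t.split("/")[1] for t in cached_types})

assert sorted(coarse_types(["/person/artist", "/person", "/location/city"])) \
    == ["/location", "/person"]
assert coarse_types(None) == []
```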
@@ -294,6 +286,7 @@ def start(self, localhost=False, port=80):
self.app.add_url_rule("/", "", self.handle_redirection)
self.app.add_url_rule("/", "", self.handle_root)
self.app.add_url_rule("/annotate", "annotate", self.handle_input, methods=['POST'])
+ self.app.add_url_rule("/annotate_token", "annotate_token", self.handle_tokenizer_input, methods=['POST'])
self.app.add_url_rule("/annotate_mention", "annotate_mention", self.handle_mention_input, methods=['POST'])
self.app.add_url_rule("/annotate_cache", "annotate_cache", self.handle_simple_input, methods=['POST'])
self.app.add_url_rule("/annotate_vec", "annotate_vec", self.handle_word2vec_input, methods=['POST'])
diff --git a/zoe_utils.py b/zoe_utils.py
index 86545b9..0afa607 100644
--- a/zoe_utils.py
+++ b/zoe_utils.py
@@ -277,7 +277,7 @@ def rank_candidates_vec(self, sentence=None, candidates=None):
target_vec = self.word2vec_helper(sentence.get_mention_surface())
if target_vec is None:
print(sentence.get_mention_surface() + " not found in word2vec")
- return candidates
+ return [(x, 0.0) for x in candidates]
assert(len(target_vec) == 300)
results = {}
for candidate in candidates:
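The `zoe_utils.py` fix is about return-type consistency: callers of `rank_candidates_vec` expect `(candidate, score)` pairs, so an out-of-vocabulary mention now yields uniform zero scores instead of a bare list of strings. The contract in miniature (the scoring function here is a placeholder, not the real cosine ranking):

```python
# Sketch of the fixed contract: always return (candidate, score)
# pairs, even when the mention has no word2vec embedding.
def rank_candidates(candidates, target_vec, score_fn):
    if target_vec is None:
        return [(c, 0.0) for c in candidates]  # uniform fallback scores
    return sorted(((c, score_fn(target_vec, c)) for c in candidates),
                  key=lambda pair: pair[1], reverse=True)

# Fallback path: callers can unpack pairs uniformly either way.
assert rank_candidates(["Paris", "Paris_Hilton"], None, None) \
    == [("Paris", 0.0), ("Paris_Hilton", 0.0)]
```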