diff --git a/frontend/index.html b/frontend/index.html
index 594acb2..96536bd 100644
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -218,7 +218,23 @@
             alert("You must enter a sentence to proceed.");
             return;
         }
-        var tokens = sentence.trim().split(" ");
+        let xhr = new XMLHttpRequest();
+        xhr.open("POST", SERVER_API + "annotate_token", true);
+        xhr.setRequestHeader("Content-Type", "application/json");
+        xhr.onreadystatechange = function () {
+            if (xhr.readyState === XMLHttpRequest.DONE && xhr.status === 200) {
+                var json = JSON.parse(xhr.responseText);
+                continueGenerateTokens(json);
+            }
+        };
+        var data = JSON.stringify({
+            sentence: sentence,
+        });
+        xhr.send(data);
+    }
+
+    function continueGenerateTokens(result) {
+        var tokens = result["tokens"];
         document.getElementById("total-token-num").innerText = String(tokens.length);
         for (var i = 0; i < tokens.length; i++) {
             var curToken = tokens[i];
@@ -262,6 +278,16 @@
         document.getElementById("using-preset-example").innerText = String(-1);
     }
 
+    function getTokens() {
+        var parent_div = document.getElementById("token-display");
+        var i;
+        var tokens = [];
+        for (i = 0; i < parent_div.children.length; i++) {
+            tokens.push(parent_div.children[i].innerHTML);
+        }
+        return tokens;
+    }
+
     function generatePresetMentions() {
         var sentence = document.getElementById("sentence-input").value;
         var xhr = new XMLHttpRequest();
@@ -274,7 +300,7 @@
             }
         };
         var data = JSON.stringify({
-            tokens: sentence.trim().split(" "),
+            tokens: getTokens(),
         });
         xhr.send(data);
     }
@@ -504,7 +530,7 @@
         };
         var data_vec = JSON.stringify({
             index: i,
-            tokens: sentence.trim().split(" "),
+            tokens: getTokens(),
             mention_starts: [mention_starts[i]],
             mention_ends: [mention_ends[i]],
         });
@@ -521,7 +547,7 @@
         };
         var data_simple = JSON.stringify({
             index: i,
-            tokens: sentence.trim().split(" "),
+            tokens: getTokens(),
             mention_starts: [mention_starts[i]],
             mention_ends: [mention_ends[i]],
         });
@@ -538,7 +564,7 @@
         };
         var data = JSON.stringify({
             index: i,
-            tokens: sentence.trim().split(" "),
+            tokens: getTokens(),
             mention_starts: [mention_starts[i]],
             mention_ends: [mention_ends[i]],
             mode: getInferenceMode(),
@@ -634,7 +660,7 @@
 
     function getExampleSentenceMention(id) {
         if (id == 1) {
-            return [[0, 2], [10, 12], [15, 17]];
+            return [[0, 2], [11, 13], [16, 18]];
         }
         if (id == 2) {
             return [[0, 1], [5, 7], [9, 11], [20, 21]];
diff --git a/server.py b/server.py
index 52f0b93..695d761 100644
--- a/server.py
+++ b/server.py
@@ -112,53 +112,31 @@ def handle_input(self):
         if mode != "figer":
             if mode != "custom":
                 selected_inference_processor = InferenceProcessor(mode, resource_loader=self.runner.inference_processor)
-                for sentence in sentences:
-                    sentence.set_signature(selected_inference_processor.signature())
-                    cached = self.mem_cache.query_cache(sentence)
-                    if cached is not None:
-                        sentence = cached
-                    else:
-                        self.runner.process_sentence(sentence, selected_inference_processor)
-                        self.mem_cache.insert_cache(sentence)
-                        self.surface_cache.insert_cache(sentence)
-                    predicted_types.append(list(sentence.predicted_types))
-                    predicted_candidates.append(sentence.elmo_candidate_titles)
-                    mentions.append(sentence.get_mention_surface_raw())
-                    selected_candidates.append(sentence.selected_title)
-                    other_possible_types.append(sentence.could_also_be_types)
             else:
                 rules = r["taxonomy"]
                 mappings = self.parse_custom_rules(rules)
-                custom_inference_processor = InferenceProcessor(mode, custom_mapping=mappings)
-                for sentence in sentences:
-                    sentence.set_signature(custom_inference_processor.signature())
-                    cached = self.mem_cache.query_cache(sentence)
-                    if cached is not None:
-                        sentence = cached
-                    else:
-                        self.runner.process_sentence(sentence, custom_inference_processor)
-                        self.mem_cache.insert_cache(sentence)
-                        self.surface_cache.insert_cache(sentence)
-                    predicted_types.append(list(sentence.predicted_types))
-                    predicted_candidates.append(sentence.elmo_candidate_titles)
-                    mentions.append(sentence.get_mention_surface_raw())
-                    selected_candidates.append(sentence.selected_title)
-                    other_possible_types.append(sentence.could_also_be_types)
+                selected_inference_processor = InferenceProcessor(mode, custom_mapping=mappings)
         else:
-            for sentence in sentences:
-                sentence.set_signature(self.runner.inference_processor.signature())
-                cached = self.mem_cache.query_cache(sentence)
-                if cached is not None:
-                    sentence = cached
-                else:
-                    self.runner.process_sentence(sentence)
+            selected_inference_processor = self.runner.inference_processor
+
+        for sentence in sentences:
+            sentence.set_signature(selected_inference_processor.signature())
+            cached = self.mem_cache.query_cache(sentence)
+            if cached is not None:
+                sentence = cached
+            else:
+                self.runner.process_sentence(sentence, selected_inference_processor)
+                try:
                     self.mem_cache.insert_cache(sentence)
                     self.surface_cache.insert_cache(sentence)
-                predicted_types.append(list(sentence.predicted_types))
-                predicted_candidates.append(sentence.elmo_candidate_titles)
-                mentions.append(sentence.get_mention_surface_raw())
-                selected_candidates.append(sentence.selected_title)
-                other_possible_types.append(sentence.could_also_be_types)
+                except:
+                    print("Cache insertion exception. Ignored.")
+            predicted_types.append(list(sentence.predicted_types))
+            predicted_candidates.append(sentence.elmo_candidate_titles)
+            mentions.append(sentence.get_mention_surface_raw())
+            selected_candidates.append(sentence.selected_title)
+            other_possible_types.append(sentence.could_also_be_types)
+
         elapsed_time = time.time() - start_time
         print("Processed mention " + str([x.get_mention_surface() for x in sentences]) + " in mode " + mode + ". TIME: " + str(elapsed_time) + " seconds.")
         ret["type"] = predicted_types
@@ -176,6 +154,17 @@ def pipeline_initialize_helper(self, tokens):
         doc.get_ner_ontonotes
         doc.get_view("MENTION")
 
+    def handle_tokenizer_input(self):
+        r = request.get_json()
+        ret = {"tokens": []}
+        if "sentence" not in r:
+            return json.dumps(ret)
+        doc = self.pipeline.doc(r["sentence"])
+        token_view = doc.get_tokens
+        for cons in token_view:
+            ret["tokens"].append(str(cons))
+        return json.dumps(ret)
+
     """
     Handles requests for mention filling
     """
@@ -209,12 +198,12 @@ def handle_mention_input(self):
         for cons in additions_view:
             add_to_list = True
             if additions_view.view_name != "MENTION":
-                start = cons['start']
-                end = cons['end']
+                start = int(cons['start'])
+                end = int(cons['end'])
             else:
-                start = cons['properties']['EntityHeadStartSpan']
-                end = cons['properties']['EntityHeadEndSpan']
-            for i in range(start - 1, end + 1):
+                start = int(cons['properties']['EntityHeadStartSpan'])
+                end = int(cons['properties']['EntityHeadEndSpan'])
+            for i in range(max(start - 1, 0), min(len(tokens), end + 1)):
                 if i in ret_set:
                     add_to_list = False
                     break
@@ -246,10 +235,13 @@ def handle_simple_input(self):
         for sentence in sentences:
             surface = sentence.get_mention_surface()
             cached_types = self.surface_cache.query_cache(surface)
-            distinct = set()
-            for t in cached_types:
-                distinct.add("/" + t.split("/")[1])
-            types.append(list(distinct))
+            if cached_types is not None:
+                distinct = set()
+                for t in cached_types:
+                    distinct.add("/" + t.split("/")[1])
+                types.append(list(distinct))
+            else:
+                types.append([])
         ret["type"] = types
         ret["index"] = r["index"]
         return json.dumps(ret)
@@ -294,6 +286,7 @@ def start(self, localhost=False, port=80):
         self.app.add_url_rule("/", "", self.handle_redirection)
         self.app.add_url_rule("/", "", self.handle_root)
         self.app.add_url_rule("/annotate", "annotate", self.handle_input, methods=['POST'])
+        self.app.add_url_rule("/annotate_token", "annotate_token", self.handle_tokenizer_input, methods=['POST'])
         self.app.add_url_rule("/annotate_mention", "annotate_mention", self.handle_mention_input, methods=['POST'])
         self.app.add_url_rule("/annotate_cache", "annotate_cache", self.handle_simple_input, methods=['POST'])
         self.app.add_url_rule("/annotate_vec", "annotate_vec", self.handle_word2vec_input, methods=['POST'])
diff --git a/zoe_utils.py b/zoe_utils.py
index 86545b9..0afa607 100644
--- a/zoe_utils.py
+++ b/zoe_utils.py
@@ -277,7 +277,7 @@ def rank_candidates_vec(self, sentence=None, candidates=None):
         target_vec = self.word2vec_helper(sentence.get_mention_surface())
         if target_vec is None:
             print(sentence.get_mention_surface() + " not found in word2vec")
-            return candidates
+            return [(x, 0.0) for x in candidates]
         assert(len(target_vec) == 300)
         results = {}
         for candidate in candidates: