Workflow with five widgets #158

Open: wants to merge 2 commits into master
Changes from all commits
orangecontrib/storynavigation/modules/actoranalysis.py (1 change: 1 addition & 0 deletions)

@@ -186,6 +186,7 @@ def __postag_sents(
         ents = [dict(t) for t in unique_tuples]

         doc = {"text": sentence, "ents": ents}
+        print('ACTORS.PY', doc)
         html += displacy.render(doc, style="ent", options=options, manual=True)
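For reference, the displacy call this hunk touches uses spaCy's manual rendering mode, which takes a plain dict of text plus precomputed entity spans instead of a parsed Doc; the added print simply dumps that payload to stdout. A minimal standalone sketch of the rendering call (the sentence and span offsets are invented):

from spacy import displacy

# Manual mode: "ents" carries precomputed character offsets into "text".
doc = {
    "text": "Anna opende de deur.",  # invented example sentence
    "ents": [{"start": 0, "end": 4, "label": "ACTOR"}],
}

# manual=True makes displacy trust these spans instead of running a model.
html = displacy.render(doc, style="ent", manual=True)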
orangecontrib/storynavigation/modules/meansanalysis.py (19 changes: 12 additions & 7 deletions)

@@ -28,12 +28,12 @@ def __init__(self, language, story_elements, verb_frames, means_strategy, callba
 
 
     def __convert_str_columns_to_ints(self, story_elements_df) -> None:
-        columns_to_convert = ["storyid", "sentence_id", "token_start_idx", "spacy_head_idx"]
+        columns_to_convert = ["storyid", "segment_id", "sentence_id", "token_start_idx", "spacy_head_idx"]
         story_elements_df[columns_to_convert] = story_elements_df[columns_to_convert].astype(int)
 
 
     def __compute_sentence_offsets(self, story_elements_df) -> pd.DataFrame:
-        sentences_df = story_elements_df.groupby(["storyid", "sentence_id"]).first().reset_index()[["storyid", "sentence_id", "sentence"]]
+        sentences_df = story_elements_df.groupby(["storyid", "sentence_id"]).first().reset_index()[["storyid", "segment_id", "sentence_id", "sentence"]]
         char_offsets = []
         last_sentence = ""
         for sentence_id, sentence in zip(sentences_df["sentence_id"],

@@ -45,7 +45,7 @@ def __compute_sentence_offsets(self, story_elements_df) -> pd.DataFrame:
             char_offsets.append(char_offset)
             last_sentence = sentence
         sentences_df["char_offset"] = char_offsets
-        return sentences_df[["storyid", "sentence_id", "char_offset"]].set_index(["storyid", "sentence_id"])
+        return sentences_df[["storyid", "segment_id", "sentence_id", "char_offset"]].set_index(["storyid", "sentence_id"])
 
 
     def __convert_entities(self, entities, sentence_offsets) -> dict:

@@ -113,6 +113,7 @@ def __prepend_tokens_to_means_phrase(self, sentence_df, sentence_entities, head_
             in_between_text = " " if entity_gap_size == 1 else ", "
             sentence_entities[child_entity_id] = {
                 "text": child_entity_text + in_between_text + sentence_entities[head_start_id]["text"],
+                "segment_id": sentence_df[child_entity_id]["segment_id"],
                 "sentence_id": sentence_df[child_entity_id]["sentence_id"],
                 "label_": "MEANS" }
             del sentence_entities[head_start_id]

@@ -153,17 +154,21 @@ def __process_sentence(self, sentence_dict) -> dict:
 
     def __add_sentence_entity(self, sentence_dict, sentence_entities, entity_start_id, head_start_id, head_of_head_start_id) -> None:
         entity = sentence_dict[entity_start_id]
+        segment_id = entity["segment_id"]
         sentence_id = entity["sentence_id"]
         sentence_entities[entity_start_id] = {
             "label_": "PREP",
+            "segment_id": segment_id,
             "sentence_id": sentence_id,
             "text": entity["token_text"]}
         sentence_entities[head_start_id] = {
             "label_": "MEANS",
+            "segment_id": segment_id,
             "sentence_id": sentence_id,
             "text": sentence_dict[head_start_id]["token_text"]}
         sentence_entities[head_of_head_start_id] = {
             "label_": "VERB",
+            "segment_id": segment_id,
             "sentence_id": sentence_id,
             "text": sentence_dict[head_of_head_start_id]["token_text"]}
         self.__expand_means_phrase(sentence_dict, sentence_entities, entity_start_id, head_start_id)

@@ -183,10 +188,10 @@ def __get_head_dependencies(self, sentence_df, entity_start_id, head_start_id) -
 
 
     def __sort_and_filter_results(self, entities) -> pd.DataFrame:
-        results = [(entity["text"], entity["label_"], storyid, entity["sentence_id"], char_id)
+        results = [(entity["text"], entity["label_"], storyid, entity["segment_id"], entity["sentence_id"], char_id)
                    for storyid, story_entities in entities.items()
                    for char_id, entity in story_entities.items()]
-        results_df = pd.DataFrame(results, columns=["text", "label", "storyid", "sentence_id", "character_id"])
+        results_df = pd.DataFrame(results, columns=["text", "label", "storyid", "segment_id", "sentence_id", "character_id"])
         results_df.sort_values(by=["storyid", "character_id"], inplace=True)
-        results_df["text_id"] = "ST" + results_df["storyid"].astype(str)
-        return results_df[["text", "label", "text_id", "sentence_id", "character_id"]].reset_index(drop=True)
+        results_df["text_id"] = results_df["storyid"].astype(int)
+        return results_df[["text", "label", "text_id", "segment_id", "sentence_id", "character_id"]].reset_index(drop=True)
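To see the net effect of the __sort_and_filter_results changes, here is a self-contained rerun of the new logic on a one-entity toy input (the entity values are invented; the transformation lines mirror the diff):

import pandas as pd

# Toy input in the analyzer's internal shape: {storyid: {char_id: entity}}.
entities = {
    0: {12: {"text": "met een hamer", "label_": "MEANS",
             "segment_id": 1, "sentence_id": 3}}}

results = [(e["text"], e["label_"], storyid, e["segment_id"], e["sentence_id"], char_id)
           for storyid, story_entities in entities.items()
           for char_id, e in story_entities.items()]
results_df = pd.DataFrame(results, columns=["text", "label", "storyid", "segment_id",
                                            "sentence_id", "character_id"])
results_df.sort_values(by=["storyid", "character_id"], inplace=True)
results_df["text_id"] = results_df["storyid"].astype(int)  # previously "ST" + storyid as a string
print(results_df[["text", "label", "text_id", "segment_id", "sentence_id", "character_id"]])

Note that besides the new segment_id column, text_id changes type here: it was a string like "ST0" and is now a plain integer story id.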
orangecontrib/storynavigation/modules/purposeanalysis.py (31 changes: 21 additions & 10 deletions)

@@ -1,7 +1,7 @@
 import pandas as pd
 import storynavigation.modules.constants as constants
 import storynavigation.modules.util as util
-
+import sys
 
 class PurposeAnalyzer:
     """Class for extracting purpose from texts

@@ -36,12 +36,12 @@ def __init__(self, language, story_elements, verb_frames, purpose_strategy, call
 
 
     def __convert_str_columns_to_ints(self, story_elements_df) -> None:
-        columns_to_convert = ["storyid", "sentence_id", "token_start_idx", "spacy_head_idx"]
+        columns_to_convert = ["storyid", "sentence_id", "segment_id", "token_start_idx", "spacy_head_idx"]
         story_elements_df[columns_to_convert] = story_elements_df[columns_to_convert].astype(int)
 
 
     def __compute_sentence_offsets(self, story_elements_df) -> pd.DataFrame:
-        sentences_df = story_elements_df.groupby(["storyid", "sentence_id"]).first().reset_index()[["storyid", "sentence_id", "sentence"]]
+        sentences_df = story_elements_df.groupby(["storyid", "sentence_id"]).first().reset_index()[["storyid", "sentence_id", "segment_id", "sentence"]]
         char_offsets = []
         last_sentence = ""
         for sentence_id, sentence in zip(sentences_df["sentence_id"],

@@ -53,22 +53,23 @@ def __compute_sentence_offsets(self, story_elements_df) -> pd.DataFrame:
             char_offsets.append(char_offset)
             last_sentence = sentence
         sentences_df["char_offset"] = char_offsets
-        return sentences_df[["storyid", "sentence_id", "char_offset"]].set_index(["storyid", "sentence_id"])
+        return sentences_df[["storyid", "sentence_id", "segment_id", "char_offset"]].set_index(["storyid", "sentence_id"])
 
 
     def __get_missing_label(self, entities_from_onsets, storyid, sentence_id) -> list:
         labels_found = [entity['label_'] for entity in entities_from_onsets[storyid].values() if entity['sentence_id'] == sentence_id]
         return [x for x in self.PURPOSE_LABELS if x not in labels_found]
 
 
-    def __add_missing_relation_part(self, entities_from_onsets, sentence_offsets, storyid, sentence_id, previous_sentence) -> None:
+    def __add_missing_relation_part(self, entities_from_onsets, sentence_offsets, storyid, sentence_id, segment_id, previous_sentence) -> None:
         missing_labels = self.__get_missing_label(entities_from_onsets, storyid, sentence_id)
         if len(missing_labels) == 1:
             char_id_start = sentence_offsets.loc[(storyid, sentence_id - 1)]["char_offset"]
             char_id_end = sentence_offsets.loc[(storyid, sentence_id)]["char_offset"] - 1
             entities_from_onsets[storyid][char_id_start] = {
                 'label_': missing_labels[0],
                 'sentence_id': sentence_id,
+                'segment_id': segment_id,
                 'text': previous_sentence
             }
 

@@ -79,6 +80,7 @@ def __add_missing_relation_parts(self, story_elements_df, entities_from_onsets,
         sentence_ids = {}
         for char_id in entities_from_onsets[storyid]:
             sentence_id = entities_from_onsets[storyid][char_id]['sentence_id']
+            segment_id = entities_from_onsets[storyid][char_id]['segment_id']
             label = entities_from_onsets[storyid][char_id]['label_']
             if sentence_id in sentence_ids:
                 sentence_ids[sentence_id].append(label)

@@ -90,13 +92,14 @@
                     sentence_offsets,
                     storyid,
                     sentence_id,
+                    segment_id,
                     sentences_df.loc[storyid, sentence_id - 1])
         return entities_from_onsets
 
 
     def __convert_entities(self, entities, sentence_offsets) -> dict:
         entities_from_onsets = {}
-        for storyid, sentence_id, sentence_data in entities:
+        for storyid, sentence_id, segment_id, sentence_data in entities:
             story_entities = entities_from_onsets.setdefault(storyid, {})
             char_offset_sentence = sentence_offsets.loc[(storyid, sentence_id)]["char_offset"]
             for token_start_id, token_data in sentence_data.items():
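The next hunk shows __process_texts appending the matching four-element record that __convert_entities above now unpacks. For clarity, a toy illustration of that record shape (all values invented):

# Each record: (storyid, sentence_id, segment_id, {token_start_id: token_data}).
entities = [
    (0, 2, 1, {15: {"token_text": "om", "sentence_id": 2, "segment_id": 1}}),
]

for storyid, sentence_id, segment_id, sentence_data in entities:
    for token_start_id, token_data in sentence_data.items():
        print(storyid, segment_id, sentence_id, token_start_id, token_data["token_text"])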
@@ -119,6 +122,7 @@ def __process_texts(self, story_elements_df, callback=None) -> list:
                 entities.append([
                     sentence_dict_index[0],
                     sentence_dict_index[1],
+                    sentence_entities[list(sentence_entities.keys())[0]]["segment_id"],
                     sentence_entities])
             if callback:
                 callback((100*(index + 1))/len(sentence_dict))

@@ -167,6 +171,7 @@ def __prepend_tokens_to_purpose_phrase(self, sentence_dict, sentence_entities, h
             sentence_entities[child_entity_id] = {
                 "text": child_entity_text + in_between_text + sentence_entities[head_start_id]["text"],
                 "sentence_id": sentence_dict[child_entity_id]["sentence_id"],
+                "segment_id": sentence_dict[child_entity_id]["segment_id"],
                 "label_": sentence_entities[head_start_id]['label_'] }
             del sentence_entities[head_start_id]
             head_start_id = child_entity_id

@@ -219,15 +224,18 @@ def __log_error(self, error_phrase, e, token_data) -> None:
     def __add_sentence_entity_adverb(self, sentence_dict, sentence_entities, entity_start_id, head_start_id, head_of_head_start_id) -> None:
         entity = sentence_dict[entity_start_id]
         sentence_id = entity["sentence_id"]
+        segment_id = entity["segment_id"]
         reversed_order = ([x[2] for x in self.verb_frames if x[1] == entity["token_text"].lower()] == ['yes'])
         head_label, head_of_head_label = ['PURPOSE', 'CONTEXT'] if reversed_order else ['CONTEXT', 'PURPOSE']
         sentence_entities[entity_start_id] = {
             "label_": "ADVERB",
             "sentence_id": sentence_id,
+            "segment_id": segment_id,
             "text": entity["token_text"]}
         sentence_entities[head_start_id] = {
             "label_": head_label,
             "sentence_id": sentence_id,
+            "segment_id": segment_id,
             "text": sentence_dict[head_start_id]["token_text"]}
         processed_ids = {entity_start_id, head_start_id, head_of_head_start_id}
         self.__expand_phrase(sentence_dict,

@@ -239,6 +247,7 @@ def __add_sentence_entity_adverb(self, sentence_dict, sentence_entities, entity_
         sentence_entities[head_of_head_start_id] = {
             "label_": head_of_head_label,
             "sentence_id": sentence_id,
+            "segment_id": segment_id,
             "text": sentence_dict[head_of_head_start_id]["token_text"]}
         self.__expand_phrase(sentence_dict,
                              sentence_entities,

@@ -250,9 +259,11 @@ def __add_sentence_entity_adverb(self, sentence_dict, sentence_entities, entity_
     def __add_sentence_entity_verb(self, sentence_dict, sentence_entities, entity_start_id) -> None:
         entity = sentence_dict[entity_start_id]
         sentence_id = entity["sentence_id"]
+        segment_id = entity["segment_id"]
         sentence_entities[entity_start_id] = {
             "label_": "PURPOSE",
             "sentence_id": sentence_id,
+            "segment_id": segment_id,
             "text": entity["token_text"]}
         self.__expand_phrase(sentence_dict, sentence_entities, entity_start_id, entity_start_id, processed_ids=set())
 

@@ -267,10 +278,10 @@ def __get_head_dependencies(self, sentence_dict, entity_start_id, head_start_id)
 
 
     def __sort_and_filter_results(self, entities) -> pd.DataFrame:
-        results = [(entity["text"], entity["label_"], storyid, entity["sentence_id"], char_id)
+        results = [(entity["text"], entity["label_"], storyid, entity["segment_id"], entity["sentence_id"], char_id)
                    for storyid, story_entities in entities.items()
                    for char_id, entity in story_entities.items()]
-        results_df = pd.DataFrame(results, columns=["text", "label", "storyid", "sentence_id", "character_id"])
+        results_df = pd.DataFrame(results, columns=["text", "label", "storyid", "segment_id", "sentence_id", "character_id"])
         results_df.sort_values(by=["storyid", "character_id"], inplace=True)
-        results_df["text_id"] = "ST" + results_df["storyid"].astype(str)
-        return results_df[["text", "label", "text_id", "sentence_id", "character_id"]].reset_index(drop=True)
+        results_df["text_id"] = results_df["storyid"].astype(int)
+        return results_df[["text", "label", "text_id", "segment_id", "sentence_id", "character_id"]].reset_index(drop=True)
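Threading segment_id through to the analyzer output presumably lets downstream widgets aggregate results per story segment rather than only per sentence. A hypothetical consumer of the new output shape (rows invented; column names taken from the revised return statement):

import pandas as pd

results_df = pd.DataFrame(
    [("om te winnen", "PURPOSE", 0, 1, 2, 15),
     ("speelde hij vals", "CONTEXT", 0, 1, 2, 40)],
    columns=["text", "label", "text_id", "segment_id", "sentence_id", "character_id"])

# Label counts per story segment, e.g. for a per-segment overview widget.
per_segment = results_df.groupby(["text_id", "segment_id"])["label"].value_counts()
print(per_segment)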