From 0e3b031ed5879ba80342d855e9ca241bf138e5bf Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 8 Jul 2022 21:56:20 +0800
Subject: [PATCH 01/21] Add an example to connect ForteHealth with Stave
---
examples/clinical_pipeline/chat_project.json | 5 +
.../chat_query_document.json | 5 +
.../clinical_pipeline/clinical_config.yml | 31 ++++
.../clinical_pipeline/clinical_pipeline.py | 147 ++++++++++++++++++
.../default_onto_project.json | 4 +
examples/clinical_pipeline/demo/__init__.py | 1 +
examples/clinical_pipeline/demo/clinical.py | 49 ++++++
examples/clinical_pipeline/download_models.py | 25 +++
.../clinical_pipeline/mimic3_note_reader.py | 82 ++++++++++
examples/clinical_pipeline/requirements.txt | 0
.../clinical_pipeline/sample_data/notes.txt | 6 +
examples/clinical_pipeline/settings.json | 3 +
.../clinical_pipeline/utterance_searcher.py | 124 +++++++++++++++
13 files changed, 482 insertions(+)
create mode 100644 examples/clinical_pipeline/chat_project.json
create mode 100644 examples/clinical_pipeline/chat_query_document.json
create mode 100644 examples/clinical_pipeline/clinical_config.yml
create mode 100644 examples/clinical_pipeline/clinical_pipeline.py
create mode 100644 examples/clinical_pipeline/default_onto_project.json
create mode 100644 examples/clinical_pipeline/demo/__init__.py
create mode 100644 examples/clinical_pipeline/demo/clinical.py
create mode 100644 examples/clinical_pipeline/download_models.py
create mode 100644 examples/clinical_pipeline/mimic3_note_reader.py
create mode 100644 examples/clinical_pipeline/requirements.txt
create mode 100644 examples/clinical_pipeline/sample_data/notes.txt
create mode 100644 examples/clinical_pipeline/settings.json
create mode 100644 examples/clinical_pipeline/utterance_searcher.py
diff --git a/examples/clinical_pipeline/chat_project.json b/examples/clinical_pipeline/chat_project.json
new file mode 100644
index 00000000..b95b186e
--- /dev/null
+++ b/examples/clinical_pipeline/chat_project.json
@@ -0,0 +1,5 @@
+{
+ "name": "clinical_pipeline_chat",
+ "project_type": "single_pack",
+ "ontology": "{\n \"name\": \"base_ontology\",\n \"definitions\": [\n {\n \"entry_name\": \"ft.onto.base_ontology.Token\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation :class:`Token`, used to represent a token or a word.\",\n \"attributes\": [\n {\n \"name\": \"pos\",\n \"type\": \"str\"\n },\n {\n \"name\": \"ud_xpos\",\n \"type\": \"str\",\n \"description\": \"Language specific pos tag. Used in CoNLL-U Format. Refer to https://universaldependencies.org/format.html\"\n },\n {\n \"name\": \"lemma\",\n \"type\": \"str\",\n \"description\": \"Lemma or stem of word form.\"\n },\n {\n \"name\": \"chunk\",\n \"type\": \"str\"\n },\n {\n \"name\": \"ner\",\n \"type\": \"str\"\n },\n {\n \"name\": \"sense\",\n \"type\": \"str\"\n },\n {\n \"name\": \"is_root\",\n \"type\": \"bool\"\n },\n {\n \"name\": \"ud_features\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"str\"\n },\n {\n \"name\": \"ud_misc\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"str\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Subword\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"Used to represent subword tokenization results.\",\n \"attributes\": [\n {\n \"name\": \"is_first_segment\",\n \"type\": \"bool\"\n },\n {\n \"name\": \"is_unk\",\n \"type\": \"bool\"\n },\n {\n \"name\": \"vocab_id\",\n \"type\": \"int\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Classification\",\n \"parent_entry\": \"forte.data.ontology.top.Generics\",\n \"description\": \"Used to store values for classification prediction\",\n \"attributes\": [\n {\n \"name\": \"classification_result\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"float\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Document\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Document`, normally 
used to represent a document.\",\n \"attributes\": [\n {\n \"name\": \"document_class\",\n \"type\": \"List\",\n \"item_type\": \"str\",\n \"description\": \"A list of class names that the document belongs to.\"\n },\n {\n \"name\": \"sentiment\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"float\"\n },\n {\n \"name\": \"classifications\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"ft.onto.base_ontology.Classification\",\n \"description\": \"Stores the classification results for this document. The key is the name/task of the classification, the value is an classification object storing the results.\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Sentence\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Sentence`, normally used to represent a sentence.\",\n \"attributes\": [\n {\n \"name\": \"speaker\",\n \"type\": \"str\"\n },\n {\n \"name\": \"part_id\",\n \"type\": \"int\"\n },\n {\n \"name\": \"sentiment\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"float\"\n },\n {\n \"name\": \"classification\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"float\"\n },\n {\n \"name\": \"classifications\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"ft.onto.base_ontology.Classification\",\n \"description\": \"Stores the classification results for this sentence. 
The key is the name/task of the classification, the value is an classification object storing the results.\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Phrase\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Phrase`.\",\n \"attributes\": [\n {\n \"name\": \"phrase_type\",\n \"type\": \"str\"\n },\n {\n \"name\": \"headword\",\n \"type\": \"ft.onto.base_ontology.Token\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.UtteranceContext\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"`UtteranceContext` represents the context part in dialogue.\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Utterance\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Utterance`, normally used to represent an utterance in dialogue.\",\n \"attributes\": [\n {\n \"name\": \"speaker\",\n \"type\": \"str\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.PredicateArgument\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `PredicateArgument`, normally used to represent an argument of a predicate, can be linked to the predicate via the predicate link.\",\n \"attributes\": [\n {\n \"name\": \"ner_type\",\n \"type\": \"str\"\n },\n {\n \"name\": \"predicate_lemma\",\n \"type\": \"str\"\n },\n {\n \"name\": \"is_verb\",\n \"type\": \"bool\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.EntityMention\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `EntityMention`, normally used to represent an Entity Mention in a piece of text.\",\n \"attributes\": [\n {\n \"name\": \"ner_type\",\n \"type\": \"str\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.EventMention\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation 
`EventMention`, used to refer to a mention of an event.\",\n \"attributes\": [\n {\n \"name\": \"event_type\",\n \"type\": \"str\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.PredicateMention\",\n \"parent_entry\": \"ft.onto.base_ontology.Phrase\",\n \"description\": \"A span based annotation `PredicateMention`, normally used to represent a predicate (normally verbs) in a piece of text.\",\n \"attributes\": [\n {\n \"name\": \"predicate_lemma\",\n \"type\": \"str\"\n },\n {\n \"name\": \"framenet_id\",\n \"type\": \"str\"\n },\n {\n \"name\": \"is_verb\",\n \"type\": \"bool\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.PredicateLink\",\n \"parent_entry\": \"forte.data.ontology.top.Link\",\n \"description\": \"A `Link` type entry which represent a semantic role link between a predicate and its argument.\",\n \"attributes\": [\n {\n \"name\": \"arg_type\",\n \"type\": \"str\",\n \"description\": \"The predicate link type.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.PredicateMention\",\n \"child_type\": \"ft.onto.base_ontology.PredicateArgument\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Dependency\",\n \"parent_entry\": \"forte.data.ontology.top.Link\",\n \"description\": \"A `Link` type entry which represent a syntactic dependency.\",\n \"attributes\": [\n {\n \"name\": \"dep_label\",\n \"type\": \"str\",\n \"description\": \"The dependency label.\"\n },\n {\n \"name\": \"rel_type\",\n \"type\": \"str\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.Token\",\n \"child_type\": \"ft.onto.base_ontology.Token\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.EnhancedDependency\",\n \"parent_entry\": \"forte.data.ontology.top.Link\",\n \"description\": \"A `Link` type entry which represent a enhanced dependency: \\n https://universaldependencies.org/u/overview/enhanced-syntax.html\",\n \"attributes\": [\n {\n \"name\": \"dep_label\",\n \"type\": \"str\",\n \"description\": \"The enhanced dependency label in 
Universal Dependency.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.Token\",\n \"child_type\": \"ft.onto.base_ontology.Token\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.RelationLink\",\n \"parent_entry\": \"forte.data.ontology.top.Link\",\n \"description\": \"A `Link` type entry which represent a relation between two entity mentions\",\n \"attributes\": [\n {\n \"name\": \"rel_type\",\n \"type\": \"str\",\n \"description\": \"The type of the relation.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.EntityMention\",\n \"child_type\": \"ft.onto.base_ontology.EntityMention\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.CrossDocEntityRelation\",\n \"parent_entry\": \"forte.data.ontology.top.MultiPackLink\",\n \"description\": \"A `Link` type entry which represent a relation between two entity mentions across the packs.\",\n \"attributes\": [\n {\n \"name\": \"rel_type\",\n \"type\": \"str\",\n \"description\": \"The type of the relation.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.EntityMention\",\n \"child_type\": \"ft.onto.base_ontology.EntityMention\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.CoreferenceGroup\",\n \"parent_entry\": \"forte.data.ontology.top.Group\",\n \"description\": \"A group type entry that take `EntityMention`, as members, used to represent coreferent group of entities.\",\n \"member_type\": \"ft.onto.base_ontology.EntityMention\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.EventRelation\",\n \"parent_entry\": \"forte.data.ontology.top.Link\",\n \"description\": \"A `Link` type entry which represent a relation between two event mentions.\",\n \"attributes\": [\n {\n \"name\": \"rel_type\",\n \"type\": \"str\",\n \"description\": \"The type of the relation.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.EventMention\",\n \"child_type\": \"ft.onto.base_ontology.EventMention\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.CrossDocEventRelation\",\n \"parent_entry\": 
\"forte.data.ontology.top.MultiPackLink\",\n \"description\": \"A `Link` type entry which represent a relation between two event mentions across the packs.\",\n \"attributes\": [\n {\n \"name\": \"rel_type\",\n \"type\": \"str\",\n \"description\": \"The type of the relation.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.EventMention\",\n \"child_type\": \"ft.onto.base_ontology.EventMention\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.ConstituentNode\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `ConstituentNode` to represent constituents in constituency parsing. This can also sentiment values annotated on the nodes.\",\n \"attributes\": [\n {\n \"name\": \"label\",\n \"type\": \"str\"\n },\n {\n \"name\": \"sentiment\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"float\"\n },\n {\n \"name\": \"is_root\",\n \"type\": \"bool\"\n },\n {\n \"name\": \"is_leaf\",\n \"type\": \"bool\"\n },\n {\n \"name\": \"parent_node\",\n \"type\": \"ft.onto.base_ontology.ConstituentNode\"\n },\n {\n \"name\": \"children_nodes\",\n \"type\": \"List\",\n \"item_type\": \"ft.onto.base_ontology.ConstituentNode\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Title\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Title`, normally used to represent a title.\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Body\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Body`, normally used to represent a document body.\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.MCOption\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.MCQuestion\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"attributes\": [\n {\n \"name\": \"options\",\n \"type\": \"List\",\n \"item_type\": 
\"ft.onto.base_ontology.MCOption\"\n },\n {\n \"name\": \"answers\",\n \"type\": \"List\",\n \"item_type\": \"int\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.MRCQuestion\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"An `Annotation` type which represents an MRC question.\",\n \"attributes\": [\n {\n \"name\": \"qid\",\n \"type\": \"int\"\n },\n {\n \"name\": \"answers\",\n \"type\": \"List\",\n \"item_type\": \"ft.onto.base_ontology.Phrase\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Recording\",\n \"parent_entry\": \"forte.data.ontology.top.AudioAnnotation\",\n \"description\": \"A span based annotation `Recording`, normally used to represent a recording.\",\n \"attributes\": [\n {\n \"name\": \"recording_class\",\n \"type\": \"List\",\n \"item_type\": \"str\",\n \"description\": \"A list of class names that the recording belongs to.\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.AudioUtterance\",\n \"parent_entry\": \"forte.data.ontology.top.AudioAnnotation\",\n \"description\": \"A span based annotation `AudioUtterance`, normally used to represent an utterance in dialogue.\",\n \"attributes\": [\n {\n \"name\": \"speaker\",\n \"type\": \"str\"\n }\n ]\n },\n {\n \"entry_name\": \"ftx.medical.clinical_ontology.NegationContext\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `NegationContext`, used to represent the negation context of a named entity.\",\n \"attributes\": [\n {\n \"name\": \"polarity\",\n \"type\": \"bool\"\n }\n ]\n }\n ]\n}\n", "config": 
"{\"legendConfigs\":{\"ft.onto.base_ontology.Token\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"pos\":false,\"ud_xpos\":false,\"lemma\":false,\"chunk\":false,\"ner\":false,\"sense\":false}},\"ft.onto.base_ontology.Subword\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{}},\"ft.onto.base_ontology.Classification\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{}},\"ft.onto.base_ontology.Document\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{}},\"ft.onto.base_ontology.Sentence\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"speaker\":false}},\"ft.onto.base_ontology.Phrase\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"phrase_type\":false}},\"ft.onto.base_ontology.UtteranceContext\":{\"is_selected\":false,\"is_shown\":true},\"ft.onto.base_ontology.Utterance\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"speaker\":false}},\"ft.onto.base_ontology.PredicateArgument\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"ner_type\":false,\"predicate_lemma\":false}},\"ft.onto.base_ontology.EntityMention\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"ner_type\":false}},\"ft.onto.base_ontology.EventMention\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"event_type\":false}},\"ft.onto.base_ontology.PredicateMention\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"predicate_lemma\":false,\"framenet_id\":false}},\"ft.onto.base_ontology.PredicateLink\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"arg_type\":false}},\"ft.onto.base_ontology.Dependency\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"dep_label\":false,\"rel_type\":false}},\"ft.onto.base_ontology.EnhancedDependency\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"dep_label\":false}},\"ft.onto.base_ontology.RelationLink\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"rel_type\":false}},\"ft.onto.base_ontology.CrossDocEntityRelation\":{\
"is_selected\":false,\"is_shown\":true,\"attributes\":{\"rel_type\":false}},\"ft.onto.base_ontology.CoreferenceGroup\":{\"is_selected\":false,\"is_shown\":true},\"ft.onto.base_ontology.EventRelation\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"rel_type\":false}},\"ft.onto.base_ontology.CrossDocEventRelation\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"rel_type\":false}},\"ft.onto.base_ontology.ConstituentNode\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"label\":false}},\"ft.onto.base_ontology.Title\":{\"is_selected\":false,\"is_shown\":true},\"ft.onto.base_ontology.Body\":{\"is_selected\":false,\"is_shown\":true},\"ft.onto.base_ontology.MCOption\":{\"is_selected\":false,\"is_shown\":true},\"ft.onto.base_ontology.MCQuestion\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{}},\"ft.onto.base_ontology.MRCQuestion\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{}},\"ft.onto.base_ontology.Recording\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{}},\"ft.onto.base_ontology.AudioUtterance\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"speaker\":false}},\"ftx.medical.clinical_ontology.NegationContext\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{}}},\"scopeConfigs\":{\"ft.onto.base_ontology.Token\":false,\"ft.onto.base_ontology.Subword\":false,\"ft.onto.base_ontology.Document\":false,\"ft.onto.base_ontology.Sentence\":false,\"ft.onto.base_ontology.Phrase\":false,\"ft.onto.base_ontology.UtteranceContext\":false,\"ft.onto.base_ontology.Utterance\":false,\"ft.onto.base_ontology.PredicateArgument\":false,\"ft.onto.base_ontology.EntityMention\":false,\"ft.onto.base_ontology.EventMention\":false,\"ft.onto.base_ontology.PredicateMention\":false,\"ft.onto.base_ontology.ConstituentNode\":false,\"ft.onto.base_ontology.Title\":false,\"ft.onto.base_ontology.Body\":false,\"ft.onto.base_ontology.MCOption\":false,\"ft.onto.base_ontology.MCQuestion\":false,\"ft.onto.base_ontology.MRCQu
estion\":false,\"ftx.medical.clinical_ontology.NegationContext\":false},\"layoutConfigs\":{\"center-middle\":\"DialogueBox\",\"left\":\"disable\",\"right\":\"disable\",\"center-bottom\":\"disable\"},\"remoteConfigs\":{\"pipelineUrl\":\"http://localhost:8008\",\"doValidation\":false,\"expectedName\":\"\",\"inputFormat\":\"string\",\"expectedRecords\":{}}}"}
+
diff --git a/examples/clinical_pipeline/chat_query_document.json b/examples/clinical_pipeline/chat_query_document.json
new file mode 100644
index 00000000..d8b95d4a
--- /dev/null
+++ b/examples/clinical_pipeline/chat_query_document.json
@@ -0,0 +1,5 @@
+{
+ "name": "query_chatbot.json",
+ "project_id": 99,
+ "textPack": "{\n \"py/object\": \"forte.data.data_pack.DataPack\",\n \"py/state\": {\n \"creation_records\": {},\n \"field_records\": {},\n \"links\": [],\n \"groups\": [],\n \"meta\": {\n \"py/object\": \"forte.data.data_pack.Meta\",\n \"py/state\": {\n \"pack_name\": \"query_chatbot\",\n \"_pack_id\": 3,\n \"language\": \"eng\",\n \"span_unit\": \"character\"\n }\n },\n \"_text\": \"Welcome! Please type in a query to retrieve relevant clinical reports.\",\n \"annotations\": [\n {\n \"py/object\": \"ft.onto.base_ontology.Utterance\",\n \"py/state\": {\n \"_span\": {\n \"py/object\": \"forte.data.span.Span\",\n \"begin\": 0,\n \"end\": 70\n },\n \"_tid\": 0,\n \"speaker\": \"ai\"\n }\n }\n ],\n \"generics\": [],\n \"replace_back_operations\": [],\n \"processed_original_spans\": [],\n \"orig_text_len\": 70,\n \"serialization\": {\n \"next_id\": 1\n }\n }\n}"
+}
diff --git a/examples/clinical_pipeline/clinical_config.yml b/examples/clinical_pipeline/clinical_config.yml
new file mode 100644
index 00000000..e5be72fa
--- /dev/null
+++ b/examples/clinical_pipeline/clinical_config.yml
@@ -0,0 +1,31 @@
+BERTTokenizer:
+ model_path: "resources/NCBI-disease"
+
+Spacy:
+ processors: ["sentence", "tokenize", "pos", "dep", "ner", "umls_link"]
+ medical_onto_type: "ftx.medical.clinical_ontology.MedicalEntityMention"
+ umls_onto_type: "ftx.medical.clinical_ontology.UMLSConceptLink"
+ lang: "en_ner_bc5cdr_md"
+
+BioBERTNERPredictor:
+ model_path: "resources/NCBI-disease"
+ ner_type: "DISEASE"
+ ignore_labels: ["O"]
+
+LastUtteranceSearcher:
+ stave_db_path: "C://Users//Leo//.stave//db.sqlite3"
+ url: "http://localhost:8899"
+
+Remote:
+ port: 8008
+ input_format: "DataPack"
+ service_name: "Medical_Chatbot"
+
+Stave:
+ url: "http://localhost:8899"
+ username: admin
+ pw: admin
+
+viewer_project_json: "default_onto_project.json"
+chat_project_json: "chat_project.json"
+chat_document_json: "chat_query_document.json"
diff --git a/examples/clinical_pipeline/clinical_pipeline.py b/examples/clinical_pipeline/clinical_pipeline.py
new file mode 100644
index 00000000..a2880e1a
--- /dev/null
+++ b/examples/clinical_pipeline/clinical_pipeline.py
@@ -0,0 +1,147 @@
+import json
+import sys
+import time
+
+import yaml
+from forte.common.configuration import Config
+from forte.data.data_pack import DataPack
+from forte.data.readers import RawDataDeserializeReader, RawPackReader
+from forte.pipeline import Pipeline
+from forte.processors.writers import PackIdJsonPackWriter
+from fortex.elastic import ElasticSearchPackIndexProcessor
+from fortex.health.processors.icd_coding_processor import ICDCodingProcessor
+from fortex.health.processors.negation_context_analyzer import \
+ NegationContextAnalyzer
+from fortex.huggingface import BioBERTNERPredictor
+from fortex.nltk import NLTKNER, NLTKPOSTagger, NLTKSentenceSegmenter
+from fortex.spacy import SpacyProcessor
+from ft.onto.base_ontology import EntityMention, Sentence
+from ftx.medical.clinical_ontology import (MedicalArticle,
+ MedicalEntityMention,
+ NegationContext)
+from stave_backend.lib.stave_session import StaveSession
+
+from mimic3_note_reader import Mimic3DischargeNoteReader
+from utterance_searcher import LastUtteranceSearcher
+
+
+def get_json(path: str):
+    """Return the parsed JSON content of the file at ``path``."""
+    # The context manager closes the file even when parsing raises.
+    with open(path, encoding="utf-8") as file_obj:
+        return json.load(file_obj)
+
+
+def update_stave_db(default_project_json, chat_project_json, chat_doc_json, config):
+    """Ensure the viewer and chat projects exist in Stave.
+
+    Logs in at ``config.Stave.url``. If projects named like both
+    ``default_project_json`` and ``chat_project_json`` are already listed,
+    nothing is created. Otherwise both are created and ``chat_doc_json`` is
+    uploaded to the new chat project (its ``project_id`` is set in place).
+
+    Returns:
+        The id of the viewer (default) project.
+    """
+    with StaveSession(url=config.Stave.url) as session:
+        session.login(username=config.Stave.username, password=config.Stave.pw)
+
+        projects = session.get_project_list().json()
+        existing_names = {project["name"] for project in projects}
+        base_name = default_project_json["name"]
+        chat_name = chat_project_json["name"]
+        if base_name in existing_names and chat_name in existing_names:
+            # Reuse the first listed project carrying the viewer name.
+            return next(p["id"] for p in projects if p["name"] == base_name)
+
+        resp_base = session.create_project(default_project_json)
+        project_id_base = json.loads(resp_base.text)["id"]
+
+        resp_chat = session.create_project(chat_project_json)
+        chat_doc_json["project_id"] = json.loads(resp_chat.text)["id"]
+        # The upload response is not needed afterwards.
+        session.create_document(chat_doc_json)
+
+        return project_id_base
+
+
+def main(
+    input_path: str, output_path: str, max_packs: int = -1, use_mimiciii_reader=1
+):
+    """Run the clinical NER pipeline, then serve the chatbot search pipeline.
+
+    Packs read from ``input_path`` are processed, indexed and written as JSON
+    to ``output_path``; ``max_packs`` is passed to the MIMIC-III reader as
+    ``max_num_notes``. A ``use_mimiciii_reader`` other than 1 selects
+    ``RawPackReader`` instead. Settings come from ``clinical_config.yml``.
+    """
+    print("Starting demo pipeline example..")
+    # Close the config file deterministically instead of leaking the handle.
+    with open("clinical_config.yml", "r") as config_file:
+        config = Config(yaml.safe_load(config_file), default_hparams=None)
+    print("Running NER pipeline...")
+    pl = Pipeline[DataPack]()
+    if use_mimiciii_reader == 1:
+        pl.set_reader(
+            Mimic3DischargeNoteReader(), config={"max_num_notes": max_packs}
+        )
+    else:
+        pl.set_reader(RawPackReader())
+
+    pl.add(SpacyProcessor(), {"processors": ["sentence", "tokenize"]})
+    pl.add(NLTKPOSTagger())
+
+    pl.add(BioBERTNERPredictor(), config=config.BioBERTNERPredictor)
+    pl.add(SpacyProcessor(), config.Spacy)
+    pl.add(NegationContextAnalyzer())
+    pl.add(ICDCodingProcessor(), {"entry_type": "ft.onto.base_ontology.Sentence"})
+    pl.add(
+        ElasticSearchPackIndexProcessor(),
+        {"indexer": {"other_kwargs": {"refresh": True}}},
+    )
+    pl.add(
+        PackIdJsonPackWriter(),
+        {
+            "output_dir": output_path,
+            "indent": 2,
+            "overwrite": True,
+            "drop_record": True,
+            "zip_pack": False,
+        },
+    )
+
+    pl.initialize()
+
+    # The loop body only reports progress; iterating drives the processing.
+    for count, _ in enumerate(pl.process_dataset(input_path), start=1):
+        if count % 50 == 0:
+            print(f"{time.strftime('%m-%d %H:%M')}: Processed {count} packs")
+
+    default_project_json = get_json(config.viewer_project_json)
+    chat_project_json = get_json(config.chat_project_json)
+    chat_doc_json = get_json(config.chat_document_json)
+
+    base_project_id = update_stave_db(
+        default_project_json, chat_project_json, chat_doc_json, config
+    )
+
+    # Expose the utterance searcher as a remote service (see Remote in config).
+    remote_pl = Pipeline[DataPack]()
+    remote_pl.set_reader(RawDataDeserializeReader())
+    remote_pl.add(
+        LastUtteranceSearcher(),
+        config={
+            "query_result_project_id": base_project_id,
+            "stave_db_path": config.LastUtteranceSearcher.stave_db_path,
+            "url_stub": config.LastUtteranceSearcher.url,
+        },
+    )
+    remote_pl.serve(
+        port=config.Remote.port,
+        input_format=config.Remote.input_format,
+        service_name=config.Remote.service_name,
+    )
+
+
+if __name__ == "__main__":
+    main(sys.argv[1], sys.argv[2], int(sys.argv[3]), int(sys.argv[4]))
diff --git a/examples/clinical_pipeline/default_onto_project.json b/examples/clinical_pipeline/default_onto_project.json
new file mode 100644
index 00000000..03122b4a
--- /dev/null
+++ b/examples/clinical_pipeline/default_onto_project.json
@@ -0,0 +1,4 @@
+{
+ "name": "clinical_pipeline_base",
+ "project_type": "single_pack",
+ "ontology": "{\n \"name\": \"base_ontology\",\n \"definitions\": [\n {\n \"entry_name\": \"ft.onto.base_ontology.Token\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation :class:`Token`, used to represent a token or a word.\",\n \"attributes\": [\n {\n \"name\": \"pos\",\n \"type\": \"str\"\n },\n {\n \"name\": \"ud_xpos\",\n \"type\": \"str\",\n \"description\": \"Language specific pos tag. Used in CoNLL-U Format. Refer to https://universaldependencies.org/format.html\"\n },\n {\n \"name\": \"lemma\",\n \"type\": \"str\",\n \"description\": \"Lemma or stem of word form.\"\n },\n {\n \"name\": \"chunk\",\n \"type\": \"str\"\n },\n {\n \"name\": \"ner\",\n \"type\": \"str\"\n },\n {\n \"name\": \"sense\",\n \"type\": \"str\"\n },\n {\n \"name\": \"is_root\",\n \"type\": \"bool\"\n },\n {\n \"name\": \"ud_features\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"str\"\n },\n {\n \"name\": \"ud_misc\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"str\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Subword\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"Used to represent subword tokenization results.\",\n \"attributes\": [\n {\n \"name\": \"is_first_segment\",\n \"type\": \"bool\"\n },\n {\n \"name\": \"is_unk\",\n \"type\": \"bool\"\n },\n {\n \"name\": \"vocab_id\",\n \"type\": \"int\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Classification\",\n \"parent_entry\": \"forte.data.ontology.top.Generics\",\n \"description\": \"Used to store values for classification prediction\",\n \"attributes\": [\n {\n \"name\": \"classification_result\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"float\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Document\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Document`, normally 
used to represent a document.\",\n \"attributes\": [\n {\n \"name\": \"document_class\",\n \"type\": \"List\",\n \"item_type\": \"str\",\n \"description\": \"A list of class names that the document belongs to.\"\n },\n {\n \"name\": \"sentiment\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"float\"\n },\n {\n \"name\": \"classifications\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"ft.onto.base_ontology.Classification\",\n \"description\": \"Stores the classification results for this document. The key is the name/task of the classification, the value is an classification object storing the results.\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Sentence\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Sentence`, normally used to represent a sentence.\",\n \"attributes\": [\n {\n \"name\": \"speaker\",\n \"type\": \"str\"\n },\n {\n \"name\": \"part_id\",\n \"type\": \"int\"\n },\n {\n \"name\": \"sentiment\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"float\"\n },\n {\n \"name\": \"classification\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"float\"\n },\n {\n \"name\": \"classifications\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"ft.onto.base_ontology.Classification\",\n \"description\": \"Stores the classification results for this sentence. 
The key is the name/task of the classification, the value is an classification object storing the results.\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Phrase\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Phrase`.\",\n \"attributes\": [\n {\n \"name\": \"phrase_type\",\n \"type\": \"str\"\n },\n {\n \"name\": \"headword\",\n \"type\": \"ft.onto.base_ontology.Token\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.UtteranceContext\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"`UtteranceContext` represents the context part in dialogue.\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Utterance\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Utterance`, normally used to represent an utterance in dialogue.\",\n \"attributes\": [\n {\n \"name\": \"speaker\",\n \"type\": \"str\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.PredicateArgument\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `PredicateArgument`, normally used to represent an argument of a predicate, can be linked to the predicate via the predicate link.\",\n \"attributes\": [\n {\n \"name\": \"ner_type\",\n \"type\": \"str\"\n },\n {\n \"name\": \"predicate_lemma\",\n \"type\": \"str\"\n },\n {\n \"name\": \"is_verb\",\n \"type\": \"bool\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.EntityMention\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `EntityMention`, normally used to represent an Entity Mention in a piece of text.\",\n \"attributes\": [\n {\n \"name\": \"ner_type\",\n \"type\": \"str\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.EventMention\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation 
`EventMention`, used to refer to a mention of an event.\",\n \"attributes\": [\n {\n \"name\": \"event_type\",\n \"type\": \"str\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.PredicateMention\",\n \"parent_entry\": \"ft.onto.base_ontology.Phrase\",\n \"description\": \"A span based annotation `PredicateMention`, normally used to represent a predicate (normally verbs) in a piece of text.\",\n \"attributes\": [\n {\n \"name\": \"predicate_lemma\",\n \"type\": \"str\"\n },\n {\n \"name\": \"framenet_id\",\n \"type\": \"str\"\n },\n {\n \"name\": \"is_verb\",\n \"type\": \"bool\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.PredicateLink\",\n \"parent_entry\": \"forte.data.ontology.top.Link\",\n \"description\": \"A `Link` type entry which represent a semantic role link between a predicate and its argument.\",\n \"attributes\": [\n {\n \"name\": \"arg_type\",\n \"type\": \"str\",\n \"description\": \"The predicate link type.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.PredicateMention\",\n \"child_type\": \"ft.onto.base_ontology.PredicateArgument\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Dependency\",\n \"parent_entry\": \"forte.data.ontology.top.Link\",\n \"description\": \"A `Link` type entry which represent a syntactic dependency.\",\n \"attributes\": [\n {\n \"name\": \"dep_label\",\n \"type\": \"str\",\n \"description\": \"The dependency label.\"\n },\n {\n \"name\": \"rel_type\",\n \"type\": \"str\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.Token\",\n \"child_type\": \"ft.onto.base_ontology.Token\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.EnhancedDependency\",\n \"parent_entry\": \"forte.data.ontology.top.Link\",\n \"description\": \"A `Link` type entry which represent a enhanced dependency: \\n https://universaldependencies.org/u/overview/enhanced-syntax.html\",\n \"attributes\": [\n {\n \"name\": \"dep_label\",\n \"type\": \"str\",\n \"description\": \"The enhanced dependency label in 
Universal Dependency.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.Token\",\n \"child_type\": \"ft.onto.base_ontology.Token\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.RelationLink\",\n \"parent_entry\": \"forte.data.ontology.top.Link\",\n \"description\": \"A `Link` type entry which represent a relation between two entity mentions\",\n \"attributes\": [\n {\n \"name\": \"rel_type\",\n \"type\": \"str\",\n \"description\": \"The type of the relation.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.EntityMention\",\n \"child_type\": \"ft.onto.base_ontology.EntityMention\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.CrossDocEntityRelation\",\n \"parent_entry\": \"forte.data.ontology.top.MultiPackLink\",\n \"description\": \"A `Link` type entry which represent a relation between two entity mentions across the packs.\",\n \"attributes\": [\n {\n \"name\": \"rel_type\",\n \"type\": \"str\",\n \"description\": \"The type of the relation.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.EntityMention\",\n \"child_type\": \"ft.onto.base_ontology.EntityMention\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.CoreferenceGroup\",\n \"parent_entry\": \"forte.data.ontology.top.Group\",\n \"description\": \"A group type entry that take `EntityMention`, as members, used to represent coreferent group of entities.\",\n \"member_type\": \"ft.onto.base_ontology.EntityMention\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.EventRelation\",\n \"parent_entry\": \"forte.data.ontology.top.Link\",\n \"description\": \"A `Link` type entry which represent a relation between two event mentions.\",\n \"attributes\": [\n {\n \"name\": \"rel_type\",\n \"type\": \"str\",\n \"description\": \"The type of the relation.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.EventMention\",\n \"child_type\": \"ft.onto.base_ontology.EventMention\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.CrossDocEventRelation\",\n \"parent_entry\": 
\"forte.data.ontology.top.MultiPackLink\",\n \"description\": \"A `Link` type entry which represent a relation between two event mentions across the packs.\",\n \"attributes\": [\n {\n \"name\": \"rel_type\",\n \"type\": \"str\",\n \"description\": \"The type of the relation.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.EventMention\",\n \"child_type\": \"ft.onto.base_ontology.EventMention\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.ConstituentNode\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `ConstituentNode` to represent constituents in constituency parsing. This can also sentiment values annotated on the nodes.\",\n \"attributes\": [\n {\n \"name\": \"label\",\n \"type\": \"str\"\n },\n {\n \"name\": \"sentiment\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"float\"\n },\n {\n \"name\": \"is_root\",\n \"type\": \"bool\"\n },\n {\n \"name\": \"is_leaf\",\n \"type\": \"bool\"\n },\n {\n \"name\": \"parent_node\",\n \"type\": \"ft.onto.base_ontology.ConstituentNode\"\n },\n {\n \"name\": \"children_nodes\",\n \"type\": \"List\",\n \"item_type\": \"ft.onto.base_ontology.ConstituentNode\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Title\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Title`, normally used to represent a title.\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Body\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Body`, normally used to represent a document body.\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.MCOption\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.MCQuestion\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"attributes\": [\n {\n \"name\": \"options\",\n \"type\": \"List\",\n \"item_type\": 
\"ft.onto.base_ontology.MCOption\"\n },\n {\n \"name\": \"answers\",\n \"type\": \"List\",\n \"item_type\": \"int\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.MRCQuestion\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"An `Annotation` type which represents an MRC question.\",\n \"attributes\": [\n {\n \"name\": \"qid\",\n \"type\": \"int\"\n },\n {\n \"name\": \"answers\",\n \"type\": \"List\",\n \"item_type\": \"ft.onto.base_ontology.Phrase\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Recording\",\n \"parent_entry\": \"forte.data.ontology.top.AudioAnnotation\",\n \"description\": \"A span based annotation `Recording`, normally used to represent a recording.\",\n \"attributes\": [\n {\n \"name\": \"recording_class\",\n \"type\": \"List\",\n \"item_type\": \"str\",\n \"description\": \"A list of class names that the recording belongs to.\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.AudioUtterance\",\n \"parent_entry\": \"forte.data.ontology.top.AudioAnnotation\",\n \"description\": \"A span based annotation `AudioUtterance`, normally used to represent an utterance in dialogue.\",\n \"attributes\": [\n {\n \"name\": \"speaker\",\n \"type\": \"str\"\n }\n ]\n },\n {\n \"entry_name\": \"ftx.medical.clinical_ontology.NegationContext\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `NegationContext`, used to represent the negation context of a named entity.\",\n \"attributes\": [\n {\n \"name\": \"polarity\",\n \"type\": \"bool\"\n }\n ]\n },\n {\n \"entry_name\": \"ftx.medical.clinical_ontology.MedicalEntityMention\",\n \"parent_entry\": \"ft.onto.base_ontology.EntityMention\",\n \"description\": \"A span based annotation class MedicalEntityMention, used to represent an Entity Mention in medical domain\",\n \"attributes\": [\n {\n \"name\": \"umls_link\",\n \"type\": \"str\"\n },\n {\n \"name\": \"umls_entities\",\n \"type\": 
\"List\",\n \"item_type\": \"ftx.medical.clinical_ontology.UMLSConceptLink\"\n }\n ]\n },\n {\n \"entry_name\": \"ftx.medical.clinical_ontology.MedicalArticle\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"An annotation based representation for the whole medical text chunk/document\",\n \"attributes\": [\n {\n \"name\": \"icd_version\",\n \"type\": \"int\",\n \"description\": \"The version of ICD-Coding being used.\"\n },\n {\n \"name\": \"icd_code\",\n \"type\": \"str\",\n \"description\": \"The ICD code assigned to current medical article.\"\n }\n ]\n }\n ]\n}\n", "config": "{\"legendConfigs\":{\"ft.onto.base_ontology.Token\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"pos\":false,\"ud_xpos\":false,\"lemma\":false,\"chunk\":false,\"ner\":false,\"sense\":false}},\"ft.onto.base_ontology.Subword\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{}},\"ft.onto.base_ontology.Classification\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{}},\"ft.onto.base_ontology.Document\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{}},\"ft.onto.base_ontology.Sentence\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"speaker\":false}},\"ft.onto.base_ontology.Phrase\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"phrase_type\":false}},\"ft.onto.base_ontology.UtteranceContext\":{\"is_selected\":false,\"is_shown\":false},\"ft.onto.base_ontology.Utterance\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{\"speaker\":false}},\"ft.onto.base_ontology.PredicateArgument\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{\"ner_type\":false,\"predicate_lemma\":false}},\"ft.onto.base_ontology.EntityMention\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"ner_type\":false}},\"ft.onto.base_ontology.EventMention\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"event_type\":false}},\"ft.onto.base_ontology.PredicateMention\":{\"is_selected\":false,\"is_
shown\":true,\"attributes\":{\"predicate_lemma\":false,\"framenet_id\":false}},\"ft.onto.base_ontology.PredicateLink\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{\"arg_type\":false}},\"ft.onto.base_ontology.Dependency\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{\"dep_label\":false,\"rel_type\":false}},\"ft.onto.base_ontology.EnhancedDependency\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{\"dep_label\":false}},\"ft.onto.base_ontology.RelationLink\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"rel_type\":false}},\"ft.onto.base_ontology.CrossDocEntityRelation\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{\"rel_type\":false}},\"ft.onto.base_ontology.CoreferenceGroup\":{\"is_selected\":false,\"is_shown\":false},\"ft.onto.base_ontology.EventRelation\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{\"rel_type\":false}},\"ft.onto.base_ontology.CrossDocEventRelation\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{\"rel_type\":false}},\"ft.onto.base_ontology.ConstituentNode\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{\"label\":false}},\"ft.onto.base_ontology.Title\":{\"is_selected\":false,\"is_shown\":false},\"ft.onto.base_ontology.Body\":{\"is_selected\":false,\"is_shown\":false},\"ft.onto.base_ontology.MCOption\":{\"is_selected\":false,\"is_shown\":false},\"ft.onto.base_ontology.MCQuestion\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{}},\"ft.onto.base_ontology.MRCQuestion\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{}},\"ft.onto.base_ontology.Recording\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{}},\"ft.onto.base_ontology.AudioUtterance\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{\"speaker\":false}},\"ftx.medical.clinical_ontology.NegationContext\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{}},\"ftx.medical.clinical_ontology.MedicalEntityMention\":{\"is_selected\":false,\"is_shown\":true,
\"attributes\":{\"umls_link\":false}},\"ftx.medical.clinical_ontology.MedicalArticle\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"icd_code\":false}}},\"scopeConfigs\":{\"ft.onto.base_ontology.Token\":false,\"ft.onto.base_ontology.Subword\":false,\"ft.onto.base_ontology.Document\":false,\"ft.onto.base_ontology.Sentence\":false,\"ft.onto.base_ontology.Phrase\":false,\"ft.onto.base_ontology.UtteranceContext\":false,\"ft.onto.base_ontology.Utterance\":false,\"ft.onto.base_ontology.PredicateArgument\":false,\"ft.onto.base_ontology.EntityMention\":false,\"ft.onto.base_ontology.EventMention\":false,\"ft.onto.base_ontology.PredicateMention\":false,\"ft.onto.base_ontology.ConstituentNode\":false,\"ft.onto.base_ontology.Title\":false,\"ft.onto.base_ontology.Body\":false,\"ft.onto.base_ontology.MCOption\":false,\"ft.onto.base_ontology.MCQuestion\":false,\"ft.onto.base_ontology.MRCQuestion\":false,\"ftx.medical.clinical_ontology.NegationContext\":false,\"ftx.medical.clinical_ontology.MedicalEntityMention\":false,\"ftx.medical.clinical_ontology.MedicalArticle\":false},\"layoutConfigs\":{\"center-middle\":\"default-nlp\",\"left\":\"default-meta\",\"right\":\"default-attribute\",\"center-bottom\":\"disable\"},\"remoteConfigs\":{\"pipelineUrl\":\"\",\"doValidation\":false,\"expectedName\":\"\",\"inputFormat\":\"string\",\"expectedRecords\":{}}}"}
diff --git a/examples/clinical_pipeline/demo/__init__.py b/examples/clinical_pipeline/demo/__init__.py
new file mode 100644
index 00000000..49ecbbf8
--- /dev/null
+++ b/examples/clinical_pipeline/demo/__init__.py
@@ -0,0 +1 @@
+# ***automatically_generated***
diff --git a/examples/clinical_pipeline/demo/clinical.py b/examples/clinical_pipeline/demo/clinical.py
new file mode 100644
index 00000000..68541b46
--- /dev/null
+++ b/examples/clinical_pipeline/demo/clinical.py
@@ -0,0 +1,49 @@
+# ***automatically_generated***
+# ***source json:examples/clinical_pipeline/clinical_onto.json***
+# flake8: noqa
+# mypy: ignore-errors
+# pylint: skip-file
+"""
+Automatically generated ontology clinical. Do not change manually.
+"""
+
+from dataclasses import dataclass
+from forte.data.data_pack import DataPack
+from forte.data.ontology.top import Annotation
+from ft.onto.base_ontology import EntityMention
+
+__all__ = [
+ "ClinicalEntityMention",
+ "Description",
+ "Body",
+]
+
+
+@dataclass
+class ClinicalEntityMention(EntityMention):
+ """
+ A span based annotation `ClinicalEntityMention`, normally used to represent an Entity Mention in a piece of clinical text.
+ """
+
+ def __init__(self, pack: DataPack, begin: int, end: int):
+ super().__init__(pack, begin, end)
+
+
+@dataclass
+class Description(Annotation):
+ """
+ A span based annotation `Description`, used to represent the description in a piece of clinical note.
+ """
+
+ def __init__(self, pack: DataPack, begin: int, end: int):
+ super().__init__(pack, begin, end)
+
+
+@dataclass
+class Body(Annotation):
+ """
+ A span based annotation `Body`, used to represent the actual content in a piece of clinical note.
+ """
+
+ def __init__(self, pack: DataPack, begin: int, end: int):
+ super().__init__(pack, begin, end)
diff --git a/examples/clinical_pipeline/download_models.py b/examples/clinical_pipeline/download_models.py
new file mode 100644
index 00000000..db0d7cca
--- /dev/null
+++ b/examples/clinical_pipeline/download_models.py
@@ -0,0 +1,25 @@
+from forte.data.data_utils import maybe_download
+
+# download resources
+urls = [
+ "https://drive.google.com/file/d/15RSfFkW9syQKtx-_fQ9KshN3BJ27Jf8t/"
+ "view?usp=sharing",
+ "https://drive.google.com/file/d/1Nh7D6Xam5JefdoSXRoL7S0DZK1d4i2UK/"
+ "view?usp=sharing",
+ "https://drive.google.com/file/d/1YWcI60lGKtTFH01Ai1HnwOKBsrFf2r29/"
+ "view?usp=sharing",
+ "https://drive.google.com/file/d/1ElHUEMPQIuWmV0GimroqFphbCvFKskYj/"
+ "view?usp=sharing",
+ "https://drive.google.com/file/d/1EhMXlieoEg-bGUbbQ2vN-iyNJvC4Dajl/"
+ "view?usp=sharing",
+]
+
+filenames = [
+ "config.json",
+ "pytorch_model.bin",
+ "special_tokens_map.json",
+ "tokenizer_config.json",
+ "vocab.txt",
+]
+
+maybe_download(urls=urls, path="resources/NCBI-disease", filenames=filenames)
diff --git a/examples/clinical_pipeline/mimic3_note_reader.py b/examples/clinical_pipeline/mimic3_note_reader.py
new file mode 100644
index 00000000..70604871
--- /dev/null
+++ b/examples/clinical_pipeline/mimic3_note_reader.py
@@ -0,0 +1,82 @@
+# Copyright 2021 The Forte Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import csv
+import logging
+from pathlib import Path
+from typing import Any, Iterator, Union, List
+
+from smart_open import open
+
+from demo.clinical import Description, Body
+from forte.data.data_pack import DataPack
+from forte.data.base_reader import PackReader
+from ft.onto.base_ontology import Document
+
+
+class Mimic3DischargeNoteReader(PackReader):
+ """This class is designed to read the discharge notes from MIMIC3 dataset
+ as plain text packs.
+
+ For more information about the dataset, visit:
+ https://mimic.physionet.org/
+ """
+
+ def __init__(self):
+ super().__init__()
+ self.headers: List[str] = []
+ self.text_col = -1 # Default to be last column.
+ self.description_col = 0 # Default to be first column.
+ self.__note_count = 0 # Count number of notes processed.
+
+ def _collect( # type: ignore
+ self, mimic3_path: Union[Path, str]
+ ) -> Iterator[Any]:
+ with open(mimic3_path) as f:
+ for r in csv.reader(f):
+ if 0 < self.configs.max_num_notes <= self.__note_count:
+ break
+ yield r
+
+ def _parse_pack(self, row: List[str]) -> Iterator[DataPack]:
+ if len(self.headers) == 0:
+ self.headers.extend(row)
+ for i, h in enumerate(self.headers):
+ if h == "TEXT":
+ self.text_col = i
+ logging.info("Text Column is %d", i)
+ if h == "DESCRIPTION":
+ self.description_col = i
+ logging.info("Description Column is %d", i)
+ else:
+ pack: DataPack = DataPack()
+ description: str = row[self.description_col]
+ text: str = row[self.text_col]
+ delimiter = "\n-----------------\n"
+ full_text = description + delimiter + text
+ pack.set_text(full_text)
+
+ Description(pack, 0, len(description))
+ Body(pack, len(description) + len(delimiter), len(full_text))
+ Document(pack, 0, len(pack.text))
+ self.__note_count += 1
+ yield pack
+
+ @classmethod
+ def default_configs(cls):
+ config = super().default_configs()
+ # If this is set (>0), the reader will only read up to
+ # the number specified.
+ config["max_num_notes"] = -1
+ return config
diff --git a/examples/clinical_pipeline/requirements.txt b/examples/clinical_pipeline/requirements.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/examples/clinical_pipeline/sample_data/notes.txt b/examples/clinical_pipeline/sample_data/notes.txt
new file mode 100644
index 00000000..66f03c54
--- /dev/null
+++ b/examples/clinical_pipeline/sample_data/notes.txt
@@ -0,0 +1,6 @@
+ADDENDUM:
+RADIOLOGIC STUDIES: Radiologic studies also included a chest CT, which confirmed cavitary lesions in the left lung apex consistent with infectious process/tuberculosis.
+This also moderate-sized left pleural effusion.
+HEAD CT: Head CT showed no intracranial hemorrhage and no mass effect, but old infarction consistent with past medical history.
+ABDOMINAL CT: Abdominal CT showed no lesions of T10 and sacrum most likely secondary to steoporosis.
+These can be followed by repeat imaging as an outpatient.
\ No newline at end of file
diff --git a/examples/clinical_pipeline/settings.json b/examples/clinical_pipeline/settings.json
new file mode 100644
index 00000000..dd8e44c3
--- /dev/null
+++ b/examples/clinical_pipeline/settings.json
@@ -0,0 +1,3 @@
+{
+ "index.mapping.coerce": false
+}
diff --git a/examples/clinical_pipeline/utterance_searcher.py b/examples/clinical_pipeline/utterance_searcher.py
new file mode 100644
index 00000000..741514c4
--- /dev/null
+++ b/examples/clinical_pipeline/utterance_searcher.py
@@ -0,0 +1,124 @@
+import os
+import logging
+import sqlite3
+from typing import Dict, Any, Optional, List
+from fortex.elastic import ElasticSearchIndexer
+
+from forte.common import Resources, ProcessorConfigError
+from forte.common.configuration import Config
+from forte.data.common_entry_utils import create_utterance, get_last_utterance
+from forte.data.data_pack import DataPack
+from forte.processors.base import PackProcessor
+from ft.onto.base_ontology import Utterance
+
+
+def sqlite_insert(conn, table, row):
+ cols: str = ", ".join('"{}"'.format(col) for col in row.keys())
+ vals: str = ", ".join(":{}".format(col) for col in row.keys())
+ sql: str = 'INSERT INTO "{0}" ({1}) VALUES ({2})'.format(table, cols, vals)
+ cursor = conn.cursor()
+ cursor.execute(sql, row)
+ conn.commit()
+ return cursor.lastrowid
+
+
+def create_links(url_stub: str, ids: List[int]) -> List[str]:
+ links: List[str] = []
+
+ url_stub: str = url_stub.strip("/")
+ for temp_idm in ids:
+ links.append(
+ f"<a href={url_stub}/documents/{temp_idm}>Report #{temp_idm}</a>"
+ )
+ return links
+
+
+class LastUtteranceSearcher(PackProcessor):
+ # pylint: disable=attribute-defined-outside-init
+
+ def initialize(self, resources: Resources, configs: Config):
+ super().initialize(resources, configs)
+ self.index = ElasticSearchIndexer(self.configs.indexer.hparams)
+ if self.configs.query_result_project_id < 0:
+ raise ProcessorConfigError("Query Result Project is not set.")
+
+ if not os.path.exists(self.configs.stave_db_path):
+ raise ProcessorConfigError(
+ f"Cannot find Stave DB at: {self.configs.stave_db_path}"
+ )
+
+ def _process(self, input_pack: DataPack):
+ # Make sure we take the last utterance from the user.
+ utterance: Optional[Utterance] = get_last_utterance(input_pack, "user")
+
+ if utterance is not None:
+ logging.info("The last utterance is %s", utterance)
+ # Create the query using the last utterance from user.
+ size = self.configs.size or 1000
+ field = self.configs.field or "content"
+ query_value = {
+ "query": {"match": {field: utterance.text}},
+ "size": size,
+ }
+
+ # Search against the index.
+ results = self.index.search(query_value)
+ hits = results["hits"]["hits"]
+
+ conn = sqlite3.connect(self.configs.stave_db_path)
+
+ answers = []
+ for idx, hit in enumerate(hits):
+ source = hit["_source"]
+ # The raw pack string and pack id (not database id)
+ raw_pack_str: str = source["pack_info"]
+ pack_id: str = source["doc_id"]
+
+ # Now you can write the pack into the database and generate url.
+ item = {
+ "name": f"clinical_results_{idx}",
+ "textPack": raw_pack_str,
+ "project_id": self.configs.query_result_project_id,
+ }
+
+ db_id = sqlite_insert(conn, "stave_backend_document", item)
+ answers += [db_id]
+ print(pack_id, db_id)
+
+ if len(answers) == 0:
+ create_utterance(
+ input_pack,
+ "No results found. Please try another query.",
+ "ai",
+ )
+ else:
+ links: List[str] = create_links(self.configs.url_stub, answers)
+ response_text: str = (
+ "I found the following results: <br> -- "
+ + "<br> -- ".join(links)
+ )
+ print(response_text)
+
+ create_utterance(input_pack, response_text, "ai")
+ else:
+ logging.info("Cannot get another utterance.")
+ create_utterance(
+ input_pack,
+ "Hey, I didn't get what you say, could you try again?",
+ "ai",
+ )
+
+ @classmethod
+ def default_configs(cls) -> Dict[str, Any]:
+ return {
+ "size": 5,
+ "field": "content",
+ "indexer": {
+ "name": "ElasticSearchIndexer",
+ "hparams": ElasticSearchIndexer.default_configs(),
+ "other_kwargs": {"request_timeout": 10, "refresh": False},
+ },
+ "stave_db_path": "~/projects/stave/simple-backend/db.sqlite3",
+ "url_stub": "http://localhost:3000",
+ "query_result_project_id": -1,
+ }
From e3c713a229759f151578bfd4d0748fb58a3f9a1a Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 8 Jul 2022 22:16:58 +0800
Subject: [PATCH 02/21] Remove the duplicate processor
---
examples/clinical_pipeline/clinical_pipeline.py | 2 --
1 file changed, 2 deletions(-)
diff --git a/examples/clinical_pipeline/clinical_pipeline.py b/examples/clinical_pipeline/clinical_pipeline.py
index a2880e1a..a275c9d3 100644
--- a/examples/clinical_pipeline/clinical_pipeline.py
+++ b/examples/clinical_pipeline/clinical_pipeline.py
@@ -82,9 +82,7 @@ def main(
pl.add(SpacyProcessor(), {"processors": ["sentence", "tokenize"]})
pl.add(NLTKPOSTagger())
-
pl.add(BioBERTNERPredictor(), config=config.BioBERTNERPredictor)
- pl.add(SpacyProcessor(), config.Spacy)
pl.add(NegationContextAnalyzer())
pl.add(
ICDCodingProcessor(),
From ec523ce594cbf72de17d0c96726452d5bbd8e121 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 8 Jul 2022 23:03:34 +0800
Subject: [PATCH 03/21] Change the reader to the right processor
---
examples/clinical_pipeline/clinical_pipeline.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/examples/clinical_pipeline/clinical_pipeline.py b/examples/clinical_pipeline/clinical_pipeline.py
index a275c9d3..74b914c0 100644
--- a/examples/clinical_pipeline/clinical_pipeline.py
+++ b/examples/clinical_pipeline/clinical_pipeline.py
@@ -5,7 +5,7 @@
import yaml
from forte.common.configuration import Config
from forte.data.data_pack import DataPack
-from forte.data.readers import RawDataDeserializeReader, RawPackReader
+from forte.data.readers import RawDataDeserializeReader, PlainTextReader
from forte.pipeline import Pipeline
from forte.processors.writers import PackIdJsonPackWriter
from fortex.elastic import ElasticSearchPackIndexProcessor
@@ -78,7 +78,7 @@ def main(
Mimic3DischargeNoteReader(), config={"max_num_notes": max_packs}
)
else:
- pl.set_reader(RawPackReader())
+ pl.set_reader(PlainTextReader())
pl.add(SpacyProcessor(), {"processors": ["sentence", "tokenize"]})
pl.add(NLTKPOSTagger())
From 2ce1de277a54716619cd4666a3f560ccc4c7b62d Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 8 Jul 2022 23:20:15 +0800
Subject: [PATCH 04/21] Fix a comment
---
examples/clinical_pipeline/clinical_pipeline.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/clinical_pipeline/clinical_pipeline.py b/examples/clinical_pipeline/clinical_pipeline.py
index 74b914c0..480c76a4 100644
--- a/examples/clinical_pipeline/clinical_pipeline.py
+++ b/examples/clinical_pipeline/clinical_pipeline.py
@@ -71,7 +71,7 @@ def main(
print("Starting demo pipeline example..")
config = yaml.safe_load(open("clinical_config.yml", "r"))
config = Config(config, default_hparams=None)
- print("Running NER pipeline...")
+ print("Running pipeline...")
pl = Pipeline[DataPack]()
if use_mimiciii_reader == 1:
pl.set_reader(
From b132b9dbffdf733bcde596535fb112d011443ad1 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Fri, 8 Jul 2022 23:21:16 +0800
Subject: [PATCH 05/21] Add README.md to provide comprehensive tutorials
---
examples/clinical_pipeline/README.md | 120 +++++++++++++++++++++++++++
1 file changed, 120 insertions(+)
create mode 100644 examples/clinical_pipeline/README.md
diff --git a/examples/clinical_pipeline/README.md b/examples/clinical_pipeline/README.md
new file mode 100644
index 00000000..708dd0ad
--- /dev/null
+++ b/examples/clinical_pipeline/README.md
@@ -0,0 +1,120 @@
+## A Clinical Information Processing Example
+
+This example shows how we can construct a project to make ForteHealth and Stave work
+ side by side.
+
+## Install extra dependencies
+
+To install the latest code directly from source,
+
+```bash
+pip install git+https://git@github.com/asyml/forte-wrappers#egg=forte.elastic\&subdirectory=src/elastic
+pip install git+https://git@github.com/asyml/forte-wrappers#egg=forte.spacy\&subdirectory=src/spacy
+pip install git+https://git@github.com/asyml/forte-wrappers#egg=forte.nltk\&subdirectory=src/nltk
+```
+
+To install from PyPI,
+```bash
+pip install forte.elastic
+pip install forte.spacy
+pip install forte.nltk
+```
+
+## Downloading the models
+
+This example includes the following six functions:
+1. Sentence Segmentation
+2. Tokenization
+3. POS Tagging
+4. Bio Named Entity Recognition
+5. Negation Context Analysis
+6. ICD Coding Detection
+
+Before running the pipeline, we need to download some of the models:
+
+```bash
+python ./download_models.py
+```
+
+**Note**: The above script will save the model in `resources/NCBI-disease`. Use `--path` option to save the model into a different directory.
+
+## Set up the configuration
+Before running Elasticsearch and Stave, we need to ensure that the current configuration is compatible with the environment of our computer.
+
+Please check and change the following configurations in `clinical_config.yml`:
+
+1. Ensure `LastUtteranceSearcher.stave_db_path` (line 16) is the correct path -> `$HOME/.stave`, e.g., `"/home/name/.stave"`.
+2. Ensure `Stave.username` (line 26) and `Stave.pw` (line 27) are `"admin"` and `"admin"`, respectively.
+
+
+
+
+## Prepare elastic searcher
+Download the corresponding Elasticsearch archive from https://www.elastic.co/downloads/past-releases/elasticsearch-7-17-2, unzip it, and run `elasticsearch-7.17.2/bin/elasticsearch` to start the service.
+
+Run the following to check if elasticsearch is running properly:
+```bash
+curl -XGET localhost:9200/_cluster/health?pretty
+```
+
+Make sure you create the index `elastic_indexer` in the cluster before working with this example. To do so, run the following command:
+```bash
+curl -X PUT localhost:9200/elastic_indexer
+```
+
+You can also follow the online blog for more information:
+
+https://www.elastic.co/guide/en/elasticsearch/reference/current/starting-elasticsearch.html
+
+
+## Run indexer and Stave
+First, you should start an Elastic Indexer backend.
+
+Then, to start the Stave server that our pipeline will connect to for visualization purposes, run
+```bash
+stave -s start -o -l -n 8899
+```
+
+Here, you need to make sure `LastUtteranceSearcher.url` and `Stave.url` in `clinical_config.yml` are both `"http://localhost:8899"`. Alternatively, you can replace 8899 with any port you like, as long as the same port is used in the command above and in both URLs.
+
+
+## Run demo pipeline
+
+Now, open a new terminal, other than the one running stave server. You can run the following command to parse some files and index them.
+```bash
+python clinical_pipeline.py /path_to_mimiciii/1.4/NOTEEVENTS.csv.gz /path_to_sample_output 100 1
+```
+
+The last argument, `use_mimiciii_reader`, controls whether to use the `Mimic3DischargeNoteReader()`. Set it to `1` if the input data is the MIMIC-III dataset; otherwise, set it to `0` to use the `PlainTextReader()`.
+
+If you just need to check the remote pipeline connection to Stave,
+you can comment out Line 74 to Line 118 in `clinical_pipeline.py`.
+
+
+Hence, if you wish to run the demo pipeline with existing database entries and only connect with Stave for visualization, you can mask out Line 74 to Line 118 in `clinical_pipeline.py` and run this command:
+
+```bash
+python clinical_pipeline.py ./ ./ 100 0
+```
+
+Here, we also write out the raw data pack to `/path_to_sample_output`, and only
+index the first 100 notes. Remove the `100` parameter to index all documents.
+
+## Visualization
+
+You can go ahead and open `http://localhost:8899` on your browser to access Stave UI.
+Next, you will see two projects, named `clinical_pipeline_base` and `clinical_pipeline_chat` by default.
+
+
+
+Click on `clinical_pipeline_chat` and then the document that resides within to go to the chatbot/search UI. Enter the keywords you want to search for in the Elasticsearch indices. The pipeline will then return a list of documents that match your keywords. Click on those document links to access the Annotation Viewer UI for those documents.
+
+
+
+
+
+
+## Add the output data
+We write out the raw data pack to `/path_to_sample_output`, so you can see many json files in the directory.
+
+Click on `clinical_pipeline_base` and add the json file to the documents. Click on those document links to access the Annotation Viewer UI for those documents.
\ No newline at end of file
From 04af61187991b77a4fd495833545363d97b3c4c9 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sat, 9 Jul 2022 00:20:24 +0800
Subject: [PATCH 06/21] Change the configuration to default
---
examples/clinical_pipeline/clinical_config.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/clinical_pipeline/clinical_config.yml b/examples/clinical_pipeline/clinical_config.yml
index e5be72fa..31ecdd65 100644
--- a/examples/clinical_pipeline/clinical_config.yml
+++ b/examples/clinical_pipeline/clinical_config.yml
@@ -13,7 +13,7 @@ BioBERTNERPredictor:
ignore_labels: ["O"]
LastUtteranceSearcher:
- stave_db_path: "C://Users//Leo//.stave//db.sqlite3"
+ stave_db_path: "//home//name//.stave//db.sqlite3"
url: "http://localhost:8899"
Remote:
From 500a0ceb8d533a0ca9bb2141386b01767afe5906 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sat, 9 Jul 2022 09:52:44 +0800
Subject: [PATCH 07/21] fix the wrong serialization in all the json files
---
examples/clinical_pipeline/chat_project.json | 682 +++++++++++++++-
.../chat_query_document.json | 43 +-
.../default_onto_project.json | 730 +++++++++++++++++-
3 files changed, 1450 insertions(+), 5 deletions(-)
diff --git a/examples/clinical_pipeline/chat_project.json b/examples/clinical_pipeline/chat_project.json
index b95b186e..f242e2fa 100644
--- a/examples/clinical_pipeline/chat_project.json
+++ b/examples/clinical_pipeline/chat_project.json
@@ -1,5 +1,683 @@
{
"name": "clinical_pipeline_chat",
"project_type": "single_pack",
- "ontology": "{\n \"name\": \"base_ontology\",\n \"definitions\": [\n {\n \"entry_name\": \"ft.onto.base_ontology.Token\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation :class:`Token`, used to represent a token or a word.\",\n \"attributes\": [\n {\n \"name\": \"pos\",\n \"type\": \"str\"\n },\n {\n \"name\": \"ud_xpos\",\n \"type\": \"str\",\n \"description\": \"Language specific pos tag. Used in CoNLL-U Format. Refer to https://universaldependencies.org/format.html\"\n },\n {\n \"name\": \"lemma\",\n \"type\": \"str\",\n \"description\": \"Lemma or stem of word form.\"\n },\n {\n \"name\": \"chunk\",\n \"type\": \"str\"\n },\n {\n \"name\": \"ner\",\n \"type\": \"str\"\n },\n {\n \"name\": \"sense\",\n \"type\": \"str\"\n },\n {\n \"name\": \"is_root\",\n \"type\": \"bool\"\n },\n {\n \"name\": \"ud_features\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"str\"\n },\n {\n \"name\": \"ud_misc\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"str\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Subword\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"Used to represent subword tokenization results.\",\n \"attributes\": [\n {\n \"name\": \"is_first_segment\",\n \"type\": \"bool\"\n },\n {\n \"name\": \"is_unk\",\n \"type\": \"bool\"\n },\n {\n \"name\": \"vocab_id\",\n \"type\": \"int\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Classification\",\n \"parent_entry\": \"forte.data.ontology.top.Generics\",\n \"description\": \"Used to store values for classification prediction\",\n \"attributes\": [\n {\n \"name\": \"classification_result\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"float\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Document\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Document`, normally 
used to represent a document.\",\n \"attributes\": [\n {\n \"name\": \"document_class\",\n \"type\": \"List\",\n \"item_type\": \"str\",\n \"description\": \"A list of class names that the document belongs to.\"\n },\n {\n \"name\": \"sentiment\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"float\"\n },\n {\n \"name\": \"classifications\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"ft.onto.base_ontology.Classification\",\n \"description\": \"Stores the classification results for this document. The key is the name/task of the classification, the value is an classification object storing the results.\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Sentence\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Sentence`, normally used to represent a sentence.\",\n \"attributes\": [\n {\n \"name\": \"speaker\",\n \"type\": \"str\"\n },\n {\n \"name\": \"part_id\",\n \"type\": \"int\"\n },\n {\n \"name\": \"sentiment\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"float\"\n },\n {\n \"name\": \"classification\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"float\"\n },\n {\n \"name\": \"classifications\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"ft.onto.base_ontology.Classification\",\n \"description\": \"Stores the classification results for this sentence. 
The key is the name/task of the classification, the value is an classification object storing the results.\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Phrase\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Phrase`.\",\n \"attributes\": [\n {\n \"name\": \"phrase_type\",\n \"type\": \"str\"\n },\n {\n \"name\": \"headword\",\n \"type\": \"ft.onto.base_ontology.Token\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.UtteranceContext\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"`UtteranceContext` represents the context part in dialogue.\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Utterance\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Utterance`, normally used to represent an utterance in dialogue.\",\n \"attributes\": [\n {\n \"name\": \"speaker\",\n \"type\": \"str\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.PredicateArgument\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `PredicateArgument`, normally used to represent an argument of a predicate, can be linked to the predicate via the predicate link.\",\n \"attributes\": [\n {\n \"name\": \"ner_type\",\n \"type\": \"str\"\n },\n {\n \"name\": \"predicate_lemma\",\n \"type\": \"str\"\n },\n {\n \"name\": \"is_verb\",\n \"type\": \"bool\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.EntityMention\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `EntityMention`, normally used to represent an Entity Mention in a piece of text.\",\n \"attributes\": [\n {\n \"name\": \"ner_type\",\n \"type\": \"str\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.EventMention\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation 
`EventMention`, used to refer to a mention of an event.\",\n \"attributes\": [\n {\n \"name\": \"event_type\",\n \"type\": \"str\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.PredicateMention\",\n \"parent_entry\": \"ft.onto.base_ontology.Phrase\",\n \"description\": \"A span based annotation `PredicateMention`, normally used to represent a predicate (normally verbs) in a piece of text.\",\n \"attributes\": [\n {\n \"name\": \"predicate_lemma\",\n \"type\": \"str\"\n },\n {\n \"name\": \"framenet_id\",\n \"type\": \"str\"\n },\n {\n \"name\": \"is_verb\",\n \"type\": \"bool\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.PredicateLink\",\n \"parent_entry\": \"forte.data.ontology.top.Link\",\n \"description\": \"A `Link` type entry which represent a semantic role link between a predicate and its argument.\",\n \"attributes\": [\n {\n \"name\": \"arg_type\",\n \"type\": \"str\",\n \"description\": \"The predicate link type.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.PredicateMention\",\n \"child_type\": \"ft.onto.base_ontology.PredicateArgument\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Dependency\",\n \"parent_entry\": \"forte.data.ontology.top.Link\",\n \"description\": \"A `Link` type entry which represent a syntactic dependency.\",\n \"attributes\": [\n {\n \"name\": \"dep_label\",\n \"type\": \"str\",\n \"description\": \"The dependency label.\"\n },\n {\n \"name\": \"rel_type\",\n \"type\": \"str\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.Token\",\n \"child_type\": \"ft.onto.base_ontology.Token\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.EnhancedDependency\",\n \"parent_entry\": \"forte.data.ontology.top.Link\",\n \"description\": \"A `Link` type entry which represent a enhanced dependency: \\n https://universaldependencies.org/u/overview/enhanced-syntax.html\",\n \"attributes\": [\n {\n \"name\": \"dep_label\",\n \"type\": \"str\",\n \"description\": \"The enhanced dependency label in 
Universal Dependency.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.Token\",\n \"child_type\": \"ft.onto.base_ontology.Token\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.RelationLink\",\n \"parent_entry\": \"forte.data.ontology.top.Link\",\n \"description\": \"A `Link` type entry which represent a relation between two entity mentions\",\n \"attributes\": [\n {\n \"name\": \"rel_type\",\n \"type\": \"str\",\n \"description\": \"The type of the relation.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.EntityMention\",\n \"child_type\": \"ft.onto.base_ontology.EntityMention\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.CrossDocEntityRelation\",\n \"parent_entry\": \"forte.data.ontology.top.MultiPackLink\",\n \"description\": \"A `Link` type entry which represent a relation between two entity mentions across the packs.\",\n \"attributes\": [\n {\n \"name\": \"rel_type\",\n \"type\": \"str\",\n \"description\": \"The type of the relation.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.EntityMention\",\n \"child_type\": \"ft.onto.base_ontology.EntityMention\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.CoreferenceGroup\",\n \"parent_entry\": \"forte.data.ontology.top.Group\",\n \"description\": \"A group type entry that take `EntityMention`, as members, used to represent coreferent group of entities.\",\n \"member_type\": \"ft.onto.base_ontology.EntityMention\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.EventRelation\",\n \"parent_entry\": \"forte.data.ontology.top.Link\",\n \"description\": \"A `Link` type entry which represent a relation between two event mentions.\",\n \"attributes\": [\n {\n \"name\": \"rel_type\",\n \"type\": \"str\",\n \"description\": \"The type of the relation.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.EventMention\",\n \"child_type\": \"ft.onto.base_ontology.EventMention\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.CrossDocEventRelation\",\n \"parent_entry\": 
\"forte.data.ontology.top.MultiPackLink\",\n \"description\": \"A `Link` type entry which represent a relation between two event mentions across the packs.\",\n \"attributes\": [\n {\n \"name\": \"rel_type\",\n \"type\": \"str\",\n \"description\": \"The type of the relation.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.EventMention\",\n \"child_type\": \"ft.onto.base_ontology.EventMention\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.ConstituentNode\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `ConstituentNode` to represent constituents in constituency parsing. This can also sentiment values annotated on the nodes.\",\n \"attributes\": [\n {\n \"name\": \"label\",\n \"type\": \"str\"\n },\n {\n \"name\": \"sentiment\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"float\"\n },\n {\n \"name\": \"is_root\",\n \"type\": \"bool\"\n },\n {\n \"name\": \"is_leaf\",\n \"type\": \"bool\"\n },\n {\n \"name\": \"parent_node\",\n \"type\": \"ft.onto.base_ontology.ConstituentNode\"\n },\n {\n \"name\": \"children_nodes\",\n \"type\": \"List\",\n \"item_type\": \"ft.onto.base_ontology.ConstituentNode\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Title\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Title`, normally used to represent a title.\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Body\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Body`, normally used to represent a document body.\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.MCOption\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.MCQuestion\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"attributes\": [\n {\n \"name\": \"options\",\n \"type\": \"List\",\n \"item_type\": 
\"ft.onto.base_ontology.MCOption\"\n },\n {\n \"name\": \"answers\",\n \"type\": \"List\",\n \"item_type\": \"int\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.MRCQuestion\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"An `Annotation` type which represents an MRC question.\",\n \"attributes\": [\n {\n \"name\": \"qid\",\n \"type\": \"int\"\n },\n {\n \"name\": \"answers\",\n \"type\": \"List\",\n \"item_type\": \"ft.onto.base_ontology.Phrase\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Recording\",\n \"parent_entry\": \"forte.data.ontology.top.AudioAnnotation\",\n \"description\": \"A span based annotation `Recording`, normally used to represent a recording.\",\n \"attributes\": [\n {\n \"name\": \"recording_class\",\n \"type\": \"List\",\n \"item_type\": \"str\",\n \"description\": \"A list of class names that the recording belongs to.\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.AudioUtterance\",\n \"parent_entry\": \"forte.data.ontology.top.AudioAnnotation\",\n \"description\": \"A span based annotation `AudioUtterance`, normally used to represent an utterance in dialogue.\",\n \"attributes\": [\n {\n \"name\": \"speaker\",\n \"type\": \"str\"\n }\n ]\n },\n {\n \"entry_name\": \"ftx.medical.clinical_ontology.NegationContext\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `NegationContext`, used to represent the negation context of a named entity.\",\n \"attributes\": [\n {\n \"name\": \"polarity\",\n \"type\": \"bool\"\n }\n ]\n }\n ]\n}\n", "config": 
"{\"legendConfigs\":{\"ft.onto.base_ontology.Token\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"pos\":false,\"ud_xpos\":false,\"lemma\":false,\"chunk\":false,\"ner\":false,\"sense\":false}},\"ft.onto.base_ontology.Subword\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{}},\"ft.onto.base_ontology.Classification\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{}},\"ft.onto.base_ontology.Document\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{}},\"ft.onto.base_ontology.Sentence\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"speaker\":false}},\"ft.onto.base_ontology.Phrase\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"phrase_type\":false}},\"ft.onto.base_ontology.UtteranceContext\":{\"is_selected\":false,\"is_shown\":true},\"ft.onto.base_ontology.Utterance\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"speaker\":false}},\"ft.onto.base_ontology.PredicateArgument\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"ner_type\":false,\"predicate_lemma\":false}},\"ft.onto.base_ontology.EntityMention\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"ner_type\":false}},\"ft.onto.base_ontology.EventMention\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"event_type\":false}},\"ft.onto.base_ontology.PredicateMention\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"predicate_lemma\":false,\"framenet_id\":false}},\"ft.onto.base_ontology.PredicateLink\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"arg_type\":false}},\"ft.onto.base_ontology.Dependency\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"dep_label\":false,\"rel_type\":false}},\"ft.onto.base_ontology.EnhancedDependency\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"dep_label\":false}},\"ft.onto.base_ontology.RelationLink\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"rel_type\":false}},\"ft.onto.base_ontology.CrossDocEntityRelation\":{\
"is_selected\":false,\"is_shown\":true,\"attributes\":{\"rel_type\":false}},\"ft.onto.base_ontology.CoreferenceGroup\":{\"is_selected\":false,\"is_shown\":true},\"ft.onto.base_ontology.EventRelation\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"rel_type\":false}},\"ft.onto.base_ontology.CrossDocEventRelation\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"rel_type\":false}},\"ft.onto.base_ontology.ConstituentNode\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"label\":false}},\"ft.onto.base_ontology.Title\":{\"is_selected\":false,\"is_shown\":true},\"ft.onto.base_ontology.Body\":{\"is_selected\":false,\"is_shown\":true},\"ft.onto.base_ontology.MCOption\":{\"is_selected\":false,\"is_shown\":true},\"ft.onto.base_ontology.MCQuestion\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{}},\"ft.onto.base_ontology.MRCQuestion\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{}},\"ft.onto.base_ontology.Recording\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{}},\"ft.onto.base_ontology.AudioUtterance\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"speaker\":false}},\"ftx.medical.clinical_ontology.NegationContext\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{}}},\"scopeConfigs\":{\"ft.onto.base_ontology.Token\":false,\"ft.onto.base_ontology.Subword\":false,\"ft.onto.base_ontology.Document\":false,\"ft.onto.base_ontology.Sentence\":false,\"ft.onto.base_ontology.Phrase\":false,\"ft.onto.base_ontology.UtteranceContext\":false,\"ft.onto.base_ontology.Utterance\":false,\"ft.onto.base_ontology.PredicateArgument\":false,\"ft.onto.base_ontology.EntityMention\":false,\"ft.onto.base_ontology.EventMention\":false,\"ft.onto.base_ontology.PredicateMention\":false,\"ft.onto.base_ontology.ConstituentNode\":false,\"ft.onto.base_ontology.Title\":false,\"ft.onto.base_ontology.Body\":false,\"ft.onto.base_ontology.MCOption\":false,\"ft.onto.base_ontology.MCQuestion\":false,\"ft.onto.base_ontology.MRCQu
estion\":false,\"ftx.medical.clinical_ontology.NegationContext\":false},\"layoutConfigs\":{\"center-middle\":\"DialogueBox\",\"left\":\"disable\",\"right\":\"disable\",\"center-bottom\":\"disable\"},\"remoteConfigs\":{\"pipelineUrl\":\"http://localhost:8008\",\"doValidation\":false,\"expectedName\":\"\",\"inputFormat\":\"string\",\"expectedRecords\":{}}}"}
-
+ "ontology": {
+ "name": "base_ontology",
+ "definitions": [
+ {
+ "entry_name": "ft.onto.base_ontology.Token",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation :class:`Token`, used to represent a token or a word.",
+ "attributes": [
+ {
+ "name": "pos",
+ "type": "str"
+ },
+ {
+ "name": "ud_xpos",
+ "type": "str",
+ "description": "Language specific pos tag. Used in CoNLL-U Format. Refer to https://universaldependencies.org/format.html"
+ },
+ {
+ "name": "lemma",
+ "type": "str",
+ "description": "Lemma or stem of word form."
+ },
+ {
+ "name": "chunk",
+ "type": "str"
+ },
+ {
+ "name": "ner",
+ "type": "str"
+ },
+ {
+ "name": "sense",
+ "type": "str"
+ },
+ {
+ "name": "is_root",
+ "type": "bool"
+ },
+ {
+ "name": "ud_features",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "str"
+ },
+ {
+ "name": "ud_misc",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "str"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Subword",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "Used to represent subword tokenization results.",
+ "attributes": [
+ {
+ "name": "is_first_segment",
+ "type": "bool"
+ },
+ {
+ "name": "is_unk",
+ "type": "bool"
+ },
+ {
+ "name": "vocab_id",
+ "type": "int"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Classification",
+ "parent_entry": "forte.data.ontology.top.Generics",
+ "description": "Used to store values for classification prediction",
+ "attributes": [
+ {
+ "name": "classification_result",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "float"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Document",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Document`, normally used to represent a document.",
+ "attributes": [
+ {
+ "name": "document_class",
+ "type": "List",
+ "item_type": "str",
+ "description": "A list of class names that the document belongs to."
+ },
+ {
+ "name": "sentiment",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "float"
+ },
+ {
+ "name": "classifications",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "ft.onto.base_ontology.Classification",
+ "description": "Stores the classification results for this document. The key is the name/task of the classification, the value is an classification object storing the results."
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Sentence",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Sentence`, normally used to represent a sentence.",
+ "attributes": [
+ {
+ "name": "speaker",
+ "type": "str"
+ },
+ {
+ "name": "part_id",
+ "type": "int"
+ },
+ {
+ "name": "sentiment",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "float"
+ },
+ {
+ "name": "classification",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "float"
+ },
+ {
+ "name": "classifications",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "ft.onto.base_ontology.Classification",
+ "description": "Stores the classification results for this sentence. The key is the name/task of the classification, the value is an classification object storing the results."
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Phrase",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Phrase`.",
+ "attributes": [
+ {
+ "name": "phrase_type",
+ "type": "str"
+ },
+ {
+ "name": "headword",
+ "type": "ft.onto.base_ontology.Token"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.UtteranceContext",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "`UtteranceContext` represents the context part in dialogue."
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Utterance",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Utterance`, normally used to represent an utterance in dialogue.",
+ "attributes": [
+ {
+ "name": "speaker",
+ "type": "str"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.PredicateArgument",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `PredicateArgument`, normally used to represent an argument of a predicate, can be linked to the predicate via the predicate link.",
+ "attributes": [
+ {
+ "name": "ner_type",
+ "type": "str"
+ },
+ {
+ "name": "predicate_lemma",
+ "type": "str"
+ },
+ {
+ "name": "is_verb",
+ "type": "bool"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.EntityMention",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `EntityMention`, normally used to represent an Entity Mention in a piece of text.",
+ "attributes": [
+ {
+ "name": "ner_type",
+ "type": "str"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.EventMention",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `EventMention`, used to refer to a mention of an event.",
+ "attributes": [
+ {
+ "name": "event_type",
+ "type": "str"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.PredicateMention",
+ "parent_entry": "ft.onto.base_ontology.Phrase",
+ "description": "A span based annotation `PredicateMention`, normally used to represent a predicate (normally verbs) in a piece of text.",
+ "attributes": [
+ {
+ "name": "predicate_lemma",
+ "type": "str"
+ },
+ {
+ "name": "framenet_id",
+ "type": "str"
+ },
+ {
+ "name": "is_verb",
+ "type": "bool"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.PredicateLink",
+ "parent_entry": "forte.data.ontology.top.Link",
+ "description": "A `Link` type entry which represent a semantic role link between a predicate and its argument.",
+ "attributes": [
+ {
+ "name": "arg_type",
+ "type": "str",
+ "description": "The predicate link type."
+ }
+ ],
+ "parent_type": "ft.onto.base_ontology.PredicateMention",
+ "child_type": "ft.onto.base_ontology.PredicateArgument"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Dependency",
+ "parent_entry": "forte.data.ontology.top.Link",
+ "description": "A `Link` type entry which represent a syntactic dependency.",
+ "attributes": [
+ {
+ "name": "dep_label",
+ "type": "str",
+ "description": "The dependency label."
+ },
+ {
+ "name": "rel_type",
+ "type": "str"
+ }
+ ],
+ "parent_type": "ft.onto.base_ontology.Token",
+ "child_type": "ft.onto.base_ontology.Token"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.EnhancedDependency",
+ "parent_entry": "forte.data.ontology.top.Link",
+ "description": "A `Link` type entry which represent a enhanced dependency: \n https://universaldependencies.org/u/overview/enhanced-syntax.html",
+ "attributes": [
+ {
+ "name": "dep_label",
+ "type": "str",
+ "description": "The enhanced dependency label in Universal Dependency."
+ }
+ ],
+ "parent_type": "ft.onto.base_ontology.Token",
+ "child_type": "ft.onto.base_ontology.Token"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.RelationLink",
+ "parent_entry": "forte.data.ontology.top.Link",
+ "description": "A `Link` type entry which represent a relation between two entity mentions",
+ "attributes": [
+ {
+ "name": "rel_type",
+ "type": "str",
+ "description": "The type of the relation."
+ }
+ ],
+ "parent_type": "ft.onto.base_ontology.EntityMention",
+ "child_type": "ft.onto.base_ontology.EntityMention"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.CrossDocEntityRelation",
+ "parent_entry": "forte.data.ontology.top.MultiPackLink",
+ "description": "A `Link` type entry which represents a relation between two entity mentions across the packs.",
+ "attributes": [
+ {
+ "name": "rel_type",
+ "type": "str",
+ "description": "The type of the relation."
+ }
+ ],
+ "parent_type": "ft.onto.base_ontology.EntityMention",
+ "child_type": "ft.onto.base_ontology.EntityMention"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.CoreferenceGroup",
+ "parent_entry": "forte.data.ontology.top.Group",
+ "description": "A group type entry that takes `EntityMention` as members, used to represent a coreferent group of entities.",
+ "member_type": "ft.onto.base_ontology.EntityMention"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.EventRelation",
+ "parent_entry": "forte.data.ontology.top.Link",
+ "description": "A `Link` type entry which represents a relation between two event mentions.",
+ "attributes": [
+ {
+ "name": "rel_type",
+ "type": "str",
+ "description": "The type of the relation."
+ }
+ ],
+ "parent_type": "ft.onto.base_ontology.EventMention",
+ "child_type": "ft.onto.base_ontology.EventMention"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.CrossDocEventRelation",
+ "parent_entry": "forte.data.ontology.top.MultiPackLink",
+ "description": "A `Link` type entry which represents a relation between two event mentions across the packs.",
+ "attributes": [
+ {
+ "name": "rel_type",
+ "type": "str",
+ "description": "The type of the relation."
+ }
+ ],
+ "parent_type": "ft.onto.base_ontology.EventMention",
+ "child_type": "ft.onto.base_ontology.EventMention"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.ConstituentNode",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `ConstituentNode` to represent constituents in constituency parsing. This can also hold sentiment values annotated on the nodes.",
+ "attributes": [
+ {
+ "name": "label",
+ "type": "str"
+ },
+ {
+ "name": "sentiment",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "float"
+ },
+ {
+ "name": "is_root",
+ "type": "bool"
+ },
+ {
+ "name": "is_leaf",
+ "type": "bool"
+ },
+ {
+ "name": "parent_node",
+ "type": "ft.onto.base_ontology.ConstituentNode"
+ },
+ {
+ "name": "children_nodes",
+ "type": "List",
+ "item_type": "ft.onto.base_ontology.ConstituentNode"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Title",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Title`, normally used to represent a title."
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Body",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Body`, normally used to represent a document body."
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.MCOption",
+ "parent_entry": "forte.data.ontology.top.Annotation"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.MCQuestion",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "attributes": [
+ {
+ "name": "options",
+ "type": "List",
+ "item_type": "ft.onto.base_ontology.MCOption"
+ },
+ {
+ "name": "answers",
+ "type": "List",
+ "item_type": "int"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.MRCQuestion",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "An `Annotation` type which represents an MRC question.",
+ "attributes": [
+ {
+ "name": "qid",
+ "type": "int"
+ },
+ {
+ "name": "answers",
+ "type": "List",
+ "item_type": "ft.onto.base_ontology.Phrase"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Recording",
+ "parent_entry": "forte.data.ontology.top.AudioAnnotation",
+ "description": "A span based annotation `Recording`, normally used to represent a recording.",
+ "attributes": [
+ {
+ "name": "recording_class",
+ "type": "List",
+ "item_type": "str",
+ "description": "A list of class names that the recording belongs to."
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.AudioUtterance",
+ "parent_entry": "forte.data.ontology.top.AudioAnnotation",
+ "description": "A span based annotation `AudioUtterance`, normally used to represent an utterance in dialogue.",
+ "attributes": [
+ {
+ "name": "speaker",
+ "type": "str"
+ }
+ ]
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.NegationContext",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `NegationContext`, used to represent the negation context of a named entity.",
+ "attributes": [
+ {
+ "name": "polarity",
+ "type": "bool"
+ }
+ ]
+ }
+ ]
+ },
+ "config": {
+ "legendConfigs": {
+ "ft.onto.base_ontology.Token": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "pos": false,
+ "ud_xpos": false,
+ "lemma": false,
+ "chunk": false,
+ "ner": false,
+ "sense": false
+ }
+ },
+ "ft.onto.base_ontology.Subword": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {}
+ },
+ "ft.onto.base_ontology.Classification": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {}
+ },
+ "ft.onto.base_ontology.Document": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {}
+ },
+ "ft.onto.base_ontology.Sentence": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "speaker": false
+ }
+ },
+ "ft.onto.base_ontology.Phrase": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "phrase_type": false
+ }
+ },
+ "ft.onto.base_ontology.UtteranceContext": {
+ "is_selected": false,
+ "is_shown": true
+ },
+ "ft.onto.base_ontology.Utterance": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "speaker": false
+ }
+ },
+ "ft.onto.base_ontology.PredicateArgument": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "ner_type": false,
+ "predicate_lemma": false
+ }
+ },
+ "ft.onto.base_ontology.EntityMention": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "ner_type": false
+ }
+ },
+ "ft.onto.base_ontology.EventMention": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "event_type": false
+ }
+ },
+ "ft.onto.base_ontology.PredicateMention": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "predicate_lemma": false,
+ "framenet_id": false
+ }
+ },
+ "ft.onto.base_ontology.PredicateLink": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "arg_type": false
+ }
+ },
+ "ft.onto.base_ontology.Dependency": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "dep_label": false,
+ "rel_type": false
+ }
+ },
+ "ft.onto.base_ontology.EnhancedDependency": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "dep_label": false
+ }
+ },
+ "ft.onto.base_ontology.RelationLink": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "rel_type": false
+ }
+ },
+ "ft.onto.base_ontology.CrossDocEntityRelation": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "rel_type": false
+ }
+ },
+ "ft.onto.base_ontology.CoreferenceGroup": {
+ "is_selected": false,
+ "is_shown": true
+ },
+ "ft.onto.base_ontology.EventRelation": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "rel_type": false
+ }
+ },
+ "ft.onto.base_ontology.CrossDocEventRelation": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "rel_type": false
+ }
+ },
+ "ft.onto.base_ontology.ConstituentNode": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "label": false
+ }
+ },
+ "ft.onto.base_ontology.Title": {
+ "is_selected": false,
+ "is_shown": true
+ },
+ "ft.onto.base_ontology.Body": {
+ "is_selected": false,
+ "is_shown": true
+ },
+ "ft.onto.base_ontology.MCOption": {
+ "is_selected": false,
+ "is_shown": true
+ },
+ "ft.onto.base_ontology.MCQuestion": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {}
+ },
+ "ft.onto.base_ontology.MRCQuestion": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {}
+ },
+ "ft.onto.base_ontology.Recording": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {}
+ },
+ "ft.onto.base_ontology.AudioUtterance": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "speaker": false
+ }
+ },
+ "ftx.medical.clinical_ontology.NegationContext": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {}
+ }
+ },
+ "scopeConfigs": {
+ "ft.onto.base_ontology.Token": false,
+ "ft.onto.base_ontology.Subword": false,
+ "ft.onto.base_ontology.Document": false,
+ "ft.onto.base_ontology.Sentence": false,
+ "ft.onto.base_ontology.Phrase": false,
+ "ft.onto.base_ontology.UtteranceContext": false,
+ "ft.onto.base_ontology.Utterance": false,
+ "ft.onto.base_ontology.PredicateArgument": false,
+ "ft.onto.base_ontology.EntityMention": false,
+ "ft.onto.base_ontology.EventMention": false,
+ "ft.onto.base_ontology.PredicateMention": false,
+ "ft.onto.base_ontology.ConstituentNode": false,
+ "ft.onto.base_ontology.Title": false,
+ "ft.onto.base_ontology.Body": false,
+ "ft.onto.base_ontology.MCOption": false,
+ "ft.onto.base_ontology.MCQuestion": false,
+ "ft.onto.base_ontology.MRCQuestion": false,
+ "ftx.medical.clinical_ontology.NegationContext": false
+ },
+ "layoutConfigs": {
+ "center-middle": "DialogueBox",
+ "left": "disable",
+ "right": "disable",
+ "center-bottom": "disable"
+ },
+ "remoteConfigs": {
+ "pipelineUrl": "http://localhost:8008",
+ "doValidation": false,
+ "expectedName": "",
+ "inputFormat": "string",
+ "expectedRecords": {}
+ }
+ }
+}
\ No newline at end of file
diff --git a/examples/clinical_pipeline/chat_query_document.json b/examples/clinical_pipeline/chat_query_document.json
index d8b95d4a..3b8dfbf4 100644
--- a/examples/clinical_pipeline/chat_query_document.json
+++ b/examples/clinical_pipeline/chat_query_document.json
@@ -1,5 +1,44 @@
{
"name": "query_chatbot.json",
"project_id": 99,
- "textPack": "{\n \"py/object\": \"forte.data.data_pack.DataPack\",\n \"py/state\": {\n \"creation_records\": {},\n \"field_records\": {},\n \"links\": [],\n \"groups\": [],\n \"meta\": {\n \"py/object\": \"forte.data.data_pack.Meta\",\n \"py/state\": {\n \"pack_name\": \"query_chatbot\",\n \"_pack_id\": 3,\n \"language\": \"eng\",\n \"span_unit\": \"character\"\n }\n },\n \"_text\": \"Welcome! Please type in a query to retrieve relevant clinical reports.\",\n \"annotations\": [\n {\n \"py/object\": \"ft.onto.base_ontology.Utterance\",\n \"py/state\": {\n \"_span\": {\n \"py/object\": \"forte.data.span.Span\",\n \"begin\": 0,\n \"end\": 70\n },\n \"_tid\": 0,\n \"speaker\": \"ai\"\n }\n }\n ],\n \"generics\": [],\n \"replace_back_operations\": [],\n \"processed_original_spans\": [],\n \"orig_text_len\": 70,\n \"serialization\": {\n \"next_id\": 1\n }\n }\n}"
-}
+ "textPack": {
+ "py/object": "forte.data.data_pack.DataPack",
+ "py/state": {
+ "creation_records": {},
+ "field_records": {},
+ "links": [],
+ "groups": [],
+ "meta": {
+ "py/object": "forte.data.data_pack.Meta",
+ "py/state": {
+ "pack_name": "query_chatbot",
+ "_pack_id": 3,
+ "language": "eng",
+ "span_unit": "character"
+ }
+ },
+ "_text": "Welcome! Please type in a query to retrieve relevant clinical reports.",
+ "annotations": [
+ {
+ "py/object": "ft.onto.base_ontology.Utterance",
+ "py/state": {
+ "_span": {
+ "py/object": "forte.data.span.Span",
+ "begin": 0,
+ "end": 70
+ },
+ "_tid": 0,
+ "speaker": "ai"
+ }
+ }
+ ],
+ "generics": [],
+ "replace_back_operations": [],
+ "processed_original_spans": [],
+ "orig_text_len": 70,
+ "serialization": {
+ "next_id": 1
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/examples/clinical_pipeline/default_onto_project.json b/examples/clinical_pipeline/default_onto_project.json
index 03122b4a..aad92a40 100644
--- a/examples/clinical_pipeline/default_onto_project.json
+++ b/examples/clinical_pipeline/default_onto_project.json
@@ -1,4 +1,732 @@
{
"name": "clinical_pipeline_base",
"project_type": "single_pack",
- "ontology": "{\n \"name\": \"base_ontology\",\n \"definitions\": [\n {\n \"entry_name\": \"ft.onto.base_ontology.Token\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation :class:`Token`, used to represent a token or a word.\",\n \"attributes\": [\n {\n \"name\": \"pos\",\n \"type\": \"str\"\n },\n {\n \"name\": \"ud_xpos\",\n \"type\": \"str\",\n \"description\": \"Language specific pos tag. Used in CoNLL-U Format. Refer to https://universaldependencies.org/format.html\"\n },\n {\n \"name\": \"lemma\",\n \"type\": \"str\",\n \"description\": \"Lemma or stem of word form.\"\n },\n {\n \"name\": \"chunk\",\n \"type\": \"str\"\n },\n {\n \"name\": \"ner\",\n \"type\": \"str\"\n },\n {\n \"name\": \"sense\",\n \"type\": \"str\"\n },\n {\n \"name\": \"is_root\",\n \"type\": \"bool\"\n },\n {\n \"name\": \"ud_features\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"str\"\n },\n {\n \"name\": \"ud_misc\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"str\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Subword\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"Used to represent subword tokenization results.\",\n \"attributes\": [\n {\n \"name\": \"is_first_segment\",\n \"type\": \"bool\"\n },\n {\n \"name\": \"is_unk\",\n \"type\": \"bool\"\n },\n {\n \"name\": \"vocab_id\",\n \"type\": \"int\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Classification\",\n \"parent_entry\": \"forte.data.ontology.top.Generics\",\n \"description\": \"Used to store values for classification prediction\",\n \"attributes\": [\n {\n \"name\": \"classification_result\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"float\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Document\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Document`, normally 
used to represent a document.\",\n \"attributes\": [\n {\n \"name\": \"document_class\",\n \"type\": \"List\",\n \"item_type\": \"str\",\n \"description\": \"A list of class names that the document belongs to.\"\n },\n {\n \"name\": \"sentiment\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"float\"\n },\n {\n \"name\": \"classifications\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"ft.onto.base_ontology.Classification\",\n \"description\": \"Stores the classification results for this document. The key is the name/task of the classification, the value is an classification object storing the results.\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Sentence\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Sentence`, normally used to represent a sentence.\",\n \"attributes\": [\n {\n \"name\": \"speaker\",\n \"type\": \"str\"\n },\n {\n \"name\": \"part_id\",\n \"type\": \"int\"\n },\n {\n \"name\": \"sentiment\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"float\"\n },\n {\n \"name\": \"classification\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"float\"\n },\n {\n \"name\": \"classifications\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"ft.onto.base_ontology.Classification\",\n \"description\": \"Stores the classification results for this sentence. 
The key is the name/task of the classification, the value is an classification object storing the results.\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Phrase\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Phrase`.\",\n \"attributes\": [\n {\n \"name\": \"phrase_type\",\n \"type\": \"str\"\n },\n {\n \"name\": \"headword\",\n \"type\": \"ft.onto.base_ontology.Token\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.UtteranceContext\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"`UtteranceContext` represents the context part in dialogue.\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Utterance\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Utterance`, normally used to represent an utterance in dialogue.\",\n \"attributes\": [\n {\n \"name\": \"speaker\",\n \"type\": \"str\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.PredicateArgument\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `PredicateArgument`, normally used to represent an argument of a predicate, can be linked to the predicate via the predicate link.\",\n \"attributes\": [\n {\n \"name\": \"ner_type\",\n \"type\": \"str\"\n },\n {\n \"name\": \"predicate_lemma\",\n \"type\": \"str\"\n },\n {\n \"name\": \"is_verb\",\n \"type\": \"bool\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.EntityMention\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `EntityMention`, normally used to represent an Entity Mention in a piece of text.\",\n \"attributes\": [\n {\n \"name\": \"ner_type\",\n \"type\": \"str\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.EventMention\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation 
`EventMention`, used to refer to a mention of an event.\",\n \"attributes\": [\n {\n \"name\": \"event_type\",\n \"type\": \"str\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.PredicateMention\",\n \"parent_entry\": \"ft.onto.base_ontology.Phrase\",\n \"description\": \"A span based annotation `PredicateMention`, normally used to represent a predicate (normally verbs) in a piece of text.\",\n \"attributes\": [\n {\n \"name\": \"predicate_lemma\",\n \"type\": \"str\"\n },\n {\n \"name\": \"framenet_id\",\n \"type\": \"str\"\n },\n {\n \"name\": \"is_verb\",\n \"type\": \"bool\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.PredicateLink\",\n \"parent_entry\": \"forte.data.ontology.top.Link\",\n \"description\": \"A `Link` type entry which represent a semantic role link between a predicate and its argument.\",\n \"attributes\": [\n {\n \"name\": \"arg_type\",\n \"type\": \"str\",\n \"description\": \"The predicate link type.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.PredicateMention\",\n \"child_type\": \"ft.onto.base_ontology.PredicateArgument\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Dependency\",\n \"parent_entry\": \"forte.data.ontology.top.Link\",\n \"description\": \"A `Link` type entry which represent a syntactic dependency.\",\n \"attributes\": [\n {\n \"name\": \"dep_label\",\n \"type\": \"str\",\n \"description\": \"The dependency label.\"\n },\n {\n \"name\": \"rel_type\",\n \"type\": \"str\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.Token\",\n \"child_type\": \"ft.onto.base_ontology.Token\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.EnhancedDependency\",\n \"parent_entry\": \"forte.data.ontology.top.Link\",\n \"description\": \"A `Link` type entry which represent a enhanced dependency: \\n https://universaldependencies.org/u/overview/enhanced-syntax.html\",\n \"attributes\": [\n {\n \"name\": \"dep_label\",\n \"type\": \"str\",\n \"description\": \"The enhanced dependency label in 
Universal Dependency.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.Token\",\n \"child_type\": \"ft.onto.base_ontology.Token\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.RelationLink\",\n \"parent_entry\": \"forte.data.ontology.top.Link\",\n \"description\": \"A `Link` type entry which represent a relation between two entity mentions\",\n \"attributes\": [\n {\n \"name\": \"rel_type\",\n \"type\": \"str\",\n \"description\": \"The type of the relation.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.EntityMention\",\n \"child_type\": \"ft.onto.base_ontology.EntityMention\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.CrossDocEntityRelation\",\n \"parent_entry\": \"forte.data.ontology.top.MultiPackLink\",\n \"description\": \"A `Link` type entry which represent a relation between two entity mentions across the packs.\",\n \"attributes\": [\n {\n \"name\": \"rel_type\",\n \"type\": \"str\",\n \"description\": \"The type of the relation.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.EntityMention\",\n \"child_type\": \"ft.onto.base_ontology.EntityMention\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.CoreferenceGroup\",\n \"parent_entry\": \"forte.data.ontology.top.Group\",\n \"description\": \"A group type entry that take `EntityMention`, as members, used to represent coreferent group of entities.\",\n \"member_type\": \"ft.onto.base_ontology.EntityMention\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.EventRelation\",\n \"parent_entry\": \"forte.data.ontology.top.Link\",\n \"description\": \"A `Link` type entry which represent a relation between two event mentions.\",\n \"attributes\": [\n {\n \"name\": \"rel_type\",\n \"type\": \"str\",\n \"description\": \"The type of the relation.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.EventMention\",\n \"child_type\": \"ft.onto.base_ontology.EventMention\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.CrossDocEventRelation\",\n \"parent_entry\": 
\"forte.data.ontology.top.MultiPackLink\",\n \"description\": \"A `Link` type entry which represent a relation between two event mentions across the packs.\",\n \"attributes\": [\n {\n \"name\": \"rel_type\",\n \"type\": \"str\",\n \"description\": \"The type of the relation.\"\n }\n ],\n \"parent_type\": \"ft.onto.base_ontology.EventMention\",\n \"child_type\": \"ft.onto.base_ontology.EventMention\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.ConstituentNode\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `ConstituentNode` to represent constituents in constituency parsing. This can also sentiment values annotated on the nodes.\",\n \"attributes\": [\n {\n \"name\": \"label\",\n \"type\": \"str\"\n },\n {\n \"name\": \"sentiment\",\n \"type\": \"Dict\",\n \"key_type\": \"str\",\n \"value_type\": \"float\"\n },\n {\n \"name\": \"is_root\",\n \"type\": \"bool\"\n },\n {\n \"name\": \"is_leaf\",\n \"type\": \"bool\"\n },\n {\n \"name\": \"parent_node\",\n \"type\": \"ft.onto.base_ontology.ConstituentNode\"\n },\n {\n \"name\": \"children_nodes\",\n \"type\": \"List\",\n \"item_type\": \"ft.onto.base_ontology.ConstituentNode\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Title\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Title`, normally used to represent a title.\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Body\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `Body`, normally used to represent a document body.\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.MCOption\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\"\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.MCQuestion\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"attributes\": [\n {\n \"name\": \"options\",\n \"type\": \"List\",\n \"item_type\": 
\"ft.onto.base_ontology.MCOption\"\n },\n {\n \"name\": \"answers\",\n \"type\": \"List\",\n \"item_type\": \"int\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.MRCQuestion\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"An `Annotation` type which represents an MRC question.\",\n \"attributes\": [\n {\n \"name\": \"qid\",\n \"type\": \"int\"\n },\n {\n \"name\": \"answers\",\n \"type\": \"List\",\n \"item_type\": \"ft.onto.base_ontology.Phrase\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.Recording\",\n \"parent_entry\": \"forte.data.ontology.top.AudioAnnotation\",\n \"description\": \"A span based annotation `Recording`, normally used to represent a recording.\",\n \"attributes\": [\n {\n \"name\": \"recording_class\",\n \"type\": \"List\",\n \"item_type\": \"str\",\n \"description\": \"A list of class names that the recording belongs to.\"\n }\n ]\n },\n {\n \"entry_name\": \"ft.onto.base_ontology.AudioUtterance\",\n \"parent_entry\": \"forte.data.ontology.top.AudioAnnotation\",\n \"description\": \"A span based annotation `AudioUtterance`, normally used to represent an utterance in dialogue.\",\n \"attributes\": [\n {\n \"name\": \"speaker\",\n \"type\": \"str\"\n }\n ]\n },\n {\n \"entry_name\": \"ftx.medical.clinical_ontology.NegationContext\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"A span based annotation `NegationContext`, used to represent the negation context of a named entity.\",\n \"attributes\": [\n {\n \"name\": \"polarity\",\n \"type\": \"bool\"\n }\n ]\n },\n {\n \"entry_name\": \"ftx.medical.clinical_ontology.MedicalEntityMention\",\n \"parent_entry\": \"ft.onto.base_ontology.EntityMention\",\n \"description\": \"A span based annotation class MedicalEntityMention, used to represent an Entity Mention in medical domain\",\n \"attributes\": [\n {\n \"name\": \"umls_link\",\n \"type\": \"str\"\n },\n {\n \"name\": \"umls_entities\",\n \"type\": 
\"List\",\n \"item_type\": \"ftx.medical.clinical_ontology.UMLSConceptLink\"\n }\n ]\n },\n {\n \"entry_name\": \"ftx.medical.clinical_ontology.MedicalArticle\",\n \"parent_entry\": \"forte.data.ontology.top.Annotation\",\n \"description\": \"An annotation based representation for the whole medical text chunk/document\",\n \"attributes\": [\n {\n \"name\": \"icd_version\",\n \"type\": \"int\",\n \"description\": \"The version of ICD-Coding being used.\"\n },\n {\n \"name\": \"icd_code\",\n \"type\": \"str\",\n \"description\": \"The ICD code assigned to current medical article.\"\n }\n ]\n }\n ]\n}\n", "config": "{\"legendConfigs\":{\"ft.onto.base_ontology.Token\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"pos\":false,\"ud_xpos\":false,\"lemma\":false,\"chunk\":false,\"ner\":false,\"sense\":false}},\"ft.onto.base_ontology.Subword\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{}},\"ft.onto.base_ontology.Classification\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{}},\"ft.onto.base_ontology.Document\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{}},\"ft.onto.base_ontology.Sentence\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"speaker\":false}},\"ft.onto.base_ontology.Phrase\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"phrase_type\":false}},\"ft.onto.base_ontology.UtteranceContext\":{\"is_selected\":false,\"is_shown\":false},\"ft.onto.base_ontology.Utterance\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{\"speaker\":false}},\"ft.onto.base_ontology.PredicateArgument\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{\"ner_type\":false,\"predicate_lemma\":false}},\"ft.onto.base_ontology.EntityMention\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"ner_type\":false}},\"ft.onto.base_ontology.EventMention\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"event_type\":false}},\"ft.onto.base_ontology.PredicateMention\":{\"is_selected\":false,\"is_
shown\":true,\"attributes\":{\"predicate_lemma\":false,\"framenet_id\":false}},\"ft.onto.base_ontology.PredicateLink\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{\"arg_type\":false}},\"ft.onto.base_ontology.Dependency\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{\"dep_label\":false,\"rel_type\":false}},\"ft.onto.base_ontology.EnhancedDependency\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{\"dep_label\":false}},\"ft.onto.base_ontology.RelationLink\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"rel_type\":false}},\"ft.onto.base_ontology.CrossDocEntityRelation\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{\"rel_type\":false}},\"ft.onto.base_ontology.CoreferenceGroup\":{\"is_selected\":false,\"is_shown\":false},\"ft.onto.base_ontology.EventRelation\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{\"rel_type\":false}},\"ft.onto.base_ontology.CrossDocEventRelation\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{\"rel_type\":false}},\"ft.onto.base_ontology.ConstituentNode\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{\"label\":false}},\"ft.onto.base_ontology.Title\":{\"is_selected\":false,\"is_shown\":false},\"ft.onto.base_ontology.Body\":{\"is_selected\":false,\"is_shown\":false},\"ft.onto.base_ontology.MCOption\":{\"is_selected\":false,\"is_shown\":false},\"ft.onto.base_ontology.MCQuestion\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{}},\"ft.onto.base_ontology.MRCQuestion\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{}},\"ft.onto.base_ontology.Recording\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{}},\"ft.onto.base_ontology.AudioUtterance\":{\"is_selected\":false,\"is_shown\":false,\"attributes\":{\"speaker\":false}},\"ftx.medical.clinical_ontology.NegationContext\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{}},\"ftx.medical.clinical_ontology.MedicalEntityMention\":{\"is_selected\":false,\"is_shown\":true,
\"attributes\":{\"umls_link\":false}},\"ftx.medical.clinical_ontology.MedicalArticle\":{\"is_selected\":false,\"is_shown\":true,\"attributes\":{\"icd_code\":false}}},\"scopeConfigs\":{\"ft.onto.base_ontology.Token\":false,\"ft.onto.base_ontology.Subword\":false,\"ft.onto.base_ontology.Document\":false,\"ft.onto.base_ontology.Sentence\":false,\"ft.onto.base_ontology.Phrase\":false,\"ft.onto.base_ontology.UtteranceContext\":false,\"ft.onto.base_ontology.Utterance\":false,\"ft.onto.base_ontology.PredicateArgument\":false,\"ft.onto.base_ontology.EntityMention\":false,\"ft.onto.base_ontology.EventMention\":false,\"ft.onto.base_ontology.PredicateMention\":false,\"ft.onto.base_ontology.ConstituentNode\":false,\"ft.onto.base_ontology.Title\":false,\"ft.onto.base_ontology.Body\":false,\"ft.onto.base_ontology.MCOption\":false,\"ft.onto.base_ontology.MCQuestion\":false,\"ft.onto.base_ontology.MRCQuestion\":false,\"ftx.medical.clinical_ontology.NegationContext\":false,\"ftx.medical.clinical_ontology.MedicalEntityMention\":false,\"ftx.medical.clinical_ontology.MedicalArticle\":false},\"layoutConfigs\":{\"center-middle\":\"default-nlp\",\"left\":\"default-meta\",\"right\":\"default-attribute\",\"center-bottom\":\"disable\"},\"remoteConfigs\":{\"pipelineUrl\":\"\",\"doValidation\":false,\"expectedName\":\"\",\"inputFormat\":\"string\",\"expectedRecords\":{}}}"}
+ "ontology": {
+ "name": "base_ontology",
+ "definitions": [
+ {
+ "entry_name": "ft.onto.base_ontology.Token",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation :class:`Token`, used to represent a token or a word.",
+ "attributes": [
+ {
+ "name": "pos",
+ "type": "str"
+ },
+ {
+ "name": "ud_xpos",
+ "type": "str",
+ "description": "Language specific pos tag. Used in CoNLL-U Format. Refer to https://universaldependencies.org/format.html"
+ },
+ {
+ "name": "lemma",
+ "type": "str",
+ "description": "Lemma or stem of word form."
+ },
+ {
+ "name": "chunk",
+ "type": "str"
+ },
+ {
+ "name": "ner",
+ "type": "str"
+ },
+ {
+ "name": "sense",
+ "type": "str"
+ },
+ {
+ "name": "is_root",
+ "type": "bool"
+ },
+ {
+ "name": "ud_features",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "str"
+ },
+ {
+ "name": "ud_misc",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "str"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Subword",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "Used to represent subword tokenization results.",
+ "attributes": [
+ {
+ "name": "is_first_segment",
+ "type": "bool"
+ },
+ {
+ "name": "is_unk",
+ "type": "bool"
+ },
+ {
+ "name": "vocab_id",
+ "type": "int"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Classification",
+ "parent_entry": "forte.data.ontology.top.Generics",
+ "description": "Used to store values for classification prediction",
+ "attributes": [
+ {
+ "name": "classification_result",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "float"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Document",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Document`, normally used to represent a document.",
+ "attributes": [
+ {
+ "name": "document_class",
+ "type": "List",
+ "item_type": "str",
+ "description": "A list of class names that the document belongs to."
+ },
+ {
+ "name": "sentiment",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "float"
+ },
+ {
+ "name": "classifications",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "ft.onto.base_ontology.Classification",
+ "description": "Stores the classification results for this document. The key is the name/task of the classification, the value is an classification object storing the results."
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Sentence",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Sentence`, normally used to represent a sentence.",
+ "attributes": [
+ {
+ "name": "speaker",
+ "type": "str"
+ },
+ {
+ "name": "part_id",
+ "type": "int"
+ },
+ {
+ "name": "sentiment",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "float"
+ },
+ {
+ "name": "classification",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "float"
+ },
+ {
+ "name": "classifications",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "ft.onto.base_ontology.Classification",
+ "description": "Stores the classification results for this sentence. The key is the name/task of the classification, the value is an classification object storing the results."
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Phrase",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Phrase`.",
+ "attributes": [
+ {
+ "name": "phrase_type",
+ "type": "str"
+ },
+ {
+ "name": "headword",
+ "type": "ft.onto.base_ontology.Token"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.UtteranceContext",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "`UtteranceContext` represents the context part in dialogue."
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Utterance",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Utterance`, normally used to represent an utterance in dialogue.",
+ "attributes": [
+ {
+ "name": "speaker",
+ "type": "str"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.PredicateArgument",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `PredicateArgument`, normally used to represent an argument of a predicate, can be linked to the predicate via the predicate link.",
+ "attributes": [
+ {
+ "name": "ner_type",
+ "type": "str"
+ },
+ {
+ "name": "predicate_lemma",
+ "type": "str"
+ },
+ {
+ "name": "is_verb",
+ "type": "bool"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.EntityMention",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `EntityMention`, normally used to represent an Entity Mention in a piece of text.",
+ "attributes": [
+ {
+ "name": "ner_type",
+ "type": "str"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.EventMention",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `EventMention`, used to refer to a mention of an event.",
+ "attributes": [
+ {
+ "name": "event_type",
+ "type": "str"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.PredicateMention",
+ "parent_entry": "ft.onto.base_ontology.Phrase",
+ "description": "A span based annotation `PredicateMention`, normally used to represent a predicate (normally verbs) in a piece of text.",
+ "attributes": [
+ {
+ "name": "predicate_lemma",
+ "type": "str"
+ },
+ {
+ "name": "framenet_id",
+ "type": "str"
+ },
+ {
+ "name": "is_verb",
+ "type": "bool"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.PredicateLink",
+ "parent_entry": "forte.data.ontology.top.Link",
+ "description": "A `Link` type entry which represent a semantic role link between a predicate and its argument.",
+ "attributes": [
+ {
+ "name": "arg_type",
+ "type": "str",
+ "description": "The predicate link type."
+ }
+ ],
+ "parent_type": "ft.onto.base_ontology.PredicateMention",
+ "child_type": "ft.onto.base_ontology.PredicateArgument"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Dependency",
+ "parent_entry": "forte.data.ontology.top.Link",
+ "description": "A `Link` type entry which represent a syntactic dependency.",
+ "attributes": [
+ {
+ "name": "dep_label",
+ "type": "str",
+ "description": "The dependency label."
+ },
+ {
+ "name": "rel_type",
+ "type": "str"
+ }
+ ],
+ "parent_type": "ft.onto.base_ontology.Token",
+ "child_type": "ft.onto.base_ontology.Token"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.EnhancedDependency",
+ "parent_entry": "forte.data.ontology.top.Link",
+ "description": "A `Link` type entry which represent a enhanced dependency: \n https://universaldependencies.org/u/overview/enhanced-syntax.html",
+ "attributes": [
+ {
+ "name": "dep_label",
+ "type": "str",
+ "description": "The enhanced dependency label in Universal Dependency."
+ }
+ ],
+ "parent_type": "ft.onto.base_ontology.Token",
+ "child_type": "ft.onto.base_ontology.Token"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.RelationLink",
+ "parent_entry": "forte.data.ontology.top.Link",
+ "description": "A `Link` type entry which represent a relation between two entity mentions",
+ "attributes": [
+ {
+ "name": "rel_type",
+ "type": "str",
+ "description": "The type of the relation."
+ }
+ ],
+ "parent_type": "ft.onto.base_ontology.EntityMention",
+ "child_type": "ft.onto.base_ontology.EntityMention"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.CrossDocEntityRelation",
+ "parent_entry": "forte.data.ontology.top.MultiPackLink",
+ "description": "A `Link` type entry which represent a relation between two entity mentions across the packs.",
+ "attributes": [
+ {
+ "name": "rel_type",
+ "type": "str",
+ "description": "The type of the relation."
+ }
+ ],
+ "parent_type": "ft.onto.base_ontology.EntityMention",
+ "child_type": "ft.onto.base_ontology.EntityMention"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.CoreferenceGroup",
+ "parent_entry": "forte.data.ontology.top.Group",
+ "description": "A group type entry that take `EntityMention`, as members, used to represent coreferent group of entities.",
+ "member_type": "ft.onto.base_ontology.EntityMention"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.EventRelation",
+ "parent_entry": "forte.data.ontology.top.Link",
+ "description": "A `Link` type entry which represent a relation between two event mentions.",
+ "attributes": [
+ {
+ "name": "rel_type",
+ "type": "str",
+ "description": "The type of the relation."
+ }
+ ],
+ "parent_type": "ft.onto.base_ontology.EventMention",
+ "child_type": "ft.onto.base_ontology.EventMention"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.CrossDocEventRelation",
+ "parent_entry": "forte.data.ontology.top.MultiPackLink",
+ "description": "A `Link` type entry which represent a relation between two event mentions across the packs.",
+ "attributes": [
+ {
+ "name": "rel_type",
+ "type": "str",
+ "description": "The type of the relation."
+ }
+ ],
+ "parent_type": "ft.onto.base_ontology.EventMention",
+ "child_type": "ft.onto.base_ontology.EventMention"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.ConstituentNode",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `ConstituentNode` to represent constituents in constituency parsing. This can also sentiment values annotated on the nodes.",
+ "attributes": [
+ {
+ "name": "label",
+ "type": "str"
+ },
+ {
+ "name": "sentiment",
+ "type": "Dict",
+ "key_type": "str",
+ "value_type": "float"
+ },
+ {
+ "name": "is_root",
+ "type": "bool"
+ },
+ {
+ "name": "is_leaf",
+ "type": "bool"
+ },
+ {
+ "name": "parent_node",
+ "type": "ft.onto.base_ontology.ConstituentNode"
+ },
+ {
+ "name": "children_nodes",
+ "type": "List",
+ "item_type": "ft.onto.base_ontology.ConstituentNode"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Title",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Title`, normally used to represent a title."
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Body",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `Body`, normally used to represent a document body."
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.MCOption",
+ "parent_entry": "forte.data.ontology.top.Annotation"
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.MCQuestion",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "attributes": [
+ {
+ "name": "options",
+ "type": "List",
+ "item_type": "ft.onto.base_ontology.MCOption"
+ },
+ {
+ "name": "answers",
+ "type": "List",
+ "item_type": "int"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.MRCQuestion",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "An `Annotation` type which represents an MRC question.",
+ "attributes": [
+ {
+ "name": "qid",
+ "type": "int"
+ },
+ {
+ "name": "answers",
+ "type": "List",
+ "item_type": "ft.onto.base_ontology.Phrase"
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.Recording",
+ "parent_entry": "forte.data.ontology.top.AudioAnnotation",
+ "description": "A span based annotation `Recording`, normally used to represent a recording.",
+ "attributes": [
+ {
+ "name": "recording_class",
+ "type": "List",
+ "item_type": "str",
+ "description": "A list of class names that the recording belongs to."
+ }
+ ]
+ },
+ {
+ "entry_name": "ft.onto.base_ontology.AudioUtterance",
+ "parent_entry": "forte.data.ontology.top.AudioAnnotation",
+ "description": "A span based annotation `AudioUtterance`, normally used to represent an utterance in dialogue.",
+ "attributes": [
+ {
+ "name": "speaker",
+ "type": "str"
+ }
+ ]
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.NegationContext",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "A span based annotation `NegationContext`, used to represent the negation context of a named entity.",
+ "attributes": [
+ {
+ "name": "polarity",
+ "type": "bool"
+ }
+ ]
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.MedicalEntityMention",
+ "parent_entry": "ft.onto.base_ontology.EntityMention",
+ "description": "A span based annotation class MedicalEntityMention, used to represent an Entity Mention in medical domain",
+ "attributes": [
+ {
+ "name": "umls_link",
+ "type": "str"
+ },
+ {
+ "name": "umls_entities",
+ "type": "List",
+ "item_type": "ftx.medical.clinical_ontology.UMLSConceptLink"
+ }
+ ]
+ },
+ {
+ "entry_name": "ftx.medical.clinical_ontology.MedicalArticle",
+ "parent_entry": "forte.data.ontology.top.Annotation",
+ "description": "An annotation based representation for the whole medical text chunk/document",
+ "attributes": [
+ {
+ "name": "icd_version",
+ "type": "int",
+ "description": "The version of ICD-Coding being used."
+ },
+ {
+ "name": "icd_code",
+ "type": "str",
+ "description": "The ICD code assigned to current medical article."
+ }
+ ]
+ }
+ ]
+ },
+ "config": {
+ "legendConfigs": {
+ "ft.onto.base_ontology.Token": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "pos": false,
+ "ud_xpos": false,
+ "lemma": false,
+ "chunk": false,
+ "ner": false,
+ "sense": false
+ }
+ },
+ "ft.onto.base_ontology.Subword": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {}
+ },
+ "ft.onto.base_ontology.Classification": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {}
+ },
+ "ft.onto.base_ontology.Document": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {}
+ },
+ "ft.onto.base_ontology.Sentence": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "speaker": false
+ }
+ },
+ "ft.onto.base_ontology.Phrase": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "phrase_type": false
+ }
+ },
+ "ft.onto.base_ontology.UtteranceContext": {
+ "is_selected": false,
+ "is_shown": false
+ },
+ "ft.onto.base_ontology.Utterance": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {
+ "speaker": false
+ }
+ },
+ "ft.onto.base_ontology.PredicateArgument": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {
+ "ner_type": false,
+ "predicate_lemma": false
+ }
+ },
+ "ft.onto.base_ontology.EntityMention": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "ner_type": false
+ }
+ },
+ "ft.onto.base_ontology.EventMention": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "event_type": false
+ }
+ },
+ "ft.onto.base_ontology.PredicateMention": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "predicate_lemma": false,
+ "framenet_id": false
+ }
+ },
+ "ft.onto.base_ontology.PredicateLink": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {
+ "arg_type": false
+ }
+ },
+ "ft.onto.base_ontology.Dependency": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {
+ "dep_label": false,
+ "rel_type": false
+ }
+ },
+ "ft.onto.base_ontology.EnhancedDependency": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {
+ "dep_label": false
+ }
+ },
+ "ft.onto.base_ontology.RelationLink": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "rel_type": false
+ }
+ },
+ "ft.onto.base_ontology.CrossDocEntityRelation": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {
+ "rel_type": false
+ }
+ },
+ "ft.onto.base_ontology.CoreferenceGroup": {
+ "is_selected": false,
+ "is_shown": false
+ },
+ "ft.onto.base_ontology.EventRelation": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {
+ "rel_type": false
+ }
+ },
+ "ft.onto.base_ontology.CrossDocEventRelation": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {
+ "rel_type": false
+ }
+ },
+ "ft.onto.base_ontology.ConstituentNode": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {
+ "label": false
+ }
+ },
+ "ft.onto.base_ontology.Title": {
+ "is_selected": false,
+ "is_shown": false
+ },
+ "ft.onto.base_ontology.Body": {
+ "is_selected": false,
+ "is_shown": false
+ },
+ "ft.onto.base_ontology.MCOption": {
+ "is_selected": false,
+ "is_shown": false
+ },
+ "ft.onto.base_ontology.MCQuestion": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {}
+ },
+ "ft.onto.base_ontology.MRCQuestion": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {}
+ },
+ "ft.onto.base_ontology.Recording": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {}
+ },
+ "ft.onto.base_ontology.AudioUtterance": {
+ "is_selected": false,
+ "is_shown": false,
+ "attributes": {
+ "speaker": false
+ }
+ },
+ "ftx.medical.clinical_ontology.NegationContext": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {}
+ },
+ "ftx.medical.clinical_ontology.MedicalEntityMention": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "umls_link": false
+ }
+ },
+ "ftx.medical.clinical_ontology.MedicalArticle": {
+ "is_selected": false,
+ "is_shown": true,
+ "attributes": {
+ "icd_code": false
+ }
+ }
+ },
+ "scopeConfigs": {
+ "ft.onto.base_ontology.Token": false,
+ "ft.onto.base_ontology.Subword": false,
+ "ft.onto.base_ontology.Document": false,
+ "ft.onto.base_ontology.Sentence": false,
+ "ft.onto.base_ontology.Phrase": false,
+ "ft.onto.base_ontology.UtteranceContext": false,
+ "ft.onto.base_ontology.Utterance": false,
+ "ft.onto.base_ontology.PredicateArgument": false,
+ "ft.onto.base_ontology.EntityMention": false,
+ "ft.onto.base_ontology.EventMention": false,
+ "ft.onto.base_ontology.PredicateMention": false,
+ "ft.onto.base_ontology.ConstituentNode": false,
+ "ft.onto.base_ontology.Title": false,
+ "ft.onto.base_ontology.Body": false,
+ "ft.onto.base_ontology.MCOption": false,
+ "ft.onto.base_ontology.MCQuestion": false,
+ "ft.onto.base_ontology.MRCQuestion": false,
+ "ftx.medical.clinical_ontology.NegationContext": false,
+ "ftx.medical.clinical_ontology.MedicalEntityMention": false,
+ "ftx.medical.clinical_ontology.MedicalArticle": false
+ },
+ "layoutConfigs": {
+ "center-middle": "default-nlp",
+ "left": "default-meta",
+ "right": "default-attribute",
+ "center-bottom": "disable"
+ },
+ "remoteConfigs": {
+ "pipelineUrl": "",
+ "doValidation": false,
+ "expectedName": "",
+ "inputFormat": "string",
+ "expectedRecords": {}
+ }
+ }
+}
\ No newline at end of file
From 34476f7d0e561cd3d05b52639cc020dab5518afd Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sat, 9 Jul 2022 10:20:49 +0800
Subject: [PATCH 08/21] Update README
---
examples/clinical_pipeline/README.md | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/examples/clinical_pipeline/README.md b/examples/clinical_pipeline/README.md
index 708dd0ad..4c539ddf 100644
--- a/examples/clinical_pipeline/README.md
+++ b/examples/clinical_pipeline/README.md
@@ -1,7 +1,6 @@
## A Clinical Information Processing Example
-This example shows how we can construct a project to make ForteHealth and Stave work
- side by side.
+This example shows how we can construct a project to make ForteHealth and Stave work side by side.
## Install extra dependencies
@@ -11,6 +10,8 @@ To install the latest code directly from source,
pip install git+https://git@github.com/asyml/forte-wrappers#egg=forte.elastic\&subdirectory=src/elastic
pip install git+https://git@github.com/asyml/forte-wrappers#egg=forte.spacy\&subdirectory=src/spacy
pip install git+https://git@github.com/asyml/forte-wrappers#egg=forte.spacy\&subdirectory=src/nltk
+pip install git+https://github.com/asyml/ForteHealth.git
+pip install git+https://github.com/asyml/stave.git
```
To install from PyPI,
@@ -18,6 +19,8 @@ To install from PyPI,
pip install forte.elastic
pip install forte.spacy
pip install forte.nltk
+pip install forte.health
+pip install stave
```
## Downloading the models
@@ -89,9 +92,9 @@ The last argument, `use_mimiciii_reader` is whether to use the `Mimic3DischargeN
If we just need to check the remote pipeline connection to Stave.
-You can mask out Line 74 to Line 118 in `clinical_pipeline.py`.
+You can mask out Line 74 to Line 118 in `clinical_processing_pipeline.py`.
-Hence, if you just wish to run the demo pipeline with existing database entries, and wish to just connect with Stave for visualization, You can mask out Line 74 to Line 118 in `clinical_pipeline.py` and run this command:
+Hence, if you just wish to run the demo pipeline with existing database entries, and wish to just connect with Stave for visualization, You can mask out Line 74 to Line 118 in `clinical_processing_pipeline.py` and run this command:
```bash
python clinical_processing_pipeline.py ./ ./ 100 0
From 02e06c4cba24f4924c3a288ec84bd7d83b929198 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sat, 9 Jul 2022 11:49:52 +0800
Subject: [PATCH 09/21] Little format fix
---
.../clinical_processing_pipeline.py | 143 ++++++++++++++++++
1 file changed, 143 insertions(+)
create mode 100644 examples/clinical_pipeline/clinical_processing_pipeline.py
diff --git a/examples/clinical_pipeline/clinical_processing_pipeline.py b/examples/clinical_pipeline/clinical_processing_pipeline.py
new file mode 100644
index 00000000..4b7a2420
--- /dev/null
+++ b/examples/clinical_pipeline/clinical_processing_pipeline.py
@@ -0,0 +1,143 @@
+import json
+import sys
+import time
+
+import yaml
+from forte.common.configuration import Config
+from forte.data.data_pack import DataPack
+from forte.data.readers import PlainTextReader, RawDataDeserializeReader
+from forte.pipeline import Pipeline
+from forte.processors.writers import PackIdJsonPackWriter
+from fortex.elastic import ElasticSearchPackIndexProcessor
+from fortex.health.processors.icd_coding_processor import ICDCodingProcessor
+from fortex.health.processors.negation_context_analyzer import \
+ NegationContextAnalyzer
+from fortex.huggingface import BioBERTNERPredictor
+from fortex.nltk import NLTKNER, NLTKPOSTagger, NLTKSentenceSegmenter
+from fortex.spacy import SpacyProcessor
+from ft.onto.base_ontology import EntityMention, Sentence
+from ftx.medical.clinical_ontology import (MedicalArticle,
+ MedicalEntityMention,
+ NegationContext)
+from stave_backend.lib.stave_session import StaveSession
+
+from mimic3_note_reader import Mimic3DischargeNoteReader
+from utterance_searcher import LastUtteranceSearcher
+
+
+def get_json(path: str):
+    """Return the parsed content of the JSON file at ``path``."""
+    # The context manager closes the file even if parsing raises.
+    with open(path) as file_obj:
+        return json.load(file_obj)
+
+
+def update_stave_db(default_project_json, chat_project_json, chat_doc_json, config):
+    """Make sure the viewer and chat projects exist in Stave.
+
+    When both project names are already listed, the existing viewer project
+    is reused; otherwise both projects and the chat document are created.
+
+    Returns:
+        The id of the viewer (default) project.
+    """
+    with StaveSession(url=config.Stave.url) as session:
+        session.login(username=config.Stave.username, password=config.Stave.pw)
+
+        projects = session.get_project_list().json()
+        project_names = {project["name"] for project in projects}
+        base_name = default_project_json["name"]
+
+        if base_name in project_names and chat_project_json["name"] in project_names:
+            # Already set up by an earlier run: reuse the first matching project.
+            return next(p["id"] for p in projects if p["name"] == base_name)
+
+        resp_base = session.create_project(default_project_json)
+        project_id_base = json.loads(resp_base.text)["id"]
+
+        resp_chat = session.create_project(chat_project_json)
+        project_id_chat = json.loads(resp_chat.text)["id"]
+
+        # The chat document has to point at the chat project just created.
+        chat_doc_json["project_id"] = project_id_chat
+        session.create_document(chat_doc_json)
+
+    return project_id_base
+
+
+def main(
+    input_path: str, output_path: str, max_packs: int = -1, use_mimiciii_reader=1
+):
+    """Index the input notes, set up Stave and serve the query pipeline.
+
+    ``Mimic3DischargeNoteReader`` is used when ``use_mimiciii_reader`` is 1
+    (``max_packs`` becomes its ``max_num_notes``), else ``PlainTextReader``.
+    """
+    print("Starting demo pipeline example..")
+    with open("clinical_config.yml", "r") as config_file:
+        config = Config(yaml.safe_load(config_file), default_hparams=None)
+    print("Running pipeline...")
+    pl = Pipeline[DataPack]()
+    if use_mimiciii_reader == 1:
+        pl.set_reader(
+            Mimic3DischargeNoteReader(), config={"max_num_notes": max_packs}
+        )
+    else:
+        pl.set_reader(PlainTextReader())
+
+    pl.add(SpacyProcessor(), {"processors": ["sentence", "tokenize"]})
+    pl.add(NLTKPOSTagger())
+    pl.add(BioBERTNERPredictor(), config=config.BioBERTNERPredictor)
+    pl.add(NegationContextAnalyzer())
+    pl.add(ICDCodingProcessor(), {"entry_type": "ft.onto.base_ontology.Sentence"})
+    pl.add(
+        ElasticSearchPackIndexProcessor(),
+        {
+            "indexer": {
+                "other_kwargs": {"refresh": True},
+            }
+        },
+    )
+    pl.add(
+        PackIdJsonPackWriter(),
+        {
+            "output_dir": output_path,
+            "indent": 2,
+            "overwrite": True,
+            "drop_record": True,
+            "zip_pack": False,
+        },
+    )
+
+    pl.initialize()
+
+    for count, _ in enumerate(pl.process_dataset(input_path), start=1):
+        if count % 50 == 0:
+            print(f"{time.strftime('%m-%d %H:%M')}: Processed {count} packs")
+
+    default_project_json = get_json(config.viewer_project_json)
+    chat_project_json = get_json(config.chat_project_json)
+    chat_doc_json = get_json(config.chat_document_json)
+
+    base_project_id = update_stave_db(
+        default_project_json, chat_project_json, chat_doc_json, config
+    )
+
+    remote_pl = Pipeline[DataPack]()
+    remote_pl.set_reader(RawDataDeserializeReader())
+    remote_pl.add(
+        LastUtteranceSearcher(),
+        config={
+            "query_result_project_id": base_project_id,
+            "stave_db_path": config.LastUtteranceSearcher.stave_db_path,
+            "url_stub": config.LastUtteranceSearcher.url,
+        },
+    )
+    remote_pl.serve(
+        port=config.Remote.port,
+        input_format=config.Remote.input_format,
+        service_name=config.Remote.service_name,
+    )
+
+if __name__ == "__main__":  # args: input_path output_path max_packs use_mimiciii_reader
+    main(sys.argv[1], sys.argv[2], int(sys.argv[3]), int(sys.argv[4]))
From e55f94849d5c63f7a1237c5f615061e9fb511617 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sat, 9 Jul 2022 11:50:33 +0800
Subject: [PATCH 10/21] Add instructions for plain text reader
---
examples/clinical_pipeline/README.md | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/examples/clinical_pipeline/README.md b/examples/clinical_pipeline/README.md
index 4c539ddf..2ccd5922 100644
--- a/examples/clinical_pipeline/README.md
+++ b/examples/clinical_pipeline/README.md
@@ -85,14 +85,20 @@ Here, you need to make sure `LastUtteranceSearcher.url` and `Stave.url` in `clin
Now, open a new terminal, other than the one running stave server. You can run the following command to parse some files and index them.
```bash
-python clinical_pipeline.py /path_to_mimiciii/1.4/NOTEEVENTS.csv.gz /path_to_sample_output 100 1
+python clinical_processing_pipeline.py path_to_mimiciii/1.4/NOTEEVENTS.csv.gz path_to_mimiciii_output 100 1
```
The last argument, `use_mimiciii_reader` is whether to use the `Mimic3DischargeNoteReader()`. If you set the argument to `1`, you will need to make sure the input data is mimic iii dataset, else `0` for `PlainTextReader()`.
+If you do not have the mimic iii datasets and just want to test the function, you can run the following command to test the function with the given sample data:
+
+```bash
+python clinical_processing_pipeline.py sample_data/ path_to_sample_output/ -1 0
+```
+
If we just need to check the remote pipeline connection to Stave.
-You can mask out Line 74 to Line 118 in `clinical_processing_pipeline.py`.
+You can mask out Line 74 to Line 116 in `clinical_processing_pipeline.py`.
Hence, if you just wish to run the demo pipeline with existing database entries, and wish to just connect with Stave for visualization, You can mask out Line 74 to Line 118 in `clinical_processing_pipeline.py` and run this command:
From 087aa4b3a445a1ba7e4360843994dfbbdb74068f Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sat, 9 Jul 2022 13:44:53 +0800
Subject: [PATCH 11/21] update readme
---
examples/clinical_pipeline/README.md | 2 +-
.../clinical_pipeline/clinical_pipeline.py | 145 ------------------
2 files changed, 1 insertion(+), 146 deletions(-)
delete mode 100644 examples/clinical_pipeline/clinical_pipeline.py
diff --git a/examples/clinical_pipeline/README.md b/examples/clinical_pipeline/README.md
index 2ccd5922..f5d833b7 100644
--- a/examples/clinical_pipeline/README.md
+++ b/examples/clinical_pipeline/README.md
@@ -98,7 +98,7 @@ python clinical_processing_pipeline.py sample_data/ path_to_sample_output/ -1 0
If we just need to check the remote pipeline connection to Stave.
-You can mask out Line 74 to Line 116 in `clinical_processing_pipeline.py`.
+You can mask out Line 76 to Line 118 in `clinical_processing_pipeline.py`.
Hence, if you just wish to run the demo pipeline with existing database entries, and wish to just connect with Stave for visualization, You can mask out Line 74 to Line 118 in `clinical_processing_pipeline.py` and run this command:
diff --git a/examples/clinical_pipeline/clinical_pipeline.py b/examples/clinical_pipeline/clinical_pipeline.py
deleted file mode 100644
index 480c76a4..00000000
--- a/examples/clinical_pipeline/clinical_pipeline.py
+++ /dev/null
@@ -1,145 +0,0 @@
-import json
-import sys
-import time
-
-import yaml
-from forte.common.configuration import Config
-from forte.data.data_pack import DataPack
-from forte.data.readers import RawDataDeserializeReader, PlainTextReader
-from forte.pipeline import Pipeline
-from forte.processors.writers import PackIdJsonPackWriter
-from fortex.elastic import ElasticSearchPackIndexProcessor
-from fortex.health.processors.icd_coding_processor import ICDCodingProcessor
-from fortex.health.processors.negation_context_analyzer import \
- NegationContextAnalyzer
-from fortex.huggingface import BioBERTNERPredictor
-from fortex.nltk import NLTKNER, NLTKPOSTagger, NLTKSentenceSegmenter
-from fortex.spacy import SpacyProcessor
-from ft.onto.base_ontology import EntityMention, Sentence
-from ftx.medical.clinical_ontology import (MedicalArticle,
- MedicalEntityMention,
- NegationContext)
-from stave_backend.lib.stave_session import StaveSession
-
-from mimic3_note_reader import Mimic3DischargeNoteReader
-from utterance_searcher import LastUtteranceSearcher
-
-
-def get_json(path: str):
- file_obj = open(path)
- data = json.load(file_obj)
- file_obj.close()
- return data
-
-
-def update_stave_db(default_project_json, chat_project_json, chat_doc_json, config):
- project_id_base = 0
- with StaveSession(url=config.Stave.url) as session:
- session.login(username=config.Stave.username, password=config.Stave.pw)
-
- projects = session.get_project_list().json()
- project_names = [project["name"] for project in projects]
-
- if (
- default_project_json["name"] in project_names
- and chat_project_json["name"] in project_names
- ):
-
- base_project = [
- proj
- for proj in projects
- if proj["name"] == default_project_json["name"]
- ][0]
- return base_project["id"]
-
- resp1 = session.create_project(default_project_json)
- project_id_base = json.loads(resp1.text)["id"]
-
- resp2 = session.create_project(chat_project_json)
- project_id_chat = json.loads(resp2.text)["id"]
-
- chat_doc_json["project_id"] = project_id_chat
- doc_id = session.create_document(chat_doc_json)
- project_list = session.get_project_list().json()
-
- return project_id_base
-
-
-def main(
- input_path: str, output_path: str, max_packs: int = -1, use_mimiciii_reader=1
-):
- print("Starting demo pipeline example..")
- config = yaml.safe_load(open("clinical_config.yml", "r"))
- config = Config(config, default_hparams=None)
- print("Running pipeline...")
- pl = Pipeline[DataPack]()
- if use_mimiciii_reader == 1:
- pl.set_reader(
- Mimic3DischargeNoteReader(), config={"max_num_notes": max_packs}
- )
- else:
- pl.set_reader(PlainTextReader())
-
- pl.add(SpacyProcessor(), {"processors": ["sentence", "tokenize"]})
- pl.add(NLTKPOSTagger())
- pl.add(BioBERTNERPredictor(), config=config.BioBERTNERPredictor)
- pl.add(NegationContextAnalyzer())
- pl.add(
- ICDCodingProcessor(),
- {
- "entry_type": "ft.onto.base_ontology.Sentence",
- },
- )
- pl.add(
- ElasticSearchPackIndexProcessor(),
- {
- "indexer": {
- "other_kwargs": {"refresh": True},
- }
- },
- )
- pl.add(
- PackIdJsonPackWriter(),
- {
- "output_dir": output_path,
- "indent": 2,
- "overwrite": True,
- "drop_record": True,
- "zip_pack": False,
- },
- )
-
- pl.initialize()
-
- for idx, pack in enumerate(pl.process_dataset(input_path)):
- if (idx + 1) % 50 == 0:
- print(
- f"{time.strftime('%m-%d %H:%M')}: Processed {idx + 1} packs"
- )
-
- default_project_json = get_json(config.viewer_project_json)
- chat_project_json = get_json(config.chat_project_json)
- chat_doc_json = get_json(config.chat_document_json)
-
- base_project_id = update_stave_db(
- default_project_json, chat_project_json, chat_doc_json, config
- )
-
- remote_pl = Pipeline[DataPack]()
- remote_pl.set_reader(RawDataDeserializeReader())
- remote_pl.add(
- LastUtteranceSearcher(),
- config={
- "query_result_project_id": base_project_id,
- "stave_db_path": config.LastUtteranceSearcher.stave_db_path,
- "url_stub": config.LastUtteranceSearcher.url,
- },
- )
- remote_pl.serve(
- port=config.Remote.port,
- input_format=config.Remote.input_format,
- service_name=config.Remote.service_name,
- )
-
-
-main(sys.argv[1], sys.argv[2], int(sys.argv[3]), int(sys.argv[4]))
From ee765d998540f42ace69ded0f1ad8797930b7010 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sun, 17 Jul 2022 15:42:32 +0800
Subject: [PATCH 12/21] remove print statements
---
examples/clinical_pipeline/utterance_searcher.py | 2 --
1 file changed, 2 deletions(-)
diff --git a/examples/clinical_pipeline/utterance_searcher.py b/examples/clinical_pipeline/utterance_searcher.py
index 741514c4..b38a9a24 100644
--- a/examples/clinical_pipeline/utterance_searcher.py
+++ b/examples/clinical_pipeline/utterance_searcher.py
@@ -83,7 +83,6 @@ def _process(self, input_pack: DataPack):
db_id = sqlite_insert(conn, "stave_backend_document", item)
answers += [db_id]
- print(pack_id, db_id)
if len(answers) == 0:
create_utterance(
@@ -97,7 +96,6 @@ def _process(self, input_pack: DataPack):
"I found the following results:
-- "
+ "
-- ".join(links)
)
- print(response_text)
create_utterance(input_pack, response_text, "ai")
else:
From 9ed519b54d4d486b5ea84a772e48bf36291022e6 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sun, 17 Jul 2022 15:57:16 +0800
Subject: [PATCH 13/21] remove the handling of super.
---
examples/clinical_pipeline/mimic3_note_reader.py | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/examples/clinical_pipeline/mimic3_note_reader.py b/examples/clinical_pipeline/mimic3_note_reader.py
index 70604871..b3f02de6 100644
--- a/examples/clinical_pipeline/mimic3_note_reader.py
+++ b/examples/clinical_pipeline/mimic3_note_reader.py
@@ -75,8 +75,6 @@ def _parse_pack(self, row: List[str]) -> Iterator[DataPack]:
@classmethod
def default_configs(cls):
- config = super().default_configs()
# If this is set (>0), the reader will only read up to
# the number specified.
- config["max_num_notes"] = -1
- return config
+ return {'max_num_notes':-1}
From 929d08ca82f54a23d1af4dbea3a36ae091951617 Mon Sep 17 00:00:00 2001
From: Leolty <569359974@qq.com>
Date: Sun, 17 Jul 2022 16:15:19 +0800
Subject: [PATCH 14/21] first version demo commit
---
.../clinical_config.yml | 13 ++
.../clinical_pipeline_processor.py | 56 ++++++
.../search_engine_streamlit/demo/__init__.py | 1 +
.../search_engine_streamlit/demo/clinical.py | 49 +++++
.../download_models.py | 25 +++
.../mimic3_note_reader.py | 80 ++++++++
.../search_engine_streamlit/multiple_pages.py | 180 ++++++++++++++++++
.../search_engine_streamlit/pack_searcher.py | 124 ++++++++++++
examples/search_engine_streamlit/pipelines.py | 26 +++
.../search_engine_streamlit/search_utils.py | 56 ++++++
examples/search_engine_streamlit/templates.py | 17 ++
11 files changed, 627 insertions(+)
create mode 100644 examples/search_engine_streamlit/clinical_config.yml
create mode 100644 examples/search_engine_streamlit/clinical_pipeline_processor.py
create mode 100644 examples/search_engine_streamlit/demo/__init__.py
create mode 100644 examples/search_engine_streamlit/demo/clinical.py
create mode 100644 examples/search_engine_streamlit/download_models.py
create mode 100644 examples/search_engine_streamlit/mimic3_note_reader.py
create mode 100644 examples/search_engine_streamlit/multiple_pages.py
create mode 100644 examples/search_engine_streamlit/pack_searcher.py
create mode 100644 examples/search_engine_streamlit/pipelines.py
create mode 100644 examples/search_engine_streamlit/search_utils.py
create mode 100644 examples/search_engine_streamlit/templates.py
diff --git a/examples/search_engine_streamlit/clinical_config.yml b/examples/search_engine_streamlit/clinical_config.yml
new file mode 100644
index 00000000..ef8641ea
--- /dev/null
+++ b/examples/search_engine_streamlit/clinical_config.yml
@@ -0,0 +1,13 @@
+BioBERTNERPredictor:
+ model_path: "resources/NCBI-disease"
+ ner_type: "DISEASE"
+ ignore_labels: ["O"]
+
+BERTTokenizer:
+ model_path: "resources/NCBI-disease"
+
+Stave:
+ stave_db_path: "C://Users//Leo//.stave//db.sqlite3"
+ url: "http://localhost:8899"
+ username: admin
+ pw: admin
diff --git a/examples/search_engine_streamlit/clinical_pipeline_processor.py b/examples/search_engine_streamlit/clinical_pipeline_processor.py
new file mode 100644
index 00000000..38907b48
--- /dev/null
+++ b/examples/search_engine_streamlit/clinical_pipeline_processor.py
@@ -0,0 +1,56 @@
+import time
+
+
+import yaml
+from forte.common.configuration import Config
+from forte.data.data_pack import DataPack
+from forte.pipeline import Pipeline
+from forte.processors.writers import PackIdJsonPackWriter
+from fortex.elastic import ElasticSearchPackIndexProcessor
+from fortex.huggingface import BioBERTNERPredictor
+from fortex.huggingface.transformers_processor import BERTTokenizer
+
+from mimic3_note_reader import Mimic3DischargeNoteReader
+from fortex.nltk import NLTKSentenceSegmenter
+
+
+def main(input_path: str, output_path: str, max_packs: int = -1):
+    """Run the MIMIC-III notes through NER, index them and write the packs.
+
+    ``max_packs`` is forwarded to the reader as ``max_num_notes``.
+    """
+    with open("clinical_config.yml", "r") as config_file:
+        config = Config(yaml.safe_load(config_file), default_hparams=None)
+
+    pl = Pipeline[DataPack]()
+    pl.set_reader(
+        Mimic3DischargeNoteReader(), config={"max_num_notes": max_packs}
+    )
+
+    pl.add(NLTKSentenceSegmenter())
+    pl.add(BERTTokenizer(), config=config.BERTTokenizer)
+    pl.add(BioBERTNERPredictor(), config=config.BioBERTNERPredictor)
+    pl.add(
+        ElasticSearchPackIndexProcessor(),
+        {
+            "indexer": {
+                "other_kwargs": {"refresh": True},
+            }
+        },
+    )
+    pl.add(
+        PackIdJsonPackWriter(),
+        {
+            "output_dir": output_path,
+            "indent": 2,
+            "overwrite": True,
+            "drop_record": True,
+            "zip_pack": False,
+        },
+    )
+
+    pl.initialize()
+
+    for count, _ in enumerate(pl.process_dataset(input_path), start=1):
+        if count % 50 == 0:
+            print(f"{time.strftime('%m-%d %H:%M')}: Processed {count} packs")
\ No newline at end of file
diff --git a/examples/search_engine_streamlit/demo/__init__.py b/examples/search_engine_streamlit/demo/__init__.py
new file mode 100644
index 00000000..49ecbbf8
--- /dev/null
+++ b/examples/search_engine_streamlit/demo/__init__.py
@@ -0,0 +1 @@
+# ***automatically_generated***
diff --git a/examples/search_engine_streamlit/demo/clinical.py b/examples/search_engine_streamlit/demo/clinical.py
new file mode 100644
index 00000000..68541b46
--- /dev/null
+++ b/examples/search_engine_streamlit/demo/clinical.py
@@ -0,0 +1,49 @@
+# ***automatically_generated***
+# ***source json:examples/clinical_pipeline/clinical_onto.json***
+# flake8: noqa
+# mypy: ignore-errors
+# pylint: skip-file
+"""
+Automatically generated ontology clinical. Do not change manually.
+"""
+
+from dataclasses import dataclass
+from forte.data.data_pack import DataPack
+from forte.data.ontology.top import Annotation
+from ft.onto.base_ontology import EntityMention
+
+__all__ = [
+ "ClinicalEntityMention",
+ "Description",
+ "Body",
+]
+
+
+@dataclass
+class ClinicalEntityMention(EntityMention):
+ """
+ A span based annotation `ClinicalEntityMention`, normally used to represent an Entity Mention in a piece of clinical text.
+ """
+
+ def __init__(self, pack: DataPack, begin: int, end: int):
+ super().__init__(pack, begin, end)
+
+
+@dataclass
+class Description(Annotation):
+ """
+ A span based annotation `Description`, used to represent the description in a piece of clinical note.
+ """
+
+ def __init__(self, pack: DataPack, begin: int, end: int):
+ super().__init__(pack, begin, end)
+
+
+@dataclass
+class Body(Annotation):
+ """
+ A span based annotation `Body`, used to represent the actual content in a piece of clinical note.
+ """
+
+ def __init__(self, pack: DataPack, begin: int, end: int):
+ super().__init__(pack, begin, end)
diff --git a/examples/search_engine_streamlit/download_models.py b/examples/search_engine_streamlit/download_models.py
new file mode 100644
index 00000000..db0d7cca
--- /dev/null
+++ b/examples/search_engine_streamlit/download_models.py
@@ -0,0 +1,25 @@
+from forte.data.data_utils import maybe_download
+
+# download resources
+urls = [
+ "https://drive.google.com/file/d/15RSfFkW9syQKtx-_fQ9KshN3BJ27Jf8t/"
+ "view?usp=sharing",
+ "https://drive.google.com/file/d/1Nh7D6Xam5JefdoSXRoL7S0DZK1d4i2UK/"
+ "view?usp=sharing",
+ "https://drive.google.com/file/d/1YWcI60lGKtTFH01Ai1HnwOKBsrFf2r29/"
+ "view?usp=sharing",
+ "https://drive.google.com/file/d/1ElHUEMPQIuWmV0GimroqFphbCvFKskYj/"
+ "view?usp=sharing",
+ "https://drive.google.com/file/d/1EhMXlieoEg-bGUbbQ2vN-iyNJvC4Dajl/"
+ "view?usp=sharing",
+]
+
+filenames = [
+ "config.json",
+ "pytorch_model.bin",
+ "special_tokens_map.json",
+ "tokenizer_config.json",
+ "vocab.txt",
+]
+
+maybe_download(urls=urls, path="resources/NCBI-disease", filenames=filenames)
diff --git a/examples/search_engine_streamlit/mimic3_note_reader.py b/examples/search_engine_streamlit/mimic3_note_reader.py
new file mode 100644
index 00000000..c3733e1f
--- /dev/null
+++ b/examples/search_engine_streamlit/mimic3_note_reader.py
@@ -0,0 +1,80 @@
+# Copyright 2021 The Forte Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import csv
+import logging
+from pathlib import Path
+from typing import Any, Iterator, Union, List
+
+from smart_open import open
+
+from demo.clinical import Description, Body
+from forte.data.data_pack import DataPack
+from forte.data.base_reader import PackReader
+from ft.onto.base_ontology import Document
+
+
+class Mimic3DischargeNoteReader(PackReader):
+    """Reader that turns MIMIC3 discharge-note CSV rows into plain text packs.
+
+    The first row is taken as the header and yields no pack.
+    For more information for the dataset, visit:
+      https://mimic.physionet.org/
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.headers: List[str] = []
+        self.text_col = -1  # Falls back to the last column.
+        self.description_col = 0  # Falls back to the first column.
+        self.__note_count = 0  # Number of note packs produced so far.
+
+    def _collect(self, mimic3_path: Union[Path, str]) -> Iterator[Any]:  # type: ignore
+        with open(mimic3_path) as csv_file:
+            for record in csv.reader(csv_file):
+                limit = self.configs.max_num_notes
+                if limit > 0 and self.__note_count >= limit:
+                    return
+                yield record
+
+    def _parse_pack(self, row: List[str]) -> Iterator[DataPack]:
+        if not self.headers:
+            # Header row: remember it and locate the columns of interest.
+            self.headers.extend(row)
+            for col, name in enumerate(self.headers):
+                if name == "TEXT":
+                    self.text_col = col
+                    logging.info("Text Column is %d", col)
+                if name == "DESCRIPTION":
+                    self.description_col = col
+                    logging.info("Description Column is %d", col)
+            return
+
+        pack: DataPack = DataPack()
+        description: str = row[self.description_col]
+        delimiter = "\n-----------------\n"
+        full_text = description + delimiter + row[self.text_col]
+        pack.set_text(full_text)
+
+        body_begin = len(description) + len(delimiter)
+        Description(pack, 0, len(description))
+        Body(pack, body_begin, len(full_text))
+        Document(pack, 0, len(pack.text))
+        self.__note_count += 1
+        yield pack
+
+    @classmethod
+    def default_configs(cls):
+        # A positive max_num_notes caps how many notes the reader will read.
+        return {"max_num_notes": -1}
diff --git a/examples/search_engine_streamlit/multiple_pages.py b/examples/search_engine_streamlit/multiple_pages.py
new file mode 100644
index 00000000..eb00409e
--- /dev/null
+++ b/examples/search_engine_streamlit/multiple_pages.py
@@ -0,0 +1,180 @@
+from __future__ import annotations
+import streamlit as st
+from elasticsearch import Elasticsearch
+from search_utils import all_search, index_search
+import templates
+import spacy_streamlit
+import json
+from pipelines import process_data
+
+
+st.set_page_config(page_title="ForteHealth_Search_Engine", layout="wide")
+
+PAGES = ["Search Engine", "Plain Text Input"]
+es = Elasticsearch(hosts=["http://localhost:9200/"])
+INDEX = "elastic_indexer"
+
+page = st.sidebar.selectbox("Functions:", PAGES)
+
+
+def _collect_records(results):
+    """Map the ``doc_id`` of every hit to ``[content, entity_spans]``.
+
+    Entity spans are read from the serialized pack stored in the
+    ``pack_info`` field of each hit; only annotations whose ``py/object``
+    contains ``EntityMention`` are kept.
+    """
+    records = {}
+    for hit in results["hits"]["hits"]:
+        res = hit["_source"]
+        annotations = json.loads(res["pack_info"])["py/state"]["annotations"]
+        ents = [
+            {
+                "start": annotation["py/state"]["_span"]["begin"],
+                "end": annotation["py/state"]["_span"]["end"],
+                "label": annotation["py/state"]["ner_type"],
+            }
+            for annotation in annotations
+            if "EntityMention" in annotation["py/object"]
+        ]
+        records[res["doc_id"]] = [res["content"], ents]
+    return records
+
+
+def _show_report_picker(records, viewer_column):
+    """Let the user pick a report and render its entities.
+
+    The radio selector is drawn in the currently active container and the
+    entity visualization in ``viewer_column``. A warning is shown instead
+    when ``records`` is empty.
+    """
+    options = list(records)
+    if not options:
+        st.warning("No results")
+        return
+
+    myradio = st.radio(
+        label="Select a report:",
+        options=options,
+        index=0,
+        format_func=lambda x: f"Report# {x}",
+        key="radio_demo",
+        help="Click the radio button please",
+    )
+
+    with viewer_column:
+        data = [
+            {
+                "text": records[myradio][0],
+                "ents": records[myradio][1],
+                "title": None,
+            }
+        ]
+
+        spacy_streamlit.visualize_ner(
+            data,
+            labels=["DISEASE"],
+            show_table=False,
+            manual=True,
+            title="Disease NER Detection",
+        )
+
+
+# search engine
+if page == PAGES[0]:
+    r1c1, r1c2 = st.columns([6, 6])
+    with r1c1:
+        st.title("Search the MIMIC III Data...")
+        search = st.text_input("Enter search words:")
+
+        if search:
+            results = index_search(es, INDEX, search)
+            total_hits = results["aggregations"]["match_count"]["value"]
+            # show number of results and time taken
+            st.write(
+                templates.number_of_results(total_hits, results["took"] / 1000),
+                unsafe_allow_html=True,
+            )
+        else:
+            # Without a query, list every document in the index.
+            results = all_search(es, INDEX)
+
+        # Both cases share the same record extraction and rendering.
+        _show_report_picker(_collect_records(results), r1c2)
+
+
+# Plain Text
+if page == PAGES[1]:
+
+    st.title("Named Entity Recognition Visualization")
+    form = st.form("ner")
+    text = form.text_area(
+        "Input your text here:", value="The CEO of Tesla is Ellon Musk."
+    )
+
+    # The button's return value is not needed: the entities below are
+    # computed and rendered on every run of the script.
+    form.form_submit_button("Visualize")
+
+    ents = process_data(text)
+
+    doc = [
+        {
+            "text": text,
+            "ents": ents,
+            "title": None,
+        }
+    ]
+
+    spacy_streamlit.visualize_ner(
+        doc,
+        labels=[
+            "ORG",
+            "DATE",
+            "NORP",
+            "ORDINAL",
+            "CARDINAL",
+            "PERSON",
+            # Was misspelled "PERSENT", which matches no entity label.
+            "PERCENT",
+            "GPE",
+            "QUANTITY",
+            "LAW",
+            "MONEY",
+        ],
+        show_table=False,
+        title="NER",
+        manual=True,
+    )
diff --git a/examples/search_engine_streamlit/pack_searcher.py b/examples/search_engine_streamlit/pack_searcher.py
new file mode 100644
index 00000000..741514c4
--- /dev/null
+++ b/examples/search_engine_streamlit/pack_searcher.py
@@ -0,0 +1,124 @@
+import os
+import logging
+import sqlite3
+from typing import Dict, Any, Optional, List
+from fortex.elastic import ElasticSearchIndexer
+
+from forte.common import Resources, ProcessorConfigError
+from forte.common.configuration import Config
+from forte.data.common_entry_utils import create_utterance, get_last_utterance
+from forte.data.data_pack import DataPack
+from forte.processors.base import PackProcessor
+from ft.onto.base_ontology import Utterance
+
+
+def sqlite_insert(conn, table, row):
+    """Insert one row into ``table`` and return the id of the new row.
+
+    Args:
+        conn: An open sqlite3 connection; the insert is committed on it.
+        table: Name of the target table.
+        row: Mapping from column name to value. Column names are
+            double-quoted in the statement and values are bound through
+            named placeholders.
+
+    Returns:
+        The ``lastrowid`` reported by the cursor after the insert.
+    """
+    column_names = list(row.keys())
+    quoted_columns: str = ", ".join(f'"{name}"' for name in column_names)
+    placeholders: str = ", ".join(f":{name}" for name in column_names)
+    statement: str = (
+        f'INSERT INTO "{table}" ({quoted_columns}) VALUES ({placeholders})'
+    )
+
+    cur = conn.cursor()
+    cur.execute(statement, row)
+    conn.commit()
+    return cur.lastrowid
+
+
+def create_links(url_stub: str, ids: List[int]) -> List[str]:
+    """Build one display label per document id.
+
+    Args:
+        url_stub: Base URL of the viewer; surrounding slashes are stripped.
+        ids: Document ids to create labels for.
+
+    Returns:
+        A list with one ``Report #<id>`` string per entry of ``ids``, in the
+        same order.
+    """
+    # NOTE(review): the normalized url_stub does not appear in the labels
+    # below -- confirm whether each entry was meant to embed a URL.
+    url_stub = url_stub.strip("/")
+    return [f"Report #{doc_id}" for doc_id in ids]
+
+
+class LastUtteranceSearcher(PackProcessor):
+    """Answer the last user utterance with documents found in the index.
+
+    The text of the last utterance from the user is used as a match query
+    against an Elasticsearch index. Every hit is written into the Stave
+    sqlite database, and a reply utterance listing the stored documents is
+    added to the pack.
+    """
+
+    # pylint: disable=attribute-defined-outside-init
+
+    def initialize(self, resources: Resources, configs: Config):
+        """Create the indexer and validate the configuration.
+
+        Raises:
+            ProcessorConfigError: If ``query_result_project_id`` is negative
+                or the Stave database file does not exist.
+        """
+        super().initialize(resources, configs)
+        self.index = ElasticSearchIndexer(self.configs.indexer.hparams)
+        if self.configs.query_result_project_id < 0:
+            raise ProcessorConfigError("Query Result Project is not set.")
+
+        # The default path starts with "~", which neither os.path.exists nor
+        # sqlite3.connect expands, so resolve it once here.
+        self.stave_db_path = os.path.expanduser(self.configs.stave_db_path)
+        if not os.path.exists(self.stave_db_path):
+            raise ProcessorConfigError(
+                f"Cannot find Stave DB at: {self.stave_db_path}"
+            )
+
+    def _store_hits(self, hits) -> List[int]:
+        """Write every hit into the Stave database and return the row ids."""
+        answers: List[int] = []
+        conn = sqlite3.connect(self.stave_db_path)
+        try:
+            for idx, hit in enumerate(hits):
+                source = hit["_source"]
+                # The raw pack string and pack id (not database id)
+                raw_pack_str: str = source["pack_info"]
+                pack_id: str = source["doc_id"]
+
+                # Now you can write the pack into the database and generate url.
+                item = {
+                    "name": f"clinical_results_{idx}",
+                    "textPack": raw_pack_str,
+                    "project_id": self.configs.query_result_project_id,
+                }
+
+                db_id = sqlite_insert(conn, "stave_backend_document", item)
+                answers.append(db_id)
+                logging.info("Stored pack %s as document %s", pack_id, db_id)
+        finally:
+            # Release the database handle even if an insert fails.
+            conn.close()
+        return answers
+
+    def _process(self, input_pack: DataPack):
+        """Search with the last user utterance and append the reply."""
+        # Make sure we take the last utterance from the user.
+        utterance: Optional[Utterance] = get_last_utterance(input_pack, "user")
+
+        if utterance is None:
+            logging.info("Cannot get another utterance.")
+            create_utterance(
+                input_pack,
+                "Hey, I didn't get what you say, could you try again?",
+                "ai",
+            )
+            return
+
+        logging.info("The last utterance is %s", utterance)
+        # Create the query using the last utterance from user.
+        size = self.configs.size or 1000
+        field = self.configs.field or "content"
+        query_value = {
+            "query": {"match": {field: utterance.text}},
+            "size": size,
+        }
+
+        # Search against the index.
+        results = self.index.search(query_value)
+        answers = self._store_hits(results["hits"]["hits"])
+
+        if not answers:
+            create_utterance(
+                input_pack,
+                "No results found. Please try another query.",
+                "ai",
+            )
+            return
+
+        links: List[str] = create_links(self.configs.url_stub, answers)
+        # The line break is written as an escape so that the string literal
+        # stays on one physical line.
+        response_text: str = (
+            "I found the following results:\n-- " + "\n-- ".join(links)
+        )
+        logging.info("Response: %s", response_text)
+
+        create_utterance(input_pack, response_text, "ai")
+
+    @classmethod
+    def default_configs(cls) -> Dict[str, Any]:
+        """Return the default configuration of the processor."""
+        return {
+            "size": 5,
+            "field": "content",
+            "indexer": {
+                "name": "ElasticSearchIndexer",
+                "hparams": ElasticSearchIndexer.default_configs(),
+                "other_kwargs": {"request_timeout": 10, "refresh": False},
+            },
+            "stave_db_path": "~/projects/stave/simple-backend/db.sqlite3",
+            "url_stub": "http://localhost:3000",
+            # Must be set to a non-negative id; initialize() rejects -1.
+            "query_result_project_id": -1,
+        }
diff --git a/examples/search_engine_streamlit/pipelines.py b/examples/search_engine_streamlit/pipelines.py
new file mode 100644
index 00000000..5e711fea
--- /dev/null
+++ b/examples/search_engine_streamlit/pipelines.py
@@ -0,0 +1,26 @@
+from forte.data.readers import StringReader
+from fortex.spacy import SpacyProcessor
+from forte.data.data_pack import DataPack
+from forte import Pipeline
+from ft.onto.base_ontology import EntityMention
+from typing import List
+
+
+def process_data(text: str) -> List[dict]:
+    """Run spaCy NER over ``text`` and return the entity spans found.
+
+    Args:
+        text: The raw text to analyse.
+
+    Returns:
+        One dict per entity mention with the keys ``start``, ``end`` and
+        ``label``. The list is empty when the pipeline produces no pack or
+        no entity.
+    """
+    pipeline: Pipeline = Pipeline[DataPack]()
+    pipeline.set_reader(StringReader())
+    pipeline.add(SpacyProcessor(), {"processors": ["sentence", "tokenize", "ner"]})
+
+    # Start from an empty result so that the function returns cleanly even
+    # when no pack is produced.
+    res: List[dict] = []
+    for pack in pipeline.initialize().process_dataset(text):
+        res.extend(
+            {
+                "start": int(mention.begin),
+                "end": int(mention.end),
+                "label": mention.ner_type,
+            }
+            for mention in pack.get(EntityMention)
+        )
+
+    return res
diff --git a/examples/search_engine_streamlit/search_utils.py b/examples/search_engine_streamlit/search_utils.py
new file mode 100644
index 00000000..5aacdca1
--- /dev/null
+++ b/examples/search_engine_streamlit/search_utils.py
@@ -0,0 +1,56 @@
+def all_search(es, index: str) -> dict:
+    """Run a ``match_all`` query against ``index``.
+
+    Args:
+        es: Elasticsearch client instance.
+        index: Name of the index we are going to use.
+
+    Returns:
+        The value returned by ``es.search`` for the request.
+    """
+    match_everything = {"query": {"match_all": {}}}
+    return es.search(index=index, body=match_everything)
+
+
+def index_search(es, index: str, keywords: str) -> dict:
+    """Search the ``content`` field of ``index`` for ``keywords``.
+
+    The request also asks for highlighting on ``content`` and for a
+    ``match_count`` aggregation that counts ``_id`` values.
+
+    Args:
+        es: Elasticsearch client instance.
+        index: Name of the index we are going to use.
+        keywords: Search keywords, sent as a ``query_string`` query with
+            ``AND`` as the default operator.
+
+    Returns:
+        The value returned by ``es.search`` for the request.
+    """
+    keyword_clause = {
+        "query_string": {
+            "query": keywords,
+            "fields": ["content"],
+            "default_operator": "AND",
+        }
+    }
+    highlight = {
+        "pre_tags": [" "],
+        "post_tags": [""],
+        "fields": {"content": {}},
+    }
+    match_count = {"value_count": {"field": "_id"}}
+
+    # No "from"/"size" is sent, so paging is left to the server defaults.
+    body = {
+        "query": {"bool": {"must": [keyword_clause]}},
+        "highlight": highlight,
+        "aggs": {"match_count": match_count},
+    }
+    return es.search(index=index, body=body)
+
+
+def do():
+    """Return the constant string ``"no"``."""
+    reply = "no"
+    return reply
diff --git a/examples/search_engine_streamlit/templates.py b/examples/search_engine_streamlit/templates.py
new file mode 100644
index 00000000..f5013f9c
--- /dev/null
+++ b/examples/search_engine_streamlit/templates.py
@@ -0,0 +1,17 @@
+def number_of_results(total_hits: int, duration: float) -> str:
+ """HTML scripts to display number of results and duration."""
+ return f"""
+