Skip to content

Commit

Permalink
Merge pull request #104 from oindrillac/inference-pipeline
Browse files Browse the repository at this point in the history
added inference pipeline
  • Loading branch information
Karanraj Chauhan authored Nov 15, 2021
2 parents ec09e5c + fb58a63 commit 5a82b34
Show file tree
Hide file tree
Showing 6 changed files with 284 additions and 8 deletions.
2 changes: 1 addition & 1 deletion notebooks/demo2/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
BASE_CURATION_FOLDER = DATA_FOLDER / "curation"

CHECKPOINT_S3_PREFIX = "corpdata/saved_models"
DATA_S3_PREFIX = "corpdata/ESG"
DATA_S3_PREFIX = "corpdata/ESG/pipeline_run/samples_4"
BASE_PDF_S3_PREFIX = f"{DATA_S3_PREFIX}/pdfs"
BASE_ANNOTATION_S3_PREFIX = f"{DATA_S3_PREFIX}/annotations"
BASE_EXTRACTION_S3_PREFIX = f"{DATA_S3_PREFIX}/extraction"
Expand Down
30 changes: 30 additions & 0 deletions notebooks/demo2/infer_kpi.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,36 @@
"infer_config = QAInferConfig(\"infer_demo\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5607a79-d139-40e4-ae56-646cd54a1838",
"metadata": {},
"outputs": [],
"source": [
"# When running in Automation using Elyra and Kubeflow Pipelines,\n",
"# set the environment variable AUTOMATION=1 (any non-empty value enables this block)\n",
"if os.getenv(\"AUTOMATION\"):\n",
" \n",
" # inference results dir\n",
" if not os.path.exists(infer_config.relevance_dir['Text']):\n",
" pathlib.Path(infer_config.relevance_dir['Text']).mkdir(parents=True, exist_ok=True)\n",
"\n",
" # kpi inference results dir\n",
" if not os.path.exists(infer_config.result_dir['Text']):\n",
" pathlib.Path(infer_config.result_dir['Text']).mkdir(parents=True, exist_ok=True)\n",
"\n",
" # load dir\n",
" if not os.path.exists(infer_config.load_dir['Text']):\n",
" pathlib.Path(infer_config.load_dir['Text']).mkdir(parents=True, exist_ok=True)\n",
"\n",
" # download relevance predictions from s3 \n",
" s3c.download_files_in_prefix_to_dir(\n",
" config.BASE_INFER_RELEVANCE_S3_PREFIX,\n",
" infer_config.relevance_dir['Text'],\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 5,
Expand Down
31 changes: 30 additions & 1 deletion notebooks/demo2/infer_relevance.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,35 @@
"infer_config = InferConfig(\"infer_demo\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "88d46b7b-f93a-4b2b-a631-39c05cbf81f0",
"metadata": {},
"outputs": [],
"source": [
"# When running in Automation using Elyra and Kubeflow Pipelines,\n",
"# set the environment variable AUTOMATION=1 (any non-empty value enables this block)\n",
"if os.getenv(\"AUTOMATION\"):\n",
" # extracted pdfs\n",
" if not os.path.exists(config.BASE_EXTRACTION_FOLDER):\n",
" config.BASE_EXTRACTION_FOLDER.mkdir(parents=True, exist_ok=True)\n",
" \n",
" # inference results dir\n",
" if not os.path.exists(infer_config.result_dir['Text']):\n",
" pathlib.Path(infer_config.result_dir['Text']).mkdir(parents=True, exist_ok=True)\n",
"\n",
" # load dir\n",
" if not os.path.exists(infer_config.load_dir['Text']):\n",
" pathlib.Path(infer_config.load_dir['Text']).mkdir(parents=True, exist_ok=True)\n",
"\n",
" # download extracted pdfs from s3 \n",
" s3c.download_files_in_prefix_to_dir(\n",
" config.BASE_EXTRACTION_S3_PREFIX,\n",
" config.BASE_EXTRACTION_FOLDER,\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 5,
Expand Down Expand Up @@ -611,7 +640,7 @@
"# upload the predicted files to s3\n",
"s3c.upload_files_in_dir_to_prefix(\n",
" infer_config.result_dir['Text'],\n",
" config.BASE_INFER_S3_PREFIX\n",
" config.BASE_INFER_RELEVANCE_S3_PREFIX\n",
")"
]
},
Expand Down
217 changes: 217 additions & 0 deletions notebooks/demo2/inference.pipeline
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
{
"doc_type": "pipeline",
"version": "3.0",
"json_schema": "http://api.dataplatform.ibm.com/schemas/common-pipeline/pipeline-flow/pipeline-flow-v3-schema.json",
"id": "a8a93b22-37c1-45e1-ac1f-aab89ae94740",
"primary_pipeline": "e765911e-1682-43f6-8e89-7f5a949cd22a",
"pipelines": [
{
"id": "e765911e-1682-43f6-8e89-7f5a949cd22a",
"nodes": [
{
"id": "7c05fc65-926d-46ee-a166-2fe0b4bf44fe",
"type": "execution_node",
"op": "execute-notebook-node",
"app_data": {
"filename": "pdf_text_extraction.ipynb",
"runtime_image": "quay.io/os-climate/aicoe-osc-demo:v0.11.0",
"env_vars": [
"S3_ACCESS_KEY=",
"S3_SECRET_KEY=",
"S3_ENDPOINT=https://s3.us-east-1.amazonaws.com",
"S3_BUCKET=ocp-odh-os-demo-s3",
"AUTOMATION=1"
],
"include_subdirectories": false,
"invalidNodeError": null,
"outputs": [],
"dependencies": [
"config.py"
],
"cpu": 1,
"memory": 16,
"ui_data": {
"label": "pdf_text_extraction.ipynb",
"image": "data:image/svg+xml;utf8,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20width%3D%2216%22%20viewBox%3D%220%200%2022%2022%22%3E%0A%20%20%3Cg%20class%3D%22jp-icon-warn0%20jp-icon-selectable%22%20fill%3D%22%23EF6C00%22%3E%0A%20%20%20%20%3Cpath%20d%3D%22M18.7%203.3v15.4H3.3V3.3h15.4m1.5-1.5H1.8v18.3h18.3l.1-18.3z%22%2F%3E%0A%20%20%20%20%3Cpath%20d%3D%22M16.5%2016.5l-5.4-4.3-5.6%204.3v-11h11z%22%2F%3E%0A%20%20%3C%2Fg%3E%0A%3C%2Fsvg%3E%0A",
"x_pos": 84,
"y_pos": 203,
"description": "Notebook file"
}
},
"inputs": [
{
"id": "inPort",
"app_data": {
"ui_data": {
"cardinality": {
"min": 0,
"max": -1
},
"label": "Input Port"
}
}
}
],
"outputs": [
{
"id": "outPort",
"app_data": {
"ui_data": {
"cardinality": {
"min": 0,
"max": -1
},
"label": "Output Port"
}
}
}
]
},
{
"id": "04b7e52d-af9a-47c2-8215-f6c588e16144",
"type": "execution_node",
"op": "execute-notebook-node",
"app_data": {
"filename": "infer_relevance.ipynb",
"runtime_image": "quay.io/os-climate/aicoe-osc-demo:v0.11.0",
"env_vars": [
"S3_ACCESS_KEY=",
"S3_SECRET_KEY=",
"S3_ENDPOINT=https://s3.us-east-1.amazonaws.com",
"S3_BUCKET=ocp-odh-os-demo-s3",
"AUTOMATION=1"
],
"include_subdirectories": false,
"invalidNodeError": null,
"outputs": [],
"dependencies": [
"config_farm_train.py",
"config.py"
],
"cpu": 1,
"memory": 16,
"gpu": 1,
"ui_data": {
"label": "infer_relevance.ipynb",
"image": "data:image/svg+xml;utf8,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20width%3D%2216%22%20viewBox%3D%220%200%2022%2022%22%3E%0A%20%20%3Cg%20class%3D%22jp-icon-warn0%20jp-icon-selectable%22%20fill%3D%22%23EF6C00%22%3E%0A%20%20%20%20%3Cpath%20d%3D%22M18.7%203.3v15.4H3.3V3.3h15.4m1.5-1.5H1.8v18.3h18.3l.1-18.3z%22%2F%3E%0A%20%20%20%20%3Cpath%20d%3D%22M16.5%2016.5l-5.4-4.3-5.6%204.3v-11h11z%22%2F%3E%0A%20%20%3C%2Fg%3E%0A%3C%2Fsvg%3E%0A",
"x_pos": 315,
"y_pos": 203,
"description": "Notebook file"
}
},
"inputs": [
{
"id": "inPort",
"app_data": {
"ui_data": {
"cardinality": {
"min": 0,
"max": -1
},
"label": "Input Port"
}
},
"links": [
{
"id": "be1eb5bf-3bb7-47b7-bfbe-1d53b3887756",
"node_id_ref": "7c05fc65-926d-46ee-a166-2fe0b4bf44fe",
"port_id_ref": "outPort"
}
]
}
],
"outputs": [
{
"id": "outPort",
"app_data": {
"ui_data": {
"cardinality": {
"min": 0,
"max": -1
},
"label": "Output Port"
}
}
}
]
},
{
"id": "baab27b2-9d86-4835-8d2d-8a785f55765c",
"type": "execution_node",
"op": "execute-notebook-node",
"app_data": {
"filename": "infer_kpi.ipynb",
"runtime_image": "quay.io/os-climate/aicoe-osc-demo:v0.11.0",
"env_vars": [
"S3_ACCESS_KEY=",
"S3_SECRET_KEY=",
"S3_ENDPOINT=https://s3.us-east-1.amazonaws.com",
"S3_BUCKET=ocp-odh-os-demo-s3",
"AUTOMATION=1"
],
"include_subdirectories": false,
"invalidNodeError": null,
"outputs": [],
"dependencies": [
"config_qa_farm_train.py",
"config_farm_train.py",
"config.py"
],
"cpu": 1,
"memory": 16,
"ui_data": {
"label": "infer_kpi.ipynb",
"image": "data:image/svg+xml;utf8,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20width%3D%2216%22%20viewBox%3D%220%200%2022%2022%22%3E%0A%20%20%3Cg%20class%3D%22jp-icon-warn0%20jp-icon-selectable%22%20fill%3D%22%23EF6C00%22%3E%0A%20%20%20%20%3Cpath%20d%3D%22M18.7%203.3v15.4H3.3V3.3h15.4m1.5-1.5H1.8v18.3h18.3l.1-18.3z%22%2F%3E%0A%20%20%20%20%3Cpath%20d%3D%22M16.5%2016.5l-5.4-4.3-5.6%204.3v-11h11z%22%2F%3E%0A%20%20%3C%2Fg%3E%0A%3C%2Fsvg%3E%0A",
"x_pos": 537,
"y_pos": 202,
"description": "Notebook file"
}
},
"inputs": [
{
"id": "inPort",
"app_data": {
"ui_data": {
"cardinality": {
"min": 0,
"max": -1
},
"label": "Input Port"
}
},
"links": [
{
"id": "a2cc5551-5039-45c4-b9ec-d32f4ddd3fd5",
"node_id_ref": "04b7e52d-af9a-47c2-8215-f6c588e16144",
"port_id_ref": "outPort"
}
]
}
],
"outputs": [
{
"id": "outPort",
"app_data": {
"ui_data": {
"cardinality": {
"min": 0,
"max": -1
},
"label": "Output Port"
}
}
}
]
}
],
"app_data": {
"ui_data": {
"comments": []
},
"version": 3
},
"runtime_ref": ""
}
],
"schemas": []
}
12 changes: 6 additions & 6 deletions notebooks/demo2/pdf_text_extraction.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -99,7 +99,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -119,7 +119,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -132,14 +132,14 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['/opt/app-root/src/automation-flags/aicoe-osc-demo/data/pdfs/sustainability-report-2019.pdf']\n"
"['/opt/app-root/src/aicoe-osc-demo/data/pdfs/sustainability-report-2019.pdf', '/opt/app-root/src/aicoe-osc-demo/data/pdfs/75506106_BOA_2016-12-31.pdf', '/opt/app-root/src/aicoe-osc-demo/data/pdfs/90044053_Fisher & Paykel Hl_2017-11-07.pdf', '/opt/app-root/src/aicoe-osc-demo/data/pdfs/88094292_Carriage Svcs Inc_2019-07-23.pdf']\n"
]
}
],
Expand All @@ -150,7 +150,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
Expand Down
File renamed without changes.

0 comments on commit 5a82b34

Please sign in to comment.