Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

extract-data-csv plugin #175

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions utils/extract-data-csv-plugin/.bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# bump2version configuration for the extract-data-csv plugin.
# Every file listed in a [bumpversion:file:...] section must exist, otherwise
# the bump aborts before any file is rewritten.
[bumpversion]
current_version = 0.1.0
commit = False
tag = False
# Accepts "X.Y.Z" and "X.Y.Z-<release><N>" (for example 0.1.0-dev3).
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?
# Output formats: the long form is used while a release/dev part is set,
# the short form once the release part holds its optional value.
serialize =
    {major}.{minor}.{patch}-{release}{dev}
    {major}.{minor}.{patch}

# "_" is the optional value, i.e. the placeholder for a final release.
[bumpversion:part:release]
optional_value = _
first_value = dev
values =
    dev
    _

[bumpversion:part:dev]

[bumpversion:file:pyproject.toml]
search = version = "{current_version}"
replace = version = "{new_version}"

[bumpversion:file:VERSION]

[bumpversion:file:README.md]

# The plugin manifest is ict.yml (no plugin.json is shipped with this plugin).
# The search pattern is anchored on the unquoted `version:` key so that the
# quoted `specVersion: "..."` entry in the same file is left alone.
[bumpversion:file:ict.yml]
search = version: {current_version}
replace = version: {new_version}

[bumpversion:file:src/polus/mm/utils/extract_data_csv/__init__.py]
4 changes: 4 additions & 0 deletions utils/extract-data-csv-plugin/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.venv
out
tests
__pycache__
1 change: 1 addition & 0 deletions utils/extract-data-csv-plugin/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.csv filter=lfs diff=lfs merge=lfs -text
1 change: 1 addition & 0 deletions utils/extract-data-csv-plugin/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
poetry.lock
5 changes: 5 additions & 0 deletions utils/extract-data-csv-plugin/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# CHANGELOG

## 0.1.0

Initial release.
22 changes: 22 additions & 0 deletions utils/extract-data-csv-plugin/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Container image for the extract_data_csv plugin.
FROM condaforge/mambaforge

# EXEC_DIR is where the plugin sources are copied before installation.
# NOTE(review): POLUS_LOG is presumably read by the plugin to select its log
# level - confirm against the package's logging setup.
ENV EXEC_DIR="/opt/executables"
ENV POLUS_LOG="INFO"
RUN mkdir -p ${EXEC_DIR}


# Work directory defined in the base container
# WORKDIR ${EXEC_DIR}

# Project metadata needed by the build backend (pyproject.toml references README.md).
COPY pyproject.toml ${EXEC_DIR}
COPY VERSION ${EXEC_DIR}
COPY README.md ${EXEC_DIR}
COPY CHANGELOG.md ${EXEC_DIR}

# Install needed packages here

COPY src ${EXEC_DIR}/src

RUN pip3 install ${EXEC_DIR} --no-cache-dir

# This Dockerfile declares no ENTRYPOINT for the plugin, so the default command
# must name the program itself; a bare "--help" would be executed as if it were
# the binary. Callers that pass their own command (e.g. the CWL baseCommand)
# override CMD and are unaffected.
CMD ["python", "-m", "polus.mm.utils.extract_data_csv", "--help"]
18 changes: 18 additions & 0 deletions utils/extract-data-csv-plugin/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# extract_data_csv (0.1.0)

Extract data from a CSV file

## Options

This plugin takes 6 input arguments and 2 output arguments:

| Name | Description | I/O | Type | Default |
|---------------|-------------------------|--------|--------|---------|
| input_csv_path | Path to the input csv file, Type: string, File type: input, Accepted formats: csv | Input | File | File |
| query | query str to search the dataset, Type: string, File type: input, Accepted formats: txt | Input | string | string |
| min_row | The row min index, Type: int | Input | int | int |
| max_row | The row max index, Type: int | Input | int | int |
| column_name | The name of the column to load data, Type: string, File type: input, Accepted formats: txt | Input | string | string |
| output_txt_path | Path to the txt output file, Type: string, File type: output, Accepted formats: txt | Input | string | string |
| output_txt_path | Path to the txt output file | Output | File | File |
| output_data | The output data | Output | {'type': 'array', 'items': 'string'} | {'type': 'array', 'items': 'string'} |
1 change: 1 addition & 0 deletions utils/extract-data-csv-plugin/VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.1.0
4 changes: 4 additions & 0 deletions utils/extract-data-csv-plugin/build-docker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Build the plugin's Docker image, tagged with the version stored in ./VERSION.
# Must be run from the plugin directory (the one holding VERSION and Dockerfile).

# Stop on the first error, on unset variables and on failed pipeline stages,
# so a missing VERSION file never leads to a build with a broken tag.
set -euo pipefail

version=$(<VERSION)

# An empty VERSION file would otherwise produce the invalid tag "...-tool:".
if [[ -z "${version}" ]]; then
    echo "VERSION file is empty; cannot tag the image" >&2
    exit 1
fi

# Quote the tag so the version string is passed as a single argument.
docker build . -t "polusai/extract-data-csv-tool:${version}"
141 changes: 141 additions & 0 deletions utils/extract-data-csv-plugin/extract_data_csv_0@[email protected]
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
#!/usr/bin/env cwl-runner
cwlVersion: v1.0

class: CommandLineTool

label: Extract data from a CSV file

doc: |-
Extract data from a CSV file

baseCommand: ["python", "-m", "polus.mm.utils.extract_data_csv"]

hints:
DockerRequirement:
dockerPull: polusai/extract-data-csv-tool@sha256:2ef6682340ba4e34b826f5028bc4377b523c635975777f50deaf6d7278baa344

requirements:
InlineJavascriptRequirement: {}
# Enabling InitialWorkDirRequirement will stage the input csv file
InitialWorkDirRequirement:
listing:
- $(inputs.input_csv_path)

inputs:
input_csv_path:
label: Path to the input csv file
doc: |-
Path to the input csv file
Type: string
File type: input
Accepted formats: csv
type: File
format: edam:format_3752
inputBinding:
prefix: --input_csv_path

query:
label: query str to search the dataset
doc: |-
query str to search the dataset
Type: string
File type: input
Accepted formats: txt
type: string?
format:
- edam:format_2330
inputBinding:
prefix: --query

min_row:
label: The row min index
doc: |-
The row min index
Type: int
type: int?
format:
- edam:format_2330
inputBinding:
prefix: --min_row
default: 1

max_row:
label: The row max index
doc: |-
The row max index
Type: int
type: int?
format:
- edam:format_2330
inputBinding:
prefix: --max_row
default: -1

column_name:
label: The name of the column to load data
doc: |-
The name of the column to load data
Type: string
File type: input
Accepted formats: txt
type: string
format:
- edam:format_2330
inputBinding:
prefix: --column_name

output_txt_path:
label: Path to the txt output file
doc: |-
Path to the txt output file
Type: string
File type: output
Accepted formats: txt
type: string
format:
- edam:format_2330
inputBinding:
prefix: --output_txt_path
default: system.log

outputs:
output_txt_path:
label: Path to the txt output file
doc: |-
Path to the txt output file
type: File
outputBinding:
glob: $(inputs.output_txt_path)
format: edam:format_2330

output_data:
label: The output data
doc: |-
The output data
type:
type: array
items: string
outputBinding:
glob: $(inputs.output_txt_path)
loadContents: true
outputEval: |
${
var lines = self[0].contents.split("\n");
// remove black lines
lines = lines.filter(function(line) {return line.trim() !== '';});
var data = [];
for (var i = 0; i < lines.length; i++) {
// The format of the lines is as follows: data
var words = lines[i].split(",").map(function(item) {return item.trim();});
data.push(words[0]);

}
return data;
}


$namespaces:
edam: https://edamontology.org/

$schemas:
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
87 changes: 87 additions & 0 deletions utils/extract-data-csv-plugin/ict.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
specVersion: "0.1.0"
name: extract_data_csv
version: 0.1.0
container: extract-data-csv-plugin
entrypoint:
title: extract_data_csv
description: Extract data from a CSV file
author: Brandon Walker, Nazanin Donyapour
contact: [email protected], [email protected]
repository:
documentation:
citation:

inputs:
- name: input_csv_path
required: true
description: Path to the input csv file, Type string, File type input, Accepted formats csv
type: File
format:
uri: edam:format_3752
- name: query
required: true
description: query str to search the dataset, Type string, File type input, Accepted formats txt
type: string
format:
uri: edam:format_2330
- name: min_row
required: true
description: The row min index, Type int
type: int
format:
uri: edam:format_2330
- name: max_row
required: true
description: The row max index, Type int
type: int
format:
uri: edam:format_2330
- name: column_name
required: true
description: The name of the column to load data, Type string, File type input, Accepted formats txt
type: string
format:
uri: edam:format_2330
- name: output_txt_path
required: true
description: Path to the txt output file, Type string, File type output, Accepted formats txt
type: string
defaultValue: system.log
format:
uri: edam:format_2330
outputs:
- name: output_txt_path
required: true
description: Path to the txt output file
type: File
format:
uri: edam:format_2330
- name: output_data
required: true
description: The output data
type: {'type': 'array', 'items': 'string'}
ui:
- key: inputs.input_csv_path
title: "input_csv_path: "
description: "Path to the input csv file, Type string, File type input, Accepted formats csv"
type: File
- key: inputs.query
title: "query: "
description: "query str to search the dataset, Type string, File type input, Accepted formats txt"
type: string
- key: inputs.min_row
title: "min_row: "
description: "The row min index, Type int"
type: int
- key: inputs.max_row
title: "max_row: "
description: "The row max index, Type int"
type: int
- key: inputs.column_name
title: "column_name: "
description: "The name of the column to load data, Type string, File type input, Accepted formats txt"
type: string
- key: inputs.output_txt_path
title: "output_txt_path: "
description: "Path to the txt output file, Type string, File type output, Accepted formats txt"
type: string
31 changes: 31 additions & 0 deletions utils/extract-data-csv-plugin/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Poetry project definition for the extract_data_csv plugin package.
[tool.poetry]
name = "polus-mm-utils-extract-data-csv"
# Kept in sync with VERSION, README.md and __init__.py by bump2version
# (see the search/replace pattern for this file in .bumpversion.cfg).
version = "0.1.0"
description = "Extract data from a CSV file"
authors = ["Nazanin Donyapour <[email protected]>", "Brandon Walker <[email protected]>"]
readme = "README.md"
# The importable "polus" package tree lives under src/.
packages = [{include = "polus", from = "src"}]

# Runtime dependencies. sophios and pandas are pinned to exact versions;
# typer accepts compatible releases per the caret constraint.
[tool.poetry.dependencies]
python = ">=3.9,<3.13"
typer = "^0.7.0"
sophios = "0.1.4"
pandas = "2.2.2"

# Development-only tooling: version bumping, tests, git hooks, formatting,
# type checking and linting.
[tool.poetry.group.dev.dependencies]
bump2version = "^1.0.1"
pytest = "^7.4"
pytest-sugar = "^0.9.6"
pre-commit = "^3.2.1"
black = "^23.3.0"
mypy = "^1.1.1"
ruff = "^0.0.270"

# Build with poetry-core so installing from source does not require the Poetry CLI.
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

# pytest adds the listed paths (here the project root) to sys.path.
[tool.pytest.ini_options]
pythonpath = [
"."
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""extract_data_csv."""

# Package version; .bumpversion.cfg lists this file, so the literal below is
# rewritten on every version bump and must keep its exact "X.Y.Z" form.
__version__ = "0.1.0"

# Re-export the function from the same-named submodule at package level, so it
# can be imported from polus.mm.utils.extract_data_csv directly. The name is
# not used in this file, hence the lint suppressions on the import.
from polus.mm.utils.extract_data_csv.extract_data_csv import (  # noqa # pylint: disable=unused-import
    extract_data_csv,
)
Loading
Loading