Skip to content

Commit

Permalink
extract-data-csv plugin
Browse files Browse the repository at this point in the history
  • Loading branch information
ndonyapour committed Dec 26, 2024
1 parent 92d2d65 commit f3e463b
Show file tree
Hide file tree
Showing 18 changed files with 527 additions and 0 deletions.
29 changes: 29 additions & 0 deletions utils/extract-data-csv-plugin/.bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# bump2version configuration for the extract-data-csv plugin.
[bumpversion]
current_version = 0.1.0
# Bumping only rewrites the files listed below; no git commit or tag is created.
commit = False
tag = False
# A version is MAJOR.MINOR.PATCH with an optional "-<release><dev>" suffix,
# where <release> is lowercase letters and <dev> is a number (e.g. 0.1.0-dev3).
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?
# Output formats in order of preference: with the release/dev suffix, then plain.
serialize =
{major}.{minor}.{patch}-{release}{dev}
{major}.{minor}.{patch}

# The release part cycles through "dev" and "_"; "_" is the optional value,
# so a version whose release part is "_" is written without the suffix.
[bumpversion:part:release]
optional_value = _
first_value = dev
values =
dev
_

# Numeric counter that follows the release part (no custom settings).
[bumpversion:part:dev]

# pyproject.toml: only the `version = "..."` line is rewritten.
[bumpversion:file:pyproject.toml]
search = version = "{current_version}"
replace = version = "{new_version}"

# The remaining files use the default search/replace of the bare version string.
[bumpversion:file:VERSION]

[bumpversion:file:README.md]

# NOTE(review): plugin.json is not among the files visible in this change, while
# ict.yml (which also carries the version) is not listed here - confirm.
[bumpversion:file:plugin.json]

[bumpversion:file:src/polus/mm/utils/extract_data_csv/__init__.py]
4 changes: 4 additions & 0 deletions utils/extract-data-csv-plugin/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.venv
out
tests
__pycache__
1 change: 1 addition & 0 deletions utils/extract-data-csv-plugin/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.csv filter=lfs diff=lfs merge=lfs -text
1 change: 1 addition & 0 deletions utils/extract-data-csv-plugin/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
poetry.lock
5 changes: 5 additions & 0 deletions utils/extract-data-csv-plugin/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# CHANGELOG

## 0.1.0

Initial release.
22 changes: 22 additions & 0 deletions utils/extract-data-csv-plugin/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Image for the extract_data_csv plugin: installs the package from the local
# sources into the conda-based base image.
FROM condaforge/mambaforge

# EXEC_DIR is where the plugin sources are copied before installation.
# POLUS_LOG is presumably read by the plugin as its log level - confirm.
ENV EXEC_DIR="/opt/executables"
ENV POLUS_LOG="INFO"
RUN mkdir -p ${EXEC_DIR}


# Work directory defined in the base container
# WORKDIR ${EXEC_DIR}

# Project metadata needed by the build backend (pyproject.toml references
# README.md). The trailing slash makes the destination an explicit directory.
COPY pyproject.toml ${EXEC_DIR}/
COPY VERSION ${EXEC_DIR}/
COPY README.md ${EXEC_DIR}/
COPY CHANGELOG.md ${EXEC_DIR}/

# Install needed packages here

COPY src ${EXEC_DIR}/src

RUN pip3 install ${EXEC_DIR} --no-cache-dir

# No ENTRYPOINT is declared in this file, so CMD must name the executable
# itself; a bare ["--help"] cannot be executed. The CWL tool description
# supplies its own command line, which replaces this default.
CMD ["python", "-m", "polus.mm.utils.extract_data_csv", "--help"]
18 changes: 18 additions & 0 deletions utils/extract-data-csv-plugin/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# extract_data_csv (0.1.0)

Extract data from a CSV file

## Options

This plugin takes 6 input arguments and 2 output arguments:

| Name | Description | I/O | Type | Default |
|---------------|-------------------------|--------|--------|---------|
| input_csv_path | Path to the input csv file, Type: string, File type: input, Accepted formats: csv | Input | File | File |
| query | query str to search the dataset, Type: string, File type: input, Accepted formats: txt | Input | string | string |
| min_row | The row min index, Type: int | Input | int | int |
| max_row | The row max index, Type: int | Input | int | int |
| column_name | The name of the column to load data, Type: string, File type: input, Accepted formats: txt | Input | string | string |
| output_txt_path | Path to the txt output file, Type: string, File type: output, Accepted formats: txt | Input | string | string |
| output_txt_path | Path to the txt output file | Output | File | File |
| output_data | The output data | Output | {'type': 'array', 'items': 'string'} | {'type': 'array', 'items': 'string'} |
1 change: 1 addition & 0 deletions utils/extract-data-csv-plugin/VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.1.0
4 changes: 4 additions & 0 deletions utils/extract-data-csv-plugin/build-docker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Build the plugin's Docker image, tagged with the version stored in VERSION.

# Stop on the first failing command, on unset variables and on pipeline errors,
# so a missing VERSION file aborts the script before docker is invoked.
set -euo pipefail

# Run from the directory holding this script so that VERSION and the build
# context resolve correctly regardless of where the script is called from.
cd "$(dirname "${BASH_SOURCE[0]}")"

version=$(<VERSION)
# Quote the tag so the version string is passed to docker as a single word.
docker build . -t "polusai/extract-data-csv-tool:${version}"
141 changes: 141 additions & 0 deletions utils/extract-data-csv-plugin/extract_data_csv_0@1@0.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
#!/usr/bin/env cwl-runner
# CWL description of the extract_data_csv command-line tool.
cwlVersion: v1.0

class: CommandLineTool

label: Extract data from a CSV file

doc: |-
Extract data from a CSV file

# The tool is started as a Python module; each bound input below is appended
# to this command as a "--name value" pair via its inputBinding prefix.
baseCommand: ["python", "-m", "polus.mm.utils.extract_data_csv"]

# Container image, pinned by digest rather than by tag.
hints:
DockerRequirement:
dockerPull: polusai/extract-data-csv-tool@sha256:2ef6682340ba4e34b826f5028bc4377b523c635975777f50deaf6d7278baa344

requirements:
# Required for the ${...} JavaScript body used by the output_data outputEval.
InlineJavascriptRequirement: {}
# Enabling InitialWorkDirRequirement will stage the input csv file
InitialWorkDirRequirement:
listing:
- $(inputs.input_csv_path)

# Command-line inputs. Each one is passed to the tool as "--<name> <value>".
# NOTE(review): the CWL specification documents `format` for File-typed
# parameters only; confirm the runner ignores it on the string/int inputs.
inputs:
  # CSV file to read; also staged into the working directory by the
  # InitialWorkDirRequirement of this tool.
  input_csv_path:
    label: Path to the input csv file
    doc: |-
      Path to the input csv file
      Type: string
      File type: input
      Accepted formats: csv
    type: File
    format: edam:format_3752
    inputBinding:
      prefix: --input_csv_path

  # Optional query string (type "string?").
  query:
    label: query str to search the dataset
    doc: |-
      query str to search the dataset
      Type: string
      File type: input
      Accepted formats: txt
    type: string?
    format:
    - edam:format_2330
    inputBinding:
      prefix: --query

  # Optional lower row bound; defaults to 1.
  min_row:
    label: The row min index
    doc: |-
      The row min index
      Type: int
    type: int?
    format:
    - edam:format_2330
    inputBinding:
      prefix: --min_row
    default: 1

  # Optional upper row bound; defaults to -1.
  # NOTE(review): -1 presumably means "no upper limit" - confirm in the tool.
  max_row:
    label: The row max index
    doc: |-
      The row max index
      Type: int
    type: int?
    format:
    - edam:format_2330
    inputBinding:
      prefix: --max_row
    default: -1

  # Required name of the CSV column to read.
  column_name:
    label: The name of the column to load data
    doc: |-
      The name of the column to load data
      Type: string
      File type: input
      Accepted formats: txt
    type: string
    format:
    - edam:format_2330
    inputBinding:
      prefix: --column_name

  # Name of the text file the tool writes; the outputs section globs for
  # this same name to collect the result.
  output_txt_path:
    label: Path to the txt output file
    doc: |-
      Path to the txt output file
      Type: string
      File type: output
      Accepted formats: txt
    type: string
    format:
    - edam:format_2330
    inputBinding:
      prefix: --output_txt_path
    default: system.log

outputs:
  # The text file written by the tool, found by the name that was passed in
  # as inputs.output_txt_path.
  output_txt_path:
    label: Path to the txt output file
    doc: |-
      Path to the txt output file
    type: File
    outputBinding:
      glob: $(inputs.output_txt_path)
    format: edam:format_2330

  # The same file parsed into an array of strings: one entry per non-blank
  # line, keeping only the text before the first comma.
  # NOTE: loadContents reads a bounded prefix of the file (64 KiB in the CWL
  # v1.0 specification), so larger outputs are truncated here.
  output_data:
    label: The output data
    doc: |-
      The output data
    type:
      type: array
      items: string
    outputBinding:
      glob: $(inputs.output_txt_path)
      loadContents: true
      outputEval: |
        ${
          // No file matched the glob (or it has no contents): return an empty
          // array instead of failing on self[0] being undefined.
          if (self.length === 0 || !self[0].contents) {
            return [];
          }
          var lines = self[0].contents.split("\n");
          // remove blank lines
          lines = lines.filter(function(line) {return line.trim() !== '';});
          var data = [];
          for (var i = 0; i < lines.length; i++) {
            // The format of the lines is as follows: data
            var words = lines[i].split(",").map(function(item) {return item.trim();});
            data.push(words[0]);
          }
          return data;
        }


# Prefix expanded in the edam:format_* identifiers used by the inputs and outputs.
$namespaces:
edam: https://edamontology.org/

# Ontology document loaded so the edam format identifiers can be resolved.
$schemas:
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
87 changes: 87 additions & 0 deletions utils/extract-data-csv-plugin/ict.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# ICT description of the extract_data_csv plugin: general metadata.
specVersion: "0.1.0"
name: extract_data_csv
version: 0.1.0
# NOTE(review): the CWL description and build-docker.sh use the image name
# polusai/extract-data-csv-tool - confirm which name is intended here.
container: extract-data-csv-plugin
# Left empty in this file.
entrypoint:
title: extract_data_csv
description: Extract data from a CSV file
author: Brandon Walker, Nazanin Donyapour
contact: [email protected], [email protected]
# The following fields are left empty in this file.
repository:
documentation:
citation:

# Inputs mirror the CWL tool description: optional CWL inputs are marked
# required: false and carry the same defaults.
inputs:
  - name: input_csv_path
    required: true
    description: Path to the input csv file, Type string, File type input, Accepted formats csv
    type: File
    format:
      uri: edam:format_3752
  # Optional in the CWL description (type "string?").
  - name: query
    required: false
    description: query str to search the dataset, Type string, File type input, Accepted formats txt
    type: string
    format:
      uri: edam:format_2330
  # Optional in the CWL description (type "int?", default 1).
  - name: min_row
    required: false
    description: The row min index, Type int
    type: int
    defaultValue: 1
    format:
      uri: edam:format_2330
  # Optional in the CWL description (type "int?", default -1).
  - name: max_row
    required: false
    description: The row max index, Type int
    type: int
    defaultValue: -1
    format:
      uri: edam:format_2330
  - name: column_name
    required: true
    description: The name of the column to load data, Type string, File type input, Accepted formats txt
    type: string
    format:
      uri: edam:format_2330
  # Name of the text file the tool writes.
  - name: output_txt_path
    required: true
    description: Path to the txt output file, Type string, File type output, Accepted formats txt
    type: string
    defaultValue: system.log
    format:
      uri: edam:format_2330
outputs:
  # The text file written by the tool.
  - name: output_txt_path
    required: true
    description: Path to the txt output file
    type: File
    format:
      uri: edam:format_2330
  # Array-of-strings output; the type is given as an inline mapping.
  - name: output_data
    required: true
    description: The output data
    type: {'type': 'array', 'items': 'string'}
# UI entries, one per input; each key refers to an input by name.
ui:
  - key: inputs.input_csv_path
    title: "input_csv_path: "
    description: "Path to the input csv file, Type string, File type input, Accepted formats csv"
    type: File
  - key: inputs.query
    title: "query: "
    description: "query str to search the dataset, Type string, File type input, Accepted formats txt"
    type: string
  - key: inputs.min_row
    title: "min_row: "
    description: "The row min index, Type int"
    type: int
  - key: inputs.max_row
    title: "max_row: "
    description: "The row max index, Type int"
    type: int
  - key: inputs.column_name
    title: "column_name: "
    description: "The name of the column to load data, Type string, File type input, Accepted formats txt"
    type: string
  - key: inputs.output_txt_path
    title: "output_txt_path: "
    description: "Path to the txt output file, Type string, File type output, Accepted formats txt"
    type: string
31 changes: 31 additions & 0 deletions utils/extract-data-csv-plugin/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Poetry project definition for the extract_data_csv plugin.
[tool.poetry]
name = "polus-mm-utils-extract-data-csv"
# Rewritten by bump2version (see the pyproject.toml section of .bumpversion.cfg).
version = "0.1.0"
description = "Extract data from a CSV file"
authors = ["Nazanin Donyapour <[email protected]>", "Brandon Walker <[email protected]>"]
readme = "README.md"
# The importable "polus" package lives under src/.
packages = [{include = "polus", from = "src"}]

# Runtime dependencies; sophios and pandas are pinned to exact versions.
[tool.poetry.dependencies]
python = ">=3.9,<3.13"
typer = "^0.7.0"
sophios = "0.1.4"
pandas = "2.2.2"

# Development-only tooling (versioning, tests, formatting, linting, typing).
[tool.poetry.group.dev.dependencies]
bump2version = "^1.0.1"
pytest = "^7.4"
pytest-sugar = "^0.9.6"
pre-commit = "^3.2.1"
black = "^23.3.0"
mypy = "^1.1.1"
ruff = "^0.0.270"

# Build with poetry-core.
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

# pytest adds the project root to sys.path when collecting tests.
[tool.pytest.ini_options]
pythonpath = [
"."
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""extract_data_csv package: defines ``__version__`` and re-exports ``extract_data_csv``."""

# Package version; this file is listed in .bumpversion.cfg, so the literal
# below is rewritten on every version bump.
__version__ = "0.1.0"

# Re-export from the implementation module so it can be imported from the
# package root; the trailing markers silence unused-import warnings.
from polus.mm.utils.extract_data_csv.extract_data_csv import ( # noqa # pylint: disable=unused-import
extract_data_csv,
)
Loading

0 comments on commit f3e463b

Please sign in to comment.