-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Brandon Duane Walker
authored and
Brandon Duane Walker
committed
May 29, 2024
1 parent
b3f4953
commit 56c9975
Showing
16 changed files
with
395 additions
and
0 deletions.
There are no files selected for viewing
1 change: 1 addition & 0 deletions
1
...er.container_name}}/src/{{cookiecutter.package_folders}}/{{cookiecutter.package_name}}.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
"""{{ cookiecutter.plugin_name }}.""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
[bumpversion] | ||
current_version = 0.1.0 | ||
commit = False | ||
tag = False | ||
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))? | ||
serialize = | ||
{major}.{minor}.{patch}-{release}{dev} | ||
{major}.{minor}.{patch} | ||
|
||
[bumpversion:part:release] | ||
optional_value = _ | ||
first_value = dev | ||
values = | ||
dev | ||
_ | ||
|
||
[bumpversion:part:dev] | ||
|
||
[bumpversion:file:pyproject.toml] | ||
search = version = "{current_version}" | ||
replace = version = "{new_version}" | ||
|
||
[bumpversion:file:VERSION] | ||
|
||
[bumpversion:file:README.md] | ||
|
||
[bumpversion:file:plugin.json] | ||
|
||
[bumpversion:file:src/polus/mm/utils/torchdrug_download/__init__.py] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
.venv | ||
out | ||
tests | ||
__pycache__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
poetry.lock |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# CHANGELOG | ||
|
||
## 0.1.0 | ||
|
||
Initial release. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# docker build -f Dockerfile -t mrbrandonwalker/torch_drug . | ||
|
||
FROM condaforge/mambaforge | ||
|
||
ENV EXEC_DIR="/opt/executables" | ||
ENV POLUS_LOG="INFO" | ||
RUN mkdir -p ${EXEC_DIR} | ||
|
||
# Work directory defined in the base container | ||
WORKDIR ${EXEC_DIR} | ||
|
||
COPY pyproject.toml ${EXEC_DIR} | ||
COPY VERSION ${EXEC_DIR} | ||
COPY README.md ${EXEC_DIR} | ||
COPY CHANGELOG.md ${EXEC_DIR} | ||
# need this here because poetry install needs the src directory | ||
COPY src ${EXEC_DIR}/src | ||
|
||
# Install needed packages here | ||
RUN pip3 install --upgrade pip | ||
RUN pip3 install poetry | ||
RUN poetry install | ||
|
||
RUN pip3 install ${EXEC_DIR} --no-cache-dir |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# torchdrug (0.1.0) | ||
|
||
Access datasets and models from TorchDrug | ||
|
||
## Reading inputs/outputs from .cwl files | ||
This adds inputs/outputs from .cwl files into cookiecutter.json | ||
`python read_cwl_inputs_outputs.py path_to_cwl_file.cwl` | ||
|
||
## Modifying template files | ||
To dynamically add the inputs/outputs from cookiecutter.json to `README.md`, `__main__.py`, and the plugin package function, run | ||
`python modify_base_template.py` | ||
|
||
## Building | ||
|
||
To build the Docker image for the conversion plugin, run `./build-docker.sh`. | ||
|
||
## Install WIPP Plugin | ||
|
||
If WIPP is running, navigate to the plugins page and add a new plugin. Paste the | ||
contents of `plugin.json` into the pop-up window and submit. | ||
## Options | ||
|
||
This plugin takes 1 input argument and 1 output argument: | ||
|
||
| Name | Description | I/O | Type | Default | | ||
|---------------|-------------------------|--------|--------|---------| | ||
| dataset | Input dataset to extract | Input | string | string | | ||
| outdir | Output collection. | Output | collection | collection | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
0.1.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
specVersion: 0.1.0 | ||
name: labshare/torchdrug-download | ||
version: 0.1.0 | ||
container: polusai/torchdrug-plugin:0.1.0 | ||
entrypoint: "" | ||
title: torchdrug_download | ||
description: Access datasets and models from TorchDrug | ||
author: Data Scientist ([email protected]) | ||
repository: https://github.com/labshare/mmtools | ||
documentation: https://ncats.nih.gov/preclinical/core/informatics | ||
citation: "" | ||
inputs: | ||
- name: dataset | ||
required: true | ||
description: Input dataset to extract | ||
type: string | ||
outputs: | ||
- name: outdir | ||
required: false | ||
description: Output collection. | ||
type: collection |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
[tool.poetry] | ||
name = "polus-mm-utils-torchdrug_download" | ||
version = "0.1.0" | ||
description = "Access datasets and models from TorchDrug" | ||
authors = ["Data Scientist <[email protected]>"] | ||
readme = "README.md" | ||
packages = [{include = "polus", from = "src"}] | ||
|
||
[tool.poetry.dependencies] | ||
python = ">=3.8,<3.11" | ||
torch = { version = "1.12.1", source="torch"} | ||
torchaudio = { version = "0.12.1", source="torch"} | ||
torchvision = { version = "0.13.1", source="torch"} | ||
torch-cluster = { version = "1.6.0", source="pyg"} | ||
torch-scatter = { version = "2.0.9", source="pyg"} | ||
torchdrug = "0.2.1" | ||
rdkit = "2023.9.5" | ||
typer = "^0.7.0" | ||
|
||
[[tool.poetry.source]] | ||
name = "torch" | ||
url = "https://download.pytorch.org/whl/cu116" | ||
secondary = true | ||
|
||
[[tool.poetry.source]] | ||
name = "pyg" | ||
url = "https://data.pyg.org/whl/torch-1.12.1+cu116.html" | ||
secondary = true | ||
|
||
[tool.poetry.group.dev.dependencies] | ||
bump2version = "^1.0.1" | ||
pytest = "^7.4" | ||
pytest-sugar = "^0.9.6" | ||
pre-commit = "^3.2.1" | ||
black = "^23.3.0" | ||
mypy = "^1.1.1" | ||
ruff = "^0.0.270" | ||
|
||
[build-system] | ||
requires = ["poetry-core"] | ||
build-backend = "poetry.core.masonry.api" | ||
|
||
[tool.pytest.ini_options] | ||
pythonpath = [ | ||
"." | ||
] |
7 changes: 7 additions & 0 deletions
7
utils/torchdrug_download-plugin/src/polus/mm/utils/torchdrug_download/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
"""torchdrug.""" | ||
|
||
__version__ = "0.1.0" | ||
|
||
from polus.mm.utils.torchdrug_download.torchdrug_download import ( # noqa # pylint: disable=unused-import | ||
torchdrug_download, | ||
) |
152 changes: 152 additions & 0 deletions
152
utils/torchdrug_download-plugin/src/polus/mm/utils/torchdrug_download/__main__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
"""Package entrypoint for the torchdrug package.""" | ||
|
||
# Base packages | ||
import logging | ||
from enum import Enum | ||
from os import environ | ||
from pathlib import Path | ||
|
||
import typer | ||
from polus.mm.utils.torchdrug_download.torchdrug_download import torchdrug_download | ||
from torchdrug import datasets | ||
|
||
# Configure the root logger's record layout and timestamp format.
logging.basicConfig(
    format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s",
    datefmt="%d-%b-%y %H:%M:%S",
)
# The POLUS_LOG environment variable (default "INFO") is looked up as an
# attribute of the logging module to obtain the level.
# NOTE(review): a value that is not an attribute of `logging` raises
# AttributeError while this module is being imported.
POLUS_LOG = getattr(logging, environ.get("POLUS_LOG", "INFO"))
# NOTE(review): the logger name ends with a trailing dot; confirm this is
# intended and not a template leftover.
logger = logging.getLogger("polus.mm.utils.torchdrug_download.")
logger.setLevel(POLUS_LOG)

# Typer application object; commands are registered on it with @app.command().
app = typer.Typer(help="torchdrug_download.")
|
||
|
||
class DatabaseEnum(str, Enum):
    """Dataset names accepted by the ``--dataset`` command-line option.

    The class mixes in ``str``, so every member is also a string equal to its
    value. Each member's name is identical to its value, and each value is the
    name of the ``torchdrug.datasets`` class that ``main`` selects for it.
    """

    ClinTox = "ClinTox"
    PDBBind = "PDBBind"
    FB15k = "FB15k"
    FB15k237 = "FB15k237"
    WN18 = "WN18"
    WN18RR = "WN18RR"
    Hetionet = "Hetionet"
    BACE = "BACE"
    BBBP = "BBBP"
    CEP = "CEP"
    ChEMBLFiltered = "ChEMBLFiltered"
    Delaney = "Delaney"
    FreeSolv = "FreeSolv"
    HIV = "HIV"
    Lipophilicity = "Lipophilicity"
    MUV = "MUV"
    Malaria = "Malaria"
    OPV = "OPV"
    QM8 = "QM8"
    QM9 = "QM9"
    SIDER = "SIDER"
    Tox21 = "Tox21"
    ToxCast = "ToxCast"
    ZINC250k = "ZINC250k"
    ZINC2m = "ZINC2m"
    MOSES = "MOSES"
    PCQM4M = "PCQM4M"
    BetaLactamase = "BetaLactamase"
    Fluorescence = "Fluorescence"
    Stability = "Stability"
    Solubility = "Solubility"
    BinaryLocalization = "BinaryLocalization"
    SubcellularLocalization = "SubcellularLocalization"
    EnzymeCommission = "EnzymeCommission"
    GeneOntology = "GeneOntology"
    AlphaFoldDB = "AlphaFoldDB"
    Fold = "Fold"
    SecondaryStructure = "SecondaryStructure"
    ProteinNet = "ProteinNet"
    HumanPPI = "HumanPPI"
    YeastPPI = "YeastPPI"
    PPIAffinity = "PPIAffinity"
    BindingDB = "BindingDB"
    USPTO50k = "USPTO50k"
    Cora = "Cora"
    PubMed = "PubMed"
|
||
|
||
@app.command()
def main(
    dataset: DatabaseEnum = typer.Option(
        ...,
        "--dataset",
        help="Input database to be processed.",
    ),
    out_dir: Path = typer.Option(
        ...,
        "--outdir",
        help="Output directory.",
        exists=True,
        writable=True,
        file_okay=False,
        resolve_path=True,
    ),
) -> None:
    """torchdrug_download."""
    # The docstring above is left unchanged on purpose: Typer displays it as
    # the command's help text.
    #
    # dataset: the dataset selected with --dataset (a DatabaseEnum member; a
    #     plain string naming a member is accepted too when called directly).
    # out_dir: existing, writable directory given with --outdir.
    # Raises ValueError when the name is not a DatabaseEnum value or when
    # torchdrug.datasets exposes no class of that name.

    # Normalise to the plain string value so the helper receives the `str` it
    # is annotated with and log output does not depend on how a str-mixin Enum
    # is formatted by the running Python version.
    dataset_name = DatabaseEnum(dataset).value

    # Every DatabaseEnum value is the name of a class in torchdrug.datasets,
    # so resolve only the requested class by name instead of maintaining a
    # second hand-written table that has to stay in sync with the enum.
    dataset_class = getattr(datasets, dataset_name, None)
    if dataset_class is None:
        msg = f"Unsupported dataset: {dataset_name}"
        raise ValueError(msg)

    logger.info("database: %s", dataset_name)
    logger.info("outdir: %s", out_dir)
    torchdrug_download(dataset_name, out_dir, {dataset_name: dataset_class})
|
||
|
||
if __name__ == "__main__": | ||
app() |
19 changes: 19 additions & 0 deletions
19
utils/torchdrug_download-plugin/src/polus/mm/utils/torchdrug_download/torchdrug_download.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
"""torchdrug_download.""" | ||
from pathlib import Path | ||
|
||
|
||
def torchdrug_download(dataset: str, outdir: Path, dataset_mapping: dict) -> None:
    """Instantiate the dataset class registered under ``dataset``.

    The class is called as ``cls(outdir, lazy=True)``; the call is made for
    its side effects only and the resulting object is discarded.

    Args:
        dataset: Key identifying the dataset; must be present in
            ``dataset_mapping``.
        outdir: Directory passed to the dataset class as its first positional
            argument.
        dataset_mapping: Mapping from dataset key to the dataset class (any
            callable accepting ``(outdir, lazy=True)``).

    Raises:
        KeyError: If ``dataset`` is not a key of ``dataset_mapping``.
    """
    selected_dataset_class = dataset_mapping[dataset]
    # The instance is deliberately not bound to a name: rebinding the `dataset`
    # parameter (annotated `str`) to a dataset object is a type error and the
    # object is never used afterwards.
    # lazy = False causes issues with PDBBind dataset such as invalid sequence
    selected_dataset_class(outdir, lazy=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
"""Tests for torchdrug_download.""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
"""Tests for torchdrug_download.""" | ||
from pathlib import Path | ||
|
||
from polus.mm.utils.torchdrug_download.torchdrug_download import torchdrug_download | ||
from torchdrug import datasets | ||
|
||
|
||
def test_torchdrug_download_check() -> None:
    """Check that fetching Tox21 leaves ``tox21.csv`` in the output directory.

    The download goes into a throwaway temporary directory rather than the
    current working directory, so the run does not leave files in the checkout
    and a ``tox21.csv`` left over from an earlier run cannot make the
    assertion pass by accident.
    """
    # Function-scope import keeps this change self-contained in the test body.
    import tempfile

    dataset = "Tox21"
    dataset_mapping = {"Tox21": datasets.Tox21}
    with tempfile.TemporaryDirectory() as tmp_dir:
        outdir = Path(tmp_dir)
        torchdrug_download(dataset, outdir, dataset_mapping)
        # Assert on the directory handed to the downloader, not on a
        # cwd-relative path.
        assert (outdir / "tox21.csv").exists()
Oops, something went wrong.