-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Brandon Duane Walker
authored and
Brandon Duane Walker
committed
Jun 4, 2024
1 parent
b3f4953
commit 113fb28
Showing
16 changed files
with
420 additions
and
0 deletions.
There are no files selected for viewing
1 change: 1 addition & 0 deletions
1
...er.container_name}}/src/{{cookiecutter.package_folders}}/{{cookiecutter.package_name}}.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
"""{{ cookiecutter.plugin_name }}.""" |
29 changes: 29 additions & 0 deletions
29
utils/pre-process/data-download/torchdrug_download-tool/.bumpversion.cfg
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
[bumpversion] | ||
current_version = 0.1.0 | ||
commit = False | ||
tag = False | ||
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))? | ||
serialize = | ||
{major}.{minor}.{patch}-{release}{dev} | ||
{major}.{minor}.{patch} | ||
|
||
[bumpversion:part:release] | ||
optional_value = _ | ||
first_value = dev | ||
values = | ||
dev | ||
_ | ||
|
||
[bumpversion:part:dev] | ||
|
||
[bumpversion:file:pyproject.toml] | ||
search = version = "{current_version}" | ||
replace = version = "{new_version}" | ||
|
||
[bumpversion:file:VERSION] | ||
|
||
[bumpversion:file:README.md] | ||
|
||
[bumpversion:file:plugin.json] | ||
|
||
[bumpversion:file:src/polus/mm/utils/torchdrug_download/__init__.py] |
4 changes: 4 additions & 0 deletions
4
utils/pre-process/data-download/torchdrug_download-tool/.dockerignore
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
.venv | ||
out | ||
tests | ||
__pycache__ |
1 change: 1 addition & 0 deletions
1
utils/pre-process/data-download/torchdrug_download-tool/.gitignore
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
poetry.lock |
5 changes: 5 additions & 0 deletions
5
utils/pre-process/data-download/torchdrug_download-tool/CHANGELOG.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# CHANGELOG | ||
|
||
## 0.1.0 | ||
|
||
Initial release. |
49 changes: 49 additions & 0 deletions
49
utils/pre-process/data-download/torchdrug_download-tool/Dockerfile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# docker build -f Dockerfile -t mrbrandonwalker/torch_drug . | ||
|
||
FROM condaforge/mambaforge | ||
|
||
ENV EXEC_DIR="/opt/executables" | ||
ENV POLUS_LOG="INFO" | ||
RUN mkdir -p ${EXEC_DIR} | ||
|
||
# Install g++ and other essential packages | ||
# needed to install torch | ||
RUN apt-get update && apt-get install -y \ | ||
g++ \ | ||
&& apt-get clean \ | ||
&& rm -rf /var/lib/apt/lists/* | ||
|
||
# Work directory defined in the base container | ||
WORKDIR ${EXEC_DIR} | ||
|
||
COPY pyproject.toml ${EXEC_DIR} | ||
COPY VERSION ${EXEC_DIR} | ||
COPY README.md ${EXEC_DIR} | ||
COPY CHANGELOG.md ${EXEC_DIR} | ||
# need this here because poetry install needs the src directory | ||
COPY src ${EXEC_DIR}/src | ||
|
||
# Install needed packages here | ||
RUN pip3 install --upgrade pip | ||
RUN pip3 install poetry | ||
RUN poetry install | ||
# torch must be installed before the package itself; otherwise the torch-cluster build fails with: | ||
#0 2.132 Collecting torch-cluster==1.6.0 (from polus-mm-utils-torchdrug_download==0.1.0) | ||
#0 2.144 Downloading torch_cluster-1.6.0.tar.gz (43 kB) | ||
#0 2.150 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 43.4/43.4 kB 9.0 MB/s eta 0:00:00 | ||
#0 2.164 Preparing metadata (setup.py): started | ||
#0 2.329 Preparing metadata (setup.py): finished with status 'error' | ||
#0 2.333 error: subprocess-exited-with-error | ||
#0 2.333 | ||
#0 2.333 × python setup.py egg_info did not run successfully. | ||
#0 2.333 │ exit code: 1 | ||
#0 2.333 ╰─> [6 lines of output] | ||
#0 2.333 Traceback (most recent call last): | ||
#0 2.333 File "<string>", line 2, in <module> | ||
#0 2.333 File "<pip-setuptools-caller>", line 34, in <module> | ||
#0 2.333 File "/tmp/pip-install-fdght_zq/torch-cluster_10a1a1bbf63e4e1ca1e035b9639f5253/setup.py", line 8, in <module> | ||
#0 2.333 import torch | ||
#0 2.333 ModuleNotFoundError: No module named 'torch' | ||
RUN pip3 install torch | ||
|
||
RUN pip3 install ${EXEC_DIR} --no-cache-dir |
28 changes: 28 additions & 0 deletions
28
utils/pre-process/data-download/torchdrug_download-tool/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# torchdrug (0.1.0) | ||
|
||
Access datasets and models from TorchDrug | ||
|
||
## Reading inputs/outputs from .cwl files | ||
This adds inputs/outputs from .cwl files into cookiecutter.json | ||
`python read_cwl_inputs_outputs.py path_to_cwl_file.cwl` | ||
|
||
## Modifying template files | ||
To dynamically add inputs/outputs from cookiecutter.json to README.MD, __main__.py and plugin_package function | ||
`python modify_base_template.py` | ||
|
||
## Building | ||
|
||
To build the Docker image for this plugin, run `./build-docker.sh`. | ||
|
||
## Install WIPP Plugin | ||
|
||
If WIPP is running, navigate to the plugins page and add a new plugin. Paste the | ||
contents of `plugin.json` into the pop-up window and submit. | ||
## Options | ||
|
||
This plugin takes 1 input argument and 1 output argument: | ||
|
||
| Name | Description | I/O | Type | Default | | ||
|---------------|-------------------------|--------|--------|---------| | ||
| dataset | Input dataset to extract | Input | string | string | | ||
| outdir | Output collection. | Output | collection | collection | |
1 change: 1 addition & 0 deletions
1
utils/pre-process/data-download/torchdrug_download-tool/VERSION
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
0.1.0 |
21 changes: 21 additions & 0 deletions
21
utils/pre-process/data-download/torchdrug_download-tool/ict.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
specVersion: 0.1.0 | ||
name: labshare/torchdrug-download | ||
version: 0.1.0 | ||
container: polusai/torchdrug-tool:0.1.0 | ||
entrypoint: "" | ||
title: torchdrug_download | ||
description: Access datasets and models from TorchDrug | ||
author: Brandon Walker ([email protected]) | ||
repository: https://github.com/labshare/mmtools | ||
documentation: https://ncats.nih.gov/preclinical/core/informatics | ||
citation: "" | ||
inputs: | ||
- name: dataset | ||
required: true | ||
description: Input dataset to extract | ||
type: string | ||
outputs: | ||
- name: outdir | ||
required: false | ||
description: Output collection. | ||
type: collection |
46 changes: 46 additions & 0 deletions
46
utils/pre-process/data-download/torchdrug_download-tool/pyproject.toml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
[tool.poetry] | ||
name = "polus-mm-utils-torchdrug_download" | ||
version = "0.1.0" | ||
description = "Access datasets and models from TorchDrug" | ||
authors = ["Data Scientist <[email protected]>"] | ||
readme = "README.md" | ||
packages = [{include = "polus", from = "src"}] | ||
|
||
[tool.poetry.dependencies] | ||
python = ">=3.8,<3.11" | ||
torch = { version = "1.12.1", source="torch"} | ||
torchaudio = { version = "0.12.1", source="torch"} | ||
torchvision = { version = "0.13.1", source="torch"} | ||
torch-cluster = { version = "1.6.0", source="pyg"} | ||
torch-scatter = { version = "2.0.9", source="pyg"} | ||
torchdrug = "0.2.1" | ||
rdkit = "2023.9.5" | ||
typer = "^0.7.0" | ||
|
||
[[tool.poetry.source]] | ||
name = "torch" | ||
url = "https://download.pytorch.org/whl/cu116" | ||
secondary = true | ||
|
||
[[tool.poetry.source]] | ||
name = "pyg" | ||
url = "https://data.pyg.org/whl/torch-1.12.1+cu116.html" | ||
secondary = true | ||
|
||
[tool.poetry.group.dev.dependencies] | ||
bump2version = "^1.0.1" | ||
pytest = "^7.4" | ||
pytest-sugar = "^0.9.6" | ||
pre-commit = "^3.2.1" | ||
black = "^23.3.0" | ||
mypy = "^1.1.1" | ||
ruff = "^0.0.270" | ||
|
||
[build-system] | ||
requires = ["poetry-core"] | ||
build-backend = "poetry.core.masonry.api" | ||
|
||
[tool.pytest.ini_options] | ||
pythonpath = [ | ||
"." | ||
] |
7 changes: 7 additions & 0 deletions
7
...s/data-download/torchdrug_download-tool/src/polus/mm/utils/torchdrug_download/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
"""torchdrug.""" | ||
|
||
__version__ = "0.1.0" | ||
|
||
from polus.mm.utils.torchdrug_download.torchdrug_download import ( # noqa # pylint: disable=unused-import | ||
torchdrug_download, | ||
) |
152 changes: 152 additions & 0 deletions
152
...s/data-download/torchdrug_download-tool/src/polus/mm/utils/torchdrug_download/__main__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
"""Package entrypoint for the torchdrug package.""" | ||
|
||
# Base packages | ||
import logging | ||
from enum import Enum | ||
from os import environ | ||
from pathlib import Path | ||
|
||
import typer | ||
from polus.mm.utils.torchdrug_download.torchdrug_download import torchdrug_download | ||
from torchdrug import datasets | ||
|
||
logging.basicConfig( | ||
format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", | ||
datefmt="%d-%b-%y %H:%M:%S", | ||
) | ||
POLUS_LOG = getattr(logging, environ.get("POLUS_LOG", "INFO")) | ||
logger = logging.getLogger("polus.mm.utils.torchdrug_download.") | ||
logger.setLevel(POLUS_LOG) | ||
|
||
app = typer.Typer(help="torchdrug_download.") | ||
|
||
|
||
class DatabaseEnum(str, Enum):
    """Names of the datasets accepted by the ``--dataset`` option.

    Members mix in ``str``, so each one compares equal to (and hashes like)
    its plain string value.
    """

    # Every member's name is identical to its value; the value is the key
    # used to look up the matching dataset class.
    ClinTox = "ClinTox"
    PDBBind = "PDBBind"
    FB15k = "FB15k"
    FB15k237 = "FB15k237"
    WN18 = "WN18"
    WN18RR = "WN18RR"
    Hetionet = "Hetionet"
    BACE = "BACE"
    BBBP = "BBBP"
    CEP = "CEP"
    ChEMBLFiltered = "ChEMBLFiltered"
    Delaney = "Delaney"
    FreeSolv = "FreeSolv"
    HIV = "HIV"
    Lipophilicity = "Lipophilicity"
    MUV = "MUV"
    Malaria = "Malaria"
    OPV = "OPV"
    QM8 = "QM8"
    QM9 = "QM9"
    SIDER = "SIDER"
    Tox21 = "Tox21"
    ToxCast = "ToxCast"
    ZINC250k = "ZINC250k"
    ZINC2m = "ZINC2m"
    MOSES = "MOSES"
    PCQM4M = "PCQM4M"
    BetaLactamase = "BetaLactamase"
    Fluorescence = "Fluorescence"
    Stability = "Stability"
    Solubility = "Solubility"
    BinaryLocalization = "BinaryLocalization"
    SubcellularLocalization = "SubcellularLocalization"
    EnzymeCommission = "EnzymeCommission"
    GeneOntology = "GeneOntology"
    AlphaFoldDB = "AlphaFoldDB"
    Fold = "Fold"
    SecondaryStructure = "SecondaryStructure"
    ProteinNet = "ProteinNet"
    HumanPPI = "HumanPPI"
    YeastPPI = "YeastPPI"
    PPIAffinity = "PPIAffinity"
    BindingDB = "BindingDB"
    USPTO50k = "USPTO50k"
    Cora = "Cora"
    PubMed = "PubMed"
|
||
|
||
@app.command()
def main(
    dataset: DatabaseEnum = typer.Option(
        ...,
        "--dataset",
        help="Input database to be processed.",
    ),
    out_dir: Path = typer.Option(
        ...,
        "--outdir",
        help="Output directory.",
        exists=True,
        writable=True,
        file_okay=False,
        resolve_path=True,
    ),
) -> None:
    """torchdrug_download."""
    # NOTE: the docstring above is left untouched because Typer uses a
    # command's docstring as its ``--help`` text.
    #
    # dataset: dataset to fetch, restricted on the CLI to ``DatabaseEnum``.
    # out_dir: existing, writable directory handed to the dataset class.
    # Raises ValueError when ``dataset`` does not name a supported dataset.

    # Work with the plain string value: how a ``str``-mixin enum member is
    # rendered by ``format()``/``str()`` is not the same on every Python
    # version, so spelling out ``.value`` keeps messages and lookups stable.
    name = dataset.value if isinstance(dataset, DatabaseEnum) else dataset

    # Derive the name -> class table from the enum instead of repeating every
    # entry by hand, so the two can never drift apart.
    dataset_mapping = {
        member.value: getattr(datasets, member.value) for member in DatabaseEnum
    }

    # Only reachable when called programmatically with an arbitrary string;
    # the CLI already validates ``--dataset`` against the enum.
    if name not in dataset_mapping:
        msg = f"Unsupported dataset: {name}"
        raise ValueError(msg)

    logger.info("database: %s", name)
    logger.info("outdir: %s", out_dir)
    torchdrug_download(name, out_dir, dataset_mapping)
|
||
|
||
if __name__ == "__main__": | ||
app() |
19 changes: 19 additions & 0 deletions
19
...nload/torchdrug_download-tool/src/polus/mm/utils/torchdrug_download/torchdrug_download.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
"""torchdrug_download.""" | ||
from pathlib import Path | ||
|
||
|
||
def torchdrug_download(dataset: str, outdir: Path, dataset_mapping: dict) -> None:
    """Fetch a dataset by instantiating its dataset class on ``outdir``.

    Args:
        dataset: Key of the dataset to fetch; must be present in
            ``dataset_mapping``.
        outdir: Directory passed to the dataset class as its first argument.
        dataset_mapping: Mapping from dataset name to the class to instantiate.

    Raises:
        KeyError: If ``dataset`` is not a key of ``dataset_mapping``.
    """
    selected_dataset_class = dataset_mapping[dataset]
    # The constructor is called only for its side effect of populating
    # ``outdir``; the instance is not needed afterwards, so it is not bound
    # (and the ``str`` parameter ``dataset`` is no longer overwritten).
    # lazy=False causes issues with the PDBBind dataset such as invalid sequence
    selected_dataset_class(outdir, lazy=True)
1 change: 1 addition & 0 deletions
1
utils/pre-process/data-download/torchdrug_download-tool/tests/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
"""Tests for torchdrug_download.""" |
14 changes: 14 additions & 0 deletions
14
utils/pre-process/data-download/torchdrug_download-tool/tests/test_torchdrug.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
"""Tests for torchdrug_download.""" | ||
from pathlib import Path | ||
|
||
from polus.mm.utils.torchdrug_download.torchdrug_download import torchdrug_download | ||
from torchdrug import datasets | ||
|
||
|
||
def test_torchdrug_download_check() -> None:
    """Download Tox21 into a scratch directory and check its CSV is present."""
    import tempfile  # local import: only this test needs it

    dataset = "Tox21"
    dataset_mapping = {"Tox21": datasets.Tox21}
    # Use a throwaway directory so the download neither pollutes the working
    # directory nor passes because of a file left behind by an earlier run.
    with tempfile.TemporaryDirectory() as tmp_dir:
        outdir = Path(tmp_dir)
        torchdrug_download(dataset, outdir, dataset_mapping)
        assert (outdir / "tox21.csv").exists()
Oops, something went wrong.