Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Brandon Duane Walker authored and Brandon Duane Walker committed Jun 4, 2024
1 parent b3f4953 commit 113fb28
Show file tree
Hide file tree
Showing 16 changed files with 420 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""{{ cookiecutter.plugin_name }}."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
[bumpversion]
current_version = 0.1.0
commit = False
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?
serialize =
{major}.{minor}.{patch}-{release}{dev}
{major}.{minor}.{patch}

[bumpversion:part:release]
optional_value = _
first_value = dev
values =
dev
_

[bumpversion:part:dev]

[bumpversion:file:pyproject.toml]
search = version = "{current_version}"
replace = version = "{new_version}"

[bumpversion:file:VERSION]

[bumpversion:file:README.md]

[bumpversion:file:plugin.json]

[bumpversion:file:src/polus/mm/utils/torchdrug_download/__init__.py]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.venv
out
tests
__pycache__
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
poetry.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# CHANGELOG

## 0.1.0

Initial release.
49 changes: 49 additions & 0 deletions utils/pre-process/data-download/torchdrug_download-tool/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# docker build -f Dockerfile -t mrbrandonwalker/torch_drug .

FROM condaforge/mambaforge

ENV EXEC_DIR="/opt/executables"
ENV POLUS_LOG="INFO"
RUN mkdir -p ${EXEC_DIR}

# Install g++ and other essential packages
# needed to install torch
RUN apt-get update && apt-get install -y \
g++ \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Work directory defined in the base container
WORKDIR ${EXEC_DIR}

COPY pyproject.toml ${EXEC_DIR}
COPY VERSION ${EXEC_DIR}
COPY README.md ${EXEC_DIR}
COPY CHANGELOG.md ${EXEC_DIR}
# need this here because poetry install needs the src directory
COPY src ${EXEC_DIR}/src

# Install needed packages here
RUN pip3 install --upgrade pip
RUN pip3 install poetry
RUN poetry install
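# torch must be installed before `pip3 install ${EXEC_DIR}` below; otherwise
# building torch-cluster fails because its setup.py imports torch: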
#0 2.132 Collecting torch-cluster==1.6.0 (from polus-mm-utils-torchdrug_download==0.1.0)
#0 2.144 Downloading torch_cluster-1.6.0.tar.gz (43 kB)
#0 2.150 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 43.4/43.4 kB 9.0 MB/s eta 0:00:00
#0 2.164 Preparing metadata (setup.py): started
#0 2.329 Preparing metadata (setup.py): finished with status 'error'
#0 2.333 error: subprocess-exited-with-error
#0 2.333
#0 2.333 × python setup.py egg_info did not run successfully.
#0 2.333 │ exit code: 1
#0 2.333 ╰─> [6 lines of output]
#0 2.333 Traceback (most recent call last):
#0 2.333 File "<string>", line 2, in <module>
#0 2.333 File "<pip-setuptools-caller>", line 34, in <module>
#0 2.333 File "/tmp/pip-install-fdght_zq/torch-cluster_10a1a1bbf63e4e1ca1e035b9639f5253/setup.py", line 8, in <module>
#0 2.333 import torch
#0 2.333 ModuleNotFoundError: No module named 'torch'
RUN pip3 install torch

RUN pip3 install ${EXEC_DIR} --no-cache-dir
28 changes: 28 additions & 0 deletions utils/pre-process/data-download/torchdrug_download-tool/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# torchdrug (0.1.0)

Access datasets and models from TorchDrug

## Reading inputs/outputs from .cwl files
This adds inputs/outputs from .cwl files into cookiecutter.json
`python read_cwl_inputs_outputs.py path_to_cwl_file.cwl`

## Modifying template files
To dynamically add inputs/outputs from cookiecutter.json to README.MD, __main__.py and plugin_package function
`python modify_base_template.py`

## Building

To build the Docker image for this plugin, run `./build-docker.sh`.

## Install WIPP Plugin

If WIPP is running, navigate to the plugins page and add a new plugin. Paste the
contents of `plugin.json` into the pop-up window and submit.
## Options

This plugin takes 1 input argument and 1 output argument:

| Name | Description | I/O | Type | Default |
|---------------|-------------------------|--------|--------|---------|
| dataset | Input dataset to extract | Input | string | string |
| outdir | Output collection. | Output | collection | collection |
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.1.0
21 changes: 21 additions & 0 deletions utils/pre-process/data-download/torchdrug_download-tool/ict.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
specVersion: 0.1.0
name: labshare/torchdrug-download
version: 0.1.0
container: polusai/torchdrug-tool:0.1.0
entrypoint: ""
title: torchdrug_download
description: Access datasets and models from TorchDrug
author: Brandon Walker ([email protected])
repository: https://github.com/labshare/mmtools
documentation: https://ncats.nih.gov/preclinical/core/informatics
citation: ""
inputs:
- name: dataset
required: true
description: Input dataset to extract
type: string
outputs:
- name: outdir
required: false
description: Output collection.
type: collection
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
[tool.poetry]
name = "polus-mm-utils-torchdrug_download"
version = "0.1.0"
description = "Access datasets and models from TorchDrug"
authors = ["Data Scientist <[email protected]>"]
readme = "README.md"
packages = [{include = "polus", from = "src"}]

[tool.poetry.dependencies]
python = ">=3.8,<3.11"
torch = { version = "1.12.1", source="torch"}
torchaudio = { version = "0.12.1", source="torch"}
torchvision = { version = "0.13.1", source="torch"}
torch-cluster = { version = "1.6.0", source="pyg"}
torch-scatter = { version = "2.0.9", source="pyg"}
torchdrug = "0.2.1"
rdkit = "2023.9.5"
typer = "^0.7.0"

[[tool.poetry.source]]
name = "torch"
url = "https://download.pytorch.org/whl/cu116"
secondary = true

[[tool.poetry.source]]
name = "pyg"
url = "https://data.pyg.org/whl/torch-1.12.1+cu116.html"
secondary = true

[tool.poetry.group.dev.dependencies]
bump2version = "^1.0.1"
pytest = "^7.4"
pytest-sugar = "^0.9.6"
pre-commit = "^3.2.1"
black = "^23.3.0"
mypy = "^1.1.1"
ruff = "^0.0.270"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.pytest.ini_options]
pythonpath = [
"."
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""torchdrug."""

# Package version string; the bump2version configuration lists this module
# under a [bumpversion:file:...] section, so it is rewritten on version bumps.
__version__ = "0.1.0"

from polus.mm.utils.torchdrug_download.torchdrug_download import ( # noqa # pylint: disable=unused-import
torchdrug_download,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
"""Package entrypoint for the torchdrug package."""

# Base packages
import logging
from enum import Enum
from os import environ
from pathlib import Path

import typer
from polus.mm.utils.torchdrug_download.torchdrug_download import torchdrug_download
from torchdrug import datasets

# Configure the root handler once so every logger in the process shares the
# same timestamped format.
logging.basicConfig(
    format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s",
    datefmt="%d-%b-%y %H:%M:%S",
)

# Resolve the level name taken from the POLUS_LOG environment variable.
# Upper-casing matters: getattr(logging, "debug") would return the
# *function* logging.debug rather than the integer level, and setLevel would
# then raise. Anything that does not resolve to an integer level falls back
# to INFO instead of crashing at import time.
POLUS_LOG = getattr(logging, environ.get("POLUS_LOG", "INFO").upper(), logging.INFO)
if not isinstance(POLUS_LOG, int):
    POLUS_LOG = logging.INFO

# No trailing dot: "pkg." would name an empty child logger of "pkg".
logger = logging.getLogger("polus.mm.utils.torchdrug_download")
logger.setLevel(POLUS_LOG)

# Typer application object; `main` below is registered on it via @app.command().
app = typer.Typer(help="torchdrug_download.")


class DatabaseEnum(str, Enum):
    """Closed set of dataset names accepted by the ``--dataset`` CLI option.

    The ``str`` mix-in makes every member compare equal to its plain string
    value. Each member's name is spelled exactly like its value, and `main`
    resolves that string to the attribute of the same name on
    ``torchdrug.datasets``.
    """

    ClinTox = "ClinTox"
    PDBBind = "PDBBind"
    FB15k = "FB15k"
    FB15k237 = "FB15k237"
    WN18 = "WN18"
    WN18RR = "WN18RR"
    Hetionet = "Hetionet"
    BACE = "BACE"
    BBBP = "BBBP"
    CEP = "CEP"
    ChEMBLFiltered = "ChEMBLFiltered"
    Delaney = "Delaney"
    FreeSolv = "FreeSolv"
    HIV = "HIV"
    Lipophilicity = "Lipophilicity"
    MUV = "MUV"
    Malaria = "Malaria"
    OPV = "OPV"
    QM8 = "QM8"
    QM9 = "QM9"
    SIDER = "SIDER"
    Tox21 = "Tox21"
    ToxCast = "ToxCast"
    ZINC250k = "ZINC250k"
    ZINC2m = "ZINC2m"
    MOSES = "MOSES"
    PCQM4M = "PCQM4M"
    BetaLactamase = "BetaLactamase"
    Fluorescence = "Fluorescence"
    Stability = "Stability"
    Solubility = "Solubility"
    BinaryLocalization = "BinaryLocalization"
    SubcellularLocalization = "SubcellularLocalization"
    EnzymeCommission = "EnzymeCommission"
    GeneOntology = "GeneOntology"
    AlphaFoldDB = "AlphaFoldDB"
    Fold = "Fold"
    SecondaryStructure = "SecondaryStructure"
    ProteinNet = "ProteinNet"
    HumanPPI = "HumanPPI"
    YeastPPI = "YeastPPI"
    PPIAffinity = "PPIAffinity"
    BindingDB = "BindingDB"
    USPTO50k = "USPTO50k"
    Cora = "Cora"
    PubMed = "PubMed"


@app.command()
def main(
    dataset: DatabaseEnum = typer.Option(
        ...,
        "--dataset",
        help="Input database to be processed.",
    ),
    out_dir: Path = typer.Option(
        ...,
        "--outdir",
        help="Output directory.",
        exists=True,
        writable=True,
        file_okay=False,
        resolve_path=True,
    ),
) -> None:
    """torchdrug_download."""
    # NOTE: the docstring above is left short on purpose because Typer shows
    # a command's docstring as its --help text.
    #
    # Parameters:
    #   dataset: member of DatabaseEnum chosen with --dataset.
    #   out_dir: existing, writable directory given with --outdir; it is
    #            forwarded to the dataset constructor by torchdrug_download.

    # Hand the plain string on, not the enum member: torchdrug_download is
    # annotated to take `str`, and the value logs the same on every Python
    # version.
    dataset_name = dataset.value

    # Derive the name -> class table from the enum so the two cannot drift
    # apart. Every member's value is the name of the attribute to fetch from
    # `torchdrug.datasets`, so the table is exhaustive by construction and no
    # separate "unsupported dataset" check is needed.
    dataset_mapping = {
        member.value: getattr(datasets, member.value) for member in DatabaseEnum
    }

    logger.info("database: %s", dataset_name)
    logger.info("outdir: %s", out_dir)
    torchdrug_download(dataset_name, out_dir, dataset_mapping)


# Run the Typer application only when this module is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    app()
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""torchdrug_download."""
from pathlib import Path


def torchdrug_download(dataset: str, outdir: Path, dataset_mapping: dict) -> None:
    """Instantiate the dataset class registered under ``dataset``.

    The created instance is discarded; the function is called purely for
    whatever the constructor does with ``outdir``.

    Args:
        dataset: Key into ``dataset_mapping`` selecting the dataset.
        outdir: Passed to the dataset constructor as its first positional
            argument.
        dataset_mapping: Mapping of dataset name to the class (or any
            callable) to construct.

    Returns:
        None

    Raises:
        KeyError: If ``dataset`` is not a key of ``dataset_mapping``.
        TypeError: Re-raised from the constructor unless it only complains
            about the ``lazy`` keyword.
    """
    selected_dataset_class = dataset_mapping[dataset]

    try:
        # lazy = False causes issues with PDBBind dataset such as invalid sequence
        selected_dataset_class(outdir, lazy=True)
    except TypeError as err:
        # Not every dataset constructor takes `lazy`. Retry without it only
        # when the failure is exactly that rejected keyword; any other
        # TypeError is a real error and must propagate.
        if "unexpected keyword argument 'lazy'" not in str(err):
            raise
        selected_dataset_class(outdir)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Tests for torchdrug_download."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""Tests for torchdrug_download."""
from pathlib import Path

from polus.mm.utils.torchdrug_download.torchdrug_download import torchdrug_download
from torchdrug import datasets


def test_torchdrug_download_check() -> None:
    """Check that requesting Tox21 leaves ``tox21.csv`` in the output directory."""
    import tempfile

    dataset = "Tox21"
    dataset_mapping = {"Tox21": datasets.Tox21}

    # Use a throwaway directory instead of the current working directory:
    # the run then leaves no files behind, and the assertion cannot be
    # satisfied by a stale tox21.csv from an earlier run.
    with tempfile.TemporaryDirectory() as tmp_dir:
        outdir = Path(tmp_dir)
        torchdrug_download(dataset, outdir, dataset_mapping)
        assert (outdir / "tox21.csv").exists()
Loading

0 comments on commit 113fb28

Please sign in to comment.