From 467ed6ee194e918ad36383c02c1b22613c8c33d0 Mon Sep 17 00:00:00 2001 From: Nazanin Donyapour Date: Wed, 22 May 2024 13:42:31 -0400 Subject: [PATCH] load_trained_molgan_model --- .../.bumpversion.cfg | 29 ++++ .../.dockerignore | 4 + .../.gitignore | 1 + .../CHANGELOG.md | 5 + .../Dockerfile | 38 +++++ .../README.md | 20 +++ .../load-trained-molgan-model-plugin/VERSION | 1 + .../build-docker.sh | 4 + .../load-trained-molgan-model-plugin/ict.yml | 114 ++++++++++++++ .../load_trained_molgan_model_0@1@0.cwl | 149 ++++++++++++++++++ .../pyproject.toml | 29 ++++ .../tests/__init__.py | 1 + .../tests/test_load_trained_molgan_model.py | 34 ++++ 13 files changed, 429 insertions(+) create mode 100644 utils/load-trained-molgan-model-plugin/.bumpversion.cfg create mode 100644 utils/load-trained-molgan-model-plugin/.dockerignore create mode 100644 utils/load-trained-molgan-model-plugin/.gitignore create mode 100644 utils/load-trained-molgan-model-plugin/CHANGELOG.md create mode 100644 utils/load-trained-molgan-model-plugin/Dockerfile create mode 100644 utils/load-trained-molgan-model-plugin/README.md create mode 100644 utils/load-trained-molgan-model-plugin/VERSION create mode 100755 utils/load-trained-molgan-model-plugin/build-docker.sh create mode 100644 utils/load-trained-molgan-model-plugin/ict.yml create mode 100644 utils/load-trained-molgan-model-plugin/load_trained_molgan_model_0@1@0.cwl create mode 100644 utils/load-trained-molgan-model-plugin/pyproject.toml create mode 100644 utils/load-trained-molgan-model-plugin/tests/__init__.py create mode 100644 utils/load-trained-molgan-model-plugin/tests/test_load_trained_molgan_model.py diff --git a/utils/load-trained-molgan-model-plugin/.bumpversion.cfg b/utils/load-trained-molgan-model-plugin/.bumpversion.cfg new file mode 100644 index 00000000..5198fae5 --- /dev/null +++ b/utils/load-trained-molgan-model-plugin/.bumpversion.cfg @@ -0,0 +1,29 @@ +[bumpversion] +current_version = 0.1.0 +commit = False +tag = False +parse = 
(?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? +serialize = + {major}.{minor}.{patch}-{release}{dev} + {major}.{minor}.{patch} + +[bumpversion:part:release] +optional_value = _ +first_value = dev +values = + dev + _ + +[bumpversion:part:dev] + +[bumpversion:file:pyproject.toml] +search = version = "{current_version}" +replace = version = "{new_version}" + +[bumpversion:file:VERSION] + +[bumpversion:file:README.md] + +[bumpversion:file:plugin.json] + +[bumpversion:file:src/polus/mm/utils/load_trained_molgan_model/__init__.py] diff --git a/utils/load-trained-molgan-model-plugin/.dockerignore b/utils/load-trained-molgan-model-plugin/.dockerignore new file mode 100644 index 00000000..7c603f81 --- /dev/null +++ b/utils/load-trained-molgan-model-plugin/.dockerignore @@ -0,0 +1,4 @@ +.venv +out +tests +__pycache__ diff --git a/utils/load-trained-molgan-model-plugin/.gitignore b/utils/load-trained-molgan-model-plugin/.gitignore new file mode 100644 index 00000000..c04bc49f --- /dev/null +++ b/utils/load-trained-molgan-model-plugin/.gitignore @@ -0,0 +1 @@ +poetry.lock diff --git a/utils/load-trained-molgan-model-plugin/CHANGELOG.md b/utils/load-trained-molgan-model-plugin/CHANGELOG.md new file mode 100644 index 00000000..b67793f7 --- /dev/null +++ b/utils/load-trained-molgan-model-plugin/CHANGELOG.md @@ -0,0 +1,5 @@ +# CHANGELOG + +## 0.1.0 + +Initial release. diff --git a/utils/load-trained-molgan-model-plugin/Dockerfile b/utils/load-trained-molgan-model-plugin/Dockerfile new file mode 100644 index 00000000..5b254b1a --- /dev/null +++ b/utils/load-trained-molgan-model-plugin/Dockerfile @@ -0,0 +1,38 @@ +# docker build -f Dockerfile -t polusai/molgan-tool:0.1.0 . 
+FROM condaforge/mambaforge +# NOT mambaforge-pypy3 (rdkit is incompatible with pypy) + +# RDKIT logging +ENV RDKIT_ERROR_LOGGING="OFF" + +RUN apt-get update && apt-get install -y wget git + +# Install Python 3.10 using Mamba +RUN mamba install -y python=3.10 + +# Clone MolGAN +RUN git clone https://github.com/ndonyapour/MolGAN.git + +# Build and install python bindings +# MolGAN was initially implemented using TensorFlow v1, and TensorFlow version 2 offers support +# for v1 functionality. However, it's important to mention that the current patch for upgrading +# to v2 does not truly upgrade the v1 API to the v2 API; it calls the legacy v1 API from the v2 package via +# "tf.compat.v1". Essentially, it's still v1. Truly upgrading to v2 requires rewriting most +# functions of MolGAN, including model creation, data processing, and training. + +RUN mamba install -c conda-forge rdkit "tensorflow<2.13" numpy scikit-learn xorg-libxrender + +# Make sure rdkit is activated +RUN python -c "import rdkit" + +# Train a Model +WORKDIR /MolGAN + +# Download the gdb9 database +RUN bash data/download_dataset.sh data/gdb9.sdf data/NP_score.pkl.gz data/SA_score.pkl.gz + +# Download the pretrained model +RUN wget -nv --no-clobber https://huggingface.co/ndonyapour/MolGAN/resolve/main/MolGAN_model.tar.gz && tar xvzf MolGAN_model.tar.gz +RUN mv MolGAN_model trained_models +RUN wget -nv --no-clobber https://huggingface.co/ndonyapour/MolGAN/resolve/main/data.pkl -O data/data.pkl +ADD Dockerfile . 
diff --git a/utils/load-trained-molgan-model-plugin/README.md b/utils/load-trained-molgan-model-plugin/README.md new file mode 100644 index 00000000..f5a9b83e --- /dev/null +++ b/utils/load-trained-molgan-model-plugin/README.md @@ -0,0 +1,20 @@ +# load_trained_molgan_model (0.1.0) + +MolGAN tool for generating small molecules + +## Options + +This plugin takes 8 input arguments and 2 output arguments: + +| Name | Description | I/O | Type | Default | +|---------------|-------------------------|--------|--------|---------| +| input_data_path | Path to the input data file, Type: string, File type: input, Accepted formats: pkl, Example file: https://github.com/bioexcel/biobb_ml/raw/master/biobb_ml/test/reference/classification/ref_output_model_support_vector_machine.pkl | Input | string | string | +| input_NP_Score_path | Output ceout file (AMBER ceout), Type: string, File type: input, Accepted formats: gz, Example file: https://github.com/bioexcel/biobb_amber/raw/master/biobb_amber/test/data/cphstats/sander.ceout.gz | Input | string | string | +| input_SA_Score_path | Output ceout file (AMBER ceout), Type: string, File type: input, Accepted formats: gz, Example file: https://github.com/bioexcel/biobb_amber/raw/master/biobb_amber/test/data/cphstats/sander.ceout.gz | Input | string | string | +| input_model_dir | Input directory of trained models | Input | string | string | +| output_log_path | Path to the log file, Type: string, File type: output, Accepted formats: log | Input | string | string | +| output_sdf_path | Path to the output file, Type: string, File type: output, Accepted formats: sdf | Input | string | string | +| num_samples | The number of training epochs, Type: int | Input | int | int | +| rdkit_error_logging | Enable or disable RDKit error logging | Input | string | string | +| output_log_path | Path to the log file | Output | File | File | +| output_sdf_path | Path to the output file | Output | File | File | diff --git 
a/utils/load-trained-molgan-model-plugin/VERSION b/utils/load-trained-molgan-model-plugin/VERSION new file mode 100644 index 00000000..6e8bf73a --- /dev/null +++ b/utils/load-trained-molgan-model-plugin/VERSION @@ -0,0 +1 @@ +0.1.0 diff --git a/utils/load-trained-molgan-model-plugin/build-docker.sh b/utils/load-trained-molgan-model-plugin/build-docker.sh new file mode 100755 index 00000000..2d99aea2 --- /dev/null +++ b/utils/load-trained-molgan-model-plugin/build-docker.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +version=$("] +readme = "README.md" + +[tool.poetry.dependencies] +python = ">=3.9,<3.12" +typer = "^0.7.0" +sophios = "0.1.1" + +[tool.poetry.group.dev.dependencies] +bump2version = "^1.0.1" +pytest = "^7.4" +pytest-sugar = "^0.9.6" +pre-commit = "^3.2.1" +black = "^23.3.0" +mypy = "^1.1.1" +ruff = "^0.0.270" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" + +[tool.pytest.ini_options] +pythonpath = [ + "." +] diff --git a/utils/load-trained-molgan-model-plugin/tests/__init__.py b/utils/load-trained-molgan-model-plugin/tests/__init__.py new file mode 100644 index 00000000..a0d26f69 --- /dev/null +++ b/utils/load-trained-molgan-model-plugin/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for load_trained_molgan_model.""" diff --git a/utils/load-trained-molgan-model-plugin/tests/test_load_trained_molgan_model.py b/utils/load-trained-molgan-model-plugin/tests/test_load_trained_molgan_model.py new file mode 100644 index 00000000..d5e66f62 --- /dev/null +++ b/utils/load-trained-molgan-model-plugin/tests/test_load_trained_molgan_model.py @@ -0,0 +1,34 @@ +"""Tests for load_trained_molgan_model.""" +from pathlib import Path + +from sophios.api.pythonapi import Step +from sophios.api.pythonapi import Workflow + + +def test_load_trained_molgan_model_cwl() -> None: + """Test load_trained_molgan_model CWL.""" + cwl_file = Path("load_trained_molgan_model_0@1@0.cwl") + + # Create the step for the CWL file + load_trained_molgan_model_step = 
Step(clt_path=cwl_file) + + load_trained_molgan_model_step.input_data_path = "/MolGAN/data/data.pkl" + load_trained_molgan_model_step.input_NP_Score_path = "/MolGAN/data/NP_score.pkl.gz" + load_trained_molgan_model_step.input_SA_Score_path = "/MolGAN/data/SA_score.pkl.gz" + load_trained_molgan_model_step.input_model_dir = "/MolGAN/trained_models" + load_trained_molgan_model_step.output_sdf_path = "generated_mols.sdf" + load_trained_molgan_model_step.output_log_path = "output.txt" + + # Define the workflow with the step + steps = [load_trained_molgan_model_step] + filename = "load_trained_molgan_model" + workflow = Workflow(steps, filename) + + # Run the workflow + workflow.run() + + # Check for the existence of the output file + outdir = Path("outdir") + assert any( + file.name == "generated_mols.sdf" for file in outdir.rglob("*") + ), "The file generated_mols.sdf was not found."