diff --git a/docs/sphinx-builddir/doctrees/README.doctree b/docs/sphinx-builddir/doctrees/README.doctree index 27d0338..9335a3c 100644 Binary files a/docs/sphinx-builddir/doctrees/README.doctree and b/docs/sphinx-builddir/doctrees/README.doctree differ diff --git a/docs/sphinx-builddir/doctrees/environment.pickle b/docs/sphinx-builddir/doctrees/environment.pickle index bf8198b..c40c51b 100644 Binary files a/docs/sphinx-builddir/doctrees/environment.pickle and b/docs/sphinx-builddir/doctrees/environment.pickle differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_101_0.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_101_0.png new file mode 100644 index 0000000..44e9906 Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_101_0.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_110_0.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_110_0.png new file mode 100644 index 0000000..69ff963 Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_110_0.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_119_0.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_119_0.png new file mode 100644 index 0000000..9e01a66 Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_119_0.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_123_1.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_123_1.png new file mode 100644 index 0000000..c3166fa Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_123_1.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_125_1.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_125_1.png new 
file mode 100644 index 0000000..a3a7159 Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_125_1.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_133_0.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_133_0.png new file mode 100644 index 0000000..9fef1ba Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_133_0.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_135_0.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_135_0.png new file mode 100644 index 0000000..7bc28cd Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_135_0.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_137_0.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_137_0.png new file mode 100644 index 0000000..9f9b3be Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_137_0.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_148_0.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_148_0.png new file mode 100644 index 0000000..419c5e6 Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_148_0.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_153_0.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_153_0.png new file mode 100644 index 0000000..184fd3f Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_153_0.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_160_0.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_160_0.png new file mode 100644 index 0000000..57f617f 
Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_160_0.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_162_2.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_162_2.png new file mode 100644 index 0000000..ad64a86 Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_162_2.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_170_0.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_170_0.png new file mode 100644 index 0000000..97c226c Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_170_0.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_172_0.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_172_0.png new file mode 100644 index 0000000..46b652c Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_172_0.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_174_0.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_174_0.png new file mode 100644 index 0000000..561a458 Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_174_0.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_176_0.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_176_0.png new file mode 100644 index 0000000..c42504b Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_176_0.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_199_1.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_199_1.png new file mode 100644 index 0000000..5b897ab Binary files /dev/null and 
b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_199_1.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_207_0.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_207_0.png new file mode 100644 index 0000000..dfb3cb1 Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_207_0.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_21_0.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_21_0.png new file mode 100644 index 0000000..871c3da Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_21_0.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_236_0.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_236_0.png new file mode 100644 index 0000000..cde6997 Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_236_0.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_23_0.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_23_0.png new file mode 100644 index 0000000..9b2bfda Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_23_0.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_243_1.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_243_1.png new file mode 100644 index 0000000..29cf995 Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_243_1.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_35_0.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_35_0.png new file mode 100644 index 0000000..540a452 Binary files /dev/null and 
b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_35_0.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_56_0.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_56_0.png new file mode 100644 index 0000000..5190bd8 Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_56_0.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_67_0.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_67_0.png new file mode 100644 index 0000000..69a8c36 Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_67_0.png differ diff --git a/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_71_0.png b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_71_0.png new file mode 100644 index 0000000..9b20630 Binary files /dev/null and b/docs/sphinx-builddir/doctrees/nbsphinx/notebooks_QSARtuna_Tutorial_71_0.png differ diff --git a/docs/sphinx-builddir/doctrees/notebooks/QSARtuna_Tutorial.doctree b/docs/sphinx-builddir/doctrees/notebooks/QSARtuna_Tutorial.doctree index e65a7df..ad07e31 100644 Binary files a/docs/sphinx-builddir/doctrees/notebooks/QSARtuna_Tutorial.doctree and b/docs/sphinx-builddir/doctrees/notebooks/QSARtuna_Tutorial.doctree differ diff --git a/docs/sphinx-builddir/html/README.html b/docs/sphinx-builddir/html/README.html index 88045a9..ed97a6f 100644 --- a/docs/sphinx-builddir/html/README.html +++ b/docs/sphinx-builddir/html/README.html @@ -136,6 +136,7 @@

Backgroundhere.

+

QSARtuna Publication available here.

The three-step process

QSARtuna is structured around three steps:

@@ -357,7 +358,7 @@

Run from Python/Jupyter Notebook activate my_env_with_qsartuna module purge # Just in case. which python # Check. Should output path that contains "my_env_with_qsartuna". -python -m pip install https://github.com/MolecularAI/QSARtuna/files/14742594/qsartuna-3.0.0.1.tar.gz +python -m pip install https://github.com/MolecularAI/QSARtuna/releases/download/3.1.0/qsartuna-3.1.0.tar.gz

Then you can use QSARtuna inside your Notebook:

diff --git a/docs/sphinx-builddir/html/_images/mlflow-inspecting-trials.png b/docs/sphinx-builddir/html/_images/mlflow-inspecting-trials.png new file mode 100644 index 0000000..90060e0 Binary files /dev/null and b/docs/sphinx-builddir/html/_images/mlflow-inspecting-trials.png differ diff --git a/docs/sphinx-builddir/html/_images/mlflow-select-experiment.png b/docs/sphinx-builddir/html/_images/mlflow-select-experiment.png new file mode 100644 index 0000000..f4169d3 Binary files /dev/null and b/docs/sphinx-builddir/html/_images/mlflow-select-experiment.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_101_0.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_101_0.png new file mode 100644 index 0000000..44e9906 Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_101_0.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_110_0.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_110_0.png new file mode 100644 index 0000000..69ff963 Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_110_0.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_119_0.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_119_0.png new file mode 100644 index 0000000..9e01a66 Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_119_0.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_123_1.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_123_1.png new file mode 100644 index 0000000..c3166fa Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_123_1.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_125_1.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_125_1.png new file mode 100644 index 
0000000..a3a7159 Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_125_1.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_133_0.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_133_0.png new file mode 100644 index 0000000..9fef1ba Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_133_0.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_135_0.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_135_0.png new file mode 100644 index 0000000..7bc28cd Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_135_0.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_137_0.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_137_0.png new file mode 100644 index 0000000..9f9b3be Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_137_0.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_148_0.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_148_0.png new file mode 100644 index 0000000..419c5e6 Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_148_0.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_153_0.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_153_0.png new file mode 100644 index 0000000..184fd3f Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_153_0.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_160_0.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_160_0.png new file mode 100644 index 0000000..57f617f Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_160_0.png differ diff 
--git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_162_2.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_162_2.png new file mode 100644 index 0000000..ad64a86 Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_162_2.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_170_0.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_170_0.png new file mode 100644 index 0000000..97c226c Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_170_0.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_172_0.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_172_0.png new file mode 100644 index 0000000..46b652c Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_172_0.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_174_0.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_174_0.png new file mode 100644 index 0000000..561a458 Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_174_0.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_176_0.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_176_0.png new file mode 100644 index 0000000..c42504b Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_176_0.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_199_1.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_199_1.png new file mode 100644 index 0000000..5b897ab Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_199_1.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_207_0.png 
b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_207_0.png new file mode 100644 index 0000000..dfb3cb1 Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_207_0.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_21_0.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_21_0.png new file mode 100644 index 0000000..871c3da Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_21_0.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_236_0.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_236_0.png new file mode 100644 index 0000000..cde6997 Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_236_0.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_23_0.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_23_0.png new file mode 100644 index 0000000..9b2bfda Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_23_0.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_243_1.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_243_1.png new file mode 100644 index 0000000..29cf995 Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_243_1.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_35_0.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_35_0.png new file mode 100644 index 0000000..540a452 Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_35_0.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_56_0.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_56_0.png new file mode 100644 index 0000000..5190bd8 Binary files /dev/null and 
b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_56_0.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_67_0.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_67_0.png new file mode 100644 index 0000000..69a8c36 Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_67_0.png differ diff --git a/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_71_0.png b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_71_0.png new file mode 100644 index 0000000..9b20630 Binary files /dev/null and b/docs/sphinx-builddir/html/_images/notebooks_QSARtuna_Tutorial_71_0.png differ diff --git a/docs/sphinx-builddir/html/_sources/README.md.txt b/docs/sphinx-builddir/html/_sources/README.md.txt index f13382f..b836e94 100644 --- a/docs/sphinx-builddir/html/_sources/README.md.txt +++ b/docs/sphinx-builddir/html/_sources/README.md.txt @@ -21,6 +21,8 @@ explainability python packages Further documentation in the GitHub pages [here](https://molecularai.github.io/QSARtuna/). +QSARtuna Publication available [here](https://doi.org/10.1021/acs.jcim.4c00457). + ### The three-step process QSARtuna is structured around three steps: @@ -266,7 +268,7 @@ conda create --name my_env_with_qsartuna python=3.10.10 jupyter pip conda activate my_env_with_qsartuna module purge # Just in case. which python # Check. Should output path that contains "my_env_with_qsartuna". 
-python -m pip install https://github.com/MolecularAI/QSARtuna/files/14742594/qsartuna-3.0.0.1.tar.gz +python -m pip install https://github.com/MolecularAI/QSARtuna/releases/download/3.1.0/qsartuna-3.1.0.tar.gz ``` Then you can use QSARtuna inside your Notebook: diff --git a/docs/sphinx-builddir/html/_sources/notebooks/QSARtuna_Tutorial.ipynb.txt b/docs/sphinx-builddir/html/_sources/notebooks/QSARtuna_Tutorial.ipynb.txt new file mode 100644 index 0000000..ba84fe3 --- /dev/null +++ b/docs/sphinx-builddir/html/_sources/notebooks/QSARtuna_Tutorial.ipynb.txt @@ -0,0 +1,11486 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# QSARtuna CLI Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## This tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This tutorial is intended to provide a new user with the necessary background to start using QSARtuna through a command line interface (CLI).\n", + "\n", + "A separate tutorial is available describing the use of the QSARtuna GUI." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Background" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "QSARtuna is a python package to automate the model building process for *REINVENT*. These models can use a variety of algorithms to fit to your input data and most of them have one or more so-called hyper-parameters (e.g. the maximum number of trees using a *Random Forest* or the C parameter in SVRs, controlling the influence of every support vector).\n", + "\n", + "For both regression and classification tasks, QSARtuna allows you to specify input data for which the optimal hyper-parameters and a model can obtained automatically. 
If you want to get an idea on how the package is structured, read on otherwise you might want to skip it and The following examples should give you an idea how.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The three-step process" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "QSARtuna is structured around three steps:\n", + "1. **Hyperparameter Optimization:** \n", + " Train many models with different parameters using Optuna.\n", + " Only the training dataset is used here. \n", + " Training is usually done with cross-validation.\n", + "2. **Build (Training):**\n", + " Pick the best model from Optimization, \n", + " re-train it without cross-validation,\n", + " and optionally evaluate its performance on the test dataset.\n", + "3. **Prod-build (or build merged):** \n", + " Re-train the best-performing model on the merged training and test datasets. \n", + " This step has a drawback that there is no data left to evaluate the resulting model, \n", + " but it has a big benefit that this final model is trained on the all available data. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Preparation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To use QSARtuna from Jupyter Notebook, install it with:\n", + "```\n", + "python -m pip install https://github.com/MolecularAI/QSARtuna/releases/download/3.1.0/qsartuna-3.1.0.tar.gz\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Regression example\n", + "This is a toy example of training a model that will predict molecular weight for a subset of DRD2 molecules. This example was chosen so that the whole run would take less than a minute.\n", + "\n", + "Training dataset is a CSV file. It has SMILES strings in a column named \"canonical\". 
It has the value that we will try to predict in column \"molwt\".\n", + "\n", + "This example has train and test (holdout) dataset ready. If you have single dataset and would like QSARtuna to split it into train and test (holdout) datasets, see the next section." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here are a few lines from the input file:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "pycharm": { + "is_executing": true + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "canonical,activity,molwt,molwt_gt_330\r\n", + "Cc1cc(NC(=O)c2cccc(COc3ccc(Br)cc3)c2)no1,0,387.233,True\r\n", + "O=C(Nc1ccc(F)cc1F)Nc1sccc1-c1nc2ccccc2s1,0,387.4360000000001,True\r\n", + "COC(=O)c1ccccc1NC(=O)c1cc([N+](=O)[O-])nn1Cc1ccccc1,0,380.36000000000007,True\r\n", + "CCOC(=O)C(C)Sc1nc(-c2ccccc2)ccc1C#N,0,312.39400000000006,False\r\n", + "CCC(CC)NC(=O)c1nn(Cc2ccccc2)c(=O)c2ccccc12,0,349.4340000000001,True\r\n", + "Brc1ccccc1OCCCOc1cccc2cccnc12,0,358.235,True\r\n", + "CCCCn1c(COc2cccc(OC)c2)nc2ccccc21,0,310.39700000000005,False\r\n", + "CCOc1cccc(NC(=O)c2sc3nc(-c4ccc(F)cc4)ccc3c2N)c1,0,407.4700000000001,True\r\n", + "COc1ccc(S(=O)(=O)N(CC(=O)Nc2ccc(C)cc2)c2ccc(C)cc2)cc1OC,0,454.54800000000023,True\r\n" + ] + } + ], + "source": [ + "!head ../tests/data/DRD2/subset-50/train.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create configuration" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "QSARtuna configuration can be read from a JSON file or created in Python. Here we create it in Python." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append(\"..\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "# Start with the imports.\n", + "import sklearn\n", + "from optunaz.three_step_opt_build_merge import (\n", + " optimize,\n", + " buildconfig_best,\n", + " build_best,\n", + " build_merged,\n", + ")\n", + "from optunaz.config import ModelMode, OptimizationDirection\n", + "from optunaz.config.optconfig import (\n", + " OptimizationConfig,\n", + " SVR,\n", + " RandomForestRegressor,\n", + " Ridge,\n", + " Lasso,\n", + " PLSRegression,\n", + " KNeighborsRegressor\n", + ")\n", + "from optunaz.datareader import Dataset\n", + "from optunaz.descriptors import ECFP, MACCS_keys, ECFP_counts, PathFP" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Prepare hyperparameter optimization configuration.\n", + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"canonical\", # Typical names are \"SMILES\" and \"smiles\".\n", + " response_column=\"molwt\", # Often a specific name (like here), or just \"activity\".\n", + " training_dataset_file=\"../tests/data/DRD2/subset-50/train.csv\",\n", + " test_dataset_file=\"../tests/data/DRD2/subset-50/test.csv\" # Hidden during optimization.\n", + " ),\n", + " descriptors=[\n", + " ECFP.new(),\n", + " ECFP_counts.new(),\n", + " MACCS_keys.new(),\n", + " PathFP.new()\n", + " ],\n", + " algorithms=[\n", + " SVR.new(),\n", 
+ " RandomForestRegressor.new(n_estimators={\"low\": 5, \"high\": 10}),\n", + " Ridge.new(),\n", + " Lasso.new(),\n", + " PLSRegression.new(),\n", + " KNeighborsRegressor.new()\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.REGRESSION,\n", + " cross_validation=3,\n", + " n_trials=100, # Total number of trials.\n", + " n_startup_trials=50, # Number of startup (\"random\") trials.\n", + " random_seed=42, # Seed for reproducability\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " ),\n", + ") " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Run optimization" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Setup basic logging.\n", + "import logging\n", + "from importlib import reload\n", + "reload(logging)\n", + "logging.basicConfig(level=logging.INFO)\n", + "logging.getLogger(\"train\").disabled = True # Prevent ChemProp from logging\n", + "import numpy as np\n", + "np.seterr(divide=\"ignore\")\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\", category=FutureWarning)\n", + "warnings.filterwarnings(\"ignore\", category=RuntimeWarning)\n", + "\n", + "import tqdm\n", + "from functools import partialmethod, partial\n", + "tqdm.__init__ = partialmethod(tqdm.__init__, disable=True) # Prevent tqdm in ChemProp from flooding log\n", + "\n", + "# Avoid decpreciated warnings from packages etc\n", + "import warnings\n", + "warnings.simplefilter(\"ignore\")\n", + "def warn(*args, **kwargs):\n", + " pass\n", + "warnings.warn = warn" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:17:26,561] A new study created in memory with name: my_study\n", + "[I 2024-07-02 13:17:26,714] A new study created in memory with name: study_name_0\n", + "[I 2024-07-02 13:17:27,022] Trial 0 finished 
with value: -3594.2228073972638 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 3, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 10, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"PathFP\", \"parameters\": {\"maxPath\": 3, \"fpSize\": 2048}}'}. Best is trial 0 with value: -3594.2228073972638.\n", + "[I 2024-07-02 13:17:27,171] Trial 1 finished with value: -5029.734616310275 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.039054412752107935, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 3.1242780840717016e-07, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 0 with value: -3594.2228073972638.\n", + "[I 2024-07-02 13:17:27,429] Trial 2 finished with value: -4242.092751193529 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 20, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 0 with value: -3594.2228073972638.\n", + "[I 2024-07-02 13:17:27,579] Trial 3 finished with value: -3393.577488426015 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.06877704223043679, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 3 with value: -3393.577488426015.\n", + "[I 2024-07-02 13:17:27,644] Trial 4 finished with value: -427.45250420148204 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.7896547008552977, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:27,698] Trial 5 finished with value: -3387.245629616474 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 3, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:27,853] Trial 6 finished with value: -5029.734620250011 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 2.3661540064603184, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 0.1799882524170321, 'descriptor': '{\"name\": \"PathFP\", \"parameters\": {\"maxPath\": 3, \"fpSize\": 2048}}'}. Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:28,029] Trial 7 finished with value: -9650.026568221794 and parameters: {'algorithm_name': 'KNeighborsRegressor', 'KNeighborsRegressor_algorithm_hash': '1709d2c39117ae29f6c9debe7241287b', 'metric__1709d2c39117ae29f6c9debe7241287b': , 'n_neighbors__1709d2c39117ae29f6c9debe7241287b': 7, 'weights__1709d2c39117ae29f6c9debe7241287b': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:28,070] Trial 8 finished with value: -5437.151635569594 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.05083825348819038, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:28,336] Trial 9 finished with value: -2669.8534551928174 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 4, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:28,373] Trial 10 finished with value: -4341.586120152291 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.7921825998469865, 'descriptor': '{\"name\": \"PathFP\", \"parameters\": {\"maxPath\": 3, \"fpSize\": 2048}}'}. Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:28,532] Trial 11 finished with value: -5514.404088878843 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 7, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:28,680] Trial 12 finished with value: -5431.634989239215 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 10, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:28,722] Trial 13 finished with value: -3530.5496618991288 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:28,765] Trial 14 finished with value: -3497.6833185436312 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 2, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:28,793] Trial 15 finished with value: -4382.16208862162 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 5, 'descriptor': '{\"name\": \"PathFP\", \"parameters\": {\"maxPath\": 3, \"fpSize\": 2048}}'}. 
Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:28,831] Trial 16 finished with value: -5029.734620031822 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.002825619931800395, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 1.309885135051862e-09, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:28,870] Trial 17 finished with value: -679.3109044887755 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.16827992999009767, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 4 with value: -427.45250420148204.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:17:28,932] Trial 18 finished with value: -2550.114129318373 and parameters: {'algorithm_name': 'KNeighborsRegressor', 'KNeighborsRegressor_algorithm_hash': '1709d2c39117ae29f6c9debe7241287b', 'metric__1709d2c39117ae29f6c9debe7241287b': , 'n_neighbors__1709d2c39117ae29f6c9debe7241287b': 7, 'weights__1709d2c39117ae29f6c9debe7241287b': , 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:28,974] Trial 19 finished with value: -4847.085792360169 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.735431606118867, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. 
Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:29,016] Trial 20 finished with value: -5029.268760278916 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.0014840820994557746, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 0.04671166881768783, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:29,096] Trial 21 finished with value: -4783.0470154796785 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 15, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 10, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:29,135] Trial 22 finished with value: -3905.0064899852296 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:29,259] Trial 23 finished with value: -4030.45773791647 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 11, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 9, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. 
Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:29,340] Trial 24 finished with value: -4681.602145939593 and parameters: {'algorithm_name': 'KNeighborsRegressor', 'KNeighborsRegressor_algorithm_hash': '1709d2c39117ae29f6c9debe7241287b', 'metric__1709d2c39117ae29f6c9debe7241287b': , 'n_neighbors__1709d2c39117ae29f6c9debe7241287b': 4, 'weights__1709d2c39117ae29f6c9debe7241287b': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:29,381] Trial 25 finished with value: -4398.544034028325 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.6452011213193165, 'descriptor': '{\"name\": \"PathFP\", \"parameters\": {\"maxPath\": 3, \"fpSize\": 2048}}'}. Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:29,474] Trial 26 finished with value: -4454.143979828408 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 21, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 9, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:29,503] Trial 27 pruned. Duplicate parameter set\n", + "[I 2024-07-02 13:17:29,533] Trial 28 pruned. 
Duplicate parameter set\n", + "[I 2024-07-02 13:17:29,600] Trial 29 finished with value: -4397.330360587512 and parameters: {'algorithm_name': 'KNeighborsRegressor', 'KNeighborsRegressor_algorithm_hash': '1709d2c39117ae29f6c9debe7241287b', 'metric__1709d2c39117ae29f6c9debe7241287b': , 'n_neighbors__1709d2c39117ae29f6c9debe7241287b': 8, 'weights__1709d2c39117ae29f6c9debe7241287b': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:29,617] Trial 30 pruned. Duplicate parameter set\n", + "[I 2024-07-02 13:17:29,682] Trial 31 finished with value: -2602.7561184287083 and parameters: {'algorithm_name': 'KNeighborsRegressor', 'KNeighborsRegressor_algorithm_hash': '1709d2c39117ae29f6c9debe7241287b', 'metric__1709d2c39117ae29f6c9debe7241287b': , 'n_neighbors__1709d2c39117ae29f6c9debe7241287b': 6, 'weights__1709d2c39117ae29f6c9debe7241287b': , 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 4 with value: -427.45250420148204.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}, return [-3530.5496618991288]\n", + "Duplicated trial: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}, return [-3530.5496618991288]\n", + "Duplicated trial: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 3, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}, return [-3387.245629616474]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:17:29,715] Trial 32 finished with value: -5267.388279961089 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.2015560027548533, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:29,794] Trial 33 finished with value: -4863.581760751052 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 23, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 4 with value: -427.45250420148204.\n", + "[I 2024-07-02 13:17:29,836] Trial 34 finished with value: -388.96473594016675 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.5528259214839937, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 34 with value: -388.96473594016675.\n", + "[I 2024-07-02 13:17:29,906] Trial 35 finished with value: -5539.698232987626 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.6400992020612235, 'descriptor': '{\"name\": \"PathFP\", \"parameters\": {\"maxPath\": 3, \"fpSize\": 2048}}'}. Best is trial 34 with value: -388.96473594016675.\n", + "[I 2024-07-02 13:17:29,962] Trial 36 finished with value: -5180.5533034102455 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.8968910439566395, 'descriptor': '{\"name\": \"PathFP\", \"parameters\": {\"maxPath\": 3, \"fpSize\": 2048}}'}. Best is trial 34 with value: -388.96473594016675.\n", + "[I 2024-07-02 13:17:30,005] Trial 37 finished with value: -4989.929984864281 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.04458440839692226, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 4.492108041427977, 'descriptor': '{\"name\": \"PathFP\", \"parameters\": {\"maxPath\": 3, \"fpSize\": 2048}}'}. Best is trial 34 with value: -388.96473594016675.\n", + "[I 2024-07-02 13:17:30,034] Trial 38 pruned. 
Duplicate parameter set\n", + "[I 2024-07-02 13:17:30,103] Trial 39 finished with value: -6528.215066535042 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.16700143339733753, 'descriptor': '{\"name\": \"PathFP\", \"parameters\": {\"maxPath\": 3, \"fpSize\": 2048}}'}. Best is trial 34 with value: -388.96473594016675.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'KNeighborsRegressor', 'KNeighborsRegressor_algorithm_hash': '1709d2c39117ae29f6c9debe7241287b', 'metric__1709d2c39117ae29f6c9debe7241287b': , 'n_neighbors__1709d2c39117ae29f6c9debe7241287b': 8, 'weights__1709d2c39117ae29f6c9debe7241287b': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}, return [-4397.330360587512]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:17:30,240] Trial 40 finished with value: -4168.7955967552625 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 34 with value: -388.96473594016675.\n", + "[I 2024-07-02 13:17:30,311] Trial 41 finished with value: -6177.060727800014 and parameters: {'algorithm_name': 'KNeighborsRegressor', 'KNeighborsRegressor_algorithm_hash': '1709d2c39117ae29f6c9debe7241287b', 'metric__1709d2c39117ae29f6c9debe7241287b': , 'n_neighbors__1709d2c39117ae29f6c9debe7241287b': 1, 'weights__1709d2c39117ae29f6c9debe7241287b': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. 
Best is trial 34 with value: -388.96473594016675.\n", + "[I 2024-07-02 13:17:30,401] Trial 42 finished with value: -3963.906954658343 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 21, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"PathFP\", \"parameters\": {\"maxPath\": 3, \"fpSize\": 2048}}'}. Best is trial 34 with value: -388.96473594016675.\n", + "[I 2024-07-02 13:17:30,435] Trial 43 finished with value: -5029.6805334166565 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.013186009009851564, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 0.001008958590140135, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 34 with value: -388.96473594016675.\n", + "[I 2024-07-02 13:17:30,501] Trial 44 finished with value: -9300.86840721566 and parameters: {'algorithm_name': 'KNeighborsRegressor', 'KNeighborsRegressor_algorithm_hash': '1709d2c39117ae29f6c9debe7241287b', 'metric__1709d2c39117ae29f6c9debe7241287b': , 'n_neighbors__1709d2c39117ae29f6c9debe7241287b': 9, 'weights__1709d2c39117ae29f6c9debe7241287b': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 34 with value: -388.96473594016675.\n", + "[I 2024-07-02 13:17:30,547] Trial 45 finished with value: -5029.734620250011 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 83.87968210939489, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 6.382674443425525e-09, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 34 with value: -388.96473594016675.\n", + "[I 2024-07-02 13:17:30,565] Trial 46 pruned. 
Duplicate parameter set\n", + "[I 2024-07-02 13:17:30,594] Trial 47 pruned. Duplicate parameter set\n", + "[I 2024-07-02 13:17:30,626] Trial 48 pruned. Duplicate parameter set\n", + "[I 2024-07-02 13:17:30,717] Trial 49 finished with value: -3660.9359502556 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 2, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 7, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"PathFP\", \"parameters\": {\"maxPath\": 3, \"fpSize\": 2048}}'}. Best is trial 34 with value: -388.96473594016675.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'KNeighborsRegressor', 'KNeighborsRegressor_algorithm_hash': '1709d2c39117ae29f6c9debe7241287b', 'metric__1709d2c39117ae29f6c9debe7241287b': , 'n_neighbors__1709d2c39117ae29f6c9debe7241287b': 9, 'weights__1709d2c39117ae29f6c9debe7241287b': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}, return [-9300.86840721566]\n", + "Duplicated trial: {'algorithm_name': 'KNeighborsRegressor', 'KNeighborsRegressor_algorithm_hash': '1709d2c39117ae29f6c9debe7241287b', 'metric__1709d2c39117ae29f6c9debe7241287b': , 'n_neighbors__1709d2c39117ae29f6c9debe7241287b': 7, 'weights__1709d2c39117ae29f6c9debe7241287b': , 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}, return [-2550.114129318373]\n", + "Duplicated trial: {'algorithm_name': 'KNeighborsRegressor', 'KNeighborsRegressor_algorithm_hash': '1709d2c39117ae29f6c9debe7241287b', 'metric__1709d2c39117ae29f6c9debe7241287b': , 'n_neighbors__1709d2c39117ae29f6c9debe7241287b': 6, 'weights__1709d2c39117ae29f6c9debe7241287b': , 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": 
true, \"nBits\": 2048}}'}, return [-2602.7561184287083]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:17:30,767] Trial 50 finished with value: -688.5244070398325 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.5267860995545326, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 34 with value: -388.96473594016675.\n", + "[I 2024-07-02 13:17:30,813] Trial 51 finished with value: -690.6494438072099 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.8458809314722497, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 34 with value: -388.96473594016675.\n", + "[I 2024-07-02 13:17:30,848] Trial 52 finished with value: -691.1197058420935 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.9167866889210807, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 34 with value: -388.96473594016675.\n", + "[I 2024-07-02 13:17:30,898] Trial 53 finished with value: -691.3111710449325 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.945685900574672, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 34 with value: -388.96473594016675.\n", + "[I 2024-07-02 13:17:30,934] Trial 54 finished with value: -690.9665592812149 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.8936837761725833, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 34 with value: -388.96473594016675.\n", + "[I 2024-07-02 13:17:30,970] Trial 55 finished with value: -688.4682747008223 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.5183865279530455, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 34 with value: -388.96473594016675.\n", + "[I 2024-07-02 13:17:31,030] Trial 56 finished with value: -687.5230947231512 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.3771771681361766, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 34 with value: -388.96473594016675.\n", + "[I 2024-07-02 13:17:31,078] Trial 57 finished with value: -687.4503442069594 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.3663259819415374, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 34 with value: -388.96473594016675.\n", + "[I 2024-07-02 13:17:31,127] Trial 58 finished with value: -686.9553733616618 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.2925652230875628, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 34 with value: -388.96473594016675.\n", + "[I 2024-07-02 13:17:31,174] Trial 59 finished with value: -370.2038330506566 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.3962903248948568, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 59 with value: -370.2038330506566.\n", + "[I 2024-07-02 13:17:31,222] Trial 60 finished with value: -377.25988028857313 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.45237513161879, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 59 with value: -370.2038330506566.\n", + "[I 2024-07-02 13:17:31,270] Trial 61 finished with value: -379.8933285317637 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.4741161933311207, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 59 with value: -370.2038330506566.\n", + "[I 2024-07-02 13:17:31,319] Trial 62 finished with value: -374.50897467366013 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.4290962207409417, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 59 with value: -370.2038330506566.\n", + "[I 2024-07-02 13:17:31,356] Trial 63 finished with value: -376.5588572940058 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.4464295711264585, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 59 with value: -370.2038330506566.\n", + "[I 2024-07-02 13:17:31,416] Trial 64 finished with value: -379.237448916406 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.4687500034684213, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 59 with value: -370.2038330506566.\n", + "[I 2024-07-02 13:17:31,454] Trial 65 finished with value: -375.7474776359051 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.4395650011783436, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 59 with value: -370.2038330506566.\n", + "[I 2024-07-02 13:17:31,504] Trial 66 finished with value: -362.2834906299732 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.3326755354190032, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 66 with value: -362.2834906299732.\n", + "[I 2024-07-02 13:17:31,542] Trial 67 finished with value: -357.3474880122588 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.2887212943233457, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 67 with value: -357.3474880122588.\n", + "[I 2024-07-02 13:17:31,591] Trial 68 finished with value: -354.279045046449 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.2577677164664005, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 68 with value: -354.279045046449.\n", + "[I 2024-07-02 13:17:31,642] Trial 69 finished with value: -347.36894395697703 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.1672928587680225, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 69 with value: -347.36894395697703.\n", + "[I 2024-07-02 13:17:31,706] Trial 70 finished with value: -345.17697390093394 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.1242367255308854, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 70 with value: -345.17697390093394.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:17:31,757] Trial 71 finished with value: -347.74610809299037 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.1728352983905301, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 70 with value: -345.17697390093394.\n", + "[I 2024-07-02 13:17:31,807] Trial 72 finished with value: -345.23464281634324 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.1265380781508565, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 70 with value: -345.17697390093394.\n", + "[I 2024-07-02 13:17:31,856] Trial 73 finished with value: -344.6848312222365 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.0829896313820404, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 73 with value: -344.6848312222365.\n", + "[I 2024-07-02 13:17:31,902] Trial 74 finished with value: -344.9111966504334 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.1070414661080543, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 73 with value: -344.6848312222365.\n", + "[I 2024-07-02 13:17:31,966] Trial 75 finished with value: -344.70116419828565 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.0875643695329498, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 73 with value: -344.6848312222365.\n", + "[I 2024-07-02 13:17:32,026] Trial 76 finished with value: -344.62647974688133 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.0716281620790837, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 76 with value: -344.62647974688133.\n", + "[I 2024-07-02 13:17:32,089] Trial 77 finished with value: -344.6759429204596 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.0456289319914898, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 76 with value: -344.62647974688133.\n", + "[I 2024-07-02 13:17:32,141] Trial 78 finished with value: -343.58131497761616 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.0010195360522613, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 78 with value: -343.58131497761616.\n", + "[I 2024-07-02 13:17:32,193] Trial 79 finished with value: -342.7290581014813 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.9073210715005748, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 79 with value: -342.7290581014813.\n", + "[I 2024-07-02 13:17:32,254] Trial 80 finished with value: -342.67866114080107 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.9166305667100072, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 80 with value: -342.67866114080107.\n", + "[I 2024-07-02 13:17:32,317] Trial 81 finished with value: -342.6440308445311 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.9248722692093634, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 81 with value: -342.6440308445311.\n", + "[I 2024-07-02 13:17:32,367] Trial 82 finished with value: -343.02085648448934 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.8776928646870886, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 81 with value: -342.6440308445311.\n", + "[I 2024-07-02 13:17:32,416] Trial 83 finished with value: -343.1662266300702 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.867592364677856, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 81 with value: -342.6440308445311.\n", + "[I 2024-07-02 13:17:32,457] Trial 84 finished with value: -343.30158716569775 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.8599491178327108, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 81 with value: -342.6440308445311.\n", + "[I 2024-07-02 13:17:32,497] Trial 85 finished with value: -344.2803074848341 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.8396948389352923, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 81 with value: -342.6440308445311.\n", + "[I 2024-07-02 13:17:32,547] Trial 86 finished with value: -344.28301101884045 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.8396651775801683, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 81 with value: -342.6440308445311.\n", + "[I 2024-07-02 13:17:32,587] Trial 87 finished with value: -344.6781906268143 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.8356021935129933, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 81 with value: -342.6440308445311.\n", + "[I 2024-07-02 13:17:32,639] Trial 88 finished with value: -354.0405418264898 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.7430046191126949, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 81 with value: -342.6440308445311.\n", + "[I 2024-07-02 13:17:32,677] Trial 89 finished with value: -342.77203208258476 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.9015965341429055, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 81 with value: -342.6440308445311.\n", + "[I 2024-07-02 13:17:32,717] Trial 90 finished with value: -363.1622720320929 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.6746575663752555, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 81 with value: -342.6440308445311.\n", + "[I 2024-07-02 13:17:32,757] Trial 91 finished with value: -342.7403796626193 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.9057564666836629, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 81 with value: -342.6440308445311.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:17:32,797] Trial 92 finished with value: -342.63579667712696 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.9332275205203372, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 92 with value: -342.63579667712696.\n", + "[I 2024-07-02 13:17:32,848] Trial 93 finished with value: -342.6886425884964 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.9433063264508291, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 92 with value: -342.63579667712696.\n", + "[I 2024-07-02 13:17:32,898] Trial 94 finished with value: -342.9341048659705 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.884739221967487, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 92 with value: -342.63579667712696.\n", + "[I 2024-07-02 13:17:32,935] Trial 95 finished with value: -342.63507445779743 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.9381000493689634, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 95 with value: -342.63507445779743.\n", + "[I 2024-07-02 13:17:32,986] Trial 96 finished with value: -343.06021011302374 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.963138023068903, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 95 with value: -342.63507445779743.\n", + "[I 2024-07-02 13:17:33,026] Trial 97 finished with value: -342.9990546212019 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.9601651093867907, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 95 with value: -342.63507445779743.\n", + "[I 2024-07-02 13:17:33,066] Trial 98 finished with value: -3821.2267845437514 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 2, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 95 with value: -342.63507445779743.\n", + "[I 2024-07-02 13:17:33,117] Trial 99 finished with value: -356.6786067133016 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.721603508336166, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 95 with value: -342.63507445779743.\n" + ] + } + ], + "source": [ + "# Run Optuna Study.\n", + "study = optimize(config, study_name=\"my_study\")\n", + "# Optuna will log it's progress to sys.stderr\n", + "# (usually rendered in red in Jupyter Notebooks)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Visualize optimization progress" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import seaborn as sns\n", + "sns.set_theme(style=\"darkgrid\")\n", + "default_reg_scoring= config.settings.scoring\n", + "ax = sns.scatterplot(data=study.trials_dataframe(), x=\"number\", y=\"value\");\n", + "ax.set(xlabel=\"Trial number\", ylabel=f\"Ojbective value\\n({default_reg_scoring})\");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sometimes it might be interesting to look at individual CV scores instead of aggregated score (mean CV score by default). Here we can plot all 3 cross validation scores (neg_mean_squared_error) for each trial (folds highlighted using different colors)." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cv_test = study.trials_dataframe()[\"user_attrs_test_scores\"].map(lambda d: d[default_reg_scoring])\n", + "x = []\n", + "y = []\n", + "fold = []\n", + "for i, vs in cv_test.items():\n", + " for idx, v in enumerate(vs):\n", + " x.append(i)\n", + " y.append(v)\n", + " fold.append(idx)\n", + "ax = sns.scatterplot(x=x, y=y, hue=fold, style=fold, palette='Set1')\n", + "ax.set(xlabel=\"Trial number\", ylabel=f\"Ojbective value\\n({default_reg_scoring})\");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pick the best trial and build a model for it" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We pick the best trial, inspect its configuration, build the best model, and save it as a pickled file." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the best Trial from the Study and make a Build (Training) configuration for it.\n", + "buildconfig = buildconfig_best(study)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Optional: write out JSON of the best configuration." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"data\": {\n", + " \"training_dataset_file\": \"../tests/data/DRD2/subset-50/train.csv\",\n", + " \"input_column\": \"canonical\",\n", + " \"response_column\": \"molwt\",\n", + " \"response_type\": \"regression\",\n", + " \"deduplication_strategy\": {\n", + " \"name\": \"KeepMedian\"\n", + " },\n", + " \"split_strategy\": {\n", + " \"name\": \"NoSplitting\"\n", + " },\n", + " \"test_dataset_file\": \"../tests/data/DRD2/subset-50/test.csv\",\n", + " \"save_intermediate_files\": false,\n", + " \"log_transform\": false,\n", + " \"log_transform_base\": null,\n", + " \"log_transform_negative\": null,\n", + " \"log_transform_unit_conversion\": null,\n", + " \"probabilistic_threshold_representation\": false,\n", + " \"probabilistic_threshold_representation_threshold\": null,\n", + " \"probabilistic_threshold_representation_std\": null\n", + " },\n", + " \"metadata\": {\n", + " \"name\": \"\",\n", + " \"cross_validation\": 3,\n", + " \"shuffle\": false,\n", + " \"best_trial\": 95,\n", + " \"best_value\": -342.63507445779743,\n", + " \"n_trials\": 100,\n", + " \"visualization\": null\n", + " },\n", + " \"descriptor\": {\n", + " \"name\": \"ECFP_counts\",\n", + " \"parameters\": {\n", + " \"radius\": 3,\n", + " \"useFeatures\": true,\n", + " \"nBits\": 2048\n", + " }\n", + " },\n", + " \"settings\": {\n", + " \"mode\": \"regression\",\n", + " \"scoring\": \"neg_mean_squared_error\",\n", + " \"direction\": \"maximize\",\n", + " \"n_trials\": 100,\n", + " \"tracking_rest_endpoint\": null\n", + " },\n", + " \"algorithm\": {\n", + " \"name\": \"Lasso\",\n", + " \"parameters\": {\n", + " \"alpha\": 0.9381000493689634\n", + " }\n", + " },\n", + " \"task\": \"building\"\n", + "}\n" + ] + } + ], + "source": [ + "import apischema\n", + "buildconfig_as_dict = apischema.serialize(buildconfig)\n", + "\n", + "import 
json\n", + "print(json.dumps(buildconfig_as_dict, indent=2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Build (re-Train) and save the best model. This time training uses all training data, without splitting it into cross-validation folds." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "best_build = build_best(buildconfig, \"../target/best.pkl\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can use the best (or merged) model as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 67.43103985, 177.99850936])" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pickle\n", + "with open(\"../target/best.pkl\", \"rb\") as f:\n", + " model = pickle.load(f)\n", + "model.predict_from_smiles([\"CCC\", \"CC(=O)Nc1ccc(O)cc1\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can explore how well the best model performs on the test (holdout) set." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "df = pd.read_csv(config.data.test_dataset_file) # Load test data.\n", + "\n", + "expected = df[config.data.response_column]\n", + "predicted = model.predict_from_smiles(df[config.data.input_column])" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot expected vs predicted values for the best model.\n", + "import matplotlib.pyplot as plt\n", + "ax = plt.scatter(expected, predicted)\n", + "lims = [expected.min(), expected.max()]\n", + "plt.plot(lims, lims) # Diagonal line.\n", + "plt.xlabel(f\"Expected {config.data.response_column}\");\n", + "plt.ylabel(f\"Predicted {config.data.response_column}\");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also calculate custom metrics for the best model:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "R2: 0.8566354978126369, RMSE: 26.204909888075044, Mean absolute error: 19.298453946973815\n" + ] + } + ], + "source": [ + "from sklearn.metrics import (r2_score, mean_squared_error, mean_absolute_error)\n", + "import numpy as np\n", + "\n", + "# R2\n", + "r2 = r2_score(y_true=expected, y_pred=predicted)\n", + "\n", + "# RMSE. sklearn 0.24 added squared=False to get RMSE, here we use np.sqrt().\n", + "rmse = np.sqrt(mean_squared_error(y_true=expected, y_pred=predicted)) \n", + "\n", + "# MAE\n", + "mae = mean_absolute_error(y_true=expected, y_pred=predicted)\n", + "\n", + "print(f\"R2: {r2}, RMSE: {rmse}, Mean absolute error: {mae}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If the metrics look acceptable, the model is ready for use." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Build merged model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can merge train and test data, and build (train) the model again. We will have no more holdout data to evaluate the model, but hopefully the model will be a little better by seeing a little more data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# Build (Train) and save the model on the merged train+test data.\n", + "build_merged(buildconfig, \"../target/merged.pkl\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Preprocessing: splitting data into train and test sets, and removing duplicates " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Splitting into train and test dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "QSARtuna can split data into train and test (holdout) datasets. To do so, send all data in as training_dataset_file, and choose a splitting strategy. Currently QSARtuna supports three splitting strategies: random, temporal and stratified. \n", + "\n", + "Random strategy splits data randomly, taking a specified fraction of observations to be test dataset. \n", + "\n", + "Temporal strategy takes the first observations as training dataset, and the last specified fraction of observations as test dataset. The input dataset must be already sorted, from oldest at the beginning to newest at the end. This sorting can be done in any external tool (e.g. Excel). \n", + "\n", + "Stratified strategy splits data into bins first, and then takes a fraction from each bin to be the test dataset. This ensures that the distributions in the train and test data are similar. This is a better strategy if the dataset is unbalanced." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Removing duplicates" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "All the algorithms QSARtuna supports do not work with duplicates. Duplicates can come from multiple measurements for the same compound, or from the fact that the molecular descriptors we use all disregard stereochemistry, so even if compounds are different, descriptors make them into duplicates. 
QSARtuna provides several strategies to remove duplicates:\n", + "* keep median - factors experimental deviation using all replicates into one median value (robust to outliers - recommended)\n", + "* keep average - use all experimental data across all replicates (less robust to outliers vs. median)\n", + "* keep first / keep last - when the first or the last measurement is the trusted one\n", + "* keep max / keep min - when we want to keep the most extreme value out of many\n", + "* keep random - when we are agnostic to which replicate is kept" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Configuration example" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "from optunaz.utils.preprocessing.splitter import Stratified\n", + "from optunaz.utils.preprocessing.deduplicator import KeepMedian\n", + "# Prepare hyperparameter optimization configuration.\n", + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"canonical\",\n", + " response_column=\"molwt\",\n", + " training_dataset_file=\"../tests/data/DRD2/subset-100/train.csv\", # This will be split into train and test.\n", + " split_strategy=Stratified(fraction=0.2),\n", + " deduplication_strategy=KeepMedian(),\n", + " ),\n", + " descriptors=[\n", + " ECFP.new(), \n", + " ECFP_counts.new(), \n", + " MACCS_keys.new(),\n", + " ],\n", + " algorithms=[\n", + " SVR.new(),\n", + " RandomForestRegressor.new(n_estimators={\"low\": 5, \"high\": 10}),\n", + " Ridge.new(),\n", + " Lasso.new(),\n", + " PLSRegression.new(),\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.REGRESSION,\n", + " cross_validation=3,\n", + " n_trials=100,\n", + " n_startup_trials=50,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " track_to_mlflow=False,\n", + " ),\n", + ") " + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + 
"name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:17:36,922] A new study created in memory with name: my_study_stratified_split\n", + "[I 2024-07-02 13:17:36,963] A new study created in memory with name: study_name_0\n", + "[I 2024-07-02 13:17:37,046] Trial 0 finished with value: -1856.4459752935309 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 0 with value: -1856.4459752935309.\n", + "[I 2024-07-02 13:17:37,123] Trial 1 finished with value: -1692.0451328577294 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.2918844591266672, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 1 with value: -1692.0451328577294.\n", + "[I 2024-07-02 13:17:37,592] Trial 2 finished with value: -1378.9731014410709 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.471164936778079, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 2 with value: -1378.9731014410709.\n", + "[I 2024-07-02 13:17:37,688] Trial 3 finished with value: -2658.13214897931 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 2 with value: -1378.9731014410709.\n", + "[I 2024-07-02 13:17:37,804] Trial 4 finished with value: -2059.3079659969176 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 27, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 9, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 2 with value: -1378.9731014410709.\n", + "[I 2024-07-02 13:17:38,330] Trial 5 finished with value: -280.17777558722315 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.7001901522391756, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 5 with value: -280.17777558722315.\n", + "[I 2024-07-02 13:17:38,422] Trial 6 finished with value: -3551.475476217507 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 31, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 5 with value: -280.17777558722315.\n", + "[I 2024-07-02 13:17:38,466] Trial 7 finished with value: -2124.9660426577593 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 2, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. 
Best is trial 5 with value: -280.17777558722315.\n", + "[I 2024-07-02 13:17:38,509] Trial 8 finished with value: -1686.5737716985532 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.9841058851292832, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 5 with value: -280.17777558722315.\n", + "[I 2024-07-02 13:17:38,552] Trial 9 finished with value: -1702.174704715547 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.861494545249233, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 5 with value: -280.17777558722315.\n", + "[I 2024-07-02 13:17:38,578] Trial 10 pruned. Duplicate parameter set\n", + "[I 2024-07-02 13:17:38,621] Trial 11 finished with value: -1204.636967895143 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.5238298142840006, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 5 with value: -280.17777558722315.\n", + "[I 2024-07-02 13:17:38,676] Trial 12 finished with value: -228.44505332657158 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.9836853549192415, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:38,729] Trial 13 finished with value: -3949.499774068696 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.04535826280986047, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 2.012999584021838e-09, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 12 with value: -228.44505332657158.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 2, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}, return [-2124.9660426577593]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:17:38,829] Trial 14 finished with value: -2856.917927507731 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 13, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 10, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. 
Duality gap: 9.306e+01, tolerance: 3.824e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "[I 2024-07-02 13:17:38,882] Trial 15 finished with value: -2554.2079198900733 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.10588223712643852, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:38,922] Trial 16 finished with value: -1261.484274761188 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.0950442632698256, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:38,965] Trial 17 finished with value: -282.6478019258886 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.2920636100136971, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:39,004] Trial 18 finished with value: -1814.6019641143478 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 3, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:39,048] Trial 19 finished with value: -1284.7430070920798 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.1729012287538991, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. 
Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:39,103] Trial 20 finished with value: -237.98783693000647 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.1721667984096773, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:39,192] Trial 21 finished with value: -2129.55317061882 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 12, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:39,235] Trial 22 finished with value: -3949.4997740833423 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 3.779895470793612, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 4.260941957410989e-09, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:39,279] Trial 23 finished with value: -1740.8894369939983 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.02841448247455669, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. 
Best is trial 12 with value: -228.44505332657158.\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 1.698e+02, tolerance: 3.824e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 1.280e+02, tolerance: 3.820e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 1.352e+02, tolerance: 3.770e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "[I 2024-07-02 13:17:39,373] Trial 24 finished with value: -3317.417858905051 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.003050380617617421, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:39,404] Trial 25 pruned. 
Duplicate parameter set\n", + "[I 2024-07-02 13:17:39,448] Trial 26 finished with value: -1256.7270466276807 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.1594144041655936, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:39,491] Trial 27 finished with value: -1245.1399766270456 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.336730512398918, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:39,583] Trial 28 finished with value: -2908.3563960057677 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 14, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 10, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 12 with value: -228.44505332657158.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}, return [-2658.13214897931]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:17:39,628] Trial 29 finished with value: -1775.55204856041 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 5, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:39,721] Trial 30 finished with value: -2059.3079659969176 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 19, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 9, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:39,765] Trial 31 finished with value: -1257.9288888831513 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.1441514794000534, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. 
Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:39,808] Trial 32 finished with value: -280.98174313112844 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.1939105579414777, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:39,900] Trial 33 finished with value: -3054.7066202193805 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 23, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 10, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:39,944] Trial 34 finished with value: -1227.082986184029 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.909508127148669, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:39,988] Trial 35 finished with value: -1676.7481962719485 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.4307837873914335, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:40,079] Trial 36 finished with value: -2059.307965996918 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 26, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 9, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:40,168] Trial 37 finished with value: -3441.9109103644514 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 12, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:40,211] Trial 38 finished with value: -1670.5213862925175 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.07945856808433427, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:40,264] Trial 39 finished with value: -2756.046839500092 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 2, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:40,320] Trial 40 finished with value: -3949.4997735530674 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.022099719935614482, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 1.4657380646234507e-08, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:40,376] Trial 41 finished with value: -3949.4997740833423 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 1.0862402902634642, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 0.12519632281925502, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:40,467] Trial 42 finished with value: -3438.566583971217 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 7, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:40,524] Trial 43 finished with value: -254.4422556954731 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.19967589906728334, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 12 with value: -228.44505332657158.\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 7.016e+01, tolerance: 3.820e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "[I 2024-07-02 13:17:40,618] Trial 44 finished with value: -359.7639743940817 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.059252880514551576, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:40,662] Trial 45 finished with value: -1246.7813032646238 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.3074782262329858, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:40,755] Trial 46 finished with value: -2224.3845873049813 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 17, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 12 with value: -228.44505332657158.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:17:40,810] Trial 47 finished with value: -1673.9639799911165 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.2737740844660712, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:40,896] Trial 48 finished with value: -3163.129883232068 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 32, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:40,987] Trial 49 finished with value: -2753.414173913392 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 10, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 10, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:41,057] Trial 50 finished with value: -263.1352845182604 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.627030918721665, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:41,105] Trial 51 finished with value: -271.2979718788249 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.8548903728617034, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:41,165] Trial 52 finished with value: -277.86441431259567 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.9605867591283856, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:41,227] Trial 53 finished with value: -277.4329099850367 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.9537398361705693, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:41,274] Trial 54 finished with value: -274.3838070241422 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.9045589309769144, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:41,334] Trial 55 finished with value: -260.4460398258507 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.5589021326002044, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:41,383] Trial 56 finished with value: -257.95032410206767 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.5053759377103249, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:41,444] Trial 57 finished with value: -256.5958038666581 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.4789082433356577, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:41,495] Trial 58 finished with value: -253.4269973575198 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.4281024602273042, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:41,560] Trial 59 finished with value: -249.40822811603962 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.3546313579812586, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:41,620] Trial 60 finished with value: -245.71101688809983 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.2913960369109012, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:41,675] Trial 61 finished with value: -247.88538215472033 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.3274897484709072, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:41,737] Trial 62 finished with value: -244.23847775159297 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.2647865635312279, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:41,803] Trial 63 finished with value: -247.59033004585282 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.3228443521984092, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:41,863] Trial 64 finished with value: -243.40694430653753 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.2489205103047292, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 12 with value: -228.44505332657158.\n", + "[I 2024-07-02 13:17:41,928] Trial 65 finished with value: -223.85145692792733 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.8934822741396387, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 65 with value: -223.85145692792733.\n", + "[I 2024-07-02 13:17:41,990] Trial 66 finished with value: -221.94026043724057 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.8552798675517863, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 66 with value: -221.94026043724057.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:17:42,048] Trial 67 finished with value: -219.60947928367543 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.8149866573467666, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 67 with value: -219.60947928367543.\n", + "[I 2024-07-02 13:17:42,108] Trial 68 finished with value: -221.84441955310717 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.8531301788095305, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 67 with value: -219.60947928367543.\n", + "[I 2024-07-02 13:17:42,170] Trial 69 finished with value: -221.24134912135943 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.8418420411160932, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 67 with value: -219.60947928367543.\n", + "[I 2024-07-02 13:17:42,232] Trial 70 finished with value: -223.34805357903284 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.883998932301903, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 67 with value: -219.60947928367543.\n", + "[I 2024-07-02 13:17:42,293] Trial 71 finished with value: -221.99342925522842 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.8564564664338091, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 67 with value: -219.60947928367543.\n", + "[I 2024-07-02 13:17:42,353] Trial 72 finished with value: -222.50886633416462 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.8672069097403997, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 67 with value: -219.60947928367543.\n", + "[I 2024-07-02 13:17:42,415] Trial 73 finished with value: -221.61235541906441 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.8482856353268698, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 67 with value: -219.60947928367543.\n", + "[I 2024-07-02 13:17:42,479] Trial 74 finished with value: -217.7749814513912 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.7823980442129331, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 74 with value: -217.7749814513912.\n", + "[I 2024-07-02 13:17:42,538] Trial 75 finished with value: -216.00225784039503 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.7113129125761161, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 75 with value: -216.00225784039503.\n", + "[I 2024-07-02 13:17:42,601] Trial 76 finished with value: -216.8736767409489 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.6250904023479531, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 75 with value: -216.00225784039503.\n", + "[I 2024-07-02 13:17:42,666] Trial 77 finished with value: -216.94414119442342 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.6227757503715069, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 75 with value: -216.00225784039503.\n", + "[I 2024-07-02 13:17:42,731] Trial 78 finished with value: -216.45936690929625 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.6343056785694773, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 75 with value: -216.00225784039503.\n", + "[I 2024-07-02 13:17:42,797] Trial 79 finished with value: -216.63861804615567 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.6302707941523814, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 75 with value: -216.00225784039503.\n", + "[I 2024-07-02 13:17:42,860] Trial 80 finished with value: -1969.3749442111905 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.00019861806798724335, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 89.586529041453, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 75 with value: -216.00225784039503.\n", + "[I 2024-07-02 13:17:42,923] Trial 81 finished with value: -215.82051598778696 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.6518244359516081, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 81 with value: -215.82051598778696.\n", + "[I 2024-07-02 13:17:42,987] Trial 82 finished with value: -216.06387687700067 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.6440087841656821, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 81 with value: -215.82051598778696.\n", + "[I 2024-07-02 13:17:43,041] Trial 83 finished with value: -216.24994687849525 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.6393212787552464, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 81 with value: -215.82051598778696.\n", + "[I 2024-07-02 13:17:43,106] Trial 84 finished with value: -216.92984604804667 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.6232144947646524, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 81 with value: -215.82051598778696.\n", + "[I 2024-07-02 13:17:43,170] Trial 85 finished with value: -217.25506613319246 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.603388647930941, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 81 with value: -215.82051598778696.\n", + "[I 2024-07-02 13:17:43,223] Trial 86 finished with value: -2733.5772576431627 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 5, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 81 with value: -215.82051598778696.\n", + "[I 2024-07-02 13:17:43,287] Trial 87 finished with value: -217.29854648789728 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.5873312673596333, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 81 with value: -215.82051598778696.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:17:43,347] Trial 88 finished with value: -221.16592450348784 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.4337907998582289, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 81 with value: -215.82051598778696.\n", + "[I 2024-07-02 13:17:43,410] Trial 89 finished with value: -215.68514116107337 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.6695836226711808, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 89 with value: -215.68514116107337.\n", + "[I 2024-07-02 13:17:43,475] Trial 90 finished with value: -220.8939514172608 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.4420925048614356, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 89 with value: -215.68514116107337.\n", + "[I 2024-07-02 13:17:43,535] Trial 91 finished with value: -215.72299797702155 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.6960582933068138, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 89 with value: -215.68514116107337.\n", + "[I 2024-07-02 13:17:43,601] Trial 92 finished with value: -215.69285146262294 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.69078828949453, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 89 with value: -215.68514116107337.\n", + "[I 2024-07-02 13:17:43,665] Trial 93 finished with value: -216.0538787714827 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.7144357045239296, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 89 with value: -215.68514116107337.\n", + "[I 2024-07-02 13:17:43,728] Trial 94 finished with value: -216.4213281391621 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.7353090312302926, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 89 with value: -215.68514116107337.\n", + "[I 2024-07-02 13:17:43,794] Trial 95 finished with value: -3949.4997740833423 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 50.74724725664498, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 8.92653950485437e-05, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 89 with value: -215.68514116107337.\n", + "[I 2024-07-02 13:17:43,858] Trial 96 finished with value: -216.12287184152592 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.7183304951103088, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 89 with value: -215.68514116107337.\n", + "[I 2024-07-02 13:17:43,922] Trial 97 finished with value: -216.22186485689846 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.7234233661662641, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 89 with value: -215.68514116107337.\n", + "[I 2024-07-02 13:17:43,977] Trial 98 finished with value: -2720.793752592223 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 3, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 89 with value: -215.68514116107337.\n", + "[I 2024-07-02 13:17:44,042] Trial 99 finished with value: -219.3855763846717 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.4726201914486088, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 89 with value: -215.68514116107337.\n" + ] + } + ], + "source": [ + "study = optimize(config, study_name=\"my_study_stratified_split\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Choosing scoring function " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "By default, QSARtuna uses `neg_mean_squared_error` for regression and `roc_auc` for classification. It is possible to change to other scoring functions that are supported by scikit-learn (https://scikit-learn.org/stable/modules/model_evaluation.html) amongst others:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['explained_variance',\n", + " 'max_error',\n", + " 'neg_mean_absolute_error',\n", + " 'neg_mean_squared_error',\n", + " 'neg_median_absolute_error',\n", + " 'r2',\n", + " 'accuracy',\n", + " 'average_precision',\n", + " 'balanced_accuracy',\n", + " 'f1',\n", + " 'f1_macro',\n", + " 'f1_micro',\n", + " 'f1_weighted',\n", + " 'jaccard',\n", + " 'jaccard_macro',\n", + " 'jaccard_micro',\n", + " 'jaccard_weighted',\n", + " 'neg_brier_score',\n", + " 'precision',\n", + " 'precision_macro',\n", + " 'precision_micro',\n", + " 'precision_weighted',\n", + " 'recall',\n", + " 'recall_macro',\n", + " 'recall_micro',\n", + " 'recall_weighted',\n", + " 'roc_auc',\n", + " 'auc_pr_cal',\n", + " 'bedroc',\n", + " 'concordance_index']" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from 
optunaz import objective\n", + "list(objective.regression_scores) + list(objective.classification_scores)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This value can be set using `settings.scoring`:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"canonical\",\n", + " response_column=\"molwt\",\n", + " training_dataset_file=\"../tests/data/DRD2/subset-100/train.csv\",\n", + " ),\n", + " descriptors=[\n", + " ECFP.new(), \n", + " ECFP_counts.new(), \n", + " MACCS_keys.new(),\n", + " ],\n", + " algorithms=[\n", + " SVR.new(),\n", + " RandomForestRegressor.new(n_estimators={\"low\": 5, \"high\": 10}),\n", + " Ridge.new(),\n", + " Lasso.new(),\n", + " PLSRegression.new(),\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.REGRESSION,\n", + " cross_validation=3,\n", + " n_trials=100,\n", + " n_startup_trials=50,\n", + " random_seed=42,\n", + " scoring=\"r2\", # Scoring function name from scikit-learn.\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " track_to_mlflow=False,\n", + " ),\n", + ") " + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:17:44,945] A new study created in memory with name: my_study_r2\n", + "[I 2024-07-02 13:17:44,947] A new study created in memory with name: study_name_0\n", + "[I 2024-07-02 13:17:45,072] Trial 0 finished with value: -0.011171868665159623 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": 
{\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 0 with value: -0.011171868665159623.\n", + "[I 2024-07-02 13:17:45,197] Trial 1 finished with value: -0.08689402230378174 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 7, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 0 with value: -0.011171868665159623.\n", + "[I 2024-07-02 13:17:45,283] Trial 2 finished with value: -0.12553701248394863 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 5.141096648805748, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 2.4893466963980463e-08, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 0 with value: -0.011171868665159623.\n", + "[I 2024-07-02 13:17:45,358] Trial 3 finished with value: 0.3039309544203818 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 5, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 3 with value: 0.3039309544203818.\n", + "[I 2024-07-02 13:17:45,410] Trial 4 finished with value: 0.20182749628697164 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 3, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 3 with value: 0.3039309544203818.\n", + "[I 2024-07-02 13:17:45,485] Trial 5 finished with value: 0.8187194367176578 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.7896547008552977, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 5 with value: 0.8187194367176578.\n", + "[I 2024-07-02 13:17:45,558] Trial 6 finished with value: 0.4647239019719945 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.6574750183038587, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 5 with value: 0.8187194367176578.\n", + "[I 2024-07-02 13:17:45,611] Trial 7 finished with value: 0.8614818478547979 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.3974313630683448, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 7 with value: 0.8614818478547979.\n", + "[I 2024-07-02 13:17:45,705] Trial 8 finished with value: -0.12769795082909816 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 28, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 7 with value: 0.8614818478547979.\n", + "[I 2024-07-02 13:17:45,773] Trial 9 finished with value: 0.8639946428338224 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.2391884918766034, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:45,838] Trial 10 finished with value: -0.12553701248377633 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.00044396482429275296, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 2.3831436879125245e-10, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:45,892] Trial 11 finished with value: -0.12553700871203702 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.00028965395242758657, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 2.99928292425642e-07, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:45,934] Trial 12 finished with value: 0.2935582042429075 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:45,976] Trial 13 finished with value: 0.18476333152695587 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 2, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:46,029] Trial 14 finished with value: 0.8190707459213998 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.4060379177903557, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:46,118] Trial 15 finished with value: 0.12206148974315871 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 20, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:46,174] Trial 16 finished with value: 0.3105263811279067 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.344271094811757, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:46,215] Trial 17 finished with value: 0.3562469062424869 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.670604991178476, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:46,316] Trial 18 finished with value: 0.045959695906983344 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 22, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 9 with value: 0.8639946428338224.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:17:46,381] Trial 19 finished with value: 0.8583939656024446 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.5158832554303112, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:46,433] Trial 20 finished with value: 0.3062574078515544 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. 
Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:46,487] Trial 21 finished with value: -0.11657354998283716 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.0009327650919528738, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 6.062479210472502, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:46,586] Trial 22 pruned. Duplicate parameter set\n", + "[I 2024-07-02 13:17:46,629] Trial 23 finished with value: 0.8498478905829554 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.1366172066709432, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:46,733] Trial 24 finished with value: -0.1276979508290982 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 26, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:46,786] Trial 25 finished with value: -0.13519830637607919 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 43.92901911959232, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 27.999026012594694, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 9 with value: 0.8639946428338224.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}, return [0.2935582042429075]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:17:46,839] Trial 26 finished with value: 0.8198078293055633 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.5888977841391714, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:46,878] Trial 27 finished with value: 0.8201573964824842 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.19435298754153707, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:46,958] Trial 28 finished with value: 0.045959695906983344 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 13, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. 
Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:47,013] Trial 29 finished with value: -0.12553701248394863 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 1.6285506249643193, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 0.35441495011256785, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:47,103] Trial 30 finished with value: 0.11934070343348298 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 10, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:47,145] Trial 31 finished with value: 0.4374125584543907 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.2457809516380005, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:47,213] Trial 32 finished with value: 0.3625576518621392 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.6459129458824919, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:47,254] Trial 33 finished with value: 0.36175556871883746 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.8179058888285398, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:47,285] Trial 34 pruned. Duplicate parameter set\n", + "[I 2024-07-02 13:17:47,330] Trial 35 finished with value: 0.8202473217121523 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.0920052840435055, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:47,372] Trial 36 finished with value: 0.3672927879319306 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.8677032984759461, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:47,402] Trial 37 pruned. Duplicate parameter set\n", + "[I 2024-07-02 13:17:47,445] Trial 38 finished with value: 0.40076792599874356 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.2865764368847064, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. 
Best is trial 9 with value: 0.8639946428338224.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}, return [0.2935582042429075]\n", + "Duplicated trial: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}, return [0.3062574078515544]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:17:47,535] Trial 39 finished with value: 0.26560316846701765 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:47,632] Trial 40 finished with value: 0.41215254857081174 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 9, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:47,671] Trial 41 pruned. 
Duplicate parameter set\n", + "[I 2024-07-02 13:17:47,763] Trial 42 finished with value: -0.00461414372160085 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 25, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:47,808] Trial 43 finished with value: 0.27282533524183633 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 2, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 9 with value: 0.8639946428338224.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 5, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}, return [0.3039309544203818]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:17:47,919] Trial 44 finished with value: -0.10220127407364991 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 22, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 9, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:47,975] Trial 45 finished with value: 0.30323404130582854 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 3, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:48,030] Trial 46 finished with value: 0.3044553805553568 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.6437201185807124, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:48,076] Trial 47 finished with value: -0.12553701248394863 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 82.41502276709562, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 0.10978379088847677, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:48,120] Trial 48 finished with value: 0.36160209098547913 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.022707289534838138, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:48,175] Trial 49 finished with value: 0.2916101445983833 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 3, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.936e+02, tolerance: 4.782e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.434e+02, tolerance: 4.977e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "[I 2024-07-02 13:17:48,276] Trial 50 finished with value: 0.8609413020928532 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.04987590926279814, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. 
You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.794e+02, tolerance: 4.977e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 1.830e+02, tolerance: 4.906e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.578e+02, tolerance: 4.782e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "[I 2024-07-02 13:17:48,387] Trial 51 finished with value: 0.8610289662757457 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.019211413400468974, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. 
Duality gap: 2.754e+02, tolerance: 4.977e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 1.843e+02, tolerance: 4.906e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.507e+02, tolerance: 4.782e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "[I 2024-07-02 13:17:48,493] Trial 52 finished with value: 0.8610070549049179 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.018492644772509947, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 1.840e+02, tolerance: 4.977e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. 
You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 1.513e+02, tolerance: 4.782e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 1.924e+02, tolerance: 4.906e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "[I 2024-07-02 13:17:48,600] Trial 53 finished with value: 0.8569771623635769 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.008783442408928633, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 9 with value: 0.8639946428338224.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 4.243e+02, tolerance: 4.782e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. 
Duality gap: 2.014e+02, tolerance: 4.977e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "[I 2024-07-02 13:17:48,700] Trial 54 finished with value: 0.8624781673814641 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.05782221001517797, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.113e+02, tolerance: 4.977e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.935e+02, tolerance: 4.782e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. 
Duality gap: 2.122e+02, tolerance: 4.906e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "[I 2024-07-02 13:17:48,798] Trial 55 finished with value: 0.8618589507037001 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.02487072255316275, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 9 with value: 0.8639946428338224.\n", + "[I 2024-07-02 13:17:48,886] Trial 56 finished with value: 0.864754359721037 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.2079910754941946, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 56 with value: 0.864754359721037.\n", + "[I 2024-07-02 13:17:48,946] Trial 57 finished with value: 0.8622236413326235 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.333215560931422, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 56 with value: 0.864754359721037.\n", + "[I 2024-07-02 13:17:49,009] Trial 58 finished with value: 0.861832165638517 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.3628098560209365, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 56 with value: 0.864754359721037.\n", + "[I 2024-07-02 13:17:49,068] Trial 59 finished with value: 0.8620108533993581 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.34240779695521706, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 56 with value: 0.864754359721037.\n", + "[I 2024-07-02 13:17:49,142] Trial 60 finished with value: 0.8638540565650902 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.26493714991266293, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 56 with value: 0.864754359721037.\n", + "[I 2024-07-02 13:17:49,217] Trial 61 finished with value: 0.8629799500771645 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.30596394512914815, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 56 with value: 0.864754359721037.\n", + "[I 2024-07-02 13:17:49,276] Trial 62 finished with value: 0.8621408609583922 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.33648829357762355, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 56 with value: 0.864754359721037.\n", + "[I 2024-07-02 13:17:49,351] Trial 63 finished with value: 0.8638132124078156 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.2679814646317183, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 56 with value: 0.864754359721037.\n", + "[I 2024-07-02 13:17:49,424] Trial 64 finished with value: 0.863983758876634 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.24062119162159595, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 56 with value: 0.864754359721037.\n", + "[I 2024-07-02 13:17:49,500] Trial 65 finished with value: 0.8627356047945115 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.3141728910335158, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 56 with value: 0.864754359721037.\n", + "[I 2024-07-02 13:17:49,583] Trial 66 finished with value: 0.8639203054085788 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.23391390640786494, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 56 with value: 0.864754359721037.\n", + "[I 2024-07-02 13:17:49,644] Trial 67 finished with value: 0.8570103863991635 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.6124885145996103, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 56 with value: 0.864754359721037.\n", + "[I 2024-07-02 13:17:49,742] Trial 68 finished with value: 0.8647961976727571 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.2059976546070975, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 68 with value: 0.8647961976727571.\n", + "[I 2024-07-02 13:17:49,830] Trial 69 finished with value: 0.8648312544921793 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.20266060662750784, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 69 with value: 0.8648312544921793.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:17:49,926] Trial 70 finished with value: 0.8648431452862716 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.20027647978240445, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 70 with value: 0.8648431452862716.\n", + "[I 2024-07-02 13:17:50,010] Trial 71 finished with value: 0.8648491459660418 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.1968919999787333, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: 0.8648491459660418.\n", + "[I 2024-07-02 13:17:50,106] Trial 72 finished with value: 0.8650873115156988 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.174598921162764, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 72 with value: 0.8650873115156988.\n", + "[I 2024-07-02 13:17:50,204] Trial 73 finished with value: 0.8650350577921149 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.16468002989641095, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 72 with value: 0.8650873115156988.\n", + "[I 2024-07-02 13:17:50,300] Trial 74 finished with value: 0.8649412283687147 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.1606717091615047, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 72 with value: 0.8650873115156988.\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. 
You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 4.986e+01, tolerance: 4.782e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "[I 2024-07-02 13:17:50,396] Trial 75 finished with value: 0.8649537211609554 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.14694925097689848, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 72 with value: 0.8650873115156988.\n", + "[I 2024-07-02 13:17:50,506] Trial 76 finished with value: 0.8649734575435447 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.147612713300643, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 72 with value: 0.8650873115156988.\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 6.446e+01, tolerance: 4.782e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "[I 2024-07-02 13:17:50,620] Trial 77 finished with value: 0.8648761002838515 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.14440434705706803, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 72 with value: 0.8650873115156988.\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 1.398e+02, tolerance: 4.782e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "[I 2024-07-02 13:17:50,775] Trial 78 finished with value: 0.8639826593122782 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.1265357179513065, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 72 with value: 0.8650873115156988.\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 8.690e+01, tolerance: 4.782e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "[I 2024-07-02 13:17:50,875] Trial 79 finished with value: 0.864435565531768 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.1374245525868926, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 72 with value: 0.8650873115156988.\n", + "[I 2024-07-02 13:17:50,938] Trial 80 finished with value: 0.8590221951825531 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.49890830155012533, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 72 with value: 0.8650873115156988.\n", + "[I 2024-07-02 13:17:51,042] Trial 81 finished with value: 0.8649098880804443 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.1573428812070292, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 72 with value: 0.8650873115156988.\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 8.405e+01, tolerance: 4.782e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "[I 2024-07-02 13:17:51,142] Trial 82 finished with value: 0.864536410656637 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.13886104722511608, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 72 with value: 0.8650873115156988.\n", + "[I 2024-07-02 13:17:51,208] Trial 83 finished with value: 0.8597401050431873 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.47746341180045787, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 72 with value: 0.8650873115156988.\n", + "[I 2024-07-02 13:17:51,259] Trial 84 finished with value: 0.8537465461603838 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.8599491178327108, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 72 with value: 0.8650873115156988.\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 9.050e+01, tolerance: 4.782e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "[I 2024-07-02 13:17:51,388] Trial 85 finished with value: 0.8642643827090003 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.13446778921611002, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 72 with value: 0.8650873115156988.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. 
You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 1.175e+02, tolerance: 4.782e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "[I 2024-07-02 13:17:51,524] Trial 86 finished with value: 0.8641621818665252 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.1286796719653316, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 72 with value: 0.8650873115156988.\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 9.446e+01, tolerance: 4.782e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "[I 2024-07-02 13:17:51,625] Trial 87 finished with value: 0.864182755916388 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.13303218726548235, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 72 with value: 0.8650873115156988.\n", + "[I 2024-07-02 13:17:51,693] Trial 88 finished with value: -0.1255357440899417 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.021711452917433944, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 5.559714273835951e-05, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 72 with value: 0.8650873115156988.\n", + "[I 2024-07-02 13:17:51,758] Trial 89 finished with value: 0.8604596648091501 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.43644874418279245, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 72 with value: 0.8650873115156988.\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.463e+02, tolerance: 4.782e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "[I 2024-07-02 13:17:51,861] Trial 90 finished with value: 0.8635689909135862 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.10940922083495383, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 72 with value: 0.8650873115156988.\n", + "[I 2024-07-02 13:17:51,951] Trial 91 finished with value: 0.8648544336551733 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.1912756875742137, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 72 with value: 0.8650873115156988.\n", + "[I 2024-07-02 13:17:52,042] Trial 92 finished with value: 0.8648496595672595 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.19628449928540487, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 72 with value: 0.8650873115156988.\n", + "[I 2024-07-02 13:17:52,096] Trial 93 finished with value: 0.8452625121122099 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.4324661283995224, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 72 with value: 0.8650873115156988.\n", + "[I 2024-07-02 13:17:52,149] Trial 94 finished with value: 0.8378670635846416 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.839206620815206, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 72 with value: 0.8650873115156988.\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 8.002e+01, tolerance: 4.977e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. 
You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 4.082e+02, tolerance: 4.782e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "[I 2024-07-02 13:17:52,249] Trial 95 finished with value: 0.8649365368153895 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.07270781179126021, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 72 with value: 0.8650873115156988.\n", + "[I 2024-07-02 13:17:52,374] Trial 96 finished with value: 0.8875676754699953 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.0006995169897945908, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 96 with value: 0.8875676754699953.\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 5.586e+01, tolerance: 4.977e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. 
Duality gap: 5.618e+01, tolerance: 4.782e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 5.234e+01, tolerance: 4.906e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "[I 2024-07-02 13:17:52,484] Trial 97 finished with value: 0.8730555131061773 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.0018186269840273495, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 96 with value: 0.8875676754699953.\n", + "[I 2024-07-02 13:17:52,552] Trial 98 finished with value: -0.12553508835019533 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.04867556317570456, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 0.0011658455138452, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 96 with value: 0.8875676754699953.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. 
Duality gap: 1.284e+02, tolerance: 4.782e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 1.016e+02, tolerance: 4.906e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 1.177e+02, tolerance: 4.977e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "[I 2024-07-02 13:17:52,664] Trial 99 finished with value: 0.8586292788613132 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.005078762921098462, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 96 with value: 0.8875676754699953.\n" + ] + } + ], + "source": [ + "study = optimize(config, study_name=\"my_study_r2\")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkwAAAG1CAYAAAALEauPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABLGElEQVR4nO3deXhU5dnH8d9kJiEb0YAgFAUFJZElEpYABWSR8mKNqBStUqwbuCBiqSDySnFDRQXRECNaQV9Aq1VwoWJFq9VaFUERbNkKIgUlBGUJZM/Mef+gmWaSSc7MZObMku/nurhIzjb3uXNmzj3Pec5zbIZhGAIAAECD4sIdAAAAQKSjYAIAADBBwQQAAGCCggkAAMAEBRMAAIAJCiYAAAATFEwAAAAmKJgAAABMUDABAACYcIQ7gFhiGIZcruAPnB4XZwvJdlEfubYOubYOubYW+bZOMHIdF2eTzWYzXY6CKYhcLkOHDpUEdZsOR5zS01NUXFyq6mpXULcNT+TaOuTaOuTaWuTbOsHKdatWKbLbzQsmLskBAACYoGACAAAwQcEEAABggoIJAADABAUTAACACQomAAAAExRMAAAAJiiYAAAATFAwAQAAmKBgAgAAMEHBBAAAQs6w2VRa7dIPxytVWu2S4cPz2yIJz5IDAAAh5bTZVLByszbuOOielp3RRpPHZsluRMeDimlhAgAAIWN4KZYkaeP2gypYtTlqWpoomAAAQMiUVTnrFUs1Nm4/qLIqp8URBYaCCQAAhExpeXWT5kcKCiYAABAyyYneu0snJth1+ciuSmzhiIqO4HT6BgAAIZMUb1d2Rhtt3P7fy3KJCXbNmNBXb/7tG/3xvR3u6ZHcEZwWJgAAEDI2w9DksVnKzmjjnjbmvC5682/faNO/oqcjOC1MaBLDZlNZlVOl5dVKTnQoKd4uWwR+MwCiSbDeV4bNpuJyp4r2HFJSC4cSHXE+bafu6yfH22VIpjH5EnftZVKSHGoR71B5ZXWjr1V3mi/rWbmMr/nwdT1//x7eYvT1mLFq2ylJDt3yi3Pd20ls4fBoWaqtpiN4siOy2nQomNAgsw+/WBhXA/BVICd6f0+iZRXVapnSQk/XeV/1736qJl7cUxX/eT2z1/d1O76sl5hg15zrB+iVv+zwmDbx4h7K6NRKZeVVSklyKN5h11Mmnwe1PzNqX5KpaWVo6LVqT/NlPSuXqbuvNSfVCqfh8flotl5Dn5l1P2d9ibGx7dYcf778rf3ddmNx114v2RGnH45XNriudKIjeHJqQqPLWM1mGJzZgsXpdOnQoZKgbtPhiFN6eooOHy5RdbUrqNtujFkxZNhsyvcyrkbNclPGZkV0S5O3YjDebgtLrpujcB3XgQrkRC95FijuE9Sqhk9Ql4/squ17DnucnOq+ni+v78t2fF2v7jRv22no9cac10VZZ52iBEec0lIS9PRrX7v33ZfX8jYt0papvb8TL+6hzE6tZLNJz77xD49jwZcc1S2yvX3O+hJjjbqfxbWP40D3v6Ft1+bL+aGsyqkp8/9ab36N/OnD3C1MDX15D9bnSKtWKbLbzVuzIqu9CxHBl0HGonlcDed/3sxT5v9Vd+R/rCnz/6r8VZtV4YzcAg/hU/f94K3vxZjzuujl9zyLpcQEu0b1P0P5r2zSlPl/1fqtRfVaYOqul9kpvd7Jqe7r+fL6vmzH1/XqTvO2nbrL1BRV2/cc1uzFn+iO/I/1/Q8lHvvuy2
t5mxZpy9Te37999b2mzP9ABw6V1vt89CVHNZ9Fzv/03/H2OetLjDVqfxbXPY4D3X9v267Ll/NDTUdwb7Iz2igp3i6p4c/rmhxZiYIJ9fhysEfruBpmxeCx0sabidH81H0/+HoSrVtY+LJeZVX9b8mBnMR92Y6v69Wd5m07dZfxVlTVXcaX1wp0PSuXkervry/rNdTpeevuQ9q08weVVhs6VloVUIy11XwW1z2OA91/b9v2dXrt+d46gkv/vZJh+8+VjMY+r63+jksfJtTjy8He0LgaNczmh4tZMXj0eIVS4n3/HkGn99hX9/3g60k0s1O6R6dWX9ZL8HLsBXIS92U7vq5Xd5q37dRdpu6+e1vGl9cKdD0rl5Hq768v63nLUe3LnfmvbNKc6/sHFGNtNZ/FdY/jQPff27Z9nV53vt0w3JfnvH2Gmn1el1Q4dUqjrxRctDChHl8Odl+bUyONWTFYUlb/G11DIqmpGKFT9/3g60k0kGJk257DOvfsNo0uE6zt+Lpe3WnetlN3GW9FVd1lfHmtQNezchlv++vLer60VAUaY43an8V1j+OmbDsxwa4pl50ryeZ1wEl/zg82w1CyI06npCYouc5dnOZf3n3/vA4GCibU48vB7ktzaiQyKwZTkuJ92k6sPEwS5uq+H3w9iQZSjLz50S6NGdK50ZNWsLbj63pvfrRLvxzZ1Z0Db9upu563oqruMr68lrdpvqxn5TLe9teX9Rpqqap9iS7QGKX6n8V1j+NAt11zo8DfN53or+Xty2Kwzg/mX959+7wOFu6SC6KYu0tu1WaPkVm93UoabZekDJtN+XX2q0Z2RhvNmNBX1RVVprkurXb5fIcH6ovKu+T+c9w0erfZ+zvcx5Yvd5d5W6/BW/b9fP3a26morFZSC4fiHXHu7fj6+g0OPfCa5/uo9h2Bkk1LVv+j3vus9p1kpf/Zt1gYhyktJUFPv/51vZGs694BV3s9bzm689f9NG/Z+no5G3NeF2V2Sldyi3ilJgc+VlLdz3Wzv7W3bde927G2unfONfX8YPZ5PXXcuTqllXV3yVEwBVEsFUxS9BVDvmqwGPxFltq1TvUp1z8cr9Qd+R83OP+RKYN1SoSNIRJJoq1gkvwfh6luoSP5doIK5oCP3obLCNVgil7HafPhS1esCGR/664z5/r+um/JugZfIxhfxJr6ue7ty2Ltoi4lMV4pScE7XzSW1xZBGgqGgikMYq1gimVNHYeJFqamaU7Hdbi/ePClyzon9tel8spqJSY4lBRvPrJ67Rx5a6mqESnj29X9stiUwS19xThMQBg11tHQF9Ha6R3Wa+qxFs2a277bDENpiXZldGqltETfisPaOUqwKeL7htbtV2TFM+Ei5TiKzHu/gQhX06mxoabiSPhgAxB9zG61D7eaL4s1n3vehkeoEanPhAsUBRMQoEj/YAMQnWpaVNzPUougz5S6XxZ9Gdwy0p4JFygKJqAJIvmDDQBCofaXRaer8c+8SB3EOBCx0U4Gvxk2m0qrXV4HHYsU0RBjoGrvW5nTJVdcXMTtayznH0DT1HxZTG3haDb9OWOn9IPPaj+xukak3e4bDTEGqva+WXGHSVNjjJSYAESe5tSfk2EFgigahhUw/vM4D18GHQuXcMVoxe3Xdfet7uCGtYXr72FF/pvTsALhRq6t1VzzHY4hJBhWACFl9jDDEyPQhlc0xBiouvvm7cnvNcK1r7GcfwChESm3/ocSBVMzY/4ww8bnWyEaYgxU3dh9ucPEarGcfwAIVMwUTC6XS3l5eRoyZIh69eqlSZMmae/evQ0u/+OPP+r222/XgAED1L9/f02bNk0HDhywMOLwMH+YYfi7tUVDjIGqG7u3B3A2trwVYjn/ABComCmYCgoK9OKLL+r+++/XSy+9JJfLpYkTJ6qystLr8r/5zW/0/fff67nnntNzzz2n77//XrfccovFUVsvGkaojoYYA1V337w9+b1GuPY1lvMPAIGKiYKpsrJSS5
cu1dSpUzVs2DBlZmZq4cKFKiws1Nq1a+stX1xcrM8//1yTJk3SOeeco27duumGG27Q119/rSNHjli/AxaquaMhkofej4YYA1V33978aJfGDOlcr2gK577Gcv4BIFAx0ba+bds2lZSUaODAge5paWlp6tatm9avX6/c3FyP5RMTE5WSkqLXX39dOTk5kqQ33nhDZ555ptLS0iyNPRyiYYTqaIgxUHX3LSXJoVsvO9fjyfPh3tdYzj8ABCImCqbCwkJJUvv27T2mt23b1j2vtoSEBM2bN09z5sxR3759ZbPZ1LZtW61YsUJxcU1rdHME+Zk5Nbc6+nLLo7/i7Q6lefRHibyBCa2MMZS59qbuviUkRt7fI1T5tzrXzRm5thb5to7VuY6JgqmsrEzSiUKothYtWujo0aP1ljcMQ1u3blV2drYmTpwop9OphQsXavLkyfrDH/6g1NTUgOKIi7MpPT0loHXNpKUlhWS7qI9cW4dcW4dcW4t8W8eqXMdEwZSYmCjpRF+mmp8lqaKiQklJ9RP59ttva8WKFfrggw/cxdHixYs1fPhwvfrqq7rmmmsCisPlMlRcXBrQug2x2+OUlpak4uIyOZ3NZxC0cCDX1iHX1iHX1iLf1glWrtPSknxqpYqJgqnmUlxRUZE6duzonl5UVKSMjIx6y2/YsEFnnnmmR0vSSSedpDPPPFN79uxpUiyhGtnV6XQ1q1Fjw4lcW4dcW4dcW4t8W8eqXMfERdbMzEylpqZq3bp17mnFxcXasmWL+vXrV2/5du3aac+ePaqoqHBPKy0t1b59+3TGGWdYETIAAIgiMVEwJSQkaMKECZo/f77+8pe/aNu2bZo2bZratWunUaNGyel06uDBgyovL5ckXXLJJZJOjMW0bds2bdu2Tb/97W/VokULjR07Nox7AgAAIlFMFEySNHXqVI0bN06zZ8/WlVdeKbvdriVLlig+Pl779+/X4MGDtWbNGkkn7p578cUXZRiGrr76al177bWKj4/Xiy++qJYtW4Z5TwAAQKSxGQYDqwSL0+nSoUMlQd1mc33ydTiQa+uQa+uQa2uRb+sEK9etWqX41Ok7ZlqYAAAAQoWCCQAAwAQFEwAAgAkKJgAAABMUTAAAACYomAAAAExQMAEAAJigYAIAADBBwQQAAGCCggkAAMAEBRMAAIAJCiYAAAATFEwAAAAmKJgAAABMUDABAACYoGACAAAwQcEEAABggoIJAADABAUTAACACQomAAAAExRMAAAAJiiYAAAATFAwAQAAmKBgAgAAMEHBBAAAYIKCCQAAwAQFEwAAgAkKJgAAABMUTAAAACYomAAAAExQMAEAAJigYAIAADBBwQQAAGCCggkAAMAEBRMAAIAJCiYAAAATFEwAAAAmKJgAAABMUDABAACYoGACAAAwQcEEAABggoIJAADABAUTAACACQomAAAAExRMAAAAJiiYAAAATFAwAQAAmKBgAgAAMEHBBAAAYIKCCQAAwAQFEwAAgAkKJgAAABMUTAAAACYomAAAAEzETMHkcrmUl5enIUOGqFevXpo0aZL27t3b4PJVVVVasGCBe/kJEyZo69atFkYMAACiRcwUTAUFBXrxxRd1//3366WXXpLL5dLEiRNVWVnpdfl77rlHq1at0oMPPqiVK1eqVatWmjRpko4dO2Zx5AAAINLFRMFUWVmppUuXaurUqRo2bJgyMzO1cOFCFRYWau3atfWW37t3r1auXKkHHnhAQ4YMUZcuXTR37lwlJCToH//4Rxj2AAAARLKYKJi2bdumkpISDRw40D0tLS1N3bp10/r16+st//e//10tW7bUeeed57H8+++/77ENAAAASXKEO4BgKCwslCS1b9/eY3rbtm3d82rbvXu3Tj/9dK1du1bPPPOMDhw4oG7duunOO+9Uly5dmhSLwxHcGtRuj/P4H6FDrq1Drq1Drq1Fvq1jda5jomAqKyuTJCUkJHhMb9GihY4ePVpv+ePHj2vPnj0qKC
jQHXfcobS0ND311FMaP3681qxZo9atWwcUR1ycTenpKQGtayYtLSkk20V95No65No65Npa5Ns6VuU6JgqmxMRESSf6MtX8LEkVFRVKSqqfSIfDoePHj2vhwoXuFqWFCxdq6NCheu211zRx4sSA4nC5DBUXlwa0bkPs9jilpSWpuLhMTqcrqNuGJ3JtHXJtHXJtLfJtnWDlOi0tyadWqpgomGouxRUVFaljx47u6UVFRcrIyKi3fLt27eRwODwuvyUmJur000/Xvn37mhRLdXVo3iBOpytk24Yncm0dcm0dcm0t8m0dq3IdExdZMzMzlZqaqnXr1rmnFRcXa8uWLerXr1+95fv166fq6mp9/fXX7mnl5eXau3evOnXqZEnMAAAgesREC1NCQoImTJig+fPnq1WrVurQoYMeffRRtWvXTqNGjZLT6dShQ4fUsmVLJSYmqm/fvvrpT3+qmTNn6r777tPJJ5+svLw82e12XXzxxeHeHQAAEGFiooVJkqZOnapx48Zp9uzZuvLKK2W327VkyRLFx8dr//79Gjx4sNasWeNeftGiRcrJydGUKVM0btw4HT9+XMuWLVOrVq3CuBcAACAS2QzDMMIdRKxwOl06dKgkqNt0OOKUnp6iw4dLuB4eYuTaOuTaOuTaWuTbOsHKdatWKT51+o6ZFiYAAIBQoWACAAAwQcEEAABggoIJAADABAUTAACACQomAAAAExRMAAAAJiiYAAAATFAwAQAAmKBgAgAAMEHBBAAAYKLJBVNFRYV4HB0AAIhljkBW+uabb5SXl6dPPvlEx48f1yuvvKJXX31VnTt31lVXXRXsGAEAAMLK7xamrVu3aty4cfrnP/+piy66yN26ZLfb9eCDD+q1114LepAAAADh5HcL08MPP6wePXpo6dKlkqQXXnhBkjR79mxVVFRo2bJluvTSS4MbJQAAQBj53cL01Vdf6ZprrpHD4ZDNZvOY9/Of/1zffvttsGIDAACICH4XTC1atFB5ebnXeUeOHFFCQkKTgwIAAIgkfhdMgwYNUl5engoLC93TbDabSkpKtHTpUv30pz8NaoAAAADh5ncfphkzZuiXv/ylRo8erczMTNlsNs2bN0+7d++WYRh67LHHQhEnAABA2PjdwtS+fXu98cYbuvrqq2UYhjp27KjS0lLl5uZq1apVOv3000MRJwAAQNgENA5Tenq6pk2bFuxYAAAAIpLfBdP69etNl+nXr19AwQAAAEQivwumq666SjabzeNxKHWHF9i6dWvTIwMAAIgQfhdMy5YtqzettLRUGzZs0BtvvKFFixYFJTAAAIBI4XfBlJOT43X6sGHDlJycrKeeekpPP/10kwMDAACIFH7fJdeYvn376vPPPw/mJgEAAMIuqAXT+++/r5SUlGBuEgAAIOz8viT361//ut40l8ulwsJCfffdd5o0aVJQAgMAAIgUfhdMte+OqxEXF6euXbvqxhtv1C9+8YugBAYAABAp/C6Yli9fHoo4AAAAIpZPBdP333/v10Z/8pOfBBQMAABAJPKpYBoxYkS9wSkbw8CVAAAglvhUMD344IN+FUwAAACxxKeCaezYsaGOA2Fm2Gwqq3KqtLxayYkOJcXbZfPSwR8AgObI707fkrR582atW7dOlZWV7rvmDMNQaWmpvvjiC/3xj38MapAILafNpoKVm7Vxx0H3tOyMNpo8Nkt2iiYAAPwvmF544QXNnTu3weEFBg8eHJTAYA3DS7EkSRu3H1TBqs2aMjaLliYAQLPn90jfK1as0Hnnnad169bpuuuu0+WXX66vvvpKTzzxhFq0aKExY8aEIk6ESFmVs16xVGPj9oMqq3JaHBEAAJHH74Jp3759Gj9+vE466ST16NFDX3zxhRITE/U///M/uuGGG7Rs2bJQxIkQKS2vbtJ8AACaA78Lpvj4eCUmJkqSOnXqpD179qiqqkqS1KdPH3377bdBDRChlZzY+FVZs/kAADQHfhdM55xzjj744ANJ0p
lnnimXy6VNmzZJkgoLC4MbHUIuKd6u7Iw2XudlZ7RRUrzd4ogAAIg8fhdM1157rZ5//nn97//+r5KTk3X++efrjjvu0Lx58/Twww+rT58+oYgTIWIzDE0em1WvaKq5S44O3wAABHCX3MiRI7V48WLt2rVLknTffffp9ttv10svvaSePXtqzpw5QQ8SoWU3DE0Zm8U4TAAANMDvgsnpdGrYsGEaNmyYJCk9PV1Lly4NdlywmM0wlOyIU3JqwokJFEtRiQFIASA0/C6YBg8erAsvvFAXX3yxevbsGYqYAASAAUgBIHT87sOUm5urd955R5dffrlGjx6txYsX67vvvgtFbAB8ZDYAqcGzIAGgSfwumO666y599NFHWrp0qfr27avnnntOP/vZzzRhwgS98sorOnbsWCjiBNAIBiAFgNDyu2CSJJvNpoEDB2ru3Ln6+OOPVVBQoPbt2+vee+/VkCFDgh0jABMMQAoAodWkUQmrq6v18ccf6+2339ZHH30kSRo4cGBQAgPgOwYgBYDQ8vtT1DAMffbZZ3rrrbf07rvv6ujRo8rKytLUqVP185//XOnp6aGIE0AjagYg3bi9/mU59wCkdPwGgID5XTANGTJEP/74o37yk59o/Pjxuvjii3XGGWeEIDQAvqoZgLRg1WaPookBSAEgOPwumEaMGKExY8aob9++oYgHQIAYgBQAQsfvgum+++4LRRwAgiAaBiA1bDYVlztVtOeQklo4lOiIo6gDEPHoCQrAMgyuCSBaBTSsQCRyuVzKy8vTkCFD1KtXL02aNEl79+71ad0333xTGRkZ2rdvX4ijBJovBtcEEM1ipmAqKCjQiy++qPvvv18vvfSSXC6XJk6cqMrKykbX++6777jMCASJYbOptNqlH45XqrTa5VEEMbgmgGgWE5fkKisrtXTpUk2fPt39UOCFCxdqyJAhWrt2rXJzc72u53K5NGPGDHXv3l2fffaZhREDscfscpsvg2u6+14BQIQJuIXpww8/1EMPPaRp06Zp7969Wrt2bUDPlCstLdWuXbu0adMm/fvf/1ZFRYXf29i2bZtKSko8Bs1MS0tTt27dtH79+gbXW7x4saqqqnTjjTf6/ZoA/suXy20Mrgkgmvn9CVVWVqZbbrlFn3zyiVJTU1VSUqKJEyfqD3/4g7Zs2aIVK1bo7LPPbnQblZWVevXVV7V69Wp9/fXXcjr/2xRvt9vVt29fXXDBBbr00kuVkGD+jbOwsFCS1L59e4/pbdu2dc+ra/PmzVq6dKleffVVHThwwPQ1ADTMl8ttDK4JIJr5XTA99thj+uc//6nnn39effv2VY8ePSRJDz/8sCZOnKgnnnhC+fn5Da6/atUqLViwQBUVFRo+fLguuOACdejQQcnJyTp69KgKCwv15Zdf6rHHHlN+fr6mTp2qyy67rNGYysrKJKlecdWiRQsdPXq03vKlpaWaPn26pk+frjPOOCOoBZPDEdxuYXZ7nMf/CB1yHbjS4433FSwtr1ZaoqPhwTV/kaX4OJskOn4HG8e1tci3dazOtd8F09tvv63f/va3GjBggEfLUNu2bXXzzTc32oH6xhtv1MGDBzVnzhwNHz68wdaja665RpWVlVqzZo2ee+45rV27Vr///e8b3G5iYqKkEy1XNT9LUkVFhZKSkuotP3fuXJ155pm64oorTPfXH3FxNqWnpwR1mzXS0urvB0KDXPuvpMrV6PyWKQnu98aMCX119HiFSsqqlJIUr5NSW6hlMn2XQo3j2lrk2zpW5drvgqm4uFgdOnTwOu+kk05SaWlpg+uOGjVKv/jFL3x6nYSEBF1yySW6+OKL9eqrrza6bM2luKKiInXs2NE9vaioSBkZGfWWX7lypRISEpSdnS1J7sIvNzdXN910k2666SafYqzL5TJUXNzw/gfCbo9TWlqSiovL5HQ2flJC05DrwCU64hq93JboiNPhwyXuaWmJDp3WtqWKi8tUXVGlwxVVVobbrHBcW4t8Wy
dYuU5LS/Kplcrvgunss8/W6tWrNXjw4Hrz3n///Ub7L/laLNVms9lML8llZmYqNTVV69atcxdMxcXF2rJliyZMmFBv+bVr13r8vmnTJs2YMUPPPPOMunbt6neMtVVXh+YN4nS6QrZteCLXgWnsWXaG0yVv98iRa+uQa2uRb+tYlWu/C6abb75ZU6ZM0ZEjRzR8+HDZbDatX79eq1at0ksvvaQFCxY0un5lZaX++te/6t///rfOPvtsDR06tN4yBw4c0CuvvKIpU6b4FFNCQoImTJig+fPnq1WrVurQoYMeffRRtWvXTqNGjZLT6dShQ4fUsmVLJSYmqlOnTh7r13QM/8lPfqKTTz7Zt0QA8MCz7ADEMr8LppEjR+rRRx/VggUL9OGHH0qS5s2bp9atW+uee+7R6NGjG1z3xx9/1DXXXKN//etfkk60Hp1zzjnKy8vTaaed5l6usLBQTz75pM8FkyRNnTpV1dXVmj17tsrLy9WvXz8tWbJE8fHx2rdvn84//3w99NBDGjt2rL+7DMBH0fAsOwAIhM0wAv9E++abb3TkyBGlpaWpc+fOiotr/BrgrFmztG7dOj3++OPq3Lmz3n33XT3yyCOKj4/XsmXLdMYZZ0g6cYnsiiuu0NatWwMNLSycTpcOHSoxX9APDkec0tNTdPhwCc27IUaurUOurUOurUW+rROsXLdqleJTHya/78W75ZZb9O6776qqqkqdO3dW7969ddZZZ5kWS5L06aef6rbbblNWVpZSU1N16aWX6uWXX1ZcXJyuu+46HTzofRwXAOHX2GNPACDW+X1Jbt++fbr11lt10kknafTo0br44ovVu3dvn9YtLi5WmzZtPKZ17NhRS5Ys0fjx4zVp0iS98MIL/oYEIMTMHnsCALHO7xamN954Q2+99ZbGjx+vzz77TOPHj9fIkSOVl5enPXv2NLpuzWW4urp06aK8vDzt3LlTkydPbnRoAgD+qd0yVOZ0yRUX51dLkS+PPYl2tJ4BMNOkPkyS9PXXX2vNmjV65513tH//fmVlZenll1/2uuzbb7+tadOmadiwYbr88ss1YsQIj/l/+tOfNHPmTKWmpqq4uJg+TOJ6uJViMde1W4YSE+yaMaGv3vzbN9r0L99bikqrXZoy/68Nvkb+9GFK9nOE+0jKday3nkVSrpsD8m2diO/DVFfHjh3VpUsXZWRkKC4uTv/+978bXPaCCy7QggUL9P3333t9KG5ubq4KCgoUHx/f1LDQjNFacELdlqEx53WpVyxJ5i1FpeXeRlDyfX4kaw6tZwCCI6DHg5eWluq9997TmjVr9Pe//11xcXEaOnSo8vLyvI6rVNuFF16oCy+8ULt27fI6f+jQoXr//ff15ZdfBhIamrlYby3wR90H4mZ2Stcf39vhddmaB+R6aylKTmz8Y8JsfiTz5aHB/raeAYhNfn8S3HbbbRo0aJBmzpypY8eO6Xe/+50+/vhj5eXl6fzzz5fD4duH5/jx4/X66697nZeQkKABAwb4GxqaOVoLPNVt+ak0ed5bQy1FSfF2ZWe08TovO6ONkuLtgQUYAWK59QxAcPn91XD79u2aNGmSxowZ4zHYpL/i4+OVnp4e8PpAXbQWeKrb8pMQ3/i+N9RSZDOMRh97Es0jecdy6xmA4PL70+DPf/5zUF74tttu0yOPPKJjx44pMzNTycnJ9Zb5yU9+EpTXQvPgS2uBewTqZqCmZaimyNm257DOPbtNvT5MUq2WogaKn1h97EndHNVmlhMAzYtPBdOsWbM0efJknX766Zo1a1ajy9psNj344IOm27znnnvkdDo1Y8aMBpeJtrvkEF60Fniq2zL05ke7NGNCX0nyepecWfETi489ieXWMwDB5dMZZN26dbr66qvdPwfD3Llzg7IdoAatBfXVbRlKSXLo1svOVXlldUy1FDVFrLaeAQiuJo/DhP9iHKbwc9psDbYWmN0lR66tQ66tQ66tRb6tE/HjMM2aNUt79+71Ou+bb7
7RTTfd1OC6d9xxh3744Qe/Xq+wsFC33367X+ug+appLcifPkyPTBms/OnDNKUZDikAAAguny7Jff/99+6fX3vtNY0cOVJ2e/1biT/66CN98sknDW4nMzNTubm5ys3N1ZgxY5SVldXgsps3b9bLL7+stWvX6sYbb/QlTEBSbPa1AQCEl08F07333quPPvpI0olO3VOmTPG6nGEYGjRoUIPbue666zR06FDNnz9fv/zlL9W2bVv17NlTp512mpKSknTs2DHt379fGzdu1OHDhzVs2DC98MIL6tq1awC7BgAAEBw+9WE6cOCAPvnkExmGof/93//VzTffrI4dO3osExcXp7S0NPXv39/rEAF17dixQ6tXr9a6deu0d+9eHTt2TOnp6erQoYMGDRqkUaNGKSMjI/A9CwP6MEU3cm0dcm0dcm0t8m0dq/sw+dTCdOqpp+rSSy+VdKKFadiwYUpLS3NflisvL1dVVZVatmzpc4Bdu3albxIAAIgKfnf6zs3N1eOPP67LL7/cPe3LL7/UwIED9fDDD8vloqIGAACxxe+CadGiRXrzzTeVm5vrntatWzdNnz5df/zjH/Xss88GNUAAAIBw83vo49WrV2vmzJm64oor3NNOPvlkXXPNNXI4HFq2bJluuOGGoAYJNMSw2RhwEAAQcn4XTIcPH9bpp5/udV7nzp1VWFjY5KAAXzhtNhWs3OzxwF1fB6msy7DZVFzuVNGeQ0pq4VCiI47CCwDg5nfB1LlzZ73zzjtehw94//331alTp6AEhsA1h1YXw0uxJEkbtx9UwarNmuLHc8CCWXghvOoe+8nxdhlSzL8fAISe3wXTr3/9a9155506cuSIRo4cqdatW+vQoUP64IMP9Pbbb+uhhx4KRZzwUXM5+ZdVOesVSzU2bj+osiqnkh3mXfSCWXghvOoe+4kJds25foBe+cuOmH8/AAg9vwumSy65RCUlJSooKNDatWvd09PT0/W73/1Ol1xySTDjgx+a08m/tLzadL57pO9GBKvwQnh5O/bHnNdFL7+3Q5v+FfvvBwCh53fBJEm/+tWvNH78eO3evVtHjhxRWlqaOnfurLg4Tizh1JxO/smJjR+6ZvNrBKvwQnh5O/YzO6Xrj+/t8Lp8rL0fAIRewJ8WxcXF2r17t7Zv365WrVrp22+/lQ+DhiOEfDn5RwrDZlNptUs/HK9UabVLhs3m1/pJ8XZlZ7TxOi87o42S4us/69CbYBVeCC9vx3ZlVeNjwjX2fmjq8Qkg9gR0Nnjqqaf09NNPq7y8XDabTVlZWXr88cd1+PBhLV26VGlpacGOEz6IlpN/MPpZ2QxDk8dmqWDVZm3cXn87vl5qqSm8am+j9raS4u08vDcKeDu2E+Ib/z7Y0PuhufQDBOAfv1uYVqxYoUWLFunaa6/VH//4R3er0oQJE7R371498cQTQQ8SvglWq0somfWz8uebvN0wNGVslvKnD9MjUwYrf/owTfHzpFZTeNXNm7+FF8LL27G/bc9hnXu2f++HYB6fAGKL300Oy5cv1w033KDbbrtNTqfTPX3o0KH6zW9+o2eeeUa/+93vghokfBOsVpdQCnY/K5thKNkR999+RgHsY03hVVblUnlltRITHEqKZxymaOLt2H/zo12ac/0AxcXJ5/dDc+oHCMA/fhdM33//vXJycrzO69y5s3744YcmB4XA/ffkH5njzkRqJ2ubYSgt0a5O7dN4yniU8nbsJ8fb/Xo/ROrxCSD8/P6q1L59e23cuNHrvH/84x9q3759k4NC09S0upySmqDkCBuxOlr6WSE61T32ZRh+vR84PgE0xO+Cady4cVq8eLGWLFmib7/9VpJUWlqqd955R08//bQuvfTSYMeIGBIN/awQmay4c43jE0BDbIafYwEYhqG7775br7zyivt3238+uC666CLNmzev2Y7H5HS6dOhQSVC36XDEKT09JaYuEzlttgb7WYXzLqRYzHWk8jfXVt65FqnHZ6A4rq1Fvq
0TrFy3apUiu928bvG7YKqxe/durVu3TkeOHFHLli3Vr18/de3aNZBNxQwKJt9F4vPuYjXXkcifXBs2m/K93LkmnShkQjFidyQen4HiuLYW+baO1QVTwBfkzzzzTLlcLhUXF6t169bq2LFjoJtCMxSMu9sQHbwVH/4Ix51rHJ8A6gqoYFq+fLmefvpp/fjjj+5p7du3129/+1vl5uYGLTgA0a2xS2m+4s41AJHA74JpxYoVeuCBBzRy5Ej97Gc/U+vWrXXw4EG99dZbmjFjhux2uy644IJQxAogipgNAjljQl+ftsOdawAigd+fNMuWLdOECRM0e/Zsj+mXXnqp7rrrLuXn51MwATC9lHb0eIVSTB5fIvH4GgCRwe8L/4WFhRoxYoTXebm5udq7d2+TgwIQ/cwupZWUVfm0HR5fAyAS+N3C1LNnT3366af66U9/Wm/eli1blJGREZTAAEQ3s0tlKUnxPm8r0kewBxD7fCqY1q9f7/75wgsv1EMPPaSysjJdcMEFatOmjY4cOaIPP/xQy5cv19y5c0MWLIDoYXYp7aTUFqqu8K2VSeLONQDh5dM4TJmZme7BKaUTg1VKanDa1q1bgx1nVGAcpuhGroOvwUEgf5Gldq1TybUFOK6tRb6tE5HjMC1btizgQAA0Xw1dSouPC/5jTQAglHwqmHJyckIdB4AY5f1SGgUTgOjiU8GUn5+vyy67TKeeeqry8/NNl09OTlbHjh01bNgwORyMkQIAAKKbzwXTeeed53PBJJ3oy3TRRRfpkUceaVKAAAAA4eZTwbRt2zavPzeksrJSixcvpu8TAACICQFfLzt27JgMw1BaWlq9eQkJCRo0aJC2b9/epOAAAAAigV8F065du/T73/9ef/nLX3T8+HFJUkpKis4//3xdd911HoNW9unTR3369AlutAAAAGHgc8G0Zs0azZo1S3FxcfrpT3+qjh07yuFwaO/evXr//ff19ttv68EHH1Rubm4o4wUAALCcTwXTrl27NGvWLA0dOlT333+/TjrpJI/5x48f1913363Zs2frnHPOUZcuXUISLAAAQDj49PDd559/XmeddZYWLlxYr1iSpNTUVD366KPKzMzU//3f/wU9SAAAgHDyqWD69NNPNX78eNnt9oY3FBenK664Qp988knQggMAAIgEPhVMRUVF6tSpk+lyp512mg4erP+gTQAAgGjmU8GUlpamoqIi0+WKiorUqlWrJgcVCJfLpby8PA0ZMkS9evXSpEmTtHfv3gaX/9e//qUbbrhB/fv318CBAzV16lR9//33FkYMAACihU8FU+/evfX666+bLrdq1Sr17t27qTEFpKCgQC+++KLuv/9+vfTSS3K5XJo4caIqKyvrLXv48GFde+21SkxM1PLly/X73/9ehw4d0sSJE1VRURGG6EPLsNlUWu3SD8crVVrtkmHjOV4AAPjDp4Lp6quv1t/+9jcVFBQ0uMyCBQv06aef6uqrrw5acL6qrKzU0qVLNXXqVA0bNkyZmZlauHChCgsLtXbt2nrLv/feeyotLdUjjzyirl27qkePHnr00Ue1a9cuffnll5bHH0pOm035Kzdryvy/6o78jzVl/l+Vv2qznBRNAAD4zKdhBfr06aNp06bpscce01tvvaXhw4erQ4cOcjgc+u6777R27Vrt3r1bM2fOVFZWVqhjrmfbtm0qKSnRwIED3dPS0tLUrVs3rV+/vt7YUAMHDlRBQYESExPd0+LiTtSOxcXF1gRtAcNmU8HKzdq4w7Nf2cbtB1WwarOmjM2SzTDCFB0AANHD54Erb7jhBp199tnKz8/Xs88+6zGvV69e+v3vf6/BgwcHPUBfFBYWSpLat2/vMb1t27buebWddtppOu200zymPfPMM0pMTFS/fv2aFIvD4VOjnc/s9jiP//1RXO6sVyzV2Lj9oMqqXEpLbPjOx+amKbmGf8i1dci1tci3dazOtV+PRhk+fLiGDx+uw4cP67vvvpNhGOrQoU
PYOnrXKCsrk3TiGXa1tWjRQkePHjVdf/ny5VqxYoVmz57dpH2Ji7MpPT0l4PUbk5aW5Pc6RXsONTq/vLJandrXfxZgcxdIrhEYcm0dcm0t8m0dq3Id0MN309PTlZ6eHuxYAlZzaa2ystLjMltFRYWSkhpOpGEYeuKJJ/TUU0/p5ptv1lVXXdWkOFwuQ8XFpU3aRl12e5zS0pJUXFwmp9Pl17qJCY3/eRMTHDp8uKQp4QWN05BKKpwqLa9SSlK8khPsslvczaopuYZ/yLV1yLW1yLd1gpXrtLQkn1qpAiqYIk3NpbiioiJ17NjRPb2oqMjjgcC1VVVVadasWfrTn/6kWbNm6ZprrglKLNXVoXmDOJ0uv7edFB+n7Iw22ri9/mW57Iw2SoqPC1m8/nB66WuVndFGk8dmyR6GPlaB5BqBIdfWIdfWIt/WsSrXMXGRNTMzU6mpqVq3bp17WnFxsbZs2dJgn6Q77rhDf/7zn7VgwYKgFUuRxmYYmjw2S9kZbTym1xQjkdDh26xjOkMgAAAiQUy0MCUkJGjChAmaP3++WrVqpQ4dOujRRx9Vu3btNGrUKDmdTh06dEgtW7ZUYmKiVq1apTVr1uiOO+5QTk6Ox+jkNcvECrthaMrYLJVVOVVaXq3kRIeS4u0RUSxJUlmVWcd0p5KD3JEeAAB/xcyZaOrUqRo3bpxmz56tK6+8Una7XUuWLFF8fLz279+vwYMHa82aNZKkP/3pT5KkRx55RIMHD/b4V7NMLLEZhpIdcTolNUHJjriIKZYkqbS8uknzAQCwQky0MEmS3W7XjBkzNGPGjHrzTjvtNG3fvt39+9KlS60MDY1ITmz8EDSbDwCAFWKmhQnRKSneXq+PVY0THdMZJwoAEH4UTAiraOiYDgAA1zsQdpHeMR0AAAomRISajunJqf8ZrZ1iCQAQQbgkBwAAYIKCCQAAwAQFEwAAgAkKJgAAABMUTAAAACYomAAAAExQMAEAAJigYAIAADBBwQQAAGCCggkAAMAEBRMAAIAJCiZEDcNmU2m1Sz8cr1RptUuGzRbukAAAzQQP30VUcNpsKli5WRt3HHRPy85oo8ljs2Rvxg/qNWw2lVU5VVpereREh5Li7bI143wAQKhQMCHiGV6KJUnauP2gClZt1pSxWc2ySKCIBADrcEkOEa+sylmvWKqxcftBlVU5LY4o/MyKSC5XAkBwUTAh4pWWVzdpfiyiiAQAa1EwIeIlJzZ+5dhsfiyiiAQAa1EwIeIlxduVndHG67zsjDZKirdbHFH4UUQCgLUomBDxbIahyWOz6hVNNR2cm2OHb4pIALAWX0MRFeyGoSljs7iF/j9qisiCVZu1cXv9u+Saa14AIFQomBA1bIahZEecklMTTkxo5kUBRSQAWIeCCYhiFJEAYA36MAEAAJigYAIAADBBwQQAAGCCggkAAMAEBRMAAIAJCiYAAAATFEwAAAAmKJgAAABMUDABAACYoGACAAAwQcEEAABggoIJAADABAUTAACACQomAAAAExRMiHmGzabSapd+OF6p0mqXDJst3CH5JFrjBoBY5Ah3AEAoOW02FazcrI07DrqnZWe00eSxWbIbRhgja1y0xg0AsYoWJsQsw0vRIUkbtx9UwarNEdtiE61xA0Aso2BCzCqrctYrOmps3H5QZVVOiyPyTbTGDQCxjIIJMau0vLpJ88MlWuMGgFhGwYSYlZzYeBc9s/nhEq1xA0Aso2BCzEqKtys7o43XedkZbZQUb7c4It9Ea9wAEMsomBCzbIahyWOz6hUfNXeb2SL0brNojRsAYhlt+4hpdsPQlLFZKqtyqrS8WsmJDiXF2yO+6IjWuAEgVlEwIebZDEPJjjglpyacmBAlRUe0xg0AsYhLcgAAACYomAAAAExQMAEAAJigYAIAADARMwWTy+VSXl6ehgwZol69emnSpEnau3dvg8sfPnxYt99+u/r166ecnBzde++9KisrszBiAAAQLWKmYCooKNCLL7
6o+++/Xy+99JJcLpcmTpyoyspKr8tPnTpVe/bs0fPPP68nnnhCH374oe655x5rgwYAAFEhJgqmyspKLV26VFOnTtWwYcOUmZmphQsXqrCwUGvXrq23/MaNG/X555/r4YcfVvfu3TVw4EDdd999euONN3TgwIEw7AEAAIhkMVEwbdu2TSUlJRo4cKB7Wlpamrp166b169fXW37Dhg1q06aNunTp4p6Wk5Mjm82mL774wpKYATTMsNlUWu3SD8crVVrtkmGzhTskAM1cTAxcWVhYKElq3769x/S2bdu659V24MCBessmJCTo5JNP1v79+5sUi8MR3BrUbo/z+B+hQ66t01iuK5yGClZu1sYdB93Tah4L08JO4eQvjmtrkW/rWJ3rmCiYajprJyQkeExv0aKFjh496nX5usvWLF9RURFwHHFxNqWnpwS8fl3HSit19HiF9h8+pJSkeJ2U2kItk+vHjeBKS0sKdwjNRt1cHyut1MIVGzyKJUnauP2gClZt1owJfXkPBIjj2lrk2zpW5TomCqbExERJJ/oy1fwsSRUVFUpKqp/IxMREr53BKyoqlJycHHAcLpeh4uLSgNf3iIVv2Zaz2+OUlpak4uIyOZ2ucIcT0xrKdXG5Uxu3H/S6zsbtB3XoaLmqK6qsCjMmcFxbi3xbJ1i5TktL8qmVKiYKpprLa0VFRerYsaN7elFRkTIyMuot365dO7333nse0yorK3XkyBG1bdu2SbFUVzf9DWLYbPWKJem/37Kn8MT6kHI6XUH5O8Jc3VyXljdeDJWWVynZwReGQHBcW4t8W8eqXMfERdbMzEylpqZq3bp17mnFxcXasmWL+vXrV2/5fv36qbCwUHv27HFP+/zzzyVJffr0CX3AJsqqnPWKpRobtx9UWZXT4ogAayQnNv4dzmw+AIRKTBRMCQkJmjBhgubPn6+//OUv2rZtm6ZNm6Z27dpp1KhRcjqdOnjwoMrLyyVJ5557rnr37q1p06Zp8+bN+uyzzzRnzhxdcsklOvXUU8O8N1JpeXWT5gPRKineruyMNl7nZWe0UVK83eKIAOCEmCiYpBMDUY4bN06zZ8/WlVdeKbvdriVLlig+Pl779+/X4MGDtWbNGkmSzWZTfn6+TjvtNF199dX6zW9+o/POOy9iBq7kWzaaK5thaPLYrHpFU03/PS5FAwgXm2HwCRQsTqdLhw6VNHk7hs2m/FWbvXZ+zc5oQx+mEHE44pSenqLDh0voexBiZrk2bDaVVTlVWl6t5ESHkuLtHPMB4ri2Fvm2TrBy3apVik+dvmOmhSmW8C0bzZ3NMJTsiNMpqQlKdsRxzAMIO67tRCi7YWjK2CyVVblUXlmtxASHkuI5cQAAEA60MEUwm2EoLdGujE6tlJbIJQkAAMKFggkAAMAEBRMAAIAJCiYAAAATFEwAAAAmKJgAAABMUDABAACYoGACAAAwQcEEAABggoIJAADABAUTAACACQomAAAAExRMAAAAJiiYAAAATFAwAQAAmKBgAgAAMEHBFGUMm02l1S79cLxSpdUuGTZbuEMCACDmOcIdAHzntNlUsHKzNu446J6WndFGk8dmyW4YYYwMAIDYRgtTlHAaqlcsSdLG7QdVsGozLU0AAIQQBVOUKKlw1iuWamzcflBlVU6LIwIAoPmgYIoSpeVVJvOrLYoEAIDmh4IpSiQnxpvMpzsamh9uggBgFc6yUSKlhV3ZGW20cXv9y3LZGW2UFG+X6PiNZoSbIABYiRamKGG3SZPHZik7o43H9JoThI0TBJoRw0uxJHETBIDQoYUpitgNQ1PGZqmsyqnS8molJzqUFG+nWEKzU1ZlfhNEsoPvgwCCh0+UKGMzDCU74nRKaoKSHXEUSyFE/5jIZXaTAzdBAAg2WpgAL+gfE9nMbnLgJggAwUYLE1AHg4RGvqR4e73+fDXcN0EAQBBRMEW5SLxsVDumMqdLrri4iIuxMZE6SGgk/q3DxWYYAd8EQR7RXETrsR6pcd
NuHcXCcdnIsNka7XReO6bEBLtmTOirN//2jTb9K3oubfkySGhyaoLXeWb5CRSXCOsL5CYI8ojmIlqP9UiO22YYEZy5KON0unToUElQt+lwxCk9PUWHD5eoutrlnm7YbMr3ctlIOnFwTQnBUANmB3LdmC4f2VXb9xz2KJZCHWNT1OR6z/5iTZn/QYPL5U8f5vUOrFC90cPxtw61ho7rUIrFPPoiHLluziIh39F6rPsbd7By3apViux28wtuXJKLUr7cVh1Mvox7UzemzE7pXoulUMUYLDWDhHrTUP+YhvKzdfchbdr5g0qrjYCbl63+W8cq8ojmIlqP9UiPm0tyUcqX26obumwUCF8O5LoxVVY1XvEHO8ZgqRkktGDVZo+R1RvrH+MtP7UvSea/sqnednxtdbL6bx2ryCOai2g91iM9bgqmKGX1bdU+Hch1XjMhvvEGzEi+9dvf/jHe8jPmvC71+m9J/22V87VZnFvog4M8ormI1mM90uPmklyUsvq2al8O5LoxbdtzWOeeHb23fvszSKi3/ATrkiS30AcHeURzEa3HeqTHTcEUpZpyW3UgfDmQ68b05ke7NGZI53pFUyw+/85bfny5JOkLq//WsYo8ormI1mM90uPmLrkgsvIuuRqhuo3dG6fN1mC/ntr9cWrHlJLkUIt4h8orqyP++XdNveOibn7mXN9f9y1Z1+DyDd1t1xAr/9ahFs47iWIpj76IhLu2mpNIyne0Huu+xm31XXKReSETPqu5bOTuCBfCN4Ov/XrqxeRyWRZjONXNT1pKgrIz2ngUmDXczct+5MLKv3UsI49oLqL1WI/UuLkkB7/w8N/G1c5Pwn/utovU5mUAgO9oYQJCKJDRqAEAkYeCCQixSG1eBgD4jktyAAAAJiiYAAAATFAwAQAAmKBgAgAAMEHBBAAAYIKCCQAAwAQFEwAAgAkKJgAAABMUTAAAACYomAAAAEzYDIPnNASLYRhyuYKfTrs9Tk6nK+jbRX3k2jrk2jrk2lrk2zrByHVcnE02m810OQomAAAAE1ySAwAAMEHBBAAAYIKCCQAAwAQFEwAAgAkKJgAAABMUTAAAACYomAAAAExQMAEAAJigYAIAADBBwQQAAGCCggkAAMAEBRMAAIAJCiYAAAATFEwRzOVyKS8vT0OGDFGvXr00adIk7d27N9xhRb0jR45ozpw5Ou+889S7d29deeWV2rBhg3v+p59+qrFjx+rcc8/V6NGj9dZbb4Ux2tixe/duZWdna9WqVe5pW7du1YQJE9SrVy+NGDFCy5YtC2OE0e/111/Xz3/+c/Xs2VMXXnih3n77bfe8ffv26cYbb1Tv3r01ePBgPf7443I6nWGMNrpVV1friSee0PDhw5Wdna1f/epX+uqrr9zzObaD4+mnn9ZVV13lMc0styE7dxqIWIsWLTL69+9vfPDBB8bWrVuN6667zhg1apRRUVER7tCi2rXXXmvk5uYa69evN7755hvj3nvvNbKysoxdu3YZO3fuNHr27Gk89thjxs6dO41nn33W6Natm/HJJ5+EO+yoVllZaYwdO9bo2rWrsXLlSsMwDOPQoUNG//79jVmzZhk7d+40Xn31VaNnz57Gq6++GuZoo9Prr79udOvWzVixYoWxZ88eo6CgwMjMzDS+/PJLo7Ky0hg1apRxww03GNu3bzfeffddIycnx3jiiSfCHXbUysvLMwYNGmT87W9/M7799lvjrrvuMvr06WMcOHCAYztIVqxYYWRmZhoTJkxwT/Mlt6E6d1IwRaiKigojOzvbeOGFF9zTjh49amRlZRmrV68OY2TR7dtvvzW6du1qbNiwwT3N5XIZI0eONB5//HHjd7/7nTFu3DiPdX77298a1113ndWhxpQFCxYYv/71rz0KpsWLFxuDBw82qqqqPJYbNWpUuMKMWi6Xyxg+fLgxb948j+nXXXedsXjxYmP16tVGjx49jCNHjrjnvfTSS0bv3r35AhagMWPGGA899JD792
PHjhldu3Y13nnnHY7tJiosLDRuvPFGo1evXsbo0aM9Ciaz3Iby3MkluQi1bds2lZSUaODAge5paWlp6tatm9avXx/GyKJbenq6nnnmGfXs2dM9zWazyWazqbi4WBs2bPDIuSQNGDBAX3zxhQzDsDrcmLB+/Xq9/PLLmjdvnsf0DRs2KCcnRw6Hwz1twIAB+vbbb/XDDz9YHWZU2717t7777jtddNFFHtOXLFmiG2+8URs2bFD37t110kknuecNGDBAx48f19atW60ONya0bt1aH3zwgfbt2yen06mXX35ZCQkJyszM5Nhuon/+85+Kj4/Xm2++qXPPPddjnlluQ3nupGCKUIWFhZKk9u3be0xv27atex78l5aWpqFDhyohIcE97Z133tGePXs0ZMgQFRYWql27dh7rtG3bVmVlZTp8+LDV4Ua94uJi3XHHHZo9e3a9Y7mhXEvS/v37LYsxFuzevVuSVFpaquuvv14DBw7UZZddpvfff18SuQ6Fu+66S/Hx8Tr//PPVs2dPLVy4UHl5eerYsSP5bqIRI0Zo0aJFOv300+vNM8ttKM+dFEwRqqysTJI8TuyS1KJFC1VUVIQjpJj05ZdfatasWRo1apSGDRum8vLyejmv+b2ysjIcIUa1e+65R9nZ2fVaPiR5zXWLFi0kiWPcT8ePH5ckzZw5U7m5uVq6dKkGDRqkyZMn69NPPyXXIbBz5061bNlSTz75pF5++WWNHTtW06dP19atW8l3CJnlNpTnTof5IgiHxMRESSdO0jU/SycOiKSkpHCFFVPee+89TZ8+Xb1799b8+fMlnXhT1S2Man4n7/55/fXXtWHDBq1evdrr/MTExHq5rvlAS05ODnl8sSQ+Pl6SdP311+vSSy+VJJ1zzjnasmWLnnvuOXIdZPv379ftt9+u559/Xn379pUk9ezZUzt37tSiRYvIdwiZ5TaU505amCJUTXNiUVGRx/SioiKdeuqp4QgppqxYsUK33nqrhg8frsWLF7u/obRv395rzpOTk9WyZctwhBq1Vq5cqR9//FHDhg1Tdna2srOzJUl33323Jk6cqHbt2nnNtSSOcT/V5Ktr164e08866yzt27ePXAfZpk2bVFVV5dEXUpLOPfdc7dmzh3yHkFluQ3nupGCKUJmZmUpNTdW6devc04qLi7Vlyxb169cvjJFFvxdffFH333+/fvWrX+mxxx7zaLrt27evPv/8c4/lP/vsM/Xu3Vtxcbxd/DF//nytWbNGr7/+uvufJE2dOlUPPPCA+vXrpy+++MJjLKDPPvtMZ555plq3bh2mqKNT9+7dlZKSok2bNnlM37Fjhzp27Kh+/fppy5Yt7kt30olcp6SkKDMz0+pwo15NH5rt27d7TN+xY4fOOOMMju0QMsttSM+dTbrHDiH12GOPGTk5OcZ7773nMZZEZWVluEOLWt98843RvXt345ZbbjGKioo8/hUXFxs7duwwunfvbjz66KPGzp07jSVLljAOUxDVHlbghx9+MPr162fMnDnT+Ne//mWsXLnS6Nmzp7Fq1aowRxmdnnzySSM7O9tYvXq1xzhMn332mVFeXm6MHDnSuP76642tW7e6x2FatGhRuMOOSk6n07jyyiuN0aNHG59++qmxe/duY+HChcY555xjfPXVVxzbQTRz5kyPYQV8yW2ozp0UTBGsurraeOSRR4wBAwYYvXr1MiZNmmTs3bs33GFFtaeeesro2rWr138zZ840DMMwPvzwQyM3N9fo0aOHMXr0aOOtt94Kc9Sxo3bBZBiGsWnTJuPyyy83evToYQwfPtxYvnx5GKOLfkuXLjVGjBhhdO/e3RgzZozx7rvvuud9++23xrXXXmv07NnTGDx4sPH4448bTqczjNFGtyNHjhj33HOPMWzYMCM7O9v45S9/aaxbt849n2M7OOoWTIZhnttQnTtthsHgMgAAAI2hUwYAAIAJCiYAAAATFEwAAAAmKJgAAABMUDABAACYoGACAAAwQc
EEAABggofvArDMnXfeqddee63RZXJycrR8+XKv86666ipJanC+NyNGjFBOTo7mzZvne6AWy8jI0JQpU3TrrbeGOxQADaBgAmCZyZMn64orrnD/XlBQoC1btig/P989LTU1tcH177777pDGBwANoWACYJmOHTuqY8eO7t9btWqlhIQE9erVy6f1zzrrrBBFBgCNow8TgIizatUqdevWTa+88ooGDRqknJwc7dy5U1dddZX7spwkHTp0SPfee6+GDx+uHj16KCcnR7fccov27dvn82stWrRIP/vZz/TXv/5VF110kXr06KH/+Z//0euvv+4RT0ZGRr3tjhgxQnfeeaf794yMDP3hD3/QnXfeqT59+ignJ0dz585VeXm5Hn74YQ0YMED9+/fXXXfdpYqKCo9tHT9+XNOnT1d2drYGDhyouXPnqqyszGOZ9957T2PHjlXPnj01aNAgzZ07V6WlpfX2JT8/Xzk5ORo8eLCOHj3qcy4ANIwWJgARyel0aunSpXrggQd0+PBhdenSxWO+YRi68cYbdfToUU2fPl2nnHKKtm/frscff1x33323lixZ4vNrHTx4UPfdd59uvvlmdejQQUuWLNHMmTPVs2fPeq9r5tFHH1Vubq7y8/P1wQcf6P/+7//08ccfKzMzU/Pnz9dXX32lRYsW6cwzz9TEiRPd6y1fvlxDhw7V448/rt27d2vhwoXav3+/nnzySUnS6tWrNX36dF100UX6zW9+o++++04LFy7Uzp079dxzz8lms0mSvv/+e3344YdauHChjhw5opNOOsmv+AF4R8EEIGLddNNNGjZsmNd5RUVFSkpK0syZM9W3b19JUv/+/fXvf/9bL7/8sl+vU1ZWpgceeEADBw6UJJ1xxhkaPny4PvzwQ78LprPOOkv33XefpBMd2F955RVVVVVp/vz5cjgcGjx4sN555x19+eWXHut16dJFTz75pOLi4jR06FDZbDY9+OCD2rFjh84++2zNnz9fQ4YM0fz5893rnHHGGbrmmmv04YcfuvNUXV3tkRMAwcElOQAR65xzzmlw3qmnnqply5apT58+2rdvn/7+979r+fLl+vLLL1VZWen3a9XuR9WuXTtJ8rjc5avs7Gz3z3a7Xenp6erevbscjv9+Pz355JN17Ngxj/VGjx6tuLj/fiSPGjVKkrR+/Xp98803Kiws1IgRI1RdXe3+169fP6Wmpurvf/+7x7YayxuAwNDCBCBiJScnNzr/zTff1GOPPab9+/fr5JNP1jnnnKPExMSAXispKcn9c03hYhiG39vxdpef2X5IUps2bTx+b926tSSpuLhYR44ckSTde++9uvfee+utW1RU5PF7SkqKr+EC8BEFE4CotGHDBs2cOVNXXXWVrr/+ep166qmSpEceeURffPFFUF+rpn+Qy+XymF5SUhK016gpimocPHhQ0onCKS0tTZJ0xx13KCcnp9669FMCQo9LcgCi0saNG+VyuXTrrbe6iyWn06lPPvlEUv3ipilqWo0KCwvd03bt2lWvyGmKjz76yOP3t956SzabTTk5OercubNat26tffv2qWfPnu5/p556qhYsWKAtW7YELQ4A3tHCBCAqZWVlSZLuu+8+/eIXv9DRo0f1wgsvaNu2bZJO9D9qbBBMf/Tv31+JiYmaN2+ebrvtNpWUlCgvL08nn3xyULYvSV9//bXuuusu5ebm6uuvv1ZeXp7GjRunM844Q5I0bdo0zZkzR3a7XcOHD1dxcbEKCgp04MABde/ePWhxAPCOgglAVOrfv7/mzJmj5557Tn/+8591yimnqH///srPz9ctt9yiL774QkOHDg3Ka6WlpWnRokVasGCBbrnlFnXo0EFTpkzxGKupqW655Rb94x//0E033aSWLVtq4sSJmjJlinv+ZZddppSUFD377LN6+eWXlZycrN69e2v+/Pk6/fTTgxYHAO9sRiC9GgEAAJoR+jABAACYoGACAAAwQcEEAABggoIJAADABAUTAA
CACQomAAAAExRMAAAAJiiYAAAATFAwAQAAmKBgAgAAMEHBBAAAYIKCCQAAwMT/A490AK+hxNO5AAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ax = sns.scatterplot(data=study.trials_dataframe(), x=\"number\", y=\"value\")\n", + "ax.set(xlabel=\"Trial number\", ylabel=\"Ojbective value\\n(r2)\");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Advanced functionality: algorithms & runs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Various algorithms are available in QSARtuna:" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(optunaz.config.optconfig.Lasso,\n", + " optunaz.config.optconfig.PLSRegression,\n", + " optunaz.config.optconfig.RandomForestRegressor,\n", + " optunaz.config.optconfig.Ridge,\n", + " optunaz.config.optconfig.KNeighborsRegressor,\n", + " optunaz.config.optconfig.SVR,\n", + " optunaz.config.optconfig.XGBRegressor,\n", + " optunaz.config.optconfig.PRFClassifier,\n", + " optunaz.config.optconfig.ChemPropRegressor,\n", + " optunaz.config.optconfig.ChemPropRegressorPretrained,\n", + " optunaz.config.optconfig.ChemPropHyperoptRegressor,\n", + " optunaz.config.optconfig.AdaBoostClassifier,\n", + " optunaz.config.optconfig.KNeighborsClassifier,\n", + " optunaz.config.optconfig.LogisticRegression,\n", + " optunaz.config.optconfig.RandomForestClassifier,\n", + " optunaz.config.optconfig.SVC,\n", + " optunaz.config.optconfig.ChemPropClassifier,\n", + " optunaz.config.optconfig.ChemPropHyperoptClassifier,\n", + " optunaz.config.optconfig.CalibratedClassifierCVWithVA,\n", + " optunaz.config.optconfig.Mapie)" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from optunaz.config.optconfig import AnyAlgorithm\n", + "AnyAlgorithm.__args__" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This tutorial will now look at more complex considerations that should be factored for more advanced 
functionality such as the PRF and ChemProp algorithms" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Probabilistic Random Forest (PRF)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "PRF is a modification of the long-established Random Forest (RF) algorithm and takes into account uncertainties in features and/or labels (though only uncertainty in labels is currently implemented in QSARtuna), which was first described in [1]. It can be seen as a probabilistic method to factor experimental uncertainty during training, and is considered a hybrid between regression and classification algorithms.\n", + "\n", + "In more detail, PRF treats labels as probability distribution functions [PDFs] (denoted as `∆y`), rather than deterministic quantities. In comparison, the traditional RF uses discrete variables for activity (binary y-labels, also referred to as `y`) from the discretised bioactivity scale defining active/inactive sets. \n", + "\n", + "PRF integration was added to QSARtuna to afford this probabilistic approach towards modelling, and is particularly useful combined with the PTR (see the preprocessing notebook for details). In this combination, PRF takes as input real-valued probabilities (similar to regression), from a Probabilistic Threshold Representation (PTR). 
However, similar to classification algorithms, PRF outputs the probability of activity for the active class.\n", + "\n", + "Note that QSARtuna runs the PRFClassifier in a regression setting, since the model only outputs class likelihood membership based on `∆y`.\n", + "\n", + "[1] https://iopscience.iop.org/article/10.3847/1538-3881/aaf101/meta\n", + "\n", + "The following code imports the PRFClassifier and sets up a config to use the PRF with PTR:" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "from optunaz.config.optconfig import PRFClassifier\n", + "\n", + "# Prepare hyperparameter optimization configuration.\n", + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"Smiles\",\n", + " response_column=\"Measurement\",\n", + " training_dataset_file=\"../tests/data/pxc50/P24863.csv\",\n", + " probabilistic_threshold_representation=True, # This enables PTR\n", + " probabilistic_threshold_representation_threshold=8, # This defines the activity threshold\n", + " probabilistic_threshold_representation_std=0.6, # This captures the deviation/uncertainty in the dataset\n", + " ),\n", + " descriptors=[\n", + " ECFP.new(),\n", + " ECFP_counts.new(),\n", + " MACCS_keys.new(),\n", + " ],\n", + " algorithms=[\n", + " PRFClassifier.new(n_estimators={\"low\": 20, \"high\": 20}), #n_estimators set low for the example to run fast\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.REGRESSION,\n", + " cross_validation=2,\n", + " n_trials=15,\n", + " random_seed=42,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " ),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that QSARtuna is run in regression mode (`ModelMode.REGRESSION`), as outputs from the algorithm are always continuous values.\n", + "\n", + "Next we can run the PRF/PTR study:" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + 
"scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:17:53,733] A new study created in memory with name: my_study\n", + "[I 2024-07-02 13:17:53,734] A new study created in memory with name: study_name_0\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/joblib/memory.py:577: JobLibCollisionWarning: Possible name collisions between functions 'calculate_from_smi' (/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/optunaz/descriptors.py:-1) and 'calculate_from_smi' (/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/optunaz/descriptors.py:180)\n", + " return self._cached_call(args, kwargs, shelving=False)[0]\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/joblib/memory.py:577: JobLibCollisionWarning: Possible name collisions between functions 'calculate_from_smi' (/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/optunaz/descriptors.py:-1) and 'calculate_from_smi' (/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/optunaz/descriptors.py:180)\n", + " return self._cached_call(args, kwargs, shelving=False)[0]\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/joblib/memory.py:577: JobLibCollisionWarning: Possible name collisions between functions 'calculate_from_smi' (/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/optunaz/descriptors.py:-1) and 'calculate_from_smi' (/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/optunaz/descriptors.py:180)\n", + " return self._cached_call(args, kwargs, shelving=False)[0]\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/joblib/memory.py:577: 
JobLibCollisionWarning: Possible name collisions between functions 'calculate_from_smi' (/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/optunaz/descriptors.py:-1) and 'calculate_from_smi' (/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/optunaz/descriptors.py:180)\n", + " return self._cached_call(args, kwargs, shelving=False)[0]\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/joblib/memory.py:577: JobLibCollisionWarning: Possible name collisions between functions 'calculate_from_smi' (/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/optunaz/descriptors.py:-1) and 'calculate_from_smi' (/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/optunaz/descriptors.py:180)\n", + " return self._cached_call(args, kwargs, shelving=False)[0]\n", + "[I 2024-07-02 13:18:00,764] Trial 0 finished with value: -0.08099580623289632 and parameters: {'algorithm_name': 'PRFClassifier', 'PRFClassifier_algorithm_hash': 'efe0ba9870529a6cde0dd3ad22447cbb', 'max_depth__efe0ba9870529a6cde0dd3ad22447cbb': 13, 'n_estimators__efe0ba9870529a6cde0dd3ad22447cbb': 20, 'max_features__efe0ba9870529a6cde0dd3ad22447cbb': , 'min_py_sum_leaf__efe0ba9870529a6cde0dd3ad22447cbb': 5, 'use_py_gini__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'use_py_leafs__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 0 with value: -0.08099580623289632.\n", + "[I 2024-07-02 13:18:05,408] Trial 1 finished with value: -0.07261454017489567 and parameters: {'algorithm_name': 'PRFClassifier', 'PRFClassifier_algorithm_hash': 'efe0ba9870529a6cde0dd3ad22447cbb', 'max_depth__efe0ba9870529a6cde0dd3ad22447cbb': 6, 'n_estimators__efe0ba9870529a6cde0dd3ad22447cbb': 20, 'max_features__efe0ba9870529a6cde0dd3ad22447cbb': , 'min_py_sum_leaf__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'use_py_gini__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'use_py_leafs__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 1 with value: -0.07261454017489567.\n", + "[I 2024-07-02 13:18:07,780] Trial 2 finished with value: -0.08791063872794351 and parameters: {'algorithm_name': 'PRFClassifier', 'PRFClassifier_algorithm_hash': 'efe0ba9870529a6cde0dd3ad22447cbb', 'max_depth__efe0ba9870529a6cde0dd3ad22447cbb': 2, 'n_estimators__efe0ba9870529a6cde0dd3ad22447cbb': 20, 'max_features__efe0ba9870529a6cde0dd3ad22447cbb': , 'min_py_sum_leaf__efe0ba9870529a6cde0dd3ad22447cbb': 5, 'use_py_gini__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'use_py_leafs__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 1 with value: -0.07261454017489567.\n", + "[I 2024-07-02 13:18:11,911] Trial 3 finished with value: -0.07114663955819509 and parameters: {'algorithm_name': 'PRFClassifier', 'PRFClassifier_algorithm_hash': 'efe0ba9870529a6cde0dd3ad22447cbb', 'max_depth__efe0ba9870529a6cde0dd3ad22447cbb': 7, 'n_estimators__efe0ba9870529a6cde0dd3ad22447cbb': 20, 'max_features__efe0ba9870529a6cde0dd3ad22447cbb': , 'min_py_sum_leaf__efe0ba9870529a6cde0dd3ad22447cbb': 2, 'use_py_gini__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'use_py_leafs__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 3 with value: -0.07114663955819509.\n", + "[I 2024-07-02 13:18:15,879] Trial 4 finished with value: -0.06537440628140882 and parameters: {'algorithm_name': 'PRFClassifier', 'PRFClassifier_algorithm_hash': 'efe0ba9870529a6cde0dd3ad22447cbb', 'max_depth__efe0ba9870529a6cde0dd3ad22447cbb': 20, 'n_estimators__efe0ba9870529a6cde0dd3ad22447cbb': 20, 'max_features__efe0ba9870529a6cde0dd3ad22447cbb': , 'min_py_sum_leaf__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'use_py_gini__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'use_py_leafs__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. 
Best is trial 4 with value: -0.06537440628140882.\n", + "[I 2024-07-02 13:18:28,446] Trial 5 finished with value: -0.05680450487193368 and parameters: {'algorithm_name': 'PRFClassifier', 'PRFClassifier_algorithm_hash': 'efe0ba9870529a6cde0dd3ad22447cbb', 'max_depth__efe0ba9870529a6cde0dd3ad22447cbb': 26, 'n_estimators__efe0ba9870529a6cde0dd3ad22447cbb': 20, 'max_features__efe0ba9870529a6cde0dd3ad22447cbb': , 'min_py_sum_leaf__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'use_py_gini__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'use_py_leafs__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 5 with value: -0.05680450487193368.\n", + "[I 2024-07-02 13:18:29,968] Trial 6 pruned. Duplicate parameter set\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'PRFClassifier', 'PRFClassifier_algorithm_hash': 'efe0ba9870529a6cde0dd3ad22447cbb', 'max_depth__efe0ba9870529a6cde0dd3ad22447cbb': 20, 'n_estimators__efe0ba9870529a6cde0dd3ad22447cbb': 20, 'max_features__efe0ba9870529a6cde0dd3ad22447cbb': , 'min_py_sum_leaf__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'use_py_gini__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'use_py_leafs__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}, return [-0.06537440628140882]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:18:33,543] Trial 7 finished with value: -0.0656836821774901 and parameters: {'algorithm_name': 'PRFClassifier', 'PRFClassifier_algorithm_hash': 'efe0ba9870529a6cde0dd3ad22447cbb', 'max_depth__efe0ba9870529a6cde0dd3ad22447cbb': 27, 'n_estimators__efe0ba9870529a6cde0dd3ad22447cbb': 20, 'max_features__efe0ba9870529a6cde0dd3ad22447cbb': , 'min_py_sum_leaf__efe0ba9870529a6cde0dd3ad22447cbb': 2, 'use_py_gini__efe0ba9870529a6cde0dd3ad22447cbb': 1, 
'use_py_leafs__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 5 with value: -0.05680450487193368.\n", + "[I 2024-07-02 13:18:37,333] Trial 8 finished with value: -0.07863564862376404 and parameters: {'algorithm_name': 'PRFClassifier', 'PRFClassifier_algorithm_hash': 'efe0ba9870529a6cde0dd3ad22447cbb', 'max_depth__efe0ba9870529a6cde0dd3ad22447cbb': 5, 'n_estimators__efe0ba9870529a6cde0dd3ad22447cbb': 20, 'max_features__efe0ba9870529a6cde0dd3ad22447cbb': , 'min_py_sum_leaf__efe0ba9870529a6cde0dd3ad22447cbb': 3, 'use_py_gini__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'use_py_leafs__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 5 with value: -0.05680450487193368.\n", + "[I 2024-07-02 13:18:42,329] Trial 9 finished with value: -0.0648840199215795 and parameters: {'algorithm_name': 'PRFClassifier', 'PRFClassifier_algorithm_hash': 'efe0ba9870529a6cde0dd3ad22447cbb', 'max_depth__efe0ba9870529a6cde0dd3ad22447cbb': 22, 'n_estimators__efe0ba9870529a6cde0dd3ad22447cbb': 20, 'max_features__efe0ba9870529a6cde0dd3ad22447cbb': , 'min_py_sum_leaf__efe0ba9870529a6cde0dd3ad22447cbb': 2, 'use_py_gini__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'use_py_leafs__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 5 with value: -0.05680450487193368.\n", + "[I 2024-07-02 13:18:46,014] Trial 10 finished with value: -0.07861037073288182 and parameters: {'algorithm_name': 'PRFClassifier', 'PRFClassifier_algorithm_hash': 'efe0ba9870529a6cde0dd3ad22447cbb', 'max_depth__efe0ba9870529a6cde0dd3ad22447cbb': 32, 'n_estimators__efe0ba9870529a6cde0dd3ad22447cbb': 20, 'max_features__efe0ba9870529a6cde0dd3ad22447cbb': , 'min_py_sum_leaf__efe0ba9870529a6cde0dd3ad22447cbb': 4, 'use_py_gini__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'use_py_leafs__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 5 with value: -0.05680450487193368.\n", + "[I 2024-07-02 13:18:50,608] Trial 11 finished with value: -0.06669924317660021 and parameters: {'algorithm_name': 'PRFClassifier', 'PRFClassifier_algorithm_hash': 'efe0ba9870529a6cde0dd3ad22447cbb', 'max_depth__efe0ba9870529a6cde0dd3ad22447cbb': 30, 'n_estimators__efe0ba9870529a6cde0dd3ad22447cbb': 20, 'max_features__efe0ba9870529a6cde0dd3ad22447cbb': , 'min_py_sum_leaf__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'use_py_gini__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'use_py_leafs__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. 
Best is trial 5 with value: -0.05680450487193368.\n", + "[I 2024-07-02 13:18:54,997] Trial 12 finished with value: -0.06734611679947522 and parameters: {'algorithm_name': 'PRFClassifier', 'PRFClassifier_algorithm_hash': 'efe0ba9870529a6cde0dd3ad22447cbb', 'max_depth__efe0ba9870529a6cde0dd3ad22447cbb': 14, 'n_estimators__efe0ba9870529a6cde0dd3ad22447cbb': 20, 'max_features__efe0ba9870529a6cde0dd3ad22447cbb': , 'min_py_sum_leaf__efe0ba9870529a6cde0dd3ad22447cbb': 2, 'use_py_gini__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'use_py_leafs__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 5 with value: -0.05680450487193368.\n", + "[I 2024-07-02 13:18:59,526] Trial 13 finished with value: -0.06810559387741143 and parameters: {'algorithm_name': 'PRFClassifier', 'PRFClassifier_algorithm_hash': 'efe0ba9870529a6cde0dd3ad22447cbb', 'max_depth__efe0ba9870529a6cde0dd3ad22447cbb': 18, 'n_estimators__efe0ba9870529a6cde0dd3ad22447cbb': 20, 'max_features__efe0ba9870529a6cde0dd3ad22447cbb': , 'min_py_sum_leaf__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'use_py_gini__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'use_py_leafs__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. 
Best is trial 5 with value: -0.05680450487193368.\n", + "[I 2024-07-02 13:19:11,856] Trial 14 finished with value: -0.0528189695245453 and parameters: {'algorithm_name': 'PRFClassifier', 'PRFClassifier_algorithm_hash': 'efe0ba9870529a6cde0dd3ad22447cbb', 'max_depth__efe0ba9870529a6cde0dd3ad22447cbb': 25, 'n_estimators__efe0ba9870529a6cde0dd3ad22447cbb': 20, 'max_features__efe0ba9870529a6cde0dd3ad22447cbb': , 'min_py_sum_leaf__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'use_py_gini__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'use_py_leafs__efe0ba9870529a6cde0dd3ad22447cbb': 1, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 14 with value: -0.0528189695245453.\n" + ] + } + ], + "source": [ + "# Run the PRF/PTR Optuna Study.\n", + "study = optimize(config, study_name=\"my_study\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can now plot obtained performance across the Optuna trials." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.set_theme(style=\"darkgrid\")\n", + "default_reg_scoring = config.settings.scoring\n", + "ax = sns.scatterplot(data=study.trials_dataframe(), x=\"number\", y=\"value\")\n", + "ax.set(xlabel=\"Trial number\", ylabel=f\"Ojbective value\\n({default_reg_scoring})\");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Build the best PRF model:" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "buildconfig = buildconfig_best(study)\n", + "best_built = build_best(buildconfig, \"../target/best.pkl\")\n", + "\n", + "with open(\"../target/best.pkl\", \"rb\") as f:\n", + " model = pickle.load(f)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot predictions from the merged model for the (seen) train data for demonstration purposes" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAjsAAAG1CAYAAAAfhDVuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAACQRklEQVR4nO3dd3xT5f4H8E/SJp1JF7QFWqBA6YRSoIWyKVOsWlGvICByL+OCioigeEVErtfBuChIGSoulgpaxmWDorKnzJZpKaMDulfa5JzfH/0lNs06JzmZ/b5fL19CcnLOk6eh55vn+T7fR8SyLAtCCCGEEBcltncDCCGEEEKsiYIdQgghhLg0CnYIIYQQ4tIo2CGEEEKIS6NghxBCCCEujYIdQgghhLg0CnYIIYQQ4tIo2CGEEEKIS6NghxBCCCEuzd3eDXAELMuCYaxTSFosFlnt3OQv1M+2Qf1sO9TXtkH9bDtC97VYLIJIJOJ0LAU7ABiGRVFRpeDndXcXIyDAB2VlVVAqGcHPT+pRP9sG9bPtUF/bBvWz7VijrwMDfeDmxi3YoWksQgghhLg0CnYIIYQQ4tIo2CGEEEKIS6NghxBCCCEujYIdQgghhLg0CnYIIYQQ4tIo2CGEEEKIS6NghxBCCCEujYIdQgghhLg0hwp2Vq9ejXHjxhk9pri4GK+99hqSkpKQnJyMd999F9XV1TZqISGEEEKcjcNsF7F+/Xp8/PHH6N69u9Hjpk+fjurqanz11VcoKyvDW2+9haqqKnz00Uc2aikhhBBCnIndg538/Hy88847OH78ONq2bWv02LNnz+LEiRPYuXMn2rdvDwBYsGABJk6ciJkzZyIkJMQGLSaEEEKIM7H7NNalS5cgkUiwbds2JCQkGD321KlTaN68uSbQAYDk5GSIRCKcPn3a2k0lhBBCiBOy+8hOamoqUlNTOR2bn5+PFi1aaD0mlUrh7++P+/fvW9QOd3fh4z43N7HW/4l1UD/bBvWz7VBf2wb1s/XdKazAwdN3MCS5NQICfOzW13YPdviorq6GVCrVedzDwwMKhcLs84rFIgQE+FjSNKPkci+rnZv8hfrZNqifbYf62jaon4WlYlhcuvEAv567i4OnclGnZBAU4I3Y9s3t1tdOFex4enqitrZW53GFQgFvb2+zz8swLMrKqixpml5ubmLI5V4oK6uGSsUIfn5Sj/rZNqifbYf62jaony3DMCyybxejpKIW/r5SRLUOwOmrhVi3OwvFFX/dq93dRAjwlgCAoH0tl3txHilyqmAnNDQU+/fv13qstrYWJSUlCA4OtujcSqX1PugqFWPV85N61M+2Qf1sO9TXtkH9zN/p7AJs2H8NxeV/zar4eLqjskapc6xSxWLV1ksI8PdGTLifXfraqSYqk5KSkJeXh5ycHM1jJ06cAAB069bNXs0ihBBCmozT2QVY8dNFrUAHgN5Ap6HPtl4Ew7DWbJpBDh3sqFQqFBYWoqamBgCQkJCArl274tVXX8X58+dx7NgxzJs3D+np6bTsnBBCCLEyhmGxYf81s177oKQa2beLBW4RNw4d7Ny/fx99+vTBzp07AQAikQiffvopwsLCMH78eMyYMQP9+vXD/Pnz7dtQQgghpAm4mluiM6LDR0mFbt6tLThUzs6HH36o9fewsDBkZ2drPRYUFIRly5bZslmEEEIIAVBSaX6gAwD+vrorqm3BoUd2CCGEEOI4/H08zH5tM38vRLUOELA13FGwQwghhBBO3N3FEIvMe+2kJ+IhNvfFFnKoaSxCCCGEOB6WZbHvZC5++OUGTC2oarwEPVDmgTHDotCrc0sUF1dauaX6UbBDCCGEEDAMi6u5JSipVMDfxwMdw/0hFotQUV2Htf+7gnPXHwAAukU1R2KHZtjy602tZOVAmQdGD45EYmRznfNIpW72elsAKNghhBBCmjx9RQIDZB4YmNgKv5y7i6IyBdzdRHg2NRKpXVtBJBKhZ1yo3uAIAKLb2Cc3xxA
KdgghhJAmTF0ksLHicgV+/PUmACA4wAtTn4hHm1CZ5nmxWORwQY0hFOwQQgghTRSXIoFSdzHefr47fLwkNmqV8Gg1FiGEENJEcSkSWKtkkFtQYfJcDMMiK6cYxy7nISun2G5bQ+hDIzuEEEJIE8W1SKCp4wxtDDqkexjSekVY1EYh0MgOIYQQ0kTJvblVNDZWTNDYxqCZv/+JV5b9hpNZBRa101I0skMIIYQ0QaezC/DVriyTxwXK6lda6cMl56eyRonlm8/D18cDMeF+5jTVYjSyQwghhDQxJ7PyseKni1rF/wwZPTjSYOVjPhuDfrb1ot3yeGhkhxBCCHFhjYsFNvPzxGfbL5t8XYDMA88NjkS3qGCDx/DZGPRBSTWybxcjMsyf82uEQsEOIYQQ4qL0JQ6LAHAZX/nHiBjERgRqPdY4cJJ78dvFvKSiltfxQqFghxBCCHFBhooFcp1IKqvWDkwMVVluvBeWMf6+/IIjoVCwQwghhLgYLonDpjRcgWWsyjJXzfy9ENU6wC55O5SgTAghhLgYPonD+ohEQIdW9SunuAROvl4SeHsaHz+Z9ES8wURna6NghxBCCHExReU1Fr2eZYHrd0sBcAucKqrrMC09Hul9IuDTKOgJlHng5ac7o1fnlha1yRI0jUUIIYS4kAel1dh5NMfi86gDHK4rrsqqavF4nwik9Wqrsxu6VOpmcXssQcEOIYQQ4iLOXi3E2p1XUFmj5LzqypBv9mSjsKQKHVr5czpenePjiLuhU7BDCCGEODmlisEPP9/AvlO5AICIFjL06dQC3+69avY5FXUqZP7+J3w83U2uuAqUeaBDKz9k5RRrjejYK0enMQp2CCGEECdWWFKNVVsv4tb9cgDA0KRwPD2gPdzdxJD7SHWWiwfKPNA6xBfnrj/kdH4uy8qTY4LxxuqjOsvSTRUltBUKdgghhBAndSqrAF/uykK1QgkfT3f8/dEYJEY21zzfLSoYiZHNtXJoOrTywxurj/K+lq+XBO5uIq3CgIEyDyTHBGP3iVyd44vLFVjx00W8+GQ8esSFmvcGBULBDiGEEOJk6pQqfHfwOg6euQsAaN9KjimPx6GZn5fOsY1zaLJyis1all5RXYdZo7pALBLxCpw27r+GpJgQ3tcTEgU7hBBCiIAab6kgdO5KfnEVVmZexO38CgDAIz1a48l+7eDuxq2aDJ/9rBorq6pFz9i/Rmm4BE5F5Qpk3y5GryBfs69rKQp2CCGEEIEY2lJBqNyV45fz8fXuLNTUquDrJcHEtBh0bt+M1zkaVkbmq/FruQZO9toTS42KChJCCCECUG+p0HikQ527cjq7wOxz19ap8PXuLKzedgk1tSp0DPPDvBe6Q+ruhmOX85CVU8x5G4aO4f4IkPEPeAJl9aNUDXENnOy1J5YajewQQgghFuKypcLG/deQGNmc95TW/YeVWJl5CXcKKyAC8GivNghv7osP1p0xawRJLBbhucGReve6Mmb04EidtqsDJ2NTWYEyD0S1tm/dHRrZIYQQQizEZUuFonIFruaW8Drv0Yt5WPDVKdwprIDcW4KZz3ZBmxAZVm69ZNEIUreoYLz4ZDx8vSSc2pHeJ0JvEKUOnIzRFyTZGgU7hBBCiIU4565wPE5Rq8La/13BZzsuQ1GnQnRrf8z/ezJi2gSYHEH6Znc2lErG5DW6RQXjvy/2hsxEwBPgK0Var7Z6n2MYFj6eEgzpHqZznkCZB158Mp7q7BBCCCGugHPuCofj7hZWYOXWS7j3oBIiAI/3icBjvdpCLBZxWv1UXl2H11YcxvPDo0wGGu7uYjw/PMrolNZzQzrqHZnRl4zt6yVBSlwIEiObO1QFZRrZIYQQQizEJelXX4JvQyzL4rfz9/Dvr0/h3oNK+PlIMWt0Ip7oE6EJGriODJVX1/Ge0mrcfmMjM4aSsSuq67Dv1B1U1tQ5TKAD0MgOIYQQYjEuSb/GcldqapX4dk82jl7KBwDEtQ3AxMfi4OejvYq
J77JxrknR+iotGxqZsWYytrVQsEMIIYQIQD1Com8vqtFGVknlFlRgZeZF5BVVQSQCnuzbDiNS2kAs0g0UuKx+akidFM1lF3Kuu5XzScZ2lN3PKdghhBBCBMJnhIRlWRz64x427LsGpYpBgMwDUx6PMzrVZc6ycUsqJltyPqGvawkKdgghhBABcRkhqVYo8fXuLJy4Up9T06ldECamxUDm/de0laFtJ9QjSF/vzkZFdZ3J9lhSMdmS8wl9XUtQsEMIIYTYUE5eOVZuvYiC4mqIRSI8NaAdhiW31pq2MrXtRLeoYCS0b4bXVhxGuZGAx1RStDm4FhIU+rqWoNVYhBBCiA2wLIsDp+/gP9+eQkFxNYLkHpgztise6dFGJ9Dhsu2Eetm4MdYo6OcshQQbomCHEEIIsbKqmjpkZF7E+n1XoVSx6NKhGd6ZkIwOrfy0juO60km9D5Y5y8aFYK/rmoumsQghhBArunW/DCszL+JBaQ3cxCI8M7ADhnQPg0jPaitzVjrxSYo2xlCOkCFCXdcWKNghhBBCrIBlWew7dQc//HwdKoZFMz9PTE2PR0QLucHXmLvSieuycUNM5QgZYul1bYWCHUIIIURgFdV1WPu/Kzh3/QEAoFvH5pgwIhrensb3obLHSid1jlBj6hwhR5yW4ouCHUIIIU0aw7DIyikWbCrm+t1SrN56EQ/LFHB3E+HZ1Eikdm2ld9qqMSFWOvGZjuKSI/TVrix4Sd0R3SbAIaeouKBghxBCSJN15Pw9rP7xPIp4Tt/ow7As9py4jR8P3YSKYRHs74Wp6fFoEyrjfA5Lt53gOx3FJUeoskaJxd+dM7tfHAGtxiKEENIkncwqwAdfn9QKdADdJd5clFfVYtnm8/jh5xtQMSySY4LxzoQko4GOekTp2OU8ZOUUa1ZYmYvrkvWG+FQ5Nqdf1O/x6MU8XLj+wOL3aC4a2SGEENLkMAyL9XuyjR5jajNL9XTRldtF+PnMPVRU18HdTYznhkSif0JLo9NWhkZgRg3qgE0HrvNul7mbc5qT+8N1k09979HUPmHWQiM7hBBCmpyruSU6IzqNqZd463M6uwCzMg5j4caz2H44BxXVdRCLRXiqfzsM6GI8P8fYCMzKzEucl543fj/mvE6dI8SHsX5RM/Qei8wYHRICBTuEEEKaHEs2s1TfyEsqarUeZxgW3x28bvRGzmUExpx2WbJk3VQ1ZC7naYhvYURboGCHEEJIk2PuEm+GYfH1btPTX4Zu5FxGYMxpl7zBBqLG6DvOUDVkPtdXYxgW+0/lmjXKZE2Us0MIIaTJMWeJt1LJYNmW8yZ3Gm9c4bghPgnBXNsFAOA6SGLgOHU15KzbxViZeRGVNUp+14f+HB1jhOgLrmhkhxBCSJNz9loh6upURo9puMT71z/uYtrSQ7h4q4jT+Q3dyIUoBqhv6XlZda2Bo7UZO04sFiG2bSBeeCSa9/UN5egYI2RhRFMo2CGEENKkqG/MFQZGL3y9JFpVgzN/u4mvdmVDqeKeY2LoRm5OQrCasU02hay8zHeTT3PykEwVRhQaTWMRQghpMrjcmCVuIiRGNoeKYZD52y3872gOr2v4eLqDYVgwDKszAsKlaGBDMi8JRg2KRIDMeCVkISovN8Rnk09z8pCMFUa0Bgp2CCGECIrv7tm2xOXGXFxRi9PZBThw+g6u3inlfQ1TFYfVIyff7M5GuYn8n/LqOgTIPExutmlp5WVD5+SyySef3JtAuQdGD7J9nR0KdgghhAjG3N2zralh8HXvQSWn16zdlQVFrQoSNzHqVIxZ1zW2kWa3qGAolCp8vv2KyfNwDSbUQZStC/lxnUKb+Hg8+sSH2KWKst2DHYZh8Omnn+KHH35AeXk5kpKSMG/ePISHh+s9/uHDh3j//fdx+PBhsCyLXr16Yc6cOQgJCbFxywkhhDTkiLtn810hpKaoVaF1iC+GJoXj8x2mAxJjDFUcDvT15PR6Pom8fKafhMJpCk3ugbS+7VB
WWmWXYMfuCcoZGRnYsGED/v3vf2PTpk1gGAYTJ05Eba3+jPEZM2bg3r17+PLLL/Hll1/i3r17ePHFF23cakIIIQ05YiE5c1YIqQ1MbIW3xnVDz9hQsxOK1QzVlOGSrGxOIq96+qlnbKhNdirnUphwzNAouNlxKtOuwU5tbS3Wrl2L6dOnY8CAAYiOjsbSpUuRl5eHvXv36hxfVlaGEydOYNKkSYiJiUFsbCwmT56MCxcuoKSkxPZvgBBCCADztyuwFksqFQ9PDse4YVGQuLuZXWG4MX1TUVzO3TjPRujNQ4ViagVXUrR9d0q36zRWVlYWKisrkZKSonlMLpcjNjYWJ0+eRFpamtbxnp6e8PHxQWZmJpKTkwEAW7duRUREBORyuU3bTggh5C+WbL9gDeasEHITizB6cCRSu4ZpPW4oF8bX093g8vXGDE1F8cmzccR8qIbsMYXGlV2Dnby8PABAixYttB4PDg7WPNeQVCrFhx9+iHnz5qF79+4QiUQIDg7GunXrIBZbNkjl7i78IJebm1jr/8Q6qJ9tg/rZdpyxr4Pk3PJPguSeVvl925ipVU6NRYX7Y/ZziZBK3PQ+3yMuFEkxIci+XYySilrIvSVYs/0ywCHYCZR7IDYi0OBNv/G5/X2liGqtPf10Mst4PtTLT3e2++iJWnz7IJ3H7P2ZtmuwU11dDaA+iGnIw8MDpaW6y/1YlsWVK1eQmJiIiRMnQqVSYenSpZg2bRo2btwIX19fs9ohFosQEOBj1mu5kMu9rHZu8hfqZ9ugfrYdZ+rrHn7eCNp+GQ9Lawwe08zfCz0SwmySuxHewo/X8Y/1a4+QYNMzBL2C6u8zF64/4DxyNOXJzggKMn5/UjEsZMU1qGNFkMk84R/go+knFcNiw76rRl+/cf81DOrR1q55MVzY6zNt12DH07P+m0Btba3mzwCgUCjg5aXbIbt27cK6devw888/awKbVatWYeDAgdi8eTNeeOEFs9rBMCzKyqrMeq0xbm5iyOVeKCurhsrMpYvENOpn26B+th1n7evnhnTE8s3nDT4/enAkykqF/12rT8sATwTKPFDEMSC5l1+Gnb/d0Duqok/ufW71d4YlhyMm3A/FxbpL3hmGRfbtYpy5WogjF/NQXvXXaFSgzANjhkUhKToYV/4sMhpEAsCDkmoc/+MOYtoGcmqXrVnjMy2Xe3EeKbJrsKOeviooKEDr1q01jxcUFCAqKkrn+FOnTiEiIkJrBMfPzw8RERHIyeFX4bIxpdJ6v1BUKsaq5yf1qJ9tg/rZdpytrxM7NDOaf5LYoZlN389ojpWKRSJoJTNzyYOReUk4tSHA1wO1tSq9e0kZWxJfVK7A8s3n8eKT8Zzr/Dwsq3H4z4u9PtN2nRCOjo6Gr68vjh8/rnmsrKwMly9fRlJSks7xoaGhyMnJgULx14ejqqoKd+7cQdu2bW3RZEIIIUZ0iwrGoqm98ProREx+PBavj07Ewqm97JJAq07+9ZAYv9WxjRY0qfNgTmcXGHwN1z2uNh28jtkrj2idi8+S+I37r0HuJTV5HACUVdQ63CotR2HXYEcqlWLs2LFYvHgxDhw4gKysLLz66qsIDQ3F0KFDoVKpUFhYiJqa+uG79PR0APW1drKyspCVlYWZM2fCw8MDI0eOtOM7IYQQomaqzoutlk/X1qlw6VYRFHX1IwmNZ6ZEJtJbjNUF4rMkvWHwxHdJfFG5AhDBZGAlEtUHVmu2XcbCjWd1Aqymzu4VlKdPnw6lUom5c+eipqYGSUlJ+OKLLyCRSHDnzh0MGjQIH3zwAUaOHIng4GBs2LABixYtwvjx4yEWi9G9e3ds2LABMpnM3m+FEEKICdZePq3eGuLP/DL8fOYuCktqIAIwIqUNHu/dFjfulqGkUoGyilpsOnjd6LnUdYEM7Q/VLSoYw5PDsedkrs7okD4b91+Dl9Sd95L4sqpak/teGRqdskfVakckYlkuPyLXplIxKCritl8KH+7uYgQE+KC
4uNLh51GdGfWzbVA/246r9rWh7STULL0x6wukRCIgLaUNnuzXXuvYY5fzsGbbZZPnnPx4LHrGhhq8Htfdy9XSerXBjiP8ckxfH52I6DYBBt+fsbt4oMwDC6f2snutG2t8pgMDfZwjQZkQQkjTwHU7CX17SHFhKPBgWWD7kRy0DpFpBVJc95sydJzZFZp5Di803C6icdE+IUanmgrnqVhFCCHEaVlzOwmGYfHtnmyjxzTOv+kY7o9AC/alMqdCMwBEtwngtddW4+0iGuZDyX25JS5bu2q1o25h0RCN7BBCCLE6a20nwbIsthy6gbIq4xWTG49wiMUijBkWZbIukKFRJnMCiECZB6JbB5jMv1Ef23i7iMbk3tyCHa7HmcPRt7BQo2CHEEKI1ZkzbaRUMjh45g4KSqoR7O+F1K5hWltN1NQq8e2eqzh6SXd7IX0aByhJ0cF4c3wSVv94Xqv4IJdAg+v7aUgdPBnaD0vmJUHPuBAkRjbntqcU1wEUKw20GJo6dMTkaAp2CCGEcKJe6WTOJo/qujTGpn4aTht9f/Caziqn736+jmFJ4fhbaiRyCyqwautF3H9YZTJJV01fgNKrc0tEtZLj8q0iXu+Ly/tp+L4aB09CbJpZVl0r6HF8WDsHS2gU7BBCCDHJ0ukKdV0aY9M36pGP7w9ew+4TuTrPsyyw+0QucgsqcPVOKeqUDPx9pZj8WBw+23GZcyClr218E3i5vJ8h3cOMjtIYui7XoNLSJGtL8MnBcoTkaAp2CCGEGCXUdIWh6ZuGIx9KJYM9J3UDnYYu/VkMAIhvF4iJabGQe0s5B1JC4vJ++OITVPIdLROStXKwrIV3sPP888/jnXfeQfv27XWey8rKwuzZs7F9+3ZBGkcIIcS+uE5XJLRvhut3S02ORpiavjl45g6nKamE9kF4+enOEP9/GWRrBB5cCDEdpcY3qOQzWiY0e44qmYNTsHPq1Cmoaw+eOHECJ0+eRFFRkc5xP//8M3JzjUfkhBBCnAfX6YqZKw6jovqvFVHGpriMTRsVlFRzaleQn6cm0FETMvDgw5xpsMbMzYGxV5Bnz1Elc3AKdn744Qds3boVIpEIIpEI7777rs4x6mAoLS1N2BYSQgixG67TEA0DHcD8FTnB/l4WHacv8LAksdpWLMmBsUeQZ89RJXNwCnbmzp2Lp556CizLYvz48Zg3bx46dOigdYxYLIZcLkdkJLeN0QghhDg+S6ch+K7ISe0ahu8OXje6Wlr0/8dxYSwHpkec/m0g7MHSHBghRpf4steokjk4BTsymQzJyckAgG+++QaxsbHw9fW1asMIIYTYH58l1vroG40wNtIiEtXfuFVGqvB6SMScgidTOTBiNzGGpkSY8a6E52w5MGr2mjrki3eCcnJyMsrLy7Fv3z5UVVVB3z6i6enpQrSNEEKInXGZrjCl4WiEsZGWqNYBWLb5vNFABwBq6hiTS5q55MCs35uNQT3acnsTVuZsOTAN2WNUiS/ewc5vv/2G6dOno6amRm+gIxKJKNghhBAXYrDir7cE5Sa2aQD+Go0wNdLi6yXRyf0xxNS0D6ccmDIFLt98iLAgbnlC1uRsOTDOhnews2TJErRr1w5vvvkmQkJCIBbTXqKEEOLq9E1XdGjlhzdWH+U0GsFlpKWiug7+vlKUVJiu+GtqOodrDkxRWY1DBDuAc+XAOBvewc6NGzeQkZGB7t27W6M9hBBCHJS+6QquoxFZOcWc8n7GDY3Cun1XOU/nGMr/4Zrbcu9BJTpHOM4UjLPkwDgb3sFOy5YtUVFRYY22EEIIcTJcRyO4jrQolCrOAZSx/J/EyOacEqs37MlCkEyKxA7NOLXPFoTOgXGGpffWxjvYmTJlClasWIFOnTohLIzb0j9CCCGui8toBJ/VRtFtAkwGUFyqDXNNrF6/NxsJ7YJcMgCwdE8zV8E72Nm+fTvy8/MxZMgQBAYGwtPTU+t5kUiE/fv3C9ZAQgghjs/UaETHcH/4+Uh
RWmk4H6fh9FS3qGAktG+Gg2fuoKCkGsH+XkjtGgZ3dzHnasMLp/ZCep+2yPz9T6PHFpU5zoaVQhJqTzNXwDvYCQ0NRWio4xRiIoQQ4viu5pagTsUYPabhaiN9IxJ7TubiucGR8PGUcK42HBzozal9jrJhpVDM3X7CVfEOdj744ANrtIMQQogdWSuvg2FYbD/yJ7YdvgWWrZ9CUakYlDVYst44v8fUiMSQ7txSKNTvhQtHK9ZnKUu2n3BFvIMdtRs3buDw4cMoKCjAuHHjkJubi+joaKqsTAghTsZaeR2lFQqs2X4ZV3KKAQB9OrXAmCEdIXEXGwysuIxIHLucz+n66nObLNYnd8xifZawdPsJV8M72GEYBvPmzcOWLVvAsixEIhEeeeQRZGRk4Pbt21i3bh1NcxFCiJOwVl7HpT+L8Nm2SyirqoNUIsbzw6LQK76F5nlDowlcRiTKq+pMFiBU5/9wKdY3ZmiUy03lNNURLUN4VwTMyMjA9u3b8d577+Hw4cOaKsqzZ88GwzBYunSp4I0khBAiPK55HYyJ7RsaUjEMfvz1Bv676RzKquoQ1twH77yQhJ6xocjKKcaxy3nIyik2eE6uIw0pcSFGn2+Y/6NeHh8g076xB8o98Ob4JCRFu16SrnpEyxhH3X7CGniP7GzZsgXTp0/HU089BZVKpXk8JiYG06dPx+LFiwVtICGEEOsQOq+juFyB1dsu4WpuCQCgX0JLPDc4EhduPsTiTec4TZNxHWnw8TR++7qaWwIfT4lmdEff8vjYiEAEBfni4cMKZOUUu1QdGtp+QhvvYOfBgweIiYnR+1xISAjKysosbhQhhBDLmUo6FjKv48LNh/hs+2VUVNfBQ+qG8cOj0DM2lPc0GZccmwCZB345d89oe/aduoN9p+5oBVWNl8eLxSIcOX8Pq388jyIXrEND20/8hXew06ZNGxw6dAi9evXSee7EiRNo06aNIA0jhBBiPi5Jx+bmdTQMomReUlz6swi7j98GALQO9sXU9HiEBHqbtfxZLBahR0wwdp/INfiayFZ+OJFVwKntxnKPTmYVYPnm87xeo48jVyim7Sfq8Q52xo8fj3nz5qGurg4DBw6ESCRCTk4Ojh8/jrVr12LOnDnWaCchhBCOuI6mcFqp1CivQ18QpTawayuMSu0AibsbAPOmyRiGxfErxgOZS38WGX1en8ZBFcOwWL8nm9dr9HGGCsVCbz/hjHgnKD/zzDOYMWMGfvzxR0yePBksy2LmzJlYunQp/v73v2P06NHWaCchhBAO+CQdq/M6jGlc6G/FTxcNBjCxbQI0gQ4AnL1WyKnNDafJuARIlTVKTudtSB1UNbxOEcdAzBBD/aEOKk9ncxt9ItZnVp2dKVOmYMyYMThz5gxKS0shl8uRkJAAf39/gZtHCCGED76jKVzzOvhOSZ3OLsC+U3c4tbnhNBnXPCIfT3feQU/Dc1uar0QVip2L2UUFfX190a9fPyHbQgghxELm3MS55HXwCaI6hvubDATURCKgQys/zd+55hEN6R5mcs+rxhqe29I6NFSh2LnwDnZKS0uxbNkynDlzRu/KK9oIlBBC7Mfcm7ipvA4+U1JcAgE1lgWu3y3VXJtrHlFarwi0au5rMH9I32sa5h51DPdHoMzD6FSWsTo0VKHYufAOdt5++20cOHAAffv2RXR0tDXaRAghxEzmJB0bU6dk8P3B6zhwhvuUFN8bfMPj+dSHaTgidfZaodFps8Y1ZcRiEcYMi9K7GsvQaxqiCsXOhXewc+TIEcydO5cSkQkhxAFxCRaeHRTJaSlyfnEVVmVeQk5+OQDAQ+IGRZ1K5zg1dRCVdbuYV5sbBwR86sOoR6Si2wRops+41pRJig7Gm+OTdOrscKlDI3RQSayLd7Dj4+ODsDBuO84SQgixPWPBQnJMMDYdML1U+sSVfHy1Kws1tSr4eknwj0djoFQxJkdczl4rxPp9Vzm31VBAYE59GHNe06tzS0S1kuPyrSJ
edWioQrFzEbHqza04WrNmDY4cOYIVK1bAx8fHWu2yKZWKQVFRpeDndXcXIyDAB8XFlVAqGcHPT+pRP9sG9bPtCNXXjYvdlVfXYmXmJYPHv/hkPDq1C8Kmg9fxy9m7AIAOYX745+NxCJR7AtBfV0Y9EgLA6M3f0DXtVY9GiH421h+OUmfHGFsVRLTG74/AQB+4uXGroMM72KmqqsJTTz2FwsJCREREwMvLS/uEIhG+/vprPqe0Owp2nBv1s21QP9uONfqaYVjMXnnE6LSLn48EMm8p7hTW/z58NKUN0vtGwE2sfUPRd4MEYPL8DTlCQGCtoNJZKhTbsiCivYMd3tNY8+bNw61bt9CuXTt4enqicazEM3YihBCn5Uw3OS4rpEor61BaWQeZtwST0mIR3y5I73H6Vm5l5RRzCnQeTWmNuLZBDt1XfDljhWK+e5Y5O97BzsGDB/Haa69h0qRJ1mgPIYQ4BWfYJqBhMHbvAbfR6xZB3nj1mQScuVqIc9cfINjfC6ldw+DubvwbNNcVWK2a+zpdYGAv1gqmm2JBRN7BjlQqRXx8vDXaQgghTsHW34rNuekZ28PKmJZBXnhj9VE0HKT/7ufrGJYUjr+lGt5agpZiC8uawXRTLIjIO9h54oknsHHjRvTo0QNiMe+ttQghxKnZ+luxOTc9Q8EYp+tdfajzGMtCswu5oYCnKS7FttbIiznBNJ+2NMWCiLyDHZlMhs2bNyM1NRWdO3fWWZElEonw/vvvC9ZAQghxJLb8Vnwyy/hNb2p6HGReUp1EYa5bNfC1+0Qu0vu0g1TqpvNcU1uKba2RF3OCab5taYqjcLyDnR9//BF+fvX7mFy8qPuhFolc44NMCCH6WPNbsfrbeXl1HVqFyLFuT7bR41dtvaQ13RQg80D/hJa8p674eHXF7/j7iBi9N1E+xQAbc6Zkb2tOY/INps1pS1MchTMrQZkQQpoqa30rNifHpvHi1+JyBTJ/v8XrunxVK1RGb+jmFPZzhmRvNWtPY/IJps1tS1MbhQMAs5NuGIZBVlYWfv31V1RUVKCkpETAZhFCiGNSfys2hu+3YvW3c2uOyAht4/5rYBj9pUbUS7F7xoYiuk2AyUBH33tXj0yczi4QtN2W4jPyYg4+wbQlbVGPwjX+LAfKPFxu2TlgxsgOAGzduhVLlixBQUEBRCIRNm/ejOXLl0MikWDJkiWQSqVCt5MQQhyC0N+KuXw7d0RC5CU54xJoayf38pliOpGVb1FbzBmFc1a8R3Z27tyJN954Az179sTSpUs1RQSHDBmCQ4cOISMjQ/BGEkKIo2AYFj6eEgzpHgaZl0TrOXO+FXP5du6oLF2tY+1REmuwdnKvOpg2Rh1MC9EWPqNwzoz3yM6qVaswatQozJ8/HyrVX7vfPvXUUygqKsL333+PGTNmCNlGQghxCPpyS3y9JEiJC0FiZHOzvhU/KK8Wupk2Y+lqHWdcAm2L5F6uid5NMdHYXLyDnVu3buGNN97Q+1xCQgKWL19ucaMIIcTRGFr1UlFdh32n7pg9/H/rbhnv14hEusnJfAT4SgGRyKIRJSFuos64BNpWyb1cppiaYqKxuXhPYwUFBeHGjRt6n7tx4waCgvTvpUIIIc6Ka26JoYRd4/jdiNL7tMU/nzBexV5qYmuH54Z0NDlVMjw53OjzQtxErZHsbQu2Su7lMsXU1BKNzcV7ZGfEiBFYtmwZgoOD0b9/fwD1tXUuXryIjIwMpKWlCd5IQgixJyEKCRqqI9Pc35NXW3794z4WTu2F4cnh2HMyV2eERywCag3sKt14GsTUVEn7Vn5m1czhyplHJhwpudeR2uKoeAc7M2bMwNWrVzFjxgzNdhHjxo1DVVUVunfvjldeeUXwRhJCiD1ZmltirI5MWHNfXm0pKldgx5E/Nds3NGZocCm9TwTSerXVugGaukkaeh6o3+VciBurJYUIrc1UoUNH2u3ckdriiMzaCPTzzz/H4cOHcezYMZSUlEAmkyE
5ORn9+/enCsqEEJdjSW6JqQq3Q7qH8W7PzmN/8n7Nr3/cQ1qvtjqPm7pJNn7eGgUAHXFkwpkKHRLTeAc7//jHPzBx4kT07t0bvXv3tkabCCHEoZi76oVLrs+xS9xqpTRUq+SfG2RuXZyGoxsFRVXI/P1PnWOE2CbBkUYmbL2rvTNxpm09GuId7Jw5c4ZGbwghTYq5uSVccn3Kq+sg85KgvLpOkLYaw3cJN98tLBytAKA5nLHQoa0482gX79VYffv2xbZt21BXJ8w/TIZhsGzZMvTt2xddunTBpEmTkJurfy4aAOrq6rBkyRLN8WPHjsWVK1cEaQshhBhizqoXrsFFz7gQQdpoitybe3V7c7awcLQCgOZwxkKHtuBs23o0xntkx8PDA9u2bcOuXbvQvn17eHt7az0vEonw9ddfcz5fRkYGNmzYgA8//BChoaFYtGgRJk6ciO3bt+vddmL+/Pn45Zdf8OGHH6Jly5b45JNPMGnSJOzatQsymYzv2yGEEM745pZwzfVRFyTkuxEobxxnvyzZwsKRCgCawxkLHVqbK4x28Q528vLykJiYqPk722jdY+O/G1NbW4u1a9di1qxZGDBgAABg6dKl6Nu3L/bu3auzjD03NxdbtmzBqlWr0LdvXwDAe++9h/T0dFy8eBEpKSl83w4hhPDCJ7eET66PWCzSBFLl1XUIDZbhw69PoFqhMvhavkorazkdZ8kWFo5UANAc5iSjO2seC1dClF6wN97BzrfffivYxbOyslBZWakVpMjlcsTGxuLkyZM6wc7hw4chk8nQr18/reMPHjwoWJsIIUQofHN91IGUWCzCt/uuChroAMDGA9cglYhN5leYG+g4YgFAvvgmoztzHgtXrjDaZdau50LJy8sDALRo0ULr8eDgYM1zDd26dQvh4eHYu3cv1qxZg/z8fMTGxmLOnDlo3769RW1xN1Fx1BxubmKt/xProH62Depn8/SIC4XYTYz1e7JR1LCOjNwDY4ZGISla+4ZYUqHA6q2XcOlWkeBtqaiuw4qfLuLlpzvrXLehyhrzcjLHDIuCVOpmbvNsztBneuywKCzffN7g69Tv82SW8VVbpvrZWQTJuRW+DJJ7GryX2vv3B+9gJzU11eRqrAMHDnA6V3V1/QZ4jXNzPDw8UFpaqnN8RUUFcnJykJGRgddffx1yuRwrV67Ec889h507d5q9VYVYLEJAgI9Zr+VCLvey2rnJX6ifbYP6mb+hKREY1KMtLt98iKKyGgTKPRHbLghujaY6zl0twJINZ1BSroDEXYw6A5WQLbVx/zUM6tFW5/pqoc355T828/fCpCfi0atzSyGaZ3ONP9NDUyLg6+OBNZkX8LC0RvN4w/epYlhs2HfV6HlN9bOz6OHnjaDtl7X6orFm/l7okRBm8r3a6/cH72AnOTlZJ9iprKzEhQsXoFAoMH78eM7n8vSsjxZra2s1fwYAhUIBLy/dDnF3d0dFRQWWLl2qGclZunQp+vfvj59++gkTJ07k+3YA1M+3lpVVmfVaY9zcxJDLvVBWVg2Vyjq/tBwRw7DIvl2Mkopa+PtKEdVa/54uQmmq/Wxr1M+6+H7Ww4K8EBZU/7utrPSv3zkqhsFPv97C9t9vgQUQFuyLN55PwjtrjqCoTPipgQcl1Tj+xx3EtA3U+7wHx8GZx3u3RVxEoOZ9FxdXCthK6zP2mY4J98OSF3vr/fkWF1fiyp9FRm/+QH0/Hz13B2IRbPb70FqeG9LR6GjXqNQOWp/pxqzx+0Mu9+I8UsQ72Pnwww/1Pl5XV4dp06ZpRmu4UE9fFRQUoHXr1prHCwoKEBUVpXN8aGgo3N3dtaasPD09ER4ejjt37nC+rj5KK32DAgCVirHq+R2JPeevm1I/2xP1cz19n3WZlwQ940I0q6u43NSKyxVYve2SZilzv4SWeH54FEKC5Rgz1Ph0io+nOyprlJq/B8o8kBjVDAdO3TV53YdlNQZ/ju1b+nHKW3m8dwTEYhEYhjV
zE1THYOwzHRnmr/lzw/f5sMx4oKP26ZbzWj8jZ83nSezQTO+2Hmrr910Fy7Im35e9fn8INnkmkUjw/PPPY/PmzZxfEx0dDV9fXxw/flzzWFlZGS5fvoykpCSd45OSkqBUKnHhwgXNYzU1NcjNzUWbNm0sewPEYs5eh4EQrgx91sur67Dv1B0s3HgWs1ceMfmZv3DzId5ZewJXc0vgIXXD5Mdi8cIj0ZBK6odWkqKN1/b5ZHpfvD46EZMfj8XroxOxcGovdIvkdhM1tupInVhtTL+EFkafd3VcV201DHQA5/592C0qGKMGddD7nKO/L0ETlEtLS1FZyX0YUyqVYuzYsVi8eDECAwPRqlUrLFq0CKGhoRg6dChUKhWKioogk8ng6emJ7t27o1evXnjjjTewYMEC+Pv7Y9myZXBzc8MTTzwh5FshPLlCHQZCuOBag8bY1gJKFYPM325h57EcAEB4sC+mpscjNNBb5zymavs0Xupr7tYWfGX+/icO/XHfqqMUjrykm0s/G+OMvw8ZhsWmA9eNHuOo74t3sJOZmanzmEqlQl5eHtatW4fu3bvzOt/06dOhVCoxd+5c1NTUICkpCV988QUkEgnu3LmDQYMG4YMPPsDIkSMBAMuXL8fixYvx0ksvoaamBl27dsU333yDwED9c8/ENlyhDgMhXPCtQdP4l39RWQ1Wbb2E63frF2EM7NoKo1I7QOJuOFGGT20fvsvd9QUUACwO6Czl6Eu6ufSzMc74+9CZf8/zDnbmzJlj8LnExES8/fbbvM7n5uaG2bNnY/bs2TrPhYWFITs7W+sxX19fzJ8/H/Pnz+d1HWJdrlCHgRAu+H6Gi8oV2H8qF3JfKR6U1GDPiduorFHCy8MNLzwSY9HS5IaBitxbCrBAWXUt/H08MDU9DpsOXNe6OQXKPDC6QbBgKKDon9DSooDOUs6yEad6C5HGfdg4l8oQZ/t96My/53kHO/qWlYtEIvj6+kIulwvSKOJ8zKk6SogzMuczvOmg9tB/c39PvPZsFwQH6E5bcWVqk84AmQdGDYqEzEuidxrIWECR+fstXm3h8m2e65SUs02J65tmZBgWi787Z/K1zvb70Jl/z/MOdlq1aqXzWGFhIXJzcxEdHQ03N+cpKEWEY6s8AUKEYEkuiKW5GgBQWFKD3IIKs4MdQ4FKQ8XlCqzMrB8F6RkbqvWcJXtfGWLs2zyfKSlnnCppPM3IMKxL/j505t/zvFdjVVRU4M0338T69esBALt27cLAgQPx9NNPIy0tDffv3xe8kcTxcVm90TBPgBB7OZ1dgNkrj2DhxrNYs+0y55VTalw+61xs3H/NrOXafAMVfdexZO8rQwqK9Jcd4btK05mnStRc9fehM78v3sHOkiVLsGfPHvj5+QEAFi9ejOjoaHz66adwd3fH4sWLBW8kcQ7q+WtDy2QdYY6dNG1ClUcw9FnnQz06wRffQKWoXIHM328iK6dYE/RYI1DI/P2WTv9xnZJqGIw581RJQ676+9BZ35dZOTtz5sxBWloaLl68iLt37+L111/HoEGDoFQq8c4771ijncRJmFomS4i9CJ0L0vCzfvZaIY5eykdFNb89pcwJOsx5zY4jOdhxJEczdcQ1UEjv0xaH/rjPObhq3H/mTEk581RJY676+9AZ3xfvYKekpATt2rUDABw6dAju7u7o3bs3AMDPzw8KheMOLRLb4LNMlhBbsUYuiPqzHt0mAM+mRmLb4VvYdfw25z2tzBmdsGREQz2CNTU9jlNAkdYrAmm9InA1twQHTufi9NUHRs/fuP/MmZLiu3Te0bnq70Nne1+8p7FatWqlWQ6+f/9+dOnSBb6+vgDqg5+wsDBhW0gIIQKwZi5InVKF9fuuYtvhP1GnZNChlR/8fKRGX8NndIJhWGTlFOPY5TxN8qslvjtwHaMGccu9UN/UunFcIt+w/8ydknLWqRLiuHiP7IwaNQoffvgh1q9fj5s3b+K///0vAOCll17CgQMHMHf
uXMEbSQghlpJ7GQ8++B6nlldUhZWZF5FbUAEAeDSlDZ7oE4E/rj8QZHRC30omH0/Lit8XlSsg85LorRHTuBaPmjmBiyVTUs44VUIcF+9/MePHj0dQUBBOnjyJl156CSNGjABQvzfW/Pnz8eyzzwreSEIIsRjXeySPe+mxS3n4ek82FLUq+HpJMPmxWMS3CwJguOCcoWBCn5NZ+peYqwvWcS1ep09JpQI9Y0M5BxTmBC6WTkk521QJcVxmfT1IS0tDWlqa1mNLly4VpEGEEGINZVW1gh2nqFNh4/6r+PWP+lIbUeH+mPx4nM60iyWjEyqGxfo92UaPkbqLMXVUF5y5WoiDZ0zvdN6QegSGa0BhbuAiRNBHiKXMCnbOnz+P48ePo7a2Fixbv2SQZVlUVVXh9OnT+P777wVtJCGEWEqoJc33HlRi5daLuFtYCRGAx3q3xWO928JNrD8F0tzRics3H6LIREJ1cUUtxCIRukcF8wp2zF3NZG7gQlNSxN54Bzvr16/He++9pwlyGhKLxejTp48gDSOEkMasXfnYVBBw+MJ9fLs3G7V1DOQ+Ukx+LBaxba2zCXFRWQ2n40oqFUiODuFV1dmS1UzmBi40JUXsiXews27dOvTr1w8LFy7E6tWrUVFRgX/96184dOgQ5syZg8cff9wa7SSENHGnswuwft9VlFT8Nc3k7yvFmCEdOU2FWJI/oqhVYd3ebBy+mAcAiGkTgMmPxcLPV/8okCVBmVqg3JPTcf4+Hpx34BZq6ogCF+JseAc7d+7cwZw5c+Dn54f4+HisWLECnp6eGDZsGG7evIlvvvlGJ5+HEFJPiJtgU2RoL6iSilpeu2CbMw1zp6ACK7dexP2HVRCJgCf6RCAtpa3BnxuffaCMiW0XhECZh9GprIYjUYbem8xbgp6xIUiMbE6fN9Jk8Q52JBIJPD3rv3G0adMGOTk5qKurg0QiQbdu3fDll18K3khCXIFQN8GmhmFYfLUry+gxX+/KMqvysbGgk2VZ/Hb+Ptbvu4o6JQN/XymmPB6HqNaGRzSM7STOJygDADexCGOGRWH55vMGj2k8EmWP3BgK4Ikz4B3sxMTE4Oeff0aPHj0QEREBhmHwxx9/oHv37sjLy7NGGwlxekLeBJuarJxik8urK2qUyMopRmwEt/wZU9Mw1Qolvt2TjWOX8wEA8RGBmPhYLOTehmvwCL0dBQAkRfMfibLlFBMF8MRZ8A52JkyYgJdeegllZWV4//33MWjQILz++usYOnQotm/fjm7dulmjnYQ4LWvcBJuSrNxizsdxDXaMuZ1fjpWZF5FfXA2RqD7g6J/QEr6eEqOvs8Z2FIDjrmSiAJ44E97BzuDBg7Fq1SrcuHEDALBgwQK89tpr2LRpEzp16oR58+YJ3khCnJm1boJNBcvorvy05DiDr2dZ/HL2LjYeuA6lioFIBLAscOJKAU5cKTA5YsF1JRSfHcvVHC0hmAJ44mzMqrMzYMAADBgwAAAQEBCAtWvXCtkmQlyKNfdkagp8vIyPqPA9Tp+qGiW+2p2FU1kFmscaV9cwNWJRzrFoIdfjHBkF8MTZmL3ByqFDh3DkyBEUFBRg5syZuHLlCuLi4tCqVSsh20eI0xOqmF1T5cexX/QdxyV59tb9MqzaehGFJTVwE4sglYhRrVAZvI6hEQtfH27BFtfjHBkF8MTZ8A52qqur8eKLL+LIkSPw9fVFZWUlJk6ciI0bN+Ly5ctYt24dIiON76ZLSFMiRDE7a3PkFTVcd/hufJyp5FmWZbH/9B18f/A6VAyLILknHunRGuv2XTV6HUMjFoG+3OricD3OkVEAT5yN/vrmRvz3v//FpUuX8NVXX+HYsWOaSsofffQRQkJC8MknnwjeSEKcmbrgmzGWVLS11OnsAsxeeQQLN57Fmm2XsXDjWcxeeQSnswtMv9gG1MGiMY2DRXXybOMAUz0VdfjCfaz46SI27r8GFcMiMbIZ5v89Cd5e3L7/6RuxMKedzqo
pvVfiGngHO7t27cLMmTPRs2dPiER//XIODg7G1KlTcfr0aUEbSIgrUBd8a3yDCJR52HXViqmgwBECHr7BIpfk2S93XsGZq4VwE4swenAkXhrZCT6eEotGLBw9qBVSU3qvxDXwnsYqKyszmJfj5+eHqqoqixtFiCtytCXEzrSihk/lYy7Jswxbv9XEy091RkQLueZxS6ccm9IO303pvRLnxzvYiYyMxPbt2/Vu+Hnw4EHK1yE2xzAsLlx/gNz7pZB5SRwq36QxR1pC7GwrargGi1yTYnvEhiC/uAqKWpXmPJbsn8W3na6gW1QwEto3w8Ezd1BQUo1gfy+kdg2DuzvvSQO7cKbfHcQyvIOdqVOn4qWXXkJJSQkGDhwIkUiEkydP4scff8SmTZuwZMkSa7STEL1OZxdg4/5rWvsHUQVXbpxxRQ2XYJHrVNSeE7maPzf8zAgxYuFIQa016UsC33My1yn+/dHvjqZFxLKNq0mYtn37dixZskRre4igoCDMmDEDzzzzjKANtAWVikFRUaXg53V3FyMgwAfFxZVQKhnBz9/UGargqkYVXI3LyinGwo1nTR73+uhERLcJcJrPM8OwmL3yiFnF+xp+Zuy5Qs0Z+tqZ//05c9udlTU+04GBPnBz4zaKaFadncceewyPPfYYbt68iZKSEsjlcrRr1w5isXMMXRLn50z5Jo7KGZbEN8YlABGLRXiybwTW7jS+eag+DT8zXEZnHHnJvjU5878/Z247MZ/ZRQUBoF27dkK1gxBenC3fxBEJkZ9iS1w3ncy+XYwff71p1jX4fGaa8iaYzvzvz5nbTszHKdgZNGgQ5xOKRCLs37/f7AYRwoUz5ps4ImdZUcNl08nEyObYcfRPbP39FlgWaBHkjSmPx6GqRomSSgXuFVZix9Eck9fi8pkx1Z70PhFI69XWYQJFoTnzvz9nbjsxH6dg5+7duxCJRIiJiUFUVJS120SISVTBVTiOvnqIy7TD+r1XcfDMXVzJqd8hvXd8KMYOjYKH1E1zTFZOMadgx9Rnhkt7Mn+/hV/O3cWYIR0dJmAUkjP/+3PmthPzcQp23nnnHezcuROnT59GbW0tHn30UaSlpSE8PNza7SNEL2fMN3Fkjrx6iMu0Q0llLUoqayGViDFuaBR6d2qhcwzfz4yhfBwu7QGAkopaoxuHOjNn/vfnzG0n5uMU7IwePRqjR49GQUEBdu/ejZ07d2L58uWIi4vDo48+ihEjRiA42LX+MRPH5mz5JsR8XKcTAmQemPlsF7Rq5qP3eT6fGWP5OHUqfitJXDHZ1Zn//Tlz24n5eC2fCg4OxvPPP49NmzZh3759GDJkCLZt24YBAwZg3Lhx+O6771BSUmKlphKiTZ1vEuhgWzA4OoZhkZVTjGOX85CVUwyG4V19wqa4Tie8MDzaYKCjxmXbDlNbaBQU8asSr052dTWOugUKF/S7o+kxq85OYzk5Odi8eTO++uorAMCFCxcsPaVNUZ0d5yYWi3CvuIaqoHJgyQoivp9noZZlc6mbEyjzwMKpvTif31DbuFwrwFcKiES86vhMfjwWPWNDOR/vTL87nHn5Pf3usB2nrLOjVllZiZ9//hm7d+/Gb7/9BgDo3bu3JackhDexWIROHZohLMjL4W8Mava4QXBZ0STUN1ohl2VbY9rBUI4Sl3yc4opapPeJQObvtzhfz5WTXR0538sUZ/zdQczDO9hRBzi7du3C77//DpVKhZ49e+Kdd97BkCFDIJPJrNFOQlyGPeqz2LKQmjWCqogWcrQI9Mb9RlNIQi+RL6qo4XRccKCX3iX7+lCyKyH2xynYaRjg/Pbbb1CpVEhKSsJbb72FIUOGICDAOaN6QmzNlqMrDdmqkJo1gqo/rj/A5zsuo7JGCU+pG4YmhSM0yFvwEbHT2QX4bv91Tsf6+3gguk1AfW2fI7eQ+fufBo+lZFdC7I9TsNOrVy8olUp07doVc+bMwfDhwxEYGGjtthHiUuxZpt5WhdSEDKq
UKgZbDt3QbNjZJlSGqU/EITjA26I26mNqr6SGGo7UiMUiPN6nHVo193X4woyENGWcgh2Fov4f8MmTJ3Hq1Cn8+9//NnisSCTC5cuXhWkdIS7EnmXqbVVIjes0kKnjHpRUY9W2S7h5rwwAMKhbGP42sAPcxCJk5RQLmuukVDL4Znc25+P1jdQ4emFGQpo6TsHOSy+9ZO12EOLy7Fmm3laF1Coq6yw+7szVQqz93xVUKZTw9nDHhBEx6BbV3Cq5TqezC/D17mxUVJtut8xLgueHRxm8ljMn6hLi6ijYIcRG7FmmXiwWoUdMMHb//5SQPskxwRaPRMi8pWYfV6dk8MMv17H/1B0A9UnJU5+IQzN/L6vkOvGZugKAUYNoSooQZ8WrqCAhxHzq0RVjrLVyh2FYHL9SYPSYE1cKLC4waOr9GTquoLgK7687rQl0hiWH482xXdHM34tzrhOftnM5p6k2E0Kch0V1dohhDMPiyp9FqLtVDImIRfuWfjR/38SdvVaI2jqV0WOstXLHVvlC5kyXncwqwFe7rqBaoYKPpzv+8WgsukQ2s2rbue5vZajNhBDnQsGOFdijjgqxnDUL/ZmaMvH1kmC8kXwQS9kqX4hPAcA6pQqbDlzHz2fvAgA6tPLDP5+IQ6Dc06w28Wk73/fJNwh15qrChLgiCnYEZq86KsQy1gxQuUyZSNxESIxsbtF1jLFlvpB63yFjS7Hzi6qwMvMibhdUAABG9GyD9L4RcNdT+t0abed6rMxbgueH8QtC6csOIY6Hgh0B2bOOCjGftQNUrlsQWGPJuZqtVmOpGVuKfexyHr7enQ1FrQq+XhJMeiwWndoF2bTtXM4p85JgybTecHfnntpIX3YIcUycgp3MzExeJ01PTzejKc7PnnVUiHlsEaDac8m5mjX2l+JyzYaf89o6FTbsuYpf/7gPoD7gmPJ4nMnEX2vtjWXqnM8Pj+IV6NCXHUIcF6dgZ86cOVp/F4nq/6E23DBd/RjQdIMdR7ipEX5sEaDac8l5Q1yml6zl3oNKrNx6EXcLKyEC8GivtniiT1u4ibkFE9Zou9DnpC87hDguTsHOgQMHNH++cuUKZs+ejWnTpuGRRx5BcHAwiouLcfDgQSxfvhwffPCB1Rrr6Bzlpka4s0WAauspJGPsUen38IX7+HZvNmrrGMh9pJj0WCzi2vLfbsYabRfynPRlhxDHxSnYadWqlebPL7/8MqZNm4ZJkyZpHgsJCcHo0aNRW1uLRYsWoX///sK31Ak40k2NcGOLANUeU0iOQFGrwrp92Th8IQ8AENMmAJMfi4Wfr2V9KfSoiFDnpC87hDgu3gnKN27cQGxsrN7n2rVrhzt37ljcKGfVVG9qzsxWAao9p5AastVKoTuFFViZeRH3H1ZBJAKe6BOBtJS2Op99V1qiTV92CHFcvIOdtm3bYvv27ejdu7fOc9999x06duwoSMOclaPc1Ag3tgxQ7b1ZpC1WCrEsi9/O38eGfVdRq2Tg5yvFlMfi9I6cnM4uwPp9V1FSUat5zN9XijFDOgr278SWwRR92SHEcYnYhlnGHOzduxevvPIKEhISMHDgQAQEBODBgwfYu3cvrl+/js8++wwpKSnWaq9VqFQMiooqBT0nw7C4ca8UdayIKihbmbu7GAEBPiguroRSyZh1Dn0jHq4UoDIMi9krj5gcdVg4tZfBz6mpfq5WKPHt3mwcu5QPAIiLCMSktFjIfXT3wTJVZFGIwMte9W6E+CwJ8ZkmplE/2441+jow0Aduempz6cM72AGAgwcPYsWKFbh8+TJYloVYLEZiYiJeffVVdO/enXeD7c0awQ5A/5BsRah+dqUplcaycoqxcONZk8e9PjrRYP6KsX6+nV+OlVsvIb+oCmKRCE/2i8AjPdtALNLtP4Zh8cqy31BZozTYDl9Pd3w8va/Z/W+LYMoYSz9L9LvDNqifbcfewY5ZRQVTU1ORmpoKhUKB0tJS+Pv7QyrlttsxIY5KX6KqqwR
A1lopxLIsfjl3Dxv3X4NSxSBA5oEpj8cZzUvJyik2GugAQEWNElk5xYiN4L9qyxHq3VgjkZoQYj6zKyjfuHEDhw8fRmFhIcaOHYvc3FxER0fD19eX13kYhsGnn36KH374AeXl5UhKSsK8efMQHh5u8rXbtm3D7NmzceDAAYSFhZn7VoiTYxgWF64/QO79Usi8JIIFJK5U9l/uxe3LCNfjAKCqRomvd2fhZFb9buqd2wfhH4/GQOZt/BxZucWczp+Va16wQ/VuCCGN8Q52GIbBvHnzsGXLFrAsC5FIhOHDhyMjIwO3b9/GunXrEBoayvl8GRkZ2LBhAz788EOEhoZi0aJFmDhxIrZv3250tOju3btYsGAB3+YTF3M6uwAb919DkcABicuV/eca+3E87s+8MqzKvISCkmq4iUV4qn97DE0O1zttpYPrxDnvCfZ6VO+GENIY91ro/y8jIwPbt2/He++9h8OHD2uqKM+ePRsMw2Dp0qWcz1VbW4u1a9di+vTpGDBgAKKjo7F06VLk5eVh7969Bl/HMAxmz56NuLg4vs0nLkQdkBQ1+havDkhOZxeYdV6u0yAMY+bd2A7KqmpNH8ThOJZlsffkbbz/7WkUlFQjSO6JOWO6YniP1twCHYDzaIqzV6wmhDgO3sHOli1bMH36dDz11FPw9/fXPB4TE4Pp06fj8OHDnM+VlZWFyspKrdVbcrkcsbGxOHnypMHXrVq1CnV1dZgyZQrf5hMXYc2AhM80iLMQIgCorK7DB1+fxLo9V6FUsUiMbIb5f09C+1Z+vNoS3ToAHlI3o8d4St0Q3dq8YEdd78YYqndDSNPCexrrwYMHiImJ0ftcSEgIysrKOJ8rL6++smqLFi20Hg8ODtY819j58+exdu1abN68Gfn5+ZyvZQqfDf+4UmeJc80WJ9xd+bOIU0By414pYnhuTVBeXcf5OGt8bqwhNiIQgTIPnVGwhgLlHoiNCNSb73TjbikyfrqAwpIauIlFGDU4EkOTwrX2xOOKYVhIxCIY++m5u4ng7i42O/dq7LAoLN983uDzY4ZFQWoi4LIn+t1hG9TPtmPvvuYd7LRp0waHDh1Cr169dJ47ceIE2rRpw/lc1dXVAKCTm+Ph4YHS0lKd46uqqjBr1izMmjULbdu2FSzYEYtFCAjwEeRc+sjlXlY7d1NVd4tbkmsdy/9nG96C20hFeAs/q35uhDZlZGd88LXhEdMpT3ZGUJD2AgOWZbH11xv4asdlqBgWoUHeeH1cd0SGm5/Ye+H6A1SYWo1VrcS94hp06tDMrGsMTYmAr48H1mRewMPSGs3jzfy9MOmJePTq3NKs89oa/e6wDepn27FXX/MOdsaPH4958+ahrq4OAwcOhEgkQk5ODo4fP461a9fq7JBujKenJ4D63B31nwFAoVDAy0u3Q9577z1ERERg1KhRfJttFMOwKCurEvScQH0EK5d7oaysGioV1XAQkkTEbXpKImJRXMyvhlLLAE9OoyAtAzx5n9ueYsL98PLTnbF+T7bWewuUe2DM0CjEhPtpvZ/yqlp8tv0yzl17AABIjg3BzOe6QVWntOh9597X/SJj6LiwIPN/McaE+2HJi72RfbsYJRW18PeVIqp1AMRikcP/3Oh3h21QP9uONfpaLveyXp2dZ555BkVFRVi5ciU2btwIlmUxc+ZMSCQSTJw4EaNHj+Z8LvX0VUFBAVq3bq15vKCgAFFRUTrHb9myBVKpFImJiQAAlUoFAEhLS8M///lP/POf/+T7djSsWVBKpWKoYJXA2rf047QPUfuWfmb1/WhTZf8HRYJhWEGSlG1ZyyexQzMktAvSe72G/XT9TilWbbuIojIF3N3EGD2oAwYnhcPHS4LimlqLPs8yLwnn44T4dxMZ5q/5s1A/M1uh3x22Qf1sO/bqa7Pq7EyZMgVjxozB2bNnUVJSArlcjoSEBK2EZS7UdXmOHz+uCXbKyspw+fJljB07Vuf4xiu0/vjjD8yePRtr1qx
p8ntyNTXW3ofIVnuc2aOWj7GCdwzLYvfx2/jx0E0wLIuQAC9MTY9H6xCZWfk5+tCGmYQQW+Md7Lz55puYNm0awsPD0bdvX63nbt68iYULF2LVqlWcziWVSjF27FgsXrwYgYGBaNWqFRYtWoTQ0FAMHToUKpUKRUVFkMlk8PT01MkHUicxt2zZknegRZyfOiBpXGdHqIDE2ht3mlPLx5qjQGVVtfh8x2VcvFkEAOgRG4Lnh0XBy8Ps2qN60YaZhBBb4/Rb7N69e5o/Z2ZmYvDgwXBz013J8Ouvv+LIkSO8GjB9+nQolUrMnTsXNTU1SEpKwhdffAGJRII7d+5g0KBB+OCDDzBy5Ehe5yVNQ7eoYCTFhOBecY3gFZQB7mX/+QYh5mxpYM1RoOzbxVi97RJKKmohcRfjucGR6JfQUrDRnMZsNXJGCCEAx41Ap0yZgl9//dXkyViWRe/evfHFF18I0jhboY1AnZu9+9mcIITvxpzW2tiSYVj87+ifyPz9FlgWaBHkjalPxCMsWHfbF2v0s6vsPSY0e3+mmwrqZ9txio1AFyxYgCNHjoBlWfzrX//C1KlTtRKKAUAsFkMul6NHjx78W0yIkzJ3Wwk+WxpYa2PL0spafLb9Ei7/Wb+Mv1d8KMYO7QhPqbDTVoQQYm+cfquFhITgySefBACIRCIMGDAAcrlcM5VVU1ODuro6yGQy67WUEAdjSRDCp6Kx0BtbMgyLfSdzse3ILVQrVJBKxBg7JAp9Orcw+VohudJGq4QQx8a7lGFaWho+/vhj/O1vf9M8dubMGaSkpOCjjz4Cw9BQIGkaLNlWgs+WBkJubHkyKx8vffwrvvv5OqoV9aUbPCVu8PKwbTVh9YhY4/6zdF8zQgjRh3ews3z5cmzbtg1paWmax2JjYzFr1ix8//33+PzzzwVtICH2xDAssnKKcexyHrJyirVqtFgShKhXJBmjXpEk1MaWv567i5WZl1BTq9J6vKyqzqYBhitutEoIcWy8J+e3b9+ON954Q6uKsb+/P1544QW4u7vjm2++weTJkwVtJCH2YGqahWsQcq+wElk5xTrJt1xXJAlRl+bCjYf4ek+20Xaak/djDqGn5QghxBTewU5xcTHCw8P1PteuXTuDG3gS4ky4JB4nRjY3GYQAwI6jOdhxNEdvPgqXWj6W1KVRMQwyf7uF/x3NMfmebRVgCDktRwghXPCexmrXrh327Nmj97mDBw/y2giUEEfEdZoFgMmpqIYM5aOoa/n0jA1FdJsAvUGLehSocZ5PoMzD4IqvorIaLNxwllOgo2aLAEOoaTlCCOGK98jO888/jzlz5qCkpASDBw9GUFAQioqK8PPPP2PXrl344IMPrNFOQmyGzzSLoakoY8ydLuJT0fmP6w/wxf+uoKK6Dp5SNwxNCse2w3+avIYtAgzaLoIQYmu8g5309HRUVlYiIyNDa6+qgIAAvP3220hPTxeyfYRo2KoAHd9ploZByOWcIuw4YnwkxZLpIlMVnZUqBj8euondJ24DANqEyPDP9Dg09/PCb+fvO0SAQdtFEEJszazqYWPGjMFzzz2HW7duaTYCbdeuHcRi3rNihHBiLFm4R1yooNcyZ5pFHYRwDZRO/f9UliUBW+PgL0juiTXbL+HGvTIAwKBuYfjbwA6QuNf/u3SkAIO2iyCE2JLZpVJFIhHatWsnZFuIg7NXaX9TycJiNzGGpkRwOheX92DJNAvXQOngmbs4eOau2UX09AV/IgAsAC8Pd/x9RLTOOR0twLD2RquEEKLGKdiJiYnBd999h86dOyM6Otro5oAikQiXL18WrIHEMdiy2m3DgETuJTWZLLx+bzYG9Whr8rxc34M50yzqNhdV1EDmJUF5dZ3J9gCmt5Uw9D70tU1dlebp/u0MnstQgAHU79dl66CD60arhBBiCU7BzosvvoiQkBDNn621EzJxTObu/2Tutfgk+wJAUZkCl28+RFiQl9Hz8nkPfEZBzGlzY1yTlrmsFPv
f0Rz079LK4LkaBxi0bQMhxNVxCnZeeuklzZ9ffvllqzWGOB5rbUKpj6mdvY0pKqsxGOyY+x64jIIUFFUh8/c/zWqzVvs5Ji0LXZDPloEsIYTYC6dg5969e7xO2rJlS7MaQxyPNTah1JejwSUgMSZQ7mnwOUveA5dREGNkXhJ0jw7Gz2fvmjyWS3Lzg7JqTtc1di71z6C4XIGNB2wTyBJCiD1xCnZSU1N5TV1duXLF7AYRxyJktVtj0yU+nhKzp4EC5R6IbReEstIqs9vG5ThzRp7Kq+sQEmB4eq2hhsnN+oLCwpJq7DjMrUCgoURpvsEabdtACHEFnIKd999/XxPslJaWYvHixUhJScEjjzyC5s2bo6SkBAcPHsQvv/yCOXPmWLXBxLaEqnZrarpkSPcws9oHAGOGRsHNyMiDEO/BkpEnXx+JydVdMq/6YC8rpxjl1bXYdOC61vE+nu6oVTKoUzIQiQDWyB6ZhlaKmTtNSNs2EEKcHadgZ+TIkZo/v/jii0hPT8d7772ndcxjjz2G//znP9i1axeeffZZYVtJ7EaIardcAoVjl/N5t02dLJwUbTynRIj3wGUqzOC5fT1Nru4qr67DZzsMr2KsrFECAFoEeWNoUji+3m14U0999XIsCdZo2wZCiLPjXWfn8OHDWLFihd7nBgwYgO+//97iRhHHIUS1Wy6BQnlVHXy9JKgwsmQ7wFeKf6TFoqyqltfyaLFYhB4xwdh9ItfgMckxwUbPZe7ohjqIEotFvLeV0KdGoUTfzi3h6yXhVS/H3GCNtm0ghLgC3sFOQEAAzp8/j969e+s8d+zYMc0SdeI6LC1GxzVQSIkLwb5Tdww+/9yQjohtG8it0Q0wDIvjVwqMHnPiSgGeHtDBYMBj7uhGw0Cw4equoooafLf/Oud6PGrFFbWaPbn41MsxN1ijbRsIIa6Ad7DzzDPPYMWKFaipqcGAAQMQEBCABw8eYPfu3di4cSP+9a9/WaOdxM4sqXbLNVBIjGyOjuH+glf4FWJFGZepsIYMtVm9uqs+N4dfoKOmDlz41MvhG6zRtg2EEFfCO9iZOnUqysvL8cUXX2DNmjUAAJZl4enpiVdeeQVjxowRvJHOimFYXLj+ALn3SyHzkjh9KXxzq93yyZkRi0WCbyFQVFFj8XFcpvPS+0QgONCLU5stSfrVF7iYSgCfmh5nOknaW4JnB3VAoK+n039WCSGkId7BjkgkwhtvvIFp06bh3LlzKC0tRUBAABITE+Ht7W2NNjql09kF2Lj/GoqoKi3vvB+htxC4cquI03EVleaNtKi1au7D+Wdr7rSYvhwaLsnH3x24jlGDIrEy0/DP4PlhUU3us0kIaRrM3qbcx8cHzZs3h1wuR0JCAmpra4Vsl1NTf8suavQtWv0t+3S28fwRV6TO+wmQad/kA2UeVq3Sezq7AIcvclvpJfOWGnyOaxVmhjGyJrwB9WgXX/pyaLhO08m8JHb5GRBCiL2Ztev51q1bsWTJEhQWFkIkEuGHH37A8uXLIZFIsGTJEkilhm8ars6W2ys4G1vvcs13ubWx4EPoStJisQhDk8Lw3cEbnNpmLIeGT9HEnrGhtNM4IaTJ4R3s7Ny5E2+88QYef/xxDBw4EK+++ioAYMiQIXj33XeRkZGBGTNmCN1OpyH0TdHV2HKXaz7LrU0tsRaykjTLsjh07h62HLoFADpFAgNlHnh2UCRkXhJOAQnfoom00zghpKnhHeysWrUKo0aNwvz586FSqTSPP/XUUygqKsL333/fpIMdIW+KxDJ8+tjUEmuhKklXK5T4alcWTmbVT2V2bh+ECY9E4/7DKrNHWoQomkgIIa6Md87OrVu3MGTIEL3PJSQkID+ffyVcVyLUTZFYjmsfp/eJMJmvwiXHxlRAkZNXjne/PImTWQVwE4vwt4EdMP3pzvDz9UB0mwD0jA1FdJsA3lNK6gRwY6heDiGkKeMd7AQFBeHGDf15Bjdu3EBQUJDFjXJmQtwUiTC
4/CwCfKVI69XW5LksCShYlsWB03fwn29PoaCkGkFyD7wxpiuG92gNMY8Ndo2xVwI4IYQ4A97TWCNGjMCyZcsQHByM/v37A6hfjn7x4kVkZGQgLS1N8EY6EyG2V3B2+nbstub7NVTPiMvP4rkhHTm3zZxK0lU1dfhyZxZOXy0EAHTp0Ax/fzQGvl4Snu+SW/so+ZgQQnSJWNbY/sm6amtrMW3aNPz+++8Qi8VgGAY+Pj6oqqpC9+7d8dlnn8HT09Na7bUKlYpBUVGloOfUV2enKVSlNVbF1xrvm0s9I31tsuRnwTWYu3mvDKu2XsSD0hrNtNXg7mEQCTSaY2vu7mIEBPiguLgSSiVj7+a4NOpr26B+th1r9HVgoA/c3LhNUPEOdtQOHz6MY8eOoaSkBDKZDMnJyejfv79T/iK3RrAD1I/y3CuucZkKyqYYquKrxnU6hWswwed6SiWDg2fuoKCkGsH+XkjtGgZ3d7PLTBnFsiz2nczFD7/cgIph0czPE1PT4xHRQm6V69kK3Rhsh/raNqifbcfewQ7vaax//OMfmDhxInr37q13M1DyF7FYhE4dmiEsyMvl/yEJVV+I68gQl+tt2HcViZHNcfZaoc4595zMtcpoU0V1Hdb+7wrOXX8AAOgW1RwTHomGt6fw01aEEEK44f3V9syZM045ekOsi099IUPUIzWNz6Ov8jSX6xVX1GLNtkucz2mp63dKMf/LEzh3/QHc3UQYO7QjpqXHU6BDCCF2xjvY6du3L7Zt24a6Osv2ESKuxdL6Qny3Y+B6vRNZxoMZPls8GMKwLHYdy8GH68+gqEyB4AAvvDWuO1K7Om9+DiGEuBLe01geHh7Ytm0bdu3ahfbt2+ts/ikSifD1118L1kBifUKsnrK0vtCOI7d4VZ4Wqk6RpdWsy6pq8cWOK7hw8yEAIDkmGOOHR8PLw6ydWAghhFgB79/IeXl5SExM1Py9cX6zmfnOxE6EWj1lSRXf09kFyPz9T07XUY/ocLkeV+aeI/t2MVZvu4SSilpI3MV4bnAk+iW0pNEcQghxMLyDnW+//dYa7SB2YGg1kzqfhU8xOnPrC/HdrLPh/k6mrsdVeVUtr+MZlsX/juYg87ebYFkgNNAbU9PjER7sa3FbCCGECI9XsHP+/HncvXsXbdq0QWxsrLXaRGzAGruzm1N0T8jNOs3l68M9gbi0shafb7+ES38WAwBS4kIxblhHeEpp2ooQQhwVp9/QZWVlmDJlCs6dOweWZSESiZCYmIglS5agRYsW1m4jsQJr7c7Ot4qvuZt18h0RMibQl1sRzCt/FmHN9ssorayF1F2MMUM7ok+nFjRtRQghDo5TsPPxxx/j8uXLePnllxEfH4+bN29i1apVmDdvHj777DNrt5FYgTV3ZxeLRQYDpMbJ0HIvKadzNt6sk8+IkDGBMg90aOWHrJxig8EZw7DYdvgWth/+EyyAls18MDU9Hq2a+Vh8fUIIIdbHKdj5+eefMXPmTIwfPx4A0K9fP4SEhGDWrFmoqqrSWZFF6m+QV/4sQt2tYkhELNq39HOo6sn22J3dUDK0j6c7KmuUBl+nb7NOrkFY5/ZBOH/jocHnk2OC8cbqowYTtEsqFFiz7RKybpcAAPp0boExQzrCQ+LG6fqEEELsj1OwU1hYiLi4OK3HevToAZVKhfv376N9+/ZWaZyzsvX+UOawZPWUOYwlQ5uib7NOrkHY8OTW6Nu5hd48ouSYYOw+kau3TSt+uojHerXBL+fuobyqDh4SNzw/LAop8aGcrmsvtt6ElRBCnAGnYEepVEIq1Z5u8PPzAwAoFJZPJbgSIVc4WZMtd2fnkl/j6yWBu5sIJRV/rYzy8XTHkO5hSIxsrnM8n2BNLBYhoX0zrb2xBnRphTc/O2a0TduP5AAAwpr7Ymp6HFoEOfa0lTME2YQQYg8WLyGhujp/scYKJ1fAJb+moroOs0Z1wfU7pdh3KheVNUpU1iiR+fufOPTHfZ0bNp9gTV8
QsONoDiqqTVcBT+gQhKlPxEPq4NNWzhJkE0KIPVi87TOtRPmLEPtDGcMwLLJyinHsch6ycoot2uaA7/YMluCaX/PH9QfI/P2WTv6OoX2s1EvdA2XaU1qBMg/Nzd3QfltcAh0A6BEb4vCBji1/loQQ4ow4j+zMnz8fvr5/FU1Tj+i8/fbb8PH5a3i/KW8XYc0VTkJNUahzOi7nFFll6Xnj65RUKlBWwa1o39FL+Uaf1zcq1i0qGEkxIbhXXIPc+6WQeUk0U1dCLE8XMkHbWqxVRsAWKMeIEGILnIKdpKQkALpTVvoeb8rTWtZa4WRqiiK9T1uk9YoweZPQFzCZYmzjTkM3KX3XEYkAYx8NmbcE5VXGR1sM3bDFYhE6dWiGsCAvKJWM5nFLl6dbq4ih0KwZZFsT5RgRQmyFU7BDW0RwY84KJ1PfbLmMThjKa2nIUMBkir7AzNhNCoDe65iKgXvGhmDfqTsm28Pnhm3pzV2oBG1rs0cZAUtRjhEhxJaoxr2A+K5w4vLNluvohLGbBMOw+GpXFu/3o29kw9RNysfT+Eeq8QiPeisJH08Jp2CHzw2b67FikQhMg0YZ297CEdm6jIClKJGfEGJrFOwI7MbdUpPPN0ycbaxx0MJ3dELfTWLHEd2kXy4aj2xwuUmZug7LAqNSO0DuK9UayWIYlvMNu/FoWGxEoN7juQQBHhI3LH2pN/7MK3favBFblhEQgjPnGBFCnBMFOwJSKhnsOalbpK6hPSdzkd6nHedvtnynHorKFcjKKYZYLEJJpQJybyn2mmhTY4ZGNoTaokHuK0XPWO3ifFxv2GevFeotEDhlZGfEhPvxPufEtBh4erg7/U3VnE1Y7cVZc4wIIc6Lgh0BHTxzx2RuCssC3/1sOklY/c2Wy+hEYyu3XjRrJCctpQ1i2wYaHNkQ6uZjKIAzdcMG9OcDFZUr8MHXJ/Hy052R2KGZzjmnPB6Lr3ZlQVH3V/Kyn48UY4d2dKggwFJ8N2G1F2fMMSKEODcKdgRUUFLN6bj8Ym7HlVQqOI1ONGZOoOPj6Y70vu2M3hiFuPmYyh0xdMMGgNkrjxg99/q92UhoF6T1HvKLq7Dr+G1NoJPQIQiDu4Ujpk2AwwUBQjC2CaujcLYcI0KI87O4qCD5S7C/F6fjQgK4HacOLtQjHv6+3HYIN8eQ7uEmb/7qm5Qxvl4So89zyR1R37B7xoYi+v+DEk55HmXaBRuPX87Hu1+exO38Cvh6STDjmc545ekExEUEumSg4yzUAbwxjpRjRAhxfnYPdhiGwbJly9C3b1906dIFkyZNQm6u4RyTa9euYfLkyejRowdSUlIwffp03Lt3z4YtNiy1axhMFZQWiYBnB0aaDBoaf7PtFhWMxdN6I71PhAAt1ebr6a6zq3hj6qTg7lG6+1Q1NH54FF58Ml7n/TWsamwOPnketXUqfL07C6u3XUJNrQodw/wwf0ISOrdvZvoExCbUAbzQnxNCCNHH7tNYGRkZ2LBhAz788EOEhoZi0aJFmDhxIrZv366z+WhxcTEmTJiArl274ttvv0VtbS0+/PBDTJw4ET/99BM8POw7x+/uLsawpHC9O2mrDUsKh1TqZtbqGbFYhMf7RKBVcx+dvBYfT3ezpq8AYPwj0Ua/RXMpEli/aWe4ZiWY0LkjXKfQVCoW731zCncKKyEC8GivNniiTwTcxHaP60kjzpJjRAhxfnYNdmpra7F27VrMmjULAwYMAAAsXboUffv2xd69e5GWlqZ1/P79+1FVVYWFCxfC09MTALBo0SIMGDAAZ86cQUpKiq3fgo6/pdYPz+85masVDIhE9YGO+nlLV88IUamay7UMLZFXX95DIoaijvn/TTtv4dAf9zR1goTMHeGS5+Hj6Y51e69CUaeC3FuCSY/FIc7AsnTiGJwhx4gQ4vzsGuxkZWWhsrJSK0iRy+WIjY3FyZMndYKdlJQUZGRkaAIdABD//zf2srIy2zSag7+lRmJkv/b45dxdlFY
r4efljgFdWsHdXXt0wZxvtoaCDy6jOgEyD/xjRAzKqms5XYtLXZ2GK5wA61XA5ZKore6D6Nb+mPx4HPx9aTUPIYQQOwc7eXl5AIAWLVpoPR4cHKx5rqGwsDCEhYVpPbZmzRp4enpq9ukyV+NAxFLu7mI82jsCcrkXSkqqcPnWQ5RU1MLfV4qo1torgeLbB3E6J8Ow2GjBxpZjh0Whc6R23grDsMi+Xay3bVf+NL1ZqCEbD1xDUkyIoFMSPeJCIXYTY/2ebBQ1aJebWAQVw0IEIL1fOzzRx/Q+YYQ/Nzex1v+J9VBf2wb1s+3Yu6/tGuxUV9cvwW6cm+Ph4YHSUuOViIH6PbvWrVuHuXPnIjDQ/OkKsViEgAAf0wea4cj5e1iTeQEPS2s0jwX5eWJyeif06tyS17kuXH+gdZM3RO4jRVnlXzuNN/P3wqQn4nWuZ6ptdbeKebWvoaIyBe4V16BTB2GTgoemRGBQj7a4dOMBfj13FwdP5aJOySBA5oFZY7uhcwfjCdTEcnI5t9WExHLU17ZB/Ww79upruwY76umo2tparakphUIBLy/DHcKyLD755BOsXLkSU6dOxbhx4yxqB8OwKCursugc+py5WoiPv/9D5/GHpTWaInhJ0dynenLvmw4Agfrk5kCZh85oTXFxpeaYk1kFWL75vNG2+ZrY54pLe8OChP9g19Qq8b/fb+LIxfrRv/h2QXj9+e5wY1mt90iE5eYmhlzuhbKyaqhUjOkXELNRX9sG9bPtWKOv5XIvziNFdg121NNXBQUFaN26tebxgoICREVF6X1NXV0d3nzzTezYsQNvvvkmXnjhBUHaolQK+0FnGBbfmNh8c/0e3SJ4xshM1LBR8/OWIjLMX6stDMNq/X3dnmyTbftwSgrv6s2N2yt0v+YWVGBl5kXkFVVBJAKe7NsOj/eNQIDME8XFlYJfj+hSqRjqZxuhvrYN6mfbsVdf23WiMjo6Gr6+vjh+/LjmsbKyMly+fNlgDs7rr7+O3bt3Y8mSJYIFOtZwNbfE5JSTeksIrrgU9eNSeZbrRozX75aaLP5mSTv4YFkWv5y9i39/fQp5RVUIkHngjee6Iq1XW4hNFTcihBDSpNk12JFKpRg7diwWL16MAwcOICsrC6+++ipCQ0MxdOhQqFQqFBYWoqamPqfkxx9/xM6dO/Hqq68iOTkZhYWFmv/UxzgKa2x2KFTlWT5tM1T8TYhKyVxVK5RYve0SvtmTDaWKQad2QZg/IYm2EyCEEMKJ3YsKTp8+HUqlEnPnzkVNTQ2SkpLwxRdfQCKR4M6dOxg0aBA++OADjBw5Ejt27AAALFy4EAsXLtQ6j/oYR1FQxG3/K777TQmxuzXfjRgNLZE3tAO5ubtsq6s0N7xGbkEFVm69iILiaohFIjw1oB2GJbd2+dEcfX1BK8wIIcQ8IlaI6nROTqViUFQkXGKroVo4jQXKPLBwai+zbmKW3AwZhsXslUdMbsTIpW1C3ZT1VWn28nCHok4FhmERJPfAlCfi0aGVn85r3d3FCAjwcZmcHX19ESDz0BRrtBdX62dHRn1tG9TPtmONvg4M9HGOBGVXxKUQn5olUz2WVJ7lUqCPa9uEqIBrKDisVtQXCWwbKsPMZ7uYnDpzBYb6wlrFGgkhpCmgSkoC45L8CwDpfdra9aZlKBdH5iXBkO5h8PGUaK3gshYuwWFphQLeHq4fl3Ppi437r9nk50IIIa7E9e8gNsY1+Tc40NvKLalnbJqpYS7O2WuFOHYpH+XVddh36g72nbpjk6kTLsFhcUUtruaWWDSC5Aw5MFxXyVnaF4QQ0tRQsCMwayUmm4NL7odYLEJlTX2A05gtpk6ssWqtMUfNgWnMFn1BCCFNEU1jCYhhWBz6457J4wJ8pVZfNq3O/Wg8UqAOYE5nFwCw/9RJZbXpDUwB84NDrv3gCPiukiOEEMINBTsC4pqv079LS6tOofAJYPhMnQiJYVnsOp6DTQdMJ3ObW6DQ3oEcX0I
VjSSEEKKNgh0BOUq+zo4jtzgHMPaYOimvqsWyzefxw883oGJYvcvJGzJ31Zq9AjlzCVU0khBCiDbK2REQn2kIayXMns4uQObvf3I6Vn1tLoSaOrmaW4LV2y6huFwBdzcxnhsSif4JLXHmqrAFCgHnzIERomgkIYQQbRTsCEg9DWGqWF95dZ1OUT8hEmb51PgBoAmyuLTZ0qkThmXxv6M5yPztJlgWCA30xtT0eIQH+wIwXKXZkgDQWXNgrNEXhBDSlNE0loC4TEMkxwRjZaZ1Ema55gwBfwUwtpg6Ka2sxdLvzuGnX+sDnZS4EMx7obsm0FFTFyjsGRuK6DYBFt/cnTkHRui+IISQpoyCHYEZKtYXKPPA1PQ4HL9iPJixJGGWz3RMwwDGWJstXXZ+JacY89eewKU/iyF1F2PCiGhMTIuFp9T6g4qUA0MIIQSgaSyrUE9D3LhXijpWBImIRfuWflYvGsd1Oia9T4ROACP01AnDsNh+5E9sO3wLLAu0bOaDqU/EoVVzX9MvFhDlwBBCCKFgx0rEYhFi2gZqbXxmTsJsw0RmubcUYIGy6lq9wQiX/JsAXynSerU12GYhKvOWVCiwZtslZN0uAQD06dQCY4Z0hIfUzeJzm4NyYAghpGmjYMeG+CbM6qv825C+asimNvh8bkhHq97kL90qwmfbL6Gsqg4eEjeMG9YRveJbWO16XAkVyBFCCHE+lLNjQx1a+UFmYududcKsocq/DelLarZm/o0xKobBj7/ewH+/O4eyqjqENffBvBe6O0SgQwghpGmjkR0bYBgW236/iX2n7qCyxvj2CKP/P6GWzxLyjfuvITGyud4NPm0xbVNUVoM12y7h6p1SAMCALi0xalAkpBL7TFsRQgghDVGwY2VHzt/D8u/PoaK6jvNr+CwhB3STmm25w/f5Gw/w+Y4rqKiug6fUDeOHR6NHbIhVrkUIIYSYg4IdKzqZVYDlm8/zes3G/dcwckA73tdSJzXz2eHbkqBIqWLw4683sfv4bQBA6xBfTH0iHiGNtsKwZeBFCCGE6EPBjpUwDIv1e7J5v66oXIGKSu6jQGr+Ph6aPJ/G1Lk9DXN2+ARFjT0srcGqbRdx424ZACC1ays8m9oBEnftaStLrkEIIYQIhRKUreRqbgmKeExFNSTzlpqs/NtQoMwDHVr5cd7h21DyM5cqzmevFWL+lydw424ZvDzcMS09HmOHRukNdMy9BiGEECIkCnasxJLNJdWjH1yNHhyJ63dLORUszLpdzDkoakipYrDpwDUs33IBlTVKRLSQ4Z0JSegerTtCw2WPLksqRRNCCCF80DSWlci9pWa9ztdLoslr0Vf5t6FAmQeeHRQJH08JTnEcKcnKKeZdxbmwpBqrtl7ErfvlAIAh3cPxzMD2cHfTHytbu1I0IYQQwgcFO1ZwOrsA6/ddNe/F7F+jHY2XkDeuoFxeXYtNBwwHQ3pxzA1umPC8dmcWqhVKeHu44x+PxiCxY3NOr+V6DUIIIcSaKNgRmKEkYa4qapTYfyoXcl+pZvWSvtGP09kFWJl5ide5A2UeiA4PwA7kmDzW11OC9Xuv4sCZOwCA9i3lmPJEHJr5eZl8Ld9K0YQQQog1UbAjIC65KlxsOnhd82d9q5fMvc7owZGIbhNgcv8sPx8pNh+6gdv5FQCAR3q0xpP92hmctmqMyx5d6krRhBBCiLVRgrKA+BYD5ELf6iW+12m4VYR6/yxjqhRK3M6vgK+XBDOe6YxnBnbgHOgA4HSN0YMjqd4OIYQQm6CRHQFZMwel4ZYQXK+T2rUVukcF6xTyU++f1Tj52UMihqKOQZ2SQWSYH6Y8HodAuadZ7TV0jUCZB0ZTnR1CCCE2RMGOgKyZg1JUrsD+U7kY3D2c83W6RwUbXO3UMPn5z/wy/HzmLgpLaiACMCKlDdL7RsBNbNnAn6336CKEEEL0oWBHQFxyVSyx6eB17DmZi1GDOgiSEyMWi1BcrsDW3/6Eok4FmbcEkx6
LRXxEkGBtFotFtLycEEKIXVHOjoDEYhF6xFh3eqa4XIGVmZdMXsdUToyiToW1O6/gsx2XoahTIbq1P979e7KggQ4hhBDiCGhkR0AMw+K38/dtcq0TVwowNT1ep84Ol5yYuw8qsTLzIu49qIQIwGO92+Lx3hE0vUQIIcQlUbAjoKzbxaisUdrkWkXlCsi8JFg0tRfnnBiWZfH7hftYv/cqapUM/HykmPxYLGLaBtqkzYQQQog9ULAjoKycYpter6RSwTknpqZWiW/3XMXRS3kAgNi2AZj0WBz8fMzb1oIQQghxFhTsCIi18b6WXFdl5RZUYNXWi7j/sAoiEZDetx0eTWkDsYimrQghhLg+CnYE5OPFrTv7J7TA+ZtFRldTiUTGgycuq61YlsWhP+5h4/5rqFMy8PeVYsrjcYhqTaujCCGENB20GktAcl9uU0KRrf1NVhgelhRu9HlTq62qFUqs3nYJ3+zORp2SQad2QZj/92QKdAghhDQ5NLIjIH9vbtNKck8pLucUQQSg8eCNp9QN/3g0Bt2igtG+lZ/JCsQMw+okKOcWVGDl1osoKK6GWCTCU/3bYViP1jRtRQghpEmiYEdIHGOJ5T9dQJ2S0ftcTa0KN+6WoltUsMkKxKezC3SCIS8PdyjqVGAYFoFyD/zz8Xh0CPOz+K0RQgghzoqCHQGVVdVyOs5QoKO252QuRvZrD3d3scHVVqezC7Dip4s6j1cr6pe+tw2VYeazXeDrJeHUJkIIIcRVUc6OgITaG4tlgYNn7hh8nmFYbNh/zeg5SisU8PagWJYQQgihYEdAHcP94c8xSdmUgpJqg89dzS0xuf9WcUUtruaWCNIWQgghxJlRsCMgsViEAV1aCnKuYH8vg8+VVHLbaJTrcYQQQogro2BHYMGB3hafQyQCUruGGXy+qprblhRCTasRQgghzoySOgQmRICR0D4I7u66cSjDsthz4jZ+PHTT5Dm4FB0khBBCmgIKdgRWXs1tRZYxt/MrwDCsVtHA8qpafPG/Kzh/4yEAoEMrP1y/W2rwHKaKDhJCCCFNBQU7AmIYFpsOXLf4PEXlClzNLdEsOb+aW4LV2y6huFwBdzcxnhscif5dWuLM1UKTRQcJIYSQpo6CHQFxWSXFVUmlAgzLYtexHPz06y0wLIuQQG9MfSIOrUNkAGCy6CAhhBBCKNgRVFFFjWDnkriJsfT7P3DpVhEAICUuBOOGRcFTqv0jM1R0kBBCCCH1KNgRUEVlnSDnkXlJsG7vVZRW1kLqLsaYIR3Rp3MLiGhvK0IIIYQ3CnYEJPMWpqBgRXUdWAAtgrwxLT0erZr7CnJeQgghpCmiOjsCCpBxX3ae3idC53h3t/qRGxZAn04tMG98EgU6hBBCiIVoZEdAHcP9ESDzMJmkHOArRVqvtkjr1RZXc0tw8c+H+OXMPVQplJBKxHh+WBR6xbewUasJIYQQ10YjOwISi0V4bnCkyeOeG9IRYrEILFhczinCrqO3UaVQIqy5D955IYkCHUIIIURANLJjY8OSwtAtKhjF5Qqs3nZJs1ln/y4tMXpQJKQSN/s2kBBCCHExFOwIiGFYbNh/zegxe0/egbu7Gw6du4eK6jp4SN3wwvBo9IgNsVErCSGEkKbF7tNYDMNg2bJl6Nu3L7p06YJJkyYhNzfX4PHFxcV47bXXkJSUhOTkZLz77ruorq62YYsN41JUkAXwv6M5qKiuQ+tgX8x/IYkCHUIIIcSK7B7sZGRkYMOGDfj3v/+NTZs2gWEYTJw4EbW1+veYmj59OnJycvDVV1/hk08+waFDhzB//nzbNtqAkkru1ZM9JGK8ObYrQgTYJZ0QQgghhtk12KmtrcXatWsxffp0DBgwANHR0Vi6dCny8vKwd+9enePPnj2LEydO4KOPPkJcXBxSUlKwYMECbN26Ffn5+XZ4B9r47HiuqGNw6365FVtDCCGEEMDOwU5WVhYqKyuRkpKieUwulyM2NhYnT57UOf7UqVNo3rw
52rdvr3ksOTkZIpEIp0+ftkmbjekY7g8fT+5pUHxGggghhBBiHrsmKOfl5QEAWrTQXmodHBysea6h/Px8nWOlUin8/f1x//59i9ri7i5M3Dc0uTV++vUmp2OD5J6CXbcpc3MTa/2fWAf1s+1QX9sG9bPt2Luv7RrsqBOLpVLtbRY8PDxQWlqq9/jGx6qPVyjMHyURi0UICPAx+/UNjX8sHvtO3kZFtdLocc38PdEjIQxutEO5YORyL3s3oUmgfrYd6mvboH62HXv1tV2DHU9PTwD1uTvqPwOAQqGAl5duh3h6eupNXFYoFPD2Nj/Rl2FYlJVVmf36xiY8Govlm88bPWb04I4oKxXumk2Zm5sYcrkXysqqoVIx9m6Oy6J+th3qa9ugfrYda/S1XO7FeaTIrsGOekqqoKAArVu31jxeUFCAqKgoneNDQ0Oxf/9+rcdqa2tRUlKC4OBgi9qiVAr3QU/s0AwvPhmPr3dloaJGe4TH10uC8cOjkNihmaDXJIBKxVCf2gD1s+1QX9sG9bPt2Kuv7RrsREdHw9fXF8ePH9cEO2VlZbh8+TLGjh2rc3xSUhIWL16MnJwctGnTBgBw4sQJAEC3bt1s13AOukUFIykmBHceVuPExXtgGBbRbQIQ3ToAYpq6IoQQQmzGrsGOVCrF2LFjsXjxYgQGBqJVq1ZYtGgRQkNDMXToUKhUKhQVFUEmk8HT0xMJCQno2rUrXn31VcyfPx9VVVWYN28e0tPTERLieIX5xGIREjo2R+vm3vStgRBCCLETu6egT58+HU8//TTmzp2L0aNHw83NDV988QUkEgnu37+PPn36YOfOnQAAkUiETz/9FGFhYRg/fjxmzJiBfv36OUxRQUIIIYQ4HhHLsqy9G2FvKhWDoqJKwc/r7i5GQIAPiosraWTHiqifbYP62Xaor22D+tl2rNHXgYE+nBOU7T6yQwghhBBiTRTsEEIIIcSlUbBDCCGEEJdGwQ4hhBBCXBoFO4QQQghxaRTsEEIIIcSlUbBDCCGEEJdGwQ4hhBBCXBoFO4QQQghxaVRBGQDLsmAY63SDm5tYsO3siWHUz7ZB/Ww71Ne2Qf1sO0L3tVgsgkjEbWNtCnYIIYQQ4tJoGosQQgghLo2CHUIIIYS4NAp2CCGEEOLSKNghhBBCiEujYIcQQgghLo2CHUIIIYS4NAp2CCGEEOLSKNghhBBCiEujYIcQQgghLo2CHUIIIYS4NAp2CCGEEOLSKNghhBBCiEujYIcQQgghLo2CHQswDINly5ahb9++6NKlCyZNmoTc3FyDxxcXF+O1115DUlISkpOT8e6776K6utqGLXZOfPv52rVrmDx5Mnr06IGUlBRMnz4d9+7ds2GLnRPffm5o27ZtiIqKwp07d6zcStfAt6/r6uqwZMkSzfFjx47FlStXbNhi58S3nx8+fIjXXnsNPXv2RI8ePfDqq68iPz/fhi12DatXr8a4ceOMHmPr+yEFOxbIyMjAhg0b8O9//xubNm0CwzCYOHEiamtr9R4/ffp05OTk4KuvvsInn3yCQ4cOYf78+bZttBPi08/FxcWYMGECPD098e233+Kzzz5DUVERJk6cCIVCYYfWOw++n2e1u3fvYsGCBTZqpWvg29fz58/Hjz/+iPfffx9btmxBYGAgJk2ahPLychu33Lnw7ecZM2bg3r17+PLLL/Hll1/i3r17ePHFF23caue2fv16fPzxxyaPs/n9kCVmUSgUbGJiIrt+/XrNY6WlpWznzp3Z7du36xx/5swZtmPHjuz169c1j/32229sVFQUm5eXZ5M2OyO+/fz999+ziYmJbHV1teaxe/fusR07dmSPHDlikzY7I779rKZSqdjRo0ezzz//PNuxY0c2NzfXFs11anz7+vbt22xUVBT7888/ax0/cOBA+kwbwbefS0tL2Y4dO7IHDhzQPLZ//362Y8eObHFxsS2a7NTy8vLYKVOmsF26dGGHDx/Ojh071uCx9rgf0siOmbK
yslBZWYmUlBTNY3K5HLGxsTh58qTO8adOnULz5s3Rvn17zWPJyckQiUQ4ffq0TdrsjPj2c0pKCjIyMuDp6al5TCyu/5iXlZVZv8FOim8/q61atQp1dXWYMmWKLZrpEvj29eHDhyGTydCvXz+t4w8ePKh1DqKNbz97enrCx8cHmZmZqKioQEVFBbZu3YqIiAjI5XJbNt0pXbp0CRKJBNu2bUNCQoLRY+1xP3S3ylmbgLy8PABAixYttB4PDg7WPNdQfn6+zrFSqRT+/v64f/++9Rrq5Pj2c1hYGMLCwrQeW7NmDTw9PZGUlGS9hjo5vv0MAOfPn8fatWuxefNmymvggW9f37p1C+Hh4di7dy/WrFmD/Px8xMbGYs6cOVo3C6KNbz9LpVJ8+OGHmDdvHrp37w6RSITg4GCsW7dO84WJGJaamorU1FROx9rjfkg/QTOpE6mkUqnW4x4eHnpzQ6qrq3WONXY8qce3nxv79ttvsW7dOsyaNQuBgYFWaaMr4NvPVVVVmDVrFmbNmoW2bdvaookug29fV1RUICcnBxkZGZg5cyZWrlwJd3d3PPfcc3j48KFN2uyM+PYzy7K4cuUKEhMTsX79enz99ddo2bIlpk2bhoqKCpu0uamwx/2Qgh0zqadJGie6KRQKeHl56T1eX1KcQqGAt7e3dRrpAvj2sxrLsvj444/x3nvvYerUqSZXBjR1fPv5vffeQ0REBEaNGmWT9rkSvn3t7u6OiooKLF26FH369EHnzp2xdOlSAMBPP/1k/QY7Kb79vGvXLqxbtw6LFi1Ct27dkJycjFWrVuHu3bvYvHmzTdrcVNjjfkjBjpnUQ3AFBQVajxcUFCAkJETn+NDQUJ1ja2trUVJSguDgYOs11Mnx7Wegfpnu7NmzsWrVKrz55puYMWOGtZvp9Pj285YtW3DkyBEkJiYiMTERkyZNAgCkpaVh1apV1m+wEzPnd4e7u7vWlJWnpyfCw8Npqb8RfPv51KlTiIiIgK+vr+YxPz8/REREICcnx7qNbWLscT+kYMdM0dHR8PX1xfHjxzWPlZWV4fLly3pzQ5KSkpCXl6f1j+bEiRMAgG7dulm/wU6Kbz8DwOuvv47du3djyZIleOGFF2zUUufGt5/37t2LHTt2IDMzE5mZmXjvvfcA1OdH0WiPceb87lAqlbhw4YLmsZqaGuTm5qJNmzY2abMz4tvPoaGhyMnJ0ZpGqaqqwp07d2iqVmD2uB9SgrKZpFIpxo4di8WLFyMwMBCtWrXCokWLEBoaiqFDh0KlUqGoqAgymQyenp5ISEhA165d8eqrr2L+/PmoqqrCvHnzkJ6ebnCEgvDv5x9//BE7d+7E66+/juTkZBQWFmrOpT6G6OLbz41vsuqEz5YtW8Lf398O78B58O3r7t27o1evXnjjjTewYMEC+Pv7Y9myZXBzc8MTTzxh77fjsPj2c3p6Or744gvMmDEDr7zyCgDg448/hoeHB0aOHGnnd+PcHOJ+aJUF7U2EUqlkFy5cyPbs2ZPt0qULO2nSJE2dkdzcXLZjx47sli1bNMc/ePCAffnll9kuXbqwPXr0YN955x22pqbGXs13Gnz6ecKECWzHjh31/tfwZ0F08f08N3Ts2DGqs8MD374uLy9n33nnHbZHjx5sQkICO2HCBPbatWv2ar7T4NvP169fZ6dMmcImJyezPXv2ZF966SX6TJvhjTfe0Kqz4wj3QxHLsqx1wihCCCGEEPujnB1CCCGEuDQKdgghhBDi0ijYIYQQQohLo2CHEEIIIS6Ngh1CCCGEuDQKdgghhBDi0ijYIcTFUXWJpol+7oT8hYIdQjiYM2cOoqKiDP7Xu3dvezdRr2vXrmH06NGCnOv48eOIiorSKr/fmLqf+vXrZ/Bmu3jxYkRFRdHmrFaSl5eHyZMn4+7duyaPraurw8iRI3HkyBEA+j/ncXFx6NOnD2bPno379+8DAH788Uej/x7U/xk7tnPnzkhNTcWCBQu0dhX/5JN
PMH/+fOE7hjRptF0EIRw1b94cn376qd7nJBKJjVvDze7du3H27FmbXlMsFiM/Px9nzpzRu8/Nzp07bdqepubIkSM4dOgQp2NXrVqF0NBQ9OrVS/NY48+5UqnErVu3sHjxYpw9exY7duzAgAED8N1332mO+eWXX7By5Up8+umnaN68ud5rNX6utLQUv/32G7799lsUFRXh448/BgBMnjwZw4YNw7Bhw5CSksLnrRNiEAU7hHAklUrRpUsXezfD4bVo0QIsy2LXrl06wc65c+eQn5+Pjh072ql1RK2goABr1qzBxo0btR7X9znv3r07JBIJ3njjDRw4cACPPvooAgMDNc/fvHkTABATE4OwsDC919P3XP/+/fHw4UPs2rULlZWV8PHxgZeXF8aPH48PPvgA27ZtE+CdEkLTWIQI6uLFi4iLi8OcOXM0jz18+BApKSmYMGECWJbVDOv/8ccfePLJJ9G5c2c89thj2L17t9a5FAoFFi5ciP79+yM+Ph6PPfaYzqgIy7L46quv8Mgjj6Bz584YMmQIvvjiC7Asi+XLl2u+oUdFRWH58uUAAIZhsGbNGgwZMgTx8fEYNmwYvv32W533smnTJgwbNgydO3fG2LFjce/ePc79MHz4cOzdu1dnKmvnzp3o1auX3s1Cf/jhBzz66KOIj4/HgAEDsHz5cqhUKp1jRo4ciS5duqBz58544oknsGvXLs3zDMNg6dKlSE1NRXx8PFJTU7FkyRLU1dUBMDwVN27cOK1ptdTUVLz//vsYP348OnfujLfeegsAUFJSgnnz5qFXr17o1KkT/va3v+Ho0aNa54qKisLGjRsxZ84cdOvWDcnJyXjvvfdQU1ODjz76CD179kSPHj3w1ltvae2wzeXnMm7cOLz11ltYs2YNBgwYgE6dOmHUqFE4f/48gPopozfffBMAMGjQIK3PYWNffvklWrZsifj4eIPHNNSpUycA4DQ9xodMJoNIJIJIJNI8lpaWhmvXruGXX34R9Fqk6aJghxAelEql3v/UN/X4+HhMmjQJP/30k+YmOG/ePDAMgw8//FDrF/qUKVMwaNAgfPrpp4iIiMCMGTM00w8sy+LFF1/Epk2bMGHCBKxcuRKJiYl49dVXkZmZqTnHwoULsXDhQqSmpmLVqlV4+umnsXjxYqxZswbPPPMMnn76aQDAd999h2eeeQYAMH/+fCxbtgyPP/44Vq1aheHDh+P999/HihUrNOddt24d3nnnHfTv3x8ZGRlISEjA22+/zbmfRowYoZnKUmMYBrt378ajjz6qc/zq1avx9ttvIyUlBatWrcKYMWPw2WefaV1z/fr1mDdvHgYPHozVq1dj8eLFkEqlmDVrlmbX9c8++wwbN27Eiy++iLVr12L06NH44osvsHLlSs5tb3i9Tp06ISMjA08//TQUCgXGjx+PAwcO4NVXX8Wnn36K0NBQTJw4USfgWbRoEaRSKT799FOkp6fj22+/RXp6Ou7fv4/Fixdj3Lhx2Lx5s1Yww+XnAgB79uzBgQMHMHfuXPz3v//FgwcP8PLLL0OlUmHAgAGYOnUqgPppo2nTphl8f9u3b8ewYcM498etW7cAAK1bt+b8moYYhtH8e6mrq8PDhw+xefNm/PTTTxgyZAi8vb01x4aEhKBLly7Yvn27WdcipDGaxiKEo7t37yIuLk7vc6+//jr+8Y9/AABefPFFHDx4EO+++y4mT56M/fv345NPPkFISIjWa8aNG4cXX3wRANC3b188+eSTWLFiBfr3748jR47gt99+w9KlSzFixAjNMdXV1Vi8eDHS0tJQVVWFb775BmPHjsXs2bMBAL169UJhYSFOnjyJKVOmIDQ0FAA00xK3bt3C999/j5kzZ2Ly5MkAgD59+kAkEmH16tV47rnn4O/vj4yMDIwYMQL/+te/NMdUVFRg06ZNnPqqU6dOCA8P15rKOnXqFEpKSjB48GBs2bJFc2x5eTkyMjLw7LPPYu7cuZrr+fv7Y+7cuZg
wYQIiIyORm5uLf/zjH1o38FatWmHkyJE4ffo0Hn30UZw4cQLx8fF46qmnAADJycnw8vKCTCbj1O6GWrZsiVmzZmn+/v333yMrKwvff/89EhISAAD9+vXDuHHjsHjxYq331KFDByxYsEDThh9++AF1dXVYvHgx3N3d0adPH+zZs0cTDHL5uQQEBACoD7i/+OIL+Pr6AgAqKyvxxhtv4MqVK4iPj9cEI8amlG7cuIHCwkJ07txZ7/NKpVLz54qKCly4cAEffPABwsLCMGDAAN59CQBDhgzReaxZs2Z47rnnMH36dJ3nOnXqhB07dph1LUIao2CHEI6aN29ucISgRYsWmj9LJBJ89NFHeOaZZ/DWW2/hySefxPDhw3Ve8+STT2r+LBKJMGTIECxfvhw1NTU4evQoRCIR+vfvr3XjSU1NxbZt23Dt2jUUFhZCqVRi6NChWudVBwz6HDt2DCzLIjU1Vee8K1euxOnTpxEREYGHDx9i4MCBWq995JFHOAc7QP3oTmZmJt566y2IRCL873//w4ABAzQ3abWzZ8+ipqZGb5sA4PDhw4iMjNRMyZSVleHmzZvIycnRTEfV1tYCAHr06IElS5bgueeeQ2pqKgYMGICxY8dybnNDMTExWn8/evQomjdvjri4OK12Dhw4EAsXLkRpaSn8/PwAAImJiZrn3dzcEBAQgLi4OLi7//Ur19/fH+Xl5QC4/VwGDx4MoD6QatiH6iC6urqa83vLzc0FAL3BkKGgPiEhAQsWLICnpyfn6zS0cuVKNG/eHHV1dfjxxx+RmZmJ6dOn49lnn9V7fKtWrfDw4UNUV1fDy8vLrGsSokbBDiEcSaVSTd6CKTExMYiKisLFixd1gga14OBgrb8HBQWBZVmUlZWhpKQELMuia9euel9bUFCA0tJSANBKFDWlpKQEAPROJQFAfn6+5nzqkQQ1Q6tsDBkxYgRWr16NM2fOoEuXLti7d6/eJcXqNqlHNBorKCgAANy+fRvz5s3D0aNHIZFI0K5dO0RHRwP4q6bMxIkT4ePjgy1btmDx4sVYtGgRIiMjMXfuXPTs2ZNX+xtOq6jbWVhYaHB0r7CwUBPsNA7o9J2v8bkB4z8XtcY3frG4PhuBYRiD529MHWTpCyIaB/VSqRShoaGa92aujh07aoKrrl27QqlUYt68efD19dX7vtX9VV5eTsEOsRgFO4RYwXfffYeLFy8iOjoa//nPf5CSkgK5XK51TElJCZo1a6b5+4MHD+Dm5gZ/f3/IZDJ4e3vjm2++0Xv+Nm3aaKZAioqK0K5dO81z9+7dw+3bt/Uu+1a34euvv4aPj4/O8y1btkRZWRmA+sTqxu3lIzo6GhEREdi9ezdqamqgUCj0ToGo27R48WK0bdtW5/lmzZqBYRhMnjwZEokEmzdvRkxMDNzd3XH9+nVs3bpVc6xYLMaYMWMwZswYPHz4EIcOHcKqVavw8ssv4/Dhw5qcqcaBgXolkDEymQxt27bF4sWL9T5vaMqICy4/FyGpA1n1z7ohPkG9JebOnYvDhw9j/vz56NGjh9a/BaB+abpIJNKbzE4IX5SgTIjA7t69i48++ghPP/00Vq1ahfLycvznP//ROW7//v2aP7Msi71796Jbt26QSqVITk5GVVUVWJZFp06dNP9dvXoVK1asgFKpROfOnSGRSPDzzz9rnXft2rWYOXMm3NzcNN/61bp37w4AKC4u1jpvUVERPvnkE5SUlKBt27Zo0aKFzuqwxtfhYsSIEdi7dy927tyJIUOGwMPDQ+eYhIQESCQS5Ofna7XJ3d0d//3vf3Hnzh0UFxfj1q1bePrppzXPAcCvv/4K4K/gZdSoUXjvvfcA1I+UjRw5EmPGjEFZWRkqKio0Iy7qhGag/qZ648YNk+8lOTkZ9+/fR1BQkFY7Dx8+jM8//xxubm68+0eNy8+Fq8Y/c33UwVPDfrA1X19fvPnmmygrK8OSJUt0ns/Ly0OzZs0glUrt0DriamhkhxCOamt
rce7cOYPPR0VFwdPTE2+99Ra8vLzw+uuvw8/PDzNmzMD777+PYcOGafJQgPqVVAqFAhEREfjhhx9w48YNfP311wDq648kJSVh2rRpmDZtGtq3b4/z589j2bJl6Nu3r2aq6fnnn8dXX32lCZD++OMPbNy4Ea+//jrEYrFmxGDHjh1ISEhAVFQUHn/8cbz99tu4e/cu4uPjcevWLSxduhRhYWFo27YtRCIRZs2ahddeew1z587F8OHDce7cOZ16LFyMGDECK1aswNatW5GRkaH3mICAAEycOBGffPIJKioq0KNHD+Tn5+OTTz6BSCRCdHQ0ZDIZWrVqhfXr1yM0NBRyuRy//fabZuRLna+SlJSEtWvXolmzZkhMTER+fj6+/PJLJCcnIzAwEH5+fmjRogVWrFgBX19fTQIwl2mSkSNHYt26dZgwYQL++c9/okWLFjhy5Ag+++wzjB071qLCklx+Llypf+b79u1Dv3790L59e51j2rVrh5YtW+L06dN6E4dtZcSIEdiwYQN++uknjB49With+syZM+jbt6/d2kZcCwU7hHBUWFhoMJkSADIzM3HmzBkcPXoUH3/8sSbHYdy4cdi+fTvmzZunlYMzf/58rF69Grm5uYiNjcXatWs13/DFYjHWrFmDTz75BKtXr8bDhw8REhKCCRMmaFZwAcDs2bMRFBSETZs24fPPP0dYWBjefvttjBo1CgAwdOhQbN26FXPmzMHTTz+N+fPn44MPPsDq1auxadMm5OXlISgoCCNGjMCMGTM0oxNpaWkQi8XIyMjA1q1b0bFjRyxYsAAzZ87k1WcdOnRAx44dUVhYqFWlt7EZM2agefPm2LBhAz7//HP4+fkhJSUFM2fO1KykysjIwH/+8x/MmTMHUqkUHTp0wMqVK/H+++/j1KlTGDduHF555RVIpVJs2bIFK1asgEwmQ2pqKl577TUA9cnCy5Ytw/vvv4+ZM2eiWbNmGD9+PG7evKlZWm2It7c31q9fjyVLlmDRokUoLy9Hq1at8Nprr+Hvf/87r37Rh8vPhYsePXqgV69eWLJkCY4ePYo1a9boPW7YsGH49ddfjdbisYW5c+di5MiRWLBgAX744QeIRCIUFBQgKysLr7zyil3bRlyHiKXd4gixKXXhtwMHDliU50GIJfLz8zF48GCsXbsWSUlJ9m6OlhUrVmDfvn346aeftGpTEWIuytkhhJAmKCQkBC+88AI+++wzezdFS2VlJTZu3IiZM2dSoEMEQ8EOIYQ0US+//DLy8/Px+++/27spGmvWrEFqair69etn76YQF0LTWIQQQghxaTSyQwghhBCXRsEOIYQQQlwaBTuEEEIIcWkU7BBCCCHEpVGwQwghhBCXRsEOIYQQQlwaBTuEEEIIcWkU7BBCCCHEpVGwQwghhBCX9n+YqoGTJn85RgAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#predict the input from the trained model (transductive evaluation of the model)\n", + "example_smiles=config.data.get_sets()[0]\n", + "expected = config.data.get_sets()[1]\n", + "predicted = model.predict_from_smiles(example_smiles)\n", + "\n", + "# Plot expected vs predicted values for the best model.\n", + "ax = plt.scatter(expected, predicted)\n", + "lims = [expected.min(), expected.max()]\n", + "plt.plot(lims, lims) # Diagonal line.\n", + "plt.xlabel(f\"Expected {config.data.response_column} (PTR)\");\n", + "plt.ylabel(f\"Predicted {config.data.response_column}\");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Interlude: Cautionary advice for PRF ∆y (response column) validity" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "N.B It is not possible to train on response column values outside the likelihood for y-label memberships (ranging from 0-1), as expected for `∆y`. Doing so will result in the following error from QSARtuna:" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:19:17,760] A new study created in memory with name: my_study\n", + "[I 2024-07-02 13:19:17,800] A new study created in memory with name: study_name_0\n", + "[W 2024-07-02 13:19:17,801] Trial 0 failed with parameters: {} because of the following error: ValueError('PRFClassifier supplied but response column outside [0.0-1.0] acceptable range. 
Response max: 9.7, response min: 5.3 ').\n", + "Traceback (most recent call last):\n", + " File \"/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/optuna/study/_optimize.py\", line 196, in _run_trial\n", + " value_or_values = func(trial)\n", + " File \"/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/optunaz/objective.py\", line 128, in __call__\n", + " self._validate_algos()\n", + " File \"/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/optunaz/objective.py\", line 264, in _validate_algos\n", + " raise ValueError(\n", + "ValueError: PRFClassifier supplied but response column outside [0.0-1.0] acceptable range. Response max: 9.7, response min: 5.3 \n", + "[W 2024-07-02 13:19:17,807] Trial 0 failed with value None.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "As expected, training the PRF on the raw pXC50 values resulted in the following error:\n", + "\n", + "\"PRFClassifier supplied but response column outside [0.0-1.0] acceptable range. 
Response max: 9.7, response min: 5.3 \n" + ] + } + ], + "source": [ + "# Prepare problematic hyperparameter optimization configuration without PTR.\n", + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"Smiles\",\n", + " response_column=\"Measurement\",\n", + " training_dataset_file=\"../tests/data/pxc50/P24863.csv\"),\n", + " descriptors=[\n", + " ECFP.new(),\n", + " ],\n", + " algorithms=[\n", + " PRFClassifier.new(n_estimators={\"low\": 5, \"high\": 10}), #n_estimators set low for the example to run fast\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.REGRESSION,\n", + " cross_validation=2,\n", + " n_trials=2,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " ),\n", + ")\n", + "\n", + "try:\n", + " study = optimize(config, study_name=\"my_study\")\n", + "except ValueError as e:\n", + " print(f'As expected, training the PRF on the raw pXC50 values resulted in the following error:\\n\\n\"{e}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To summarise:\n", + "1. PRF handles probability of `y` or `∆y` labels, which range between [`0-1`]\n", + "2. PRF is evaluated in a probabilistic setting via conventional regression metrics (e.g. `RMSE`, `R2` etc.), despite the fact that PRF can be considered a modification to the classic Random Forest classifier\n", + "3. Probabilistic output is the probability of activity at a relevant cutoff, similar to a classification algorithm\n", + "4. Outputs reflect likelihoods for a molecular property to be above a relevant threshold, given experimental uncertainty (and arguably a more useful component for use within a REINVENT MPO score)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ChemProp" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "QSARtuna has the functionality to train `ChemProp` deep learning models. 
These are message passing neural networks (MPNNs) based on a graph representation of training molecules. They are considered by many to offer the state-of-the-art approach for property prediction.\n", + "\n", + "`ChemProp` was first described in the paper Analyzing Learned Molecular Representations for Property Prediction: https://pubs.acs.org/doi/full/10.1021/acs.jcim.9b00237\n", + "\n", + "More information is available in their slides: https://docs.google.com/presentation/d/14pbd9LTXzfPSJHyXYkfLxnK8Q80LhVnjImg8a3WqCRM/edit" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `ChemProp` package expects SMILES as molecule inputs, since it calculates a molecule graph directly from these and so expects SMILES as descriptors. The `SmilesFromFile` and `SmilesAndSideInfoFromFile` descriptors (more about this later) are available for this purpose and are _only_ supported by the `ChemProp` algorithms:" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Smiles based descriptors:\n", + "(, )\n" + ] + } + ], + "source": [ + "from optunaz.config.optconfig import ChemPropRegressor\n", + "from optunaz.descriptors import SmilesBasedDescriptor, SmilesFromFile\n", + "print(f\"Smiles based descriptors:\\n{SmilesBasedDescriptor.__args__}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Simple ChemProp example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following is an example of the most basic ChemProp run, which will train the algorithm using the recommended (sensible) defaults for the MPNN architecture:" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:19:17,867] A new study created in memory with name: my_study\n", + "[I 2024-07-02 13:19:17,868] A new 
study created in memory with name: study_name_0\n", + "INFO:root:Enqueued ChemProp manual trial with sensible defaults: {'activation__668a7428ff5cdb271b01c0925e8fea45': 'ReLU', 'aggregation__668a7428ff5cdb271b01c0925e8fea45': 'mean', 'aggregation_norm__668a7428ff5cdb271b01c0925e8fea45': 100, 'batch_size__668a7428ff5cdb271b01c0925e8fea45': 50, 'depth__668a7428ff5cdb271b01c0925e8fea45': 3, 'dropout__668a7428ff5cdb271b01c0925e8fea45': 0.0, 'features_generator__668a7428ff5cdb271b01c0925e8fea45': 'none', 'ffn_hidden_size__668a7428ff5cdb271b01c0925e8fea45': 300, 'ffn_num_layers__668a7428ff5cdb271b01c0925e8fea45': 2, 'final_lr_ratio_exp__668a7428ff5cdb271b01c0925e8fea45': -4, 'hidden_size__668a7428ff5cdb271b01c0925e8fea45': 300, 'init_lr_ratio_exp__668a7428ff5cdb271b01c0925e8fea45': -4, 'max_lr_exp__668a7428ff5cdb271b01c0925e8fea45': -3, 'warmup_epochs_ratio__668a7428ff5cdb271b01c0925e8fea45': 0.1, 'algorithm_name': 'ChemPropRegressor', 'ChemPropRegressor_algorithm_hash': '668a7428ff5cdb271b01c0925e8fea45'}\n", + "[I 2024-07-02 13:20:11,301] Trial 0 finished with value: -6833.034983241957 and parameters: {'algorithm_name': 'ChemPropRegressor', 'ChemPropRegressor_algorithm_hash': '668a7428ff5cdb271b01c0925e8fea45', 'activation__668a7428ff5cdb271b01c0925e8fea45': , 'aggregation__668a7428ff5cdb271b01c0925e8fea45': , 'aggregation_norm__668a7428ff5cdb271b01c0925e8fea45': 100.0, 'batch_size__668a7428ff5cdb271b01c0925e8fea45': 50.0, 'depth__668a7428ff5cdb271b01c0925e8fea45': 3.0, 'dropout__668a7428ff5cdb271b01c0925e8fea45': 0.0, 'ensemble_size__668a7428ff5cdb271b01c0925e8fea45': 1, 'epochs__668a7428ff5cdb271b01c0925e8fea45': 5, 'features_generator__668a7428ff5cdb271b01c0925e8fea45': , 'ffn_hidden_size__668a7428ff5cdb271b01c0925e8fea45': 300.0, 'ffn_num_layers__668a7428ff5cdb271b01c0925e8fea45': 2.0, 'final_lr_ratio_exp__668a7428ff5cdb271b01c0925e8fea45': -4, 'hidden_size__668a7428ff5cdb271b01c0925e8fea45': 300.0, 'init_lr_ratio_exp__668a7428ff5cdb271b01c0925e8fea45': -4, 
'max_lr_exp__668a7428ff5cdb271b01c0925e8fea45': -3, 'warmup_epochs_ratio__668a7428ff5cdb271b01c0925e8fea45': 0.1, 'descriptor': '{\"name\": \"SmilesFromFile\", \"parameters\": {}}'}. Best is trial 0 with value: -6833.034983241957.\n", + "[I 2024-07-02 13:21:02,026] Trial 1 finished with value: -6445.608102397302 and parameters: {'algorithm_name': 'ChemPropRegressor', 'ChemPropRegressor_algorithm_hash': '668a7428ff5cdb271b01c0925e8fea45', 'activation__668a7428ff5cdb271b01c0925e8fea45': , 'aggregation__668a7428ff5cdb271b01c0925e8fea45': , 'aggregation_norm__668a7428ff5cdb271b01c0925e8fea45': 78.0, 'batch_size__668a7428ff5cdb271b01c0925e8fea45': 105.0, 'depth__668a7428ff5cdb271b01c0925e8fea45': 5.0, 'dropout__668a7428ff5cdb271b01c0925e8fea45': 0.16, 'ensemble_size__668a7428ff5cdb271b01c0925e8fea45': 1, 'epochs__668a7428ff5cdb271b01c0925e8fea45': 5, 'features_generator__668a7428ff5cdb271b01c0925e8fea45': , 'ffn_hidden_size__668a7428ff5cdb271b01c0925e8fea45': 1700.0, 'ffn_num_layers__668a7428ff5cdb271b01c0925e8fea45': 3.0, 'final_lr_ratio_exp__668a7428ff5cdb271b01c0925e8fea45': -2, 'hidden_size__668a7428ff5cdb271b01c0925e8fea45': 2300.0, 'init_lr_ratio_exp__668a7428ff5cdb271b01c0925e8fea45': -2, 'max_lr_exp__668a7428ff5cdb271b01c0925e8fea45': -3, 'warmup_epochs_ratio__668a7428ff5cdb271b01c0925e8fea45': 0.1, 'descriptor': '{\"name\": \"SmilesFromFile\", \"parameters\": {}}'}. 
Best is trial 1 with value: -6445.608102397302.\n" + ] + } + ], + "source": [ + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"canonical\",\n", + " response_column=\"molwt\",\n", + " training_dataset_file=\"../tests/data/DRD2/subset-50/train.csv\", # This will be split into train and test.\n", + " split_strategy=Stratified(fraction=0.50),\n", + " deduplication_strategy=KeepMedian(),\n", + " ),\n", + " descriptors=[\n", + " SmilesFromFile.new(),\n", + " ],\n", + " algorithms=[\n", + " ChemPropRegressor.new(epochs=5), #epochs=5 to ensure run finishes quickly\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.REGRESSION,\n", + " cross_validation=2,\n", + " n_trials=2,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " ),\n", + ")\n", + "\n", + "study = optimize(config, study_name=\"my_study\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You may safely ignore `ChemProp` warnings such as `Model 0 provided with no test set, no metric evaluation will be performed`, `\"rmse = nan\"` and `1-fold cross validation`, as they are information prompts printed from `ChemProp` due to some (deactivated) CV functionality (ChemProp can perform its own cross validation - details for this are still printed despite its deactivation within `QSARtuna`).\n", + "\n", + "NB: QSARtuna will first trial the sensible defaults for the MPNN architecture (where possible given the user config). This is communicated to the user, e.g. 
see the output which advises:\n", + "\n", + "`A new study created in memory with name: study_name_0\n", + "INFO:root:Enqueued ChemProp manual trial with sensible defaults: {'activation': 'ReLU', 'aggregation': 'mean', 'aggregation_norm': 100, 'batch_size': 50, 'depth': 3, 'dropout': 0.0, 'features_generator': 'none', 'ffn_hidden_size': 300, 'ffn_num_layers': 3, 'final_lr_ratio_exp': -1, 'hidden_size': 300, 'init_lr_ratio_exp': -1, 'max_lr_exp': -3, 'warmup_epochs_ratio': 0.1, 'algorithm_name': 'ChemPropRegressor'}`.\n", + "\n", + "Enqueuing custom parameters ensures sampling from a sensible hyperparameter space to begin with, and facilitates further optimisation from this point. Additional trials will not have any further preset enqueuing and will use Bayesian optimization for trial suggestion." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ChemProp optimization separate from shallow methods (default behavior)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "By default, `QSARtuna` separates `ChemProp` from the other shallow methods using the `split_chemprop` flag. 
When this setting is set, the user must specify the number of `ChemProp` trials using the `n_chemprop_trials` flag if more than 1 (default) trial is desired:" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "from optunaz.config.optconfig import ChemPropClassifier, RandomForestClassifier\n", + "\n", + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"canonical\",\n", + " response_column=\"molwt_gt_330\",\n", + " training_dataset_file=\"../tests/data/DRD2/subset-50/train.csv\",\n", + " split_strategy=Stratified(fraction=0.75),\n", + " deduplication_strategy=KeepMedian(),\n", + " ),\n", + " descriptors=[\n", + " ECFP.new(),\n", + " SmilesFromFile.new(),\n", + " ],\n", + " algorithms=[\n", + " ChemPropClassifier.new(epochs=4),\n", + " RandomForestClassifier.new(n_estimators={\"low\": 5, \"high\": 5}),\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.CLASSIFICATION,\n", + " cross_validation=2,\n", + " n_trials=1, # run only one random forest classifier trial\n", + " n_chemprop_trials=2, # run one enqueued chemprop trial and 1 undirected trial\n", + " split_chemprop=True, # this is set to true by default (shown here for illustration)\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " ),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Turn on Hyperopt within trials (advanced functionality & very large computational cost)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "QSARtuna optimises all aspects of the ChemProp architecture when using ChemPropRegressor or ChemPropClassifier; however, users can activate the [original hyperparameter-optimization implementation](https://github.com/chemprop/chemprop#hyperparameter-optimization) in the `ChemProp` package, which performs automated Bayesian hyperparameter optimization using the `Hyperopt` package within 
each trial, at large computational cost.\n", + "\n", + "NB: The principal way for users to expand and perform more advanced runs is to extend the available non-network hyperparameters, such as the `features_generator` option or e.g. to trial different side information weighting (if side information is available).\n", + "\n", + "NB: Please note that when `num_iters`=1 (default behavior), any optimisation of the MPNN architecture (done by Hyperopt) is deactivated - the sensible defaults as specified by the ChemProp authors are applied, i.e. optimisation of the MPNN is only possible when `num_iters`>=2, like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "from optunaz.config.optconfig import ChemPropHyperoptRegressor, ChemPropHyperoptClassifier\n", + "\n", + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"canonical\",\n", + " response_column=\"molwt\",\n", + " training_dataset_file=\"../tests/data/DRD2/subset-50/train.csv\",\n", + " split_strategy=Stratified(fraction=0.5),\n", + " deduplication_strategy=KeepMedian(),\n", + " ),\n", + " descriptors=[\n", + " SmilesFromFile.new(),\n", + " ],\n", + " algorithms=[\n", + " ChemPropHyperoptRegressor.new(epochs=5, num_iters=2), #num_iters>=2: enable hyperopt within ChemProp trials\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.REGRESSION,\n", + " cross_validation=2,\n", + " n_trials=1, #just optimise one ChemProp model for this example\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " ),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "NB: Remember that parameter tuning of the MPNN network is performed _within_ each trial.\n", + "\n", + "#### A note on MPNN Hyperopt search space\n", + "\n", + "`ChemProp` models trained using Hyperopt use the [original implementation](https://chemprop.readthedocs.io/en/latest/hyperopt.html), but one key difference is 
the `search_parameter_level` setting created for `QSARtuna`; instead of using pre-defined search spaces as in the original package, `QSARtuna` can (and will by default since `search_parameter_level`=`auto` unless changed) alter the space depending on the characteristics of user input data. For example, the number of training set compounds, hyperparameter trials (`num_iters`) & epochs (`epochs`) are used by the `auto` setting to ensure search spaces are not too large for limited data/epochs, and _vice-versa_, an extensive search space is trialed when applicable.\n", + "\n", + "N.B: Users can also manually define `Hyperopt` search spaces by altering `search_parameter_level` from `auto` to a different level between `[0-8]`, representing the increasing search space size (see the [QSARtuna documentation](https://pages.scp.astrazeneca.net/mai/qsartuna/optunaz.config.html#optunaz.config.optconfig.ChemPropSearch_Parameter_Level) for details)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Side information and multi-task learning (MTL) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_“Even if you are only optimizing one loss as is the typical case, chances are there is an auxiliary task that will help you improve upon your main task”_ [Caruana, 1998]\n", + "\n", + "`QSARtuna` typically optimizes for one particular metric for a given molecule property. While we can generally achieve acceptable performance this way, these single task (ST) models ignore information that may improve the prediction of the main task of intent. See option `a.` in the figure below.\n", + "\n", + "Signals from relevant related tasks (aka \"auxiliary tasks\" or \"side information\") could come from the training signals of other molecular properties and by sharing representations between related tasks, we can enable a neural network to generalize better on our original task of intent. 
This approach is called Multi-Task Learning (MTL) See option `b.` in the figure below.\n", + "\n", + "\"Difference\n", + "\n", + "_(above) Differences between optimizing one vs. more than one loss function. a.) Single-task (ST): one model trained to predict one task one model optimised until performance no longer increases b.) Multi-task (MT/MTL): training one model to predict multiple tasks one model optimising more than one loss function at once enables representations to be shared between trained tasks training signals of related tasks shared between all tasks._\n", + "\n", + "\n", + "`ChemProp` performs MTL by using the knowledge learnt during training one task to reduce the loss of other tasks included in training. In order to use this function in `QSARtuna`, a user should provide side information in a separate file, and it should have the same ordering and length as the input/response columns (i.e. length of y should = length of side information for y).\n", + "\n", + "E.g: consider the DRD2 example input from earlier:" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "canonical,activity,molwt,molwt_gt_330\r\n", + "Cc1cc(NC(=O)c2cccc(COc3ccc(Br)cc3)c2)no1,0,387.233,True\r\n", + "O=C(Nc1ccc(F)cc1F)Nc1sccc1-c1nc2ccccc2s1,0,387.4360000000001,True\r\n", + "COC(=O)c1ccccc1NC(=O)c1cc([N+](=O)[O-])nn1Cc1ccccc1,0,380.36000000000007,True\r\n", + "CCOC(=O)C(C)Sc1nc(-c2ccccc2)ccc1C#N,0,312.39400000000006,False\r\n", + "CCC(CC)NC(=O)c1nn(Cc2ccccc2)c(=O)c2ccccc12,0,349.4340000000001,True\r\n", + "Brc1ccccc1OCCCOc1cccc2cccnc12,0,358.235,True\r\n", + "CCCCn1c(COc2cccc(OC)c2)nc2ccccc21,0,310.39700000000005,False\r\n", + "CCOc1cccc(NC(=O)c2sc3nc(-c4ccc(F)cc4)ccc3c2N)c1,0,407.4700000000001,True\r\n", + "COc1ccc(S(=O)(=O)N(CC(=O)Nc2ccc(C)cc2)c2ccc(C)cc2)cc1OC,0,454.54800000000023,True\r\n" + ] + } + ], + "source": [ + "!head ../tests/data/DRD2/subset-50/train.csv" + ] + 
}, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There is an accompanying example of side information/auxiliary data inputs (calculated PhysChem properties) as provided in `train_side_info.csv` within the tests data folder:" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "canonical,cLogP,cLogS,H-Acceptors,H-Donors,Total Surface Area,Relative PSA\r\n", + "Cc1cc(NC(=O)c2cccc(COc3ccc(Br)cc3)c2)no1,4.04,-5.293,5,1,265.09,0.22475\r\n", + "O=C(Nc1ccc(F)cc1F)Nc1sccc1-c1nc2ccccc2s1,4.8088,-5.883,4,2,271.39,0.32297\r\n", + "COC(=O)c1ccccc1NC(=O)c1cc([N+](=O)[O-])nn1Cc1ccccc1,1.6237,-3.835,9,1,287.39,0.33334\r\n", + "CCOC(=O)C(C)Sc1nc(-c2ccccc2)ccc1C#N,3.2804,-4.314,4,0,249.51,0.26075\r\n", + "CCC(CC)NC(=O)c1nn(Cc2ccccc2)c(=O)c2ccccc12,3.2533,-4.498,5,1,278.05,0.18917\r\n", + "Brc1ccccc1OCCCOc1cccc2cccnc12,4.5102,-4.694,3,0,246.29,0.12575\r\n", + "CCCCn1c(COc2cccc(OC)c2)nc2ccccc21,3.7244,-2.678,4,0,255.14,0.14831\r\n", + "CCOc1cccc(NC(=O)c2sc3nc(-c4ccc(F)cc4)ccc3c2N)c1,4.4338,-6.895,5,2,302.18,0.26838\r\n", + "COc1ccc(S(=O)(=O)N(CC(=O)Nc2ccc(C)cc2)c2ccc(C)cc2)cc1OC,3.2041,-5.057,7,1,343.67,0.22298\r\n" + ] + } + ], + "source": [ + "!head ../tests/data/DRD2/subset-50/train_side_info.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "I.e. 
the first column (Smiles) should match between the two files, and any columns after the SMILES within the `train_side_info.csv` side information file will be used as y-label side information in the training of the network.\n", + "\n", + "N.B: that calculated PhysChem properties are only one example of side information, and that side information may come from any related property that improves the main task of intent.\n", + "\n", + "A classification example can also be found here:" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "canonical,cLogP_Gt2.5,cLogS_Gt-3.5,H-Acceptors_Gt5,H-Donors_Gt0,Total Surface Area_Gt250,Relative PSA_Lt0.25\r\n", + "Cc1cc(NC(=O)c2cccc(COc3ccc(Br)cc3)c2)no1,1,0,0,1,1,1\r\n", + "O=C(Nc1ccc(F)cc1F)Nc1sccc1-c1nc2ccccc2s1,1,0,0,1,1,0\r\n", + "COC(=O)c1ccccc1NC(=O)c1cc([N+](=O)[O-])nn1Cc1ccccc1,0,0,1,1,1,0\r\n", + "CCOC(=O)C(C)Sc1nc(-c2ccccc2)ccc1C#N,1,0,0,0,0,0\r\n", + "CCC(CC)NC(=O)c1nn(Cc2ccccc2)c(=O)c2ccccc12,1,0,0,1,1,1\r\n", + "Brc1ccccc1OCCCOc1cccc2cccnc12,1,0,0,0,0,1\r\n", + "CCCCn1c(COc2cccc(OC)c2)nc2ccccc21,1,1,0,0,1,1\r\n", + "CCOc1cccc(NC(=O)c2sc3nc(-c4ccc(F)cc4)ccc3c2N)c1,1,0,0,1,1,0\r\n", + "COc1ccc(S(=O)(=O)N(CC(=O)Nc2ccc(C)cc2)c2ccc(C)cc2)cc1OC,1,0,1,1,1,1\r\n" + ] + } + ], + "source": [ + "!head ../tests/data/DRD2/subset-50/train_side_info_cls.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The contribution or _weight_ of all side information tasks in their contribution to the loss function during training a network is a parameter that can be optimised within QSARtuna, e.g:" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:21:03,347] A new study created in memory with name: my_study\n", + "[I 2024-07-02 13:21:03,350] A new study created in memory with name: 
study_name_0\n", + "[I 2024-07-02 13:21:05,443] Trial 0 finished with value: -5817.944008002311 and parameters: {'algorithm_name': 'ChemPropHyperoptRegressor', 'ChemPropHyperoptRegressor_algorithm_hash': 'db9e60f9b8f0a43eff4b41917b6293d9', 'ensemble_size__db9e60f9b8f0a43eff4b41917b6293d9': 1, 'epochs__db9e60f9b8f0a43eff4b41917b6293d9': 4, 'features_generator__db9e60f9b8f0a43eff4b41917b6293d9': , 'num_iters__db9e60f9b8f0a43eff4b41917b6293d9': 1, 'search_parameter_level__db9e60f9b8f0a43eff4b41917b6293d9': , 'descriptor': '{\"name\": \"SmilesAndSideInfoFromFile\", \"parameters\": {\"file\": \"../tests/data/DRD2/subset-50/train_side_info.csv\", \"input_column\": \"canonical\", \"aux_weight_pc\": {\"low\": 0, \"high\": 100, \"q\": 10}}}', 'aux_weight_pc__db9e60f9b8f0a43eff4b41917b6293d9': 50}. Best is trial 0 with value: -5817.944008002311.\n", + "[I 2024-07-02 13:21:05,495] Trial 1 pruned. Duplicate parameter set\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'ChemPropHyperoptRegressor', 'ChemPropHyperoptRegressor_algorithm_hash': 'db9e60f9b8f0a43eff4b41917b6293d9', 'ensemble_size__db9e60f9b8f0a43eff4b41917b6293d9': 1, 'epochs__db9e60f9b8f0a43eff4b41917b6293d9': 4, 'features_generator__db9e60f9b8f0a43eff4b41917b6293d9': , 'num_iters__db9e60f9b8f0a43eff4b41917b6293d9': 1, 'search_parameter_level__db9e60f9b8f0a43eff4b41917b6293d9': , 'descriptor': '{\"name\": \"SmilesAndSideInfoFromFile\", \"parameters\": {\"file\": \"../tests/data/DRD2/subset-50/train_side_info.csv\", \"input_column\": \"canonical\", \"aux_weight_pc\": {\"low\": 0, \"high\": 100, \"q\": 10}}}', 'aux_weight_pc__db9e60f9b8f0a43eff4b41917b6293d9': 50}, return [-5817.944008002311]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:21:07,433] Trial 2 finished with value: -5796.34392897437 and parameters: {'algorithm_name': 'ChemPropHyperoptRegressor', 'ChemPropHyperoptRegressor_algorithm_hash': 
'db9e60f9b8f0a43eff4b41917b6293d9', 'ensemble_size__db9e60f9b8f0a43eff4b41917b6293d9': 1, 'epochs__db9e60f9b8f0a43eff4b41917b6293d9': 4, 'features_generator__db9e60f9b8f0a43eff4b41917b6293d9': , 'num_iters__db9e60f9b8f0a43eff4b41917b6293d9': 1, 'search_parameter_level__db9e60f9b8f0a43eff4b41917b6293d9': , 'descriptor': '{\"name\": \"SmilesAndSideInfoFromFile\", \"parameters\": {\"file\": \"../tests/data/DRD2/subset-50/train_side_info.csv\", \"input_column\": \"canonical\", \"aux_weight_pc\": {\"low\": 0, \"high\": 100, \"q\": 10}}}', 'aux_weight_pc__db9e60f9b8f0a43eff4b41917b6293d9': 80}. Best is trial 2 with value: -5796.34392897437.\n", + "[I 2024-07-02 13:21:09,439] Trial 3 finished with value: -5795.086720713623 and parameters: {'algorithm_name': 'ChemPropHyperoptRegressor', 'ChemPropHyperoptRegressor_algorithm_hash': 'db9e60f9b8f0a43eff4b41917b6293d9', 'ensemble_size__db9e60f9b8f0a43eff4b41917b6293d9': 1, 'epochs__db9e60f9b8f0a43eff4b41917b6293d9': 4, 'features_generator__db9e60f9b8f0a43eff4b41917b6293d9': , 'num_iters__db9e60f9b8f0a43eff4b41917b6293d9': 1, 'search_parameter_level__db9e60f9b8f0a43eff4b41917b6293d9': , 'descriptor': '{\"name\": \"SmilesAndSideInfoFromFile\", \"parameters\": {\"file\": \"../tests/data/DRD2/subset-50/train_side_info.csv\", \"input_column\": \"canonical\", \"aux_weight_pc\": {\"low\": 0, \"high\": 100, \"q\": 10}}}', 'aux_weight_pc__db9e60f9b8f0a43eff4b41917b6293d9': 100}. Best is trial 3 with value: -5795.086720713623.\n", + "[I 2024-07-02 13:21:09,470] Trial 4 pruned. 
Duplicate parameter set\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'ChemPropHyperoptRegressor', 'ChemPropHyperoptRegressor_algorithm_hash': 'db9e60f9b8f0a43eff4b41917b6293d9', 'ensemble_size__db9e60f9b8f0a43eff4b41917b6293d9': 1, 'epochs__db9e60f9b8f0a43eff4b41917b6293d9': 4, 'features_generator__db9e60f9b8f0a43eff4b41917b6293d9': , 'num_iters__db9e60f9b8f0a43eff4b41917b6293d9': 1, 'search_parameter_level__db9e60f9b8f0a43eff4b41917b6293d9': , 'descriptor': '{\"name\": \"SmilesAndSideInfoFromFile\", \"parameters\": {\"file\": \"../tests/data/DRD2/subset-50/train_side_info.csv\", \"input_column\": \"canonical\", \"aux_weight_pc\": {\"low\": 0, \"high\": 100, \"q\": 10}}}', 'aux_weight_pc__db9e60f9b8f0a43eff4b41917b6293d9': 100}, return [-5795.086720713623]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:21:11,241] Trial 5 finished with value: -5820.227555999914 and parameters: {'algorithm_name': 'ChemPropHyperoptRegressor', 'ChemPropHyperoptRegressor_algorithm_hash': 'db9e60f9b8f0a43eff4b41917b6293d9', 'ensemble_size__db9e60f9b8f0a43eff4b41917b6293d9': 1, 'epochs__db9e60f9b8f0a43eff4b41917b6293d9': 4, 'features_generator__db9e60f9b8f0a43eff4b41917b6293d9': , 'num_iters__db9e60f9b8f0a43eff4b41917b6293d9': 1, 'search_parameter_level__db9e60f9b8f0a43eff4b41917b6293d9': , 'descriptor': '{\"name\": \"SmilesAndSideInfoFromFile\", \"parameters\": {\"file\": \"../tests/data/DRD2/subset-50/train_side_info.csv\", \"input_column\": \"canonical\", \"aux_weight_pc\": {\"low\": 0, \"high\": 100, \"q\": 10}}}', 'aux_weight_pc__db9e60f9b8f0a43eff4b41917b6293d9': 0}. Best is trial 3 with value: -5795.086720713623.\n", + "[I 2024-07-02 13:21:11,283] Trial 6 pruned. 
Duplicate parameter set\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'ChemPropHyperoptRegressor', 'ChemPropHyperoptRegressor_algorithm_hash': 'db9e60f9b8f0a43eff4b41917b6293d9', 'ensemble_size__db9e60f9b8f0a43eff4b41917b6293d9': 1, 'epochs__db9e60f9b8f0a43eff4b41917b6293d9': 4, 'features_generator__db9e60f9b8f0a43eff4b41917b6293d9': , 'num_iters__db9e60f9b8f0a43eff4b41917b6293d9': 1, 'search_parameter_level__db9e60f9b8f0a43eff4b41917b6293d9': , 'descriptor': '{\"name\": \"SmilesAndSideInfoFromFile\", \"parameters\": {\"file\": \"../tests/data/DRD2/subset-50/train_side_info.csv\", \"input_column\": \"canonical\", \"aux_weight_pc\": {\"low\": 0, \"high\": 100, \"q\": 10}}}', 'aux_weight_pc__db9e60f9b8f0a43eff4b41917b6293d9': 100}, return [-5795.086720713623]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:21:13,322] Trial 7 finished with value: -5852.160071204277 and parameters: {'algorithm_name': 'ChemPropHyperoptRegressor', 'ChemPropHyperoptRegressor_algorithm_hash': 'db9e60f9b8f0a43eff4b41917b6293d9', 'ensemble_size__db9e60f9b8f0a43eff4b41917b6293d9': 1, 'epochs__db9e60f9b8f0a43eff4b41917b6293d9': 4, 'features_generator__db9e60f9b8f0a43eff4b41917b6293d9': , 'num_iters__db9e60f9b8f0a43eff4b41917b6293d9': 1, 'search_parameter_level__db9e60f9b8f0a43eff4b41917b6293d9': , 'descriptor': '{\"name\": \"SmilesAndSideInfoFromFile\", \"parameters\": {\"file\": \"../tests/data/DRD2/subset-50/train_side_info.csv\", \"input_column\": \"canonical\", \"aux_weight_pc\": {\"low\": 0, \"high\": 100, \"q\": 10}}}', 'aux_weight_pc__db9e60f9b8f0a43eff4b41917b6293d9': 10}. 
Best is trial 3 with value: -5795.086720713623.\n" + ] + } + ], + "source": [ + "from optunaz.descriptors import SmilesAndSideInfoFromFile\n", + "\n", + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"canonical\",\n", + " response_column=\"molwt\",\n", + " training_dataset_file=\"../tests/data/DRD2/subset-50/train.csv\",\n", + " test_dataset_file=\"../tests/data/DRD2/subset-50/test.csv\"), # Hidden during optimization.\n", + " descriptors=[\n", + " SmilesAndSideInfoFromFile.new(file='../tests/data/DRD2/subset-50/train_side_info.csv',\\\n", + " input_column='canonical',\n", + " aux_weight_pc={\"low\": 0, \"high\": 100, \"q\": 10}) #try different aux weights\n", + " ],\n", + " algorithms=[\n", + " ChemPropHyperoptRegressor.new(epochs=4), #epochs=4 to ensure run finishes quickly\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.REGRESSION,\n", + " cross_validation=1,\n", + " n_trials=8,\n", + " n_startup_trials=0,\n", + " random_seed=42,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " ),\n", + ")\n", + "\n", + "study = optimize(config, study_name=\"my_study\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the toy example above, the `ChemPropHyperoptRegressor` has been trialed with a variety of auxiliary weights ranging from 0-100%, using the SmilesAndSideInfoFromFile setting `aux_weight_pc={\"low\": 0, \"high\": 100, \"q\": 10}`.\n", + "\n", + "The influence of the weighting of side information on model performance can hence be explored via a scatterplot of the objective value as a function of the auxiliary weight percent:" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "data = study.trials_dataframe().query('user_attrs_trial_ran==True') #drop any pruned/erroneous trials\n", + "data.columns = [i.split('__')[0] for i in data.columns] # remove algorithm hash from columns\n", + "ax = sns.scatterplot(data=data, x=\"params_aux_weight_pc\", y=\"value\")\n", + "ax.set(xlabel=\"Aux weight percent (%)\", ylabel=f\"Ojbective value\\n({default_reg_scoring})\");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Hence we can conclude that 100% weighting of the side information produces the most performant `ChemProp` model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Combining ChemProp & shallow models (only recommended for large no. trials & long run times)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Above described how ChemProp is by default split from shallow methods. Users can override this using the advanced functionaility when `split_chemprop`=False. 
In this situation `QSARtuna` will optimise `ChemProp` with the other shallow methods:" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"canonical\",\n", + " response_column=\"molwt_gt_330\",\n", + " training_dataset_file=\"../tests/data/DRD2/subset-50/train.csv\", # This will be split into train and test.\n", + " split_strategy=Stratified(fraction=0.75),\n", + " deduplication_strategy=KeepMedian(),\n", + " ),\n", + " descriptors=[\n", + " ECFP.new(),\n", + " SmilesFromFile.new(),\n", + " ],\n", + " algorithms=[\n", + " ChemPropClassifier.new(epochs=4),\n", + " RandomForestClassifier.new(n_estimators={\"low\": 5, \"high\": 5}),\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.CLASSIFICATION,\n", + " cross_validation=2,\n", + " random_seed=42,\n", + " n_trials=10, # run 10 trials in total, shared between ChemProp and random forest\n", + " split_chemprop=False, # set to false to run ChemProp alongside other algorithms\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " ),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Combining ChemProp and traditional methods in this way means the potential search space in `QSARtuna` comprises some incompatible sub-spaces; for example, `ChemProp` is only compatible with SMILES-based descriptors, whilst traditional methods are incompatible with SMILES. In the following examples, we consider the following search space: \"Valid" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pruning is an effective method to handle such incompatible algorithm-descriptor pairs (grey) to skip any incompatible trials that use non-SMILES descriptors with `ChemProp` and _vice-versa_, when any SMILES-based descriptors are trialed with non-`ChemProp` algorithms. \n", + "\n", + "This is only recommended for advanced users, when the no. 
trials is >100, since the optimiser has to learn which descriptors and methods are compatible at the same time as optimising for the most performant pairs.\n", + "\n", + "_What is pruning?_\n", + "\n", + "Pruning is an `Optuna` feature [originally designed](https://optuna.readthedocs.io/en/v2.0.0/tutorial/pruning.html) to automatically stop unpromising trials early in training.\n", + "\n", + "_How/why is pruning implemented in QSARtuna?_\n", + "\n", + "Instead of the original intention of pruning _unpromising trials_ due to poor performance, `QSARtuna` ensures `Optuna` prunes _incompatible algorithm-descriptor pairs_, so the sampler will not trial incompatible settings again. Duplicated trials are also pruned, to avoid repeatedly suggesting the same hyperparameters. Pruning the trial (instead of returning a cached result) explicitly tasks the sampler with avoiding identical parameters.\n", + "\n", + "Now let us consider the following run:" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:21:13,577] A new study created in memory with name: my_study\n", + "[I 2024-07-02 13:21:13,629] Trial 0 finished with value: -inf and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.9525489095524835, 'descriptor': '{\"name\": \"SmilesAndSideInfoFromFile\", \"parameters\": {\"file\": \"../tests/data/DRD2/subset-50/train_side_info.csv\", \"input_column\": \"canonical\", \"aux_weight_pc\": {\"low\": 0, \"high\": 40, \"q\": 10}}}', 'aux_weight_pc__cfa1990d5153c8812982f034d788d7ee': 30}. 
Best is trial 0 with value: -inf.\n", + "[I 2024-07-02 13:21:13,777] Trial 1 finished with value: -4824.686269039228 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.7731425652872588, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 1 with value: -4824.686269039228.\n", + "[I 2024-07-02 13:21:13,819] Trial 2 pruned. Incompatible subspace\n", + "[I 2024-07-02 13:21:13,849] Trial 3 pruned. Incompatible subspace\n", + "[I 2024-07-02 13:21:13,997] Trial 4 finished with value: -4409.946844928445 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.791002332112292, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 4 with value: -4409.946844928445.\n", + "[I 2024-07-02 13:21:14,021] Trial 5 pruned. Incompatible subspace\n", + "[I 2024-07-02 13:21:14,167] Trial 6 finished with value: -5029.734620250011 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 23.329624779366306, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 0.00015024763718638216, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 4 with value: -4409.946844928445.\n", + "[I 2024-07-02 13:21:14,196] Trial 7 pruned. Incompatible subspace\n", + "[I 2024-07-02 13:21:14,228] Trial 8 pruned. Incompatible subspace\n", + "[I 2024-07-02 13:21:14,269] Trial 9 pruned. 
Incompatible subspace\n", + "[I 2024-07-02 13:21:14,523] Trial 10 finished with value: -4396.722635068717 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 17, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 7, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 10 with value: -4396.722635068717.\n", + "[I 2024-07-02 13:21:14,559] Trial 11 pruned. Duplicate parameter set\n", + "[I 2024-07-02 13:21:14,753] Trial 12 finished with value: -4030.4577379164707 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 30, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 9, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 12 with value: -4030.4577379164707.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 17, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 7, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}, return [-4396.722635068717]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:21:14,790] Trial 13 pruned. 
Duplicate parameter set\n", + "[I 2024-07-02 13:21:14,960] Trial 14 finished with value: -4030.4577379164707 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 26, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 9, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 12 with value: -4030.4577379164707.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 30, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 9, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}, return [-4030.4577379164707]\n" + ] + } + ], + "source": [ + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"canonical\",\n", + " response_column=\"molwt\",\n", + " training_dataset_file=\"../tests/data/DRD2/subset-50/train.csv\",\n", + " test_dataset_file=\"../tests/data/DRD2/subset-50/test.csv\"\n", + " ),\n", + " descriptors=[\n", + " ECFP.new(), # NB: The first 2 descriptors are incompatible with ChemProp \n", + " MACCS_keys.new(),\n", + " SmilesAndSideInfoFromFile.new(file='../tests/data/DRD2/subset-50/train_side_info.csv',\\\n", + " input_column='canonical',\n", + " aux_weight_pc={\"low\": 0, \"high\": 40, \"q\": 10}\n", + " ), # NB: This is only compatible only with ChemProp\n", + " ],\n", + " algorithms=[\n", + " SVR.new(), # NB: The first 6 regressors are incompatible with SMILES descriptors \n", + " RandomForestRegressor.new(n_estimators={\"low\": 5, \"high\": 10}),\n", + " Ridge.new(),\n", + " Lasso.new(),\n", + " PLSRegression.new(),\n", + " ChemPropHyperoptRegressor.new(epochs=4) # NB: 
ChemProp only compatible with SMILES\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.REGRESSION,\n", + " cross_validation=3,\n", + " n_trials=15,\n", + " random_seed=4, # seed 4 ensures sampling of incompatible subspace for this example\n", + " n_startup_trials=10,\n", + " split_chemprop=False,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " ),\n", + ")\n", + "\n", + "study = optimize(config, study_name=\"my_study\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Consulting the `QSARtuna` output, we observe cases of e.g. \"`Trial 3 pruned. Incompatible subspace`\", which indicates an instance when the sampler has sampled an incompatible algo-descriptor pair. \n", + "\n", + "\"`Trial 13 pruned. Duplicate parameter set`\" is an example of pruning a duplicated trial parameter suggestion.\n", + "\n", + "N.B: \"`Trial 0 finished with value: -inf`\" can occur when the very first `Optuna` trial comprises incompatible algo-descriptor pairs (`-inf` is assigned instead of pruning, since `Optuna` does not allow pruning first trials)." + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "data = study.trials_dataframe().query('user_attrs_trial_ran==True') #drop any pruned/erroneous trials\n", + "ax = sns.scatterplot(data=data, x=\"number\", y=\"value\",hue='params_algorithm_name')\n", + "ax.set(xlabel=\"Trial number\", ylabel=f\"Ojbective value\\n({default_reg_scoring})\")\n", + "sns.move_legend(ax, \"upper left\", bbox_to_anchor=(1, 1))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As shown in the overview graph, `Optuna` never successfully samples/trials `ChemProp` with a compatible descriptor, as seen from its absence in the generated plot. The pruned 8th trial is also missing due to the pruning. This highlights a limitation of combining `ChemProp` (and the associated descriptors) with other algorithms - it makes optimisation more complex, and not all combinations may be suggested. Aside from splitting ChemProp into separate trials, another mitigation strategy is to reduce the complexity of the search space, by removing some incompatible descriptors/traditional methods, to ensure ChemProp has more sampling success. Overall, this serves as a cautionary argument to split `ChemProp` trials (using `split_chemprop`=True)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pre-training and adapting ChemProp models (Transfer Learning)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Transfer learning (TL) to adapt pre-trained models on a specific (wider) dataset to a specific dataset of interest in a similar manner to [this publication](https://pubs.acs.org/doi/10.1021/acs.molpharmaceut.3c01124) can be performed in QSARtuna.
This option is available for ChemProp models and employs the [original ChemProp package implementation](https://chemprop.readthedocs.io/en/latest/tutorial.html#pretraining).\n", + "For example, a user can perform optimisation to pre-train a model using the following:" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:21:15,255] A new study created in memory with name: my_study\n", + "[I 2024-07-02 13:21:15,256] A new study created in memory with name: study_name_0\n", + "INFO:root:Enqueued ChemProp manual trial with sensible defaults: {'activation__e0d3a442222d4b38f3aa1434851320db': 'ReLU', 'aggregation__e0d3a442222d4b38f3aa1434851320db': 'mean', 'aggregation_norm__e0d3a442222d4b38f3aa1434851320db': 100, 'batch_size__e0d3a442222d4b38f3aa1434851320db': 50, 'depth__e0d3a442222d4b38f3aa1434851320db': 3, 'dropout__e0d3a442222d4b38f3aa1434851320db': 0.0, 'features_generator__e0d3a442222d4b38f3aa1434851320db': 'none', 'ffn_hidden_size__e0d3a442222d4b38f3aa1434851320db': 300, 'ffn_num_layers__e0d3a442222d4b38f3aa1434851320db': 2, 'final_lr_ratio_exp__e0d3a442222d4b38f3aa1434851320db': -4, 'hidden_size__e0d3a442222d4b38f3aa1434851320db': 300, 'init_lr_ratio_exp__e0d3a442222d4b38f3aa1434851320db': -4, 'max_lr_exp__e0d3a442222d4b38f3aa1434851320db': -3, 'warmup_epochs_ratio__e0d3a442222d4b38f3aa1434851320db': 0.1, 'algorithm_name': 'ChemPropRegressor', 'ChemPropRegressor_algorithm_hash': 'e0d3a442222d4b38f3aa1434851320db'}\n", + "[I 2024-07-02 13:21:58,856] Trial 0 finished with value: -4937.540075659691 and parameters: {'algorithm_name': 'ChemPropRegressor', 'ChemPropRegressor_algorithm_hash': 'e0d3a442222d4b38f3aa1434851320db', 'activation__e0d3a442222d4b38f3aa1434851320db': , 'aggregation__e0d3a442222d4b38f3aa1434851320db': , 'aggregation_norm__e0d3a442222d4b38f3aa1434851320db': 100.0, 
'batch_size__e0d3a442222d4b38f3aa1434851320db': 50.0, 'depth__e0d3a442222d4b38f3aa1434851320db': 3.0, 'dropout__e0d3a442222d4b38f3aa1434851320db': 0.0, 'ensemble_size__e0d3a442222d4b38f3aa1434851320db': 1, 'epochs__e0d3a442222d4b38f3aa1434851320db': 4, 'features_generator__e0d3a442222d4b38f3aa1434851320db': , 'ffn_hidden_size__e0d3a442222d4b38f3aa1434851320db': 300.0, 'ffn_num_layers__e0d3a442222d4b38f3aa1434851320db': 2.0, 'final_lr_ratio_exp__e0d3a442222d4b38f3aa1434851320db': -4, 'hidden_size__e0d3a442222d4b38f3aa1434851320db': 300.0, 'init_lr_ratio_exp__e0d3a442222d4b38f3aa1434851320db': -4, 'max_lr_exp__e0d3a442222d4b38f3aa1434851320db': -3, 'warmup_epochs_ratio__e0d3a442222d4b38f3aa1434851320db': 0.1, 'descriptor': '{\"name\": \"SmilesFromFile\", \"parameters\": {}}'}. Best is trial 0 with value: -4937.540075659691.\n", + " \r" + ] + } + ], + "source": [ + "from optunaz.descriptors import SmilesFromFile\n", + "from optunaz.config.optconfig import ChemPropRegressor\n", + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"canonical\",\n", + " response_column=\"molwt\",\n", + " training_dataset_file=\"../tests/data/DRD2/subset-50/train.csv\", # This will be split into train and test.\n", + " ),\n", + " descriptors=[SmilesFromFile.new()],\n", + " algorithms=[\n", + " ChemPropRegressor.new(epochs=4),\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.REGRESSION,\n", + " cross_validation=2,\n", + " n_trials=1,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " ),\n", + ")\n", + "\n", + "study = optimize(config, study_name=\"my_study\")\n", + "_ = build_best(buildconfig_best(study), \"../target/pretrained.pkl\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The pretrained model saved to `../target/pretrained.pkl` can now be supplied as an input for the `ChemPropRegressorPretrained` algorithm. 
This model can be retrained with (or adapted to) a new dataset (`../tests/data/DRD2/subset-50/test.csv`) like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:23:02,954] A new study created in memory with name: my_study\n", + "[I 2024-07-02 13:23:02,997] A new study created in memory with name: study_name_0\n", + "[I 2024-07-02 13:23:47,043] Trial 0 finished with value: -5114.7131239123555 and parameters: {'algorithm_name': 'ChemPropRegressorPretrained', 'ChemPropRegressorPretrained_algorithm_hash': 'dfc518a76317f23d95e5aa5a3eac77f0', 'frzn__dfc518a76317f23d95e5aa5a3eac77f0': , 'epochs__dfc518a76317f23d95e5aa5a3eac77f0': 4, 'descriptor': '{\"name\": \"SmilesFromFile\", \"parameters\": {}}'}. Best is trial 0 with value: -5114.7131239123555.\n" + ] + } + ], + "source": [ + "from optunaz.config.optconfig import ChemPropRegressorPretrained\n", + "\n", + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"canonical\",\n", + " response_column=\"molwt\",\n", + " training_dataset_file=\"../tests/data/DRD2/subset-50/test.csv\",\n", + " ),\n", + " descriptors=[SmilesFromFile.new()],\n", + " algorithms=[\n", + " ChemPropRegressorPretrained.new(\n", + " pretrained_model='../target/pretrained.pkl',\n", + " epochs=ChemPropRegressorPretrained.Parameters.ChemPropParametersEpochs(low=4,high=4))\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.REGRESSION,\n", + " cross_validation=2,\n", + " n_trials=1,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " ),\n", + ")\n", + "\n", + "study = optimize(config, study_name=\"my_study\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we have the basics covered, we can now provide an example of how QSARtuna can compare the performance of local, adapted and global (no epochs for transfer 
learning) models within a single optimisation job in the following example:" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:23:47,172] A new study created in memory with name: my_study\n", + "[I 2024-07-02 13:23:47,174] A new study created in memory with name: study_name_0\n", + "INFO:root:Enqueued ChemProp manual trial with sensible defaults: {'activation__e0d3a442222d4b38f3aa1434851320db': 'ReLU', 'aggregation__e0d3a442222d4b38f3aa1434851320db': 'mean', 'aggregation_norm__e0d3a442222d4b38f3aa1434851320db': 100, 'batch_size__e0d3a442222d4b38f3aa1434851320db': 50, 'depth__e0d3a442222d4b38f3aa1434851320db': 3, 'dropout__e0d3a442222d4b38f3aa1434851320db': 0.0, 'features_generator__e0d3a442222d4b38f3aa1434851320db': 'none', 'ffn_hidden_size__e0d3a442222d4b38f3aa1434851320db': 300, 'ffn_num_layers__e0d3a442222d4b38f3aa1434851320db': 2, 'final_lr_ratio_exp__e0d3a442222d4b38f3aa1434851320db': -4, 'hidden_size__e0d3a442222d4b38f3aa1434851320db': 300, 'init_lr_ratio_exp__e0d3a442222d4b38f3aa1434851320db': -4, 'max_lr_exp__e0d3a442222d4b38f3aa1434851320db': -3, 'warmup_epochs_ratio__e0d3a442222d4b38f3aa1434851320db': 0.1, 'algorithm_name': 'ChemPropRegressor', 'ChemPropRegressor_algorithm_hash': 'e0d3a442222d4b38f3aa1434851320db'}\n", + "[I 2024-07-02 13:24:09,495] Trial 0 finished with value: -5891.7552821093905 and parameters: {'algorithm_name': 'ChemPropRegressor', 'ChemPropRegressor_algorithm_hash': 'e0d3a442222d4b38f3aa1434851320db', 'activation__e0d3a442222d4b38f3aa1434851320db': , 'aggregation__e0d3a442222d4b38f3aa1434851320db': , 'aggregation_norm__e0d3a442222d4b38f3aa1434851320db': 100.0, 'batch_size__e0d3a442222d4b38f3aa1434851320db': 50.0, 'depth__e0d3a442222d4b38f3aa1434851320db': 3.0, 'dropout__e0d3a442222d4b38f3aa1434851320db': 0.0, 'ensemble_size__e0d3a442222d4b38f3aa1434851320db': 1, 'epochs__e0d3a442222d4b38f3aa1434851320db': 4, 
'features_generator__e0d3a442222d4b38f3aa1434851320db': , 'ffn_hidden_size__e0d3a442222d4b38f3aa1434851320db': 300.0, 'ffn_num_layers__e0d3a442222d4b38f3aa1434851320db': 2.0, 'final_lr_ratio_exp__e0d3a442222d4b38f3aa1434851320db': -4, 'hidden_size__e0d3a442222d4b38f3aa1434851320db': 300.0, 'init_lr_ratio_exp__e0d3a442222d4b38f3aa1434851320db': -4, 'max_lr_exp__e0d3a442222d4b38f3aa1434851320db': -3, 'warmup_epochs_ratio__e0d3a442222d4b38f3aa1434851320db': 0.1, 'descriptor': '{\"name\": \"SmilesFromFile\", \"parameters\": {}}'}. Best is trial 0 with value: -5891.7552821093905.\n", + "[I 2024-07-02 13:24:31,625] Trial 1 finished with value: -5891.7552821093905 and parameters: {'algorithm_name': 'ChemPropRegressor', 'ChemPropRegressor_algorithm_hash': 'e0d3a442222d4b38f3aa1434851320db', 'activation__e0d3a442222d4b38f3aa1434851320db': , 'aggregation__e0d3a442222d4b38f3aa1434851320db': , 'aggregation_norm__e0d3a442222d4b38f3aa1434851320db': 98.0, 'batch_size__e0d3a442222d4b38f3aa1434851320db': 40.0, 'depth__e0d3a442222d4b38f3aa1434851320db': 3.0, 'dropout__e0d3a442222d4b38f3aa1434851320db': 0.0, 'ensemble_size__e0d3a442222d4b38f3aa1434851320db': 1, 'epochs__e0d3a442222d4b38f3aa1434851320db': 4, 'features_generator__e0d3a442222d4b38f3aa1434851320db': , 'ffn_hidden_size__e0d3a442222d4b38f3aa1434851320db': 300.0, 'ffn_num_layers__e0d3a442222d4b38f3aa1434851320db': 2.0, 'final_lr_ratio_exp__e0d3a442222d4b38f3aa1434851320db': -4, 'hidden_size__e0d3a442222d4b38f3aa1434851320db': 300.0, 'init_lr_ratio_exp__e0d3a442222d4b38f3aa1434851320db': -4, 'max_lr_exp__e0d3a442222d4b38f3aa1434851320db': -3, 'warmup_epochs_ratio__e0d3a442222d4b38f3aa1434851320db': 0.1, 'descriptor': '{\"name\": \"SmilesFromFile\", \"parameters\": {}}'}. 
Best is trial 0 with value: -5891.7552821093905.\n", + "[I 2024-07-02 13:24:53,140] Trial 2 finished with value: -5890.94653501547 and parameters: {'algorithm_name': 'ChemPropRegressorPretrained', 'ChemPropRegressorPretrained_algorithm_hash': '77dfc8230317e08504ed5e643243fbc2', 'frzn__77dfc8230317e08504ed5e643243fbc2': , 'epochs__77dfc8230317e08504ed5e643243fbc2': 0, 'descriptor': '{\"name\": \"SmilesFromFile\", \"parameters\": {}}'}. Best is trial 2 with value: -5890.94653501547.\n", + "[I 2024-07-02 13:24:53,181] Trial 3 pruned. Duplicate parameter set\n", + "[I 2024-07-02 13:24:53,211] Trial 4 pruned. Duplicate parameter set\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'ChemPropRegressorPretrained', 'ChemPropRegressorPretrained_algorithm_hash': '77dfc8230317e08504ed5e643243fbc2', 'frzn__77dfc8230317e08504ed5e643243fbc2': , 'epochs__77dfc8230317e08504ed5e643243fbc2': 0, 'descriptor': '{\"name\": \"SmilesFromFile\", \"parameters\": {}}'}, return [-5890.94653501547]\n", + "Duplicated trial: {'algorithm_name': 'ChemPropRegressorPretrained', 'ChemPropRegressorPretrained_algorithm_hash': '77dfc8230317e08504ed5e643243fbc2', 'frzn__77dfc8230317e08504ed5e643243fbc2': , 'epochs__77dfc8230317e08504ed5e643243fbc2': 0, 'descriptor': '{\"name\": \"SmilesFromFile\", \"parameters\": {}}'}, return [-5890.94653501547]\n" + ] + } + ], + "source": [ + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"canonical\",\n", + " response_column=\"molwt\",\n", + " training_dataset_file=\"../tests/data/DRD2/subset-50/train.csv\", # test.csv supplied for fair comparison\n", + " test_dataset_file=\"../tests/data/DRD2/subset-50/test.csv\", # test.csv supplied for fair comparison\n", + " ),\n", + " descriptors=[SmilesFromFile.new()],\n", + " algorithms=[\n", + " ChemPropRegressor.new(epochs=4), # local\n", + " ChemPropRegressorPretrained.new(\n", + " pretrained_model='../target/pretrained.pkl',\n", + " 
epochs=ChemPropRegressorPretrained.Parameters.ChemPropParametersEpochs(low=0,high=0)) # global\n", + " , \n", + " ChemPropRegressorPretrained.new(\n", + " pretrained_model='../target/pretrained.pkl',\n", + " epochs=ChemPropRegressorPretrained.Parameters.ChemPropParametersEpochs(low=4,high=4)) #adapted\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.REGRESSION,\n", + " cross_validation=1,\n", + " n_trials=5,\n", + " n_startup_trials=0,\n", + " random_seed=1545,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " ),\n", + ")\n", + "tl_study = optimize(config, study_name=\"my_study\").trials_dataframe()\n", + "\n", + "tl_study['epochs'] = tl_study.loc[:,tl_study.columns.str.contains('params_epochs'\n", + " )].fillna(''\n", + " ).astype(str\n", + " ).agg(''.join, axis=1).astype(float) # merge epochs into one column\n", + "\n", + "tl_study.loc[~tl_study['params_ChemPropRegressor_algorithm_hash'].isna(),\n", + " \"Model type\"]='Local' # Annotate the local model\n", + "\n", + "tl_study.loc[tl_study['params_ChemPropRegressor_algorithm_hash'].isna() \n", + " & (tl_study['epochs'] == 4), \"Model type\"] = 'Adapted' # Annotate the adapted model (TL to new data)\n", + "\n", + "tl_study.loc[tl_study['params_ChemPropRegressor_algorithm_hash'].isna() \n", + " & (tl_study['epochs'] == 0), \"Model type\"] = 'Global' # Annotate the global model (no TL)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.set_theme(style=\"darkgrid\")\n", + "default_reg_scoring= config.settings.scoring\n", + "ax = sns.scatterplot(data=tl_study, x=\"number\", \n", + " y=\"value\",hue='Model type')\n", + "ax.set(xlabel=\"Trial number\",ylabel=f\"Ojbective value\\n({default_reg_scoring})\")\n", + "sns.move_legend(ax, \"upper right\", bbox_to_anchor=(1.6, 1), ncol=1, title=\"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For this toy example we do not observe a large difference between the three model types, but in a real world setting a user can build the best model from the three model types evaluated." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ChemProp fingerprints (encode latent representation as descriptors)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It is possible for ChemProp to generate outputs in the form intended for use as a fingerprint using the [original package implementation](https://github.com/chemprop/chemprop/tree/master?tab=readme-ov-file#encode-fingerprint-latent-representation). Fingerprints are derived from the latent representation from the MPNN or penultimate FFN output layer, which can be used as a form of learned descriptor or fingerprint." + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with open(\"../target/pretrained.pkl\", \"rb\") as f:\n", + " chemprop_model = pickle.load(f)\n", + "\n", + "ax = sns.heatmap(\n", + " chemprop_model.predictor.chemprop_fingerprint(\n", + " df[config.data.input_column].head(5),\n", + " fingerprint_type=\"MPN\",\n", + " ), # MPN specified for illustration purposes - this is the default method in QSARtuna\n", + " cbar_kws={'label': 'Fingerprint value'}\n", + ")\n", + "ax.set(ylabel=\"Compound query\", xlabel=f\"Latent representation\\n(ChemProp descriptor/fingerprint)\");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The output is n compounds as the input query in the rows by n latent representation features from the MPN in the columns. This output can then be used for any semi-supervised learning approach outside of QSARtuna, as required. Alternatively, the last layer of the FFN can be used like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ax = sns.heatmap(\n", + " chemprop_model.predictor.chemprop_fingerprint(\n", + " df[config.data.input_column].head(5),\n", + " fingerprint_type=\"last_FFN\"), # Last FFN\n", + " cbar_kws={'label': 'Fingerprint value'}\n", + ")\n", + "ax.set(ylabel=\"Compound query\", xlabel=f\"Latent representation\\n(ChemProp descriptor)\");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The 5 compounds in the user query are also represented by the rows; however, the 300 features are now derived from the last output layer of the FFN." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Probability calibration (classification)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When performing classification you often want not only to predict the class label, but also obtain a probability of the respective label. This probability gives you some kind of confidence on the prediction. Some models can give you poor estimates of the class probabilities. The `CalibratedClassifierCV` QSARtuna models allow better calibration for the probabilities of a given model.\n", + "\n", + "First, we should understand that well calibrated classifiers are probabilistic classifiers for which the output of the predict_proba method can be directly interpreted as a confidence level.
For instance, a well calibrated (binary) classifier should classify the samples such that among the samples to which it gave a predict_proba value close to 0.8, approximately 80% actually belong to the positive class.\n", + "\n", + "See the [Scikit-learn documentation](https://scikit-learn.org/stable/modules/calibration.html#calibration) on the topic for more details.\n", + "\n", + "The available methods are `Sigmoid`, `Isotonic regression` and `VennABERS`, and a review of those calibration methods for QSAR has been performed [here](https://pubs.acs.org/doi/10.1021/acs.jcim.0c00476).\n", + "\n", + "We can review the effect of e.g. `sigmoid` calibration on the Random Forest algorithm by doing a calibrated run:" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:25:15,173] A new study created in memory with name: calibrated_rf\n", + "[I 2024-07-02 13:25:15,175] A new study created in memory with name: study_name_0\n", + "[I 2024-07-02 13:25:16,110] Trial 0 finished with value: 0.8353535353535354 and parameters: {'algorithm_name': 'CalibratedClassifierCVWithVA', 'CalibratedClassifierCVWithVA_algorithm_hash': 'e788dfbfc5075967acb5ddf9d971ea20', 'n_folds__e788dfbfc5075967acb5ddf9d971ea20': 5, 'max_depth__e788dfbfc5075967acb5ddf9d971ea20': 16, 'n_estimators__e788dfbfc5075967acb5ddf9d971ea20': 100, 'max_features__e788dfbfc5075967acb5ddf9d971ea20': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}.
Best is trial 0 with value: 0.8353535353535354.\n" + ] + } + ], + "source": [ + "from optunaz.config.optconfig import CalibratedClassifierCVWithVA, RandomForestClassifier\n", + "from sklearn.calibration import calibration_curve\n", + "import seaborn as sns\n", + "\n", + "from collections import defaultdict\n", + "\n", + "import pandas as pd\n", + "\n", + "from sklearn.metrics import (\n", + " precision_score,\n", + " recall_score,\n", + " f1_score,\n", + " brier_score_loss,\n", + " log_loss,\n", + " roc_auc_score,\n", + ")\n", + "\n", + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"canonical\",\n", + " response_column=\"molwt_gt_330\",\n", + " training_dataset_file=\"../tests/data/DRD2/subset-100/train.csv\"),\n", + " descriptors=[ECFP.new()],\n", + " algorithms=[ # the CalibratedClassifierCVWithVA is used here\n", + " CalibratedClassifierCVWithVA.new(\n", + " estimator=RandomForestClassifier.new(\n", + " n_estimators=RandomForestClassifier.Parameters.RandomForestClassifierParametersNEstimators(\n", + " low=100, high=100\n", + " )\n", + " ),\n", + " n_folds=5,\n", + " ensemble=\"True\",\n", + " method=\"sigmoid\", \n", + " )\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.CLASSIFICATION,\n", + " cross_validation=2,\n", + " n_trials=1,\n", + " n_startup_trials=0,\n", + " n_jobs=-1,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " random_seed=42,\n", + " ),\n", + ")\n", + "\n", + "study = optimize(config, study_name=\"calibrated_rf\")\n", + "build_best(buildconfig_best(study), \"../target/best.pkl\")\n", + "with open(\"../target/best.pkl\", \"rb\") as f:\n", + " calibrated_model = pickle.load(f)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "followed by an uncalibrated run:" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 
2024-07-02 13:25:18,566] A new study created in memory with name: uncalibrated_rf\n", + "[I 2024-07-02 13:25:18,608] A new study created in memory with name: study_name_0\n", + "[I 2024-07-02 13:25:18,915] Trial 0 finished with value: 0.8185858585858585 and parameters: {'algorithm_name': 'RandomForestClassifier', 'RandomForestClassifier_algorithm_hash': '167e1e88dd2a80133e317c78f009bdc9', 'max_depth__167e1e88dd2a80133e317c78f009bdc9': 16, 'n_estimators__167e1e88dd2a80133e317c78f009bdc9': 100, 'max_features__167e1e88dd2a80133e317c78f009bdc9': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 0 with value: 0.8185858585858585.\n" + ] + } + ], + "source": [ + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"canonical\",\n", + " response_column=\"molwt_gt_330\",\n", + " training_dataset_file=\"../tests/data/DRD2/subset-100/train.csv\"),\n", + " descriptors=[ECFP.new()],\n", + " algorithms=[ # an uncalibrated RandomForestClassifier is used here\n", + " RandomForestClassifier.new(\n", + " n_estimators=RandomForestClassifier.Parameters.RandomForestClassifierParametersNEstimators(\n", + " low=100, high=100\n", + " )\n", + " )\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.CLASSIFICATION,\n", + " cross_validation=2,\n", + " n_trials=1,\n", + " n_startup_trials=0,\n", + " n_jobs=-1,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " random_seed=42,\n", + " ),\n", + ")\n", + "\n", + "study = optimize(config, study_name=\"uncalibrated_rf\")\n", + "build_best(buildconfig_best(study), \"../target/best.pkl\")\n", + "with open(\"../target/best.pkl\", \"rb\") as f:\n", + " uncalibrated_model = pickle.load(f)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sigmoid calibration assigns more conservative probability estimates compared to the default RF, as shown by the lower median:" + ] + }, + { + "cell_type": 
"code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = pd.read_csv(\n", + " '../tests/data/DRD2/subset-1000/train.csv'\n", + " ).sample(500, random_state=123) # Load and sample test data.\n", + "expected = df[config.data.response_column]\n", + "input_column = df[config.data.input_column]\n", + "calibrated_predicted = uncalibrated_model.predict_from_smiles(input_column)\n", + "uncalibrated_predicted = calibrated_model.predict_from_smiles(input_column)\n", + "\n", + "cal_df=pd.DataFrame(data={\"default\":uncalibrated_predicted,\"sigmoid\":calibrated_predicted})\n", + "sns.boxplot(data=cal_df.melt(),x='value',y='variable').set_ylabel('');" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plotting the (sigmoid) calibrated predictions as a function of uncalibrated (default) values further highlights the behaviour of the probability calibration scaling:" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot expected vs predicted values for the best model.\n", + "import matplotlib.pyplot as plt\n", + "ax = plt.scatter(calibrated_predicted, uncalibrated_predicted)\n", + "lims = [expected.min(), expected.max()]\n", + "plt.plot(lims, lims) # Diagonal line.\n", + "plt.xlabel(f\"Calibrated {config.data.response_column}\");\n", + "plt.ylabel(f\"Uncalibrated {config.data.response_column}\");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can now visualize how well calibrated the predicted probabilities are using calibration curves. A calibration curve, also known as a reliability diagram, uses inputs from a binary classifier and plots the average predicted probability for each bin against the fraction of positive classes, on the y-axis. See [here](https://scikit-learn.org/stable/modules/calibration.html) for more info." + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.calibration import calibration_curve\n", + "\n", + "plt.figure(figsize=(10, 10))\n", + "ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)\n", + "ax2 = plt.subplot2grid((3, 1), (2, 0))\n", + "\n", + "ax1.plot([0, 1], [0, 1], \"k:\", label=\"Perfectly calibrated\")\n", + "for pred, name in [(uncalibrated_predicted, 'default'),\n", + " (calibrated_predicted, 'sigmoid')]:\n", + "\n", + " fraction_of_positives, mean_predicted_value = \\\n", + " calibration_curve(expected, pred, n_bins=10)\n", + " \n", + " brier=brier_score_loss(expected,pred)\n", + "\n", + " ax1.plot(mean_predicted_value, fraction_of_positives, \"s-\",\n", + " label=\"%s, brier=%.2f\" % (name, brier))\n", + "\n", + " ax2.hist(pred, range=(0, 1), bins=10, label=name,\n", + " histtype=\"step\", lw=2)\n", + "\n", + "ax1.set_ylabel(\"Fraction of positives\")\n", + "ax1.set_ylim([-0.05, 1.05])\n", + "ax1.legend(loc=\"lower right\")\n", + "ax1.set_title('Calibration plots (reliability curve)')\n", + "\n", + "ax2.set_xlabel(\"Mean predicted value\")\n", + "ax2.set_ylabel(\"Count\")\n", + "ax2.legend(loc=\"upper center\", ncol=2)\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The diagonal line on the calibration (scatter) plot indicates the situation when a classifier is perfectly calibrated, when the proportion of active instances annotated by the model is perfectly captured by the probability generated by the model.
Deviation above this line indicates when a classifier is under-confident, since the proportion of actives obtaining that score is higher than the score itself, and vice-versa, lines below indicate over-confident estimators, when the proportion of actives obtaining a given score is lower.\n", + "\n", + "Brier score loss (a metric composed of calibration term and refinement term) is one way to capture calibration improvement (this is recorded in the legend above). Notice that calibration does not significantly alter the prediction accuracy measures (precision, recall and F1 score) as shown in the cell below. This is because calibration should not significantly change prediction probabilities at the location of the decision threshold (at x = 0.5 on the graph). Calibration should, however, make the predicted probabilities more accurate and thus more useful for making allocation decisions under uncertainty." + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Brier lossLog lossPrecisionRecallF1Roc auc
calibration
yes0.1847050.5471290.8305650.7440480.7849290.716536
no0.1752970.5294740.8112090.8184520.8148150.714104
\n", + "
" + ], + "text/plain": [ + " Brier loss Log loss Precision Recall F1 Roc auc \n", + "calibration \n", + "yes 0.184705 0.547129 0.830565 0.744048 0.784929 0.716536\n", + "no 0.175297 0.529474 0.811209 0.818452 0.814815 0.714104" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from collections import defaultdict\n", + "\n", + "import pandas as pd\n", + "\n", + "from sklearn.metrics import (\n", + " precision_score,\n", + " recall_score,\n", + " f1_score,\n", + " brier_score_loss,\n", + " log_loss,\n", + " roc_auc_score,\n", + ")\n", + "\n", + "scores = defaultdict(list)\n", + "for i, (name, y_prob) in enumerate([('yes',calibrated_predicted), ('no',uncalibrated_predicted)]):\n", + " \n", + " y_pred = y_prob > 0.5\n", + " scores[\"calibration\"].append(name)\n", + "\n", + " for metric in [brier_score_loss, log_loss]:\n", + " score_name = metric.__name__.replace(\"_\", \" \").replace(\"score\", \"\").capitalize()\n", + " scores[score_name].append(metric(expected, y_prob))\n", + "\n", + " for metric in [precision_score, recall_score, f1_score, roc_auc_score]:\n", + " score_name = metric.__name__.replace(\"_\", \" \").replace(\"score\", \"\").capitalize()\n", + " scores[score_name].append(metric(expected, y_pred))\n", + "\n", + " score_df = pd.DataFrame(scores).set_index(\"calibration\")\n", + " score_df.round(decimals=3)\n", + "\n", + "score_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Uncertainty estimation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "QSARtuna offers three different ways to calculate uncertainty estimates and they are returned along with the normal predictions in the format `[[predictions], [uncertainties]]`. The currently implemented methods are:\n", + "\n", + "1. VennABERS calibration (a probability calibration covered in the section above).\n", + "2. 
Ensemble uncertainty (ChemProp models trained with random initialisations).\n", + "3. MAPIE (uncertainty for regression)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### VennABERS uncertainty" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "VennABERS (VA) uncertainty is implemented as in the section \"Uses for the Multipoint Probabilities from the VA Predictors\" from https://pubs.acs.org/doi/10.1021/acs.jcim.0c00476. This is based on the margin between the upper (p1) and lower (p0) probability boundaries output by the VennABERS algorithm. More details on this can be found in [this](https://cml.rhul.ac.uk/people/ptocca/HomePage/Toccaceli_CP___Venn_Tutorial.pdf) tutorial." + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:25:20,500] A new study created in memory with name: calibrated_rf\n", + "[I 2024-07-02 13:25:20,548] A new study created in memory with name: study_name_0\n", + "[I 2024-07-02 13:25:21,537] Trial 0 finished with value: 0.8213131313131313 and parameters: {'algorithm_name': 'CalibratedClassifierCVWithVA', 'CalibratedClassifierCVWithVA_algorithm_hash': '79765fbec1586f3c917ff30de274fdb4', 'n_folds__79765fbec1586f3c917ff30de274fdb4': 5, 'max_depth__79765fbec1586f3c917ff30de274fdb4': 16, 'n_estimators__79765fbec1586f3c917ff30de274fdb4': 100, 'max_features__79765fbec1586f3c917ff30de274fdb4': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 0 with value: 0.8213131313131313.\n" + ] + } + ], + "source": [ + "from optunaz.config.optconfig import CalibratedClassifierCVWithVA, RandomForestClassifier\n", + "from sklearn.calibration import calibration_curve\n", + "import seaborn as sns\n", + "\n", + "from collections import defaultdict\n", + "\n", + "import pandas as pd\n", + "\n", + "from sklearn.metrics import (\n", + " precision_score,\n", + " recall_score,\n", + " f1_score,\n", + " brier_score_loss,\n", + " log_loss,\n", + " roc_auc_score,\n", + ")\n", + "\n", + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"canonical\",\n", + " response_column=\"molwt_gt_330\",\n", + " training_dataset_file=\"../tests/data/DRD2/subset-100/train.csv\"),\n", + " descriptors=[ECFP.new()],\n", + " algorithms=[ # the CalibratedClassifierCVWithVA is used here\n", + " CalibratedClassifierCVWithVA.new(\n", + " estimator=RandomForestClassifier.new(\n", + " n_estimators=RandomForestClassifier.Parameters.RandomForestClassifierParametersNEstimators(\n", + " low=100, high=100\n", + " )\n", + " ),\n", + " n_folds=5,\n", + " ensemble=\"True\",\n", + " method=\"vennabers\", \n", + " )\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.CLASSIFICATION,\n", + " cross_validation=2,\n", + " n_trials=1,\n", + " n_startup_trials=0,\n", + " n_jobs=-1,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " random_seed=42,\n", + " ),\n", + ")\n", + "\n", + "study = optimize(config, study_name=\"calibrated_rf\")\n", + "build_best(buildconfig_best(study), \"../target/best.pkl\")\n", + "with open(\"../target/best.pkl\", \"rb\") as f:\n", + " calibrated_model = pickle.load(f)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "VennABERS uncertainty can now be obtained by running inference and supplying `uncert=True`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "from rdkit.Chem import AllChem\n", + "from rdkit.Chem import PandasTools\n", + "from rdkit import RDConfig\n", + "from rdkit import DataStructs\n", + "\n", + "# get training data, mols & fingerprints\n", + "train_df = pd.read_csv('../tests/data/DRD2/subset-100/train.csv') # Load test data.\n", + "PandasTools.AddMoleculeColumnToFrame(train_df,'canonical','molecule',includeFingerprints=True)\n", + "train_df[\"fp\"]=train_df[\"molecule\"].apply(lambda x: AllChem.GetMorganFingerprint(x,2 ))\n", + "\n", + "# get test data, mols & fingerprints and calculate the nn to training set\n", + "df = pd.read_csv('../tests/data/DRD2/subset-1000/train.csv') # Load test data.\n", + "PandasTools.AddMoleculeColumnToFrame(df,'canonical','molecule',includeFingerprints=True)\n", + "df[\"fp\"]=df[\"molecule\"].apply(lambda x: AllChem.GetMorganFingerprint(x,2 ))\n", + "df['nn']=df[\"fp\"].apply(lambda x: max(DataStructs.BulkTanimotoSimilarity(x,[i for i in train_df[\"fp\"]])))\n", + "\n", + "# add uncertainty & prediction to the df \n", + "df['va_pred'], df['va_uncert'] = calibrated_model.predict_from_smiles(df[config.data.input_column], uncert=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It is possible to relate the uncertainty to the nearest neighbor (nn) to look for distance-to-model (DTM) effect and to the probabilistic output from the RF model scaled by VA:" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot uncertainty as a function of nn or true label in a trellis for overview.\n", + "fig, ax =plt.subplots(1,2, figsize=(10, 5), sharey=True)\n", + "sns.regplot(data=df,y='va_uncert',x='nn', ax=ax[0])\n", + "sns.regplot(data=df,y='va_uncert',x='va_pred', ax=ax[1]).set_ylabel(\"\")\n", + "fig.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Similar to the findings in the referenced scaling evaluation paper above, the lower and upper probability boundary intervals are shown to produce large discordance for test set molecules that are neither very similar nor very dissimilar to the active training set, which were hence difficult to predict." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Ensemble uncertainty (ChemProp Only)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Training a ChemProp model with `ensemble_size` >1 will enable uncertainty estimation based on the implementation in the original ChemProp package, using the deviation of predictions from the ensemble of models trained with different random initialisation of the weights. 
This can be done like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 13:25:25,916] A new study created in memory with name: my_study\n", + "[I 2024-07-02 13:25:25,959] A new study created in memory with name: study_name_0\n", + "INFO:root:Enqueued ChemProp manual trial with sensible defaults: {'activation__fd833c2dde0b7147e6516ea5eebb2657': 'ReLU', 'aggregation__fd833c2dde0b7147e6516ea5eebb2657': 'mean', 'aggregation_norm__fd833c2dde0b7147e6516ea5eebb2657': 100, 'batch_size__fd833c2dde0b7147e6516ea5eebb2657': 50, 'depth__fd833c2dde0b7147e6516ea5eebb2657': 3, 'dropout__fd833c2dde0b7147e6516ea5eebb2657': 0.0, 'features_generator__fd833c2dde0b7147e6516ea5eebb2657': 'none', 'ffn_hidden_size__fd833c2dde0b7147e6516ea5eebb2657': 300, 'ffn_num_layers__fd833c2dde0b7147e6516ea5eebb2657': 2, 'final_lr_ratio_exp__fd833c2dde0b7147e6516ea5eebb2657': -4, 'hidden_size__fd833c2dde0b7147e6516ea5eebb2657': 300, 'init_lr_ratio_exp__fd833c2dde0b7147e6516ea5eebb2657': -4, 'max_lr_exp__fd833c2dde0b7147e6516ea5eebb2657': -3, 'warmup_epochs_ratio__fd833c2dde0b7147e6516ea5eebb2657': 0.1, 'algorithm_name': 'ChemPropClassifier', 'ChemPropClassifier_algorithm_hash': 'fd833c2dde0b7147e6516ea5eebb2657'}\n", + "[I 2024-07-02 13:32:26,200] Trial 0 finished with value: 0.65625 and parameters: {'algorithm_name': 'ChemPropClassifier', 'ChemPropClassifier_algorithm_hash': 'fd833c2dde0b7147e6516ea5eebb2657', 'activation__fd833c2dde0b7147e6516ea5eebb2657': , 'aggregation__fd833c2dde0b7147e6516ea5eebb2657': , 'aggregation_norm__fd833c2dde0b7147e6516ea5eebb2657': 100.0, 'batch_size__fd833c2dde0b7147e6516ea5eebb2657': 50.0, 'depth__fd833c2dde0b7147e6516ea5eebb2657': 3.0, 'dropout__fd833c2dde0b7147e6516ea5eebb2657': 0.0, 'ensemble_size__fd833c2dde0b7147e6516ea5eebb2657': 5, 'epochs__fd833c2dde0b7147e6516ea5eebb2657': 4, 
'features_generator__fd833c2dde0b7147e6516ea5eebb2657': , 'ffn_hidden_size__fd833c2dde0b7147e6516ea5eebb2657': 300.0, 'ffn_num_layers__fd833c2dde0b7147e6516ea5eebb2657': 2.0, 'final_lr_ratio_exp__fd833c2dde0b7147e6516ea5eebb2657': -4, 'hidden_size__fd833c2dde0b7147e6516ea5eebb2657': 300.0, 'init_lr_ratio_exp__fd833c2dde0b7147e6516ea5eebb2657': -4, 'max_lr_exp__fd833c2dde0b7147e6516ea5eebb2657': -3, 'warmup_epochs_ratio__fd833c2dde0b7147e6516ea5eebb2657': 0.1, 'descriptor': '{\"name\": \"SmilesFromFile\", \"parameters\": {}}'}. Best is trial 0 with value: 0.65625.\n", + " \r" + ] + } + ], + "source": [ + "# Start with the imports.\n", + "import sklearn\n", + "from optunaz.three_step_opt_build_merge import (\n", + " optimize,\n", + " buildconfig_best,\n", + " build_best,\n", + " build_merged,\n", + ")\n", + "from optunaz.config import ModelMode, OptimizationDirection\n", + "from optunaz.config.optconfig import (\n", + " OptimizationConfig,\n", + " ChemPropHyperoptRegressor,\n", + " ChemPropHyperoptClassifier\n", + ")\n", + "from optunaz.datareader import Dataset\n", + "from optunaz.descriptors import SmilesFromFile\n", + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"canonical\",\n", + " response_column=\"molwt_gt_330\",\n", + " training_dataset_file=\"../tests/data/DRD2/subset-50/train.csv\", # This will be split into train and test.\n", + " ),\n", + " descriptors=[\n", + " SmilesFromFile.new(),\n", + " ],\n", + " algorithms=[\n", + " ChemPropClassifier.new(epochs=4, ensemble_size=5), #epochs=15 to ensure run finishes quickly\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.CLASSIFICATION,\n", + " cross_validation=2,\n", + " n_trials=1,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " ),\n", + ")\n", + "\n", + "study = optimize(config, study_name=\"my_study\")\n", + "\n", + "build_best(buildconfig_best(study), \"../target/best.pkl\")\n", + "with open(\"../target/best.pkl\", \"rb\") as f:\n", + " 
chemprop_model = pickle.load(f)\n", + "\n", + "# add chemprop uncertainty & prediction to the df \n", + "df[\"cp_pred_ensemble\"], df[\"cp_uncert_ensemble\"] = chemprop_model.predict_from_smiles(df[config.data.input_column], uncert=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot uncertainty as a function of nn or true label in a trellis for overview.\n", + "fig, ax =plt.subplots(1,2, figsize=(10, 5), sharey=True)\n", + "sns.regplot(data=df,y='cp_uncert_ensemble',x='nn', ax=ax[0])\n", + "sns.regplot(data=df,y='cp_uncert_ensemble',x='cp_pred_ensemble', ax=ax[1]).set(ylabel='')\n", + "fig.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Similar to the VA uncertainty, the largest ensemble uncertainty is observed for test set molecules that are neither very similar nor very dissimilar to the active training set, which are hence difficult to predict. Larger uncertainty is also seen toward the midpoint of the ChemProp predictions, for cases when the probabilistic output from models is also neither very high nor very low." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ChemProp dropout uncertainty" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ChemProp uncertainty based on dropout is available for a single model and not an ensemble (i.e. when ChemProp is provided with `ensemble_size=1`). 
It is based on the implementation in the [original ChemProp package](https://github.com/chemprop/chemprop#uncertainty-estimation)\n", + "\n", + "The method uses Monte Carlo dropout to generate a virtual ensemble of models and reports the ensemble variance of the predictions.\n", + "\n", + "Note that this dropout is distinct from dropout regularization used during training, which is not active during predictions.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:01:03,422] A new study created in memory with name: my_study\n", + "[I 2024-07-02 14:01:03,468] A new study created in memory with name: study_name_0\n", + "INFO:root:Enqueued ChemProp manual trial with sensible defaults: {'activation__c73885c5d5a4182168b8b002d321965a': 'ReLU', 'aggregation__c73885c5d5a4182168b8b002d321965a': 'mean', 'aggregation_norm__c73885c5d5a4182168b8b002d321965a': 100, 'batch_size__c73885c5d5a4182168b8b002d321965a': 50, 'depth__c73885c5d5a4182168b8b002d321965a': 3, 'dropout__c73885c5d5a4182168b8b002d321965a': 0.0, 'features_generator__c73885c5d5a4182168b8b002d321965a': 'none', 'ffn_hidden_size__c73885c5d5a4182168b8b002d321965a': 300, 'ffn_num_layers__c73885c5d5a4182168b8b002d321965a': 2, 'final_lr_ratio_exp__c73885c5d5a4182168b8b002d321965a': -4, 'hidden_size__c73885c5d5a4182168b8b002d321965a': 300, 'init_lr_ratio_exp__c73885c5d5a4182168b8b002d321965a': -4, 'max_lr_exp__c73885c5d5a4182168b8b002d321965a': -3, 'warmup_epochs_ratio__c73885c5d5a4182168b8b002d321965a': 0.1, 'algorithm_name': 'ChemPropClassifier', 'ChemPropClassifier_algorithm_hash': 'c73885c5d5a4182168b8b002d321965a'}\n", + "[I 2024-07-02 14:02:28,149] Trial 0 finished with value: 0.46875 and parameters: {'algorithm_name': 'ChemPropClassifier', 'ChemPropClassifier_algorithm_hash': 'c73885c5d5a4182168b8b002d321965a', 'activation__c73885c5d5a4182168b8b002d321965a': , 
'aggregation__c73885c5d5a4182168b8b002d321965a': , 'aggregation_norm__c73885c5d5a4182168b8b002d321965a': 100.0, 'batch_size__c73885c5d5a4182168b8b002d321965a': 50.0, 'depth__c73885c5d5a4182168b8b002d321965a': 3.0, 'dropout__c73885c5d5a4182168b8b002d321965a': 0.0, 'ensemble_size__c73885c5d5a4182168b8b002d321965a': 1, 'epochs__c73885c5d5a4182168b8b002d321965a': 5, 'features_generator__c73885c5d5a4182168b8b002d321965a': , 'ffn_hidden_size__c73885c5d5a4182168b8b002d321965a': 300.0, 'ffn_num_layers__c73885c5d5a4182168b8b002d321965a': 2.0, 'final_lr_ratio_exp__c73885c5d5a4182168b8b002d321965a': -4, 'hidden_size__c73885c5d5a4182168b8b002d321965a': 300.0, 'init_lr_ratio_exp__c73885c5d5a4182168b8b002d321965a': -4, 'max_lr_exp__c73885c5d5a4182168b8b002d321965a': -3, 'warmup_epochs_ratio__c73885c5d5a4182168b8b002d321965a': 0.1, 'descriptor': '{\"name\": \"SmilesFromFile\", \"parameters\": {}}'}. Best is trial 0 with value: 0.46875.\n", + " \r" + ] + } + ], + "source": [ + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"canonical\",\n", + " response_column=\"molwt_gt_330\",\n", + " training_dataset_file=\"../tests/data/DRD2/subset-50/train.csv\", # This will be split into train and test.\n", + " ),\n", + " descriptors=[\n", + " SmilesFromFile.new(),\n", + " ],\n", + " algorithms=[\n", + " ChemPropClassifier.new(epochs=5), #ensemble_size not supplied (defaults back to 1) \n", + " #to ensure uncertainty will be based on dropout\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.CLASSIFICATION,\n", + " cross_validation=2,\n", + " n_trials=1,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " ),\n", + ")\n", + "\n", + "study = optimize(config, study_name=\"my_study\")\n", + "build_best(buildconfig_best(study), \"../target/best.pkl\")\n", + "with open(\"../target/best.pkl\", \"rb\") as f:\n", + " chemprop_model = pickle.load(f)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + 
"scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "# add chemprop uncertainty & prediction to the df \n", + "df[\"cp_pred_dropout\"], df[\"cp_uncert_dropout\"] = chemprop_model.predict_from_smiles(df[config.data.input_column], uncert=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Similar to previous findings using ensembling, the dropout approach toward uncertainty shows the largest uncertainty for marginal cases that are neither similar nor dissimilar to the training set, and with probabilities toward the midpoint (0.5):" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot uncertainty as a function of nn or true label in a trellis for overview.\n", + "fig, ax =plt.subplots(1,2, figsize=(10, 5), sharey=True)\n", + "sns.regplot(data=df,y='cp_uncert_dropout',x='nn', ax=ax[0])\n", + "sns.regplot(data=df,y='cp_uncert_dropout',x='cp_pred_dropout', ax=ax[1]).set(ylabel='')\n", + "fig.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Comparison of dropout vs. ensemble uncertainties can be performed as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n", + "INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "R2 correlation between drouput and ensemble uncertatinties:-100.98\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot uncertainty as a function of va_prediction and true label in a trellis for overview.\n", + "r2 = r2_score(y_true=df['cp_uncert_dropout'], y_pred=df['cp_uncert_ensemble'])\n", + "print(f\"R2 correlation between drouput and ensemble uncertatinties:{r2:.2f}\")\n", + "\n", + "fig, ax =plt.subplots(1,2, figsize=(10, 5))\n", + "df['cp_uncert_delta']=df['cp_uncert_dropout']-df['cp_uncert_ensemble']\n", + "sns.regplot(data=df,y='cp_uncert_dropout',x='cp_uncert_ensemble', ax=ax[0])\n", + "sns.boxplot(data=df,y='cp_uncert_delta',x='activity', ax=ax[1])\n", + "fig.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Findings show only a limited correlation between dropout and ensemble uncertainty for the toy example (real world examples with more epochs/more predictive models will be different)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### MAPIE (regression uncertainty)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For regression uncertainty, the MAPIE package is available within QSARtuna for regression algorithms, and is selected like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:22:47,822] A new study created in memory with name: my_study\n", + "[I 2024-07-02 14:22:47,862] A new study created in memory with name: study_name_0\n", + "[I 2024-07-02 14:22:49,237] Trial 0 finished with value: -4430.271946796234 and parameters: {'algorithm_name': 'Mapie', 'Mapie_algorithm_hash': '976d211e4ac64e5568d369bcddd3aeb1', 'mapie_alpha__976d211e4ac64e5568d369bcddd3aeb1': 0.05, 'max_depth__976d211e4ac64e5568d369bcddd3aeb1': 9, 'n_estimators__976d211e4ac64e5568d369bcddd3aeb1': 50, 'max_features__976d211e4ac64e5568d369bcddd3aeb1': , 'descriptor': '{\"name\": 
\"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 0 with value: -4430.271946796234.\n" + ] + } + ], + "source": [ + "from optunaz.config.optconfig import Mapie\n", + " \n", + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"canonical\",\n", + " response_column=\"molwt\",\n", + " training_dataset_file=\"../tests/data/DRD2/subset-300/train.csv\", # This will be split into train and test.\n", + " ),\n", + " descriptors=[\n", + " ECFP.new(),\n", + " ],\n", + " algorithms=[Mapie.new( # mapie 'wraps' around a regressor of choice\n", + " estimator=RandomForestRegressor.new(n_estimators={\"low\": 50, \"high\": 50})\n", + " )\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.REGRESSION,\n", + " cross_validation=2,\n", + " n_trials=1,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " ),\n", + ")\n", + "\n", + "study = optimize(config, study_name=\"my_study\")\n", + "build_best(buildconfig_best(study), \"../target/best.pkl\")\n", + "with open(\"../target/best.pkl\", \"rb\") as f:\n", + " mapie = pickle.load(f)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Analysis of the nn's and behaviour of uncertainty vs. 
predicted values can be performed like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "# get training data, mols & fingerprints\n", + "train_df = pd.read_csv('../tests/data/DRD2/subset-300/train.csv') # Load test data.\n", + "PandasTools.AddMoleculeColumnToFrame(train_df,'canonical','molecule',includeFingerprints=True)\n", + "train_df[\"fp\"]=train_df[\"molecule\"].apply(lambda x: AllChem.GetMorganFingerprint(x,2 ))\n", + "\n", + "# get test data, mols & fingerprints and calculate the nn to training set\n", + "df = pd.read_csv('../tests/data/DRD2/subset-50/train.csv') # Load test data.\n", + "PandasTools.AddMoleculeColumnToFrame(df,'canonical','molecule',includeFingerprints=True)\n", + "df[\"fp\"]=df[\"molecule\"].apply(lambda x: AllChem.GetMorganFingerprint(x,2 ))\n", + "df['nn']=df[\"fp\"].apply(lambda x: max(DataStructs.BulkTanimotoSimilarity(x,[i for i in train_df[\"fp\"]])))\n", + "\n", + "mapie.predictor.mapie_alpha=0.99 # it is possible to alter the alpha of mapie post-train using this approach\n", + "\n", + "# add uncertainty & prediction to the df \n", + "df['mapie_pred'], df['mapie_unc'] = mapie.predict_from_smiles(df[config.data.input_column], uncert=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plotting mapie uncertainty as a function of the nearest neighbors/mapie predictions is performed here:" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot uncertainty as a function of nn or true label in a trellis for overview.\n", + "fig, ax =plt.subplots(1,3, figsize=(10, 5))\n", + "sns.regplot(data=df,y='mapie_unc',x='nn', ax=ax[0])\n", + "sns.regplot(data=df,y='mapie_unc',x='mapie_pred', ax=ax[1])\n", + "sns.regplot(data=df,y=df[config.data.response_column],x='mapie_pred', ax=ax[2])\n", + "fig.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Further analysis of the uncertainty using error bars is shown here:" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot true value as a function of predicted value, with MAPIE uncertainty error bars for visualisation.\n", + "plt.figure(figsize=(12,5))\n", + "plt.errorbar(df[config.data.response_column], df['mapie_pred'], yerr=df['mapie_unc'].abs(), fmt='o',color='black', alpha=.8, ecolor='gray', elinewidth=1, capsize=10);\n", + "plt.xlabel('Predicted Mw');\n", + "plt.ylabel('Expected Mw');" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "where more certain predictions have smaller error bars.\n", + "\n", + "The same analysis can be performed by plotting similarity to nn's (increasing similarity to the training set moving from left to right on the x-axis):" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot true value as a function of predicted value, with MAPIE uncertainty error bars for visualisation.\n", + "plt.figure(figsize=(12,5))\n", + "plt.errorbar(df['nn'], df['mapie_pred'], yerr=df['mapie_unc'].abs(), fmt='o',color='black', alpha=.8, ecolor='gray', elinewidth=1, capsize=10);\n", + "plt.xlabel('Nearest neighbor (NN) similarity');\n", + "plt.ylabel('Expected Mw');" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The MAPIE package uses the `alpha` parameter to set the uncertainty of the confidence interval, see here for details. It is possible to alter the uncertainty of the confidence interval by setting the `mapie_alpha` parameter of the QSARtuna model predictor. Here lower alpha values produce larger (more conservative) prediction intervals. N.B: `alpha` is set to 0.05 by default and will hence provide more conservative predictions if not changed.\n", + "\n", + "The uncertainty as a function of the alpha setting (over all point predictions) can be analysed for our toy example using the following (error bars denote deviations across all point predictions which have been extended by two standard error widths):" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "alpha_impact=[]\n", + "for ma in range(1,100,5):\n", + " mapie.predictor.mapie_alpha=ma/100\n", + " preds = mapie.predict_from_smiles(df[config.data.input_column], uncert=True)\n", + " unc_df = pd.DataFrame(\n", + " data={\n", + " \"pred\": preds[0],\n", + " \"unc\": preds[1],\n", + " \"alpha\": ma,\n", + " }\n", + " )\n", + " alpha_impact.append(unc_df.reset_index())\n", + "alpha_impact=pd.concat(alpha_impact).reset_index(drop=True)\n", + "\n", + "sns.lineplot(data=alpha_impact[alpha_impact['index']<=20],x='alpha',y='unc',err_style=\"bars\", errorbar=(\"se\", 2))\n", + "plt.xlabel('MAPIE Alpha');\n", + "plt.ylabel('MAPIE uncertainty (±MW)');" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As expected larger alpha values produce smaller (less conservative) prediction intervals." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Explainability" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Model explainability is incorporated into QSARtuna using two different approaches, depending on the algorithm chosen:\n", + "1. **SHAP:** \n", + " Any shallow algorithm is compatible with the SHAP package (even traditionally unsupported packages use the `KernelExplainer`)\n", + "2. **ChemProp interpret:**\n", + " This explainability approach is based on the interpret function in the original ChemProp package" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### SHAP" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[SHAP](https://proceedings.neurips.cc/paper_files/paper/2017/hash/8a20a8621978632d76c43dfd28b67767-Abstract.html) (SHapley Additive exPlanations) are available in `QSARtuna` based on the implementation available at https://github.com/slundberg/shap. The method uses a game theoretic approach to explain the output of any machine learning model. 
It connects optimal credit allocation with local explanations using the classic Shapley values from game theory and their related extensions (see [here](https://shap.readthedocs.io/en/latest/index.html) for more details on the published tool and [here](https://github.com/slundberg/shap#methods-unified-by-shap) for papers using the approach).\n", + "\n", + "In the following example, a `RIDGE` regressor is trained using a composite descriptor based on the `ECFP`, `MACCS` keys, `Jazzy` and `PhysChem` descriptors:" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:22:54,504] A new study created in memory with name: my_study\n", + "[I 2024-07-02 14:22:54,540] A new study created in memory with name: study_name_0\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_ridge.py:243: UserWarning: Singular matrix in solving dual problem. Using least-squares solution instead.\n", + " warnings.warn(\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_ridge.py:243: UserWarning: Singular matrix in solving dual problem. 
Using least-squares solution instead.\n", + " warnings.warn(\n", + "[I 2024-07-02 14:22:55,559] Trial 0 finished with value: -0.34035600917066766 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.676421027478709, 'descriptor': '{\"parameters\": {\"descriptors\": [{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}, {\"name\": \"MACCS_keys\", \"parameters\": {}}, {\"name\": \"UnscaledJazzyDescriptors\", \"parameters\": {\"jazzy_names\": [\"dga\", \"dgp\", \"dgtot\", \"sa\", \"sdc\", \"sdx\"], \"jazzy_filters\": {\"NumHAcceptors\": 25, \"NumHDonors\": 25, \"MolWt\": 1000}}}, {\"name\": \"UnscaledPhyschemDescriptors\", \"parameters\": {\"rdkit_names\": [\"MaxAbsEStateIndex\", \"MaxEStateIndex\", \"MinAbsEStateIndex\", \"MinEStateIndex\", \"qed\", \"SPS\", \"MolWt\", \"HeavyAtomMolWt\", \"ExactMolWt\", \"NumValenceElectrons\", \"NumRadicalElectrons\", \"MaxPartialCharge\", \"MinPartialCharge\", \"MaxAbsPartialCharge\", \"MinAbsPartialCharge\", \"FpDensityMorgan1\", \"FpDensityMorgan2\", \"FpDensityMorgan3\", \"BCUT2D_MWHI\", \"BCUT2D_MWLOW\", \"BCUT2D_CHGHI\", \"BCUT2D_CHGLO\", \"BCUT2D_LOGPHI\", \"BCUT2D_LOGPLOW\", \"BCUT2D_MRHI\", \"BCUT2D_MRLOW\", \"AvgIpc\", \"BalabanJ\", \"BertzCT\", \"Chi0\", \"Chi0n\", \"Chi0v\", \"Chi1\", \"Chi1n\", \"Chi1v\", \"Chi2n\", \"Chi2v\", \"Chi3n\", \"Chi3v\", \"Chi4n\", \"Chi4v\", \"HallKierAlpha\", \"Ipc\", \"Kappa1\", \"Kappa2\", \"Kappa3\", \"LabuteASA\", \"PEOE_VSA1\", \"PEOE_VSA10\", \"PEOE_VSA11\", \"PEOE_VSA12\", \"PEOE_VSA13\", \"PEOE_VSA14\", \"PEOE_VSA2\", \"PEOE_VSA3\", \"PEOE_VSA4\", \"PEOE_VSA5\", \"PEOE_VSA6\", \"PEOE_VSA7\", \"PEOE_VSA8\", \"PEOE_VSA9\", \"SMR_VSA1\", \"SMR_VSA10\", \"SMR_VSA2\", \"SMR_VSA3\", \"SMR_VSA4\", \"SMR_VSA5\", \"SMR_VSA6\", \"SMR_VSA7\", \"SMR_VSA8\", \"SMR_VSA9\", \"SlogP_VSA1\", \"SlogP_VSA10\", \"SlogP_VSA11\", \"SlogP_VSA12\", \"SlogP_VSA2\", 
\"SlogP_VSA3\", \"SlogP_VSA4\", \"SlogP_VSA5\", \"SlogP_VSA6\", \"SlogP_VSA7\", \"SlogP_VSA8\", \"SlogP_VSA9\", \"TPSA\", \"EState_VSA1\", \"EState_VSA10\", \"EState_VSA11\", \"EState_VSA2\", \"EState_VSA3\", \"EState_VSA4\", \"EState_VSA5\", \"EState_VSA6\", \"EState_VSA7\", \"EState_VSA8\", \"EState_VSA9\", \"VSA_EState1\", \"VSA_EState10\", \"VSA_EState2\", \"VSA_EState3\", \"VSA_EState4\", \"VSA_EState5\", \"VSA_EState6\", \"VSA_EState7\", \"VSA_EState8\", \"VSA_EState9\", \"FractionCSP3\", \"HeavyAtomCount\", \"NHOHCount\", \"NOCount\", \"NumAliphaticCarbocycles\", \"NumAliphaticHeterocycles\", \"NumAliphaticRings\", \"NumAromaticCarbocycles\", \"NumAromaticHeterocycles\", \"NumAromaticRings\", \"NumHAcceptors\", \"NumHDonors\", \"NumHeteroatoms\", \"NumRotatableBonds\", \"NumSaturatedCarbocycles\", \"NumSaturatedHeterocycles\", \"NumSaturatedRings\", \"RingCount\", \"MolLogP\", \"MolMR\", \"fr_Al_COO\", \"fr_Al_OH\", \"fr_Al_OH_noTert\", \"fr_ArN\", \"fr_Ar_COO\", \"fr_Ar_N\", \"fr_Ar_NH\", \"fr_Ar_OH\", \"fr_COO\", \"fr_COO2\", \"fr_C_O\", \"fr_C_O_noCOO\", \"fr_C_S\", \"fr_HOCCN\", \"fr_Imine\", \"fr_NH0\", \"fr_NH1\", \"fr_NH2\", \"fr_N_O\", \"fr_Ndealkylation1\", \"fr_Ndealkylation2\", \"fr_Nhpyrrole\", \"fr_SH\", \"fr_aldehyde\", \"fr_alkyl_carbamate\", \"fr_alkyl_halide\", \"fr_allylic_oxid\", \"fr_amide\", \"fr_amidine\", \"fr_aniline\", \"fr_aryl_methyl\", \"fr_azide\", \"fr_azo\", \"fr_barbitur\", \"fr_benzene\", \"fr_benzodiazepine\", \"fr_bicyclic\", \"fr_diazo\", \"fr_dihydropyridine\", \"fr_epoxide\", \"fr_ester\", \"fr_ether\", \"fr_furan\", \"fr_guanido\", \"fr_halogen\", \"fr_hdrzine\", \"fr_hdrzone\", \"fr_imidazole\", \"fr_imide\", \"fr_isocyan\", \"fr_isothiocyan\", \"fr_ketone\", \"fr_ketone_Topliss\", \"fr_lactam\", \"fr_lactone\", \"fr_methoxy\", \"fr_morpholine\", \"fr_nitrile\", \"fr_nitro\", \"fr_nitro_arom\", \"fr_nitro_arom_nonortho\", \"fr_nitroso\", \"fr_oxazole\", \"fr_oxime\", \"fr_para_hydroxylation\", \"fr_phenol\", 
\"fr_phenol_noOrthoHbond\", \"fr_phos_acid\", \"fr_phos_ester\", \"fr_piperdine\", \"fr_piperzine\", \"fr_priamide\", \"fr_prisulfonamd\", \"fr_pyridine\", \"fr_quatN\", \"fr_sulfide\", \"fr_sulfonamd\", \"fr_sulfone\", \"fr_term_acetylene\", \"fr_tetrazole\", \"fr_thiazole\", \"fr_thiocyan\", \"fr_thiophene\", \"fr_unbrch_alkane\", \"fr_urea\"]}}]}, \"name\": \"CompositeDescriptor\"}'}. Best is trial 0 with value: -0.34035600917066766.\n" + ] + } + ], + "source": [ + "from optunaz.descriptors import CompositeDescriptor, UnscaledPhyschemDescriptors, UnscaledJazzyDescriptors\n", + " \n", + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"canonical\",\n", + " response_column=\"molwt\",\n", + " training_dataset_file=\"../tests/data/DRD2/subset-50/train.csv\", # This will be split into train and test.\n", + " ),\n", + " descriptors=[\n", + " CompositeDescriptor.new(\n", + " descriptors=[\n", + " ECFP.new(),\n", + " MACCS_keys.new(),\n", + " UnscaledJazzyDescriptors.new(),\n", + " UnscaledPhyschemDescriptors.new(),\n", + " ]\n", + " )\n", + " ],\n", + " algorithms=[\n", + " Ridge.new(),\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.REGRESSION,\n", + " cross_validation=2,\n", + " n_trials=1,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " ),\n", + ")\n", + "\n", + "study = optimize(config, study_name=\"my_study\")\n", + "build_best(buildconfig_best(study), \"../target/best.pkl\")\n", + "with open(\"../target/best.pkl\", \"rb\") as f:\n", + " ridge = pickle.load(f)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Predictions from the algorithms can be explained like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
shap_valuedescriptorbitinfo
22272.042023e+01UnscaledPhyschemDescriptors7.0MolWt
22292.025199e+01UnscaledPhyschemDescriptors9.0ExactMolWt
22281.802158e+01UnscaledPhyschemDescriptors8.0HeavyAtomMolWt
22672.387276e+00UnscaledPhyschemDescriptors47.0LabuteASA
22302.106653e+00UnscaledPhyschemDescriptors10.0NumValenceElectrons
...............
17844.598471e-07ECFP1785.0c1(OC)c(OC)ccc(C)c1
5834.598471e-07ECFP584.0C1(c(cc)cc)=NS(=O)(=O)NC(C)=C1
9954.598471e-07ECFP996.0C(C(N)=C)(=O)N(C)C
8454.598471e-07ECFP846.0c(c(c)C)c(O)c
13754.598471e-07ECFP1376.0S1(=O)(=O)N=C(c)C=C(C)N1C
\n", + "

1570 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " shap_value descriptor bit \\\n", + "2227 2.042023e+01 UnscaledPhyschemDescriptors 7.0 \n", + "2229 2.025199e+01 UnscaledPhyschemDescriptors 9.0 \n", + "2228 1.802158e+01 UnscaledPhyschemDescriptors 8.0 \n", + "2267 2.387276e+00 UnscaledPhyschemDescriptors 47.0 \n", + "2230 2.106653e+00 UnscaledPhyschemDescriptors 10.0 \n", + "... ... ... ... \n", + "1784 4.598471e-07 ECFP 1785.0 \n", + "583 4.598471e-07 ECFP 584.0 \n", + "995 4.598471e-07 ECFP 996.0 \n", + "845 4.598471e-07 ECFP 846.0 \n", + "1375 4.598471e-07 ECFP 1376.0 \n", + "\n", + " info \n", + "2227 MolWt \n", + "2229 ExactMolWt \n", + "2228 HeavyAtomMolWt \n", + "2267 LabuteASA \n", + "2230 NumValenceElectrons \n", + "... ... \n", + "1784 c1(OC)c(OC)ccc(C)c1 \n", + "583 C1(c(cc)cc)=NS(=O)(=O)NC(C)=C1 \n", + "995 C(C(N)=C)(=O)N(C)C \n", + "845 c(c(c)C)c(O)c \n", + "1375 S1(=O)(=O)N=C(c)C=C(C)N1C \n", + "\n", + "[1570 rows x 4 columns]" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ridge.predict_from_smiles(df[config.data.input_column], explain=True).query('shap_value > 0')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Outputs are ordered by shap_value (higher is more important). We see that the highest-ranked features are the `UnscaledPhyschemDescriptors` bits corresponding to e.g. `MolWt`, `ExactMolWt`, `HeavyAtomMolWt` and `NumValenceElectrons`. We can hence interpret these as the most important features contributing to predicting the MolWt for the DRD2 dataset. `UnscaledJazzyDescriptors` features are also ranked relatively high in the list.\n", + "\n", + "Other descriptor types in the composite descriptor such as the ECFP fingerprints are also shown in the output. ECFP bits are translated to the atom environments for which the bit was turned on within the training set. \n", + "\n", + "Other descriptors are less interpretable as no additional information is available in the `info` column." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ChemProp interpret" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ChemProp explainability is based on the `interpret` functionality of the [original package](https://chemprop.readthedocs.io/en/latest/interpret.html#interpretation).\n", + "\n", + "The following example shows the usage:" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:22:59,978] A new study created in memory with name: my_study\n", + "[I 2024-07-02 14:23:00,032] A new study created in memory with name: study_name_0\n", + "INFO:root:Enqueued ChemProp manual trial with sensible defaults: {'activation__e0d3a442222d4b38f3aa1434851320db': 'ReLU', 'aggregation__e0d3a442222d4b38f3aa1434851320db': 'mean', 'aggregation_norm__e0d3a442222d4b38f3aa1434851320db': 100, 'batch_size__e0d3a442222d4b38f3aa1434851320db': 50, 'depth__e0d3a442222d4b38f3aa1434851320db': 3, 'dropout__e0d3a442222d4b38f3aa1434851320db': 0.0, 'features_generator__e0d3a442222d4b38f3aa1434851320db': 'none', 'ffn_hidden_size__e0d3a442222d4b38f3aa1434851320db': 300, 'ffn_num_layers__e0d3a442222d4b38f3aa1434851320db': 2, 'final_lr_ratio_exp__e0d3a442222d4b38f3aa1434851320db': -4, 'hidden_size__e0d3a442222d4b38f3aa1434851320db': 300, 'init_lr_ratio_exp__e0d3a442222d4b38f3aa1434851320db': -4, 'max_lr_exp__e0d3a442222d4b38f3aa1434851320db': -3, 'warmup_epochs_ratio__e0d3a442222d4b38f3aa1434851320db': 0.1, 'algorithm_name': 'ChemPropRegressor', 'ChemPropRegressor_algorithm_hash': 'e0d3a442222d4b38f3aa1434851320db'}\n", + "[I 2024-07-02 14:23:43,818] Trial 0 finished with value: -4937.540075659691 and parameters: {'algorithm_name': 'ChemPropRegressor', 'ChemPropRegressor_algorithm_hash': 'e0d3a442222d4b38f3aa1434851320db', 'activation__e0d3a442222d4b38f3aa1434851320db': , 
'aggregation__e0d3a442222d4b38f3aa1434851320db': , 'aggregation_norm__e0d3a442222d4b38f3aa1434851320db': 100.0, 'batch_size__e0d3a442222d4b38f3aa1434851320db': 50.0, 'depth__e0d3a442222d4b38f3aa1434851320db': 3.0, 'dropout__e0d3a442222d4b38f3aa1434851320db': 0.0, 'ensemble_size__e0d3a442222d4b38f3aa1434851320db': 1, 'epochs__e0d3a442222d4b38f3aa1434851320db': 4, 'features_generator__e0d3a442222d4b38f3aa1434851320db': , 'ffn_hidden_size__e0d3a442222d4b38f3aa1434851320db': 300.0, 'ffn_num_layers__e0d3a442222d4b38f3aa1434851320db': 2.0, 'final_lr_ratio_exp__e0d3a442222d4b38f3aa1434851320db': -4, 'hidden_size__e0d3a442222d4b38f3aa1434851320db': 300.0, 'init_lr_ratio_exp__e0d3a442222d4b38f3aa1434851320db': -4, 'max_lr_exp__e0d3a442222d4b38f3aa1434851320db': -3, 'warmup_epochs_ratio__e0d3a442222d4b38f3aa1434851320db': 0.1, 'descriptor': '{\"name\": \"SmilesFromFile\", \"parameters\": {}}'}. Best is trial 0 with value: -4937.540075659691.\n", + " \r" + ] + } + ], + "source": [ + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"canonical\",\n", + " response_column=\"molwt\",\n", + " training_dataset_file=\"../tests/data/DRD2/subset-50/train.csv\", # This will be split into train and test.\n", + " ),\n", + " descriptors=[SmilesFromFile.new()],\n", + " algorithms=[\n", + " ChemPropRegressor.new(epochs=4),\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.REGRESSION,\n", + " cross_validation=2,\n", + " n_trials=1,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " ),\n", + ")\n", + "\n", + "study = optimize(config, study_name=\"my_study\")\n", + "build_best(buildconfig_best(study), \"../target/best.pkl\")\n", + "with open(\"../target/best.pkl\", \"rb\") as f:\n", + " chemprop = pickle.load(f)" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + 
"build_best(buildconfig_best(study), \"../target/best.pkl\")\n", + "with open(\"../target/best.pkl\", \"rb\") as f:\n", + " chemprop = pickle.load(f)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Similar to SHAP, ChemProp explainability inference is called using the `explain` flag from the `predict_from_smiles`" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 8 9 18 19 20 21 22 23 24\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 16 17 18 19 20 21 22\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 16 17 18 19 20 21 22\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 15 16 17 18 19 20 21\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 4 5 14 15 16 17 18 19 20\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 12 13 14 15 16 17 18\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 12 13 14 15 16 17 18\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 9 10 11 12 13 14 15\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 8 9 10 11 12 13 14\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 0 1 2 3 4 5 6 7 11\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 0 1 2 3 4 5 6 7 10\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 4 5 6\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 7 8 17 18 19 20 21 22 23\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 15 16 17 18 19 20 21\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 15 16 17 18 19 20 21\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 12 13 14 15 16 17 18\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 11 12 13 14 15 16 17\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 10 11 12 13 14 15 16\n", + "[14:25:51] Can't kekulize mol. 
Unkekulized atoms: 6 7 10 11 12\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 9 10 11\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 8 9 10\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 4 5 6\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 16 17 18 19 20 21 22\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 16 17 18 19 20 21 22\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 13 14 15 16 17 18 19\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 13 14 15\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 12 13 14\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 11 12 13\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 7 8 9\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 6 7 8\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 5 6 7\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 4 6 7\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 0 1 3 4 5\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 8 9 18 19 20\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 16 17 18\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 16 17 18\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 15 16 17\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 4 5 14 15 16\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 12 13 14\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 12 13 14\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 7 8 9\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 4 7 8\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 6 7 8\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 8 9 13 14 15 16 17 18 19\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 11 12 13 14 15 16 17\n", + "[14:25:51] Can't kekulize mol. 
Unkekulized atoms: 6 7 11 12 13 14 15 16 17\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 10 11 12 13 14 15 16\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 4 5 9 10 11 12 13 14 15\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 7 8 9 10 11 12 13\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 7 8 9 10 11 12 13\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 0 1 2 3 4 5 6 8 12\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 0 1 2 3 4 5 6 8 11\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 4 6 7\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 15 16 17 18 19 20 21\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 15 16 17\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 15 16 17\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 10 11 12\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 8 9 10\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 4 9 10\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 0 1 6 7 8\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 4 8 9\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 0 1 5 6 7\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 4 7 8\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 0 1 4 5 6\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 15 16 17 18 19 20 21\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 10 11 12 13 14 15 16\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 10 11 12 13 14 15 16\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 9 10 11 12 13 14 15\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 0 1 2 3 4 5 6 11 14\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 0 1 2 3 4 5 6 10 13\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 7 8 9 11 12\n", + "[14:25:51] Can't kekulize mol. 
Unkekulized atoms: 0 1 2 3 4 5 6 9 12\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 0 1 3 4 5\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 4 7 8\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 0 1 3 4 5\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 16 17 18\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 16 17 18\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 11 12 13\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 10 11 12\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 4 5 9 10 11\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 7 8 9\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 4 7 8\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 6 7 8\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 7 8 17 18 19\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 7 8 12 13 14\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 10 11 12\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 9 10 11\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 4 10 11\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 0 1 7 8 9\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 8 9 12 13 14 15 16 17 18\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 10 11 12 13 14 15 16\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 10 11 12 13 14 15 16\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 8 11 12\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 7 10 11\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 4 5 6 9 10\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 4 7 8\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 7 8 17 18 19\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 8 9 10 12 13\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 10 11 12\n", + "[14:25:51] Can't kekulize mol. 
Unkekulized atoms: 5 6 7 10 11\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 9 10 11\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 7 10 11\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 9 10 11\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 16 17 18 19 20 21 22\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 11 12 13 14 15 16 17\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 0 1 2 3 4 5 6 12 16\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 4 13 14\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 0 1 2 3 4 5 6 11 15\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 4 8 9\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 0 1 2 3 4 5 6 10 14\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 4 7 8\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 0 1 2 3 4 5 6 9 13\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 4 6 7\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 11 12 13 14 15 16 17\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 11 12 13\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 8 11 12\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 10 11 12\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 8 11 12\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 10 11 12\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 7 10 11\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 9 10 11\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 4 5 6 9 10\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 4 5 8 9 10\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 10 11 12 14 15\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 16 17 18\n", + "[14:25:51] Can't kekulize mol. 
Unkekulized atoms: 5 6 15 16 17\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 8 9 10\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 4 8 9\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 0 1 5 6 7\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 7 8 12 13 14 15 16 17 18\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 7 8 11 12 13 14 15 16 17\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 9 10 11 12 13 14 15\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 7 10 11\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 7 10 11\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 4 9 10\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 4 8 9\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 7 8 12 13 14 15 16 17 18\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 5 6 9 10 11 12 13 14 15\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 0 1 2 3 4 5 6 12 15\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 2 3 4 10 11\n", + "[14:25:51] Can't kekulize mol. Unkekulized atoms: 6 7 16 17 18\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 6 7 12 13 14\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 2 3 6 7 8\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 6 7 13 14 15 16 17 18 19\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 6 7 12 13 14\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 2 3 4 7 8\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 0 1 4 5 6\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 8 9 13 14 15\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 9 10 11 13 14\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 8 9 10 13 14\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 8 9 12 13 14\n", + "[14:25:52] Can't kekulize mol. 
Unkekulized atoms: 7 8 9 12 13\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 7 8 11 12 13\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 6 7 11 12 13 14 15 16 17\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 5 6 9 10 11 12 13 14 15\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 5 6 7 10 11\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 10 11 12 15 16\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 9 10 11 14 15\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 2 3 4 13 14\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 2 3 4 8 9\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 2 3 4 7 8\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 2 3 4 6 7\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 10 11 12 14 15\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 2 3 4 15 16\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 2 3 4 14 15\n", + "[14:25:52] Can't kekulize mol. Unkekulized atoms: 11 12 13 15 16\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smilesscorerationalerationale_score
0Cc1cc(NC(=O)c2cccc(COc3ccc(Br)cc3)c2)no1386.097c1cc(CO[CH3:1])c[cH:1]c1389.151
0O=C(Nc1ccc(F)cc1F)Nc1sccc1-c1nc2ccccc2s1389.485c1c[cH:1]c[cH:1]c1N[CH2:1][NH2:1]388.565
0COC(=O)c1ccccc1NC(=O)c1cc([N+](=O)[O-])nn1Cc1c...384.720CO[CH2:1]c1cccc[cH:1]1389.151
0CCOC(=O)C(C)Sc1nc(-c2ccccc2)ccc1C#N387.110c1c[cH:1]c(S[CH2:1][CH3:1])n[cH:1]1388.871
0CCC(CC)NC(=O)c1nn(Cc2ccccc2)c(=O)c2ccccc12388.997n1c([CH2:1]N[CH3:1])[cH:1][cH:1][cH:1][n:1]1387.854
\n", + "
" + ], + "text/plain": [ + " smiles score \\\n", + "0 Cc1cc(NC(=O)c2cccc(COc3ccc(Br)cc3)c2)no1 386.097 \n", + "0 O=C(Nc1ccc(F)cc1F)Nc1sccc1-c1nc2ccccc2s1 389.485 \n", + "0 COC(=O)c1ccccc1NC(=O)c1cc([N+](=O)[O-])nn1Cc1c... 384.720 \n", + "0 CCOC(=O)C(C)Sc1nc(-c2ccccc2)ccc1C#N 387.110 \n", + "0 CCC(CC)NC(=O)c1nn(Cc2ccccc2)c(=O)c2ccccc12 388.997 \n", + "\n", + " rationale rationale_score \n", + "0 c1cc(CO[CH3:1])c[cH:1]c1 389.151 \n", + "0 c1c[cH:1]c[cH:1]c1N[CH2:1][NH2:1] 388.565 \n", + "0 CO[CH2:1]c1cccc[cH:1]1 389.151 \n", + "0 c1c[cH:1]c(S[CH2:1][CH3:1])n[cH:1]1 388.871 \n", + "0 n1c([CH2:1]N[CH3:1])[cH:1][cH:1][cH:1][n:1]1 387.854 " + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chemprop.predict_from_smiles(df[config.data.input_column].head(5), explain=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The output contains the following:\n", + "\n", + "* The first column is a molecule and the second column is its predicted property (in this dummy case MolWt).\n", + "\n", + "* The third column is the smallest substructure that made this molecule obtain that MolWt prediction (called rationale).\n", + "\n", + "* The fourth column is the predicted MolWt of that substructure." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Log transformation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "QSARtuna can be used to transform input labels so that log-scaled or irregularly distributed data can be transformed to a normal distribution as required for most Machine Learning inputs. 
The following example shows how XC50 values can be scaled to pXC50 values by using the -Log10 to the 6th unit conversion, like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:25:53,892] A new study created in memory with name: transform_example\n", + "[I 2024-07-02 14:25:53,932] A new study created in memory with name: study_name_0\n", + "[I 2024-07-02 14:25:54,028] Trial 0 finished with value: -0.5959493772536109 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 0 with value: -0.5959493772536109.\n", + "[I 2024-07-02 14:25:54,127] Trial 1 finished with value: -0.6571993250300608 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 7, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 0 with value: -0.5959493772536109.\n", + "[I 2024-07-02 14:25:54,169] Trial 2 finished with value: -4.1511102853256885 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 5.141096648805748, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 2.4893466963980463e-08, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 0 with value: -0.5959493772536109.\n", + "[I 2024-07-02 14:25:54,259] Trial 3 finished with value: -1.2487063317112765 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 5, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 0 with value: -0.5959493772536109.\n", + "[I 2024-07-02 14:25:54,288] Trial 4 finished with value: -0.6714912461080983 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 3, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 0 with value: -0.5959493772536109.\n", + "[I 2024-07-02 14:25:54,329] Trial 5 finished with value: -0.2725944467796781 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.7896547008552977, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 5 with value: -0.2725944467796781.\n", + "[I 2024-07-02 14:25:54,369] Trial 6 finished with value: -2.194926264155893 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.6574750183038587, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 5 with value: -0.2725944467796781.\n", + "[I 2024-07-02 14:25:54,395] Trial 7 finished with value: -0.7520919188596032 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.3974313630683448, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 5 with value: -0.2725944467796781.\n", + "[I 2024-07-02 14:25:54,469] Trial 8 finished with value: -0.7803723847416691 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 28, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 5 with value: -0.2725944467796781.\n", + "[I 2024-07-02 14:25:54,499] Trial 9 finished with value: -0.6397753979196248 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.2391884918766034, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 5 with value: -0.2725944467796781.\n", + "[I 2024-07-02 14:25:54,528] Trial 10 finished with value: -4.151110299986041 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.00044396482429275296, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 2.3831436879125245e-10, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 5 with value: -0.2725944467796781.\n", + "[I 2024-07-02 14:25:54,558] Trial 11 finished with value: -4.151110111437006 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.00028965395242758657, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 2.99928292425642e-07, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 5 with value: -0.2725944467796781.\n", + "[I 2024-07-02 14:25:54,584] Trial 12 finished with value: -0.5410418750776741 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 5 with value: -0.2725944467796781.\n", + "[I 2024-07-02 14:25:54,612] Trial 13 finished with value: -0.7183231137124538 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 2, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 5 with value: -0.2725944467796781.\n", + "[I 2024-07-02 14:25:54,640] Trial 14 finished with value: -0.2721824844856162 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.4060379177903557, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 14 with value: -0.2721824844856162.\n", + "[I 2024-07-02 14:25:54,716] Trial 15 finished with value: -1.1900929470222508 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 20, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. 
Best is trial 14 with value: -0.2721824844856162.\n", + "[I 2024-07-02 14:25:54,745] Trial 16 finished with value: -2.194926264155893 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.344271094811757, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 14 with value: -0.2721824844856162.\n", + "[I 2024-07-02 14:25:54,774] Trial 17 finished with value: -0.5585323973564646 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.670604991178476, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 14 with value: -0.2721824844856162.\n", + "[I 2024-07-02 14:25:54,951] Trial 18 finished with value: -1.3169218304262786 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 22, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 14 with value: -0.2721824844856162.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:25:54,980] Trial 19 finished with value: -0.7974925066137679 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.5158832554303112, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 14 with value: -0.2721824844856162.\n", + "[I 2024-07-02 14:25:55,008] Trial 20 finished with value: -1.218395226466336 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 14 with value: -0.2721824844856162.\n", + "[I 2024-07-02 14:25:55,039] Trial 21 finished with value: -1.1474226942497083 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.0009327650919528738, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 6.062479210472502, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 14 with value: -0.2721824844856162.\n", + "[I 2024-07-02 14:25:55,054] Trial 22 pruned. Duplicate parameter set\n", + "[I 2024-07-02 14:25:55,083] Trial 23 finished with value: -1.0239005731675412 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.1366172066709432, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 14 with value: -0.2721824844856162.\n", + "[I 2024-07-02 14:25:55,160] Trial 24 finished with value: -0.7803723847416691 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 26, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 14 with value: -0.2721824844856162.\n", + "[I 2024-07-02 14:25:55,191] Trial 25 finished with value: -2.178901060853144 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 43.92901911959232, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 27.999026012594694, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 14 with value: -0.2721824844856162.\n", + "[I 2024-07-02 14:25:55,222] Trial 26 finished with value: -0.27137790098830755 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.5888977841391714, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 26 with value: -0.27137790098830755.\n", + "[I 2024-07-02 14:25:55,250] Trial 27 finished with value: -0.2710284516876423 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.19435298754153707, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 27 with value: -0.2710284516876423.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}, return [-0.5410418750776741]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:25:55,329] Trial 28 finished with value: -1.3169218304262786 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 13, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 27 with value: -0.2710284516876423.\n", + "[I 2024-07-02 14:25:55,361] Trial 29 finished with value: -3.6273152492418945 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 1.6285506249643193, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 0.35441495011256785, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 27 with value: -0.2710284516876423.\n", + "[I 2024-07-02 14:25:55,438] Trial 30 finished with value: -1.1900929470222508 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 10, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. 
Best is trial 27 with value: -0.2710284516876423.\n", + "[I 2024-07-02 14:25:55,469] Trial 31 finished with value: -2.194926264155893 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.2457809516380005, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 27 with value: -0.2710284516876423.\n", + "[I 2024-07-02 14:25:55,496] Trial 32 finished with value: -2.1907041717628215 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.6459129458824919, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 27 with value: -0.2710284516876423.\n", + "[I 2024-07-02 14:25:55,528] Trial 33 finished with value: -1.3209075619139279 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.8179058888285398, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 27 with value: -0.2710284516876423.\n", + "[I 2024-07-02 14:25:55,545] Trial 34 pruned. Duplicate parameter set\n", + "[I 2024-07-02 14:25:55,577] Trial 35 finished with value: -0.2709423025014604 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.0920052840435055, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 35 with value: -0.2709423025014604.\n", + "[I 2024-07-02 14:25:55,609] Trial 36 finished with value: -1.3133943310851415 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.8677032984759461, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 35 with value: -0.2709423025014604.\n", + "[I 2024-07-02 14:25:55,626] Trial 37 pruned. Duplicate parameter set\n", + "[I 2024-07-02 14:25:55,657] Trial 38 finished with value: -1.257769959239938 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.2865764368847064, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 35 with value: -0.2709423025014604.\n", + "[I 2024-07-02 14:25:55,735] Trial 39 finished with value: -0.40359637945134746 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 35 with value: -0.2709423025014604.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}, return [-0.5410418750776741]\n", + "Duplicated trial: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}, return [-1.218395226466336]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:25:55,817] Trial 40 finished with value: -0.4127882135896648 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 9, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 35 with value: -0.2709423025014604.\n", + "[I 2024-07-02 14:25:55,836] Trial 41 pruned. Duplicate parameter set\n", + "[I 2024-07-02 14:25:55,905] Trial 42 finished with value: -0.5959493772536109 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 25, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 35 with value: -0.2709423025014604.\n", + "[I 2024-07-02 14:25:55,935] Trial 43 finished with value: -0.9246005133276612 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 2, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 35 with value: -0.2709423025014604.\n", + "[I 2024-07-02 14:25:56,003] Trial 44 finished with value: -0.8908739215746116 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 22, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 9, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 35 with value: -0.2709423025014604.\n", + "[I 2024-07-02 14:25:56,035] Trial 45 finished with value: -1.107536316777608 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 3, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. 
Best is trial 35 with value: -0.2709423025014604.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 5, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}, return [-1.2487063317112765]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:25:56,067] Trial 46 finished with value: -2.194926264155893 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.6437201185807124, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 35 with value: -0.2709423025014604.\n", + "[I 2024-07-02 14:25:56,098] Trial 47 finished with value: -4.054360360588395 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 82.41502276709562, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 0.10978379088847677, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 35 with value: -0.2709423025014604.\n", + "[I 2024-07-02 14:25:56,129] Trial 48 finished with value: -0.5428179904345867 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.022707289534838138, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 35 with value: -0.2709423025014604.\n", + "[I 2024-07-02 14:25:56,160] Trial 49 finished with value: -0.5696273642213351 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 3, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 35 with value: -0.2709423025014604.\n", + "[I 2024-07-02 14:25:56,194] Trial 50 finished with value: -0.27099769667470536 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.1580741708125475, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 35 with value: -0.2709423025014604.\n", + "[I 2024-07-02 14:25:56,232] Trial 51 finished with value: -0.2709564785634315 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.10900413894771653, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 35 with value: -0.2709423025014604.\n", + "[I 2024-07-02 14:25:56,268] Trial 52 finished with value: -0.2709799905898163 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.13705914456987853, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 35 with value: -0.2709423025014604.\n", + "[I 2024-07-02 14:25:56,302] Trial 53 finished with value: -0.27097230608092054 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.12790870116376127, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 35 with value: -0.2709423025014604.\n", + "[I 2024-07-02 14:25:56,337] Trial 54 finished with value: -0.2709499903064464 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.10123180962907431, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 35 with value: -0.2709423025014604.\n", + "[I 2024-07-02 14:25:56,374] Trial 55 finished with value: -0.2710895886052581 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.26565663774320425, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 35 with value: -0.2709423025014604.\n", + "[I 2024-07-02 14:25:56,411] Trial 56 finished with value: -0.2708711012023424 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.005637048678674678, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 56 with value: -0.2708711012023424.\n", + "[I 2024-07-02 14:25:56,446] Trial 57 finished with value: -0.27092322402109364 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.06902647427781451, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 56 with value: -0.2708711012023424.\n", + "[I 2024-07-02 14:25:56,482] Trial 58 finished with value: -0.2712140349882 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.4076704953178294, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 56 with value: -0.2708711012023424.\n", + "[I 2024-07-02 14:25:56,515] Trial 59 finished with value: -0.27090080367174 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.04187106800188596, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 56 with value: -0.2708711012023424.\n", + "[I 2024-07-02 14:25:56,550] Trial 60 finished with value: -0.27086925247190047 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.003371853599610078, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 60 with value: -0.27086925247190047.\n", + "[I 2024-07-02 14:25:56,586] Trial 61 finished with value: -0.2708933298483799 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.032781796328385376, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 60 with value: -0.27086925247190047.\n", + "[I 2024-07-02 14:25:56,623] Trial 62 finished with value: -0.27087205624489635 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.006806773659187283, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 60 with value: -0.27086925247190047.\n", + "[I 2024-07-02 14:25:56,658] Trial 63 finished with value: -0.2708869511176179 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.025009489814943348, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 60 with value: -0.27086925247190047.\n", + "[I 2024-07-02 14:25:56,695] Trial 64 finished with value: -0.2711465077924297 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.3311125627707556, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 60 with value: -0.27086925247190047.\n", + "[I 2024-07-02 14:25:56,729] Trial 65 finished with value: -0.2708756855936628 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.011249102380159387, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 60 with value: -0.27086925247190047.\n", + "[I 2024-07-02 14:25:56,766] Trial 66 finished with value: -0.27087301924224993 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.007985924302396141, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 60 with value: -0.27086925247190047.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:25:56,802] Trial 67 finished with value: -0.2708685399954944 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.00249856291483601, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 67 with value: -0.2708685399954944.\n", + "[I 2024-07-02 14:25:56,839] Trial 68 finished with value: -0.27121879554836553 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.4130244908975993, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 67 with value: -0.2708685399954944.\n", + "[I 2024-07-02 14:25:56,880] Trial 69 finished with value: -0.2708693196600531 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.0034541978803366022, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 67 with value: -0.2708685399954944.\n", + "[I 2024-07-02 14:25:56,918] Trial 70 finished with value: -0.27110195265802334 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.27994943662091765, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 67 with value: -0.2708685399954944.\n", + "[I 2024-07-02 14:25:56,956] Trial 71 finished with value: -0.2708682582859318 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.0021532199144365088, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:56,995] Trial 72 finished with value: -0.27087024523986086 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.0045884092728113585, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:57,030] Trial 73 finished with value: -0.27087351807632193 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.008596600952859433, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:57,070] Trial 74 finished with value: -0.2710818633795896 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.2567049271070902, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:57,109] Trial 75 finished with value: -0.27103241786565463 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.1990111983307052, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:57,146] Trial 76 finished with value: -0.2710350879598171 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.20214459724424078, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:57,183] Trial 77 finished with value: -0.2708688328221868 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.00285750520671645, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:57,221] Trial 78 finished with value: -0.27100832234449684 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.17064008990759916, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:57,258] Trial 79 finished with value: -0.27268613236193845 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.8725420109733135, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:57,296] Trial 80 finished with value: -0.27119617446689237 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.387533542012365, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:57,333] Trial 81 finished with value: -0.2708691110831552 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.0031985656730512953, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:57,370] Trial 82 finished with value: -0.27086852174155146 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.002476186542950981, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:57,410] Trial 83 finished with value: -0.27135383618835024 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.5626643670396761, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:57,449] Trial 84 finished with value: -0.2709819654433871 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.1394077979875128, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:57,488] Trial 85 finished with value: -0.2718548944510965 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.0858347526799794, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:57,528] Trial 86 finished with value: -4.1508084699212935 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.03329943145150872, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 0.00025672309762227527, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:57,566] Trial 87 finished with value: -0.27249853374634975 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.702026434077893, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.2708682582859318.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:25:57,604] Trial 88 finished with value: -0.27095660957755363 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.10916094511173127, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:57,643] Trial 89 finished with value: -0.27102160995407715 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.18630665884100353, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:57,681] Trial 90 finished with value: -0.27095708822582026 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.10973377642487026, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:57,721] Trial 91 finished with value: -0.27088222008661084 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.019235980282946118, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:57,762] Trial 92 finished with value: -0.2708703086029017 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.004666043957133775, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:57,799] Trial 93 finished with value: -0.27095279044622245 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.1045877457096882, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:57,840] Trial 94 finished with value: -0.2709408288690431 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.09023455456986404, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:57,879] Trial 95 finished with value: -0.9289218260898663 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.8200088368788958, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.2708682582859318.\n", + "[I 2024-07-02 14:25:57,917] Trial 96 finished with value: -0.27086675101898655 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.00030502148265565063, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 96 with value: -0.27086675101898655.\n", + "[I 2024-07-02 14:25:57,957] Trial 97 finished with value: -0.2710491243757999 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.21858260742423916, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 96 with value: -0.27086675101898655.\n", + "[I 2024-07-02 14:25:58,001] Trial 98 finished with value: -4.1491615840508995 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.024725853754515203, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 0.0011658455138452, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 96 with value: -0.27086675101898655.\n", + "[I 2024-07-02 14:25:58,040] Trial 99 finished with value: -0.2709462479577586 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.0967427718847167, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 96 with value: -0.27086675101898655.\n" + ] + } + ], + "source": [ + "from optunaz.utils.preprocessing.transform import (\n", + " LogBase,\n", + " LogNegative,\n", + " ModelDataTransform\n", + ")\n", + "\n", + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"Smiles\",\n", + " response_column=\"Measurement\",\n", + " response_type=\"regression\",\n", + " training_dataset_file=\"../tests/data/sdf/example.sdf\",\n", + " split_strategy=Stratified(fraction=0.4),\n", + " deduplication_strategy=KeepMedian(),\n", + " log_transform=True, # Set to True to perform\n", + " log_transform_base=LogBase.LOG10, # Log10 base will be used\n", + " log_transform_negative=LogNegative.TRUE, # Negated transform for the pXC50 calculation\n", + " log_transform_unit_conversion=6, # 6 units used for pXC50 conversion\n", + " ),\n", + " descriptors=[\n", + " ECFP.new(), \n", + " ECFP_counts.new(), \n", + " MACCS_keys.new(),\n", + " ],\n", + " algorithms=[\n", + " SVR.new(),\n", + " RandomForestRegressor.new(n_estimators={\"low\": 5, \"high\": 10}),\n", + " Ridge.new(),\n", + " Lasso.new(),\n", + " PLSRegression.new(),\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.REGRESSION,\n", + " cross_validation=3,\n", + " n_trials=100,\n", + " n_startup_trials=50,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " track_to_mlflow=False,\n", + " random_seed=42,\n", + " ),\n", + ") \n", + "\n", + "transformed_study = optimize(config, study_name=\"transform_example\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In comparison, QSARtuna does not normally transform the data:" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:26:00,252] A new study created in memory with name: non-transform_example\n", + "[I 2024-07-02 14:26:00,254] A new study created in memory with name: 
study_name_0\n", + "[I 2024-07-02 14:26:00,332] Trial 0 finished with value: -3501.942111261296 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 0 with value: -3501.942111261296.\n", + "[I 2024-07-02 14:26:00,422] Trial 1 finished with value: -5451.207265576796 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 7, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 0 with value: -3501.942111261296.\n", + "[I 2024-07-02 14:26:00,459] Trial 2 finished with value: -208.1049201007814 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 5.141096648805748, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 2.4893466963980463e-08, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 2 with value: -208.1049201007814.\n", + "[I 2024-07-02 14:26:00,500] Trial 3 finished with value: -9964.541364058234 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 5, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. 
Best is trial 2 with value: -208.1049201007814.\n", + "[I 2024-07-02 14:26:00,528] Trial 4 finished with value: -3543.953608539901 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 3, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 2 with value: -208.1049201007814.\n", + "[I 2024-07-02 14:26:00,570] Trial 5 finished with value: -6837.057544630979 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.7896547008552977, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 2 with value: -208.1049201007814.\n", + "[I 2024-07-02 14:26:00,613] Trial 6 finished with value: -2507.1794330606067 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.6574750183038587, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 2 with value: -208.1049201007814.\n", + "[I 2024-07-02 14:26:00,650] Trial 7 finished with value: -21534.719219668405 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.3974313630683448, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 2 with value: -208.1049201007814.\n", + "[I 2024-07-02 14:26:00,726] Trial 8 finished with value: -2899.736555614694 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 28, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 2 with value: -208.1049201007814.\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 1.294e+02, tolerance: 2.760e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "[I 2024-07-02 14:26:00,790] Trial 9 finished with value: -21674.445000284228 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.2391884918766034, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 2 with value: -208.1049201007814.\n", + "[I 2024-07-02 14:26:00,819] Trial 10 finished with value: -208.1049203123567 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.00044396482429275296, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 2.3831436879125245e-10, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 2 with value: -208.1049201007814.\n", + "[I 2024-07-02 14:26:00,849] Trial 11 finished with value: -208.1049192609138 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.00028965395242758657, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 2.99928292425642e-07, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 11 with value: -208.1049192609138.\n", + "[I 2024-07-02 14:26:00,877] Trial 12 finished with value: -3630.72768093756 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 11 with value: -208.1049192609138.\n", + "[I 2024-07-02 14:26:00,907] Trial 13 finished with value: -3431.942816967268 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 2, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 11 with value: -208.1049192609138.\n", + "[I 2024-07-02 14:26:00,934] Trial 14 finished with value: -6908.462045154488 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.4060379177903557, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 11 with value: -208.1049192609138.\n", + "[I 2024-07-02 14:26:01,008] Trial 15 finished with value: -5964.65935954044 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 20, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 11 with value: -208.1049192609138.\n", + "[I 2024-07-02 14:26:01,036] Trial 16 finished with value: -21070.107195348774 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.344271094811757, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 11 with value: -208.1049192609138.\n", + "[I 2024-07-02 14:26:01,065] Trial 17 finished with value: -4977.068508997133 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.670604991178476, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 11 with value: -208.1049192609138.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:26:01,133] Trial 18 finished with value: -8873.669262669626 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 22, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. 
Best is trial 11 with value: -208.1049192609138.\n", + "[I 2024-07-02 14:26:01,173] Trial 19 finished with value: -21387.63697424318 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.5158832554303112, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 11 with value: -208.1049192609138.\n", + "[I 2024-07-02 14:26:01,202] Trial 20 finished with value: -9958.573006910125 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 11 with value: -208.1049192609138.\n", + "[I 2024-07-02 14:26:01,370] Trial 21 finished with value: -180.5182695600183 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.0009327650919528738, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 6.062479210472502, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 21 with value: -180.5182695600183.\n", + "[I 2024-07-02 14:26:01,387] Trial 22 pruned. Duplicate parameter set\n", + "[I 2024-07-02 14:26:01,428] Trial 23 finished with value: -20684.56412138056 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.1366172066709432, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 21 with value: -180.5182695600183.\n", + "[I 2024-07-02 14:26:01,515] Trial 24 finished with value: -2899.736555614694 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 26, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 21 with value: -180.5182695600183.\n", + "[I 2024-07-02 14:26:01,544] Trial 25 finished with value: -150.3435882510586 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 43.92901911959232, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 27.999026012594694, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:01,571] Trial 26 finished with value: -7068.705383113378 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.5888977841391714, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 25 with value: -150.3435882510586.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}, return [-3630.72768093756]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:26:01,599] Trial 27 finished with value: -7150.482090052133 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.19435298754153707, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:01,976] Trial 28 finished with value: -8873.669262669626 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 13, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:02,077] Trial 29 finished with value: -203.93637462922368 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 1.6285506249643193, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 0.35441495011256785, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:02,160] Trial 30 finished with value: -5964.65935954044 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 10, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:02,193] Trial 31 finished with value: -2570.5111262532305 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.2457809516380005, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:02,237] Trial 32 finished with value: -21987.659957192194 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.6459129458824919, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:02,269] Trial 33 finished with value: -9889.493204596083 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.8179058888285398, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:03,369] Trial 34 pruned. 
Duplicate parameter set\n", + "[I 2024-07-02 14:26:03,413] Trial 35 finished with value: -7172.208490771303 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.0920052840435055, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:03,537] Trial 36 finished with value: -9804.512701665093 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.8677032984759461, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:03,555] Trial 37 pruned. Duplicate parameter set\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}, return [-3630.72768093756]\n", + "Duplicated trial: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}, return [-9958.573006910125]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:26:03,585] Trial 38 finished with value: -9165.74081120673 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.2865764368847064, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. 
Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:03,665] Trial 39 finished with value: -543.0280270800017 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:03,745] Trial 40 finished with value: -161.1602933782954 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 9, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:03,763] Trial 41 pruned. Duplicate parameter set\n", + "[I 2024-07-02 14:26:03,831] Trial 42 finished with value: -3501.888460860864 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 25, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:03,864] Trial 43 finished with value: -8414.932694243476 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 2, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:03,944] Trial 44 finished with value: -2270.540799189147 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 22, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 9, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 25 with value: -150.3435882510586.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 5, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}, return [-9964.541364058234]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:26:03,977] Trial 45 finished with value: -10383.79559309305 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 3, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. 
Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:04,008] Trial 46 finished with value: -20815.025469865475 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.6437201185807124, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:04,040] Trial 47 finished with value: -206.7560385808573 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 82.41502276709562, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 0.10978379088847677, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:04,070] Trial 48 finished with value: -5264.4700789389035 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.022707289534838138, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:04,103] Trial 49 finished with value: -3668.255064135424 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 3, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:04,140] Trial 50 finished with value: -156.12174877890536 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 56.793408178086295, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 9.99902820845678, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:04,177] Trial 51 finished with value: -157.371632749506 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 57.88307313087517, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 8.140915461519354, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:04,218] Trial 52 finished with value: -153.66773675231477 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 46.177324126813716, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 40.77906017834145, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:04,256] Trial 53 finished with value: -186.52056745848623 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 89.4565714180547, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 93.6710444346508, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:04,294] Trial 54 finished with value: -153.30976119334312 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 35.62916671166313, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 40.023639423189294, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:04,334] Trial 55 finished with value: -181.053696900694 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 23.914617418880486, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 86.31140591484044, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:04,374] Trial 56 finished with value: -201.33573874994386 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 12.569769302718845, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 0.5781354926491789, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:04,412] Trial 57 finished with value: -190.1384885119049 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 95.87666716965626, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 98.2537791489618, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:04,451] Trial 58 finished with value: -208.076949848299 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.9559574710535281, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 0.0032830967319653665, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:04,491] Trial 59 finished with value: -170.764974036324 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 15.03910427457823, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 3.406811480459925, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:04,528] Trial 60 finished with value: -164.4477304958181 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 17.701690847791482, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 4.819274780536123, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:04,567] Trial 61 finished with value: -157.87939164358104 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 28.32187661108304, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 7.660320437878754, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:04,607] Trial 62 finished with value: -157.01705178481896 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 38.61397716361812, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 8.603665957830847, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:04,645] Trial 63 finished with value: -155.73257312230092 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 40.759645965959294, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 11.503212714246787, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 25 with value: -150.3435882510586.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:26:04,684] Trial 64 finished with value: -154.46848394144124 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 93.8546740801317, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 15.35327336610912, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:04,724] Trial 65 finished with value: -161.20421802817864 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 93.57596974747163, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 51.84756262407801, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:04,763] Trial 66 finished with value: -190.51233215278089 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 6.3564642040401464, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 1.5034542273159819, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:04,802] Trial 67 finished with value: -207.68667089892196 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 24.034895878929095, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 0.03653571911285094, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 25 with value: -150.3435882510586.\n", + "[I 2024-07-02 14:26:04,842] Trial 68 finished with value: -102.52277054278186 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.01961499216484045, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 17.670937191883546, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 68 with value: -102.52277054278186.\n", + "[I 2024-07-02 14:26:04,881] Trial 69 finished with value: -97.28722475694815 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.012434370509176538, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 19.34222704431493, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 69 with value: -97.28722475694815.\n", + "[I 2024-07-02 14:26:04,921] Trial 70 finished with value: -93.87402050281146 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.008452015347522093, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 24.914863578437455, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 70 with value: -93.87402050281146.\n", + "[I 2024-07-02 14:26:04,960] Trial 71 finished with value: -89.38847505937936 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.01573542234868893, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 27.99307522974174, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 71 with value: -89.38847505937936.\n", + "[I 2024-07-02 14:26:04,999] Trial 72 finished with value: -81.96336195786391 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.009845516063879428, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 80.59422914099683, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 72 with value: -81.96336195786391.\n", + "[I 2024-07-02 14:26:05,039] Trial 73 finished with value: -89.19345618324213 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.009382525091504246, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 98.35573659237662, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 72 with value: -81.96336195786391.\n", + "[I 2024-07-02 14:26:05,080] Trial 74 finished with value: -86.30772721342525 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.010579672066291478, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 84.35550323165882, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 72 with value: -81.96336195786391.\n", + "[I 2024-07-02 14:26:05,117] Trial 75 finished with value: -90.23970902543148 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.013369359066405863, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 87.4744102498801, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 72 with value: -81.96336195786391.\n", + "[I 2024-07-02 14:26:05,155] Trial 76 finished with value: -81.34331248758777 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.011398351701814368, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 72.54146340620301, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 76 with value: -81.34331248758777.\n", + "[I 2024-07-02 14:26:05,195] Trial 77 finished with value: -208.104535853341 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.011708779850509646, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 1.682286191624579e-05, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 76 with value: -81.34331248758777.\n", + "[I 2024-07-02 14:26:05,235] Trial 78 finished with value: -80.0653774146952 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.009806826677473646, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 76.90274406278985, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 78 with value: -80.0653774146952.\n", + "[I 2024-07-02 14:26:05,276] Trial 79 finished with value: -81.64646042813787 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.0038598153381434685, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 73.20918134828555, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 78 with value: -80.0653774146952.\n", + "[I 2024-07-02 14:26:05,316] Trial 80 finished with value: -78.68420472011734 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.0032474576673554513, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 98.35551178979624, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 80 with value: -78.68420472011734.\n", + "[I 2024-07-02 14:26:05,357] Trial 81 finished with value: -80.85985201823172 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.003187930738019005, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 89.29431603544847, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 80 with value: -78.68420472011734.\n", + "[I 2024-07-02 14:26:05,399] Trial 82 finished with value: -80.21583898009355 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.003122319313153475, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 93.83526418992966, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 80 with value: -78.68420472011734.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:26:05,437] Trial 83 finished with value: -83.34787242859676 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.002781955938462633, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 89.76228981520067, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 80 with value: -78.68420472011734.\n", + "[I 2024-07-02 14:26:05,478] Trial 84 finished with value: -194.70914272129673 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.0023173546614751305, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 1.3000082904498813, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 80 with value: -78.68420472011734.\n", + "[I 2024-07-02 14:26:05,519] Trial 85 finished with value: -208.10492031097328 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.002606064524407, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 1.7861330234653922e-10, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 80 with value: -78.68420472011734.\n", + "[I 2024-07-02 14:26:05,558] Trial 86 finished with value: -208.1049154281806 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.0029210589377408366, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 4.200933937391094e-07, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 80 with value: -78.68420472011734.\n", + "[I 2024-07-02 14:26:05,599] Trial 87 finished with value: -208.10492028002287 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.06431564840324226, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 3.2981641934644904e-09, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 80 with value: -78.68420472011734.\n", + "[I 2024-07-02 14:26:05,640] Trial 88 finished with value: -196.56066541774658 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.0010848843623839548, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 2.151493073951163, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 80 with value: -78.68420472011734.\n", + "[I 2024-07-02 14:26:05,682] Trial 89 finished with value: -76.76337597039308 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.004134805589645341, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 90.88115336652716, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 89 with value: -76.76337597039308.\n", + "[I 2024-07-02 14:26:05,724] Trial 90 finished with value: -108.58009587759925 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.004763418454688096, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 22.02920758025023, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 89 with value: -76.76337597039308.\n", + "[I 2024-07-02 14:26:05,766] Trial 91 finished with value: -113.35230417583477 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.0009098023238189749, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 79.57100980886017, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 89 with value: -76.76337597039308.\n", + "[I 2024-07-02 14:26:05,809] Trial 92 finished with value: -113.30807467406214 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.03739791555156691, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 27.12818940557025, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 89 with value: -76.76337597039308.\n", + "[I 2024-07-02 14:26:05,850] Trial 93 finished with value: -76.44100655116532 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.006380481141720477, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 88.4882351186755, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 93 with value: -76.44100655116532.\n", + "[I 2024-07-02 14:26:05,891] Trial 94 finished with value: -150.35181001564942 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.0036244007454981787, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 5.608797806921866, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 93 with value: -76.44100655116532.\n", + "[I 2024-07-02 14:26:05,935] Trial 95 finished with value: -124.3719027482892 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.0014198536004321608, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 35.05588994284273, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 93 with value: -76.44100655116532.\n", + "[I 2024-07-02 14:26:05,978] Trial 96 finished with value: -95.28568052794907 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.005434972462746285, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 30.215759789700954, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 93 with value: -76.44100655116532.\n", + "[I 2024-07-02 14:26:06,018] Trial 97 finished with value: -20325.66479442037 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.9696417046589247, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 93 with value: -76.44100655116532.\n", + "[I 2024-07-02 14:26:06,057] Trial 98 finished with value: -132.21507621375022 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.0004528978867024753, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 84.80386923876023, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 93 with value: -76.44100655116532.\n", + "[I 2024-07-02 14:26:06,097] Trial 99 finished with value: -166.85570350846885 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.0016948043699497222, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 5.455627755557016, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 93 with value: -76.44100655116532.\n" + ] + } + ], + "source": [ + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"Smiles\",\n", + " response_column=\"Measurement\",\n", + " response_type=\"regression\",\n", + " training_dataset_file=\"../tests/data/sdf/example.sdf\",\n", + " split_strategy=Stratified(fraction=0.4),\n", + " deduplication_strategy=KeepMedian(),\n", + " log_transform=False, # Shown for illustration: Log transform defaults to False\n", + " log_transform_base=None, # Shown for illustration: Log10 base is None/ignored if not log scaled\n", + " log_transform_negative=None, # Shown for illustration: negation is None/ignored if not log scaled\n", + " log_transform_unit_conversion=None, # Shown for illustration: conversion is None/ignored if not log scaled\n", + " ),\n", + " descriptors=[\n", + " ECFP.new(), \n", + " ECFP_counts.new(), \n", + " MACCS_keys.new(),\n", + " ],\n", + " algorithms=[\n", + " SVR.new(),\n", + " RandomForestRegressor.new(n_estimators={\"low\": 5, \"high\": 10}),\n", + 
" Ridge.new(),\n", + " Lasso.new(),\n", + " PLSRegression.new(),\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.REGRESSION,\n", + " cross_validation=3,\n", + " n_trials=100,\n", + " n_startup_trials=50,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " track_to_mlflow=False,\n", + " random_seed=42,\n", + " ),\n", + ") \n", + "\n", + "default_study = optimize(config, study_name=\"non-transform_example\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The importance of scaling can be analysed by directly contrasting the two different studies with and without log transformation:" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import seaborn as sns\n", + "\n", + "comparison = pd.concat((default_study.trials_dataframe().assign(run=f'no transform (best ={study.best_value:.2f})'),\n", + " transformed_study.trials_dataframe().assign(run=f'transform (best ={transformed_study.best_value:.2f})')))\n", + "\n", + "default_reg_scoring= config.settings.scoring\n", + "ax = sns.relplot(data=comparison, x=\"number\", y=\"value\", \n", + " col='run',hue='params_algorithm_name', \n", + " facet_kws={\"sharey\":False})\n", + "ax.set(xlabel=\"Trial number\",ylabel=f\"Ojbective value\\n({default_reg_scoring})\")\n", + "ax.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This example shows the influence of scaling the pXC50 values to the log scale. The non-normalised distribution of the unlogged data yields very large (negative) model evaluation scores, since evaluation metrics such as MSE are relative, and the scale of the error is reported in performance values.\n", + "\n", + "Users generate predictions for a model trained on log transformed data in the same way as the normal models, like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1126.56968721, 120.20237903])" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Get the best Trial from the log transformed study and build the model.\n", + "buildconfig = buildconfig_best(transformed_study)\n", + "best_build = build_best(buildconfig, \"../target/best.pkl\")\n", + "\n", + "# generate predictions\n", + "import pickle\n", + "with open(\"../target/best.pkl\", \"rb\") as f:\n", + " model = pickle.load(f)\n", + "model.predict_from_smiles([\"CCC\", \"CC(=O)Nc1ccc(O)cc1\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "NB: Please note that outputs have 
automatically been reverse transformed at inference, back onto the original XC50 scale, as shown by the large values outside the log pXC50 range.\n", + "\n", + "This is the default behaviour of QSARtuna; the reverse transform is performed at inference when log transformation was applied, so that users can act on predictions on the original input data scale. Importantly, a user can easily override this behaviour by providing the transform parameter as `None`:" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([2.94824194, 3.92008694])" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.predict_from_smiles([\"CCC\", \"CC(=O)Nc1ccc(O)cc1\"], transform=None)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This will instruct QSARtuna to avoid the reverse transform on the predictions. This transform parameter is ignored if no transformation was applied in the user config.\n", + "\n", + "Log transformation can also be combined with the PTR transform. In this situation, all user inputs are expected to be on the untransformed scale. 
For example, if a user wishes to create a PTR model, trained on pXC50 data and a cut-off for pXC50 values of 5 (10um), the following config can be used:" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:26:10,518] A new study created in memory with name: ptr_and_transform_example\n", + "[I 2024-07-02 14:26:10,558] A new study created in memory with name: study_name_0\n", + "[I 2024-07-02 14:26:10,728] Trial 0 finished with value: -0.002341918451736245 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 0 with value: -0.002341918451736245.\n", + "[I 2024-07-02 14:26:10,805] Trial 1 finished with value: -0.0024908979029632677 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 7, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 0 with value: -0.002341918451736245.\n", + "[I 2024-07-02 14:26:10,847] Trial 2 finished with value: -0.007901407671048116 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 5.141096648805748, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 2.4893466963980463e-08, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 0 with value: -0.002341918451736245.\n", + "[I 2024-07-02 14:26:10,888] Trial 3 finished with value: -0.00496231674623194 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 5, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 0 with value: -0.002341918451736245.\n", + "[I 2024-07-02 14:26:10,917] Trial 4 finished with value: -0.0026848278110363512 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 3, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 0 with value: -0.002341918451736245.\n", + "[I 2024-07-02 14:26:10,959] Trial 5 finished with value: -0.0010872728889471893 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.7896547008552977, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 5 with value: -0.0010872728889471893.\n", + "[I 2024-07-02 14:26:11,000] Trial 6 finished with value: -0.008706109201510277 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.6574750183038587, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 5 with value: -0.0010872728889471893.\n", + "[I 2024-07-02 14:26:11,027] Trial 7 finished with value: -0.008706109201510277 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.3974313630683448, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 5 with value: -0.0010872728889471893.\n", + "[I 2024-07-02 14:26:11,093] Trial 8 finished with value: -0.002999462459688867 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 28, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 5 with value: -0.0010872728889471893.\n", + "[I 2024-07-02 14:26:11,120] Trial 9 finished with value: -0.00825680029907454 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.2391884918766034, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 5 with value: -0.0010872728889471893.\n", + "[I 2024-07-02 14:26:11,148] Trial 10 finished with value: -0.007901407993550248 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.00044396482429275296, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 2.3831436879125245e-10, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 5 with value: -0.0010872728889471893.\n", + "[I 2024-07-02 14:26:11,174] Trial 11 finished with value: -0.007901405163828307 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.00028965395242758657, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 2.99928292425642e-07, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 5 with value: -0.0010872728889471893.\n", + "[I 2024-07-02 14:26:11,201] Trial 12 finished with value: -0.0021653695362066753 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 5 with value: -0.0010872728889471893.\n", + "[I 2024-07-02 14:26:11,227] Trial 13 finished with value: -0.002869169486971014 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 2, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 5 with value: -0.0010872728889471893.\n", + "[I 2024-07-02 14:26:11,255] Trial 14 finished with value: -0.0010855652626111146 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.4060379177903557, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 14 with value: -0.0010855652626111146.\n", + "[I 2024-07-02 14:26:11,329] Trial 15 finished with value: -0.00550533804299308 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 20, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 14 with value: -0.0010855652626111146.\n", + "[I 2024-07-02 14:26:11,359] Trial 16 finished with value: -0.008706109201510277 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.344271094811757, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 14 with value: -0.0010855652626111146.\n", + "[I 2024-07-02 14:26:11,386] Trial 17 finished with value: -0.002236800860454562 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.670604991178476, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 14 with value: -0.0010855652626111146.\n", + "[I 2024-07-02 14:26:11,466] Trial 18 finished with value: -0.006105985607235417 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 22, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 14 with value: -0.0010855652626111146.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:26:11,495] Trial 19 finished with value: -0.008706109201510277 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.5158832554303112, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 14 with value: -0.0010855652626111146.\n", + "[I 2024-07-02 14:26:11,523] Trial 20 finished with value: -0.004846526544994462 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 14 with value: -0.0010855652626111146.\n", + "[I 2024-07-02 14:26:11,550] Trial 21 finished with value: -0.006964668794465202 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.0009327650919528738, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 6.062479210472502, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 14 with value: -0.0010855652626111146.\n", + "[I 2024-07-02 14:26:11,565] Trial 22 pruned. 
Duplicate parameter set\n", + "[I 2024-07-02 14:26:11,594] Trial 23 finished with value: -0.008706109201510277 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.1366172066709432, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 14 with value: -0.0010855652626111146.\n", + "[I 2024-07-02 14:26:11,670] Trial 24 finished with value: -0.002999462459688867 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 26, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 14 with value: -0.0010855652626111146.\n", + "[I 2024-07-02 14:26:11,699] Trial 25 finished with value: -0.008384326901042542 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 43.92901911959232, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 27.999026012594694, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 14 with value: -0.0010855652626111146.\n", + "[I 2024-07-02 14:26:11,730] Trial 26 finished with value: -0.001082194093844804 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.5888977841391714, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 26 with value: -0.001082194093844804.\n", + "[I 2024-07-02 14:26:11,761] Trial 27 finished with value: -0.0010807084256204563 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.19435298754153707, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 27 with value: -0.0010807084256204563.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}, return [-0.0021653695362066753]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:26:11,839] Trial 28 finished with value: -0.006105985607235417 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 13, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 27 with value: -0.0010807084256204563.\n", + "[I 2024-07-02 14:26:11,868] Trial 29 finished with value: -0.008384326901042542 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 1.6285506249643193, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 0.35441495011256785, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 27 with value: -0.0010807084256204563.\n", + "[I 2024-07-02 14:26:11,948] Trial 30 finished with value: -0.005505338042993082 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 10, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 27 with value: -0.0010807084256204563.\n", + "[I 2024-07-02 14:26:11,979] Trial 31 finished with value: -0.008706109201510277 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.2457809516380005, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 27 with value: -0.0010807084256204563.\n", + "[I 2024-07-02 14:26:12,008] Trial 32 finished with value: -0.008706109201510277 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.6459129458824919, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 27 with value: -0.0010807084256204563.\n", + "[I 2024-07-02 14:26:12,039] Trial 33 finished with value: -0.005247934991526694 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.8179058888285398, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 27 with value: -0.0010807084256204563.\n", + "[I 2024-07-02 14:26:12,057] Trial 34 pruned. 
Duplicate parameter set\n", + "[I 2024-07-02 14:26:12,089] Trial 35 finished with value: -0.0010803393728928605 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.0920052840435055, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 35 with value: -0.0010803393728928605.\n", + "[I 2024-07-02 14:26:12,120] Trial 36 finished with value: -0.005218354425190125 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.8677032984759461, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 35 with value: -0.0010803393728928605.\n", + "[I 2024-07-02 14:26:12,138] Trial 37 pruned. Duplicate parameter set\n", + "[I 2024-07-02 14:26:12,169] Trial 38 finished with value: -0.004999207507691546 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.2865764368847064, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 35 with value: -0.0010803393728928605.\n", + "[I 2024-07-02 14:26:12,245] Trial 39 finished with value: -0.0015694919308122948 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 35 with value: -0.0010803393728928605.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}, return [-0.0021653695362066753]\n", + "Duplicated trial: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}, return [-0.004846526544994462]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:26:12,326] Trial 40 finished with value: -0.0019757694194001384 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 9, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 35 with value: -0.0010803393728928605.\n", + "[I 2024-07-02 14:26:12,343] Trial 41 pruned. Duplicate parameter set\n", + "[I 2024-07-02 14:26:12,421] Trial 42 finished with value: -0.002341918451736244 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 25, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 35 with value: -0.0010803393728928605.\n", + "[I 2024-07-02 14:26:12,453] Trial 43 finished with value: -0.00368328296527152 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 2, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 35 with value: -0.0010803393728928605.\n", + "[I 2024-07-02 14:26:12,521] Trial 44 finished with value: -0.003412828259848677 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 22, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 9, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 35 with value: -0.0010803393728928605.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 5, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}, return [-0.00496231674623194]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:26:12,551] Trial 45 finished with value: -0.004412110711416997 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 3, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. 
Best is trial 35 with value: -0.0010803393728928605.\n", + "[I 2024-07-02 14:26:12,583] Trial 46 finished with value: -0.008706109201510277 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.6437201185807124, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 35 with value: -0.0010803393728928605.\n", + "[I 2024-07-02 14:26:12,616] Trial 47 finished with value: -0.008384326901042542 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 82.41502276709562, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 0.10978379088847677, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 35 with value: -0.0010803393728928605.\n", + "[I 2024-07-02 14:26:12,647] Trial 48 finished with value: -0.0021743798524909573 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.022707289534838138, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 35 with value: -0.0010803393728928605.\n", + "[I 2024-07-02 14:26:12,679] Trial 49 finished with value: -0.0022761245849848527 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 3, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 35 with value: -0.0010803393728928605.\n", + "[I 2024-07-02 14:26:12,715] Trial 50 finished with value: -0.0010805768178458735 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.1580741708125475, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 35 with value: -0.0010803393728928605.\n", + "[I 2024-07-02 14:26:12,750] Trial 51 finished with value: -0.001080400188305814 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.10900413894771653, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 35 with value: -0.0010803393728928605.\n", + "[I 2024-07-02 14:26:12,784] Trial 52 finished with value: -0.0010805009783570441 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.13705914456987853, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 35 with value: -0.0010803393728928605.\n", + "[I 2024-07-02 14:26:12,817] Trial 53 finished with value: -0.0010804680472500541 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.12790870116376127, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 35 with value: -0.0010803393728928605.\n", + "[I 2024-07-02 14:26:12,853] Trial 54 finished with value: -0.0010803723579987025 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.10123180962907431, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 35 with value: -0.0010803393728928605.\n", + "[I 2024-07-02 14:26:12,890] Trial 55 finished with value: -0.001080969596032512 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.26565663774320425, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 35 with value: -0.0010803393728928605.\n", + "[I 2024-07-02 14:26:12,925] Trial 56 finished with value: -0.0010800333715082816 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.005637048678674678, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 56 with value: -0.0010800333715082816.\n", + "[I 2024-07-02 14:26:12,962] Trial 57 finished with value: -0.0010802574700236845 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.06902647427781451, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 56 with value: -0.0010800333715082816.\n", + "[I 2024-07-02 14:26:13,000] Trial 58 finished with value: -0.0010814994986419817 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.4076704953178294, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 56 with value: -0.0010800333715082816.\n", + "[I 2024-07-02 14:26:13,037] Trial 59 finished with value: -0.001080161136846237 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.04187106800188596, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 56 with value: -0.0010800333715082816.\n", + "[I 2024-07-02 14:26:13,071] Trial 60 finished with value: -0.0010800254136811547 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.003371853599610078, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 60 with value: -0.0010800254136811547.\n", + "[I 2024-07-02 14:26:13,107] Trial 61 finished with value: -0.0010801290036870739 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.032781796328385376, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 60 with value: -0.0010800254136811547.\n", + "[I 2024-07-02 14:26:13,142] Trial 62 finished with value: -0.001080037482216557 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.006806773659187283, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 60 with value: -0.0010800254136811547.\n", + "[I 2024-07-02 14:26:13,179] Trial 63 finished with value: -0.0010801015705851358 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.025009489814943348, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 60 with value: -0.0010800254136811547.\n", + "[I 2024-07-02 14:26:13,217] Trial 64 finished with value: -0.0010812122378841013 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.3311125627707556, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 60 with value: -0.0010800254136811547.\n", + "[I 2024-07-02 14:26:13,254] Trial 65 finished with value: -0.0010800531021304936 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.011249102380159387, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 60 with value: -0.0010800254136811547.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:26:13,291] Trial 66 finished with value: -0.00108004162698813 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.007985924302396141, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 60 with value: -0.0010800254136811547.\n", + "[I 2024-07-02 14:26:13,328] Trial 67 finished with value: -0.0010800223466649803 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.00249856291483601, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 67 with value: -0.0010800223466649803.\n", + "[I 2024-07-02 14:26:13,364] Trial 68 finished with value: -0.0010815197263834202 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.4130244908975993, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 67 with value: -0.0010800223466649803.\n", + "[I 2024-07-02 14:26:13,402] Trial 69 finished with value: -0.0010800257029027847 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.0034541978803366022, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 67 with value: -0.0010800223466649803.\n", + "[I 2024-07-02 14:26:13,439] Trial 70 finished with value: -0.0010810223438672223 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.27994943662091765, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 67 with value: -0.0010800223466649803.\n", + "[I 2024-07-02 14:26:13,475] Trial 71 finished with value: -0.0010800211339555509 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.0021532199144365088, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:13,513] Trial 72 finished with value: -0.0010800296871141684 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.0045884092728113585, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:13,548] Trial 73 finished with value: -0.0010800437739166451 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.008596600952859433, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:13,587] Trial 74 finished with value: -0.0010809366267195716 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.2567049271070902, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:13,627] Trial 75 finished with value: -0.001080725386603206 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.1990111983307052, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:13,664] Trial 76 finished with value: -0.0010807368035830652 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.20214459724424078, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:13,704] Trial 77 finished with value: -0.0010800236072155854 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.00285750520671645, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:13,741] Trial 78 finished with value: -0.0010806223050773966 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.17064008990759916, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:13,779] Trial 79 finished with value: -0.0010876516369772728 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.8725420109733135, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:13,819] Trial 80 finished with value: -0.00108142358144501 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.387533542012365, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:13,857] Trial 81 finished with value: -0.0010800248050489667 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.0031985656730512953, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:13,894] Trial 82 finished with value: -0.001080022268085466 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.002476186542950981, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:13,932] Trial 83 finished with value: -0.0010820922958715991 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.5626643670396761, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:13,969] Trial 84 finished with value: -0.0010805094397523254 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.1394077979875128, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:14,007] Trial 85 finished with value: -0.0010841993753324146 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.0858347526799794, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:14,048] Trial 86 finished with value: -0.007899735988203994 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.03329943145150872, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 0.00025672309762227527, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.0010800211339555509.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:26:14,086] Trial 87 finished with value: -0.0010868762004637347 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.702026434077893, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:14,124] Trial 88 finished with value: -0.001080400750193767 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.10916094511173127, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:14,163] Trial 89 finished with value: -0.0010806791616300314 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.18630665884100353, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:14,201] Trial 90 finished with value: -0.0010804028029753213 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.10973377642487026, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:14,240] Trial 91 finished with value: -0.0010800812188506515 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.019235980282946118, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:14,277] Trial 92 finished with value: -0.0010800299598580359 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.004666043957133775, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:14,316] Trial 93 finished with value: -0.0010803843696362083 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.1045877457096882, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:14,353] Trial 94 finished with value: -0.001080333048974234 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.09023455456986404, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:14,394] Trial 95 finished with value: -0.008706109201510277 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.8200088368788958, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 71 with value: -0.0010800211339555509.\n", + "[I 2024-07-02 14:26:14,432] Trial 96 finished with value: -0.001080014645182176 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.00030502148265565063, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 96 with value: -0.001080014645182176.\n", + "[I 2024-07-02 14:26:14,473] Trial 97 finished with value: -0.0010807968027851892 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.21858260742423916, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 96 with value: -0.001080014645182176.\n", + "[I 2024-07-02 14:26:14,516] Trial 98 finished with value: -0.007907028395366658 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.024725853754515203, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 0.0011658455138452, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 96 with value: -0.001080014645182176.\n", + "[I 2024-07-02 14:26:14,553] Trial 99 finished with value: -0.0010803563024666294 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.0967427718847167, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 96 with value: -0.001080014645182176.\n" + ] + } + ], + "source": [ + "ptr_config_log_transform = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"Smiles\",\n", + " response_column=\"Measurement\",\n", + " response_type=\"regression\",\n", + " training_dataset_file=\"../tests/data/sdf/example.sdf\",\n", + " split_strategy=Stratified(fraction=0.4),\n", + " deduplication_strategy=KeepMedian(),\n", + " log_transform=True, # Set to True to perform\n", + " log_transform_base=LogBase.LOG10, # Log10 base will be used\n", + " log_transform_negative=LogNegative.TRUE, # Negated transform for the pXC50 calculation\n", + " log_transform_unit_conversion=6, # 6 units used for pXC50 conversion\n", + " probabilistic_threshold_representation=True, # This enables PTR\n", + " probabilistic_threshold_representation_threshold=5, # This defines the activity threshold for 10um\n", + " probabilistic_threshold_representation_std=0.6, # This captures the deviation/uncertainty in the dataset\n", + "\n", + " ),\n", + " descriptors=[\n", + " ECFP.new(), \n", + " 
ECFP_counts.new(), \n", + " MACCS_keys.new(),\n", + " ],\n", + " algorithms=[\n", + " SVR.new(),\n", + " RandomForestRegressor.new(n_estimators={\"low\": 5, \"high\": 10}),\n", + " Ridge.new(),\n", + " Lasso.new(),\n", + " PLSRegression.new(),\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.REGRESSION,\n", + " cross_validation=3,\n", + " n_trials=100,\n", + " n_startup_trials=50,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " track_to_mlflow=False,\n", + " random_seed=42,\n", + " ),\n", + ") \n", + "\n", + "ptr_transformed_study = optimize(ptr_config_log_transform, study_name=\"ptr_and_transform_example\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Analysis of the study is performed in the same manner as above:" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.set_theme(style=\"darkgrid\")\n", + "default_reg_scoring= config.settings.scoring\n", + "ax = sns.scatterplot(data=ptr_transformed_study.trials_dataframe(), x=\"number\", \n", + " y=\"value\",style='params_algorithm_name',hue='params_algorithm_name')\n", + "ax.set(xlabel=\"Trial number\",ylabel=f\"Ojbective value\\n({default_reg_scoring})\")\n", + "sns.move_legend(ax, \"upper right\", bbox_to_anchor=(1.6, 1), ncol=1, title=\"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In comparison to log scaled models trained without the PTR transform, log-transformed models trained with PTR functions will always output the probabilistic class membership likelihoods from the PTR function: " + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([False])" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Get the best Trial from the log transformed study and build the model.\n", + "buildconfig = buildconfig_best(ptr_transformed_study)\n", + "best_build = build_best(buildconfig, \"../target/best.pkl\")\n", + "\n", + "# generate predictions\n", + "import pickle\n", + "with open(\"../target/best.pkl\", \"rb\") as f:\n", + " model = pickle.load(f)\n", + "model.predict_from_smiles([\"CCC\"]) == model.predict_from_smiles([\"CCC\"], transform=None)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is intended behaviour since 1.) the PTR is only intended to alter model outputs so that predictions operate on a probabilistic scale, and 2.) the PTR transform is a lossy transformation anyway (values at the extremes of the probability transformation scale are intentionally clipped and cannot be reversed). Hence, a reverse transform of the PTR is not possible at inference time." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Covariate modelling" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Modelling one simple covariate, e.g. dose or time point" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A covariate, such as dose or timepoint, can be used as an auxiliary descriptor to account for the effect of this parameter in predictions. In this situation, a compound can be represented more than once across n distinct covariate measurements. Each of the covariate response values can now be used in training an algorithm in this approach. Replicates across each compound-covariate pair may be deduplicated using the standard deduplication approaches.\n", + "\n", + "To activate this function in QSARtuna, the `aux_column` setting can be used according to the column denoting the covariate to be modelled, like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:26:17,282] A new study created in memory with name: covariate_example\n", + "[I 2024-07-02 14:26:17,323] A new study created in memory with name: study_name_0\n", + "[I 2024-07-02 14:26:17,422] Trial 0 finished with value: -5186.767663956718 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 0 with value: -5186.767663956718.\n", + "[I 2024-07-02 14:26:17,522] Trial 1 finished with value: -4679.740824270968 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 7, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 1 with value: -4679.740824270968.\n", + "[I 2024-07-02 14:26:17,575] Trial 2 finished with value: -4890.6705099499995 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 5.141096648805748, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 2.4893466963980463e-08, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 1 with value: -4679.740824270968.\n", + "[I 2024-07-02 14:26:17,628] Trial 3 finished with value: -3803.9324375833753 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 5, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 3 with value: -3803.9324375833753.\n", + "[I 2024-07-02 14:26:17,667] Trial 4 finished with value: -3135.6497388676926 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 3, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 4 with value: -3135.6497388676926.\n", + "[I 2024-07-02 14:26:17,722] Trial 5 finished with value: -551.2518812859375 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.7896547008552977, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 5 with value: -551.2518812859375.\n", + "[I 2024-07-02 14:26:17,778] Trial 6 finished with value: -4309.124112370974 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.6574750183038587, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 5 with value: -551.2518812859375.\n", + "[I 2024-07-02 14:26:17,818] Trial 7 finished with value: -362.30159424580074 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.3974313630683448, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 7 with value: -362.30159424580074.\n", + "[I 2024-07-02 14:26:17,897] Trial 8 finished with value: -4357.02827013125 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 28, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 7 with value: -362.30159424580074.\n", + "[I 2024-07-02 14:26:17,963] Trial 9 finished with value: -386.1437929337522 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.2391884918766034, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 7 with value: -362.30159424580074.\n" + ] + } + ], + "source": [ + "aux_col_config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"canonical\",\n", + " response_column=\"molwt\",\n", + " response_type=\"regression\",\n", + " training_dataset_file=\"../tests/data/aux_descriptors_datasets/train_with_conc.csv\",\n", + " aux_column=\"aux1\" # use column aux1 as a co-variate in modelling\n", + " ),\n", + " descriptors=[\n", + " ECFP.new(), \n", + " ECFP_counts.new(), \n", + " MACCS_keys.new(),\n", + " ],\n", + " algorithms=[\n", + " SVR.new(),\n", + " RandomForestRegressor.new(n_estimators={\"low\": 5, \"high\": 10}),\n", + " Ridge.new(),\n", + " Lasso.new(),\n", + " PLSRegression.new(),\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.REGRESSION,\n", + " cross_validation=2,\n", + " n_trials=10,\n", + " random_seed=42,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " ),\n", + ") \n", + "\n", + "aux_col_study = optimize(aux_col_config, study_name=\"covariate_example\")\n", + "build_best(buildconfig_best(aux_col_study), \"../target/aux1_model.pkl\")\n", + "with open(\"../target/aux1_model.pkl\", \"rb\") as f:\n", + " aux1_model = pickle.load(f)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Predictions from a covariate-trained model can now be generated like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([52.45281013, 52.45281013])" + ] + }, + "execution_count": 
84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "aux1_model.predict_from_smiles([\"CCC\", \"CCC\"], aux=[10,5])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "where the `aux` parameter of `predict_from_smiles` is used (and required) to generate predictions for an input covariate auxiliary query, and the shape of the `aux` query must be the same shape as the SMILES input query, otherwise a ValueError will be thrown.\n", + "\n", + "So, for this toy example query the predictions are for the SMILES `CCC` and two separate auxiliary covariate queries of `10` and `5`.\n", + "\n", + "\n", + "N.B: For this particular toy training example, the molecular weight response column (`molwt`) is the same regardless of the modelled covariate value, and so the predictions are the same regardless of the `aux` query, as expected." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Transformation of co-variates: Proteochemometric (PCM) modelling + more" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### VectorFromColumn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In order to utilise more than one type of covariate value at a time, an auxiliary transformation must be applied to process co-variates in a manner expected for the algorithms.\n", + "\n", + "Pre-computed covariates (in a similar manner to pre-computed descriptors), can be processed using the `VectorFromColumn` transform. 
Similar to pre-computed descriptors, the `VectorFromColumn` transform will split covariates on commas (`,`) like so: " + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:26:18,237] A new study created in memory with name: vector_aux_example\n", + "[I 2024-07-02 14:26:18,278] A new study created in memory with name: study_name_0\n", + "[I 2024-07-02 14:26:18,353] Trial 0 finished with value: -2200.6817959410578 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.011994365911634164, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 0 with value: -2200.6817959410578.\n", + "[I 2024-07-02 14:26:18,396] Trial 1 finished with value: -2200.95660880078 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.029071783512897825, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. Best is trial 0 with value: -2200.6817959410578.\n", + "[I 2024-07-02 14:26:18,454] Trial 2 finished with value: -5798.564494725643 and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.022631709120790048, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 6.2198637677605415, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
Best is trial 0 with value: -2200.6817959410578.\n", + "[I 2024-07-02 14:26:18,499] Trial 3 finished with value: -972.2899178898048 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.8916194399474267, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 3 with value: -972.2899178898048.\n", + "[I 2024-07-02 14:26:18,556] Trial 4 finished with value: -647.3336440433073 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.5914093983615214, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 4 with value: -647.3336440433073.\n", + "[I 2024-07-02 14:26:18,614] Trial 5 finished with value: -653.3036472748931 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.6201811079699818, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 4 with value: -647.3336440433073.\n", + "[I 2024-07-02 14:26:18,657] Trial 6 finished with value: -3807.8035919667395 and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 4 with value: -647.3336440433073.\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. 
You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 4.901e+01, tolerance: 1.892e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:678: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.986e+01, tolerance: 1.914e+01\n", + " model = cd_fast.enet_coordinate_descent(\n", + "[I 2024-07-02 14:26:18,752] Trial 7 finished with value: -5019.459500770764 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.1376436589359351, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. Best is trial 4 with value: -647.3336440433073.\n", + "[I 2024-07-02 14:26:18,836] Trial 8 finished with value: -2756.4017711284796 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 25, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. Best is trial 4 with value: -647.3336440433073.\n", + "[I 2024-07-02 14:26:18,893] Trial 9 finished with value: -771.797115414836 and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.74340620175102, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
Best is trial 4 with value: -647.3336440433073.\n" + ] + } + ], + "source": [ + "from optunaz.utils.preprocessing.transform import VectorFromColumn\n", + "\n", + "vector_covariate_config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"canonical\",\n", + " response_column=\"molwt\",\n", + " response_type=\"regression\",\n", + " training_dataset_file=\"../tests/data/precomputed_descriptor/train_with_fp.csv\",\n", + " aux_column=\"fp\", # use a comma separated co-variate vector in column `fp`\n", + " aux_transform=VectorFromColumn.new(), # split the comma separated values into a vector\n", + " split_strategy=Stratified(fraction=0.2),\n", + " ),\n", + " descriptors=[\n", + " ECFP.new(), \n", + " ECFP_counts.new(), \n", + " MACCS_keys.new(),\n", + " ],\n", + " algorithms=[\n", + " SVR.new(),\n", + " RandomForestRegressor.new(n_estimators={\"low\": 5, \"high\": 10}),\n", + " Ridge.new(),\n", + " Lasso.new(),\n", + " PLSRegression.new(),\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.REGRESSION,\n", + " cross_validation=3,\n", + " n_trials=10,\n", + " n_startup_trials=0,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " track_to_mlflow=False,\n", + " random_seed=42,\n", + " ),\n", + ") \n", + "\n", + "vector_covariate_study = optimize(vector_covariate_config, study_name=\"vector_aux_example\")\n", + "build_best(buildconfig_best(vector_covariate_study), \"../target/vector_covariate_model.pkl\")\n", + "with open(\"../target/vector_covariate_model.pkl\", \"rb\") as f:\n", + " vector_covariate_model = pickle.load(f)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can inspect the input query for the auxiliary co-variates used in the modelling like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([[0., 0., 0., ..., 0., 0., 0.],\n", + " [1., 0., 0., ..., 1., 0., 0.],\n", + " [1., 0., 0., ..., 
1., 0., 1.],\n", + " ...,\n", + " [1., 1., 0., ..., 0., 0., 1.],\n", + " [1., 0., 0., ..., 0., 0., 0.],\n", + " [1., 0., 1., ..., 0., 0., 0.]]),\n", + " (40, 512))" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_smiles, train_y, train_aux, test_smiles, test_y, test_aux = vector_covariate_config.data.get_sets()\n", + "\n", + "train_aux, train_aux.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For this toy example, co-variate descriptors of length 512 are used for each of the 40 training instances. Inference for the model can be performed on the test set like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([454.39754917, 465.06352766, 340.52031134, 341.89875316,\n", + " 371.5516046 , 389.85042171, 436.33406203, 504.91439129,\n", + " 237.80585907, 346.48565041])" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vector_covariate_model.predict_from_smiles(test_smiles, aux=test_aux)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Z-Scales (for PCM)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Proteochemometric modelling (PCM) is the term used for the approach of training protein-descriptors as a distinct input space alongside the chemical ones. This can be performed in QSARtuna by providing Z-Scales as an auxiliary transformation to a user input column containing sequence information. Protein sequence is transformed to Z-Scales based on [this publication](https://pubs.acs.org/doi/10.1021/jm9700575) using the `Peptides` Python package.\n", + "\n", + "N.B. 
Note that Z-Scales as covariates are a distinct method separate from `ZScales` descriptors, since the former treats Z-Scales as a distinct input parameter (for PCM modelling), whereas the latter treats them as a descriptor trial that may or may not be selected during optimisation (e.g. for Protein-peptide interaction modelling). In other words, Z-scales will always be an input descriptor parameter when applied as a covariate (and duplicates are treated on a compound-`ZScale` pair basis).\n", + "\n", + "Now let us consider the following toy data set file:" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "head: ../tests/data/peptide/toxinpred3/train.csv: No such file or directory\r\n" + ] + } + ], + "source": [ + "!head -n 5 ../tests/data/peptide/toxinpred3/train.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following example demonstrates how Z-Scales may be utilised for PCM by specifying the `ZScales` data transform on the \"Peptide\" column containing our peptide sequence, like so: " + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:31:29,029] A new study created in memory with name: zscale_aux_example\n", + "[I 2024-07-02 14:31:29,089] A new study created in memory with name: study_name_0\n", + "[I 2024-07-02 14:31:54,458] Trial 0 finished with value: 0.8886986575836505 and parameters: {'algorithm_name': 'KNeighborsClassifier', 'KNeighborsClassifier_algorithm_hash': 'e51ca55089f389fc37a736adb2aa0e42', 'metric__e51ca55089f389fc37a736adb2aa0e42': , 'n_neighbors__e51ca55089f389fc37a736adb2aa0e42': 5, 'weights__e51ca55089f389fc37a736adb2aa0e42': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 128, \"returnRdkit\": false}}'}. 
Best is trial 0 with value: 0.8886986575836505.\n" + ] + } + ], + "source": [ + "from optunaz.utils.preprocessing.transform import ZScales\n", + "from optunaz.config.optconfig import KNeighborsClassifier\n", + "\n", + "zscale_covariate_config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"Smiles\",\n", + " response_column=\"Class\",\n", + " response_type=\"classification\",\n", + " training_dataset_file=\"../tests/data/peptide/toxinpred3/train.csv\",\n", + " aux_column=\"Peptide\", # Name of the column containing peptide/protein amino acid sequence\n", + " aux_transform=ZScales.new(), # Zscales transform is used to transform sequence into a Z-scales vector\n", + " split_strategy=Stratified(fraction=0.2),\n", + " ),\n", + " descriptors=[\n", + " ECFP.new(nBits=128), \n", + " ],\n", + " algorithms=[\n", + " KNeighborsClassifier.new(),\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.CLASSIFICATION,\n", + " cross_validation=2,\n", + " n_trials=1,\n", + " n_startup_trials=0,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " track_to_mlflow=False,\n", + " random_seed=42,\n", + " ),\n", + ") \n", + "\n", + "zscale_covariate_study = optimize(zscale_covariate_config, study_name=\"zscale_aux_example\")\n", + "build_best(buildconfig_best(zscale_covariate_study), \"../target/zscale_covariate_model.pkl\")\n", + "with open(\"../target/zscale_covariate_model.pkl\", \"rb\") as f:\n", + " zscale_covariate_model = pickle.load(f)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "N:B. 
Unlike the `ZScales` descriptor (which works on the SMILES level of a peptide/protein), the `ZScales` data transform expects amino acid sequences as inputs.\n", + "\n", + "We can inspect the input query for the auxiliary co-variates used in the modelling like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([[ 0.21269231, -0.91153846, 0.29038462, -0.69846154, -0.22230769],\n", + " [-0.99521739, -0.59826087, -0.34695652, -0.03086957, 0.13391304],\n", + " [ 0.08083333, -0.6125 , 0.82916667, -0.05083333, -0.56083333],\n", + " ...,\n", + " [-0.02178571, -0.91785714, 0.45392857, -0.37642857, -0.03107143],\n", + " [ 0.93357143, -0.78964286, 0.62928571, -0.50857143, -0.50107143],\n", + " [-0.1232 , -0.3364 , 0.2328 , -0.1368 , 0.2304 ]]),\n", + " (7062, 5))" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_smiles, train_y, train_aux, test_smiles, test_y, test_aux = zscale_covariate_config.data.get_sets()\n", + "\n", + "train_aux, train_aux.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For this toy example, there are 7062 training instances, each with the expected 5 Z-Scale co-variate descriptors used in training. Inference for the model can be performed on the test set by providing the auxiliary co-variate Z-Scales like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.2, 0. , 1. 
, ..., 0.2, 0.8, 0.2])" + ] + }, + "execution_count": 93, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "zscale_covariate_model.predict_from_smiles(test_smiles, aux=test_aux)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We may also inspect the X-matrix (descriptor) used to train the toy model like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ax = sns.heatmap(zscale_covariate_model.predictor.X_,\n", + " vmin=-1, vmax=1, cmap='Spectral',\n", + " cbar_kws={'label': 'Fingerprint value'})\n", + "ax.set(ylabel=\"Compound input\", xlabel=f\"Input descriptor (248bit ECFP & Z-Scale))\");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that the (continuous) Z-scales covariates can be seen in the final columns (129-133) after the 128bit ECFP fingerprints used in this example " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Advanced options for QSARtuna runs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Multi-objective prioritization of performance and standard deviation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "QSARtuna can optimize for the minimization of the standard deviation of performance across the folds. This should in theory prioritize hyperparameters that are consistently performant across different splits of the data, and so should be more generalizable/performant in production. 
This can be performed with the `minimize_std_dev` in the example below:" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:32:36,740] A new study created in memory with name: example_multi-parameter_analysis\n", + "[I 2024-07-02 14:32:36,779] A new study created in memory with name: study_name_0\n", + "[I 2024-07-02 14:32:37,080] Trial 0 finished with values: [-1.4008740644240856, 0.9876203329634794] and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 5, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. \n", + "[I 2024-07-02 14:32:37,331] Trial 1 finished with values: [-1.3561484909673425, 0.9875061220991906] and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 7, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. \n", + "[I 2024-07-02 14:32:37,472] Trial 2 finished with values: [-2.7856521165563053, 0.21863029956806662] and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 5.141096648805748, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 2.4893466963980463e-08, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
\n", + "[I 2024-07-02 14:32:37,525] Trial 3 finished with values: [-0.9125905675311808, 0.7861693342190089] and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 5, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. \n", + "[I 2024-07-02 14:32:37,603] Trial 4 finished with values: [-0.5238765412750027, 0.2789424384877304] and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 3, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. \n", + "[I 2024-07-02 14:32:37,657] Trial 5 finished with values: [-0.5348363849100434, 0.5741725628917808] and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.7896547008552977, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. \n", + "[I 2024-07-02 14:32:37,746] Trial 6 finished with values: [-2.0072511048320134, 0.2786318125997387] and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.6574750183038587, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. \n", + "[I 2024-07-02 14:32:37,786] Trial 7 finished with values: [-0.9625764609276656, 0.27575381401822424] and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.3974313630683448, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
\n", + "[I 2024-07-02 14:32:38,036] Trial 8 finished with values: [-1.1114006274062536, 0.7647766019001522] and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 28, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. \n", + "[I 2024-07-02 14:32:38,080] Trial 9 finished with values: [-0.7801680863916906, 0.2725738454485389] and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.2391884918766034, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. \n", + "[I 2024-07-02 14:32:38,121] Trial 10 finished with values: [-2.785652116470164, 0.21863029955530786] and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.00044396482429275296, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 2.3831436879125245e-10, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. \n", + "[I 2024-07-02 14:32:38,152] Trial 11 finished with values: [-2.785651973436432, 0.21863032832257323] and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.00028965395242758657, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 2.99928292425642e-07, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
\n", + "[I 2024-07-02 14:32:38,180] Trial 12 finished with values: [-0.6101359993004856, 0.3011280543457062] and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. \n", + "[I 2024-07-02 14:32:38,209] Trial 13 finished with values: [-0.5361950698070447, 0.23560786523195643] and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 2, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. \n", + "[I 2024-07-02 14:32:38,274] Trial 14 finished with values: [-0.5356113574175657, 0.5769721187181905] and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.4060379177903557, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. \n", + "[I 2024-07-02 14:32:38,439] Trial 15 finished with values: [-0.543430366921729, 0.514747412346662] and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 20, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. 
\n", + "[I 2024-07-02 14:32:38,479] Trial 16 finished with values: [-2.0072511048320134, 0.2786318125997387] and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.344271094811757, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. \n", + "[I 2024-07-02 14:32:38,508] Trial 17 finished with values: [-0.5194661889628072, 0.40146744515282495] and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.670604991178476, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. \n", + "[I 2024-07-02 14:32:38,659] Trial 18 finished with values: [-0.659749443628722, 0.6659085938841998] and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 22, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 6, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. \n", + "[I 2024-07-02 14:32:38,876] Trial 19 finished with values: [-1.1068495306229729, 0.24457822094737378] and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 0.5158832554303112, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. 
\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:32:38,918] Trial 20 finished with values: [-0.8604898820838102, 0.7086875504668667] and parameters: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"MACCS_keys\", \"parameters\": {}}'}. \n", + "[I 2024-07-02 14:32:38,949] Trial 21 finished with values: [-0.5919869916997383, 0.2367498627927979] and parameters: {'algorithm_name': 'SVR', 'SVR_algorithm_hash': 'ea7ccc7ef4a9329af0d4e39eb6184933', 'gamma__ea7ccc7ef4a9329af0d4e39eb6184933': 0.0009327650919528738, 'C__ea7ccc7ef4a9329af0d4e39eb6184933': 6.062479210472502, 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. \n", + "[I 2024-07-02 14:32:38,977] Trial 22 pruned. Duplicate parameter set\n", + "[I 2024-07-02 14:32:39,009] Trial 23 finished with values: [-1.2497762395862362, 0.10124660026536195] and parameters: {'algorithm_name': 'Lasso', 'Lasso_algorithm_hash': '5457f609662e44f04dcc9423066d2f58', 'alpha__5457f609662e44f04dcc9423066d2f58': 1.1366172066709432, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}. \n", + "[I 2024-07-02 14:32:39,151] Trial 24 finished with values: [-1.1114006274062536, 0.7647766019001522] and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 26, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 8, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}'}. 
\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Duplicated trial: {'algorithm_name': 'PLSRegression', 'PLSRegression_algorithm_hash': '9f2f76e479633c0bf18cf2912fed9eda', 'n_components__9f2f76e479633c0bf18cf2912fed9eda': 4, 'descriptor': '{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}'}, return [-0.6101359993004856, 0.3011280543457062]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:32:39,205] A new study created in memory with name: study_name_1\n", + "INFO:root:Enqueued ChemProp manual trial with sensible defaults: {'activation__668a7428ff5cdb271b01c0925e8fea45': 'ReLU', 'aggregation__668a7428ff5cdb271b01c0925e8fea45': 'mean', 'aggregation_norm__668a7428ff5cdb271b01c0925e8fea45': 100, 'batch_size__668a7428ff5cdb271b01c0925e8fea45': 50, 'depth__668a7428ff5cdb271b01c0925e8fea45': 3, 'dropout__668a7428ff5cdb271b01c0925e8fea45': 0.0, 'features_generator__668a7428ff5cdb271b01c0925e8fea45': 'none', 'ffn_hidden_size__668a7428ff5cdb271b01c0925e8fea45': 300, 'ffn_num_layers__668a7428ff5cdb271b01c0925e8fea45': 2, 'final_lr_ratio_exp__668a7428ff5cdb271b01c0925e8fea45': -4, 'hidden_size__668a7428ff5cdb271b01c0925e8fea45': 300, 'init_lr_ratio_exp__668a7428ff5cdb271b01c0925e8fea45': -4, 'max_lr_exp__668a7428ff5cdb271b01c0925e8fea45': -3, 'warmup_epochs_ratio__668a7428ff5cdb271b01c0925e8fea45': 0.1, 'algorithm_name': 'ChemPropRegressor', 'ChemPropRegressor_algorithm_hash': '668a7428ff5cdb271b01c0925e8fea45'}\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/joblib/memory.py:577: JobLibCollisionWarning: Possible name collisions between functions 'calculate_from_smi' (/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/optunaz/descriptors.py:-1) and 'calculate_from_smi' 
(/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/optunaz/descriptors.py:669)\n", + " return self._cached_call(args, kwargs, shelving=False)[0]\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/joblib/memory.py:577: JobLibCollisionWarning: Possible name collisions between functions 'calculate_from_smi' (/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/optunaz/descriptors.py:-1) and 'calculate_from_smi' (/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/optunaz/descriptors.py:669)\n", + " return self._cached_call(args, kwargs, shelving=False)[0]\n", + "/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/joblib/memory.py:577: JobLibCollisionWarning: Possible name collisions between functions 'calculate_from_smi' (/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/optunaz/descriptors.py:-1) and 'calculate_from_smi' (/Users/kljk345/PycharmProjects/Public_Qptuna/D/QSARtuna/venv/lib/python3.10/site-packages/optunaz/descriptors.py:669)\n", + " return self._cached_call(args, kwargs, shelving=False)[0]\n", + "[I 2024-07-02 14:33:47,802] Trial 0 finished with values: [-2.0621601907738047, 0.2749020946925899] and parameters: {'algorithm_name': 'ChemPropRegressor', 'ChemPropRegressor_algorithm_hash': '668a7428ff5cdb271b01c0925e8fea45', 'activation__668a7428ff5cdb271b01c0925e8fea45': , 'aggregation__668a7428ff5cdb271b01c0925e8fea45': , 'aggregation_norm__668a7428ff5cdb271b01c0925e8fea45': 100.0, 'batch_size__668a7428ff5cdb271b01c0925e8fea45': 50.0, 'depth__668a7428ff5cdb271b01c0925e8fea45': 3.0, 'dropout__668a7428ff5cdb271b01c0925e8fea45': 0.0, 'ensemble_size__668a7428ff5cdb271b01c0925e8fea45': 1, 'epochs__668a7428ff5cdb271b01c0925e8fea45': 5, 'features_generator__668a7428ff5cdb271b01c0925e8fea45': , 
'ffn_hidden_size__668a7428ff5cdb271b01c0925e8fea45': 300.0, 'ffn_num_layers__668a7428ff5cdb271b01c0925e8fea45': 2.0, 'final_lr_ratio_exp__668a7428ff5cdb271b01c0925e8fea45': -4, 'hidden_size__668a7428ff5cdb271b01c0925e8fea45': 300.0, 'init_lr_ratio_exp__668a7428ff5cdb271b01c0925e8fea45': -4, 'max_lr_exp__668a7428ff5cdb271b01c0925e8fea45': -3, 'warmup_epochs_ratio__668a7428ff5cdb271b01c0925e8fea45': 0.1, 'descriptor': '{\"name\": \"SmilesFromFile\", \"parameters\": {}}'}. \n", + "[I 2024-07-02 14:34:59,830] Trial 1 finished with values: [-2.0621601907738047, 0.2749020946925899] and parameters: {'algorithm_name': 'ChemPropRegressor', 'ChemPropRegressor_algorithm_hash': '668a7428ff5cdb271b01c0925e8fea45', 'activation__668a7428ff5cdb271b01c0925e8fea45': , 'aggregation__668a7428ff5cdb271b01c0925e8fea45': , 'aggregation_norm__668a7428ff5cdb271b01c0925e8fea45': 100.0, 'batch_size__668a7428ff5cdb271b01c0925e8fea45': 45.0, 'depth__668a7428ff5cdb271b01c0925e8fea45': 3.0, 'dropout__668a7428ff5cdb271b01c0925e8fea45': 0.0, 'ensemble_size__668a7428ff5cdb271b01c0925e8fea45': 1, 'epochs__668a7428ff5cdb271b01c0925e8fea45': 5, 'features_generator__668a7428ff5cdb271b01c0925e8fea45': , 'ffn_hidden_size__668a7428ff5cdb271b01c0925e8fea45': 300.0, 'ffn_num_layers__668a7428ff5cdb271b01c0925e8fea45': 2.0, 'final_lr_ratio_exp__668a7428ff5cdb271b01c0925e8fea45': -4, 'hidden_size__668a7428ff5cdb271b01c0925e8fea45': 300.0, 'init_lr_ratio_exp__668a7428ff5cdb271b01c0925e8fea45': -4, 'max_lr_exp__668a7428ff5cdb271b01c0925e8fea45': -3, 'warmup_epochs_ratio__668a7428ff5cdb271b01c0925e8fea45': 0.1, 'descriptor': '{\"name\": \"SmilesFromFile\", \"parameters\": {}}'}. 
\n" + ] + } + ], + "source": [ + "config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"Smiles\",\n", + " response_column=\"pXC50\",\n", + " response_type=\"regression\",\n", + " training_dataset_file=\"../tests/data/sdf/example.sdf\",\n", + " ),\n", + " descriptors=[\n", + " ECFP.new(), \n", + " ECFP_counts.new(), \n", + " MACCS_keys.new(),\n", + " SmilesFromFile.new(),\n", + " ],\n", + " algorithms=[\n", + " SVR.new(),\n", + " RandomForestRegressor.new(n_estimators={\"low\": 5, \"high\": 10}),\n", + " Ridge.new(),\n", + " Lasso.new(),\n", + " PLSRegression.new(),\n", + " ChemPropRegressor.new(epochs=5),\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.REGRESSION,\n", + " cross_validation=3,\n", + " n_trials=25,\n", + " n_startup_trials=25,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " track_to_mlflow=False,\n", + " random_seed=42,\n", + " n_chemprop_trials=3,\n", + " minimise_std_dev=True # Multi-objective optimization for performance and std. dev.\n", + " ),\n", + ") \n", + "\n", + "study = optimize(config, study_name=\"example_multi-parameter_analysis\")\n", + "default_reg_scoring= config.settings.scoring\n", + "study.set_metric_names([default_reg_scoring.value,'Standard deviation']) # Set the names of the multi-parameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note the multi-parameter performance reported for each trial, e.g. ` Trial 1 finished with values: [XXX, XXX]`, which correspond to negated MSE and deviation of negated MSE performance across the 3-folds, respectively. 
The two objectives may be plotted as a function of trial number, as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0, 0.5, 'Standard Deviation across folds')" + ] + }, + "execution_count": 96, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = study.trials_dataframe()\n", + "df.number = df.number+1\n", + "fig=plt.figure(figsize=(12,4))\n", + "ax = sns.scatterplot(data=df, x=\"number\", y=\"values_neg_mean_squared_error\",\n", + " legend=False, color=\"b\")\n", + "ax2 = sns.scatterplot(data=df, x=\"number\", y=\"values_Standard deviation\",\n", + " ax=ax.axes.twinx(), legend=False, color=\"r\")\n", + "\n", + "a = df['values_neg_mean_squared_error'].apply(np.floor).min()\n", + "b = df['values_neg_mean_squared_error'].apply(np.ceil).max()\n", + "c = df['values_Standard deviation'].apply(np.floor).min()\n", + "d = df['values_Standard deviation'].apply(np.ceil).max()\n", + "\n", + "# Align both axes\n", + "ax.set_ylim(a,b);\n", + "ax.set_yticks(np.linspace(a,b, 7));\n", + "ax2.set_ylim(c,d);\n", + "ax2.set_yticks(np.linspace(c,d, 7));\n", + "ax.set_xticks(df.number);\n", + "\n", + "# Set the colors of labels\n", + "ax.set_xlabel('Trial Number')\n", + "ax.set_ylabel('(Performance) Negated MSE', color='b')\n", + "ax2.set_ylabel('Standard Deviation across folds', color='r')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We may plot the Pareto front of this multi-objective study using the Optuna plotting functionaility directly:" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "hovertemplate": "%{text}Trial", + "marker": { + "color": [ + 0, + 1, + 2, + 3, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 14, + 15, + 16, + 18, + 19, + 20, + 21, + 24, + 25, + 26 + ], + "colorbar": { + "title": { + "text": "Trial" + } + }, + "colorscale": [ + [ + 0, + "rgb(247,251,255)" + ], + [ + 0.125, + "rgb(222,235,247)" + ], + [ + 
0.25, + "rgb(198,219,239)" + ], + [ + 0.375, + "rgb(158,202,225)" + ], + [ + 0.5, + "rgb(107,174,214)" + ], + [ + 0.625, + "rgb(66,146,198)" + ], + [ + 0.75, + "rgb(33,113,181)" + ], + [ + 0.875, + "rgb(8,81,156)" + ], + [ + 1, + "rgb(8,48,107)" + ] + ], + "line": { + "color": "Grey", + "width": 0.5 + } + }, + "mode": "markers", + "showlegend": false, + "text": [ + "{
\"number\": 0,
\"values\": [
-1.4008740644240856,
0.9876203329634794
],
\"params\": {
\"algorithm_name\": \"RandomForestRegressor\",
\"RandomForestRegressor_algorithm_hash\": \"f1ac01e1bba332215ccbd0c29c9ac3c3\",
\"max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3\": 6,
\"n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3\": 5,
\"max_features__f1ac01e1bba332215ccbd0c29c9ac3c3\": \"auto\",
\"descriptor\": \"{\\\"name\\\": \\\"ECFP\\\", \\\"parameters\\\": {\\\"radius\\\": 3, \\\"nBits\\\": 2048, \\\"returnRdkit\\\": false}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
0.8974639446129832,
0.7963222851094782,
0.9689476735963439
],
\"max_error\": [
-1.48840242088,
-3.0909646684,
-0.8731823597700004
],
\"neg_mean_absolute_error\": [
-0.3398577206596666,
-0.37011928744749995,
-0.1801785928111111
],
\"neg_mean_squared_error\": [
-0.27358848548388487,
-0.4992568230539897,
-0.08109988798375499
],
\"neg_median_absolute_error\": [
-0.1781329040249997,
-0.07859362806250036,
-0.07024148360000071
],
\"r2\": [
0.8855646464210463,
0.7874092730870651,
0.967646275281887
]
},
\"test_scores\": {
\"explained_variance\": [
0.9639016049394578,
0.03701636076517145,
-0.03610583384733812
],
\"max_error\": [
-0.47698010700000104,
-3.5934751852,
-3.35365075223
],
\"neg_mean_absolute_error\": [
-0.1955445833724998,
-0.9818172778531251,
-0.8300816350316663
],
\"neg_mean_squared_error\": [
-0.05489658915425563,
-2.396894317818735,
-1.7508312862992663
],
\"neg_median_absolute_error\": [
-0.14457091122499977,
-0.2862653911375008,
-0.5112363306699992
],
\"r2\": [
0.9626493715156704,
-0.0863255729554766,
-0.08531642263032624
]
},
\"trial_ran\": true,
\"alg_hash\": \"f1ac01e1bba332215ccbd0c29c9ac3c3\"
}
}", + "{
\"number\": 1,
\"values\": [
-1.3561484909673425,
0.9875061220991906
],
\"params\": {
\"algorithm_name\": \"RandomForestRegressor\",
\"RandomForestRegressor_algorithm_hash\": \"f1ac01e1bba332215ccbd0c29c9ac3c3\",
\"max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3\": 7,
\"n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3\": 6,
\"max_features__f1ac01e1bba332215ccbd0c29c9ac3c3\": \"auto\",
\"descriptor\": \"{\\\"name\\\": \\\"ECFP\\\", \\\"parameters\\\": {\\\"radius\\\": 3, \\\"nBits\\\": 2048, \\\"returnRdkit\\\": false}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
0.9084162894985426,
0.8181445295200109,
0.9548650651827988
],
\"max_error\": [
-1.3312607501388882,
-2.575803890333333,
-0.8789508147777774
],
\"neg_mean_absolute_error\": [
-0.3185379542458334,
-0.3721876234467591,
-0.2186742885794444
],
\"neg_mean_squared_error\": [
-0.23237176364322487,
-0.43077113594783456,
-0.11338600111806732
],
\"neg_median_absolute_error\": [
-0.16167386033333386,
-0.12968758635416666,
-0.10968790037500042
],
\"r2\": [
0.9028045903055967,
0.8165714624307546,
0.9547661586438142
]
},
\"test_scores\": {
\"explained_variance\": [
0.9639828426250994,
0.028300593780869132,
0.08298265241577563
],
\"max_error\": [
-0.5291279881666675,
-3.633238043666667,
-3.2079123220277777
],
\"neg_mean_absolute_error\": [
-0.17840332446180518,
-0.9769728427812499,
-0.7746915789652774
],
\"neg_mean_squared_error\": [
-0.054949446552472574,
-2.445966975522084,
-1.5675290508274706
],
\"neg_median_absolute_error\": [
-0.14555937145833298,
-0.2359308740624999,
-0.4445126476111101
],
\"r2\": [
0.9626134083151535,
-0.10856638791328899,
0.02831013180650499
]
},
\"trial_ran\": true,
\"alg_hash\": \"f1ac01e1bba332215ccbd0c29c9ac3c3\"
}
}", + "{
\"number\": 2,
\"values\": [
-2.7856521165563053,
0.21863029956806662
],
\"params\": {
\"algorithm_name\": \"SVR\",
\"SVR_algorithm_hash\": \"ea7ccc7ef4a9329af0d4e39eb6184933\",
\"gamma__ea7ccc7ef4a9329af0d4e39eb6184933\": 5.141096648805748,
\"C__ea7ccc7ef4a9329af0d4e39eb6184933\": 2.4893466963980463e-08,
\"descriptor\": \"{\\\"name\\\": \\\"ECFP_counts\\\", \\\"parameters\\\": {\\\"radius\\\": 3, \\\"useFeatures\\\": true, \\\"nBits\\\": 2048}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
2.9603077433826286e-08,
2.9336498785781373e-08,
2.8653315498061716e-08
],
\"max_error\": [
-2.4076986573565327,
-3.7846304921065332,
-3.8912634246065334
],
\"neg_mean_absolute_error\": [
-1.4215399907231998,
-1.3879988835327637,
-1.442631704156533
],
\"neg_mean_squared_error\": [
-2.5275741396195497,
-3.0279379111768083,
-3.3797629894439454
],
\"neg_median_absolute_error\": [
-1.7373697358565328,
-0.8078731446065337,
-0.7077567928565331
],
\"r2\": [
-0.057222272541218366,
-0.28933945788999327,
-0.348311620292739
]
},
\"test_scores\": {
\"explained_variance\": [
0.0,
0.0,
-2.220446049250313e-16
],
\"max_error\": [
-2.1665475247500012,
-3.5460698895,
-2.9176861759999992
],
\"neg_mean_absolute_error\": [
-1.6446862333125005,
-1.3746714857500002,
-1.1807020024375001
],
\"neg_mean_squared_error\": [
-2.9717560322100764,
-2.906429088642059,
-2.4787712288167807
],
\"neg_median_absolute_error\": [
-1.785473097000001,
-1.25575307125,
-0.8571504162500005
],
\"r2\": [
-1.0219280872487388,
-0.3172580123794684,
-0.5365564595688672
]
},
\"trial_ran\": true,
\"alg_hash\": \"ea7ccc7ef4a9329af0d4e39eb6184933\"
}
}", + "{
\"number\": 3,
\"values\": [
-0.9125905675311808,
0.7861693342190089
],
\"params\": {
\"algorithm_name\": \"PLSRegression\",
\"PLSRegression_algorithm_hash\": \"9f2f76e479633c0bf18cf2912fed9eda\",
\"n_components__9f2f76e479633c0bf18cf2912fed9eda\": 5,
\"descriptor\": \"{\\\"name\\\": \\\"MACCS_keys\\\", \\\"parameters\\\": {}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
0.9849639641402015,
0.9828800440413418,
0.9820949506759404
],
\"max_error\": [
-0.5197622619148001,
-0.578258209487684,
-0.6097236247421289
],
\"neg_mean_absolute_error\": [
-0.15225290925233773,
-0.1395965446158482,
-0.16234470149067756
],
\"neg_mean_squared_error\": [
-0.03594768705569173,
-0.04020521001484897,
-0.04488192649150811
],
\"neg_median_absolute_error\": [
-0.1234255208002697,
-0.11725579182404289,
-0.11763114886180226
],
\"r2\": [
0.9849639641402015,
0.9828800440413418,
0.9820949506759404
]
},
\"test_scores\": {
\"explained_variance\": [
0.7493168066493779,
0.8439890027037464,
-0.24060711891241326
],
\"max_error\": [
-1.2144772276577216,
-1.1632774960663061,
-3.7013161826996095
],
\"neg_mean_absolute_error\": [
-0.41354910413217183,
-0.4816216137839231,
-0.8966876551868384
],
\"neg_mean_squared_error\": [
-0.3692197929485835,
-0.344243524438586,
-2.024308385206373
],
\"neg_median_absolute_error\": [
-0.1919907903167175,
-0.4369218096563432,
-0.4779689126890929
],
\"r2\": [
0.7487896510886491,
0.8439812130464401,
-0.25484114438952377
]
},
\"trial_ran\": true,
\"alg_hash\": \"9f2f76e479633c0bf18cf2912fed9eda\"
}
}", + "{
\"number\": 5,
\"values\": [
-0.5348363849100434,
0.5741725628917808
],
\"params\": {
\"algorithm_name\": \"Ridge\",
\"Ridge_algorithm_hash\": \"cfa1990d5153c8812982f034d788d7ee\",
\"alpha__cfa1990d5153c8812982f034d788d7ee\": 1.7896547008552977,
\"descriptor\": \"{\\\"name\\\": \\\"ECFP_counts\\\", \\\"parameters\\\": {\\\"radius\\\": 3, \\\"useFeatures\\\": true, \\\"nBits\\\": 2048}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
0.9992953526981213,
0.9991026642455866,
0.9995175846362686
],
\"max_error\": [
-0.10766042163284606,
-0.1075140823295726,
-0.07460654622187857
],
\"neg_mean_absolute_error\": [
-0.03156890558158965,
-0.03649203341737044,
-0.02743892276836289
],
\"neg_mean_squared_error\": [
-0.0016846488614926615,
-0.002107340261104867,
-0.0012092527924104882
],
\"neg_median_absolute_error\": [
-0.02561682757298156,
-0.028191061706472453,
-0.024285917350927555
],
\"r2\": [
0.9992953526981213,
0.9991026642455866,
0.9995175846362686
]
},
\"test_scores\": {
\"explained_variance\": [
0.8935768285340041,
0.9636788736288304,
0.19714578534324456
],
\"max_error\": [
-0.774352519041904,
-0.737606816780155,
-3.0739619860946146
],
\"neg_mean_absolute_error\": [
-0.33687126397921885,
-0.20883221082561076,
-0.6894395118728371
],
\"neg_mean_squared_error\": [
-0.16851231132189992,
-0.09041081602749307,
-1.3455860273807372
],
\"neg_median_absolute_error\": [
-0.2676544331750077,
-0.1533721343043899,
-0.35319599551719927
],
\"r2\": [
0.885347326087343,
0.9590238164476862,
0.16588958341895155
]
},
\"trial_ran\": true,
\"alg_hash\": \"cfa1990d5153c8812982f034d788d7ee\"
}
}", + "{
\"number\": 6,
\"values\": [
-2.0072511048320134,
0.2786318125997387
],
\"params\": {
\"algorithm_name\": \"Lasso\",
\"Lasso_algorithm_hash\": \"5457f609662e44f04dcc9423066d2f58\",
\"alpha__5457f609662e44f04dcc9423066d2f58\": 1.6574750183038587,
\"descriptor\": \"{\\\"name\\\": \\\"MACCS_keys\\\", \\\"parameters\\\": {}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
-2.220446049250313e-16,
2.220446049250313e-16,
2.220446049250313e-16
],
\"max_error\": [
-2.777570688350001,
-2.9603145737666665,
-2.956865269583333
],
\"neg_mean_absolute_error\": [
-1.4282473280400005,
-1.4387520675488887,
-1.493614843444444
],
\"neg_mean_squared_error\": [
-2.3907689094972278,
-2.348441205803191,
-2.5066631026365753
],
\"neg_median_absolute_error\": [
-1.4789578996499992,
-1.4262595417333341,
-1.4297088459166676
],
\"r2\": [
0.0,
0.0,
0.0
]
},
\"test_scores\": {
\"explained_variance\": [
-2.220446049250313e-16,
0.0,
-2.220446049250313e-16
],
\"max_error\": [
-1.7966755186499999,
-2.7217539462666664,
-1.983287996083332
],
\"neg_mean_absolute_error\": [
-1.4597502302624998,
-1.3746714857500002,
-1.1697984021874999
],
\"neg_mean_squared_error\": [
-2.201962094075194,
-2.2065759996699903,
-1.613215220750856
],
\"neg_median_absolute_error\": [
-1.4749943137500003,
-1.1956921469833337,
-1.1850802254166668
],
\"r2\": [
-0.49817446547138133,
-6.909745304128911e-05,
-1.0101498061798608e-05
]
},
\"trial_ran\": true,
\"alg_hash\": \"5457f609662e44f04dcc9423066d2f58\"
}
}", + "{
\"number\": 7,
\"values\": [
-0.9625764609276656,
0.27575381401822424
],
\"params\": {
\"algorithm_name\": \"Lasso\",
\"Lasso_algorithm_hash\": \"5457f609662e44f04dcc9423066d2f58\",
\"alpha__5457f609662e44f04dcc9423066d2f58\": 0.3974313630683448,
\"descriptor\": \"{\\\"name\\\": \\\"ECFP_counts\\\", \\\"parameters\\\": {\\\"radius\\\": 3, \\\"useFeatures\\\": true, \\\"nBits\\\": 2048}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
0.6309849216096366,
0.6507390643936857,
0.7152691170875705
],
\"max_error\": [
-2.252632607405257,
-2.175450613956933,
-1.706171237738615
],
\"neg_mean_absolute_error\": [
-0.7938599755102753,
-0.7401987497888673,
-0.6895007958617582
],
\"neg_mean_squared_error\": [
-0.8822297765513636,
-0.8202187727552434,
-0.7137243983777222
],
\"neg_median_absolute_error\": [
-0.7885181658671447,
-0.7501373635815645,
-0.5611223597767765
],
\"r2\": [
0.6309849216096364,
0.6507390643936857,
0.7152691170875705
]
},
\"test_scores\": {
\"explained_variance\": [
0.5935872735161616,
0.6194826453868792,
0.17203437251585474
],
\"max_error\": [
-1.395224586235262,
-1.8000052968656917,
-2.494567849472519
],
\"neg_mean_absolute_error\": [
-0.6933313261473552,
-0.8003665596137148,
-0.8987725327249799
],
\"neg_mean_squared_error\": [
-0.7033126801898267,
-0.839924844272171,
-1.3444918583209988
],
\"neg_median_absolute_error\": [
-0.5545466636288294,
-0.5638571491740674,
-0.7465466926544182
],
\"r2\": [
0.5214790020510424,
0.6193274643314883,
0.16656784388811308
]
},
\"trial_ran\": true,
\"alg_hash\": \"5457f609662e44f04dcc9423066d2f58\"
}
}", + "{
\"number\": 8,
\"values\": [
-1.1114006274062536,
0.7647766019001522
],
\"params\": {
\"algorithm_name\": \"RandomForestRegressor\",
\"RandomForestRegressor_algorithm_hash\": \"f1ac01e1bba332215ccbd0c29c9ac3c3\",
\"max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3\": 28,
\"n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3\": 8,
\"max_features__f1ac01e1bba332215ccbd0c29c9ac3c3\": \"auto\",
\"descriptor\": \"{\\\"name\\\": \\\"ECFP\\\", \\\"parameters\\\": {\\\"radius\\\": 3, \\\"nBits\\\": 2048, \\\"returnRdkit\\\": false}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
0.9206324471278879,
0.8559333726266467,
0.961180851069245
],
\"max_error\": [
-1.0117299001249993,
-2.412809048812499,
-0.86672411225
],
\"neg_mean_absolute_error\": [
-0.322024124525,
-0.3365692731249999,
-0.21982626677291656
],
\"neg_mean_squared_error\": [
-0.20283154301796763,
-0.34722452595138703,
-0.10165721518937171
],
\"neg_median_absolute_error\": [
-0.21598249059374997,
-0.13718304440624962,
-0.12420240787499992
],
\"r2\": [
0.9151605401039691,
0.8521468090862285,
0.9594452022362135
]
},
\"test_scores\": {
\"explained_variance\": [
0.9718126094330066,
0.22890773779793905,
0.21343834363644043
],
\"max_error\": [
-0.4565383455625005,
-3.2067649035625,
-3.1492960391250007
],
\"neg_mean_absolute_error\": [
-0.23419817084375005,
-0.8567377529921876,
-0.6864807362656249
],
\"neg_mean_squared_error\": [
-0.07067602728333097,
-1.8867023442811073,
-1.3768235106543227
],
\"neg_median_absolute_error\": [
-0.20883106609375002,
-0.2070725905937496,
-0.41695414499999917
],
\"r2\": [
0.9519133323494763,
0.14490472528934695,
0.14652589380261938
]
},
\"trial_ran\": true,
\"alg_hash\": \"f1ac01e1bba332215ccbd0c29c9ac3c3\"
}
}", + "{
\"number\": 9,
\"values\": [
-0.7801680863916906,
0.2725738454485389
],
\"params\": {
\"algorithm_name\": \"Lasso\",
\"Lasso_algorithm_hash\": \"5457f609662e44f04dcc9423066d2f58\",
\"alpha__5457f609662e44f04dcc9423066d2f58\": 0.2391884918766034,
\"descriptor\": \"{\\\"name\\\": \\\"ECFP_counts\\\", \\\"parameters\\\": {\\\"radius\\\": 3, \\\"useFeatures\\\": true, \\\"nBits\\\": 2048}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
0.7358450089950654,
0.7138202856549503,
0.7948804037600925
],
\"max_error\": [
-2.0949820822909455,
-2.0906327941397294,
-1.5918151203814253
],
\"neg_mean_absolute_error\": [
-0.6557327871134385,
-0.6571336339465875,
-0.5940423569437646
],
\"neg_mean_squared_error\": [
-0.6315335397831173,
-0.6720762334329013,
-0.5141657235222882
],
\"neg_median_absolute_error\": [
-0.5979734798770764,
-0.6566486168416157,
-0.4804268744621516
],
\"r2\": [
0.7358450089950654,
0.7138202856549503,
0.7948804037600925
]
},
\"test_scores\": {
\"explained_variance\": [
0.6836083752661624,
0.6966716816699435,
0.2867220579848473
],
\"max_error\": [
-1.226090481431986,
-1.494693861320691,
-2.361646437233209
],
\"neg_mean_absolute_error\": [
-0.5558366160388878,
-0.7399472892266868,
-0.8485251777144946
],
\"neg_mean_squared_error\": [
-0.5158538524332629,
-0.6693267648386103,
-1.1553236419031985
],
\"neg_median_absolute_error\": [
-0.39106556854575514,
-0.5988148150541805,
-0.7632441315512826
],
\"r2\": [
0.6490225368956033,
0.6966462910348781,
0.2838306398665221
]
},
\"trial_ran\": true,
\"alg_hash\": \"5457f609662e44f04dcc9423066d2f58\"
}
}", + "{
\"number\": 10,
\"values\": [
-2.785652116470164,
0.21863029955530786
],
\"params\": {
\"algorithm_name\": \"SVR\",
\"SVR_algorithm_hash\": \"ea7ccc7ef4a9329af0d4e39eb6184933\",
\"gamma__ea7ccc7ef4a9329af0d4e39eb6184933\": 0.00044396482429275296,
\"C__ea7ccc7ef4a9329af0d4e39eb6184933\": 2.3831436879125245e-10,
\"descriptor\": \"{\\\"name\\\": \\\"ECFP\\\", \\\"parameters\\\": {\\\"radius\\\": 3, \\\"nBits\\\": 2048, \\\"returnRdkit\\\": false}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
3.51216833394119e-11,
4.623668115044666e-11,
4.652633833757136e-11
],
\"max_error\": [
-2.407698682236905,
-3.7846305169848202,
-3.8912634494534357
],
\"neg_mean_absolute_error\": [
-1.4215400155888405,
-1.387998906730042,
-1.4426317290108275
],
\"neg_mean_squared_error\": [
-2.527574210296399,
-3.0279379800043413,
-3.3797630611425133
],
\"neg_median_absolute_error\": [
-1.73736976072626,
-0.8078731694136589,
-0.7077568176915539
],
\"r2\": [
-0.0572223021036109,
-0.2893394871977455,
-0.3483116488959317
]
},
\"test_scores\": {
\"explained_variance\": [
3.695310724083356e-11,
3.196509723579766e-11,
3.306266371794209e-11
],
\"max_error\": [
-2.1665475246885846,
-3.546069889503773,
-2.917686175962171
],
\"neg_mean_absolute_error\": [
-1.6446862332734806,
-1.3746714857242122,
-1.1807020024072323
],
\"neg_mean_squared_error\": [
-2.9717560320594796,
-2.9064290886106505,
-2.478771228740361
],
\"neg_median_absolute_error\": [
-1.7854730969470158,
-1.2557530712042917,
-0.8571504162245955
],
\"r2\": [
-1.0219280871462755,
-0.3172580123652333,
-0.5365564595214956
]
},
\"trial_ran\": true,
\"alg_hash\": \"ea7ccc7ef4a9329af0d4e39eb6184933\"
}
}", + "{
\"number\": 11,
\"values\": [
-2.785651973436432,
0.21863032832257323
],
\"params\": {
\"algorithm_name\": \"SVR\",
\"SVR_algorithm_hash\": \"ea7ccc7ef4a9329af0d4e39eb6184933\",
\"gamma__ea7ccc7ef4a9329af0d4e39eb6184933\": 0.00028965395242758657,
\"C__ea7ccc7ef4a9329af0d4e39eb6184933\": 2.99928292425642e-07,
\"descriptor\": \"{\\\"name\\\": \\\"ECFP_counts\\\", \\\"parameters\\\": {\\\"radius\\\": 3, \\\"useFeatures\\\": true, \\\"nBits\\\": 2048}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
1.0042828013645533e-07,
1.2201057308924845e-07,
1.1946357847403277e-07
],
\"max_error\": [
-2.407698483275782,
-3.784630419617357,
-3.8912632434668657
],
\"neg_mean_absolute_error\": [
-1.4215399488000382,
-1.3879988187073213,
-1.4426316383422448
],
\"neg_mean_squared_error\": [
-2.5275739932894736,
-3.027937838296765,
-3.3797627274551605
],
\"neg_median_absolute_error\": [
-1.737369648298217,
-0.8078729699590448,
-0.7077567183897604
],
\"r2\": [
-0.057222211334936324,
-0.28933942685662384,
-0.3483115157757879
]
},
\"test_scores\": {
\"explained_variance\": [
8.476231749821039e-08,
8.974784559967475e-08,
9.749491269861466e-08
],
\"max_error\": [
-2.166547505060172,
-3.54606983564109,
-2.9176859960038835
],
\"neg_mean_absolute_error\": [
-1.6446861757757678,
-1.3746714277448118,
-1.180701917475541
],
\"neg_mean_squared_error\": [
-2.9717558552690067,
-2.9064290286099874,
-2.4787710364303033
],
\"neg_median_absolute_error\": [
-1.785473043619366,
-1.2557529712063742,
-0.8571503548245416
],
\"r2\": [
-1.021927966861293,
-0.3172579851716042,
-0.5365563403111151
]
},
\"trial_ran\": true,
\"alg_hash\": \"ea7ccc7ef4a9329af0d4e39eb6184933\"
}
}", + "{
\"number\": 12,
\"values\": [
-0.6101359993004856,
0.3011280543457062
],
\"params\": {
\"algorithm_name\": \"PLSRegression\",
\"PLSRegression_algorithm_hash\": \"9f2f76e479633c0bf18cf2912fed9eda\",
\"n_components__9f2f76e479633c0bf18cf2912fed9eda\": 4,
\"descriptor\": \"{\\\"name\\\": \\\"ECFP_counts\\\", \\\"parameters\\\": {\\\"radius\\\": 3, \\\"useFeatures\\\": true, \\\"nBits\\\": 2048}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
0.9977100736908402,
0.9975427640313913,
0.9968195578599012
],
\"max_error\": [
-0.2167986167477789,
-0.2277990799414109,
-0.20980298817103638
],
\"neg_mean_absolute_error\": [
-0.055854002084797474,
-0.05645785050051896,
-0.0720026052862923
],
\"neg_mean_squared_error\": [
-0.00547468462497904,
-0.005770674201062414,
-0.007972296962656158
],
\"neg_median_absolute_error\": [
-0.0476138511534252,
-0.04300842479088396,
-0.06452439449761282
],
\"r2\": [
0.9977100736908402,
0.9975427640313913,
0.9968195578599012
]
},
\"test_scores\": {
\"explained_variance\": [
0.8886597774000782,
0.6722676099050113,
0.5125354270829542
],
\"max_error\": [
-0.6878749296055089,
-2.15435471941149,
-2.426892457483266
],
\"neg_mean_absolute_error\": [
-0.3897525804896699,
-0.555957492705223,
-0.5955349500027809
],
\"neg_mean_squared_error\": [
-0.19483222604638134,
-0.7361837217462683,
-0.8993920501088071
],
\"neg_median_absolute_error\": [
-0.3760392229997924,
-0.2733259086216404,
-0.36468915433905247
],
\"r2\": [
0.8674397407207732,
0.6663452379267604,
0.44247914119156395
]
},
\"trial_ran\": true,
\"alg_hash\": \"9f2f76e479633c0bf18cf2912fed9eda\"
}
}", + "{
\"number\": 14,
\"values\": [
-0.5356113574175657,
0.5769721187181905
],
\"params\": {
\"algorithm_name\": \"Ridge\",
\"Ridge_algorithm_hash\": \"cfa1990d5153c8812982f034d788d7ee\",
\"alpha__cfa1990d5153c8812982f034d788d7ee\": 1.4060379177903557,
\"descriptor\": \"{\\\"name\\\": \\\"ECFP_counts\\\", \\\"parameters\\\": {\\\"radius\\\": 3, \\\"useFeatures\\\": true, \\\"nBits\\\": 2048}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
0.999517094017306,
0.9994020294133303,
0.9996516810167624
],
\"max_error\": [
-0.08555683896758026,
-0.08555379923076956,
-0.06975837183481826
],
\"neg_mean_absolute_error\": [
-0.02628178610683247,
-0.029832661411189114,
-0.023240052093104483
],
\"neg_mean_squared_error\": [
-0.0011545166096350072,
-0.0014042987655932706,
-0.0008731183432294868
],
\"neg_median_absolute_error\": [
-0.020374460673226125,
-0.022559597416789146,
-0.01924940342623671
],
\"r2\": [
0.999517094017306,
0.9994020294133303,
0.9996516810167624
]
},
\"test_scores\": {
\"explained_variance\": [
0.8935387516249337,
0.9646018619542002,
0.19469890517579236
],
\"max_error\": [
-0.7768335199576253,
-0.7364046007808875,
-3.081177105864297
],
\"neg_mean_absolute_error\": [
-0.3373361317047229,
-0.20254120602107317,
-0.6870056344543213
],
\"neg_mean_squared_error\": [
-0.16873349121216252,
-0.0878643336631266,
-1.350236247377408
],
\"neg_median_absolute_error\": [
-0.265542253953738,
-0.144226455352797,
-0.35230079981759177
],
\"r2\": [
0.8851968393624541,
0.9601779386352713,
0.16300697549965926
]
},
\"trial_ran\": true,
\"alg_hash\": \"cfa1990d5153c8812982f034d788d7ee\"
}
}", + "{
\"number\": 15,
\"values\": [
-0.543430366921729,
0.514747412346662
],
\"params\": {
\"algorithm_name\": \"RandomForestRegressor\",
\"RandomForestRegressor_algorithm_hash\": \"f1ac01e1bba332215ccbd0c29c9ac3c3\",
\"max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3\": 20,
\"n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3\": 8,
\"max_features__f1ac01e1bba332215ccbd0c29c9ac3c3\": \"auto\",
\"descriptor\": \"{\\\"name\\\": \\\"MACCS_keys\\\", \\\"parameters\\\": {}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
0.8969240974700634,
0.8006428572427549,
0.9354463388985946
],
\"max_error\": [
-1.1089528326874998,
-2.5305006679999993,
-1.2147494705000002
],
\"neg_mean_absolute_error\": [
-0.3882621802999999,
-0.3752013252250003,
-0.2797012135854168
],
\"neg_mean_squared_error\": [
-0.24821934610451973,
-0.46835869923315343,
-0.16568085389341727
],
\"neg_median_absolute_error\": [
-0.38021252496875046,
-0.13201332959375023,
-0.1504819008437499
],
\"r2\": [
0.8961759352321845,
0.8005661380511462,
0.9339038206932756
]
},
\"test_scores\": {
\"explained_variance\": [
0.9445386713483314,
0.8716143998386577,
0.21756160180475825
],
\"max_error\": [
-0.5202287482500001,
-1.0801857144374996,
-2.5865519054374992
],
\"neg_mean_absolute_error\": [
-0.24804613069531256,
-0.4189198933749999,
-0.8129752672265629
],
\"neg_mean_squared_error\": [
-0.08466611775687974,
-0.2833241151942194,
-1.262300867814088
],
\"neg_median_absolute_error\": [
-0.2058811024687497,
-0.37633277553125044,
-0.7446450660000004
],
\"r2\": [
0.942394732381974,
0.8715912380940694,
0.21751691732965106
]
},
\"trial_ran\": true,
\"alg_hash\": \"f1ac01e1bba332215ccbd0c29c9ac3c3\"
}
}", + "{
\"number\": 16,
\"values\": [
-2.0072511048320134,
0.2786318125997387
],
\"params\": {
\"algorithm_name\": \"Lasso\",
\"Lasso_algorithm_hash\": \"5457f609662e44f04dcc9423066d2f58\",
\"alpha__5457f609662e44f04dcc9423066d2f58\": 1.344271094811757,
\"descriptor\": \"{\\\"name\\\": \\\"ECFP\\\", \\\"parameters\\\": {\\\"radius\\\": 3, \\\"nBits\\\": 2048, \\\"returnRdkit\\\": false}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
-2.220446049250313e-16,
2.220446049250313e-16,
2.220446049250313e-16
],
\"max_error\": [
-2.777570688350001,
-2.9603145737666665,
-2.956865269583333
],
\"neg_mean_absolute_error\": [
-1.4282473280400005,
-1.4387520675488887,
-1.493614843444444
],
\"neg_mean_squared_error\": [
-2.3907689094972278,
-2.348441205803191,
-2.5066631026365753
],
\"neg_median_absolute_error\": [
-1.4789578996499992,
-1.4262595417333341,
-1.4297088459166676
],
\"r2\": [
0.0,
0.0,
0.0
]
},
\"test_scores\": {
\"explained_variance\": [
-2.220446049250313e-16,
0.0,
-2.220446049250313e-16
],
\"max_error\": [
-1.7966755186499999,
-2.7217539462666664,
-1.983287996083332
],
\"neg_mean_absolute_error\": [
-1.4597502302624998,
-1.3746714857500002,
-1.1697984021874999
],
\"neg_mean_squared_error\": [
-2.201962094075194,
-2.2065759996699903,
-1.613215220750856
],
\"neg_median_absolute_error\": [
-1.4749943137500003,
-1.1956921469833337,
-1.1850802254166668
],
\"r2\": [
-0.49817446547138133,
-6.909745304128911e-05,
-1.0101498061798608e-05
]
},
\"trial_ran\": true,
\"alg_hash\": \"5457f609662e44f04dcc9423066d2f58\"
}
}", + "{
\"number\": 18,
\"values\": [
-0.659749443628722,
0.6659085938841998
],
\"params\": {
\"algorithm_name\": \"RandomForestRegressor\",
\"RandomForestRegressor_algorithm_hash\": \"f1ac01e1bba332215ccbd0c29c9ac3c3\",
\"max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3\": 22,
\"n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3\": 6,
\"max_features__f1ac01e1bba332215ccbd0c29c9ac3c3\": \"auto\",
\"descriptor\": \"{\\\"name\\\": \\\"MACCS_keys\\\", \\\"parameters\\\": {}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
0.8771866885159154,
0.7605767073884709,
0.9165904980399754
],
\"max_error\": [
-1.3841658855833332,
-2.73272604925,
-1.399110700083333
],
\"neg_mean_absolute_error\": [
-0.38187799954999974,
-0.3971412162944445,
-0.30359108279166686
],
\"neg_mean_squared_error\": [
-0.29654130407841844,
-0.5648996958669534,
-0.20992361132685675
],
\"neg_median_absolute_error\": [
-0.2927916572083329,
-0.11474098924999998,
-0.1609640780416668
],
\"r2\": [
0.8759640453326875,
0.7594575949054889,
0.9162537593879075
]
},
\"test_scores\": {
\"explained_variance\": [
0.9574469041668595,
0.8643988273760793,
0.022603500535386067
],
\"max_error\": [
-0.6513723423333344,
-0.9304495610000005,
-2.5495979194166667
],
\"neg_mean_absolute_error\": [
-0.20517391297916698,
-0.45735540975,
-0.99868224721875
],
\"neg_mean_squared_error\": [
-0.07485325580276948,
-0.31299982148410066,
-1.591395253599296
],
\"neg_median_absolute_error\": [
-0.13923866258333328,
-0.48702888212499973,
-0.9981938518333333
],
\"r2\": [
0.9490712229775209,
0.8581415509724649,
0.013515798385129396
]
},
\"trial_ran\": true,
\"alg_hash\": \"f1ac01e1bba332215ccbd0c29c9ac3c3\"
}
}", + "{
\"number\": 19,
\"values\": [
-1.1068495306229729,
0.24457822094737378
],
\"params\": {
\"algorithm_name\": \"Lasso\",
\"Lasso_algorithm_hash\": \"5457f609662e44f04dcc9423066d2f58\",
\"alpha__5457f609662e44f04dcc9423066d2f58\": 0.5158832554303112,
\"descriptor\": \"{\\\"name\\\": \\\"ECFP_counts\\\", \\\"parameters\\\": {\\\"radius\\\": 3, \\\"useFeatures\\\": true, \\\"nBits\\\": 2048}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
0.5531147799581386,
0.6007290929175806,
0.6710850220156985
],
\"max_error\": [
-2.298478369320666,
-2.2303250317057834,
-1.838346644735612
],
\"neg_mean_absolute_error\": [
-0.884276617370655,
-0.8095758892055421,
-0.7542552062854129
],
\"neg_mean_squared_error\": [
-1.06839929018991,
-0.9376642504707711,
-0.8244790392177702
],
\"neg_median_absolute_error\": [
-0.7997337260383168,
-0.8255203756112515,
-0.6010572531757918
],
\"r2\": [
0.5531147799581384,
0.6007290929175806,
0.6710850220156985
]
},
\"test_scores\": {
\"explained_variance\": [
0.4824659350945505,
0.572247959208814,
0.10292090653410846
],
\"max_error\": [
-1.5646578805177356,
-1.923118160270179,
-2.52285079257018
],
\"neg_mean_absolute_error\": [
-0.8351781840542875,
-0.841734435970192,
-0.9158632951536998
],
\"neg_mean_squared_error\": [
-0.9236950134129185,
-0.9443233135299345,
-1.4525302649260659
],
\"neg_median_absolute_error\": [
-0.7060567633436148,
-0.6052039733202146,
-0.7720757854308395
],
\"r2\": [
0.37153492028676904,
0.5720117666434388,
0.09959630991973412
]
},
\"trial_ran\": true,
\"alg_hash\": \"5457f609662e44f04dcc9423066d2f58\"
}
}", + "{
\"number\": 20,
\"values\": [
-0.8604898820838102,
0.7086875504668667
],
\"params\": {
\"algorithm_name\": \"PLSRegression\",
\"PLSRegression_algorithm_hash\": \"9f2f76e479633c0bf18cf2912fed9eda\",
\"n_components__9f2f76e479633c0bf18cf2912fed9eda\": 4,
\"descriptor\": \"{\\\"name\\\": \\\"MACCS_keys\\\", \\\"parameters\\\": {}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
0.9729380997999632,
0.9634263118198302,
0.9656682096655208
],
\"max_error\": [
-0.5571499986930784,
-0.7228948575726584,
-0.854153006336114
],
\"neg_mean_absolute_error\": [
-0.1973467502577083,
-0.21308212257099363,
-0.22458331075011997
],
\"neg_mean_squared_error\": [
-0.06469874963016482,
-0.08589115637050779,
-0.08605823207889407
],
\"neg_median_absolute_error\": [
-0.15758839122192647,
-0.14998189671792428,
-0.14162605994908728
],
\"r2\": [
0.9729380997999632,
0.9634263118198303,
0.9656682096655208
]
},
\"test_scores\": {
\"explained_variance\": [
0.7436061621507988,
0.8489165070699433,
-0.13142410450882003
],
\"max_error\": [
-1.2208220182819107,
-1.354109371017831,
-3.527307328138104
],
\"neg_mean_absolute_error\": [
-0.43256728384960125,
-0.3953856066990531,
-0.8748509822063061
],
\"neg_mean_squared_error\": [
-0.380290900048737,
-0.3387404579485774,
-1.8624382882541157
],
\"neg_median_absolute_error\": [
-0.26145563417924356,
-0.17011265265548037,
-0.45298046781931856
],
\"r2\": [
0.7412570736630077,
0.8464753246196242,
-0.15450008015918115
]
},
\"trial_ran\": true,
\"alg_hash\": \"9f2f76e479633c0bf18cf2912fed9eda\"
}
}", + "{
\"number\": 21,
\"values\": [
-0.5919869916997383,
0.2367498627927979
],
\"params\": {
\"algorithm_name\": \"SVR\",
\"SVR_algorithm_hash\": \"ea7ccc7ef4a9329af0d4e39eb6184933\",
\"gamma__ea7ccc7ef4a9329af0d4e39eb6184933\": 0.0009327650919528738,
\"C__ea7ccc7ef4a9329af0d4e39eb6184933\": 6.062479210472502,
\"descriptor\": \"{\\\"name\\\": \\\"ECFP\\\", \\\"parameters\\\": {\\\"radius\\\": 3, \\\"nBits\\\": 2048, \\\"returnRdkit\\\": false}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
0.7842374546454186,
0.8207501586619763,
0.8388394351354407
],
\"max_error\": [
-1.8596758351735492,
-1.6805747542871234,
-1.823107918819086
],
\"neg_mean_absolute_error\": [
-0.4788321327361404,
-0.42546001672177636,
-0.38994606620179817
],
\"neg_mean_squared_error\": [
-0.5464285493502418,
-0.42829517458369093,
-0.43385236090831564
],
\"neg_median_absolute_error\": [
-0.13655484937476903,
-0.10024968956182745,
-0.1000060432903016
],
\"r2\": [
0.7714423392492777,
0.8176257623459602,
0.8269203546132793
]
},
\"test_scores\": {
\"explained_variance\": [
0.8254406163671446,
0.6552430770093162,
0.5453694933369965
],
\"max_error\": [
-1.0076626563695585,
-2.0858951505234673,
-1.7743667608724891
],
\"neg_mean_absolute_error\": [
-0.43420821310566426,
-0.5926528159732936,
-0.6682670932269509
],
\"neg_mean_squared_error\": [
-0.2578508097186242,
-0.7775078661368594,
-0.7406022992437313
],
\"neg_median_absolute_error\": [
-0.3759822002948092,
-0.2889592578375364,
-0.4746849364956609
],
\"r2\": [
0.8245630567115607,
0.6476162207572188,
0.5409107409166944
]
},
\"trial_ran\": true,
\"alg_hash\": \"ea7ccc7ef4a9329af0d4e39eb6184933\"
}
}", + "{
\"number\": 24,
\"values\": [
-1.1114006274062536,
0.7647766019001522
],
\"params\": {
\"algorithm_name\": \"RandomForestRegressor\",
\"RandomForestRegressor_algorithm_hash\": \"f1ac01e1bba332215ccbd0c29c9ac3c3\",
\"max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3\": 26,
\"n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3\": 8,
\"max_features__f1ac01e1bba332215ccbd0c29c9ac3c3\": \"auto\",
\"descriptor\": \"{\\\"name\\\": \\\"ECFP\\\", \\\"parameters\\\": {\\\"radius\\\": 3, \\\"nBits\\\": 2048, \\\"returnRdkit\\\": false}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
0.9206324471278879,
0.8559333726266467,
0.961180851069245
],
\"max_error\": [
-1.0117299001249993,
-2.412809048812499,
-0.86672411225
],
\"neg_mean_absolute_error\": [
-0.322024124525,
-0.3365692731250001,
-0.21982626677291656
],
\"neg_mean_squared_error\": [
-0.20283154301796763,
-0.34722452595138714,
-0.10165721518937171
],
\"neg_median_absolute_error\": [
-0.21598249059374997,
-0.13718304440625007,
-0.12420240787499992
],
\"r2\": [
0.9151605401039691,
0.8521468090862284,
0.9594452022362135
]
},
\"test_scores\": {
\"explained_variance\": [
0.9718126094330066,
0.22890773779793905,
0.21343834363644043
],
\"max_error\": [
-0.4565383455625005,
-3.2067649035625,
-3.1492960391250007
],
\"neg_mean_absolute_error\": [
-0.23419817084375005,
-0.8567377529921874,
-0.6864807362656249
],
\"neg_mean_squared_error\": [
-0.07067602728333097,
-1.8867023442811073,
-1.3768235106543227
],
\"neg_median_absolute_error\": [
-0.20883106609375002,
-0.20707259059374916,
-0.41695414499999917
],
\"r2\": [
0.9519133323494763,
0.14490472528934695,
0.14652589380261938
]
},
\"trial_ran\": true,
\"alg_hash\": \"f1ac01e1bba332215ccbd0c29c9ac3c3\"
}
}", + "{
\"number\": 25,
\"values\": [
-2.0621601907738047,
0.2749020946925899
],
\"params\": {
\"algorithm_name\": \"ChemPropRegressor\",
\"ChemPropRegressor_algorithm_hash\": \"668a7428ff5cdb271b01c0925e8fea45\",
\"activation__668a7428ff5cdb271b01c0925e8fea45\": \"ReLU\",
\"aggregation__668a7428ff5cdb271b01c0925e8fea45\": \"mean\",
\"aggregation_norm__668a7428ff5cdb271b01c0925e8fea45\": 100.0,
\"batch_size__668a7428ff5cdb271b01c0925e8fea45\": 50.0,
\"depth__668a7428ff5cdb271b01c0925e8fea45\": 3.0,
\"dropout__668a7428ff5cdb271b01c0925e8fea45\": 0.0,
\"ensemble_size__668a7428ff5cdb271b01c0925e8fea45\": 1,
\"epochs__668a7428ff5cdb271b01c0925e8fea45\": 5,
\"features_generator__668a7428ff5cdb271b01c0925e8fea45\": \"none\",
\"ffn_hidden_size__668a7428ff5cdb271b01c0925e8fea45\": 300.0,
\"ffn_num_layers__668a7428ff5cdb271b01c0925e8fea45\": 2.0,
\"final_lr_ratio_exp__668a7428ff5cdb271b01c0925e8fea45\": -4,
\"hidden_size__668a7428ff5cdb271b01c0925e8fea45\": 300.0,
\"init_lr_ratio_exp__668a7428ff5cdb271b01c0925e8fea45\": -4,
\"max_lr_exp__668a7428ff5cdb271b01c0925e8fea45\": -3,
\"warmup_epochs_ratio__668a7428ff5cdb271b01c0925e8fea45\": 0.1,
\"descriptor\": \"{\\\"name\\\": \\\"SmilesFromFile\\\", \\\"parameters\\\": {}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
-0.0030084298221983996,
-0.012161707538054234,
0.0017298120410219608
],
\"max_error\": [
-2.7373101426203617,
-3.0090429497797855,
-3.187341899975098
],
\"neg_mean_absolute_error\": [
-1.4223092754175861,
-1.444010474785221,
-1.4795137982689128
],
\"neg_mean_squared_error\": [
-2.4052068497915142,
-2.377039226122409,
-2.5361493212169677
],
\"neg_median_absolute_error\": [
-1.5480976110290525,
-1.4536747993286134,
-1.3328660971721193
],
\"r2\": [
-0.006039036327154967,
-0.012177447852877998,
-0.011763135839585992
]
},
\"test_scores\": {
\"explained_variance\": [
-0.0046704465761211456,
0.033093602781103404,
-0.030933512786851303
],
\"max_error\": [
-1.8682299333981938,
-2.637490054671143,
-2.252076107950927
],
\"neg_mean_absolute_error\": [
-1.5042553457710572,
-1.3561058906000063,
-1.1782307308885192
],
\"neg_mean_squared_error\": [
-2.3574440069269023,
-2.13352134988607,
-1.6955152155084416
],
\"neg_median_absolute_error\": [
-1.5117784228234865,
-1.2060612994278563,
-1.0778323309564208
],
\"r2\": [
-0.6039614961854176,
0.033040887285793796,
-0.05102674518712691
]
},
\"trial_ran\": true,
\"alg_hash\": \"668a7428ff5cdb271b01c0925e8fea45\"
}
}", + "{
\"number\": 26,
\"values\": [
-2.0621601907738047,
0.2749020946925899
],
\"params\": {
\"algorithm_name\": \"ChemPropRegressor\",
\"ChemPropRegressor_algorithm_hash\": \"668a7428ff5cdb271b01c0925e8fea45\",
\"activation__668a7428ff5cdb271b01c0925e8fea45\": \"ReLU\",
\"aggregation__668a7428ff5cdb271b01c0925e8fea45\": \"mean\",
\"aggregation_norm__668a7428ff5cdb271b01c0925e8fea45\": 100.0,
\"batch_size__668a7428ff5cdb271b01c0925e8fea45\": 45.0,
\"depth__668a7428ff5cdb271b01c0925e8fea45\": 3.0,
\"dropout__668a7428ff5cdb271b01c0925e8fea45\": 0.0,
\"ensemble_size__668a7428ff5cdb271b01c0925e8fea45\": 1,
\"epochs__668a7428ff5cdb271b01c0925e8fea45\": 5,
\"features_generator__668a7428ff5cdb271b01c0925e8fea45\": \"none\",
\"ffn_hidden_size__668a7428ff5cdb271b01c0925e8fea45\": 300.0,
\"ffn_num_layers__668a7428ff5cdb271b01c0925e8fea45\": 2.0,
\"final_lr_ratio_exp__668a7428ff5cdb271b01c0925e8fea45\": -4,
\"hidden_size__668a7428ff5cdb271b01c0925e8fea45\": 300.0,
\"init_lr_ratio_exp__668a7428ff5cdb271b01c0925e8fea45\": -4,
\"max_lr_exp__668a7428ff5cdb271b01c0925e8fea45\": -3,
\"warmup_epochs_ratio__668a7428ff5cdb271b01c0925e8fea45\": 0.1,
\"descriptor\": \"{\\\"name\\\": \\\"SmilesFromFile\\\", \\\"parameters\\\": {}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
-0.0030084298221983996,
-0.012161707538054234,
0.0017298120410219608
],
\"max_error\": [
-2.7373101426203617,
-3.0090429497797855,
-3.187341899975098
],
\"neg_mean_absolute_error\": [
-1.4223092754175861,
-1.444010474785221,
-1.4795137982689128
],
\"neg_mean_squared_error\": [
-2.4052068497915142,
-2.377039226122409,
-2.5361493212169677
],
\"neg_median_absolute_error\": [
-1.5480976110290525,
-1.4536747993286134,
-1.3328660971721193
],
\"r2\": [
-0.006039036327154967,
-0.012177447852877998,
-0.011763135839585992
]
},
\"test_scores\": {
\"explained_variance\": [
-0.0046704465761211456,
0.033093602781103404,
-0.030933512786851303
],
\"max_error\": [
-1.8682299333981938,
-2.637490054671143,
-2.252076107950927
],
\"neg_mean_absolute_error\": [
-1.5042553457710572,
-1.3561058906000063,
-1.1782307308885192
],
\"neg_mean_squared_error\": [
-2.3574440069269023,
-2.13352134988607,
-1.6955152155084416
],
\"neg_median_absolute_error\": [
-1.5117784228234865,
-1.2060612994278563,
-1.0778323309564208
],
\"r2\": [
-0.6039614961854176,
0.033040887285793796,
-0.05102674518712691
]
},
\"trial_ran\": true,
\"alg_hash\": \"668a7428ff5cdb271b01c0925e8fea45\"
}
}" + ], + "type": "scatter", + "x": [ + -1.4008740644240856, + -1.3561484909673425, + -2.7856521165563053, + -0.9125905675311808, + -0.5348363849100434, + -2.0072511048320134, + -0.9625764609276656, + -1.1114006274062536, + -0.7801680863916906, + -2.785652116470164, + -2.785651973436432, + -0.6101359993004856, + -0.5356113574175657, + -0.543430366921729, + -2.0072511048320134, + -0.659749443628722, + -1.1068495306229729, + -0.8604898820838102, + -0.5919869916997383, + -1.1114006274062536, + -2.0621601907738047, + -2.0621601907738047 + ], + "y": [ + 0.9876203329634794, + 0.9875061220991906, + 0.21863029956806662, + 0.7861693342190089, + 0.5741725628917808, + 0.2786318125997387, + 0.27575381401822424, + 0.7647766019001522, + 0.2725738454485389, + 0.21863029955530786, + 0.21863032832257323, + 0.3011280543457062, + 0.5769721187181905, + 0.514747412346662, + 0.2786318125997387, + 0.6659085938841998, + 0.24457822094737378, + 0.7086875504668667, + 0.2367498627927979, + 0.7647766019001522, + 0.2749020946925899, + 0.2749020946925899 + ] + }, + { + "hovertemplate": "%{text}Best Trial", + "marker": { + "color": [ + 4, + 13, + 17, + 23 + ], + "colorbar": { + "title": { + "text": "Best Trial" + }, + "x": 1.1, + "xpad": 40 + }, + "colorscale": [ + [ + 0, + "rgb(255,245,240)" + ], + [ + 0.125, + "rgb(254,224,210)" + ], + [ + 0.25, + "rgb(252,187,161)" + ], + [ + 0.375, + "rgb(252,146,114)" + ], + [ + 0.5, + "rgb(251,106,74)" + ], + [ + 0.625, + "rgb(239,59,44)" + ], + [ + 0.75, + "rgb(203,24,29)" + ], + [ + 0.875, + "rgb(165,15,21)" + ], + [ + 1, + "rgb(103,0,13)" + ] + ], + "line": { + "color": "Grey", + "width": 0.5 + } + }, + "mode": "markers", + "showlegend": false, + "text": [ + "{
\"number\": 4,
\"values\": [
-0.5238765412750027,
0.2789424384877304
],
\"params\": {
\"algorithm_name\": \"PLSRegression\",
\"PLSRegression_algorithm_hash\": \"9f2f76e479633c0bf18cf2912fed9eda\",
\"n_components__9f2f76e479633c0bf18cf2912fed9eda\": 3,
\"descriptor\": \"{\\\"name\\\": \\\"ECFP\\\", \\\"parameters\\\": {\\\"radius\\\": 3, \\\"nBits\\\": 2048, \\\"returnRdkit\\\": false}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
0.9922459602118696,
0.9948903572938944,
0.9908331622153396
],
\"max_error\": [
-0.48320288560891855,
-0.3497078087721057,
-0.5450843085444221
],
\"neg_mean_absolute_error\": [
-0.09635794782292499,
-0.079523297746737,
-0.10440879548883496
],
\"neg_mean_squared_error\": [
-0.01853811724846648,
-0.011999695477949981,
-0.022978174042663006
],
\"neg_median_absolute_error\": [
-0.07185892843485808,
-0.05230960537443474,
-0.06809658540781305
],
\"r2\": [
0.9922459602118696,
0.9948903572938944,
0.9908331622153396
]
},
\"test_scores\": {
\"explained_variance\": [
0.9222201387814873,
0.6681556476215966,
0.6039775004142351
],
\"max_error\": [
-0.6124704321639136,
-2.08836938740287,
-2.1141890356053565
],
\"neg_mean_absolute_error\": [
-0.33136953691333026,
-0.5400418425280401,
-0.5590265707291404
],
\"neg_mean_squared_error\": [
-0.13043450193697356,
-0.7454138307155094,
-0.6957812911725253
],
\"neg_median_absolute_error\": [
-0.3174537679481202,
-0.27641233643037877,
-0.40048721917971086
],
\"r2\": [
0.9112547664901909,
0.6621619481838996,
0.5686946721950459
]
},
\"trial_ran\": true,
\"alg_hash\": \"9f2f76e479633c0bf18cf2912fed9eda\"
}
}", + "{
\"number\": 13,
\"values\": [
-0.5361950698070447,
0.23560786523195643
],
\"params\": {
\"algorithm_name\": \"PLSRegression\",
\"PLSRegression_algorithm_hash\": \"9f2f76e479633c0bf18cf2912fed9eda\",
\"n_components__9f2f76e479633c0bf18cf2912fed9eda\": 2,
\"descriptor\": \"{\\\"name\\\": \\\"ECFP\\\", \\\"parameters\\\": {\\\"radius\\\": 3, \\\"nBits\\\": 2048, \\\"returnRdkit\\\": false}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
0.969087200519737,
0.9773711155694136,
0.9634238244756783
],
\"max_error\": [
-0.8234729672493231,
-0.6504845617093373,
-0.9927621260087944
],
\"neg_mean_absolute_error\": [
-0.21796730972158723,
-0.1791131163270336,
-0.23469322888109886
],
\"neg_mean_squared_error\": [
-0.073905359902935,
-0.05314260463814745,
-0.09168414962237614
],
\"neg_median_absolute_error\": [
-0.16601233852332387,
-0.13362521709731867,
-0.2035233156958971
],
\"r2\": [
0.969087200519737,
0.9773711155694136,
0.9634238244756783
]
},
\"test_scores\": {
\"explained_variance\": [
0.8783843090874257,
0.6785299073459821,
0.6058243711409221
],
\"max_error\": [
-0.6534154925349531,
-2.104949941574862,
-1.9858065636263413
],
\"neg_mean_absolute_error\": [
-0.4125432362254887,
-0.5577045030922277,
-0.5938653126077945
],
\"neg_mean_squared_error\": [
-0.20433488253395957,
-0.7279749471185875,
-0.6762753797685868
],
\"neg_median_absolute_error\": [
-0.4193825877264077,
-0.35228446136692293,
-0.4442159437032447
],
\"r2\": [
0.8609743082130376,
0.6700656363333088,
0.5807861204977627
]
},
\"trial_ran\": true,
\"alg_hash\": \"9f2f76e479633c0bf18cf2912fed9eda\"
}
}", + "{
\"number\": 17,
\"values\": [
-0.5194661889628072,
0.40146744515282495
],
\"params\": {
\"algorithm_name\": \"Ridge\",
\"Ridge_algorithm_hash\": \"cfa1990d5153c8812982f034d788d7ee\",
\"alpha__cfa1990d5153c8812982f034d788d7ee\": 1.670604991178476,
\"descriptor\": \"{\\\"name\\\": \\\"ECFP\\\", \\\"parameters\\\": {\\\"radius\\\": 3, \\\"nBits\\\": 2048, \\\"returnRdkit\\\": false}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
0.9991674813978301,
0.9993001311528956,
0.9993814596790304
],
\"max_error\": [
-0.11044190626170458,
-0.10105963906130011,
-0.10858642397661944
],
\"neg_mean_absolute_error\": [
-0.03227726729519196,
-0.02998499983167487,
-0.02875269745141784
],
\"neg_mean_squared_error\": [
-0.001990359590645845,
-0.0016436008391980173,
-0.0015504722000676788
],
\"neg_median_absolute_error\": [
-0.017986631052985036,
-0.022831765939194337,
-0.01587690802440811
],
\"r2\": [
0.9991674813978301,
0.9993001311528956,
0.9993814596790304
]
},
\"test_scores\": {
\"explained_variance\": [
0.9478331336825377,
0.8226268061556614,
0.4534912361723785
],
\"max_error\": [
-0.583111805231578,
-1.613625027558136,
-2.7751131872118266
],
\"neg_mean_absolute_error\": [
-0.26113818851689663,
-0.3815732915771106,
-0.5746999515213917
],
\"neg_mean_squared_error\": [
-0.09812196210856176,
-0.4005689680325697,
-1.05970763674729
],
\"neg_median_absolute_error\": [
-0.23978989929238637,
-0.1230775690306789,
-0.26197335243555786
],
\"r2\": [
0.9332396236390536,
0.81845327494365,
0.34310169669196866
]
},
\"trial_ran\": true,
\"alg_hash\": \"cfa1990d5153c8812982f034d788d7ee\"
}
}", + "{
\"number\": 23,
\"values\": [
-1.2497762395862362,
0.10124660026536195
],
\"params\": {
\"algorithm_name\": \"Lasso\",
\"Lasso_algorithm_hash\": \"5457f609662e44f04dcc9423066d2f58\",
\"alpha__5457f609662e44f04dcc9423066d2f58\": 1.1366172066709432,
\"descriptor\": \"{\\\"name\\\": \\\"ECFP_counts\\\", \\\"parameters\\\": {\\\"radius\\\": 3, \\\"useFeatures\\\": true, \\\"nBits\\\": 2048}}\"
},
\"user_attrs\": {
\"train_scores\": {
\"explained_variance\": [
0.49919889928208194,
0.485769413846579,
0.5965792144210624
],
\"max_error\": [
-2.078675180044696,
-2.00754418204583,
-1.8737360817191515
],
\"neg_mean_absolute_error\": [
-0.9947443028365607,
-0.9736841508601317,
-0.8892226290652387
],
\"neg_mean_squared_error\": [
-1.1972997014383888,
-1.207640297807022,
-1.0112399980473843
],
\"neg_median_absolute_error\": [
-1.001240218403781,
-0.9026347572075428,
-0.7422932303483329
],
\"r2\": [
0.4991988992820817,
0.4857694138465789,
0.5965792144210624
]
},
\"test_scores\": {
\"explained_variance\": [
0.4065556114872124,
0.4412005601775002,
0.1443276921579536
],
\"max_error\": [
-1.6559985383136917,
-1.9949646681399216,
-2.240398709741827
],
\"neg_mean_absolute_error\": [
-0.9805882414834669,
-0.9869012222585254,
-0.8993014056306694
],
\"neg_mean_squared_error\": [
-1.1348798102701512,
-1.233228380052796,
-1.3812205284357613
],
\"neg_median_absolute_error\": [
-0.8142109872843268,
-0.9370077589180785,
-0.812315131026196
],
\"r2\": [
0.22784867291739785,
0.4410735940310567,
0.14380024248136636
]
},
\"trial_ran\": true,
\"alg_hash\": \"5457f609662e44f04dcc9423066d2f58\"
}
}" + ], + "type": "scatter", + "x": [ + -0.5238765412750027, + -0.5361950698070447, + -0.5194661889628072, + -1.2497762395862362 + ], + "y": [ + 0.2789424384877304, + 0.23560786523195643, + 0.40146744515282495, + 0.10124660026536195 + ] + } + ], + "layout": { + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 
0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 
0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 
0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + 
"#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Pareto-front Plot" + }, + "xaxis": { + "title": { + "text": 
"neg_mean_squared_error" + } + }, + "yaxis": { + "title": { + "text": "Standard deviation" + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from optuna.visualization import plot_pareto_front\n", + "\n", + "plot_pareto_front(study)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Further visualization of QSARtuna runs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It is possible to evaluate the parameter importances on regression metric performance across descriptor vs. algorithm choice, based on the completed trials in our study:" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "cliponaxis": false, + "hovertemplate": [ + "descriptor (CategoricalDistribution): 0.1796787107197743", + "algorithm_name (CategoricalDistribution): 0.8203212892802257" + ], + "name": "Objective Value", + "orientation": "h", + "text": [ + "0.18", + "0.82" + ], + "textposition": "outside", + "type": "bar", + "x": [ + 0.1796787107197743, + 0.8203212892802257 + ], + "y": [ + "descriptor", + "algorithm_name" + ] + } + ], + "layout": { + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + 
"startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } 
+ ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + 
"colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 
0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": 
"white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Hyperparameter Importances" + }, + "xaxis": { + "title": { + "text": "Hyperparameter Importance" + } + }, + "yaxis": { + "title": { + "text": "Hyperparameter" + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from optuna.visualization import plot_param_importances\n", + "\n", + "plot_param_importances(study, target=lambda t: t.values[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Parameter importances are represented by non-negative floating point numbers, where higher values mean that the parameters are more important. The returned dictionary is of type collections.OrderedDict and is ordered by its values in a descending order (the sum of the importance values are normalized to 1.0). Hence we can conclude that choice of algortihm is more important than choice of descriptor for our current study.\n", + "\n", + "It is also possible to analyse the importance of these hyperparameter choices on the impact on trial duration:" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "cliponaxis": false, + "hovertemplate": [ + "descriptor (CategoricalDistribution): 0.3823267632304901", + "algorithm_name (CategoricalDistribution): 0.6176732367695099" + ], + "name": "duration", + "orientation": "h", + "text": [ + "0.38", + "0.62" + ], + "textposition": "outside", + "type": "bar", + "x": [ + 0.3823267632304901, + 0.6176732367695099 + ], + "y": [ + "descriptor", + "algorithm_name" + ] + } + ], + "layout": { + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + 
"carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + 
], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + 
"scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + 
"#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, 
+ "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Hyperparameter Importances" + }, + "xaxis": { + "title": { + "text": "Hyperparameter Importance" + } + }, + "yaxis": { + "title": { + "text": "Hyperparameter" + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_param_importances(\n", + " study, target=lambda t: t.duration.total_seconds(), target_name=\"duration\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Optuna also allows us to plot the parameter relationships for our study, like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "dimensions": [ + { + "label": "Objective Value", + "range": [ + -2.7856521165563053, + -0.5194661889628072 + ], + "values": [ + -1.4008740644240856, + -1.3561484909673425, + -2.7856521165563053, + -0.9125905675311808, + -0.5238765412750027, + -0.5348363849100434, + -2.0072511048320134, + -0.9625764609276656, + -1.1114006274062536, + -0.7801680863916906, + -2.785652116470164, + -2.785651973436432, + -0.6101359993004856, + -0.5361950698070447, + -0.5356113574175657, + -0.543430366921729, + -2.0072511048320134, + -0.5194661889628072, + -0.659749443628722, + -1.1068495306229729, + -0.8604898820838102, + -0.5919869916997383, + -1.2497762395862362, + -1.1114006274062536, + -2.0621601907738047, + -2.0621601907738047 + ] + }, + { + "label": "algorithm_name", + "range": [ + 0, + 5 + ], + "ticktext": [ + "RandomForestRegressor", + "SVR", + "PLSRegression", + "Ridge", + "Lasso", + "ChemPropRegressor" + ], + "tickvals": [ + 0, + 1, + 2, + 3, + 4, + 5 + ], + "values": [ + 0, + 0, + 1, + 2, + 2, + 3, + 4, + 4, + 0, + 4, + 1, + 1, + 2, + 2, + 3, + 0, + 4, + 3, + 0, + 4, + 2, + 1, + 4, + 0, + 5, + 5 + ] + }, + { + "label": "descriptor", + "range": [ + 0, + 3 + ], + "ticktext": [ + "{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}", + "{\"name\": \"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}", + "{\"name\": 
\"MACCS_keys\", \"parameters\": {}}", + "{\"name\": \"SmilesFromFile\", \"parameters\": {}}" + ], + "tickvals": [ + 0, + 1, + 2, + 3 + ], + "values": [ + 0, + 0, + 1, + 2, + 0, + 1, + 2, + 1, + 0, + 1, + 0, + 1, + 1, + 0, + 1, + 2, + 0, + 0, + 2, + 1, + 2, + 0, + 1, + 0, + 3, + 3 + ] + } + ], + "labelangle": 30, + "labelside": "bottom", + "line": { + "color": [ + -1.4008740644240856, + -1.3561484909673425, + -2.7856521165563053, + -0.9125905675311808, + -0.5238765412750027, + -0.5348363849100434, + -2.0072511048320134, + -0.9625764609276656, + -1.1114006274062536, + -0.7801680863916906, + -2.785652116470164, + -2.785651973436432, + -0.6101359993004856, + -0.5361950698070447, + -0.5356113574175657, + -0.543430366921729, + -2.0072511048320134, + -0.5194661889628072, + -0.659749443628722, + -1.1068495306229729, + -0.8604898820838102, + -0.5919869916997383, + -1.2497762395862362, + -1.1114006274062536, + -2.0621601907738047, + -2.0621601907738047 + ], + "colorbar": { + "title": { + "text": "Objective Value" + } + }, + "colorscale": [ + [ + 0, + "rgb(247,251,255)" + ], + [ + 0.125, + "rgb(222,235,247)" + ], + [ + 0.25, + "rgb(198,219,239)" + ], + [ + 0.375, + "rgb(158,202,225)" + ], + [ + 0.5, + "rgb(107,174,214)" + ], + [ + 0.625, + "rgb(66,146,198)" + ], + [ + 0.75, + "rgb(33,113,181)" + ], + [ + 0.875, + "rgb(8,81,156)" + ], + [ + 1, + "rgb(8,48,107)" + ] + ], + "reversescale": true, + "showscale": true + }, + "type": "parcoords" + } + ], + "layout": { + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + 
"aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 
0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + 
{ + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, 
+ "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + 
"backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Parallel Coordinate Plot" + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from optuna.visualization import plot_parallel_coordinate\n", + "\n", + "plot_parallel_coordinate(study,\n", + " params=[\"algorithm_name\", \"descriptor\"],\n", + " target=lambda t: t.values[0]) # First performance value taken" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The same can be done for the relationships for the standard deviation of performance:" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "dimensions": [ + { + "label": "Objective Value", + "range": [ + 0.10124660026536195, + 0.9876203329634794 + ], + "values": [ + 0.9876203329634794, + 0.9875061220991906, + 0.21863029956806662, + 0.7861693342190089, + 0.2789424384877304, + 0.5741725628917808, + 0.2786318125997387, + 0.27575381401822424, + 0.7647766019001522, + 0.2725738454485389, + 0.21863029955530786, + 0.21863032832257323, + 0.3011280543457062, + 0.23560786523195643, + 0.5769721187181905, + 0.514747412346662, + 0.2786318125997387, + 0.40146744515282495, + 0.6659085938841998, + 0.24457822094737378, + 0.7086875504668667, + 0.2367498627927979, + 0.10124660026536195, + 0.7647766019001522, + 0.2749020946925899, + 0.2749020946925899 + ] + }, + { + "label": "algorithm_name", + "range": [ + 0, + 5 + ], + "ticktext": [ + "RandomForestRegressor", + "SVR", + "PLSRegression", + "Ridge", + "Lasso", + "ChemPropRegressor" + ], + "tickvals": [ + 0, + 1, + 2, + 3, + 4, + 5 + ], + "values": [ + 0, + 0, + 1, + 2, + 2, + 3, + 4, + 4, + 0, + 4, + 1, + 1, + 2, + 2, + 3, + 0, + 4, + 3, + 0, + 4, + 2, + 1, + 4, + 0, + 5, + 5 + ] + }, + { + "label": "descriptor", + "range": [ + 0, + 3 + ], + "ticktext": [ + "{\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}", + "{\"name\": 
\"ECFP_counts\", \"parameters\": {\"radius\": 3, \"useFeatures\": true, \"nBits\": 2048}}", + "{\"name\": \"MACCS_keys\", \"parameters\": {}}", + "{\"name\": \"SmilesFromFile\", \"parameters\": {}}" + ], + "tickvals": [ + 0, + 1, + 2, + 3 + ], + "values": [ + 0, + 0, + 1, + 2, + 0, + 1, + 2, + 1, + 0, + 1, + 0, + 1, + 1, + 0, + 1, + 2, + 0, + 0, + 2, + 1, + 2, + 0, + 1, + 0, + 3, + 3 + ] + } + ], + "labelangle": 30, + "labelside": "bottom", + "line": { + "color": [ + 0.9876203329634794, + 0.9875061220991906, + 0.21863029956806662, + 0.7861693342190089, + 0.2789424384877304, + 0.5741725628917808, + 0.2786318125997387, + 0.27575381401822424, + 0.7647766019001522, + 0.2725738454485389, + 0.21863029955530786, + 0.21863032832257323, + 0.3011280543457062, + 0.23560786523195643, + 0.5769721187181905, + 0.514747412346662, + 0.2786318125997387, + 0.40146744515282495, + 0.6659085938841998, + 0.24457822094737378, + 0.7086875504668667, + 0.2367498627927979, + 0.10124660026536195, + 0.7647766019001522, + 0.2749020946925899, + 0.2749020946925899 + ], + "colorbar": { + "title": { + "text": "Objective Value" + } + }, + "colorscale": [ + [ + 0, + "rgb(247,251,255)" + ], + [ + 0.125, + "rgb(222,235,247)" + ], + [ + 0.25, + "rgb(198,219,239)" + ], + [ + 0.375, + "rgb(158,202,225)" + ], + [ + 0.5, + "rgb(107,174,214)" + ], + [ + 0.625, + "rgb(66,146,198)" + ], + [ + 0.75, + "rgb(33,113,181)" + ], + [ + 0.875, + "rgb(8,81,156)" + ], + [ + 1, + "rgb(8,48,107)" + ] + ], + "reversescale": true, + "showscale": true + }, + "type": "parcoords" + } + ], + "layout": { + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + 
"size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 
0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + 
"outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + 
"diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": 
"white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Parallel Coordinate Plot" + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from optuna.visualization import plot_parallel_coordinate\n", + "\n", + "plot_parallel_coordinate(study,\n", + " params=[\"algorithm_name\", \"descriptor\"],\n", + " target=lambda t: t.values[1]) # Second standard deviation value taken" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Precomputed descriptors from a file example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Precomputed descriptors can be supplied to models using the \"PrecomputedDescriptorFromFile\" descriptor, and supplying the `input_column` and `response_column` like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(512,)" + ] + }, + "execution_count": 109, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from optunaz.descriptors import PrecomputedDescriptorFromFile\n", + "\n", + "descriptor=PrecomputedDescriptorFromFile.new(\n", + " file=\"../tests/data/precomputed_descriptor/train_with_fp.csv\",\n", + " input_column=\"canonical\", # Name of the identifier for the compound\n", + " response_column=\"fp\") # Name of the column with the pretrained (comma separated) descriptors\n", + "\n", + "descriptor.calculate_from_smi(\"Cc1cc(NC(=O)c2cccc(COc3ccc(Br)cc3)c2)no1\").shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this toy example there are 512 precomputed bit descriptor vectors, and a model can be trained with precomputed descriptors from a file (in a composite descriptor with ECFP), like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-07-02 14:38:07,785] A new study created in memory with name: precomputed_example\n", + "[I 2024-07-02 14:38:07,788] A new 
study created in memory with name: study_name_0\n", + "[I 2024-07-02 14:38:07,919] Trial 0 finished with value: -3014.274803630188 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.011994365911634164, 'descriptor': '{\"parameters\": {\"descriptors\": [{\"name\": \"PrecomputedDescriptorFromFile\", \"parameters\": {\"file\": \"../tests/data/precomputed_descriptor/train_with_fp.csv\", \"input_column\": \"canonical\", \"response_column\": \"fp\"}}, {\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}]}, \"name\": \"CompositeDescriptor\"}'}. Best is trial 0 with value: -3014.274803630188.\n", + "[I 2024-07-02 14:38:08,439] Trial 1 finished with value: -3014.471088599086 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 0.03592375122963953, 'descriptor': '{\"parameters\": {\"descriptors\": [{\"name\": \"PrecomputedDescriptorFromFile\", \"parameters\": {\"file\": \"../tests/data/precomputed_descriptor/train_with_fp.csv\", \"input_column\": \"canonical\", \"response_column\": \"fp\"}}, {\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}]}, \"name\": \"CompositeDescriptor\"}'}. 
Best is trial 0 with value: -3014.274803630188.\n", + "[I 2024-07-02 14:38:10,511] Trial 2 finished with value: -3029.113810544919 and parameters: {'algorithm_name': 'Ridge', 'Ridge_algorithm_hash': 'cfa1990d5153c8812982f034d788d7ee', 'alpha__cfa1990d5153c8812982f034d788d7ee': 1.8153295905650357, 'descriptor': '{\"parameters\": {\"descriptors\": [{\"name\": \"PrecomputedDescriptorFromFile\", \"parameters\": {\"file\": \"../tests/data/precomputed_descriptor/train_with_fp.csv\", \"input_column\": \"canonical\", \"response_column\": \"fp\"}}, {\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}]}, \"name\": \"CompositeDescriptor\"}'}. Best is trial 0 with value: -3014.274803630188.\n", + "[I 2024-07-02 14:38:12,177] Trial 3 finished with value: -4358.575772003129 and parameters: {'algorithm_name': 'RandomForestRegressor', 'RandomForestRegressor_algorithm_hash': 'f1ac01e1bba332215ccbd0c29c9ac3c3', 'max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3': 14, 'n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3': 10, 'max_features__f1ac01e1bba332215ccbd0c29c9ac3c3': , 'descriptor': '{\"parameters\": {\"descriptors\": [{\"name\": \"PrecomputedDescriptorFromFile\", \"parameters\": {\"file\": \"../tests/data/precomputed_descriptor/train_with_fp.csv\", \"input_column\": \"canonical\", \"response_column\": \"fp\"}}, {\"name\": \"ECFP\", \"parameters\": {\"radius\": 3, \"nBits\": 2048, \"returnRdkit\": false}}]}, \"name\": \"CompositeDescriptor\"}'}. 
Best is trial 0 with value: -3014.274803630188.\n" + ] + } + ], + "source": [ + "from optunaz.descriptors import PrecomputedDescriptorFromFile\n", + "\n", + "precomputed_config = OptimizationConfig(\n", + " data=Dataset(\n", + " input_column=\"canonical\",\n", + " response_column=\"molwt\",\n", + " response_type=\"regression\",\n", + " training_dataset_file=\"../tests/data/precomputed_descriptor/train_with_fp.csv\",\n", + " split_strategy=Stratified(fraction=0.2),\n", + " ),\n", + " descriptors=[\n", + " CompositeDescriptor.new(\n", + " descriptors=[\n", + " PrecomputedDescriptorFromFile.new(file=\"../tests/data/precomputed_descriptor/train_with_fp.csv\",\n", + " input_column=\"canonical\", response_column=\"fp\"),\n", + " ECFP.new()])\n", + " ],\n", + " algorithms=[\n", + " RandomForestRegressor.new(n_estimators={\"low\": 5, \"high\": 10}),\n", + " Ridge.new(),\n", + " Lasso.new(),\n", + " PLSRegression.new(),\n", + " ],\n", + " settings=OptimizationConfig.Settings(\n", + " mode=ModelMode.REGRESSION,\n", + " cross_validation=2,\n", + " n_trials=4,\n", + " n_startup_trials=0,\n", + " direction=OptimizationDirection.MAXIMIZATION,\n", + " track_to_mlflow=False,\n", + " random_seed=42,\n", + " ),\n", + ") \n", + "\n", + "precomputed_study = optimize(precomputed_config, study_name=\"precomputed_example\")\n", + "build_best(buildconfig_best(precomputed_study), \"../target/precomputed_model.pkl\")\n", + "with open(\"../target/precomputed_model.pkl\", \"rb\") as f:\n", + " precomputed_model = pickle.load(f)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "N.B: The `qsartuna-predict` CLI command for QSARtuna contains the options `--input-precomputed-file`, `--input-precomputed-input-column` and `--input-precomputed-response-column` for generating predictions at inference time. 
However this is not available within python notebooks and calling predict on a new set of unseen molecules will cause \"Could not find descriptor errors\" like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Could not find descriptor for CCC in file ../tests/data/precomputed_descriptor/train_with_fp.csv.\n", + "Could not find descriptor for CC(=O)Nc1ccc(O)cc1 in file ../tests/data/precomputed_descriptor/train_with_fp.csv.\n" + ] + }, + { + "data": { + "text/plain": [ + "array([nan, nan])" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_molecules = [\"CCC\", \"CC(=O)Nc1ccc(O)cc1\"]\n", + "\n", + "precomputed_model.predict_from_smiles(new_molecules)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A new file with precomputed desciptors from a file should be provided like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([292.65709987, 302.64327077])" + ] + }, + "execution_count": 112, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import tempfile # For this example we use a temp file to store a temporary inference dataset\n", + "\n", + "# extract precomputed descriptor (i.e the 1st descriptor in the composite descriptor for this example)\n", + "precomputed_descriptor = precomputed_model.descriptor.parameters.descriptors[0]\n", + "\n", + "# example fp with 0's for illustration purposes\n", + "example_fp = str([0] * 512)[1:-1]\n", + "\n", + "with tempfile.NamedTemporaryFile() as temp_file:\n", + " # write the query data to a new file\n", + " X = pd.DataFrame(\n", + " data={\"canonical\": new_molecules,\n", + " \"fp\": [example_fp for i in range(len(new_molecules))]})\n", + " X.to_csv(temp_file.name)\n", + " \n", + " # set precomputed 
descriptor to the new file\n", + " precomputed_descriptor.parameters.file=temp_file.name\n", + " preds = precomputed_model.predict_from_smiles([\"CCC\", \"CC(=O)Nc1ccc(O)cc1\"])\n", + "\n", + "preds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/sphinx-builddir/html/searchindex.js b/docs/sphinx-builddir/html/searchindex.js index 70900df..7354c41 100644 --- a/docs/sphinx-builddir/html/searchindex.js +++ b/docs/sphinx-builddir/html/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["README", "algorithms", "deduplicator", "descriptors", "index", "modules", "notebooks/QSARtuna_Tutorial", "notebooks/preprocess_data", "optunaz", "optunaz.config", "optunaz.utils", "optunaz.utils.enums", "optunaz.utils.preprocessing", "splitters", "transform"], "filenames": ["README.md", "algorithms.rst", "deduplicator.rst", "descriptors.rst", "index.rst", "modules.rst", "notebooks/QSARtuna_Tutorial.ipynb", "notebooks/preprocess_data.ipynb", "optunaz.rst", "optunaz.config.rst", "optunaz.utils.rst", "optunaz.utils.enums.rst", "optunaz.utils.preprocessing.rst", "splitters.rst", "transform.rst"], "titles": ["QSARtuna \ud80c\udd9b: QSAR using Optimization for Hyperparameter Tuning (formerly Optuna AZ and QPTUNA)", "Available algorithms", "Available deduplicators", "Available descriptors", "Welcome to QSARtuna Documentation!", "optunaz", "QSARtuna CLI Tutorial", "Preprocessing data for QSARtuna", "optunaz package", "optunaz.config package", "optunaz.utils package", 
"optunaz.utils.enums package", "optunaz.utils.preprocessing package", "Available splitters", "Available transform"], "terms": {"build": [0, 4, 8, 9, 10, 11], "predict": [0, 1, 3, 5, 6, 7, 9, 10, 11], "compchem": 0, "develop": [0, 3, 8], "uncertainti": [0, 1, 4, 7, 8, 9, 12, 14], "quantif": 0, "explain": [0, 1, 4, 5, 9], "mind": 0, "thi": [0, 1, 3, 4, 7, 8, 9, 10, 11, 12, 13], "librari": [0, 7], "search": [0, 1, 3, 8, 9], "best": [0, 1, 8, 9], "ml": [0, 4, 9], "algorithm": [0, 3, 4, 7, 8, 9, 10, 11, 12, 13], "molecular": [0, 1, 3, 4, 6, 7, 8, 9], "descriptor": [0, 4, 5, 9, 11], "given": [0, 1, 3, 6, 7, 8, 9, 10], "data": [0, 1, 3, 4, 8, 9, 11, 12, 13, 14], "itself": [0, 6], "done": [0, 6], "emploi": [0, 3, 6, 7, 8], "latest": [0, 9], "state": [0, 6, 12], "art": [0, 6], "estim": [0, 1, 3, 4, 7, 8, 9, 11], "packag": [0, 1, 3, 5, 6, 7], "further": [0, 9], "document": [0, 6, 7], "github": [0, 6, 8, 12], "page": 0, "here": [0, 3, 6, 7, 8, 9, 12, 13], "structur": [0, 1, 3, 6, 7, 8, 9], "around": [0, 6, 7], "train": [0, 1, 3, 4, 7, 8, 9, 11, 12, 13], "mani": [0, 1, 6, 9], "differ": [0, 1, 3, 6, 8, 9, 12], "paramet": [0, 1, 3, 6, 7, 8, 9, 11, 12, 13, 14], "onli": [0, 1, 3, 7, 8, 9, 12, 13], "dataset": [0, 1, 7, 8, 9, 11, 12], "usual": [0, 1, 6, 7, 9], "cross": [0, 1, 6, 9, 12], "valid": [0, 1, 3, 7, 8, 9, 11, 12], "pick": 0, "evalu": [0, 4, 5, 6], "its": [0, 1, 6, 9, 11], "perform": [0, 3, 8, 11, 12, 14], "test": [0, 1, 4, 7, 8, 9, 11, 12, 13], "prod": [0, 6], "re": [0, 6, 8], "merg": [0, 7, 8, 12], "ha": [0, 1, 6, 7, 8, 9, 12], "drawback": [0, 6], "left": [0, 6, 7], "result": [0, 1, 3, 6, 7, 8, 9], "big": [0, 6], "benefit": [0, 6], "final": [0, 1, 6, 9], "all": [0, 1, 3, 6, 7, 8, 9, 11, 12], "avail": [0, 4, 6, 7, 8], "let": [0, 6, 7], "s": [0, 6, 7], "look": [0, 1, 6, 7, 9], "trivial": 0, "exampl": [0, 3, 7, 8, 9], "weight": [0, 1, 3, 6, 8, 9, 11], "set": [0, 3, 4, 7, 8, 9, 10, 11, 12, 13], "50": [0, 1, 3, 6, 8, 9], "molecul": [0, 1, 3, 6, 7, 8, 9], "we": [0, 3, 6, 7, 8, 
9, 12, 13], "start": [0, 4, 6, 7], "format": [0, 3, 6, 7, 8, 10], "It": [0, 1, 3, 6, 7, 8, 9], "contain": [0, 3, 6, 8], "four": 0, "main": [0, 3, 6, 7, 8], "section": [0, 6, 7], "locat": [0, 6], "column": [0, 3, 7, 8, 11, 12, 13, 14], "detail": [0, 3, 6, 7, 8, 9, 10], "about": [0, 6, 7], "which": [0, 1, 3, 6, 7, 8, 9, 12], "below": [0, 3, 6, 7, 8], "task": [0, 3, 7, 8, 9, 11], "training_dataset_fil": [0, 6, 7, 8], "drd2": [0, 6], "subset": [0, 1, 6, 7, 9], "csv": [0, 3, 4, 6, 8], "input_column": [0, 3, 6, 7, 8, 11], "canon": [0, 6, 8, 12], "response_column": [0, 3, 6, 7, 8, 11], "molwt": [0, 6], "mode": [0, 6, 8, 9, 11], "regress": [0, 1, 7, 9, 11, 12], "cross_valid": [0, 6, 9, 11, 12], "5": [0, 1, 3, 6, 7, 8, 9, 12], "direct": [0, 1, 6, 9, 11], "maxim": [0, 1, 6, 9], "n_trial": [0, 6, 8, 9, 11], "100": [0, 1, 3, 6, 7, 8, 9, 10], "n_startup_tri": [0, 6, 8, 9], "30": [0, 1, 6, 7, 9], "name": [0, 1, 3, 6, 7, 8, 9, 10, 11, 12, 13, 14], "ecfp": [0, 4, 6, 8, 9, 11], "radiu": [0, 3, 6, 8, 11], "3": [0, 1, 3, 4, 6, 7, 8, 9], "nbit": [0, 3, 6, 8, 9, 11], "2048": [0, 3, 6, 8], "maccs_kei": [0, 4, 6, 8, 11], "randomforestregressor": [0, 4, 6, 9, 11], "max_depth": [0, 1, 9, 11], "low": [0, 1, 3, 6, 7, 8, 9, 11], "2": [0, 1, 3, 6, 7, 8, 9, 12, 13], "high": [0, 1, 3, 6, 8, 9, 11], "32": [0, 1, 6, 9], "n_estim": [0, 1, 6, 9, 11], "10": [0, 1, 3, 6, 7, 8, 9, 12], "250": [0, 1, 6, 9], "max_featur": [0, 1, 9, 11], "auto": [0, 1, 6, 8, 9], "ridg": [0, 4, 6, 9, 11], "alpha": [0, 1, 6, 7, 8, 9, 11], "0": [0, 1, 3, 4, 6, 7, 8, 9, 12, 13], "lasso": [0, 4, 6, 9, 11], "xgbregressor": [0, 4, 6, 9, 11], "learning_r": [0, 1, 9, 11], "1": [0, 1, 3, 4, 6, 7, 8, 9, 10, 12, 13], "specifi": [0, 1, 6, 7, 8, 9, 11], "In": [0, 1, 3, 6, 7, 8, 9], "rel": [0, 6], "path": [0, 1, 3, 6, 7, 8, 9, 10], "folder": [0, 6, 7], "ar": [0, 1, 3, 6, 7, 8, 9, 11, 12, 13], "want": [0, 6, 7], "fold": [0, 1, 6, 7, 9, 12, 13], "valu": [0, 1, 3, 6, 7, 8, 9, 10, 11, 12, 13, 14], "object": [0, 1, 5, 7, 9, 10, 11, 12], 
"function": [0, 1, 3, 4, 7, 8, 9, 10, 11, 12, 14], "standard": [0, 1, 7, 9, 12, 14], "scikit": [0, 6, 8, 12, 13], "learn": [0, 1, 3, 7, 8, 9, 12, 13], "have": [0, 3, 6, 7, 8, 9, 12], "total": [0, 6, 7], "trial": [0, 1, 9, 10, 11], "first": [0, 1, 3, 6, 7, 8, 9, 12], "startup": [0, 6], "should": [0, 1, 3, 6, 7, 8, 9, 12], "random": [0, 1, 4, 9, 12], "explor": [0, 6], "get": [0, 6, 7, 8], "stuck": 0, "earli": [0, 1, 6, 8, 9], "one": [0, 3, 7, 8, 9, 12, 13], "local": [0, 6], "minimum": [0, 1, 3, 8, 9, 12, 13], "two": [0, 1, 6, 7, 9, 12, 13], "free": [0, 3, 8], "pair": [0, 6], "ani": [0, 3, 6, 7, 8, 9, 12], "when": [0, 1, 6, 7, 8, 9, 12, 13], "our": [0, 6, 7], "time": [0, 1, 3, 7, 8, 9], "can": [0, 1, 3, 6, 7, 8, 9, 12, 13], "deploi": 0, "singular": [0, 6], "To": [0, 6, 7], "insid": [0, 8], "follow": [0, 3, 6, 7, 8, 9], "syntax": 0, "exec": 0, "sif": 0, "project": [0, 1, 9], "cc": [0, 6, 7], "mai": [0, 1, 3, 6, 7, 8, 9], "qsartuna_latest": 0, "opt": 0, "venv": [0, 6], "bin": [0, 6, 7, 12, 13], "config": [0, 1, 5, 6, 8], "regression_drd2_50": 0, "buildconfig": [0, 5, 6, 8, 10], "outpath": 0, "target": [0, 1, 6, 7, 9, 12, 14], "pkl": [0, 1, 6, 9], "sinc": [0, 3, 6, 7, 8, 12], "long": [0, 7], "avoid": [0, 6, 12], "login": 0, "node": 0, "queue": 0, "instead": [0, 1, 3, 6, 7, 8, 9, 12], "script": 0, "give": [0, 6], "sbatch": 0, "sh": 0, "ntask": 0, "cpu": 0, "per": [0, 1, 7, 9], "mem": 0, "4g": 0, "partit": 0, "core": [0, 7, 9], "illustr": [0, 6, 7], "how": [0, 1, 3, 6, 7, 8, 9], "chose": 0, "chang": [0, 6], "directori": [0, 6, 10], "cd": 0, "project_fold": 0, "optunaaz": 0, "version": [0, 7, 9], "optunaaz_latest": 0, "complet": [0, 3, 6, 8], "creat": [0, 7, 10], "pickl": [0, 6], "your": [0, 6, 7], "home": 0, "under": [0, 6], "built": [0, 3, 7, 8, 9], "infer": [0, 6, 7, 8], "input": [0, 1, 3, 4, 6, 8, 9, 12, 13, 14], "smile": [0, 3, 6, 7, 8, 10, 11, 12, 13], "output": [0, 1, 6, 7, 8, 9, 12, 14], "note": [0, 1, 3, 7, 8, 9, 11, 12, 13], "_": [0, 6], "point": [0, 1, 7, 9, 12, 
13], "most": [0, 6, 7], "recent": [0, 6], "legaci": 0, "requir": [0, 3, 6, 7, 8], "same": [0, 1, 6, 7, 9], "modifi": [0, 8], "abov": [0, 6, 7], "suppli": [0, 1, 3, 6, 7, 8, 9], "qsartuna_": 0, "replac": [0, 6, 10], "e": [0, 1, 3, 7, 8, 9, 12, 14], "g": [0, 3, 7, 8, 12, 14], "qsartuna_2": 0, "1_model": 0, "would": [0, 3, 6, 7, 8], "gener": [0, 1, 3, 6, 7, 8, 9, 12, 13], "tri": 0, "dure": [0, 1, 3, 6, 7, 8, 9], "mlflow": [0, 5, 8, 11], "track": [0, 3, 5, 8, 9], "ui": 0, "modul": [0, 5, 6], "load": [0, 1, 6, 7, 9], "Then": 0, "open": [0, 1, 6, 9], "link": 0, "browser": 0, "If": [0, 1, 3, 6, 8, 9, 12, 13], "you": [0, 1, 6, 7, 9], "scp": 0, "forward": [0, 1, 9], "port": 0, "separ": [0, 3, 7, 8], "ssh": 0, "session": 0, "non": [0, 1, 3, 6, 7, 8, 9], "machin": [0, 1, 3, 6, 7, 8, 9], "n": [0, 1, 6, 7, 9, 10], "l": 0, "localhost": 0, "5000": 0, "user": [0, 1, 3, 4, 6, 8, 9, 11, 12, 13, 14], "intranet": 0, "net": 0, "just": [0, 6], "execut": 0, "select": [0, 1, 3, 6, 7, 8, 9], "experi": [0, 7, 10], "after": [0, 1, 3, 6, 7, 8, 9, 10], "choos": [0, 4], "compar": [0, 6], "comparison": [0, 6, 7], "show": [0, 6, 7], "call": [0, 1, 3, 6, 8, 9, 12], "well": [0, 6, 7], "metric": [0, 1, 6, 8, 9, 11], "At": 0, "bottom": 0, "plot": [0, 6, 7, 9, 11], "For": [0, 1, 3, 6, 7, 8, 9, 12], "x": [0, 1, 3, 6, 7, 8, 9, 12, 13], "axi": [0, 6, 7], "number": [0, 1, 3, 6, 7, 8, 9, 10], "y": [0, 1, 7, 8, 9, 12, 13], "cvmean": 0, "r2": [0, 6, 7, 9], "more": [0, 1, 3, 7, 8, 9, 10], "click": 0, "individu": [0, 1, 6, 7, 9], "There": [0, 1, 6, 7, 9], "access": [0, 3, 8], "conda": 0, "environ": [0, 3, 6, 8], "instal": [0, 6], "purg": 0, "miniconda3": 0, "my_env_with_qsartuna": 0, "pip": [0, 6], "activ": [0, 1, 6, 7, 9, 11, 12, 14], "case": [0, 1, 6, 7, 8, 9], "check": [0, 6, 8, 11, 12], "m": [0, 6, 8], "http": [0, 3, 6, 7, 8, 9, 12], "com": [0, 6, 7, 8, 12], "molecularai": [0, 6], "14742594": 0, "tar": [0, 6], "gz": [0, 6], "three_step_opt_build_merg": [0, 5, 6], "import": [0, 6], "buildconfig_best": [0, 
6, 8], "build_best": [0, 6, 8], "build_merg": [0, 6, 8], "modelmod": [0, 6, 8, 9], "optimizationdirect": [0, 6, 9], "optconfig": [0, 1, 5, 6, 8, 10, 11], "optimizationconfig": [0, 6, 8, 9], "svr": [0, 4, 6, 9, 11], "randomforest": 0, "pl": [0, 1, 9], "dataread": [0, 5, 6], "ecfp_count": [0, 4, 6, 8, 11], "prepar": [0, 4, 7], "new": [0, 1, 6, 7, 9], "studi": [0, 3, 6, 8, 9, 10, 11], "study_nam": [0, 6, 8], "my_studi": [0, 6], "make": [0, 1, 6, 7, 9, 12, 13, 14], "write": [0, 6], "out": [0, 1, 6, 7, 9], "print": [0, 6, 7], "dump": [0, 6], "indent": [0, 6], "save": [0, 6], "class": [1, 3, 6, 7, 8, 9, 10, 11, 12, 13, 14], "optunaz": [1, 3, 6, 7, 13, 14], "sourc": [1, 3, 8, 9, 10, 11, 12, 13, 14], "adaboost": [1, 9], "classifi": [1, 6, 8, 9, 11], "an": [1, 3, 6, 7, 8, 9, 10, 12, 13], "meta": [1, 6, 9], "begin": [1, 6, 7, 9, 12, 13], "fit": [1, 3, 6, 7, 8, 9], "origin": [1, 3, 6, 7, 8, 9, 12], "addit": [1, 6, 9], "copi": [1, 7, 9, 10], "where": [1, 6, 7, 9], "incorrectli": [1, 9], "instanc": [1, 6, 7, 9], "adjust": [1, 9], "subsequ": [1, 9], "focu": [1, 9], "difficult": [1, 6, 9], "adaboostclassifierparametersnestim": [1, 9], "adaboostclassifierparameterslearningr": [1, 9], "The": [1, 3, 7, 8, 9, 10, 12, 14], "maximum": [1, 3, 6, 7, 8, 9, 12, 13], "boost": [1, 3, 8, 9], "termin": [1, 9], "perfect": [1, 9], "procedur": [1, 3, 7, 8, 9], "stop": [1, 6, 9], "titl": [1, 3, 6, 7, 8, 9, 10, 12, 13, 14], "appli": [1, 3, 6, 7, 8, 9, 12, 13, 14], "each": [1, 3, 6, 7, 8, 9, 12], "classifierat": [1, 9], "iter": [1, 6, 9, 12, 13], "A": [1, 3, 7, 8, 9], "higher": [1, 6, 9], "rateincreas": [1, 9], "contribut": [1, 3, 6, 8, 9], "trade": [1, 9], "off": [1, 6, 7, 9, 12, 13], "between": [1, 6, 7, 9], "learning_rateand": [1, 9], "int": [1, 3, 8, 9, 12, 13], "float": [1, 6, 7, 9, 10, 12, 13, 14], "linear": [1, 7, 9], "model": [1, 3, 4, 7, 8, 9, 10, 12, 13, 14], "l1": [1, 9], "prior": [1, 7, 9], "regular": [1, 6, 9], "spars": [1, 9], "coeffici": [1, 9], "tend": [1, 9], "prefer": [1, 9], 
"solut": [1, 6, 9], "fewer": [1, 9], "zero": [1, 3, 8, 9], "effect": [1, 6, 7, 9, 12], "reduc": [1, 6, 9], "featur": [1, 3, 6, 8, 9], "upon": [1, 6, 9], "depend": [1, 6, 7, 9], "lassoparametersalpha": [1, 9], "constant": [1, 9], "multipli": [1, 9], "term": [1, 6, 9], "control": [1, 6, 7, 9, 12], "strength": [1, 3, 8, 9], "must": [1, 6, 9], "neg": [1, 6, 7, 9], "i": [1, 3, 6, 7, 8, 9], "inf": [1, 6, 8, 9], "equival": [1, 9], "ordinari": [1, 9], "least": [1, 6, 9], "squar": [1, 6, 9], "solv": [1, 6, 9], "linearregress": [1, 9], "numer": [1, 7, 9], "reason": [1, 7, 9], "us": [1, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14], "advis": [1, 6, 7, 9], "implement": [1, 3, 6, 8, 9, 12], "k": [1, 6, 7, 9, 12, 13], "nearest": [1, 6, 9], "neighbor": [1, 6, 9, 12, 13], "vote": [1, 9], "principl": [1, 9], "behind": [1, 9], "method": [1, 3, 7, 8, 9, 11, 12], "find": [1, 6, 9], "predefin": [1, 4, 9, 12, 14], "sampl": [1, 6, 9, 12, 13], "closest": [1, 9], "distanc": [1, 6, 9], "label": [1, 6, 7, 8, 9, 12, 13], "from": [1, 3, 4, 8, 9, 10, 12, 13, 14], "defin": [1, 3, 6, 7, 8, 9, 10, 12], "despit": [1, 6, 9], "simplic": [1, 9], "success": [1, 6, 9], "larg": [1, 7, 9], "classif": [1, 4, 7, 8, 9, 11, 12], "problem": [1, 6, 7, 8, 9], "n_neighbor": [1, 9, 11], "kneighborsclassifierparametersn_neighbor": [1, 9], "default": [1, 7, 8, 9, 12, 13], "kneighbor": [1, 9], "queri": [1, 6, 7, 9], "list": [1, 3, 4, 6, 7, 8, 9, 12, 14], "comput": [1, 3, 7, 8, 9, 12], "minkowski": [1, 6, 9], "euclidean": [1, 9], "regressor": [1, 6, 8, 9, 11], "kneighborsregressorparametersn_neighbor": [1, 9], "logist": [1, 9], "rather": [1, 3, 6, 7, 8, 9], "than": [1, 3, 6, 7, 8, 9], "also": [1, 3, 6, 7, 8, 9, 12], "known": [1, 3, 6, 7, 8, 9], "literatur": [1, 9], "logit": [1, 9], "entropi": [1, 7, 9], "maxent": [1, 9], "log": [1, 4, 9, 10, 11, 12, 14], "probabl": [1, 4, 7, 9], "describ": [1, 3, 6, 8, 9, 12], "possibl": [1, 6, 7, 9, 12, 13], "outcom": [1, 9], "singl": [1, 6, 9], "solver": [1, 9, 11], "c": [1, 3, 6, 7, 8, 9, 
11], "logisticregressionparametersparameterc": [1, 9], "try": [1, 6, 7, 9], "sag": [1, 9], "saga": [1, 9], "fast": [1, 6, 9], "converg": [1, 6, 9], "guarante": [1, 9], "approxim": [1, 6, 9], "scale": [1, 3, 7, 8, 9, 12, 14], "preprocess": [1, 4, 8, 9, 10, 13, 14], "scaler": [1, 3, 8, 9], "invers": [1, 9], "posit": [1, 3, 6, 8, 9], "like": [1, 3, 6, 7, 8, 9], "support": [1, 6, 8, 9], "vector": [1, 3, 6, 8, 9, 12, 14], "smaller": [1, 6, 9], "stronger": [1, 9], "decomposit": [1, 9], "partial": [1, 6, 9], "form": [1, 3, 6, 7, 8, 9, 12], "compon": [1, 3, 6, 8, 9], "fundament": [1, 9], "relat": [1, 3, 6, 7, 8, 9], "matric": [1, 9], "thei": [1, 3, 6, 7, 8, 9], "latent": [1, 9], "variabl": [1, 6, 7, 9], "approach": [1, 3, 6, 7, 8, 9], "covari": [1, 4, 8, 9], "space": [1, 7, 9, 12, 13], "multidimension": [1, 9], "varianc": [1, 6, 7, 9], "other": [1, 6, 7, 9, 12], "word": [1, 6, 9], "both": [1, 6, 7, 9], "lower": [1, 6, 9], "dimension": [1, 9], "subspac": [1, 6, 8, 9], "transform": [1, 4, 8, 9, 10], "n_compon": [1, 9, 11], "ncompon": [1, 9], "keep": [1, 6, 7, 9, 12], "min": [1, 6, 7, 9, 12], "n_sampl": [1, 9], "n_featur": [1, 9], "n_target": [1, 9], "forest": [1, 4, 7, 9], "decis": [1, 6, 7, 9, 12, 14], "tree": [1, 6, 9], "variou": [1, 6, 8, 9], "sub": [1, 6, 9], "averag": [1, 3, 6, 7, 8, 9, 12], "improv": [1, 3, 6, 8, 9], "accuraci": [1, 6, 9], "over": [1, 6, 9], "randomforestclassifierparametersmaxdepth": [1, 9], "randomforestclassifierparametersnestim": [1, 6, 9], "depth": [1, 6, 9, 11], "considerwhen": [1, 9], "split": [1, 4, 8, 9, 12, 13, 14], "thenconsid": [1, 9], "sqrt": [1, 6, 7, 9], "log2": [1, 7, 9, 12, 14], "randomforestregressorparametersmaxdepth": [1, 9], "randomforestregressorparametersnestim": [1, 9], "l2": [1, 9], "loss": [1, 3, 6, 8, 9], "norm": [1, 7, 9], "tikhonov": [1, 9], "base": [1, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14], "libsvm": [1, 9], "quadrat": [1, 9], "impract": [1, 9], "beyond": [1, 9], "ten": [1, 9], "thousand": [1, 9], "svcparametersparameterc": 
[1, 9], "1e": [1, 9], "gamma": [1, 9, 11], "0001": [1, 9], "proport": [1, 6, 9], "strictli": [1, 9], "penalti": [1, 9], "kernel": [1, 9], "epsilon": [1, 9], "complex": [1, 6, 9], "hard": [1, 3, 8, 9], "coupl": [1, 9], "10000": [1, 9], "svrparametersparameterc": [1, 9], "svrparametersgamma": [1, 9], "maxdepth": [1, 9], "nestim": [1, 9], "learningr": [1, 9], "xgboost": [1, 9, 11], "gradient": [1, 9], "stand": [1, 9], "extrem": [1, 6, 9], "paper": [1, 3, 6, 8, 9], "greedi": [1, 9], "friedman": [1, 9], "learner": [1, 9], "round": [1, 6, 7, 9], "prf": [1, 4, 7, 9], "probabilist": [1, 4, 9], "seen": [1, 6, 7, 9], "hybrid": [1, 6, 9], "similar": [1, 3, 6, 7, 8, 9, 12, 13], "take": [1, 3, 6, 7, 8, 9, 12], "real": [1, 6, 7, 9, 12, 13], "threshold": [1, 3, 4, 6, 8, 9, 12, 13, 14], "represent": [1, 3, 4, 8, 9], "ptr": [1, 4, 6, 9, 12], "howev": [1, 3, 6, 7, 8, 9], "belong": [1, 3, 6, 7, 8, 9], "inact": [1, 6, 7, 9, 12, 14], "use_py_gini": [1, 9, 11], "use_py_leaf": [1, 9, 11], "prfclassifierparametersmaxdepth": [1, 9], "prfclassifierparametersnestim": [1, 9], "min_py_sum_leaf": [1, 9, 11], "prfclassifierparametersminpysumleaf": [1, 9], "gini": [1, 9], "true": [1, 3, 6, 7, 8, 9, 12, 13], "py": [1, 6, 7, 9], "leav": [1, 9], "leaf": [1, 9], "allow": [1, 3, 6, 7, 8, 9], "prune": [1, 6, 9], "propag": [1, 9], "small": [1, 7, 9], "thu": [1, 6, 9], "pth": [1, 9], "scheme": [1, 9], "public": [1, 3, 4, 6, 8, 9], "noisi": [1, 9], "chemprop": [1, 3, 4, 8, 9], "deep": [1, 6, 9], "properti": [1, 3, 6, 7, 8, 9, 10], "network": [1, 3, 6, 8, 9], "messag": [1, 6, 9], "pass": [1, 3, 6, 7, 8, 9, 12], "neural": [1, 6, 8, 9], "d": [1, 3, 6, 8, 9], "mpnn": [1, 9], "encod": [1, 9], "graph": [1, 3, 6, 8, 9], "feed": [1, 9], "ffnn": [1, 9], "multi": [1, 3, 8, 9], "layer": [1, 3, 6, 8, 9], "perceptron": [1, 9], "wa": [1, 3, 6, 7, 8, 9], "present": [1, 6, 7, 9], "analyz": [1, 6, 9], "optuna": [1, 4, 6, 8, 9, 10, 11], "optims": [1, 9], "hyperopt": [1, 9], "ensemble_s": [1, 6, 9, 11], "epoch": [1, 6, 9, 
11], "aggreg": [1, 6, 7, 9, 11, 12, 13], "aggregation_norm": [1, 6, 9, 11], "chempropparametersaggregation_norm": [1, 9], "200": [1, 6, 9], "q": [1, 3, 6, 8, 9, 11], "batch_siz": [1, 6, 9, 11], "chempropparametersbatch_s": [1, 9], "chempropparametersdepth": [1, 9], "6": [1, 6, 7, 9], "dropout": [1, 9, 11], "chempropparametersdropout": [1, 9], "4": [1, 3, 6, 7, 8, 9, 12, 13], "04": [1, 6, 9], "features_gener": [1, 6, 9, 11], "ffn_hidden_s": [1, 6, 9, 11], "chempropparametersffn_hidden_s": [1, 9], "300": [1, 6, 9], "2400": [1, 9], "ffn_num_lay": [1, 6, 9, 11], "chempropparametersffn_num_lay": [1, 9], "final_lr_ratio_exp": [1, 6, 9, 11], "chempropparametersfinal_lr_ratio_exp": [1, 9], "hidden_s": [1, 6, 9, 11], "chempropparametershidden_s": [1, 9], "init_lr_ratio_exp": [1, 6, 9, 11], "chempropparametersinit_lr_ratio_exp": [1, 9], "max_lr_exp": [1, 6, 9, 11], "chempropparametersmax_lr_exp": [1, 9], "warmup_epochs_ratio": [1, 6, 9, 11], "chempropparameterswarmup_epochs_ratio": [1, 9], "ensembl": [1, 9, 11], "initialis": [1, 6, 9], "provid": [1, 3, 6, 7, 8, 9, 12], "size": [1, 3, 6, 7, 8, 9], "run": [1, 3, 4, 7, 8, 9, 10], "increas": [1, 6, 7, 9, 12, 13], "400": [1, 9], "sum": [1, 6, 9], "atom": [1, 3, 6, 8, 9, 12, 13], "divid": [1, 6, 9], "up": [1, 3, 6, 8, 9, 10], "batch": [1, 9, 12], "step": [1, 7, 8, 9], "visibl": [1, 9], "randomli": [1, 6, 7, 9], "some": [1, 3, 6, 7, 8, 9], "element": [1, 9, 12, 13], "tensor": [1, 9], "p": [1, 3, 8, 9], "bernoulli": [1, 9], "distribut": [1, 6, 7, 9], "channel": [1, 9], "independ": [1, 7, 9], "everi": [1, 6, 9], "proven": [1, 9], "techniqu": [1, 7, 9], "prevent": [1, 6, 9], "co": [1, 7, 9], "adapt": [1, 3, 8, 9], "neuron": [1, 9], "hidden": [1, 6, 9], "ffn": [1, 6, 9], "exponenti": [1, 7, 9], "rate": [1, 9], "bond": [1, 3, 8, 9], "ratio": [1, 9], "linearli": [1, 9], "init_lr": [1, 9], "max_lr": [1, 9], "afterward": [1, 7, 9], "decreas": [1, 7, 9], "final_lr": [1, 9], "without": [1, 6, 9, 12], "within": [1, 3, 8, 9, 12, 13], 
"hyperparamet": [1, 4, 6, 8, 9], "side": [1, 3, 8, 9], "inform": [1, 3, 8, 9, 10], "nb": [1, 3, 6, 7, 8, 9], "quick": [1, 4, 9], "simpl": [1, 7, 9], "sensibl": [1, 6, 7, 8, 9], "author": [1, 6, 8, 9], "do": [1, 6, 7, 9, 12], "num_it": [1, 6, 9, 11], "search_parameter_level": [1, 6, 9, 11], "dictat": [1, 9], "larger": [1, 3, 6, 7, 8, 9], "chempropregressorpretrain": [1, 6, 9], "pretrin": [1, 9], "pretrain": [1, 6, 9], "carri": [1, 9], "previous": [1, 9], "qsartuna": [1, 9], "chempropparametersepoch": [1, 6, 9], "frzn": [1, 9, 11], "pretrained_model": [1, 6, 9, 11], "none": [1, 3, 6, 8, 9, 10, 12, 13, 14], "fine": [1, 9], "tune": [1, 3, 4, 6, 8, 9], "decid": [1, 9], "freez": [1, 9], "transfer": [1, 9], "frozen": [1, 9], "str": [1, 3, 6, 8, 9, 10, 11, 12, 13, 14], "calibr": [1, 4, 8, 9], "isoton": [1, 6, 9], "vennab": [1, 9], "cv": [1, 6, 9], "With": [1, 3, 8, 9], "across": [1, 6, 7, 9, 12], "fals": [1, 3, 6, 7, 8, 9, 12], "obtain": [1, 6, 7, 9], "unbias": [1, 9], "offer": [1, 3, 6, 8, 9], "p0": [1, 6, 9], "vs": [1, 6, 7, 9], "p1": [1, 6, 9], "discord": [1, 6, 9], "type": [1, 3, 6, 7, 8, 9, 12], "union": [1, 3, 8, 9], "calibratedclassifiercvensembl": [1, 9], "calibratedclassifiercvmethod": [1, 9], "n_fold": [1, 6, 9, 11], "whether": [1, 3, 7, 8, 9, 12, 14], "agnost": [1, 6, 9], "interv": [1, 6, 9], "encompass": [1, 9], "aleator": [1, 9], "epistem": [1, 9], "back": [1, 6, 9], "strong": [1, 9], "theoret": [1, 6, 9], "thank": [1, 9], "conform": [1, 9], "mapie_alpha": [1, 6, 9, 11], "05": [1, 6, 7, 9], "01": [1, 6, 9], "99": [1, 6, 9], "see": [3, 6, 7, 8, 9, 12, 13], "gedeck": [3, 8], "et": [3, 7, 8], "al": [3, 7, 8], "qsar": [3, 4, 6, 7, 8], "good": [3, 6, 7, 8], "practic": [3, 8], "fingerprint": [3, 8, 9], "wai": [3, 6, 7, 8], "daylight": [3, 8], "enumer": [3, 6, 8, 9], "custom": [3, 6, 8], "ref": [3, 8, 10], "16": [3, 6, 7, 8], "hash": [3, 6, 8, 9, 10], "code": [3, 6, 7, 8], "style": [3, 6, 7, 8], "implicitli": [3, 8], "largest": [3, 6, 8], "refer": [3, 6, 8, 10, 11], 
"toward": [3, 6, 7, 8], "binari": [3, 6, 8, 9], "extend": [3, 6, 7, 8], "connect": [3, 6, 8], "roger": [3, 8], "circular": [3, 8], "morgan": [3, 8, 9], "invari": [3, 8], "getmorganfingerprintasbitvect": [3, 8], "rdkit": [3, 6, 7, 8, 9], "systemat": [3, 8], "record": [3, 6, 7, 8], "neighborhood": [3, 8, 9], "h": [3, 6, 7, 8], "multipl": [3, 6, 8, 12, 13], "runtim": [3, 8], "substructur": [3, 6, 8], "map": [3, 6, 7, 8], "integ": [3, 7, 8, 12, 13], "length": [3, 6, 8], "identifi": [3, 6, 7, 8], "diamet": [3, 8], "append": [3, 6, 7, 8, 9], "ecfp4": [3, 8], "correspond": [3, 6, 7, 8, 9], "returnrdkit": [3, 6, 8], "consid": [3, 6, 8, 12, 13], "while": [3, 6, 7, 8, 12], "bit": [3, 6, 8], "sometim": [3, 6, 7, 8], "bool": [3, 8, 9, 12, 13], "calculate_from_mol": [3, 8], "mol": [3, 6, 7, 8], "return": [3, 6, 7, 8, 10, 11, 12, 13], "numpi": [3, 6, 7, 8, 12, 13], "arrai": [3, 6, 8, 12, 13], "count": [3, 6, 8, 9], "gethashedmorganfingerprint": [3, 8], "appear": [3, 8, 11], "usefeatur": [3, 6, 8, 11], "fcfp": [3, 8], "normal": [3, 6, 7, 8, 9], "ones": [3, 6, 8], "definit": [3, 8, 12], "gobbi": [3, 8], "popping": [3, 8], "biotechnolog": [3, 8], "bioengin": [3, 8], "61": [3, 6, 8], "47": [3, 6, 8], "54": [3, 6, 8], "1998": [3, 6, 8], "lead": [3, 7, 8], "score": [3, 4, 8, 9, 10, 12, 14], "fp": [3, 6, 8], "maxpath": [3, 6, 8, 11], "fpsize": [3, 6, 8, 11], "macc": [3, 6, 8], "system": [3, 7, 8], "often": [3, 6, 8], "mdl": [3, 8], "kei": [3, 6, 7, 8, 10, 11], "compani": [3, 8], "calcul": [3, 6, 8, 12, 13, 14], "keyset": [3, 8], "construct": [3, 8, 11], "optim": [3, 4, 7, 8, 9, 11], "durant": [3, 8], "reoptim": [3, 8], "drug": [3, 8], "discoveri": [3, 8], "166": [3, 6, 8], "2d": [3, 8, 9], "essenti": [3, 7, 8], "answer": [3, 8], "fragment": [3, 8], "question": [3, 8], "explicitli": [3, 6, 7, 8], "exist": [3, 8], "sens": [3, 8], "matter": [3, 8], "becaus": [3, 6, 7, 8], "address": [3, 8], "specif": [3, 6, 7, 8, 11], "repres": [3, 6, 7, 8], "9": [3, 6, 7, 8], "1049": [3, 8], "distinct": 
[3, 6, 7, 8, 9], "rdkit_nam": [3, 6, 8, 11], "unscal": [3, 8], "These": [3, 6, 7, 8, 12], "physchem": [3, 6, 8], "caution": [3, 7, 8], "208": [3, 6, 8], "includ": [3, 6, 7, 8], "clogp": [3, 6, 8], "mw": [3, 6, 8], "ring": [3, 7, 8], "rotat": [3, 8], "fraction": [3, 6, 7, 8, 12, 13], "sp3": [3, 8], "kier": [3, 8], "indic": [3, 6, 7, 8, 12, 13], "etc": [3, 6, 8], "tpsa": [3, 6, 8], "slogp": [3, 8], "group": [3, 7, 8, 12, 13], "vsa": [3, 8], "moe": [3, 8], "charg": [3, 8], "www": [3, 8], "org": [3, 6, 8], "doc": [3, 6, 8], "gettingstartedinpython": [3, 8], "html": [3, 6, 7, 8, 9], "whose": [3, 8], "been": [3, 6, 7, 8], "level": [3, 6, 7, 8, 9], "One": [3, 7, 8], "advantag": [3, 8], "interpret": [3, 8], "mean": [3, 6, 7, 8, 9, 12], "physicochem": [3, 8], "intuit": [3, 8], "understood": [3, 8], "option": [3, 4, 7, 8, 9, 11, 12, 13, 14], "jazzy_nam": [3, 6, 8, 11], "jazzy_filt": [3, 6, 8], "jazzi": [3, 8], "hydrat": [3, 8], "energi": [3, 8], "hydrogen": [3, 8], "acceptor": [3, 6, 8], "donor": [3, 6, 8], "found": [3, 6, 8], "doi": [3, 6, 8], "1038": [3, 8], "s41598": [3, 8], "023": [3, 8], "30089": [3, 8], "mmff94": [3, 8], "minimis": [3, 8], "1000da": [3, 8], "compound": [3, 6, 7, 8, 12, 13], "dict": [3, 8, 9, 10, 11], "calculate_from_smi": [3, 6, 8], "smi": [3, 8, 10], "string": [3, 6, 7, 8, 11, 12, 13, 14], "ndarrai": [3, 8], "z": [3, 8, 12, 14], "peptid": [3, 6, 8, 12, 14], "unfittedsklearnscal": [3, 8], "mol_data": [3, 8], "moldata": [3, 8], "file_path": [3, 8], "smiles_column": [3, 8], "fittedsklearnscal": [3, 8], "alia": [3, 8, 9], "precomput": [3, 7, 8], "file": [3, 7, 8, 10, 11, 12, 13], "header": [3, 8], "line": [3, 4, 6, 7, 8, 10], "row": [3, 6, 7, 8], "comma": [3, 6, 7, 8], "respons": [3, 7, 8], "pre": [3, 7, 8], "zscalesdescriptor": [3, 8, 11], "were": [3, 6, 7, 8], "propos": [3, 8], "sandberg": [3, 8], "proteogen": [3, 8], "amino": [3, 6, 8], "acid": [3, 6, 8], "nmr": [3, 8], "thin": [3, 8], "chromatographi": [3, 8], "tlc": [3, 8], "1021": [3, 6, 8], 
"jm9700575": [3, 8], "captur": [3, 6, 7, 8], "lipophil": [3, 8], "steric": [3, 8], "bulk": [3, 8], "polariz": [3, 8], "electron": [3, 8], "polar": [3, 8], "electroneg": [3, 8], "heat": [3, 8], "electrophil": [3, 8], "anoth": [3, 6, 8], "optimis": [3, 6, 8, 9, 10], "either": [3, 7, 8, 9], "through": [3, 6, 8], "_and_": [3, 8], "auxiliari": [3, 6, 8, 12, 14], "continu": [3, 6, 8], "learnt": [3, 6, 8], "manner": [3, 6, 8], "intent": [3, 6, 8], "much": [3, 8], "remain": [3, 7, 8, 12], "deriv": [3, 6, 7, 8], "cautiou": [3, 8], "upload": [3, 8], "_all_": [3, 8], "ruder": [3, 8], "io": [3, 6, 8, 9], "index": [3, 6, 7, 8], "auxiliarytask": [3, 8], "aux_weight_pc": [3, 6, 8, 9, 11], "20": [3, 6, 7, 8], "info": [3, 6, 8], "percentag": [3, 8], "wrap": [3, 6, 8], "certain": [3, 6, 7, 8], "rang": [3, 6, 7, 8, 12, 13], "scaleddescriptorparamet": [3, 8], "cach": [3, 6, 7, 8, 9], "composit": [3, 6, 8], "concaten": [3, 8], "button": [3, 8], "pleas": [3, 6, 7, 8, 10], "compat": [3, 6, 8], "intro": 4, "background": 4, "json": [4, 6, 9, 11], "command": [4, 6], "interfac": [4, 6, 8, 11, 12], "python": [4, 6, 8, 12], "jupyt": [4, 6], "notebook": [4, 6], "introduct": 4, "translat": [4, 6], "sdf": [4, 6, 8], "need": 4, "deal": [4, 12], "duplic": [4, 8, 9, 12], "dedupl": [4, 6, 8, 10], "experiment": [4, 6, 12, 14], "error": [4, 6], "cli": 4, "tutori": 4, "remov": [4, 7, 8, 9, 12, 13], "advanc": [4, 7, 8], "functoinail": 4, "adaboostclassifi": [4, 6, 9, 11], "kneighborsclassifi": [4, 6, 9, 11], "kneighborsregressor": [4, 6, 9, 11], "logisticregress": [4, 6, 9, 11], "plsregress": [4, 6, 9, 11], "randomforestclassifi": [4, 6, 9, 11], "svc": [4, 6, 9, 11], "prfclassifi": [4, 6, 9, 11], "chempropregressor": [4, 6, 9, 11], "chempropclassifi": [4, 6, 9, 11], "chemprophyperoptclassifi": [4, 6, 9, 11], "chemprophyperoptregressor": [4, 6, 9, 11], "chemprophyperoptregressorpretrain": 4, "calibratedclassifiercvwithva": [4, 6, 9, 11], "mapi": [4, 9, 11], "avalon": [4, 8, 9, 11], "pathfp": [4, 6, 8, 
11], "unscaledphyschemdescriptor": [4, 6, 8, 11], "unscaledjazzydescriptor": [4, 6, 8, 11], "unscaledzscalesdescriptor": [4, 8, 11], "physchemdescriptor": [4, 8, 11], "jazzydescriptor": [4, 8, 11], "precomputeddescriptorfromfil": [4, 6, 8, 11], "zscale": [4, 6, 8, 12], "smilesfromfil": [4, 6, 8, 11], "smilesandsideinfofromfil": [4, 6, 8, 11], "scaleddescriptor": [4, 8, 11], "compositedescriptor": [4, 6, 8, 11], "tempor": [4, 6, 12], "stratifi": [4, 6, 12], "scaffoldsplit": [4, 7, 12], "modeldatatransform": [4, 6, 7, 8, 12], "vectorfromcolumn": [4, 6, 8, 12], "keepfirst": [4, 7, 12], "keeplast": [4, 7, 12], "keeprandom": [4, 7, 12], "keepmin": [4, 7, 12], "keepmax": [4, 7, 12], "keepavg": [4, 7, 12], "keepmedian": [4, 6, 7, 12], "keepkeepallnodedupl": 4, "report": [4, 6, 7], "coverag": 4, "releas": [4, 6, 7], "subpackag": 5, "submodul": 5, "build_from_opt": [5, 8], "content": 5, "util": [5, 6, 7, 8, 13, 14], "files_path": [5, 8], "load_json": [5, 8], "schema": [5, 8, 11], "builder": 5, "metirc": 5, "model_writ": 5, "optbuild": 5, "schemagen": 5, "visual": [5, 9, 11], "intend": 6, "necessari": 6, "gui": 6, "autom": [6, 7], "reinvent": 6, "varieti": 6, "them": [6, 7, 9], "so": [6, 7, 9], "hyper": [6, 9], "influenc": [6, 7, 9], "automat": [6, 7], "idea": [6, 7], "read": [6, 7, 8], "otherwis": [6, 12], "might": [6, 7, 12], "skip": [6, 7], "download": 6, "toi": 6, "chosen": 6, "whole": [6, 7], "less": [6, 7], "minut": 6, "holdout": [6, 7], "readi": 6, "next": [6, 7], "few": [6, 7], "head": [6, 7], "molwt_gt_330": 6, "cc1cc": 6, "nc": [6, 7], "o": [6, 7], "c2cccc": 6, "coc3ccc": 6, "br": [6, 7], "cc3": [6, 7], "c2": [6, 7], "no1": 6, "387": 6, "233": 6, "nc1ccc": 6, "f": [6, 7, 12], "cc1f": 6, "nc1sccc1": 6, "c1nc2ccccc2s1": 6, "4360000000001": 6, "coc": 6, "c1ccccc1nc": 6, "c1cc": 6, "nn1cc1ccccc1": 6, "380": 6, "36000000000007": 6, "ccoc": 6, "sc1nc": 6, "c2ccccc2": 6, "ccc1c": 6, "312": 6, "39400000000006": 6, "ccc": [6, 7], "c1nn": [6, 7], "cc2ccccc2": 6, "c2ccccc12": 
6, "349": 6, "4340000000001": 6, "brc1ccccc1occcoc1cccc2cccnc12": 6, "358": 6, "235": 6, "ccccn1c": 6, "coc2cccc": 6, "oc": 6, "nc2ccccc21": 6, "310": 6, "39700000000005": 6, "ccoc1cccc": 6, "c2sc3nc": 6, "c4ccc": 6, "cc4": 6, "ccc3c2n": 6, "c1": [6, 7], "407": 6, "4700000000001": 6, "coc1ccc": 6, "nc2ccc": 6, "cc2": 6, "c2ccc": 6, "cc1oc": 6, "454": 6, "54800000000023": 6, "sy": [6, 7], "sklearn": [6, 11], "kljk345": [6, 7], "pycharmproject": 6, "public_qptuna": 6, "lib": [6, 7], "python3": [6, 7], "site": [6, 7], "tqdm": 6, "21": [6, 7], "tqdmwarn": 6, "iprogress": 6, "updat": [6, 7], "ipywidget": 6, "readthedoc": [6, 9], "en": [6, 9], "stabl": [6, 8], "user_instal": 6, "autonotebook": 6, "notebook_tqdm": 6, "typic": [6, 7], "test_dataset_fil": [6, 8], "random_se": [6, 9], "42": [6, 7, 12, 13], "seed": [6, 7, 8, 12, 13], "reproduc": [6, 12, 14], "setup": [6, 9], "basic": [6, 8, 9], "importlib": 6, "reload": 6, "basicconfig": 6, "getlogg": 6, "disabl": [6, 7, 11], "np": [6, 7], "seterr": 6, "ignor": 6, "warn": [6, 7], "filterwarn": 6, "categori": 6, "futurewarn": 6, "runtimewarn": 6, "functool": 6, "partialmethod": 6, "__init__": [6, 9], "flood": 6, "decpreci": 6, "simplefilt": 6, "def": [6, 7, 9], "arg": [6, 8, 9], "kwarg": [6, 9], "stderr": 6, "render": [6, 7], "red": 6, "2024": 6, "07": 6, "02": 6, "13": [6, 7], "17": [6, 7], "26": [6, 7], "561": 6, "memori": [6, 8], "714": 6, "study_name_0": 6, "27": [6, 7], "022": 6, "finish": 6, "3594": 6, "2228073972638": 6, "39": [6, 7], "algorithm_nam": [6, 11], "randomforestregressor_algorithm_hash": 6, "f1ac01e1bba332215ccbd0c29c9ac3c3": 6, "max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3": 6, "n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3": 6, "max_features__f1ac01e1bba332215ccbd0c29c9ac3c3": 6, "lt": [6, 7], "randomforestmaxfeatur": [6, 9], "gt": [6, 7], "34": [6, 7], "171": 6, "5029": 6, "734616310275": 6, "svr_algorithm_hash": 6, "ea7ccc7ef4a9329af0d4e39eb6184933": 6, "gamma__ea7ccc7ef4a9329af0d4e39eb6184933": 6, 
"039054412752107935": 6, "c__ea7ccc7ef4a9329af0d4e39eb6184933": 6, "1242780840717016e": 6, "429": 6, "4242": 6, "092751193529": 6, "579": 6, "3393": 6, "577488426015": 6, "ridge_algorithm_hash": 6, "cfa1990d5153c8812982f034d788d7e": 6, "alpha__cfa1990d5153c8812982f034d788d7e": 6, "06877704223043679": 6, "644": 6, "427": 6, "45250420148204": 6, "lasso_algorithm_hash": 6, "5457f609662e44f04dcc9423066d2f58": 6, "alpha__5457f609662e44f04dcc9423066d2f58": 6, "7896547008552977": 6, "698": 6, "3387": 6, "245629616474": 6, "plsregression_algorithm_hash": 6, "9f2f76e479633c0bf18cf2912fed9eda": 6, "n_components__9f2f76e479633c0bf18cf2912fed9eda": 6, "853": 6, "734620250011": 6, "3661540064603184": 6, "1799882524170321": 6, "28": [6, 7], "029": 6, "7": [6, 7, 9], "9650": 6, "026568221794": 6, "kneighborsregressor_algorithm_hash": 6, "1709d2c39117ae29f6c9debe7241287b": 6, "metric__1709d2c39117ae29f6c9debe7241287b": 6, "kneighborsmetr": [6, 9], "n_neighbors__1709d2c39117ae29f6c9debe7241287b": 6, "weights__1709d2c39117ae29f6c9debe7241287b": 6, "kneighborsweight": [6, 9], "uniform": [6, 9], "070": 6, "8": [6, 7, 9], "5437": 6, "151635569594": 6, "05083825348819038": 6, "336": 6, "2669": 6, "8534551928174": 6, "373": 6, "4341": 6, "586120152291": 6, "7921825998469865": 6, "532": 6, "11": [6, 7], "5514": 6, "404088878843": 6, "680": 6, "12": [6, 7], "5431": 6, "634989239215": 6, "722": 6, "3530": 6, "5496618991288": 6, "765": 6, "14": [6, 7], "3497": 6, "6833185436312": 6, "793": 6, "15": [6, 7], "4382": 6, "16208862162": 6, "831": 6, "734620031822": 6, "002825619931800395": 6, "309885135051862e": 6, "09": 6, "870": 6, "679": 6, "3109044887755": 6, "16827992999009767": 6, "932": 6, "18": [6, 7], "2550": 6, "114129318373": 6, "974": 6, "19": [6, 7], "4847": 6, "085792360169": 6, "735431606118867": 6, "29": 6, "016": 6, "268760278916": 6, "0014840820994557746": 6, "04671166881768783": 6, "096": 6, "4783": 6, "0470154796785": 6, "135": 6, "22": [6, 7], "3905": 6, "0064899852296": 6, 
"259": 6, "23": [6, 7], "4030": 6, "45773791647": 6, "340": 6, "24": [6, 7], "4681": 6, "602145939593": 6, "381": 6, "25": [6, 7], "4398": 6, "544034028325": 6, "6452011213193165": 6, "474": 6, "4454": 6, "143979828408": 6, "503": 6, "533": 6, "600": 6, "4397": 6, "330360587512": 6, "617": 6, "682": 6, "31": [6, 7], "2602": 6, "7561184287083": 6, "715": 6, "5267": 6, "388279961089": 6, "2015560027548533": 6, "794": 6, "33": 6, "4863": 6, "581760751052": 6, "836": 6, "388": 6, "96473594016675": 6, "5528259214839937": 6, "906": 6, "35": 6, "5539": 6, "698232987626": 6, "6400992020612235": 6, "962": 6, "36": 6, "5180": 6, "5533034102455": 6, "8968910439566395": 6, "005": 6, "37": 6, "4989": 6, "929984864281": 6, "04458440839692226": 6, "492108041427977": 6, "034": 6, "38": [6, 7], "103": 6, "6528": 6, "215066535042": 6, "16700143339733753": 6, "240": 6, "40": [6, 7], "4168": 6, "7955967552625": 6, "311": 6, "41": 6, "6177": 6, "060727800014": 6, "401": 6, "3963": 6, "906954658343": 6, "435": 6, "43": 6, "6805334166565": 6, "013186009009851564": 6, "001008958590140135": 6, "501": 6, "44": 6, "9300": 6, "86840721566": 6, "547": 6, "45": [6, 7], "83": 6, "87968210939489": 6, "382674443425525e": 6, "565": 6, "46": 6, "594": 6, "626": 6, "48": 6, "717": 6, "49": 6, "3660": 6, "9359502556": 6, "767": 6, "688": 6, "5244070398325": 6, "5267860995545326": 6, "813": 6, "51": 6, "690": 6, "6494438072099": 6, "8458809314722497": 6, "848": 6, "52": 6, "691": 6, "1197058420935": 6, "9167866889210807": 6, "898": 6, "53": 6, "3111710449325": 6, "945685900574672": 6, "934": 6, "9665592812149": 6, "8936837761725833": 6, "970": 6, "55": 6, "4682747008223": 6, "5183865279530455": 6, "030": 6, "56": 6, "687": 6, "5230947231512": 6, "3771771681361766": 6, "078": 6, "57": 6, "4503442069594": 6, "3663259819415374": 6, "127": 6, "58": 6, "686": 6, "9553733616618": 6, "2925652230875628": 6, "174": 6, "59": 6, "370": 6, "2038330506566": 6, "3962903248948568": 6, "222": 6, "60": 6, "377": 6, 
"25988028857313": 6, "45237513161879": 6, "270": 6, "379": 6, "8933285317637": 6, "4741161933311207": 6, "319": 6, "62": 6, "374": 6, "50897467366013": 6, "4290962207409417": 6, "356": 6, "63": [6, 7], "376": 6, "5588572940058": 6, "4464295711264585": 6, "416": 6, "64": 6, "237448916406": 6, "4687500034684213": 6, "65": 6, "375": 6, "7474776359051": 6, "4395650011783436": 6, "504": 6, "66": 6, "362": 6, "2834906299732": 6, "3326755354190032": 6, "542": 6, "67": 6, "357": 6, "3474880122588": 6, "2887212943233457": 6, "591": 6, "68": 6, "354": 6, "279045046449": 6, "2577677164664005": 6, "642": 6, "69": 6, "347": 6, "36894395697703": 6, "1672928587680225": 6, "706": 6, "70": 6, "345": 6, "17697390093394": 6, "1242367255308854": 6, "757": 6, "71": 6, "74610809299037": 6, "1728352983905301": 6, "807": 6, "72": 6, "23464281634324": 6, "1265380781508565": 6, "856": 6, "73": 6, "344": 6, "6848312222365": 6, "0829896313820404": 6, "902": 6, "74": [6, 7], "9111966504334": 6, "1070414661080543": 6, "966": 6, "75": 6, "70116419828565": 6, "0875643695329498": 6, "026": 6, "76": 6, "62647974688133": 6, "0716281620790837": 6, "089": 6, "77": 6, "6759429204596": 6, "0456289319914898": 6, "141": 6, "78": 6, "343": 6, "58131497761616": 6, "0010195360522613": 6, "193": 6, "79": 6, "342": 6, "7290581014813": 6, "9073210715005748": 6, "254": 6, "80": [6, 7], "67866114080107": 6, "9166305667100072": 6, "317": 6, "81": 6, "6440308445311": 6, "9248722692093634": 6, "367": 6, "82": 6, "02085648448934": 6, "8776928646870886": 6, "1662266300702": 6, "867592364677856": 6, "457": 6, "84": 6, "30158716569775": 6, "8599491178327108": 6, "497": 6, "85": 6, "2803074848341": 6, "8396948389352923": 6, "86": 6, "28301101884045": 6, "8396651775801683": 6, "587": 6, "87": 6, "6781906268143": 6, "8356021935129933": 6, "639": 6, "88": 6, "0405418264898": 6, "7430046191126949": 6, "677": 6, "89": 6, "77203208258476": 6, "9015965341429055": 6, "90": 6, "363": 6, "1622720320929": 6, "6746575663752555": 6, 
"91": 6, "7403796626193": 6, "9057564666836629": 6, "797": 6, "92": 6, "63579667712696": 6, "9332275205203372": 6, "93": [6, 7], "6886425884964": 6, "9433063264508291": 6, "94": 6, "9341048659705": 6, "884739221967487": 6, "935": 6, "95": [6, 7], "63507445779743": 6, "9381000493689634": 6, "986": 6, "96": 6, "06021011302374": 6, "963138023068903": 6, "97": 6, "9990546212019": 6, "9601651093867907": 6, "066": 6, "98": 6, "3821": 6, "2267845437514": 6, "117": 6, "6786067133016": 6, "721603508336166": 6, "seaborn": [6, 7], "sn": [6, 7], "set_them": 6, "darkgrid": 6, "default_reg_scor": 6, "ax": [6, 7], "scatterplot": 6, "trials_datafram": 6, "xlabel": [6, 7], "ylabel": [6, 7], "ojbect": 6, "interest": [6, 7], "neg_mean_squared_error": [6, 9], "highlight": 6, "color": [6, 7], "cv_test": 6, "user_attrs_test_scor": 6, "lambda": [6, 12], "item": [6, 10], "idx": [6, 7], "v": 6, "hue": 6, "palett": 6, "set1": 6, "inspect": 6, "apischema": [6, 8], "buildconfig_as_dict": 6, "serial": 6, "response_typ": [6, 7, 8], "deduplication_strategi": [6, 7, 8], "split_strategi": [6, 7, 8], "nosplit": [6, 12], "save_intermediate_fil": [6, 8], "log_transform": [6, 7, 8], "log_transform_bas": [6, 7, 8], "null": 6, "log_transform_neg": [6, 7, 8], "log_transform_unit_convers": [6, 7, 8], "probabilistic_threshold_represent": [6, 7, 8], "probabilistic_threshold_representation_threshold": [6, 7, 8], "probabilistic_threshold_representation_std": [6, 7, 8], "metadata": [6, 8, 9, 10, 11], "shuffl": [6, 9, 11, 12, 13], "best_trial": [6, 9, 11], "best_valu": [6, 9, 11], "tracking_rest_endpoint": [6, 9], "best_build": 6, "rb": 6, "predict_from_smil": [6, 8], "cc1": [6, 7], "43103985": 6, "177": 6, "99850936": 6, "now": [6, 7, 9], "panda": [6, 7], "pd": [6, 7], "df": [6, 12, 13], "read_csv": 6, "expect": [6, 7, 10], "matplotlib": [6, 7], "pyplot": [6, 7], "plt": [6, 7], "scatter": 6, "lim": 6, "max": [6, 7, 12], "diagon": 6, "r2_score": 6, "mean_squared_error": 6, "mean_absolute_error": 6, "y_true": 
[6, 8, 11], "y_pred": [6, 8, 11], "rmse": 6, "ad": [6, 7, 12, 13], "mae": 6, "absolut": 6, "8566354978126369": 6, "204909888075044": 6, "298453946973815": 6, "accept": [6, 7], "again": 6, "hopefulli": [6, 7], "littl": 6, "better": [6, 7, 8, 12], "send": 6, "strategi": [6, 8], "current": 6, "observ": [6, 7], "last": [6, 7, 12], "alreadi": [6, 7], "sort": [6, 7, 12, 13], "oldest": [6, 7, 12, 13], "newest": [6, 7, 12, 13], "end": [6, 7, 12, 13], "extern": 6, "tool": 6, "excel": 6, "ensur": [6, 7, 8, 9], "unballanc": 6, "work": [6, 7], "come": [6, 7], "measur": [6, 7], "fact": 6, "disregard": 6, "stereochemistri": [6, 7], "even": [6, 7], "sever": 6, "median": [6, 7, 12], "factor": [6, 7], "replic": [6, 7], "robust": [6, 7], "outlier": [6, 7], "acorss": 6, "trust": 6, "kept": 6, "splitter": [6, 7, 8, 10], "track_to_mlflow": [6, 9], "my_study_stratified_split": 6, "922": 6, "963": 6, "046": 6, "1856": 6, "4459752935309": 6, "123": 6, "1692": 6, "0451328577294": 6, "2918844591266672": 6, "592": 6, "1378": 6, "9731014410709": 6, "471164936778079": 6, "2658": 6, "13214897931": 6, "804": 6, "2059": 6, "3079659969176": 6, "330": [6, 7], "280": 6, "17777558722315": 6, "7001901522391756": 6, "422": 6, "3551": 6, "475476217507": 6, "466": 6, "2124": 6, "9660426577593": 6, "509": 6, "1686": 6, "5737716985532": 6, "9841058851292832": 6, "552": 6, "1702": 6, "174704715547": 6, "861494545249233": 6, "578": 6, "621": 6, "1204": 6, "636967895143": 6, "5238298142840006": 6, "676": 6, "228": 6, "44505332657158": 6, "9836853549192415": 6, "729": 6, "3949": 6, "499774068696": 6, "04535826280986047": 6, "012999584021838e": 6, "829": 6, "2856": 6, "917927507731": 6, "linear_model": 6, "_coordinate_desc": 6, "678": 6, "convergencewarn": 6, "did": 6, "regularis": 6, "dualiti": 6, "gap": 6, "306e": 6, "toler": 6, "824e": 6, "cd_fast": 6, "enet_coordinate_desc": 6, "882": 6, "2554": 6, "2079198900733": 6, "10588223712643852": 6, "1261": 6, "484274761188": 6, "0950442632698256": 6, "965": 6, 
"282": 6, "6478019258886": 6, "2920636100136971": 6, "004": 6, "1814": 6, "6019641143478": 6, "048": 6, "1284": 6, "7430070920798": 6, "1729012287538991": 6, "237": 6, "98783693000647": 6, "1721667984096773": 6, "192": 6, "2129": 6, "55317061882": 6, "4997740833423": 6, "779895470793612": 6, "260941957410989e": 6, "279": 6, "1740": 6, "8894369939983": 6, "02841448247455669": 6, "698e": 6, "280e": 6, "820e": 6, "352e": 6, "770e": 6, "3317": 6, "417858905051": 6, "003050380617617421": 6, "404": 6, "448": 6, "1256": 6, "7270466276807": 6, "1594144041655936": 6, "491": 6, "1245": 6, "1399766270456": 6, "336730512398918": 6, "583": 6, "2908": 6, "3563960057677": 6, "628": 6, "1775": 6, "55204856041": 6, "721": 6, "1257": 6, "9288888831513": 6, "1441514794000534": 6, "808": 6, "98174313112844": 6, "1939105579414777": 6, "900": 6, "3054": 6, "7066202193805": 6, "944": 6, "1227": 6, "082986184029": 6, "909508127148669": 6, "988": 6, "1676": 6, "7481962719485": 6, "4307837873914335": 6, "079": 6, "307965996918": 6, "168": 6, "3441": 6, "9109103644514": 6, "211": 6, "1670": 6, "5213862925175": 6, "07945856808433427": 6, "264": 6, "2756": 6, "046839500092": 6, "320": 6, "4997735530674": 6, "022099719935614482": 6, "4657380646234507e": 6, "08": 6, "0862402902634642": 6, "12519632281925502": 6, "467": 6, "3438": 6, "566583971217": 6, "524": 6, "4422556954731": 6, "19967589906728334": 6, "016e": 6, "618": 6, "359": [6, 7], "7639743940817": 6, "059252880514551576": 6, "662": 6, "1246": 6, "7813032646238": 6, "3074782262329858": 6, "755": 6, "2224": 6, "3845873049813": 6, "810": 6, "1673": 6, "9639799911165": 6, "2737740844660712": 6, "896": 6, "3163": 6, "129883232068": 6, "987": 6, "2753": 6, "414173913392": 6, "057": 6, "263": 6, "1352845182604": 6, "627030918721665": 6, "105": 6, "271": 6, "2979718788249": 6, "8548903728617034": 6, "165": 6, "277": 6, "86441431259567": 6, "9605867591283856": 6, "227": 6, "4329099850367": 6, "9537398361705693": 6, "274": 6, "3838070241422": 6, 
"9045589309769144": 6, "334": 6, "260": 6, "4460398258507": 6, "5589021326002044": 6, "383": 6, "257": 6, "95032410206767": 6, "5053759377103249": 6, "444": 6, "256": 6, "5958038666581": 6, "4789082433356577": 6, "495": 6, "253": 6, "4269973575198": 6, "4281024602273042": 6, "560": 6, "249": 6, "40822811603962": 6, "3546313579812586": 6, "620": 6, "245": 6, "71101688809983": 6, "2913960369109012": 6, "675": 6, "247": 6, "88538215472033": 6, "3274897484709072": 6, "737": 6, "244": 6, "23847775159297": 6, "2647865635312279": 6, "803": 6, "59033004585282": 6, "3228443521984092": 6, "863": 6, "243": 6, "40694430653753": 6, "2489205103047292": 6, "928": 6, "223": 6, "85145692792733": 6, "8934822741396387": 6, "990": [6, 7], "221": 6, "94026043724057": 6, "8552798675517863": 6, "219": 6, "60947928367543": 6, "8149866573467666": 6, "108": 6, "84441955310717": 6, "8531301788095305": 6, "170": 6, "24134912135943": 6, "8418420411160932": 6, "232": 6, "34805357903284": 6, "883998932301903": 6, "293": 6, "99342925522842": 6, "8564564664338091": 6, "353": 6, "50886633416462": 6, "8672069097403997": 6, "415": 6, "61235541906441": 6, "8482856353268698": 6, "479": 6, "217": 6, "7749814513912": 6, "7823980442129331": 6, "538": 6, "216": 6, "00225784039503": 6, "7113129125761161": 6, "601": 6, "8736767409489": 6, "6250904023479531": 6, "666": 6, "94414119442342": 6, "6227757503715069": 6, "731": 6, "45936690929625": 6, "6343056785694773": 6, "63861804615567": 6, "6302707941523814": 6, "860": 6, "1969": 6, "3749442111905": 6, "00019861806798724335": 6, "586529041453": 6, "923": 6, "215": 6, "82051598778696": 6, "6518244359516081": 6, "06387687700067": 6, "6440087841656821": 6, "041": 6, "24994687849525": 6, "6393212787552464": 6, "106": 6, "92984604804667": 6, "6232144947646524": 6, "25506613319246": 6, "603388647930941": 6, "2733": 6, "5772576431627": 6, "287": 6, "29854648789728": 6, "5873312673596333": 6, "16592450348784": 6, "4337907998582289": 6, "410": 6, "68514116107337": 6, 
"6695836226711808": 6, "475": 6, "220": 6, "8939514172608": 6, "4420925048614356": 6, "535": 6, "72299797702155": 6, "6960582933068138": 6, "69285146262294": 6, "69078828949453": 6, "665": 6, "0538787714827": 6, "7144357045239296": 6, "728": 6, "4213281391621": 6, "7353090312302926": 6, "74724725664498": 6, "92653950485437e": 6, "858": 6, "12287184152592": 6, "7183304951103088": 6, "22186485689846": 6, "7234233661662641": 6, "977": 6, "2720": 6, "793752592223": 6, "042": 6, "3855763846717": 6, "4726201914486088": 6, "By": 6, "roc_auc": [6, 9], "model_evalu": 6, "amongst": 6, "regression_scor": 6, "classification_scor": 6, "explained_vari": [6, 9], "max_error": [6, 9], "neg_mean_absolute_error": [6, 9], "neg_median_absolute_error": [6, 9], "average_precis": [6, 9], "balanced_accuraci": [6, 9], "f1": [6, 9], "f1_macro": [6, 9], "f1_micro": [6, 9], "f1_weight": [6, 9], "jaccard": [6, 9], "jaccard_macro": [6, 9], "jaccard_micro": [6, 9], "jaccard_weight": [6, 9], "neg_brier_scor": [6, 9], "precis": [6, 7, 9], "precision_macro": [6, 9], "precision_micro": [6, 9], "precision_weight": [6, 9], "recal": [6, 9], "recall_macro": [6, 9], "recall_micro": [6, 9], "recall_weight": [6, 9], "auc_pr_cal": [6, 8, 9], "bedroc": [6, 8, 9], "concordance_index": [6, 8, 9], "my_study_r2": 6, "945": 6, "947": 6, "072": 6, "011171868665159623": 6, "197": 6, "08689402230378174": 6, "283": 6, "12553701248394863": 6, "141096648805748": 6, "4893466963980463e": 6, "3039309544203818": 6, "20182749628697164": 6, "485": 6, "8187194367176578": 6, "558": 6, "4647239019719945": 6, "6574750183038587": 6, "611": 6, "8614818478547979": 6, "3974313630683448": 6, "705": 6, "12769795082909816": 6, "773": 6, "8639946428338224": 6, "2391884918766034": 6, "838": 6, "12553701248377633": 6, "00044396482429275296": 6, "3831436879125245e": 6, "892": 6, "12553700871203702": 6, "00028965395242758657": 6, "99928292425642e": 6, "2935582042429075": 6, "976": 6, "18476333152695587": 6, "8190707459213998": 6, 
"4060379177903557": 6, "118": 6, "12206148974315871": 6, "3105263811279067": 6, "344271094811757": 6, "3562469062424869": 6, "670604991178476": 6, "316": [6, 7], "045959695906983344": 6, "8583939656024446": 6, "5158832554303112": 6, "433": 6, "3062574078515544": 6, "487": 6, "11657354998283716": 6, "0009327650919528738": 6, "062479210472502": 6, "586": 6, "629": 6, "8498478905829554": 6, "1366172066709432": 6, "733": 6, "1276979508290982": 6, "786": 6, "13519830637607919": 6, "92901911959232": 6, "999026012594694": 6, "839": 6, "8198078293055633": 6, "5888977841391714": 6, "878": 6, "8201573964824842": 6, "19435298754153707": 6, "958": 6, "013": 6, "6285506249643193": 6, "35441495011256785": 6, "11934070343348298": 6, "145": 6, "4374125584543907": 6, "2457809516380005": 6, "213": 6, "3625576518621392": 6, "6459129458824919": 6, "36175556871883746": 6, "8179058888285398": 6, "285": 6, "8202473217121523": 6, "0920052840435055": 6, "372": 6, "3672927879319306": 6, "8677032984759461": 6, "402": 6, "445": 6, "40076792599874356": 6, "2865764368847064": 6, "26560316846701765": 6, "632": 6, "41215254857081174": 6, "671": 6, "763": 6, "00461414372160085": 6, "27282533524183633": 6, "919": 6, "10220127407364991": 6, "975": 6, "30323404130582854": 6, "3044553805553568": 6, "6437201185807124": 6, "076": 6, "41502276709562": 6, "10978379088847677": 6, "120": 6, "36160209098547913": 6, "022707289534838138": 6, "175": 6, "2916101445983833": 6, "936e": 6, "782e": 6, "434e": 6, "977e": 6, "276": 6, "8609413020928532": 6, "04987590926279814": 6, "794e": 6, "830e": 6, "906e": 6, "578e": 6, "8610289662757457": 6, "019211413400468974": 6, "754e": 6, "843e": 6, "507e": 6, "493": 6, "8610070549049179": 6, "018492644772509947": 6, "840e": 6, "513e": 6, "924e": 6, "8569771623635769": 6, "008783442408928633": 6, "243e": 6, "014e": 6, "700": 6, "8624781673814641": 6, "05782221001517797": 6, "113e": 6, "935e": 6, "122e": 6, "798": 6, "8618589507037001": 6, "02487072255316275": 6, "886": 6, 
"864754359721037": 6, "2079910754941946": 6, "946": 6, "8622236413326235": 6, "333215560931422": 6, "009": 6, "861832165638517": 6, "3628098560209365": 6, "068": 6, "8620108533993581": 6, "34240779695521706": 6, "142": 6, "8638540565650902": 6, "26493714991266293": 6, "8629799500771645": 6, "30596394512914815": 6, "8621408609583922": 6, "33648829357762355": 6, "351": 6, "8638132124078156": 6, "2679814646317183": 6, "424": 6, "863983758876634": 6, "24062119162159595": 6, "500": 6, "8627356047945115": 6, "3141728910335158": 6, "8639203054085788": 6, "23391390640786494": 6, "8570103863991635": 6, "6124885145996103": 6, "742": 6, "8647961976727571": 6, "2059976546070975": 6, "830": 6, "8648312544921793": 6, "20266060662750784": 6, "926": 6, "8648431452862716": 6, "20027647978240445": 6, "010": 6, "8648491459660418": 6, "1968919999787333": 6, "8650873115156988": 6, "174598921162764": 6, "204": 6, "8650350577921149": 6, "16468002989641095": 6, "8649412283687147": 6, "1606717091615047": 6, "986e": 6, "396": [6, 7], "8649537211609554": 6, "14694925097689848": 6, "506": 6, "8649734575435447": 6, "147612713300643": 6, "446e": 6, "8648761002838515": 6, "14440434705706803": 6, "398e": 6, "775": 6, "8639826593122782": 6, "1265357179513065": 6, "690e": 6, "875": 6, "864435565531768": 6, "1374245525868926": 6, "938": 6, "8590221951825531": 6, "49890830155012533": 6, "8649098880804443": 6, "1573428812070292": 6, "405e": 6, "864536410656637": 6, "13886104722511608": 6, "8597401050431873": 6, "47746341180045787": 6, "8537465461603838": 6, "050e": 6, "8642643827090003": 6, "13446778921611002": 6, "175e": 6, "8641621818665252": 6, "1286796719653316": 6, "625": 6, "864182755916388": 6, "13303218726548235": 6, "693": 6, "1255357440899417": 6, "021711452917433944": 6, "559714273835951e": 6, "758": 6, "8604596648091501": 6, "43644874418279245": 6, "463e": 6, "861": 6, "8635689909135862": 6, "10940922083495383": 6, "951": 6, "8648544336551733": 6, "1912756875742137": 6, "8648496595672595": 
6, "19628449928540487": 6, "8452625121122099": 6, "4324661283995224": 6, "149": 6, "8378670635846416": 6, "839206620815206": 6, "002e": 6, "082e": 6, "8649365368153895": 6, "07270781179126021": 6, "8875676754699953": 6, "0006995169897945908": 6, "586e": 6, "618e": 6, "234e": 6, "484": 6, "8730555131061773": 6, "0018186269840273495": 6, "12553508835019533": 6, "04867556317570456": 6, "0011658455138452": 6, "284e": 6, "177e": 6, "664": 6, "8586292788613132": 6, "005078762921098462": 6, "anyalgorithm": 6, "__args__": 6, "consider": [6, 7], "modif": [6, 12], "establish": 6, "rf": 6, "account": [6, 7, 8], "though": 6, "treat": [6, 7], "pdf": [6, 9], "denot": [6, 12, 13], "determinist": [6, 7], "quantiti": 6, "tradit": [6, 7], "discret": 6, "discretis": [6, 12, 14], "bioactiv": [6, 7], "integr": 6, "afford": [6, 7], "particularli": 6, "liklihood": 6, "membership": [6, 7], "iopscienc": 6, "iop": 6, "articl": [6, 7], "3847": 6, "1538": 6, "3881": 6, "aaf101": 6, "pxc50": [6, 7], "p24863": 6, "enabl": [6, 7], "alwai": [6, 7], "734": 6, "joblib": [6, 8], "577": 6, "joblibcollisionwarn": 6, "collis": 6, "180": 6, "self": [6, 9], "_cached_cal": 6, "shelv": 6, "00": 6, "764": 6, "08099580623289632": 6, "prfclassifier_algorithm_hash": 6, "efe0ba9870529a6cde0dd3ad22447cbb": 6, "max_depth__efe0ba9870529a6cde0dd3ad22447cbb": 6, "n_estimators__efe0ba9870529a6cde0dd3ad22447cbb": 6, "max_features__efe0ba9870529a6cde0dd3ad22447cbb": 6, "prfclassifiermaxfeatur": [6, 9], "min_py_sum_leaf__efe0ba9870529a6cde0dd3ad22447cbb": 6, "use_py_gini__efe0ba9870529a6cde0dd3ad22447cbb": 6, "use_py_leafs__efe0ba9870529a6cde0dd3ad22447cbb": 6, "408": [6, 7], "07261454017489567": 6, "780": 6, "08791063872794351": 6, "911": 6, "07114663955819509": 6, "879": 6, "06537440628140882": 6, "446": 6, "05680450487193368": 6, "968": 6, "543": 6, "0656836821774901": 6, "333": 6, "07863564862376404": 6, "329": 6, "0648840199215795": 6, "014": 6, "07861037073288182": 6, "608": 6, "06669924317660021": 6, "997": 6, 
"06734611679947522": 6, "526": 6, "06810559387741143": 6, "0528189695245453": 6, "best_built": 6, "demonstr": [6, 7], "purpos": [6, 7], "transduct": 6, "example_smil": 6, "get_set": [6, 7, 8], "b": [6, 7], "outsid": [6, 7, 8], "likelihood": 6, "problemat": 6, "except": [6, 8], "valueerror": 6, "As": [6, 7], "raw": [6, 7, 8, 12], "760": 6, "800": 6, "w": 6, "801": 6, "fail": 6, "traceback": 6, "_optim": 6, "196": 6, "_run_trial": 6, "value_or_valu": 6, "func": 6, "128": 6, "__call__": 6, "_validate_algo": 6, "rais": [6, 8], "summaris": 6, "handl": 6, "via": [6, 7, 12], "convent": [6, 7], "classic": 6, "relev": 6, "cutoff": [6, 7, 12], "ouput": 6, "reflect": [6, 7], "arguabl": 6, "mpo": 6, "pub": 6, "ac": 6, "full": [6, 9], "jcim": 6, "9b00237": 6, "slide": 6, "googl": 6, "14pbd9ltxzfpsjhyxykflxnk8q80lhvnjimg8a3wqcrm": 6, "edit": 6, "calcault": 6, "directli": [6, 7], "later": [6, 7], "smilesbaseddescriptor": 6, "architectur": [6, 7], "quickli": 6, "867": 6, "868": 6, "root": [6, 9, 10], "enqueu": [6, 8], "manual": 6, "activation__668a7428ff5cdb271b01c0925e8fea45": 6, "relu": [6, 9], "aggregation__668a7428ff5cdb271b01c0925e8fea45": 6, "aggregation_norm__668a7428ff5cdb271b01c0925e8fea45": 6, "batch_size__668a7428ff5cdb271b01c0925e8fea45": 6, "depth__668a7428ff5cdb271b01c0925e8fea45": 6, "dropout__668a7428ff5cdb271b01c0925e8fea45": 6, "features_generator__668a7428ff5cdb271b01c0925e8fea45": 6, "ffn_hidden_size__668a7428ff5cdb271b01c0925e8fea45": 6, "ffn_num_layers__668a7428ff5cdb271b01c0925e8fea45": 6, "final_lr_ratio_exp__668a7428ff5cdb271b01c0925e8fea45": 6, "hidden_size__668a7428ff5cdb271b01c0925e8fea45": 6, "init_lr_ratio_exp__668a7428ff5cdb271b01c0925e8fea45": 6, "max_lr_exp__668a7428ff5cdb271b01c0925e8fea45": 6, "warmup_epochs_ratio__668a7428ff5cdb271b01c0925e8fea45": 6, "chempropregressor_algorithm_hash": 6, "668a7428ff5cdb271b01c0925e8fea45": 6, "301": 6, "6833": 6, "034983241957": 6, "chempropactiv": [6, 9], "chempropaggreg": [6, 9], 
"ensemble_size__668a7428ff5cdb271b01c0925e8fea45": 6, "epochs__668a7428ff5cdb271b01c0925e8fea45": 6, "chempropfeatures_gener": [6, 9], "6445": 6, "608102397302": 6, "1700": 6, "2300": 6, "safe": 6, "nan": [6, 8, 12, 13], "prompt": 6, "due": [6, 7, 8], "deactiv": 6, "own": 6, "still": 6, "commun": 6, "facilit": [6, 7, 9], "preset": 6, "enqu": 6, "bayesian": 6, "suggest": 6, "split_chemprop": [6, 9], "flag": [6, 7, 12, 13], "n_chemprop_tri": [6, 9], "desir": 6, "undirect": 6, "shown": [6, 7], "aspect": [6, 8], "princip": [6, 7], "expand": 6, "differnt": 6, "rememb": 6, "unless": 6, "alter": [6, 9], "characterist": [6, 7], "too": [6, 9], "limit": [6, 7, 9], "vice": [6, 9], "versa": [6, 9], "extens": [6, 9], "trail": [6, 9], "applic": [6, 7, 9], "chanc": 6, "help": 6, "caruana": 6, "particular": 6, "achiev": [6, 7], "st": 6, "figur": [6, 7], "signal": 6, "aka": 6, "could": [6, 7], "share": [6, 12, 13], "until": [6, 9], "longer": [6, 7], "mt": 6, "onc": [6, 7, 12], "knowledg": 6, "order": [6, 7, 9, 12], "earlier": [6, 7], "accompi": 6, "train_side_info": 6, "clog": 6, "surfac": 6, "area": [6, 7], "psa": 6, "265": 6, "22475": 6, "8088": 6, "883": 6, "32297": 6, "6237": 6, "835": 6, "33334": 6, "2804": 6, "314": 6, "26075": 6, "2533": 6, "498": 6, "278": 6, "18917": 6, "5102": 6, "694": 6, "246": 6, "12575": 6, "7244": 6, "255": 6, "14831": 6, "4338": 6, "895": 6, "302": 6, "26838": 6, "2041": 6, "22298": 6, "match": 6, "train_side_info_cl": 6, "clogp_gt2": 6, "clogs_gt": 6, "acceptors_gt5": 6, "donors_gt0": 6, "area_gt250": 6, "psa_lt0": 6, "aux": [6, 8, 10], "03": 6, "350": 6, "443": 6, "5817": 6, "944008002311": 6, "chemprophyperoptregressor_algorithm_hash": 6, "db9e60f9b8f0a43eff4b41917b6293d9": 6, "ensemble_size__db9e60f9b8f0a43eff4b41917b6293d9": 6, "epochs__db9e60f9b8f0a43eff4b41917b6293d9": 6, "features_generator__db9e60f9b8f0a43eff4b41917b6293d9": 6, "num_iters__db9e60f9b8f0a43eff4b41917b6293d9": 6, "search_parameter_level__db9e60f9b8f0a43eff4b41917b6293d9": 6, 
"chempropsearch_parameter_level": [6, 9], "aux_weight_pc__db9e60f9b8f0a43eff4b41917b6293d9": 6, "5796": 6, "34392897437": 6, "439": 6, "5795": 6, "086720713623": 6, "470": 6, "241": 6, "5820": 6, "227555999914": 6, "322": 6, "5852": 6, "160071204277": 6, "inlfuenc": 6, "henc": [6, 7], "percent": 6, "product": 6, "user_attrs_trial_ran": 6, "drop": [6, 7, 8], "erron": 6, "__": 6, "params_aux_weight_pc": 6, "conclud": [6, 7], "produc": 6, "overrid": [6, 9], "situat": [6, 7], "along": 6, "potenti": [6, 7], "compris": 6, "incompat": 6, "whilst": [6, 7], "desciptor": 6, "grei": [6, 7], "tial": 6, "what": 6, "design": 6, "unpromis": 6, "why": [6, 7], "poor": 6, "sampler": 6, "incompta": 6, "repeatedli": 6, "hyerparamet": 6, "ident": 6, "9525489095524835": 6, "aux_weight_pc__cfa1990d5153c8812982f034d788d7e": 6, "777": 6, "4824": 6, "686269039228": 6, "7731425652872588": 6, "819": 6, "849": 6, "4409": 6, "946844928445": 6, "791002332112292": 6, "021": [6, 7], "167": 6, "329624779366306": 6, "00015024763718638216": 6, "269": 6, "523": 6, "4396": 6, "722635068717": 6, "559": 6, "753": 6, "4577379164707": 6, "790": 6, "960": 6, "consult": 6, "incompitbl": 6, "algo": [6, 9], "occur": 6, "assign": [6, 7], "doe": [6, 7, 8, 9, 12], "params_algorithm_nam": 6, "move_legend": 6, "upper": [6, 7], "bbox_to_anchor": [6, 7], "overview": 6, "never": 6, "successfulli": 6, "absenc": 6, "8th": 6, "miss": [6, 8, 12, 13], "associ": [6, 7], "asid": 6, "mitig": 6, "overal": [6, 7], "serv": [6, 11], "argument": [6, 8], "tl": 6, "wider": 6, "activation__e0d3a442222d4b38f3aa1434851320db": 6, "aggregation__e0d3a442222d4b38f3aa1434851320db": 6, "aggregation_norm__e0d3a442222d4b38f3aa1434851320db": 6, "batch_size__e0d3a442222d4b38f3aa1434851320db": 6, "depth__e0d3a442222d4b38f3aa1434851320db": 6, "dropout__e0d3a442222d4b38f3aa1434851320db": 6, "features_generator__e0d3a442222d4b38f3aa1434851320db": 6, "ffn_hidden_size__e0d3a442222d4b38f3aa1434851320db": 6, 
"ffn_num_layers__e0d3a442222d4b38f3aa1434851320db": 6, "final_lr_ratio_exp__e0d3a442222d4b38f3aa1434851320db": 6, "hidden_size__e0d3a442222d4b38f3aa1434851320db": 6, "init_lr_ratio_exp__e0d3a442222d4b38f3aa1434851320db": 6, "max_lr_exp__e0d3a442222d4b38f3aa1434851320db": 6, "warmup_epochs_ratio__e0d3a442222d4b38f3aa1434851320db": 6, "e0d3a442222d4b38f3aa1434851320db": 6, "4937": 6, "540075659691": 6, "ensemble_size__e0d3a442222d4b38f3aa1434851320db": 6, "epochs__e0d3a442222d4b38f3aa1434851320db": 6, "retrain": 6, "954": 6, "043": 6, "5114": 6, "7131239123555": 6, "chempropregressorpretrained_algorithm_hash": 6, "dfc518a76317f23d95e5aa5a3eac77f0": 6, "frzn__dfc518a76317f23d95e5aa5a3eac77f0": 6, "chempropfrzn": [6, 9], "epochs__dfc518a76317f23d95e5aa5a3eac77f0": 6, "cover": 6, "global": 6, "job": [6, 7], "fair": 6, "1545": 6, "tl_studi": 6, "loc": [6, 7], "params_epoch": 6, "fillna": 6, "astyp": [6, 7], "agg": 6, "join": [6, 7], "params_chempropregressor_algorithm_hash": 6, "isna": 6, "annot": 6, "172": 6, "5891": 6, "7552821093905": 6, "140": 6, "5890": 6, "94653501547": 6, "77dfc8230317e08504ed5e643243fbc2": 6, "frzn__77dfc8230317e08504ed5e643243fbc2": 6, "epochs__77dfc8230317e08504ed5e643243fbc2": 6, "181": 6, "right": [6, 7], "ncol": 6, "world": [6, 7, 12, 13], "penultim": [6, 9], "chemprop_model": 6, "heatmap": 6, "predictor": [6, 8], "chemprop_fingerprint": 6, "fingerprint_typ": 6, "mpn": 6, "cbar_kw": 6, "semi": 6, "supervis": 6, "altern": [6, 7], "last_ffn": 6, "howeev": 6, "respect": [6, 7], "kind": [6, 7, 12], "confid": 6, "calibratedclassifiercv": 6, "understand": 6, "predict_proba": [6, 8], "among": 6, "gave": 6, "close": [6, 7], "actual": [6, 7], "topic": 6, "sigmoid": [6, 9], "review": 6, "those": [6, 7, 8], "calibration_curv": 6, "collect": 6, "defaultdict": 6, "precision_scor": 6, "recall_scor": 6, "f1_score": 6, "brier_score_loss": 6, "log_loss": 6, "roc_auc_scor": 6, "n_job": [6, 9, 11], "calibrated_rf": 6, "calibrated_model": 6, "173": 6, "110": 6, 
"8353535353535354": 6, "calibratedclassifiercvwithva_algorithm_hash": 6, "e788dfbfc5075967acb5ddf9d971ea20": 6, "n_folds__e788dfbfc5075967acb5ddf9d971ea20": 6, "max_depth__e788dfbfc5075967acb5ddf9d971ea20": 6, "n_estimators__e788dfbfc5075967acb5ddf9d971ea20": 6, "max_features__e788dfbfc5075967acb5ddf9d971ea20": 6, "uncalibr": 6, "uncalibrated_rf": 6, "uncalibrated_model": 6, "566": 6, "915": 6, "8185858585858585": 6, "randomforestclassifier_algorithm_hash": 6, "167e1e88dd2a80133e317c78f009bdc9": 6, "max_depth__167e1e88dd2a80133e317c78f009bdc9": 6, "n_estimators__167e1e88dd2a80133e317c78f009bdc9": 6, "max_features__167e1e88dd2a80133e317c78f009bdc9": 6, "conserv": 6, "1000": [6, 7], "random_st": [6, 12, 13], "calibrated_predict": 6, "uncalibrated_predict": 6, "cal_df": 6, "datafram": [6, 7, 8, 11, 12], "boxplot": 6, "melt": 6, "set_ylabel": [6, 7], "behaviour": [6, 7], "curv": [6, 7], "reliabl": 6, "diagram": 6, "against": 6, "figsiz": [6, 7], "ax1": 6, "subplot2grid": 6, "rowspan": 6, "ax2": 6, "perfectli": [6, 7], "pred": 6, "fraction_of_posit": 6, "mean_predicted_valu": 6, "n_bin": 6, "brier": 6, "2f": 6, "hist": 6, "histtyp": 6, "lw": 6, "set_ylim": 6, "legend": [6, 7], "set_titl": 6, "set_xlabel": [6, 7], "center": [6, 7], "tight_layout": [6, 7], "compos": 6, "refin": 6, "notic": 6, "significantli": 6, "cell": 6, "accur": 6, "alloc": 6, "y_prob": 6, "ye": 6, "score_nam": 6, "__name__": 6, "capit": 6, "score_df": 6, "set_index": 6, "decim": 6, "roc": 6, "auc": [6, 8], "184705": 6, "547129": 6, "830565": 6, "744048": 6, "784929": 6, "716536": 6, "175297": 6, "529474": 6, "811209": 6, "818452": 6, "814815": 6, "714104": 6, "va": 6, "multipoint": 6, "0c00476": 6, "margin": [6, 7], "bounari": 6, "548": 6, "537": 6, "8213131313131313": 6, "79765fbec1586f3c917ff30de274fdb4": 6, "n_folds__79765fbec1586f3c917ff30de274fdb4": 6, "max_depth__79765fbec1586f3c917ff30de274fdb4": 6, "n_estimators__79765fbec1586f3c917ff30de274fdb4": 6, 
"max_features__79765fbec1586f3c917ff30de274fdb4": 6, "uncert": [6, 8], "chem": [6, 7, 8], "allchem": 6, "pandastool": [6, 7], "rdconfig": 6, "datastruct": 6, "train_df": 6, "addmoleculecolumntofram": 6, "includefingerprint": 6, "getmorganfingerprint": 6, "nn": 6, "bulktanimotosimilar": 6, "add": [6, 7, 8, 10], "va_pr": 6, "va_uncert": 6, "dtm": 6, "trelli": 6, "fig": [6, 7], "subplot": [6, 7], "sharei": 6, "regplot": 6, "referenc": 6, "boundari": [6, 7, 12, 14], "neither": 6, "nor": 6, "dissimilar": 6, "cp_pred_ensembl": 6, "cp_uncert_ensembl": 6, "916": 6, "959": 6, "activation__fd833c2dde0b7147e6516ea5eebb2657": 6, "aggregation__fd833c2dde0b7147e6516ea5eebb2657": 6, "aggregation_norm__fd833c2dde0b7147e6516ea5eebb2657": 6, "batch_size__fd833c2dde0b7147e6516ea5eebb2657": 6, "depth__fd833c2dde0b7147e6516ea5eebb2657": 6, "dropout__fd833c2dde0b7147e6516ea5eebb2657": 6, "features_generator__fd833c2dde0b7147e6516ea5eebb2657": 6, "ffn_hidden_size__fd833c2dde0b7147e6516ea5eebb2657": 6, "ffn_num_layers__fd833c2dde0b7147e6516ea5eebb2657": 6, "final_lr_ratio_exp__fd833c2dde0b7147e6516ea5eebb2657": 6, "hidden_size__fd833c2dde0b7147e6516ea5eebb2657": 6, "init_lr_ratio_exp__fd833c2dde0b7147e6516ea5eebb2657": 6, "max_lr_exp__fd833c2dde0b7147e6516ea5eebb2657": 6, "warmup_epochs_ratio__fd833c2dde0b7147e6516ea5eebb2657": 6, "chempropclassifier_algorithm_hash": 6, "fd833c2dde0b7147e6516ea5eebb2657": 6, "65625": 6, "ensemble_size__fd833c2dde0b7147e6516ea5eebb2657": 6, "epochs__fd833c2dde0b7147e6516ea5eebb2657": 6, "midpoint": 6, "mont": 6, "carlo": 6, "virtual": [6, 8], "468": 6, "activation__c73885c5d5a4182168b8b002d321965a": 6, "aggregation__c73885c5d5a4182168b8b002d321965a": 6, "aggregation_norm__c73885c5d5a4182168b8b002d321965a": 6, "batch_size__c73885c5d5a4182168b8b002d321965a": 6, "depth__c73885c5d5a4182168b8b002d321965a": 6, "dropout__c73885c5d5a4182168b8b002d321965a": 6, "features_generator__c73885c5d5a4182168b8b002d321965a": 6, 
"ffn_hidden_size__c73885c5d5a4182168b8b002d321965a": 6, "ffn_num_layers__c73885c5d5a4182168b8b002d321965a": 6, "final_lr_ratio_exp__c73885c5d5a4182168b8b002d321965a": 6, "hidden_size__c73885c5d5a4182168b8b002d321965a": 6, "init_lr_ratio_exp__c73885c5d5a4182168b8b002d321965a": 6, "max_lr_exp__c73885c5d5a4182168b8b002d321965a": 6, "warmup_epochs_ratio__c73885c5d5a4182168b8b002d321965a": 6, "c73885c5d5a4182168b8b002d321965a": 6, "46875": 6, "ensemble_size__c73885c5d5a4182168b8b002d321965a": 6, "epochs__c73885c5d5a4182168b8b002d321965a": 6, "cp_pred_dropout": 6, "cp_uncert_dropout": 6, "previou": 6, "proabil": 6, "va_predict": 6, "correl": [6, 7], "drouput": 6, "uncertatinti": 6, "cp_uncert_delta": 6, "categor": [6, 12], "unit": [6, 7], "parsabl": 6, "date": 6, "cast": 6, "appropri": 6, "befor": [6, 7, 12], "choic": [6, 12], "822": 6, "862": 6, "4430": 6, "271946796234": 6, "mapie_algorithm_hash": 6, "976d211e4ac64e5568d369bcddd3aeb1": 6, "mapie_alpha__976d211e4ac64e5568d369bcddd3aeb1": 6, "max_depth__976d211e4ac64e5568d369bcddd3aeb1": 6, "n_estimators__976d211e4ac64e5568d369bcddd3aeb1": 6, "max_features__976d211e4ac64e5568d369bcddd3aeb1": 6, "analysi": [6, 7, 8], "perfom": 6, "post": 6, "mapie_pr": 6, "mapie_unc": 6, "bar": 6, "visualis": 6, "errorbar": 6, "yerr": 6, "ab": 6, "fmt": 6, "black": [6, 7], "ecolor": 6, "grai": 6, "elinewidth": 6, "capsiz": 6, "move": [6, 10], "analys": [6, 7], "width": 6, "alpha_impact": 6, "ma": 6, "unc_df": 6, "unc": 6, "reset_index": [6, 7], "concat": 6, "lineplot": 6, "err_styl": 6, "se": 6, "incorpor": 6, "tradition": 6, "unsupport": 6, "kernelexplain": [6, 8], "shaplei": 6, "explan": 6, "slundberg": 6, "game": 6, "credit": 6, "theori": 6, "publish": 6, "comopsit": 6, "540": 6, "_ridg": 6, "userwarn": [6, 7], "matrix": 6, "dual": 6, "34035600917066766": 6, "676421027478709": 6, "dga": 6, "dgp": 6, "dgtot": 6, "sa": 6, "sdc": 6, "sdx": 6, "numhacceptor": 6, "numhdonor": 6, "maxabsestateindex": 6, "maxestateindex": 6, 
"minabsestateindex": 6, "minestateindex": 6, "qed": 6, "sp": 6, "heavyatommolwt": 6, "exactmolwt": 6, "numvalenceelectron": 6, "numradicalelectron": 6, "maxpartialcharg": 6, "minpartialcharg": 6, "maxabspartialcharg": 6, "minabspartialcharg": 6, "fpdensitymorgan1": 6, "fpdensitymorgan2": 6, "fpdensitymorgan3": 6, "bcut2d_mwhi": 6, "bcut2d_mwlow": 6, "bcut2d_chghi": 6, "bcut2d_chglo": 6, "bcut2d_logphi": 6, "bcut2d_logplow": 6, "bcut2d_mrhi": 6, "bcut2d_mrlow": 6, "avgipc": 6, "balabanj": 6, "bertzct": 6, "chi0": 6, "chi0n": 6, "chi0v": 6, "chi1": 6, "chi1n": 6, "chi1v": 6, "chi2n": 6, "chi2v": 6, "chi3n": 6, "chi3v": 6, "chi4n": 6, "chi4v": 6, "hallkieralpha": 6, "ipc": 6, "kappa1": 6, "kappa2": 6, "kappa3": 6, "labuteasa": 6, "peoe_vsa1": 6, "peoe_vsa10": 6, "peoe_vsa11": 6, "peoe_vsa12": 6, "peoe_vsa13": 6, "peoe_vsa14": 6, "peoe_vsa2": 6, "peoe_vsa3": 6, "peoe_vsa4": 6, "peoe_vsa5": 6, "peoe_vsa6": 6, "peoe_vsa7": 6, "peoe_vsa8": 6, "peoe_vsa9": 6, "smr_vsa1": 6, "smr_vsa10": 6, "smr_vsa2": 6, "smr_vsa3": 6, "smr_vsa4": 6, "smr_vsa5": 6, "smr_vsa6": 6, "smr_vsa7": 6, "smr_vsa8": 6, "smr_vsa9": 6, "slogp_vsa1": 6, "slogp_vsa10": 6, "slogp_vsa11": 6, "slogp_vsa12": 6, "slogp_vsa2": 6, "slogp_vsa3": 6, "slogp_vsa4": 6, "slogp_vsa5": 6, "slogp_vsa6": 6, "slogp_vsa7": 6, "slogp_vsa8": 6, "slogp_vsa9": 6, "estate_vsa1": 6, "estate_vsa10": 6, "estate_vsa11": 6, "estate_vsa2": 6, "estate_vsa3": 6, "estate_vsa4": 6, "estate_vsa5": 6, "estate_vsa6": 6, "estate_vsa7": 6, "estate_vsa8": 6, "estate_vsa9": 6, "vsa_estate1": 6, "vsa_estate10": 6, "vsa_estate2": 6, "vsa_estate3": 6, "vsa_estate4": 6, "vsa_estate5": 6, "vsa_estate6": 6, "vsa_estate7": 6, "vsa_estate8": 6, "vsa_estate9": 6, "fractioncsp3": 6, "heavyatomcount": 6, "nhohcount": 6, "nocount": 6, "numaliphaticcarbocycl": 6, "numaliphaticheterocycl": 6, "numaliphaticr": 6, "numaromaticcarbocycl": 6, "numaromaticheterocycl": 6, "numaromaticr": 6, "numheteroatom": 6, "numrotatablebond": 6, "numsaturatedcarbocycl": 6, 
"numsaturatedheterocycl": 6, "numsaturatedr": 6, "ringcount": 6, "mollogp": 6, "molmr": 6, "fr_al_coo": 6, "fr_al_oh": 6, "fr_al_oh_notert": 6, "fr_arn": 6, "fr_ar_coo": 6, "fr_ar_n": 6, "fr_ar_nh": 6, "fr_ar_oh": 6, "fr_coo": 6, "fr_coo2": 6, "fr_c_o": 6, "fr_c_o_nocoo": 6, "fr_c_": 6, "fr_hoccn": 6, "fr_imin": 6, "fr_nh0": 6, "fr_nh1": 6, "fr_nh2": 6, "fr_n_o": 6, "fr_ndealkylation1": 6, "fr_ndealkylation2": 6, "fr_nhpyrrol": 6, "fr_sh": 6, "fr_aldehyd": 6, "fr_alkyl_carbam": 6, "fr_alkyl_halid": 6, "fr_allylic_oxid": 6, "fr_amid": 6, "fr_amidin": 6, "fr_anilin": 6, "fr_aryl_methyl": 6, "fr_azid": 6, "fr_azo": 6, "fr_barbitur": 6, "fr_benzen": 6, "fr_benzodiazepin": 6, "fr_bicycl": 6, "fr_diazo": 6, "fr_dihydropyridin": 6, "fr_epoxid": 6, "fr_ester": 6, "fr_ether": 6, "fr_furan": 6, "fr_guanido": 6, "fr_halogen": 6, "fr_hdrzine": 6, "fr_hdrzone": 6, "fr_imidazol": 6, "fr_imid": 6, "fr_isocyan": 6, "fr_isothiocyan": 6, "fr_keton": 6, "fr_ketone_topliss": 6, "fr_lactam": 6, "fr_lacton": 6, "fr_methoxi": 6, "fr_morpholin": 6, "fr_nitril": 6, "fr_nitro": 6, "fr_nitro_arom": 6, "fr_nitro_arom_nonortho": 6, "fr_nitroso": 6, "fr_oxazol": 6, "fr_oxim": 6, "fr_para_hydroxyl": 6, "fr_phenol": 6, "fr_phenol_noorthohbond": 6, "fr_phos_acid": 6, "fr_phos_est": 6, "fr_piperdin": 6, "fr_piperzin": 6, "fr_priamid": 6, "fr_prisulfonamd": 6, "fr_pyridin": 6, "fr_quatn": 6, "fr_sulfid": 6, "fr_sulfonamd": 6, "fr_sulfon": 6, "fr_term_acetylen": 6, "fr_tetrazol": 6, "fr_thiazol": 6, "fr_thiocyan": 6, "fr_thiophen": 6, "fr_unbrch_alkan": 6, "fr_urea": 6, "shap_valu": 6, "2227": 6, "042023e": 6, "2229": 6, "025199e": 6, "2228": 6, "802158e": 6, "2267": 6, "387276e": 6, "2230": 6, "106653e": 6, "1784": 6, "598471e": 6, "1785": 6, "584": 6, "ns": 6, "995": 6, "996": 6, "845": 6, "846": 6, "1375": 6, "1376": 6, "s1": 6, "n1c": 6, "1570": 6, "contrinubt": 6, "datset": 6, "unscaledphyschemjazzi": 6, "rank": [6, 8], "usag": 6, "978": 6, "032": 6, "818": 6, "t": [6, 7, 9], "kekul": 6, 
"unkekul": 6, "rational": 6, "rationale_scor": 6, "386": 6, "097": 6, "ch3": 6, "ch": 6, "389": [6, 7], "151": 6, "c1c": 6, "c1n": 6, "ch2": 6, "nh2": 6, "nn1cc1c": 6, "384": 6, "720": 6, "c1cccc": 6, "871": 6, "854": 6, "contian": 6, "second": 6, "dummi": 6, "third": 6, "smallest": 6, "made": [6, 7], "fourth": 6, "irregularli": 6, "xc50": [6, 7], "log10": [6, 7, 12, 14], "6th": 6, "convers": [6, 7, 12, 14], "logbas": [6, 7, 12, 14], "logneg": [6, 7, 12, 14], "negat": [6, 7, 12, 14], "transformed_studi": 6, "transform_exampl": 6, "028": 6, "5959493772536109": 6, "6571993250300608": 6, "169": 6, "1511102853256885": 6, "2487063317112765": 6, "288": 6, "6714912461080983": 6, "2725944467796781": 6, "369": 6, "194926264155893": 6, "395": 6, "7520919188596032": 6, "469": 6, "7803723847416691": 6, "499": 6, "6397753979196248": 6, "528": 6, "151110299986041": 6, "151110111437006": 6, "5410418750776741": 6, "612": 6, "7183231137124538": 6, "640": 6, "2721824844856162": 6, "716": 6, "1900929470222508": 6, "745": 6, "774": 6, "5585323973564646": 6, "3169218304262786": 6, "980": 6, "7974925066137679": 6, "008": 6, "218395226466336": 6, "039": 6, "1474226942497083": 6, "054": 6, "083": 6, "0239005731675412": 6, "160": 6, "191": 6, "178901060853144": 6, "27137790098830755": 6, "2710284516876423": 6, "361": 6, "6273152492418945": 6, "438": 6, "496": 6, "1907041717628215": 6, "3209075619139279": 6, "545": 6, "2709423025014604": 6, "609": 6, "3133943310851415": 6, "657": 6, "257769959239938": 6, "735": 6, "40359637945134746": 6, "817": 6, "4127882135896648": 6, "905": 6, "9246005133276612": 6, "003": 6, "8908739215746116": 6, "035": 6, "107536316777608": 6, "067": 6, "098": 6, "054360360588395": 6, "129": 6, "5428179904345867": 6, "5696273642213351": 6, "194": 6, "27099769667470536": 6, "1580741708125475": 6, "2709564785634315": 6, "10900413894771653": 6, "268": 6, "2709799905898163": 6, "13705914456987853": 6, "27097230608092054": 6, "12790870116376127": 6, "337": 6, 
"2709499903064464": 6, "10123180962907431": 6, "2710895886052581": 6, "26565663774320425": 6, "411": 6, "2708711012023424": 6, "005637048678674678": 6, "27092322402109364": 6, "06902647427781451": 6, "482": 6, "2712140349882": 6, "4076704953178294": 6, "515": 6, "27090080367174": 6, "04187106800188596": 6, "550": 6, "27086925247190047": 6, "003371853599610078": 6, "2708933298483799": 6, "032781796328385376": 6, "623": 6, "27087205624489635": 6, "006806773659187283": 6, "658": 6, "2708869511176179": 6, "025009489814943348": 6, "695": 6, "2711465077924297": 6, "3311125627707556": 6, "2708756855936628": 6, "011249102380159387": 6, "766": 6, "27087301924224993": 6, "007985924302396141": 6, "802": 6, "2708685399954944": 6, "00249856291483601": 6, "27121879554836553": 6, "4130244908975993": 6, "880": 6, "2708693196600531": 6, "0034541978803366022": 6, "918": 6, "27110195265802334": 6, "27994943662091765": 6, "956": 6, "2708682582859318": 6, "0021532199144365088": 6, "27087024523986086": 6, "0045884092728113585": 6, "27087351807632193": 6, "008596600952859433": 6, "2710818633795896": 6, "2567049271070902": 6, "109": 6, "27103241786565463": 6, "1990111983307052": 6, "146": 6, "2710350879598171": 6, "20214459724424078": 6, "183": 6, "2708688328221868": 6, "00285750520671645": 6, "27100832234449684": 6, "17064008990759916": 6, "258": 6, "27268613236193845": 6, "8725420109733135": 6, "296": 6, "27119617446689237": 6, "387533542012365": 6, "2708691110831552": 6, "0031985656730512953": 6, "27086852174155146": 6, "002476186542950981": 6, "27135383618835024": 6, "5626643670396761": 6, "449": 6, "2709819654433871": 6, "1394077979875128": 6, "488": [6, 8], "2718548944510965": 6, "0858347526799794": 6, "1508084699212935": 6, "03329943145150872": 6, "00025672309762227527": 6, "27249853374634975": 6, "702026434077893": 6, "604": 6, "27095660957755363": 6, "10916094511173127": 6, "643": 6, "27102160995407715": 6, "18630665884100353": 6, "681": 6, "27095708822582026": 6, 
"10973377642487026": 6, "27088222008661084": 6, "019235980282946118": 6, "762": 6, "2708703086029017": 6, "004666043957133775": 6, "799": 6, "27095279044622245": 6, "1045877457096882": 6, "840": 6, "2709408288690431": 6, "09023455456986404": 6, "9289218260898663": 6, "8200088368788958": 6, "917": 6, "27086675101898655": 6, "00030502148265565063": 6, "957": 6, "2710491243757999": 6, "21858260742423916": 6, "001": 6, "1491615840508995": 6, "024725853754515203": 6, "040": 6, "2709462479577586": 6, "0967427718847167": 6, "default_studi": 6, "252": 6, "332": 6, "3501": 6, "942111261296": 6, "5451": 6, "207265576796": 6, "459": 6, "1049201007814": 6, "9964": 6, "541364058234": 6, "3543": 6, "953608539901": 6, "570": 6, "6837": 6, "057544630979": 6, "613": 6, "2507": 6, "1794330606067": 6, "650": 6, "21534": 6, "719219668405": 6, "726": 6, "2899": 6, "736555614694": 6, "294e": 6, "760e": 6, "21674": 6, "445000284228": 6, "1049203123567": 6, "1049192609138": 6, "877": 6, "3630": 6, "72768093756": 6, "907": 6, "3431": 6, "942816967268": 6, "6908": 6, "462045154488": 6, "5964": 6, "65935954044": 6, "036": 6, "21070": 6, "107195348774": 6, "065": 6, "4977": 6, "068508997133": 6, "133": 6, "8873": 6, "669262669626": 6, "21387": 6, "63697424318": 6, "202": 6, "9958": 6, "573006910125": 6, "5182695600183": 6, "428": 6, "20684": 6, "56412138056": 6, "544": 6, "150": 6, "3435882510586": 6, "571": 6, "7068": 6, "705383113378": 6, "599": 6, "7150": 6, "482090052133": 6, "077": 6, "203": 6, "93637462922368": 6, "2570": 6, "5111262532305": 6, "21987": 6, "659957192194": 6, "9889": 6, "493204596083": 6, "413": 6, "7172": 6, "208490771303": 6, "9804": 6, "512701665093": 6, "555": 6, "585": 6, "9165": 6, "74081120673": 6, "0280270800017": 6, "161": 6, "1602933782954": 6, "888460860864": 6, "864": 6, "8414": 6, "932694243476": 6, "2270": 6, "540799189147": 6, "10383": 6, "79559309305": 6, "20815": 6, "025469865475": 6, "206": 6, "7560385808573": 6, "5264": 6, "4700789389035": 6, "3668": 
6, "255064135424": 6, "156": 6, "12174877890536": 6, "793408178086295": 6, "99902820845678": 6, "157": 6, "371632749506": 6, "88307313087517": 6, "140915461519354": 6, "218": 6, "153": 6, "66773675231477": 6, "177324126813716": 6, "77906017834145": 6, "186": 6, "52056745848623": 6, "4565714180547": 6, "6710444346508": 6, "294": 6, "30976119334312": 6, "62916671166313": 6, "023639423189294": 6, "053696900694": 6, "914617418880486": 6, "31140591484044": 6, "201": 6, "33573874994386": 6, "569769302718845": 6, "5781354926491789": 6, "412": 6, "190": 6, "1384885119049": 6, "87666716965626": 6, "2537791489618": 6, "451": 6, "076949848299": 6, "9559574710535281": 6, "0032830967319653665": 6, "764974036324": 6, "03910427457823": 6, "406811480459925": 6, "164": 6, "4477304958181": 6, "701690847791482": 6, "819274780536123": 6, "567": 6, "87939164358104": 6, "32187661108304": 6, "660320437878754": 6, "607": 6, "01705178481896": 6, "61397716361812": 6, "603665957830847": 6, "645": 6, "155": 6, "73257312230092": 6, "759645965959294": 6, "503212714246787": 6, "684": 6, "154": 6, "46848394144124": 6, "8546740801317": 6, "35327336610912": 6, "724": 6, "20421802817864": 6, "57596974747163": 6, "84756262407801": 6, "51233215278089": 6, "3564642040401464": 6, "5034542273159819": 6, "207": 6, "68667089892196": 6, "034895878929095": 6, "03653571911285094": 6, "842": 6, "102": 6, "52277054278186": 6, "01961499216484045": 6, "670937191883546": 6, "881": 6, "28722475694815": 6, "012434370509176538": 6, "34222704431493": 6, "921": 6, "87402050281146": 6, "008452015347522093": 6, "914863578437455": 6, "38847505937936": 6, "01573542234868893": 6, "99307522974174": 6, "999": 6, "96336195786391": 6, "009845516063879428": 6, "59422914099683": 6, "19345618324213": 6, "009382525091504246": 6, "35573659237662": 6, "080": 6, "30772721342525": 6, "010579672066291478": 6, "35550323165882": 6, "23970902543148": 6, "013369359066405863": 6, "4744102498801": 6, "34331248758777": 6, "011398351701814368": 
6, "54146340620301": 6, "195": 6, "104535853341": 6, "011708779850509646": 6, "682286191624579e": 6, "0653774146952": 6, "009806826677473646": 6, "90274406278985": 6, "64646042813787": 6, "0038598153381434685": 6, "20918134828555": 6, "68420472011734": 6, "0032474576673554513": 6, "35551178979624": 6, "85985201823172": 6, "003187930738019005": 6, "29431603544847": 6, "399": 6, "21583898009355": 6, "003122319313153475": 6, "83526418992966": 6, "437": 6, "34787242859676": 6, "002781955938462633": 6, "76228981520067": 6, "478": 6, "70914272129673": 6, "0023173546614751305": 6, "3000082904498813": 6, "519": 6, "10492031097328": 6, "002606064524407": 6, "7861330234653922e": 6, "1049154281806": 6, "0029210589377408366": 6, "200933937391094e": 6, "10492028002287": 6, "06431564840324226": 6, "2981641934644904e": 6, "56066541774658": 6, "0010848843623839548": 6, "151493073951163": 6, "76337597039308": 6, "004134805589645341": 6, "88115336652716": 6, "58009587759925": 6, "004763418454688096": 6, "02920758025023": 6, "113": 6, "35230417583477": 6, "0009098023238189749": 6, "57100980886017": 6, "809": 6, "30807467406214": 6, "03739791555156691": 6, "12818940557025": 6, "850": 6, "44100655116532": 6, "006380481141720477": 6, "4882351186755": 6, "891": 6, "35181001564942": 6, "0036244007454981787": 6, "608797806921866": 6, "124": 6, "3719027482892": 6, "0014198536004321608": 6, "05588994284273": 6, "28568052794907": 6, "005434972462746285": 6, "215759789700954": 6, "06": 6, "018": 6, "20325": 6, "66479442037": 6, "9696417046589247": 6, "132": 6, "21507621375022": 6, "0004528978867024753": 6, "80386923876023": 6, "85570350846885": 6, "0016948043699497222": 6, "455627755557016": 6, "contrast": [6, 7], "relplot": 6, "col": [6, 7], "facet_kw": 6, "axisgrid": [6, 7], "facetgrid": 6, "0x7fb3797f6b30": 6, "noramlis": 6, "unlog": 6, "yield": [6, 7, 12, 13], "mse": 6, "1126": 6, "56968721": 6, "20237903": 6, "revers": [6, 7, 12, 14], "onto": 6, "action": 6, "importantli": 6, "easili": 
[6, 7], "94824194": 6, "92008694": 6, "instruct": 6, "untransform": 6, "wish": 6, "cut": [6, 7], "10um": 6, "ptr_config_log_transform": 6, "ptr_transformed_studi": 6, "ptr_and_transform_exampl": 6, "518": 6, "002341918451736245": 6, "805": 6, "0024908979029632677": 6, "847": 6, "007901407671048116": 6, "888": 6, "00496231674623194": 6, "0026848278110363512": 6, "0010872728889471893": 6, "000": 6, "008706109201510277": 6, "027": 6, "093": 6, "002999462459688867": 6, "00825680029907454": 6, "148": 6, "007901407993550248": 6, "007901405163828307": 6, "0021653695362066753": 6, "002869169486971014": 6, "0010855652626111146": 6, "00550533804299308": 6, "002236800860454562": 6, "006105985607235417": 6, "004846526544994462": 6, "006964668794465202": 6, "670": 6, "699": 6, "008384326901042542": 6, "730": 6, "001082194093844804": 6, "761": 6, "0010807084256204563": 6, "948": 6, "005505338042993082": 6, "979": 6, "005247934991526694": 6, "0010803393728928605": 6, "005218354425190125": 6, "138": 6, "004999207507691546": 6, "0015694919308122948": 6, "326": 6, "0019757694194001384": 6, "421": 6, "002341918451736244": 6, "453": 6, "00368328296527152": 6, "521": 6, "003412828259848677": 6, "551": 6, "004412110711416997": 6, "616": 6, "647": 6, "0021743798524909573": 6, "0022761245849848527": 6, "0010805768178458735": 6, "750": 6, "001080400188305814": 6, "784": 6, "0010805009783570441": 6, "0010804680472500541": 6, "0010803723579987025": 6, "890": 6, "001080969596032512": 6, "925": 6, "0010800333715082816": 6, "0010802574700236845": 6, "0010814994986419817": 6, "037": 6, "001080161136846237": 6, "071": 6, "0010800254136811547": 6, "107": 6, "0010801290036870739": 6, "001080037482216557": 6, "179": 6, "0010801015705851358": 6, "0010812122378841013": 6, "0010800531021304936": 6, "291": 6, "00108004162698813": 6, "328": 6, "0010800223466649803": 6, "364": 6, "0010815197263834202": 6, "0010800257029027847": 6, "0010810223438672223": 6, "0010800211339555509": 6, "513": 6, 
"0010800296871141684": 6, "0010800437739166451": 6, "0010809366267195716": 6, "627": 6, "001080725386603206": 6, "0010807368035830652": 6, "704": 6, "0010800236072155854": 6, "741": 6, "0010806223050773966": 6, "779": 6, "0010876516369772728": 6, "00108142358144501": 6, "857": 6, "0010800248050489667": 6, "894": 6, "001080022268085466": 6, "0010820922958715991": 6, "969": 6, "0010805094397523254": 6, "007": 6, "0010841993753324146": 6, "007899735988203994": 6, "086": 6, "0010868762004637347": 6, "001080400750193767": 6, "163": 6, "0010806791616300314": 6, "0010804028029753213": 6, "0010800812188506515": 6, "0010800299598580359": 6, "0010803843696362083": 6, "001080333048974234": 6, "394": [6, 7], "432": 6, "001080014645182176": 6, "473": 6, "0010807968027851892": 6, "516": 6, "007907028395366658": 6, "553": 6, "0010803563024666294": 6, "inted": 6, "opter": 6, "probabilst": 6, "lossi": 6, "anywai": 6, "intention": 6, "clip": [6, 7], "cannot": 6, "timepoint": 6, "aux_column": [6, 8], "accord": [6, 7, 8], "aux_col_config": 6, "aux_descriptors_dataset": 6, "train_with_conc": 6, "aux1": 6, "aux_col_studi": 6, "covariate_exampl": 6, "aux1_model": 6, "323": 6, "5186": 6, "767663956718": 6, "522": 6, "4679": 6, "740824270968": 6, "575": 6, "4890": 6, "6705099499995": 6, "3803": 6, "9324375833753": 6, "667": 6, "3135": 6, "6497388676926": 6, "2518812859375": 6, "778": 6, "4309": 6, "124112370974": 6, "30159424580074": 6, "897": 6, "4357": 6, "02827013125": 6, "1437929337522": 6, "45281013": 6, "shape": [6, 7], "thrown": [6, 8], "prediciton": 6, "regardless": 6, "utilis": [6, 7], "seper": 6, "vector_covariate_config": 6, "precomputed_descriptor": 6, "train_with_fp": 6, "aux_transform": [6, 8], "vector_covariate_studi": 6, "vector_aux_exampl": 6, "vector_covariate_model": 6, "2200": 6, "6817959410578": 6, "011994365911634164": 6, "95660880078": 6, "029071783512897825": 6, "5798": 6, "564494725643": 6, "022631709120790048": 6, "2198637677605415": 6, "972": 6, "2899178898048": 
6, "8916194399474267": 6, "556": 6, "3336440433073": 6, "5914093983615214": 6, "614": 6, "653": 6, "3036472748931": 6, "6201811079699818": 6, "3807": 6, "8035919667395": 6, "901e": 6, "892e": 6, "914e": 6, "752": 6, "5019": 6, "459500770764": 6, "1376436589359351": 6, "4017711284796": 6, "893": 6, "771": 6, "797115414836": 6, "74340620175102": 6, "train_smil": [6, 8], "train_i": [6, 8], "train_aux": [6, 8], "test_smil": [6, 8], "test_i": [6, 8], "test_aux": [6, 8], "512": 6, "legth": 6, "39754917": 6, "465": 6, "06352766": 6, "52031134": 6, "341": 6, "89875316": 6, "371": 6, "5516046": 6, "85042171": 6, "436": 6, "33406203": 6, "91439129": 6, "80585907": 6, "346": 6, "48565041": 6, "protein": [6, 12, 14], "alongsid": 6, "chemic": [6, 7, 12, 13], "sequenc": [6, 12, 14], "former": 6, "wherea": 6, "latter": [6, 7], "interact": 6, "basi": [6, 7], "toxinpred3": 6, "No": [6, 9, 12], "demponstr": 6, "zscale_covariate_config": 6, "zscale_covariate_studi": 6, "zscale_aux_exampl": 6, "zscale_covariate_model": 6, "458": 6, "8886986575836505": 6, "kneighborsclassifier_algorithm_hash": 6, "e51ca55089f389fc37a736adb2aa0e42": 6, "metric__e51ca55089f389fc37a736adb2aa0e42": 6, "n_neighbors__e51ca55089f389fc37a736adb2aa0e42": 6, "weights__e51ca55089f389fc37a736adb2aa0e42": 6, "unlik": 6, "21269231": 6, "91153846": 6, "29038462": 6, "69846154": 6, "22230769": 6, "99521739": 6, "59826087": 6, "34695652": 6, "03086957": 6, "13391304": 6, "08083333": 6, "6125": 6, "82916667": 6, "05083333": 6, "56083333": 6, "02178571": 6, "91785714": 6, "45392857": 6, "37642857": 6, "03107143": 6, "93357143": 6, "78964286": 6, "62928571": 6, "50857143": 6, "50107143": 6, "1232": 6, "3364": 6, "2328": 6, "1368": 6, "2304": 6, "7062": 6, "x_": 6, "vmin": 6, "vmax": 6, "cmap": 6, "spectral": 6, "248bit": 6, "128bit": 6, "minimz": 6, "consist": 6, "generaliz": 6, "minimize_std_dev": 6, "minimise_std_dev": [6, 9], "std": [6, 7, 12, 14], "dev": [6, 7], "example_multi": 6, "parameter_analysi": 6, 
"set_metric_nam": 6, "740": 6, "4008740644240856": 6, "9876203329634794": 6, "331": 6, "3561484909673425": 6, "9875061220991906": 6, "472": 6, "7856521165563053": 6, "21863029956806662": 6, "525": 6, "9125905675311808": 6, "7861693342190089": 6, "603": 6, "5238765412750027": 6, "2789424384877304": 6, "5348363849100434": 6, "5741725628917808": 6, "746": 6, "0072511048320134": 6, "2786318125997387": 6, "9625764609276656": 6, "27575381401822424": 6, "1114006274062536": 6, "7647766019001522": 6, "7801680863916906": 6, "2725738454485389": 6, "121": 6, "785652116470164": 6, "21863029955530786": 6, "152": 6, "785651973436432": 6, "21863032832257323": 6, "6101359993004856": 6, "3011280543457062": 6, "209": 6, "5361950698070447": 6, "23560786523195643": 6, "5356113574175657": 6, "5769721187181905": 6, "543430366921729": 6, "514747412346662": 6, "508": [6, 8], "5194661889628072": 6, "40146744515282495": 6, "659": 6, "659749443628722": 6, "6659085938841998": 6, "876": 6, "1068495306229729": 6, "24457822094737378": 6, "8604898820838102": 6, "7086875504668667": 6, "949": 6, "5919869916997383": 6, "2367498627927979": 6, "2497762395862362": 6, "10124660026536195": 6, "205": 6, "study_name_1": 6, "669": 6, "0621601907738047": 6, "2749020946925899": 6, "xxx": 6, "values_neg_mean_squared_error": 6, "values_standard": 6, "twinx": 6, "r": 6, "floor": 6, "ceil": 6, "align": 6, "set_ytick": 6, "linspac": 6, "set_xtick": 6, "text": [6, 7, 8, 12], "pareto": 6, "front": 6, "plot_pareto_front": 6, "plot_param_import": 6, "dictionari": [6, 10], "ordereddict": 6, "descend": 6, "algortihm": 6, "impact": 6, "durat": 6, "total_second": 6, "target_nam": 6, "relationship": 6, "plot_parallel_coordin": [6, 8, 9, 11], "param": [6, 7, 9, 10], "taken": [6, 7], "101": 6, "precomputed_config": 6, "precomputed_studi": 6, "precomputed_exampl": 6, "precomputed_model": 6, "785": 6, "788": 6, "3014": 6, "274803630188": 6, "471088599086": 6, "03592375122963953": 6, "511": 6, "3029": 6, "113810544919": 6, 
"8153295905650357": 6, "4358": 6, "575772003129": 6, "unseen": 6, "caus": 6, "111": 6, "new_molecul": 6, "112": 6, "tempfil": 6, "temp": [6, 7], "store": [6, 7, 11], "temporari": [6, 10], "extract": 6, "1st": 6, "example_fp": 6, "namedtemporaryfil": 6, "temp_fil": 6, "len": [6, 7], "to_csv": 6, "292": 6, "65709987": 6, "64327077": 6, "common": 7, "proper": 7, "optuna_az": 7, "process": [7, 8], "draw": 7, "ipythonconsol": 7, "ipython": 7, "displai": 7, "os": 7, "listdir": 7, "isfil": 7, "walk": 7, "handi": 7, "var": 7, "1v": 7, "9y_z128d7gvcp8mf8q0pz3ch0000gq": 7, "ipykernel_82497": 7, "796203442": 7, "deprecationwarn": 7, "deprec": 7, "med": 7, "titles": 7, "fontsiz": 7, "labels": 7, "xtick": 7, "ytick": 7, "rcparam": 7, "whitegrid": 7, "set_styl": 7, "white": 7, "inlin": 7, "3336016810": 7, "matplotlibdeprecationwarn": 7, "ship": 7, "v0_8": 7, "api": [7, 8], "boolean": 7, "nomin": 7, "convert": [7, 12], "conduct": 7, "primarydf": 7, "loadsdf": 7, "inchi": 7, "skeletonspher": 7, "nm": 7, "id": 7, "romol": 7, "ic50": 7, "\u00b5m": 7, "c1ccc2c": 7, "co2": 7, "004320939": 7, "1s": 7, "c8h7no2": 7, "c10": 7, "qrcgftxrxymjo": 7, "uhfffaoysa": 7, "86075": 7, "kinas": 7, "p38": 7, "rdchem": 7, "0x7fd1c0d34040": 7, "c1ccc": 7, "ccc2ccccc2occ3ccc": 7, "882397308": 7, "c22h20o3": 7, "c23": 7, "ytdaoqyeyfcini": 7, "89637": 7, "kd": 7, "retinoid": 7, "receptor": 7, "0x7fd1f8f965e0": 7, "130299026": 7, "0x7fd1f8f96650": 7, "nh": 7, "48148606": 7, "c9h7no": 7, "c11": 7, "lisfmebwquvkpj": 7, "n93": 7, "n82": 7, "n65": 7, "n36": 7, "n33": 7, "ki": 7, "nki": 7, "carbon": 7, "anhydras": 7, "xii": 7, "ncarbon": 7, "0x7fd1f8f966c0": 7, "nc1ccccn1": 7, "c7h8n2o": 7, "h2": 7, "5h": 7, "qrokotbwfzitjz": 7, "86233": 7, "nicotin": 7, "phosphoribosyltransferas": 7, "0x7fd1f8f96730": 7, "renam": 7, "conveni": [7, 8, 9, 12], "rest": 7, "moltosmil": 7, "isomericsmil": 7, "c1coc2ccccc2n1": 7, "004321": 7, "coc2ccccc2ccc2ccccc2": 7, "882397": 7, "130299": 7, "c1ccc2ccccc2": 7, "481486": 7, 
"stick": 7, "engin": 7, "assum": [7, 12, 13], "stage": 7, "slightli": 7, "pose": 7, "wise": 7, "uniqu": 7, "outlin": 7, "occurr": 7, "preserv": 7, "vari": 7, "recommend": 7, "abil": 7, "being": 7, "df_po": 7, "dedup": [7, 12], "df_rnd": 7, "df_max": 7, "df_avg": 7, "df_med": 7, "deduplidc": 7, "397": 7, "indici": 7, "coc1cc2ncnc": 7, "nc3ccc": 7, "scc4ccccc4": 7, "cl": 7, "c3": 7, "c2cc1oc": 7, "282579": 7, "912929": 7, "cc1cccc": 7, "nc2ncnc3ccc": 7, "c4ccccc4": 7, "cc23": 7, "n2cc": 7, "cc2c": 7, "nc2cccc": 7, "n2": 7, "065502": 7, "390": 7, "958607": 7, "392": 7, "oc2cccc": 7, "cn1nc": 7, "c2cnc": 7, "c3ccc": 7, "nc1c1": 7, "oc1nc": 7, "oc2cc": 7, "c3ccccc3": 7, "c3cccc": 7, "cn": 7, "936291": 7, "uniquify_by_posit": 7, "uniquify_randomli": 7, "essenc": 7, "uniquify_by_valu": 7, "highest": 7, "minim": [7, 9], "densiti": 7, "dpi": 7, "kdeplot": 7, "shade": 7, "orang": 7, "ndigit": 7, "blue": 7, "keepaverag": 7, "deeppink": 7, "hold": [7, 8], "assess": 7, "aim": 7, "simpli": 7, "sai": 7, "veri": 7, "intern": [7, 10], "resembl": 7, "easiest": 7, "especi": 7, "reli": 7, "stochast": 7, "initi": [7, 8, 9], "train_ran": 7, "test_ran": 7, "time_column": 7, "old": 7, "datapoint": [7, 10, 12, 13], "accru": 7, "entri": [7, 12, 13], "timestamp": 7, "top": 7, "df_med_tempor": 7, "fake": 7, "insert": 7, "nccccccoc1ccc2c": 7, "n1cc": 7, "cc1c": 7, "684240": 7, "1037": 7, "481464": 7, "1036": 7, "c1cn": 7, "n2c": 7, "nc3cccc": 7, "n3": 7, "cc3cc3": 7, "732625": 7, "1035": 7, "568332": 7, "1034": 7, "790259": 7, "1033": 7, "train_tempor": 7, "test_tempor": 7, "highli": 7, "skew": 7, "respcol": 7, "fd": [7, 12, 13], "histogram": [7, 12, 13], "determin": [7, 12, 13], "balanc": 7, "train_str": 7, "test_str": 7, "realist": 7, "deplopi": 7, "emul": [7, 12, 13], "hop": 7, "opportun": 7, "seri": 7, "push": 7, "domain": 7, "realibl": 7, "challeng": 7, "scaf_split": 7, "train_sca": 7, "test_sca": 7, "ground": 7, "df_val": 7, "roughli": [7, 9], "evid": 7, "pronounc": 7, "dodgerblu": 7, 
"middl": 7, "saffold": 7, "discuss": 7, "propreti": 7, "endpoint": 7, "routin": 7, "linearis": 7, "variat": 7, "gaussian": 7, "realiti": 7, "nearli": 7, "exhibit": 7, "superior": 7, "wide": 7, "xc50_data": 7, "keepallnodedupl": [7, 12], "pxc50_data": 7, "zip": 7, "jointplot": 7, "suptitl": 7, "plot_margin": 7, "rugplot": 7, "crimson": 7, "height": 7, "clip_on": 7, "jointgrid": 7, "0x7fd1f984f670": 7, "heteroscedast": 7, "homoscedast": 7, "transorm": 7, "0x7fd1eb924700": 7, "logartihm": 7, "power": [7, 12, 14], "um": 7, "dataset_transform": 7, "THe": 7, "0x7fd1db908550": 7, "had": 7, "reverse_transform": [7, 12], "0x7fd1bc05b4c0": 7, "mervin": 7, "framework": 7, "somewher": 7, "unavoid": 7, "ideal": 7, "deviat": [7, 12, 14], "\u03c3": 7, "heterogen": 7, "versu": 7, "assimil": 7, "hypothesi": 7, "degre": 7, "p_": 7, "activityt": 7, "cumul": 7, "cdf": 7, "eq": 7, "equal": [7, 9], "delta": 7, "vec": 7, "frac": 7, "erf": 7, "overrightarrow": 7, "sigma": 7, "concret": 7, "arbitrari": 7, "unknown": 7, "therefor": 7, "schemat": 7, "lookup": 7, "tabl": 7, "sd": 7, "scenario": [7, 12, 13], "philosoph": 7, "delimit": [7, 12, 14], "operand": 7, "censor": 7, "far": 7, "granular": 7, "therebi": 7, "combin": 7, "becom": 7, "jcheminf": 7, "biomedcentr": 7, "1186": 7, "s13321": 7, "00539": 7, "sec12": 7, "pxc50_threshold": 7, "pxc50_std": 7, "exmapl": 7, "ptr_data": 7, "gather": 7, "certainti": 7, "lower_reproduc": 7, "upper_reproduc": 7, "dataload": 7, "somewhat": 7, "invert": 7, "bell": 7, "kde": 7, "behav": 7, "patch": 7, "mpatch": 7, "line2d": 7, "ax_joint": 7, "axhlin": 7, "linestyl": 7, "ax_marg_x": 7, "axvlin": 7, "ax_marg_i": 7, "region": 7, "uncert_color": 7, "purpl": 7, "uncert_region": 7, "rectangl": 7, "fill": 7, "add_patch": 7, "box": 7, "nthreshold": 7, "nptr": 7, "nregion": 7, "fancybox": 7, "borderaxespad": 7, "leg": 7, "get_legend": 7, "legendhandl": 7, "set_color": 7, "set_alpha": 7, "tight": 7, "layout": 7, "subplots_adjust": 7, "set_size_inch": 7, "1725493911": 
7, "attribut": 7, "minor": 7, "legend_handl": 7, "dash": 7, "histrogram": 7, "aforement": 7, "ptrtransform": [7, 12, 14], "ptr_transform": 7, "histplot": 7, "stat": 7, "scipi": 7, "resp_col": [7, 8], "pearsonr": 7, "std_df": 7, "groupbi": 7, "mdn_df": 7, "std_vs_median": 7, "suffix": 7, "_std": 7, "_median": 7, "dropna": 7, "activity_std": 7, "activity_median": 7, "088539": 7, "674782": 7, "265123": 7, "138620": 7, "157605": 7, "stdev": 7, "stat_func": 7, "plot_joint": 7, "zorder": 7, "0x7fd1def2f8b0": 7, "signific": 7, "assumpt": 7, "fulfil": 7, "ptr_train_ran": 7, "ptr_test_ran": 7, "ptr_train_str": 7, "ptr_test_str": 7, "ptr_train_tempor": 7, "ptr_test_tempor": 7, "ptr_train_sca": 7, "ptr_test_sca": 7, "xlim": 7, "pypoetri": 7, "virtualenv": 7, "_qsktrft": 7, "py3": 7, "warn_singular": 7, "msg": 7, "peak": 7, "extem": 7, "dsitribut": 7, "awai": 7, "bias": 7, "enum": [8, 9, 10, 12], "building_configuration_enum": [8, 10], "configuration_enum": [8, 10], "interface_enum": [8, 10], "model_runner_enum": [8, 10], "objective_enum": [8, 10], "optimization_configuration_enum": [8, 10], "prediction_configuration_enum": [8, 10], "return_values_enum": [8, 10], "visualization_enum": [8, 10], "merge_train_and_test_data": 8, "isvalid": 8, "read_data": 8, "filenam": [8, 10], "smiles_col": [8, 10, 12, 13], "aux_col": 8, "invalid": 8, "pars": [8, 9, 11], "tupl": [8, 12, 13], "ambigu": 8, "smiles_": 8, "y_": [8, 10, 12], "aux_": 8, "factori": [8, 9], "intermediate_training_dataset_fil": 8, "intermediate_test_dataset_fil": 8, "get_merged_set": 8, "check_set": 8, "scalingfittingerror": 8, "descriptor_str": 8, "insuffici": 8, "unfittedsklearnscla": 8, "novalidsmil": 8, "mol_from_smi": 8, "numpy_from_rdkit": 8, "dtype": [8, 12], "moldescriptor": 8, "nameparameterdataclass": [8, 9, 12], "abc": [8, 9, 12], "abstract": [8, 9, 12], "parallel_compute_descriptor": 8, "n_core": 8, "parallel": 8, "rdkitdescriptor": 8, "liter": [8, 9, 12, 13], "pathlib": 8, "get_fitted_scaler_for_fp": 8, 
"saved_param": 8, "get_fitted_scal": 8, "set_unfitted_scaler_data": 8, "fp_info": 8, "canonicalsmil": 8, "scaffold": [8, 12, 13], "genericscaffold": 8, "validdescriptor": 8, "descriptor_from_config": 8, "return_failed_idx": 8, "configur": [8, 9, 10, 11], "score_al": 8, "get_scor": 8, "score_all_smil": 8, "get_train_test_scor": 8, "get_merged_train_scor": 8, "get_ecfp_fpinfo": 8, "get_ecfpcount_fpinfo": 8, "explain_ecfp": 8, "len_feat": 8, "get_fp_info": 8, "exp_df": 8, "descript": [8, 9], "fp_idx": 8, "strt_idx": 8, "runshap": 8, "x_pred": 8, "shap": 8, "shapexplain": 8, "popul": 8, "explainpr": 8, "shallow": 8, "validate_cls_input": 8, "pi_zero": 8, "pr": 8, "melloddi": 8, "sparsechem": 8, "imbal": 8, "bedroc_scor": [8, 9], "truchon": 8, "j": 8, "bayli": 8, "screen": 8, "bad": 8, "recognit": 8, "2007": 8, "concord": 8, "statist": 8, "qualiti": 8, "harald": 8, "On": 8, "surviv": 8, "bound": 8, "2008": 8, "1209": 8, "1216": 8, "posterior": 8, "mark": 8, "abstractmethod": [8, 9], "predict_uncert": 8, "quantifi": 8, "qsartunamodel": 8, "nonetyp": [8, 9], "sent": 8, "get_metadata": 8, "train_scor": [8, 11], "test_scor": [8, 10, 11], "get_transform": 8, "perform_ptr": 8, "wrap_model": 8, "save_model": 8, "novaliddescriptor": 8, "null_scor": 8, "predict_pl": 8, "model_path": 8, "inference_path": 8, "argserror": 8, "issu": [8, 12], "uncertaintyerror": 8, "correctli": 8, "auxcovariatemiss": 8, "precomputederror": 8, "validate_arg": 8, "validate_uncertainti": 8, "check_precomp_arg": 8, "validate_set_precomput": 8, "validate_aux": 8, "doctitl": 8, "docstr": 8, "type_base_schema": 8, "tp": 8, "wyfo": 8, "json_schema": 8, "patch_schema_gener": 8, "patch_schema_optunaz": 8, "split_optim": 8, "base_chemprop_param": 8, "alg": [8, 9], "pop": 8, "fix": [8, 9], "run_studi": 8, "storag": [8, 11], "trial_number_offset": [8, 10], "log_scor": 8, "main_scor": 8, "outfnam": 8, "plot_by_configur": 8, "conf": 8, "plot_slic": [8, 9, 11], "folder_path": 8, "file_format": [8, 9, 11], "png": 
[8, 9], "plot_contour": [8, 9, 11], "static": 8, "plot_histori": [8, 9, 11], "set_build_cach": 9, "preexist": 9, "remove_algo_hash": 9, "buildconfig_from_tri": 9, "encode_nam": 9, "cenam": 9, "suggest_alg_param": 9, "suggest_aux_param": 9, "desc": 9, "check_invalid_descriptor_param": 9, "adaboostclassifierparamet": 9, "lassoparamet": 9, "kneighborsclassifierparamet": 9, "kneighborsregressorparamet": 9, "logisticregressionparamet": 9, "plsparamet": 9, "randomforestparamet": 9, "ridgeparamet": 9, "svcparamet": 9, "svrparamet": 9, "xgbregressorparamet": 9, "prfclassifierparamet": 9, "bootstrap": 9, "new_syn_data_frac": 9, "chempropregressorparamet": 9, "chempropclassifierparamet": 9, "chempropregressorpretrainedparamet": 9, "chemprophyperoptclassifierparamet": 9, "chemprophyperoptregressorparamet": 9, "calibratedclassifiercvparamet": 9, "mapieparamet": 9, "regressionscor": 9, "classificationscor": 9, "closer": 9, "greater": 9, "manhattan": 9, "trainarg": 9, "tanh": 9, "leakyrelu": 9, "prelu": 9, "selu": 9, "elu": 9, "turn": [9, 12, 13], "morgan_count": 9, "rdkit_2d": 9, "rdkit_2d_norm": 9, "mpnn_first_ffn": 9, "mpnn_last_ffn": 9, "linked_hidden_s": 9, "constrain": 9, "l3": 9, "init_lr_exp": 9, "final_lr_exp": 9, "warmup_epoch": 9, "l4": 9, "l5": 9, "l6": 9, "l7": 9, "l8": 9, "isanyof": 9, "obj": [9, 10], "detect_mode_from_alg": 9, "copy_path_for_scaled_descriptor": 9, "cv_split_strategi": 9, "use_cach": 9, "optuna_storag": 9, "set_cach": 9, "set_algo_hash": 9, "declar": 9, "pydant": 9, "don": 9, "classmethod": 9, "1024": 9, "shorter": 9, "output_fold": [9, 11], "use_xvfb": [9, 11], "imagefileformat": 9, "jpeg": 9, "jpg": 9, "svg": 9, "move_up_directori": 10, "attach_root_path": 10, "attach": 10, "loadjson": 10, "add_ellipsi": 10, "max_length": 10, "shorten_nam": 10, "mlflowcallback": 10, "tracking_uri": 10, "callback": 10, "uri": 10, "server": 10, "set_tracking_uri": 10, "prepare_tag": 10, "tag": 10, "tmp_buildconfig": 10, "create_depend": 10, 
"remove_schema_properti": 10, "add_boolean_guards_for_schema_properti": 10, "replacekei": 10, "input_": 10, "replacevalu": 10, "addsibl": 10, "delsibl": 10, "sibl": 10, "getref": 10, "context": 10, "recurs": 10, "nest": 10, "copytitl": 10, "oneof": 10, "replaceenum": 10, "singleton": 10, "const": 10, "addtitl": 10, "get_authorization_head": 10, "trackingdata": 10, "trial_numb": [10, 11], "trial_valu": 10, "trial_stat": 10, "all_cv_test_scor": 10, "dataclass": 10, "removeprefix": 10, "prefix": 10, "round_scor": 10, "internaltrackingcallback": 10, "progress": 10, "buildtrackingdata": 10, "response_column_nam": 10, "test_point": 10, "track_build": 10, "mkdict": 10, "load_df_from_fil": 10, "remove_failed_idx": 10, "failed_idx": 10, "md5_hash": 10, "md5": 10, "buildingconfigurationenum": 11, "configurationenum": 11, "general_hyperparamet": 11, "hyper_paramet": 11, "general_regressor": 11, "general_classifi": 11, "metadata_besttri": 11, "metadata_bestvalu": 11, "general_dis": 11, "general_paramet": 11, "task_optim": 11, "task_build": 11, "data_inputcolumn": 11, "data_responsecolumn": 11, "data_train": 11, "data_test": 11, "descriptors_avalon": 11, "descriptors_avalon_nbit": 11, "descriptors_ecfp": 11, "descriptors_ecfp_radiu": 11, "descriptors_ecfp_nbit": 11, "descriptors_ecfpcount": 11, "descriptors_ecfpcounts_radiu": 11, "descriptors_ecfpcounts_usefeatur": 11, "descriptors_pathfp": 11, "descriptors_pathfp_maxpath": 11, "descriptors_pathfp_fps": 11, "descriptors_maccskei": 11, "descriptors_unsc_physchem": 11, "descriptors_physchem": 11, "descriptors_physchem_rdkitnam": 11, "descriptors_unsc_jazzi": 11, "descriptors_jazzi": 11, "descriptors_jazzy_jazzynam": 11, "descriptors_precomput": 11, "descriptors_precomputed_fil": 11, "descriptors_precomputed_input_columnn": 11, "descriptors_precomputed_response_column": 11, "descriptors_unsc_zscal": 11, "descriptors_zscal": 11, "descriptors_smil": 11, "descriptors_smiles_and_si": 11, "descriptors_smiles_and_si_fil": 11, 
"descriptors_smiles_and_si_input_column": 11, "descriptors_smiles_and_si_aux_weight_pc": 11, "descriptors_sc": 11, "descriptors_scaled_descriptor": 11, "descriptors_scaled_descriptor_paramet": 11, "descriptors_composit": 11, "settings_mod": 11, "settings_mode_regress": 11, "settings_mode_classif": 11, "settings_cross_valid": 11, "settings_direct": 11, "settings_n_tri": 11, "settings_n_job": 11, "settings_shuffl": 11, "algorithms_low": 11, "algorithms_high": 11, "algorithms_q": 11, "algorithms_interface_sklearn": 11, "algorithms_interface_xgboost": 11, "algorithms_rfregressor": 11, "algorithms_rfclassifi": 11, "algorithms_rf_max_featur": 11, "algorithms_rf_max_depth": 11, "algorithms_rf_n_estim": 11, "algorithms_svr": 11, "algorithms_svr_c": 11, "algorithms_svr_gamma": 11, "algorithms_svc": 11, "algorithms_svc_c": 11, "algorithms_svc_gamma": 11, "algorithms_lasso": 11, "algorithms_lasso_alpha": 11, "algorithms_kneighborsclassifi": 11, "algorithms_kneighborsregressor": 11, "algorithms_kneighbors_n_neighbor": 11, "algorithms_kneighbors_metr": 11, "algorithms_kneighbors_weight": 11, "algorithms_ridg": 11, "algorithms_ridge_alpha": 11, "algorithms_plsregress": 11, "algorithms_plsregression_n_compon": 11, "algorithms_logisticregress": 11, "algorithms_logisticregression_solv": 11, "algorithms_logisticregression_c": 11, "algorithms_adaboostclassifi": 11, "algorithms_adaboostclassifier_n_estim": 11, "algorithms_adaboostclassifier_learning_r": 11, "algorithms_xgbregressor": 11, "algorithms_xgbregressor_max_depth": 11, "algorithms_xgbregressor_n_estim": 11, "algorithms_xgbregressor_learning_r": 11, "algorithms_prf": 11, "algorithms_prf_max_featur": 11, "algorithms_prf_max_depth": 11, "algorithms_prf_n_estim": 11, "algorithms_prf_minpysumleaf": 11, "algorithms_prf_use_py_gini": 11, "algorithms_prf_use_py_leaf": 11, "algorithms_chemprop": 11, "basechemprop": 11, "algorithms_chemprop_regressor": 11, "algorithms_chemprop_hyperopt_regressor": 11, "algorithms_chemprop_classifi": 
11, "algorithms_chemprop_hyperopt_classifi": 11, "algorithms_chemprop_activ": 11, "algorithms_chemprop_aggreg": 11, "algorithms_chemprop_aggregation_norm": 11, "algorithms_chemprop_batch_s": 11, "algorithms_chemprop_depth": 11, "algorithms_chemprop_dropout": 11, "algorithms_chemprop_epoch": 11, "algorithms_chemprop_ensemble_s": 11, "algorithms_chemprop_features_gener": 11, "algorithms_chemprop_ffn_hidden_s": 11, "algorithms_chemprop_ffn_num_lay": 11, "algorithms_chemprop_frzn": 11, "algorithms_chemprop_final_lr_ratio_exp": 11, "algorithms_chemprop_hidden_s": 11, "algorithms_chemprop_num_it": 11, "algorithms_chemprop_init_lr_ratio_exp": 11, "algorithms_chemprop_max_lr_exp": 11, "algorithms_chemprop_pretrained_model": 11, "algorithms_chemprop_search_parameter_level": 11, "algorithms_chemprop_startup_random_it": 11, "startup_random_it": 11, "algorithms_chemprop_warmup_epochs_ratio": 11, "algorithms_calibratedclassifiercv": 11, "algorithms_calibratedclassifiercv_ensembl": 11, "algorithms_calibratedclassifiercv_estim": 11, "algorithms_calibratedclassifiercv_method": 11, "algorithms_calibratedclassifiercv_n_fold": 11, "algorithms_calibratedclassifiercv_param": 11, "calibrated_param": 11, "algorithms_mapi": 11, "algorithms_mapie_alpha": 11, "interfaceenum": 11, "sklearn_set": 11, "xgboost_set": 11, "chemprop_set": 11, "prf_set": 11, "calibrated_set": 11, "modelrunnerdataframeenum": 11, "modelrunn": 11, "objectiveenum": 11, "attribute_trial_train_scor": 11, "extra_column_besthit": 11, "besthit": 11, "optimizationconfigurationenum": 11, "predictionconfigurationenum": 11, "data_dataset": 11, "sklearnreturnvalueenum": 11, "cross_validate_fit_tim": 11, "fit_tim": 11, "cross_validate_score_tim": 11, "score_tim": 11, "cross_validate_test_scor": 11, "cross_validate_train_scor": 11, "xgboostreturnvalueenum": 11, "visualizationenum": 11, "visualization_regressor": 11, "visualization_classifi": 11, "visualization_use_xvfb": 11, "visualization_output_fold": 11, 
"visualization_file_format": 11, "visualization_plot": 11, "visualization_plots_histori": 11, "visualization_plots_contour": 11, "visualization_plots_parallel_coordin": 11, "visualization_plots_slic": 11, "optuna_system_attrs_numb": 11, "_number": 11, "optuna_system_attrs_intermediate_valu": 11, "intermediate_valu": 11, "optuna_system_attrs_trial_id": 11, "trial_id": 11, "studyuserattr": 11, "trialuserattr": 11, "trialparam": 11, "algorithm_hash": 11, "mlflowlogparam": 11, "sklearnsplitt": 12, "get_n_split": 12, "cvsplitter": 12, "although": 12, "underli": 12, "get_sklearn_splitt": 12, "n_split": 12, "repeat": [12, 13], "edg": [12, 13], "stratifiedshufflesplit": [12, 13], "revert": [12, 13], "empti": 12, "kfold": 12, "affect": 12, "consecut": 12, "fd_bin": 12, "11879": 12, "10297": 12, "adjac": 12, "downstream": 12, "nativ": 12, "histogramstratifiedshufflesplit": 12, "test_fract": 12, "groupingsplitt": 12, "column_nam": [12, 13], "butina_clust": [12, 13], "cluster": [12, 13], "butina": [12, 13], "make_scaffold_gener": [12, 13], "murcko": [12, 13], "hetero": [12, 13], "difficulti": [12, 13], "novel": [12, 13], "datatransform": 12, "logarithm": [12, 14], "base_dict": 12, "ufunc": 12, "base_neg": 12, "reverse_dict": 12, "exp": 12, "transform_df": 12, "transform_on": 12, "reverse_transform_df": 12, "reverse_transform_on": 12, "auxtransform": 12, "transfor": 12, "auxiliary_data": 12, "usabl": [12, 14]}, "objects": {"": [[8, 0, 0, "-", "optunaz"]], "optunaz": [[8, 0, 0, "-", "builder"], [9, 0, 0, "-", "config"], [8, 0, 0, "-", "datareader"], [8, 0, 0, "-", "descriptors"], [8, 0, 0, "-", "evaluate"], [8, 0, 0, "-", "explainability"], [8, 0, 0, "-", "metircs"], [8, 0, 0, "-", "model_writer"], [8, 0, 0, "-", "objective"], [8, 0, 0, "-", "optbuild"], [8, 0, 0, "-", "predict"], [8, 0, 0, "-", "schemagen"], [8, 0, 0, "-", "three_step_opt_build_merge"], [10, 0, 0, "-", "utils"], [8, 0, 0, "-", "visualizer"]], "optunaz.builder": [[8, 1, 1, "", "build"]], "optunaz.config": [[9, 
2, 1, "", "Algorithm"], [9, 2, 1, "", "ModelMode"], [9, 2, 1, "", "NameParameterDataclass"], [9, 2, 1, "", "OptimizationDirection"], [9, 2, 1, "", "Task"], [9, 2, 1, "", "Visualization"], [9, 0, 0, "-", "build_from_opt"], [9, 0, 0, "-", "buildconfig"], [9, 0, 0, "-", "optconfig"]], "optunaz.config.ModelMode": [[9, 3, 1, "", "CLASSIFICATION"], [9, 3, 1, "", "REGRESSION"]], "optunaz.config.NameParameterDataclass": [[9, 4, 1, "", "new"]], "optunaz.config.OptimizationDirection": [[9, 3, 1, "", "MAXIMIZATION"], [9, 3, 1, "", "MINIMIZATION"]], "optunaz.config.Task": [[9, 3, 1, "", "BUILDING"], [9, 3, 1, "", "OPTIMIZATION"], [9, 3, 1, "", "PREDICTION"]], "optunaz.config.Visualization": [[9, 2, 1, "", "ImageFileFormat"], [9, 2, 1, "", "Plots"], [9, 3, 1, "", "file_format"], [9, 3, 1, "", "output_folder"], [9, 3, 1, "", "plots"], [9, 3, 1, "", "use_xvfb"]], "optunaz.config.Visualization.ImageFileFormat": [[9, 3, 1, "", "JPEG"], [9, 3, 1, "", "JPG"], [9, 3, 1, "", "PDF"], [9, 3, 1, "", "PNG"], [9, 3, 1, "", "SVG"]], "optunaz.config.Visualization.Plots": [[9, 3, 1, "", "plot_contour"], [9, 3, 1, "", "plot_history"], [9, 3, 1, "", "plot_parallel_coordinate"], [9, 3, 1, "", "plot_slice"]], "optunaz.config.build_from_opt": [[9, 1, 1, "", "buildconfig_from_trial"], [9, 1, 1, "", "check_invalid_descriptor_param"], [9, 1, 1, "", "encode_name"], [9, 1, 1, "", "remove_algo_hash"], [9, 1, 1, "", "set_build_cache"], [9, 1, 1, "", "suggest_alg_params"], [9, 1, 1, "", "suggest_aux_params"]], "optunaz.config.buildconfig": [[9, 2, 1, "", "AdaBoostClassifier"], [9, 2, 1, "", "Algorithm"], [9, 2, 1, "", "BuildConfig"], [9, 2, 1, "", "CalibratedClassifierCVWithVA"], [9, 2, 1, "", "ChemPropClassifier"], [9, 2, 1, "", "ChemPropHyperoptClassifier"], [9, 2, 1, "", "ChemPropHyperoptRegressor"], [9, 2, 1, "", "ChemPropRegressor"], [9, 2, 1, "", "ChemPropRegressorPretrained"], [9, 2, 1, "", "KNeighborsClassifier"], [9, 2, 1, "", "KNeighborsRegressor"], [9, 2, 1, "", "Lasso"], [9, 2, 1, "", 
"LogisticRegression"], [9, 2, 1, "", "Mapie"], [9, 2, 1, "", "PLSRegression"], [9, 2, 1, "", "PRFClassifier"], [9, 2, 1, "", "RandomForestClassifier"], [9, 2, 1, "", "RandomForestRegressor"], [9, 2, 1, "", "Ridge"], [9, 2, 1, "", "SVC"], [9, 2, 1, "", "SVR"], [9, 2, 1, "", "XGBRegressor"]], "optunaz.config.buildconfig.AdaBoostClassifier": [[9, 2, 1, "", "AdaBoostClassifierParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.AdaBoostClassifier.AdaBoostClassifierParameters": [[9, 3, 1, "", "learning_rate"], [9, 3, 1, "", "n_estimators"]], "optunaz.config.buildconfig.Algorithm": [[9, 4, 1, "", "estimator"]], "optunaz.config.buildconfig.BuildConfig": [[9, 2, 1, "", "Metadata"], [9, 2, 1, "", "Settings"], [9, 3, 1, "", "algorithm"], [9, 3, 1, "", "data"], [9, 3, 1, "", "descriptor"], [9, 3, 1, "", "metadata"], [9, 3, 1, "", "settings"], [9, 3, 1, "", "task"]], "optunaz.config.buildconfig.BuildConfig.Metadata": [[9, 3, 1, "", "best_trial"], [9, 3, 1, "", "best_value"], [9, 3, 1, "", "cross_validation"], [9, 3, 1, "", "n_trials"], [9, 3, 1, "", "name"], [9, 3, 1, "", "shuffle"], [9, 3, 1, "", "visualization"]], "optunaz.config.buildconfig.BuildConfig.Settings": [[9, 3, 1, "", "direction"], [9, 3, 1, "", "mode"], [9, 3, 1, "", "n_trials"], [9, 3, 1, "", "scoring"], [9, 3, 1, "", "tracking_rest_endpoint"]], "optunaz.config.buildconfig.CalibratedClassifierCVWithVA": [[9, 2, 1, "", "CalibratedClassifierCVParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.CalibratedClassifierCVWithVA.CalibratedClassifierCVParameters": [[9, 3, 1, "", "ensemble"], [9, 3, 1, "", "estimator"], [9, 3, 1, "", "method"], [9, 3, 1, "", "n_folds"]], "optunaz.config.buildconfig.ChemPropClassifier": [[9, 2, 1, "", "ChemPropClassifierParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], 
"optunaz.config.buildconfig.ChemPropClassifier.ChemPropClassifierParameters": [[9, 3, 1, "", "activation"], [9, 3, 1, "", "aggregation"], [9, 3, 1, "", "aggregation_norm"], [9, 3, 1, "", "aux_weight_pc"], [9, 3, 1, "", "batch_size"], [9, 3, 1, "", "depth"], [9, 3, 1, "", "dropout"], [9, 3, 1, "", "ensemble_size"], [9, 3, 1, "", "epochs"], [9, 3, 1, "", "features_generator"], [9, 3, 1, "", "ffn_hidden_size"], [9, 3, 1, "", "ffn_num_layers"], [9, 3, 1, "", "final_lr_ratio_exp"], [9, 3, 1, "", "hidden_size"], [9, 3, 1, "", "init_lr_ratio_exp"], [9, 3, 1, "", "max_lr_exp"], [9, 3, 1, "", "warmup_epochs_ratio"]], "optunaz.config.buildconfig.ChemPropHyperoptClassifier": [[9, 2, 1, "", "ChemPropHyperoptClassifierParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.ChemPropHyperoptClassifier.ChemPropHyperoptClassifierParameters": [[9, 3, 1, "", "aux_weight_pc"], [9, 3, 1, "", "ensemble_size"], [9, 3, 1, "", "epochs"], [9, 3, 1, "", "features_generator"], [9, 3, 1, "", "num_iters"], [9, 3, 1, "", "search_parameter_level"]], "optunaz.config.buildconfig.ChemPropHyperoptRegressor": [[9, 2, 1, "", "ChemPropHyperoptRegressorParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.ChemPropHyperoptRegressor.ChemPropHyperoptRegressorParameters": [[9, 3, 1, "", "aux_weight_pc"], [9, 3, 1, "", "ensemble_size"], [9, 3, 1, "", "epochs"], [9, 3, 1, "", "features_generator"], [9, 3, 1, "", "num_iters"], [9, 3, 1, "", "search_parameter_level"]], "optunaz.config.buildconfig.ChemPropRegressor": [[9, 2, 1, "", "ChemPropRegressorParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.ChemPropRegressor.ChemPropRegressorParameters": [[9, 3, 1, "", "activation"], [9, 3, 1, "", "aggregation"], [9, 3, 1, "", "aggregation_norm"], [9, 3, 1, "", "aux_weight_pc"], [9, 3, 1, "", "batch_size"], [9, 3, 1, 
"", "depth"], [9, 3, 1, "", "dropout"], [9, 3, 1, "", "ensemble_size"], [9, 3, 1, "", "epochs"], [9, 3, 1, "", "features_generator"], [9, 3, 1, "", "ffn_hidden_size"], [9, 3, 1, "", "ffn_num_layers"], [9, 3, 1, "", "final_lr_ratio_exp"], [9, 3, 1, "", "hidden_size"], [9, 3, 1, "", "init_lr_ratio_exp"], [9, 3, 1, "", "max_lr_exp"], [9, 3, 1, "", "warmup_epochs_ratio"]], "optunaz.config.buildconfig.ChemPropRegressorPretrained": [[9, 2, 1, "", "ChemPropRegressorPretrainedParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.ChemPropRegressorPretrained.ChemPropRegressorPretrainedParameters": [[9, 3, 1, "", "epochs"], [9, 3, 1, "", "frzn"], [9, 3, 1, "", "pretrained_model"]], "optunaz.config.buildconfig.KNeighborsClassifier": [[9, 2, 1, "", "KNeighborsClassifierParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.KNeighborsClassifier.KNeighborsClassifierParameters": [[9, 3, 1, "", "metric"], [9, 3, 1, "", "n_neighbors"], [9, 3, 1, "", "weights"]], "optunaz.config.buildconfig.KNeighborsRegressor": [[9, 2, 1, "", "KNeighborsRegressorParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.KNeighborsRegressor.KNeighborsRegressorParameters": [[9, 3, 1, "", "metric"], [9, 3, 1, "", "n_neighbors"], [9, 3, 1, "", "weights"]], "optunaz.config.buildconfig.Lasso": [[9, 2, 1, "", "LassoParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.Lasso.LassoParameters": [[9, 3, 1, "", "alpha"]], "optunaz.config.buildconfig.LogisticRegression": [[9, 2, 1, "", "LogisticRegressionParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.LogisticRegression.LogisticRegressionParameters": [[9, 3, 1, "", "C"], [9, 3, 1, "", "solver"]], 
"optunaz.config.buildconfig.Mapie": [[9, 2, 1, "", "MapieParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.Mapie.MapieParameters": [[9, 3, 1, "", "estimator"], [9, 3, 1, "", "mapie_alpha"]], "optunaz.config.buildconfig.PLSRegression": [[9, 2, 1, "", "PLSParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.PLSRegression.PLSParameters": [[9, 3, 1, "", "n_components"]], "optunaz.config.buildconfig.PRFClassifier": [[9, 2, 1, "", "PRFClassifierParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.PRFClassifier.PRFClassifierParameters": [[9, 3, 1, "", "bootstrap"], [9, 3, 1, "", "max_depth"], [9, 3, 1, "", "max_features"], [9, 3, 1, "", "min_py_sum_leaf"], [9, 3, 1, "", "n_estimators"], [9, 3, 1, "", "new_syn_data_frac"], [9, 3, 1, "", "use_py_gini"], [9, 3, 1, "", "use_py_leafs"]], "optunaz.config.buildconfig.RandomForestClassifier": [[9, 2, 1, "", "RandomForestParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.RandomForestClassifier.RandomForestParameters": [[9, 3, 1, "", "max_depth"], [9, 3, 1, "", "max_features"], [9, 3, 1, "", "n_estimators"]], "optunaz.config.buildconfig.RandomForestRegressor": [[9, 2, 1, "", "RandomForestParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.RandomForestRegressor.RandomForestParameters": [[9, 3, 1, "", "max_depth"], [9, 3, 1, "", "max_features"], [9, 3, 1, "", "n_estimators"]], "optunaz.config.buildconfig.Ridge": [[9, 2, 1, "", "RidgeParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.Ridge.RidgeParameters": [[9, 3, 1, "", "alpha"]], "optunaz.config.buildconfig.SVC": [[9, 2, 1, "", "SVCParameters"], [9, 4, 1, "", 
"estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.SVC.SVCParameters": [[9, 3, 1, "", "C"], [9, 3, 1, "", "gamma"]], "optunaz.config.buildconfig.SVR": [[9, 2, 1, "", "SVRParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.SVR.SVRParameters": [[9, 3, 1, "", "C"], [9, 3, 1, "", "gamma"]], "optunaz.config.buildconfig.XGBRegressor": [[9, 2, 1, "", "XGBRegressorParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.XGBRegressor.XGBRegressorParameters": [[9, 3, 1, "", "learning_rate"], [9, 3, 1, "", "max_depth"], [9, 3, 1, "", "n_estimators"]], "optunaz.config.optconfig": [[9, 2, 1, "", "AdaBoostClassifier"], [9, 2, 1, "", "Algorithm"], [9, 2, 1, "", "CalibratedClassifierCVEnsemble"], [9, 2, 1, "", "CalibratedClassifierCVMethod"], [9, 2, 1, "", "CalibratedClassifierCVWithVA"], [9, 2, 1, "", "ChemPropActivation"], [9, 2, 1, "", "ChemPropAggregation"], [9, 2, 1, "", "ChemPropClassifier"], [9, 2, 1, "", "ChemPropFeatures_Generator"], [9, 2, 1, "", "ChemPropFrzn"], [9, 2, 1, "", "ChemPropHyperoptClassifier"], [9, 2, 1, "", "ChemPropHyperoptRegressor"], [9, 2, 1, "", "ChemPropRegressor"], [9, 2, 1, "", "ChemPropRegressorPretrained"], [9, 2, 1, "", "ChemPropSearch_Parameter_Level"], [9, 2, 1, "", "ClassificationScore"], [9, 2, 1, "", "KNeighborsClassifier"], [9, 2, 1, "", "KNeighborsMetric"], [9, 2, 1, "", "KNeighborsRegressor"], [9, 2, 1, "", "KNeighborsWeights"], [9, 2, 1, "", "Lasso"], [9, 2, 1, "", "LogisticRegression"], [9, 2, 1, "", "Mapie"], [9, 2, 1, "", "OptimizationConfig"], [9, 2, 1, "", "PLSRegression"], [9, 2, 1, "", "PRFClassifier"], [9, 2, 1, "", "PRFClassifierMaxFeatures"], [9, 2, 1, "", "RandomForestClassifier"], [9, 2, 1, "", "RandomForestMaxFeatures"], [9, 2, 1, "", "RandomForestRegressor"], [9, 2, 1, "", "RegressionScore"], [9, 2, 1, "", "Ridge"], [9, 2, 1, "", "SVC"], [9, 2, 
1, "", "SVR"], [9, 2, 1, "", "XGBRegressor"], [9, 1, 1, "", "copy_path_for_scaled_descriptor"], [9, 1, 1, "", "detect_mode_from_algs"], [9, 1, 1, "", "isanyof"]], "optunaz.config.optconfig.AdaBoostClassifier": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.AdaBoostClassifier.Parameters": [[9, 2, 1, "", "AdaBoostClassifierParametersLearningRate"], [9, 2, 1, "", "AdaBoostClassifierParametersNEstimators"], [9, 3, 1, "", "learning_rate"], [9, 3, 1, "", "n_estimators"]], "optunaz.config.optconfig.AdaBoostClassifier.Parameters.AdaBoostClassifierParametersLearningRate": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.AdaBoostClassifier.Parameters.AdaBoostClassifierParametersNEstimators": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.CalibratedClassifierCVEnsemble": [[9, 3, 1, "", "FALSE"], [9, 3, 1, "", "TRUE"]], "optunaz.config.optconfig.CalibratedClassifierCVMethod": [[9, 3, 1, "", "ISOTONIC"], [9, 3, 1, "", "SIGMOID"], [9, 3, 1, "", "VENNABERS"]], "optunaz.config.optconfig.CalibratedClassifierCVWithVA": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.CalibratedClassifierCVWithVA.Parameters": [[9, 3, 1, "", "ensemble"], [9, 3, 1, "", "estimator"], [9, 3, 1, "", "method"], [9, 3, 1, "", "n_folds"]], "optunaz.config.optconfig.ChemPropActivation": [[9, 3, 1, "", "ELU"], [9, 3, 1, "", "LEAKYRELU"], [9, 3, 1, "", "PRELU"], [9, 3, 1, "", "RELU"], [9, 3, 1, "", "SELU"], [9, 3, 1, "", "TANH"]], "optunaz.config.optconfig.ChemPropAggregation": [[9, 3, 1, "", "MEAN"], [9, 3, 1, "", "NORM"], [9, 3, 1, "", "SUM"]], "optunaz.config.optconfig.ChemPropClassifier": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.ChemPropClassifier.Parameters": [[9, 2, 1, "", "ChemPropParametersAggregation_Norm"], [9, 2, 1, "", "ChemPropParametersBatch_Size"], [9, 2, 1, 
"", "ChemPropParametersDepth"], [9, 2, 1, "", "ChemPropParametersDropout"], [9, 2, 1, "", "ChemPropParametersFFN_Hidden_Size"], [9, 2, 1, "", "ChemPropParametersFFN_Num_Layers"], [9, 2, 1, "", "ChemPropParametersFinal_Lr_Ratio_Exp"], [9, 2, 1, "", "ChemPropParametersHidden_Size"], [9, 2, 1, "", "ChemPropParametersInit_Lr_Ratio_Exp"], [9, 2, 1, "", "ChemPropParametersMax_Lr_Exp"], [9, 2, 1, "", "ChemPropParametersWarmup_Epochs_Ratio"], [9, 3, 1, "", "activation"], [9, 3, 1, "", "aggregation"], [9, 3, 1, "", "aggregation_norm"], [9, 3, 1, "", "batch_size"], [9, 3, 1, "", "depth"], [9, 3, 1, "", "dropout"], [9, 3, 1, "", "ensemble_size"], [9, 3, 1, "", "epochs"], [9, 3, 1, "", "features_generator"], [9, 3, 1, "", "ffn_hidden_size"], [9, 3, 1, "", "ffn_num_layers"], [9, 3, 1, "", "final_lr_ratio_exp"], [9, 3, 1, "", "hidden_size"], [9, 3, 1, "", "init_lr_ratio_exp"], [9, 3, 1, "", "max_lr_exp"], [9, 3, 1, "", "warmup_epochs_ratio"]], "optunaz.config.optconfig.ChemPropClassifier.Parameters.ChemPropParametersAggregation_Norm": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropClassifier.Parameters.ChemPropParametersBatch_Size": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropClassifier.Parameters.ChemPropParametersDepth": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropClassifier.Parameters.ChemPropParametersDropout": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropClassifier.Parameters.ChemPropParametersFFN_Hidden_Size": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropClassifier.Parameters.ChemPropParametersFFN_Num_Layers": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropClassifier.Parameters.ChemPropParametersFinal_Lr_Ratio_Exp": [[9, 3, 1, "", "high"], [9, 
3, 1, "", "low"]], "optunaz.config.optconfig.ChemPropClassifier.Parameters.ChemPropParametersHidden_Size": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropClassifier.Parameters.ChemPropParametersInit_Lr_Ratio_Exp": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.ChemPropClassifier.Parameters.ChemPropParametersMax_Lr_Exp": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.ChemPropClassifier.Parameters.ChemPropParametersWarmup_Epochs_Ratio": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropFeatures_Generator": [[9, 3, 1, "", "MORGAN"], [9, 3, 1, "", "MORGAN_COUNT"], [9, 3, 1, "", "NONE"], [9, 3, 1, "", "RDKIT_2D"], [9, 3, 1, "", "RDKIT_2D_NORMALIZED"]], "optunaz.config.optconfig.ChemPropFrzn": [[9, 3, 1, "", "MPNN"], [9, 3, 1, "", "MPNN_FIRST_FFN"], [9, 3, 1, "", "MPNN_LAST_FFN"], [9, 3, 1, "", "NONE"]], "optunaz.config.optconfig.ChemPropHyperoptClassifier": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.ChemPropHyperoptClassifier.Parameters": [[9, 3, 1, "", "ensemble_size"], [9, 3, 1, "", "epochs"], [9, 3, 1, "", "features_generator"], [9, 3, 1, "", "num_iters"], [9, 3, 1, "", "search_parameter_level"]], "optunaz.config.optconfig.ChemPropHyperoptRegressor": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.ChemPropHyperoptRegressor.Parameters": [[9, 3, 1, "", "ensemble_size"], [9, 3, 1, "", "epochs"], [9, 3, 1, "", "features_generator"], [9, 3, 1, "", "num_iters"], [9, 3, 1, "", "search_parameter_level"]], "optunaz.config.optconfig.ChemPropRegressor": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.ChemPropRegressor.Parameters": [[9, 2, 1, "", "ChemPropParametersAggregation_Norm"], [9, 2, 1, "", "ChemPropParametersBatch_Size"], [9, 2, 1, 
"", "ChemPropParametersDepth"], [9, 2, 1, "", "ChemPropParametersDropout"], [9, 2, 1, "", "ChemPropParametersFFN_Hidden_Size"], [9, 2, 1, "", "ChemPropParametersFFN_Num_Layers"], [9, 2, 1, "", "ChemPropParametersFinal_Lr_Ratio_Exp"], [9, 2, 1, "", "ChemPropParametersHidden_Size"], [9, 2, 1, "", "ChemPropParametersInit_Lr_Ratio_Exp"], [9, 2, 1, "", "ChemPropParametersMax_Lr_Exp"], [9, 2, 1, "", "ChemPropParametersWarmup_Epochs_Ratio"], [9, 3, 1, "", "activation"], [9, 3, 1, "", "aggregation"], [9, 3, 1, "", "aggregation_norm"], [9, 3, 1, "", "batch_size"], [9, 3, 1, "", "depth"], [9, 3, 1, "", "dropout"], [9, 3, 1, "", "ensemble_size"], [9, 3, 1, "", "epochs"], [9, 3, 1, "", "features_generator"], [9, 3, 1, "", "ffn_hidden_size"], [9, 3, 1, "", "ffn_num_layers"], [9, 3, 1, "", "final_lr_ratio_exp"], [9, 3, 1, "", "hidden_size"], [9, 3, 1, "", "init_lr_ratio_exp"], [9, 3, 1, "", "max_lr_exp"], [9, 3, 1, "", "warmup_epochs_ratio"]], "optunaz.config.optconfig.ChemPropRegressor.Parameters.ChemPropParametersAggregation_Norm": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropRegressor.Parameters.ChemPropParametersBatch_Size": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropRegressor.Parameters.ChemPropParametersDepth": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropRegressor.Parameters.ChemPropParametersDropout": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropRegressor.Parameters.ChemPropParametersFFN_Hidden_Size": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropRegressor.Parameters.ChemPropParametersFFN_Num_Layers": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropRegressor.Parameters.ChemPropParametersFinal_Lr_Ratio_Exp": [[9, 3, 1, "", "high"], [9, 3, 1, 
"", "low"]], "optunaz.config.optconfig.ChemPropRegressor.Parameters.ChemPropParametersHidden_Size": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropRegressor.Parameters.ChemPropParametersInit_Lr_Ratio_Exp": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.ChemPropRegressor.Parameters.ChemPropParametersMax_Lr_Exp": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.ChemPropRegressor.Parameters.ChemPropParametersWarmup_Epochs_Ratio": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropRegressorPretrained": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.ChemPropRegressorPretrained.Parameters": [[9, 2, 1, "", "ChemPropParametersEpochs"], [9, 3, 1, "", "epochs"], [9, 3, 1, "", "frzn"], [9, 3, 1, "", "pretrained_model"]], "optunaz.config.optconfig.ChemPropRegressorPretrained.Parameters.ChemPropParametersEpochs": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropSearch_Parameter_Level": [[9, 3, 1, "", "AUTO"], [9, 3, 1, "", "L1"], [9, 3, 1, "", "L2"], [9, 3, 1, "", "L3"], [9, 3, 1, "", "L4"], [9, 3, 1, "", "L5"], [9, 3, 1, "", "L6"], [9, 3, 1, "", "L7"], [9, 3, 1, "", "L8"]], "optunaz.config.optconfig.ClassificationScore": [[9, 3, 1, "", "ACCURACY"], [9, 3, 1, "", "AUC_PR_CAL"], [9, 3, 1, "", "AVERAGE_PRECISION"], [9, 3, 1, "", "BALANCED_ACCURACY"], [9, 3, 1, "", "BEDROC"], [9, 3, 1, "", "CONCORDANCE_INDEX"], [9, 3, 1, "", "F1"], [9, 3, 1, "", "F1_MACRO"], [9, 3, 1, "", "F1_MICRO"], [9, 3, 1, "", "F1_WEIGHTED"], [9, 3, 1, "", "JACCARD"], [9, 3, 1, "", "JACCARD_MACRO"], [9, 3, 1, "", "JACCARD_MICRO"], [9, 3, 1, "", "JACCARD_WEIGHTED"], [9, 3, 1, "", "NEG_BRIER_SCORE"], [9, 3, 1, "", "PRECISION"], [9, 3, 1, "", "PRECISION_MACRO"], [9, 3, 1, "", "PRECISION_MICRO"], [9, 3, 1, "", "PRECISION_WEIGHTED"], [9, 3, 1, "", 
"RECALL"], [9, 3, 1, "", "RECALL_MACRO"], [9, 3, 1, "", "RECALL_MICRO"], [9, 3, 1, "", "RECALL_WEIGHTED"], [9, 3, 1, "", "ROC_AUC"]], "optunaz.config.optconfig.KNeighborsClassifier": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.KNeighborsClassifier.Parameters": [[9, 2, 1, "", "KNeighborsClassifierParametersN_Neighbors"], [9, 3, 1, "", "metric"], [9, 3, 1, "", "n_neighbors"], [9, 3, 1, "", "weights"]], "optunaz.config.optconfig.KNeighborsClassifier.Parameters.KNeighborsClassifierParametersN_Neighbors": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.KNeighborsMetric": [[9, 3, 1, "", "EUCLIDEAN"], [9, 3, 1, "", "MANHATTAN"], [9, 3, 1, "", "MINKOWSKI"]], "optunaz.config.optconfig.KNeighborsRegressor": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.KNeighborsRegressor.Parameters": [[9, 2, 1, "", "KNeighborsRegressorParametersN_Neighbors"], [9, 3, 1, "", "metric"], [9, 3, 1, "", "n_neighbors"], [9, 3, 1, "", "weights"]], "optunaz.config.optconfig.KNeighborsRegressor.Parameters.KNeighborsRegressorParametersN_Neighbors": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.KNeighborsWeights": [[9, 3, 1, "", "DISTANCE"], [9, 3, 1, "", "UNIFORM"]], "optunaz.config.optconfig.Lasso": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.Lasso.Parameters": [[9, 2, 1, "", "LassoParametersAlpha"], [9, 3, 1, "", "alpha"]], "optunaz.config.optconfig.Lasso.Parameters.LassoParametersAlpha": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.LogisticRegression": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.LogisticRegression.Parameters": [[9, 3, 1, "", "C"], [9, 2, 1, "", "LogisticRegressionParametersParameterC"], [9, 3, 1, "", "solver"]], 
"optunaz.config.optconfig.LogisticRegression.Parameters.LogisticRegressionParametersParameterC": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.Mapie": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.Mapie.Parameters": [[9, 3, 1, "", "estimator"], [9, 3, 1, "", "mapie_alpha"]], "optunaz.config.optconfig.OptimizationConfig": [[9, 2, 1, "", "Settings"], [9, 3, 1, "", "algorithms"], [9, 3, 1, "", "data"], [9, 3, 1, "", "description"], [9, 3, 1, "", "descriptors"], [9, 3, 1, "", "mode"], [9, 3, 1, "", "name"], [9, 4, 1, "", "set_algo_hashes"], [9, 4, 1, "", "set_cache"], [9, 3, 1, "", "settings"], [9, 3, 1, "", "task"], [9, 3, 1, "", "visualization"]], "optunaz.config.optconfig.OptimizationConfig.Settings": [[9, 3, 1, "", "cross_validation"], [9, 3, 1, "", "cv_split_strategy"], [9, 3, 1, "", "direction"], [9, 3, 1, "", "minimise_std_dev"], [9, 3, 1, "", "mode"], [9, 3, 1, "", "n_chemprop_trials"], [9, 3, 1, "", "n_jobs"], [9, 3, 1, "", "n_startup_trials"], [9, 3, 1, "", "n_trials"], [9, 3, 1, "", "optuna_storage"], [9, 3, 1, "", "random_seed"], [9, 3, 1, "", "scoring"], [9, 3, 1, "", "shuffle"], [9, 3, 1, "", "split_chemprop"], [9, 3, 1, "", "track_to_mlflow"], [9, 3, 1, "", "tracking_rest_endpoint"], [9, 3, 1, "", "use_cache"]], "optunaz.config.optconfig.PLSRegression": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.PLSRegression.Parameters": [[9, 2, 1, "", "NComponents"], [9, 3, 1, "", "n_components"]], "optunaz.config.optconfig.PLSRegression.Parameters.NComponents": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.PRFClassifier": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.PRFClassifier.Parameters": [[9, 2, 1, "", "PRFClassifierParametersMaxDepth"], [9, 2, 1, "", "PRFClassifierParametersMinPySumLeaf"], [9, 2, 1, "", 
"PRFClassifierParametersNEstimators"], [9, 3, 1, "", "max_depth"], [9, 3, 1, "", "max_features"], [9, 3, 1, "", "min_py_sum_leaf"], [9, 3, 1, "", "n_estimators"], [9, 3, 1, "", "use_py_gini"], [9, 3, 1, "", "use_py_leafs"]], "optunaz.config.optconfig.PRFClassifier.Parameters.PRFClassifierParametersMaxDepth": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.PRFClassifier.Parameters.PRFClassifierParametersMinPySumLeaf": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.PRFClassifier.Parameters.PRFClassifierParametersNEstimators": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.PRFClassifierMaxFeatures": [[9, 3, 1, "", "AUTO"], [9, 3, 1, "", "LOG2"], [9, 3, 1, "", "SQRT"]], "optunaz.config.optconfig.RandomForestClassifier": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.RandomForestClassifier.Parameters": [[9, 2, 1, "", "RandomForestClassifierParametersMaxDepth"], [9, 2, 1, "", "RandomForestClassifierParametersNEstimators"], [9, 3, 1, "", "max_depth"], [9, 3, 1, "", "max_features"], [9, 3, 1, "", "n_estimators"]], "optunaz.config.optconfig.RandomForestClassifier.Parameters.RandomForestClassifierParametersMaxDepth": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.RandomForestClassifier.Parameters.RandomForestClassifierParametersNEstimators": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.RandomForestMaxFeatures": [[9, 3, 1, "", "AUTO"], [9, 3, 1, "", "LOG2"], [9, 3, 1, "", "SQRT"]], "optunaz.config.optconfig.RandomForestRegressor": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.RandomForestRegressor.Parameters": [[9, 2, 1, "", "RandomForestRegressorParametersMaxDepth"], [9, 2, 1, "", "RandomForestRegressorParametersNEstimators"], [9, 3, 1, "", "max_depth"], [9, 3, 1, "", "max_features"], [9, 3, 1, "", "n_estimators"]], 
"optunaz.config.optconfig.RandomForestRegressor.Parameters.RandomForestRegressorParametersMaxDepth": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.RandomForestRegressor.Parameters.RandomForestRegressorParametersNEstimators": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.RegressionScore": [[9, 3, 1, "", "EXPLAINED_VARIANCE"], [9, 3, 1, "", "MAX_ERROR"], [9, 3, 1, "", "NEG_MEAN_ABSOLUTE_ERROR"], [9, 3, 1, "", "NEG_MEAN_SQUARED_ERROR"], [9, 3, 1, "", "NEG_MEDIAN_ABSOLUTE_ERROR"], [9, 3, 1, "", "R2"]], "optunaz.config.optconfig.Ridge": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.Ridge.Parameters": [[9, 2, 1, "", "Alpha"], [9, 3, 1, "", "alpha"]], "optunaz.config.optconfig.Ridge.Parameters.Alpha": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.SVC": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.SVC.Parameters": [[9, 3, 1, "", "C"], [9, 2, 1, "", "Gamma"], [9, 2, 1, "", "SVCParametersParameterC"], [9, 3, 1, "", "gamma"]], "optunaz.config.optconfig.SVC.Parameters.Gamma": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.SVC.Parameters.SVCParametersParameterC": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.SVR": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.SVR.Parameters": [[9, 3, 1, "", "C"], [9, 2, 1, "", "SVRParametersGamma"], [9, 2, 1, "", "SVRParametersParameterC"], [9, 3, 1, "", "gamma"]], "optunaz.config.optconfig.SVR.Parameters.SVRParametersGamma": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.SVR.Parameters.SVRParametersParameterC": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.XGBRegressor": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], 
"optunaz.config.optconfig.XGBRegressor.Parameters": [[9, 2, 1, "", "LearningRate"], [9, 2, 1, "", "MaxDepth"], [9, 2, 1, "", "NEstimators"], [9, 3, 1, "", "learning_rate"], [9, 3, 1, "", "max_depth"], [9, 3, 1, "", "n_estimators"]], "optunaz.config.optconfig.XGBRegressor.Parameters.LearningRate": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.XGBRegressor.Parameters.MaxDepth": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.XGBRegressor.Parameters.NEstimators": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.datareader": [[8, 2, 1, "", "Dataset"], [8, 1, 1, "", "deduplicate"], [8, 1, 1, "", "isvalid"], [8, 1, 1, "", "merge"], [8, 1, 1, "", "read_data"], [8, 1, 1, "", "split"], [8, 1, 1, "", "transform"]], "optunaz.datareader.Dataset": [[8, 3, 1, "", "aux_column"], [8, 3, 1, "", "aux_transform"], [8, 4, 1, "", "check_sets"], [8, 3, 1, "", "deduplication_strategy"], [8, 4, 1, "", "get_merged_sets"], [8, 4, 1, "", "get_sets"], [8, 3, 1, "", "input_column"], [8, 3, 1, "", "intermediate_test_dataset_file"], [8, 3, 1, "", "intermediate_training_dataset_file"], [8, 3, 1, "", "log_transform"], [8, 3, 1, "", "log_transform_base"], [8, 3, 1, "", "log_transform_negative"], [8, 3, 1, "", "log_transform_unit_conversion"], [8, 3, 1, "", "probabilistic_threshold_representation"], [8, 3, 1, "", "probabilistic_threshold_representation_std"], [8, 3, 1, "", "probabilistic_threshold_representation_threshold"], [8, 3, 1, "", "response_column"], [8, 3, 1, "", "response_type"], [8, 3, 1, "", "save_intermediate_files"], [8, 3, 1, "", "split_strategy"], [8, 3, 1, "", "test_dataset_file"], [8, 3, 1, "", "training_dataset_file"]], "optunaz.descriptors": [[8, 2, 1, "", "Avalon"], [8, 2, 1, "", "CanonicalSmiles"], [8, 2, 1, "", "CompositeDescriptor"], [8, 2, 1, "", "ECFP"], [8, 2, 1, "", "ECFP_counts"], [8, 2, 1, "", "FittedSklearnScaler"], [8, 2, 1, "", "GenericScaffold"], [8, 2, 1, "", "JazzyDescriptors"], [8, 2, 1, "", 
"MACCS_keys"], [8, 2, 1, "", "MolDescriptor"], [8, 5, 1, "", "NoValidSmiles"], [8, 2, 1, "", "PathFP"], [8, 2, 1, "", "PhyschemDescriptors"], [8, 2, 1, "", "PrecomputedDescriptorFromFile"], [8, 2, 1, "", "RdkitDescriptor"], [8, 2, 1, "", "Scaffold"], [8, 2, 1, "", "ScaledDescriptor"], [8, 5, 1, "", "ScalingFittingError"], [8, 2, 1, "", "SmilesAndSideInfoFromFile"], [8, 2, 1, "", "SmilesFromFile"], [8, 2, 1, "", "UnfittedSklearnScaler"], [8, 2, 1, "", "UnscaledJazzyDescriptors"], [8, 2, 1, "", "UnscaledPhyschemDescriptors"], [8, 2, 1, "", "UnscaledZScalesDescriptors"], [8, 2, 1, "", "ValidDescriptor"], [8, 2, 1, "", "ZScalesDescriptors"], [8, 1, 1, "", "descriptor_from_config"], [8, 1, 1, "", "mol_from_smi"], [8, 1, 1, "", "numpy_from_rdkit"]], "optunaz.descriptors.Avalon": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_mol"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.Avalon.Parameters": [[8, 3, 1, "", "nBits"]], "optunaz.descriptors.CanonicalSmiles": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_smi"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.CompositeDescriptor": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_smi"], [8, 4, 1, "", "fp_info"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.CompositeDescriptor.Parameters": [[8, 3, 1, "", "descriptors"]], "optunaz.descriptors.ECFP": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_mol"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.ECFP.Parameters": [[8, 3, 1, "", "nBits"], [8, 3, 1, "", "radius"], [8, 3, 1, "", "returnRdkit"]], "optunaz.descriptors.ECFP_counts": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_mol"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.ECFP_counts.Parameters": [[8, 3, 1, "", "nBits"], [8, 3, 1, "", "radius"], [8, 3, 1, "", "useFeatures"]], "optunaz.descriptors.FittedSklearnScaler": 
[[8, 4, 1, "", "get_fitted_scaler"], [8, 3, 1, "", "name"], [8, 3, 1, "", "saved_params"]], "optunaz.descriptors.GenericScaffold": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_smi"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.JazzyDescriptors": [[8, 2, 1, "", "Parameters"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.JazzyDescriptors.Parameters": [[8, 3, 1, "", "descriptor"], [8, 3, 1, "", "jazzy_filters"], [8, 3, 1, "", "jazzy_names"], [8, 3, 1, "", "scaler"]], "optunaz.descriptors.MACCS_keys": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_mol"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.MolDescriptor": [[8, 4, 1, "", "calculate_from_smi"], [8, 4, 1, "", "parallel_compute_descriptor"]], "optunaz.descriptors.PathFP": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_mol"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.PathFP.Parameters": [[8, 3, 1, "", "fpSize"], [8, 3, 1, "", "maxPath"]], "optunaz.descriptors.PhyschemDescriptors": [[8, 2, 1, "", "Parameters"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.PhyschemDescriptors.Parameters": [[8, 3, 1, "", "descriptor"], [8, 3, 1, "", "rdkit_names"], [8, 3, 1, "", "scaler"]], "optunaz.descriptors.PrecomputedDescriptorFromFile": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_smi"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.PrecomputedDescriptorFromFile.Parameters": [[8, 3, 1, "", "file"], [8, 3, 1, "", "input_column"], [8, 3, 1, "", "response_column"]], "optunaz.descriptors.RdkitDescriptor": [[8, 4, 1, "", "calculate_from_mol"], [8, 4, 1, "", "calculate_from_smi"]], "optunaz.descriptors.Scaffold": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_smi"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.ScaledDescriptor": [[8, 2, 1, "", 
"ScaledDescriptorParameters"], [8, 4, 1, "", "calculate_from_smi"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"], [8, 4, 1, "", "set_unfitted_scaler_data"]], "optunaz.descriptors.ScaledDescriptor.ScaledDescriptorParameters": [[8, 3, 1, "", "descriptor"], [8, 3, 1, "", "scaler"]], "optunaz.descriptors.SmilesAndSideInfoFromFile": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_smi"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.SmilesAndSideInfoFromFile.Parameters": [[8, 2, 1, "", "Aux_Weight_Pc"], [8, 3, 1, "", "aux_weight_pc"], [8, 3, 1, "", "file"], [8, 3, 1, "", "input_column"]], "optunaz.descriptors.SmilesAndSideInfoFromFile.Parameters.Aux_Weight_Pc": [[8, 3, 1, "", "high"], [8, 3, 1, "", "low"], [8, 3, 1, "", "q"]], "optunaz.descriptors.SmilesFromFile": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_smi"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.UnfittedSklearnScaler": [[8, 2, 1, "", "MolData"], [8, 4, 1, "", "get_fitted_scaler_for_fp"], [8, 3, 1, "", "mol_data"], [8, 3, 1, "", "name"]], "optunaz.descriptors.UnfittedSklearnScaler.MolData": [[8, 3, 1, "", "file_path"], [8, 3, 1, "", "smiles_column"]], "optunaz.descriptors.UnscaledJazzyDescriptors": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_smi"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.UnscaledJazzyDescriptors.Parameters": [[8, 3, 1, "", "jazzy_filters"], [8, 3, 1, "", "jazzy_names"]], "optunaz.descriptors.UnscaledPhyschemDescriptors": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_mol"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.UnscaledPhyschemDescriptors.Parameters": [[8, 3, 1, "", "rdkit_names"]], "optunaz.descriptors.UnscaledZScalesDescriptors": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_smi"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.ValidDescriptor": [[8, 2, 1, "", 
"Parameters"], [8, 4, 1, "", "calculate_from_smi"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.ZScalesDescriptors": [[8, 2, 1, "", "Parameters"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.ZScalesDescriptors.Parameters": [[8, 3, 1, "", "descriptor"], [8, 3, 1, "", "scaler"]], "optunaz.evaluate": [[8, 1, 1, "", "get_merged_train_score"], [8, 1, 1, "", "get_scores"], [8, 1, 1, "", "get_train_test_scores"], [8, 1, 1, "", "score_all"], [8, 1, 1, "", "score_all_smiles"]], "optunaz.explainability": [[8, 1, 1, "", "ExplainPreds"], [8, 1, 1, "", "ShapExplainer"], [8, 1, 1, "", "explain_ECFP"], [8, 1, 1, "", "get_ecfp_fpinfo"], [8, 1, 1, "", "get_ecfpcount_fpinfo"], [8, 1, 1, "", "get_fp_info"], [8, 1, 1, "", "runShap"]], "optunaz.metircs": [[8, 1, 1, "", "auc_pr_cal"], [8, 1, 1, "", "bedroc_score"], [8, 1, 1, "", "concordance_index"], [8, 1, 1, "", "validate_cls_input"]], "optunaz.model_writer": [[8, 2, 1, "", "Predictor"], [8, 2, 1, "", "QSARtunaModel"], [8, 1, 1, "", "get_metadata"], [8, 1, 1, "", "get_transform"], [8, 1, 1, "", "perform_ptr"], [8, 1, 1, "", "save_model"], [8, 1, 1, "", "wrap_model"]], "optunaz.model_writer.Predictor": [[8, 4, 1, "", "explain"], [8, 4, 1, "", "predict"], [8, 4, 1, "", "predict_proba"], [8, 4, 1, "", "predict_uncert"]], "optunaz.model_writer.QSARtunaModel": [[8, 3, 1, "", "aux_transform"], [8, 3, 1, "", "descriptor"], [8, 3, 1, "", "metadata"], [8, 3, 1, "", "mode"], [8, 4, 1, "", "predict_from_smiles"], [8, 3, 1, "", "predictor"], [8, 3, 1, "", "transform"]], "optunaz.objective": [[8, 5, 1, "", "NoValidDescriptors"], [8, 2, 1, "", "Objective"], [8, 1, 1, "", "null_scores"]], "optunaz.objective.Objective": [[8, 3, 1, "", "cache"], [8, 3, 1, "", "optconfig"], [8, 3, 1, "", "train_aux"], [8, 3, 1, "", "train_smiles"], [8, 3, 1, "", "train_y"]], "optunaz.optbuild": [[8, 1, 1, "", "main"], [8, 1, 1, "", "predict_pls"]], "optunaz.predict": [[8, 5, 1, "", "ArgsError"], [8, 5, 1, "", 
"AuxCovariateMissing"], [8, 5, 1, "", "PrecomputedError"], [8, 5, 1, "", "UncertaintyError"], [8, 1, 1, "", "check_precomp_args"], [8, 1, 1, "", "main"], [8, 1, 1, "", "validate_args"], [8, 1, 1, "", "validate_aux"], [8, 1, 1, "", "validate_set_precomputed"], [8, 1, 1, "", "validate_uncertainty"]], "optunaz.schemagen": [[8, 1, 1, "", "doctitle"], [8, 1, 1, "", "main"], [8, 1, 1, "", "patch_schema_generic"], [8, 1, 1, "", "patch_schema_optunaz"], [8, 1, 1, "", "type_base_schema"]], "optunaz.three_step_opt_build_merge": [[8, 1, 1, "", "base_chemprop_params"], [8, 1, 1, "", "build_best"], [8, 1, 1, "", "build_merged"], [8, 1, 1, "", "buildconfig_best"], [8, 1, 1, "", "log_scores"], [8, 1, 1, "", "optimize"], [8, 1, 1, "", "run_study"], [8, 1, 1, "", "split_optimize"]], "optunaz.utils": [[11, 0, 0, "-", "enums"], [10, 0, 0, "-", "files_paths"], [10, 1, 1, "", "load_df_from_file"], [10, 0, 0, "-", "load_json"], [10, 1, 1, "", "md5_hash"], [10, 1, 1, "", "mkdict"], [10, 0, 0, "-", "mlflow"], [12, 0, 0, "-", "preprocessing"], [10, 1, 1, "", "remove_failed_idx"], [10, 0, 0, "-", "schema"], [10, 0, 0, "-", "tracking"]], "optunaz.utils.enums": [[11, 2, 1, "", "MlflowLogParams"], [11, 2, 1, "", "StudyUserAttrs"], [11, 2, 1, "", "TrialParams"], [11, 2, 1, "", "TrialUserAttrs"], [11, 0, 0, "-", "building_configuration_enum"], [11, 0, 0, "-", "configuration_enum"], [11, 0, 0, "-", "interface_enum"], [11, 0, 0, "-", "model_runner_enum"], [11, 0, 0, "-", "objective_enum"], [11, 0, 0, "-", "optimization_configuration_enum"], [11, 0, 0, "-", "prediction_configuration_enum"], [11, 0, 0, "-", "return_values_enum"], [11, 0, 0, "-", "visualization_enum"]], "optunaz.utils.enums.MlflowLogParams": [[11, 3, 1, "", "TRIAL_NUMBER"]], "optunaz.utils.enums.StudyUserAttrs": [[11, 3, 1, "", "OPTCONFIG"]], "optunaz.utils.enums.TrialParams": [[11, 3, 1, "", "ALGORITHM_HASH"], [11, 3, 1, "", "ALGORITHM_NAME"], [11, 3, 1, "", "DESCRIPTOR"]], "optunaz.utils.enums.TrialUserAttrs": [[11, 3, 1, "", 
"TEST_SCORES"], [11, 3, 1, "", "TRAIN_SCORES"]], "optunaz.utils.enums.building_configuration_enum": [[11, 2, 1, "", "BuildingConfigurationEnum"]], "optunaz.utils.enums.building_configuration_enum.BuildingConfigurationEnum": [[11, 3, 1, "", "GENERAL_CLASSIFIER"], [11, 3, 1, "", "GENERAL_HYPERPARAMETERS"], [11, 3, 1, "", "GENERAL_REGRESSOR"], [11, 3, 1, "", "METADATA"], [11, 3, 1, "", "METADATA_BESTTRIAL"], [11, 3, 1, "", "METADATA_BESTVALUE"]], "optunaz.utils.enums.configuration_enum": [[11, 2, 1, "", "ConfigurationEnum"]], "optunaz.utils.enums.configuration_enum.ConfigurationEnum": [[11, 3, 1, "", "ALGORITHMS"], [11, 3, 1, "", "ALGORITHMS_ADABOOSTCLASSIFIER"], [11, 3, 1, "", "ALGORITHMS_ADABOOSTCLASSIFIER_LEARNING_RATE"], [11, 3, 1, "", "ALGORITHMS_ADABOOSTCLASSIFIER_N_ESTIMATORS"], [11, 3, 1, "", "ALGORITHMS_CALIBRATEDCLASSIFIERCV"], [11, 3, 1, "", "ALGORITHMS_CALIBRATEDCLASSIFIERCV_ENSEMBLE"], [11, 3, 1, "", "ALGORITHMS_CALIBRATEDCLASSIFIERCV_ESTIMATOR"], [11, 3, 1, "", "ALGORITHMS_CALIBRATEDCLASSIFIERCV_METHOD"], [11, 3, 1, "", "ALGORITHMS_CALIBRATEDCLASSIFIERCV_N_FOLDS"], [11, 3, 1, "", "ALGORITHMS_CALIBRATEDCLASSIFIERCV_PARAMS"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_ACTIVATION"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_AGGREGATION"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_AGGREGATION_NORM"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_BATCH_SIZE"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_CLASSIFIER"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_DEPTH"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_DROPOUT"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_ENSEMBLE_SIZE"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_EPOCHS"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_FEATURES_GENERATOR"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_FFN_HIDDEN_SIZE"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_FFN_NUM_LAYERS"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_FINAL_LR_RATIO_EXP"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_FRZN"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_HIDDEN_SIZE"], [11, 3, 1, "", 
"ALGORITHMS_CHEMPROP_HYPEROPT_CLASSIFIER"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_HYPEROPT_REGRESSOR"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_INIT_LR_RATIO_EXP"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_MAX_LR_EXP"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_NUM_ITERS"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_PRETRAINED_MODEL"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_REGRESSOR"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_SEARCH_PARAMETER_LEVEL"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_STARTUP_RANDOM_ITERS"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_WARMUP_EPOCHS_RATIO"], [11, 3, 1, "", "ALGORITHMS_HIGH"], [11, 3, 1, "", "ALGORITHMS_INTERFACE_SKLEARN"], [11, 3, 1, "", "ALGORITHMS_INTERFACE_XGBOOST"], [11, 3, 1, "", "ALGORITHMS_KNEIGHBORSCLASSIFIER"], [11, 3, 1, "", "ALGORITHMS_KNEIGHBORSREGRESSOR"], [11, 3, 1, "", "ALGORITHMS_KNEIGHBORS_METRIC"], [11, 3, 1, "", "ALGORITHMS_KNEIGHBORS_N_NEIGHBORS"], [11, 3, 1, "", "ALGORITHMS_KNEIGHBORS_WEIGHTS"], [11, 3, 1, "", "ALGORITHMS_LASSO"], [11, 3, 1, "", "ALGORITHMS_LASSO_ALPHA"], [11, 3, 1, "", "ALGORITHMS_LOGISTICREGRESSION"], [11, 3, 1, "", "ALGORITHMS_LOGISTICREGRESSION_C"], [11, 3, 1, "", "ALGORITHMS_LOGISTICREGRESSION_SOLVER"], [11, 3, 1, "", "ALGORITHMS_LOW"], [11, 3, 1, "", "ALGORITHMS_MAPIE"], [11, 3, 1, "", "ALGORITHMS_MAPIE_ALPHA"], [11, 3, 1, "", "ALGORITHMS_PLSREGRESSION"], [11, 3, 1, "", "ALGORITHMS_PLSREGRESSION_N_COMPONENTS"], [11, 3, 1, "", "ALGORITHMS_PRF"], [11, 3, 1, "", "ALGORITHMS_PRF_MAX_DEPTH"], [11, 3, 1, "", "ALGORITHMS_PRF_MAX_FEATURES"], [11, 3, 1, "", "ALGORITHMS_PRF_MINPYSUMLEAF"], [11, 3, 1, "", "ALGORITHMS_PRF_N_ESTIMATORS"], [11, 3, 1, "", "ALGORITHMS_PRF_USE_PY_GINI"], [11, 3, 1, "", "ALGORITHMS_PRF_USE_PY_LEAFS"], [11, 3, 1, "", "ALGORITHMS_Q"], [11, 3, 1, "", "ALGORITHMS_RFCLASSIFIER"], [11, 3, 1, "", "ALGORITHMS_RFREGRESSOR"], [11, 3, 1, "", "ALGORITHMS_RF_MAX_DEPTH"], [11, 3, 1, "", "ALGORITHMS_RF_MAX_FEATURES"], [11, 3, 1, "", "ALGORITHMS_RF_N_ESTIMATORS"], [11, 3, 1, "", "ALGORITHMS_RIDGE"], [11, 3, 1, "", 
"ALGORITHMS_RIDGE_ALPHA"], [11, 3, 1, "", "ALGORITHMS_SVC"], [11, 3, 1, "", "ALGORITHMS_SVC_C"], [11, 3, 1, "", "ALGORITHMS_SVC_GAMMA"], [11, 3, 1, "", "ALGORITHMS_SVR"], [11, 3, 1, "", "ALGORITHMS_SVR_C"], [11, 3, 1, "", "ALGORITHMS_SVR_GAMMA"], [11, 3, 1, "", "ALGORITHMS_XGBREGRESSOR"], [11, 3, 1, "", "ALGORITHMS_XGBREGRESSOR_LEARNING_RATE"], [11, 3, 1, "", "ALGORITHMS_XGBREGRESSOR_MAX_DEPTH"], [11, 3, 1, "", "ALGORITHMS_XGBREGRESSOR_N_ESTIMATORS"], [11, 3, 1, "", "DATA"], [11, 3, 1, "", "DATA_INPUTCOLUMN"], [11, 3, 1, "", "DATA_RESPONSECOLUMN"], [11, 3, 1, "", "DATA_TEST"], [11, 3, 1, "", "DATA_TRAINING"], [11, 3, 1, "", "DESCRIPTORS"], [11, 3, 1, "", "DESCRIPTORS_AVALON"], [11, 3, 1, "", "DESCRIPTORS_AVALON_NBITS"], [11, 3, 1, "", "DESCRIPTORS_COMPOSITE"], [11, 3, 1, "", "DESCRIPTORS_ECFP"], [11, 3, 1, "", "DESCRIPTORS_ECFPCOUNTS"], [11, 3, 1, "", "DESCRIPTORS_ECFPCOUNTS_RADIUS"], [11, 3, 1, "", "DESCRIPTORS_ECFPCOUNTS_USEFEATURES"], [11, 3, 1, "", "DESCRIPTORS_ECFP_NBITS"], [11, 3, 1, "", "DESCRIPTORS_ECFP_RADIUS"], [11, 3, 1, "", "DESCRIPTORS_JAZZY"], [11, 3, 1, "", "DESCRIPTORS_JAZZY_JAZZYNAMES"], [11, 3, 1, "", "DESCRIPTORS_MACCSKEYS"], [11, 3, 1, "", "DESCRIPTORS_PATHFP"], [11, 3, 1, "", "DESCRIPTORS_PATHFP_FPSIZE"], [11, 3, 1, "", "DESCRIPTORS_PATHFP_MAXPATH"], [11, 3, 1, "", "DESCRIPTORS_PHYSCHEM"], [11, 3, 1, "", "DESCRIPTORS_PHYSCHEM_RDKITNAMES"], [11, 3, 1, "", "DESCRIPTORS_PRECOMPUTED"], [11, 3, 1, "", "DESCRIPTORS_PRECOMPUTED_FILE"], [11, 3, 1, "", "DESCRIPTORS_PRECOMPUTED_INPUT_COLUMNN"], [11, 3, 1, "", "DESCRIPTORS_PRECOMPUTED_RESPONSE_COLUMN"], [11, 3, 1, "", "DESCRIPTORS_SCALED"], [11, 3, 1, "", "DESCRIPTORS_SCALED_DESCRIPTOR"], [11, 3, 1, "", "DESCRIPTORS_SCALED_DESCRIPTOR_PARAMETERS"], [11, 3, 1, "", "DESCRIPTORS_SMILES"], [11, 3, 1, "", "DESCRIPTORS_SMILES_AND_SI"], [11, 3, 1, "", "DESCRIPTORS_SMILES_AND_SI_AUX_WEIGHT_PC"], [11, 3, 1, "", "DESCRIPTORS_SMILES_AND_SI_FILE"], [11, 3, 1, "", "DESCRIPTORS_SMILES_AND_SI_INPUT_COLUMN"], [11, 3, 1, 
"", "DESCRIPTORS_UNSC_JAZZY"], [11, 3, 1, "", "DESCRIPTORS_UNSC_PHYSCHEM"], [11, 3, 1, "", "DESCRIPTORS_UNSC_ZSCALES"], [11, 3, 1, "", "DESCRIPTORS_ZSCALES"], [11, 3, 1, "", "GENERAL_DISABLED"], [11, 3, 1, "", "GENERAL_PARAMETERS"], [11, 3, 1, "", "SETTINGS"], [11, 3, 1, "", "SETTINGS_CROSS_VALIDATION"], [11, 3, 1, "", "SETTINGS_DIRECTION"], [11, 3, 1, "", "SETTINGS_MODE"], [11, 3, 1, "", "SETTINGS_MODE_CLASSIFICATION"], [11, 3, 1, "", "SETTINGS_MODE_REGRESSION"], [11, 3, 1, "", "SETTINGS_N_JOBS"], [11, 3, 1, "", "SETTINGS_N_TRIALS"], [11, 3, 1, "", "SETTINGS_SHUFFLE"], [11, 3, 1, "", "TASK"], [11, 3, 1, "", "TASK_BUILDING"], [11, 3, 1, "", "TASK_OPTIMIZATION"]], "optunaz.utils.enums.interface_enum": [[11, 2, 1, "", "InterfaceEnum"]], "optunaz.utils.enums.interface_enum.InterfaceEnum": [[11, 3, 1, "", "CALIBRATED_SET"], [11, 3, 1, "", "CHEMPROP_SET"], [11, 3, 1, "", "PRF_SET"], [11, 3, 1, "", "SKLEARN_SET"], [11, 3, 1, "", "XGBOOST_SET"]], "optunaz.utils.enums.model_runner_enum": [[11, 2, 1, "", "ModelRunnerDataframeEnum"]], "optunaz.utils.enums.model_runner_enum.ModelRunnerDataframeEnum": [[11, 3, 1, "", "SET"], [11, 3, 1, "", "SMILES"], [11, 3, 1, "", "TEST"], [11, 3, 1, "", "TRAIN"], [11, 3, 1, "", "Y_PRED"], [11, 3, 1, "", "Y_TRUE"]], "optunaz.utils.enums.objective_enum": [[11, 2, 1, "", "ObjectiveEnum"]], "optunaz.utils.enums.objective_enum.ObjectiveEnum": [[11, 3, 1, "", "ATTRIBUTE_TRIAL_TRAIN_SCORE"], [11, 3, 1, "", "EXTRA_COLUMN_BESTHIT"]], "optunaz.utils.enums.optimization_configuration_enum": [[11, 2, 1, "", "OptimizationConfigurationEnum"]], "optunaz.utils.enums.prediction_configuration_enum": [[11, 2, 1, "", "PredictionConfigurationEnum"]], "optunaz.utils.enums.prediction_configuration_enum.PredictionConfigurationEnum": [[11, 3, 1, "", "DATA_DATASET"]], "optunaz.utils.enums.return_values_enum": [[11, 2, 1, "", "SklearnReturnValueEnum"], [11, 2, 1, "", "XGBoostReturnValueEnum"]], "optunaz.utils.enums.return_values_enum.SklearnReturnValueEnum": [[11, 3, 
1, "", "CROSS_VALIDATE_FIT_TIME"], [11, 3, 1, "", "CROSS_VALIDATE_SCORE_TIME"], [11, 3, 1, "", "CROSS_VALIDATE_TEST_SCORE"], [11, 3, 1, "", "CROSS_VALIDATE_TRAIN_SCORE"]], "optunaz.utils.enums.return_values_enum.XGBoostReturnValueEnum": [[11, 3, 1, "", "CROSS_VALIDATE_FIT_TIME"], [11, 3, 1, "", "CROSS_VALIDATE_SCORE_TIME"], [11, 3, 1, "", "CROSS_VALIDATE_TEST_SCORE"], [11, 3, 1, "", "CROSS_VALIDATE_TRAIN_SCORE"]], "optunaz.utils.enums.visualization_enum": [[11, 2, 1, "", "VisualizationEnum"]], "optunaz.utils.enums.visualization_enum.VisualizationEnum": [[11, 3, 1, "", "OPTUNA_SYSTEM_ATTRS_INTERMEDIATE_VALUES"], [11, 3, 1, "", "OPTUNA_SYSTEM_ATTRS_NUMBER"], [11, 3, 1, "", "OPTUNA_SYSTEM_ATTRS_TRIAL_ID"], [11, 3, 1, "", "VISUALIZATION"], [11, 3, 1, "", "VISUALIZATION_CLASSIFIER"], [11, 3, 1, "", "VISUALIZATION_FILE_FORMAT"], [11, 3, 1, "", "VISUALIZATION_OUTPUT_FOLDER"], [11, 3, 1, "", "VISUALIZATION_PLOTS"], [11, 3, 1, "", "VISUALIZATION_PLOTS_CONTOUR"], [11, 3, 1, "", "VISUALIZATION_PLOTS_HISTORY"], [11, 3, 1, "", "VISUALIZATION_PLOTS_PARALLEL_COORDINATE"], [11, 3, 1, "", "VISUALIZATION_PLOTS_SLICE"], [11, 3, 1, "", "VISUALIZATION_REGRESSOR"], [11, 3, 1, "", "VISUALIZATION_USE_XVFB"]], "optunaz.utils.files_paths": [[10, 1, 1, "", "attach_root_path"], [10, 1, 1, "", "move_up_directory"]], "optunaz.utils.load_json": [[10, 1, 1, "", "loadJSON"]], "optunaz.utils.mlflow": [[10, 2, 1, "", "MLflowCallback"], [10, 1, 1, "", "add_ellipsis"], [10, 1, 1, "", "shorten_names"]], "optunaz.utils.mlflow.MLflowCallback": [[10, 3, 1, "", "optconfig"], [10, 4, 1, "", "prepare_tags"], [10, 4, 1, "", "tmp_buildconfig"], [10, 3, 1, "", "tracking_uri"], [10, 3, 1, "", "trial_number_offset"]], "optunaz.utils.preprocessing": [[12, 0, 0, "-", "deduplicator"], [12, 0, 0, "-", "splitter"], [12, 0, 0, "-", "transform"]], "optunaz.utils.preprocessing.deduplicator": [[12, 2, 1, "", "Deduplicator"], [12, 2, 1, "", "KeepAllNoDeduplication"], [12, 2, 1, "", "KeepAvg"], [12, 2, 1, "", "KeepFirst"], 
[12, 2, 1, "", "KeepLast"], [12, 2, 1, "", "KeepMax"], [12, 2, 1, "", "KeepMedian"], [12, 2, 1, "", "KeepMin"], [12, 2, 1, "", "KeepRandom"]], "optunaz.utils.preprocessing.deduplicator.Deduplicator": [[12, 4, 1, "", "dedup"]], "optunaz.utils.preprocessing.deduplicator.KeepAllNoDeduplication": [[12, 4, 1, "", "dedup"], [12, 3, 1, "", "name"]], "optunaz.utils.preprocessing.deduplicator.KeepAvg": [[12, 4, 1, "", "dedup"], [12, 3, 1, "", "name"]], "optunaz.utils.preprocessing.deduplicator.KeepFirst": [[12, 4, 1, "", "dedup"], [12, 3, 1, "", "name"]], "optunaz.utils.preprocessing.deduplicator.KeepLast": [[12, 4, 1, "", "dedup"], [12, 3, 1, "", "name"]], "optunaz.utils.preprocessing.deduplicator.KeepMax": [[12, 4, 1, "", "dedup"], [12, 3, 1, "", "name"]], "optunaz.utils.preprocessing.deduplicator.KeepMedian": [[12, 4, 1, "", "dedup"], [12, 3, 1, "", "name"]], "optunaz.utils.preprocessing.deduplicator.KeepMin": [[12, 4, 1, "", "dedup"], [12, 3, 1, "", "name"]], "optunaz.utils.preprocessing.deduplicator.KeepRandom": [[12, 4, 1, "", "dedup"], [12, 3, 1, "", "name"], [12, 3, 1, "", "seed"]], "optunaz.utils.preprocessing.splitter": [[12, 2, 1, "", "GroupingSplitter"], [12, 2, 1, "", "HistogramStratifiedShuffleSplit"], [12, 2, 1, "", "KFold"], [12, 2, 1, "", "NoSplitting"], [13, 2, 1, "", "Predefined"], [13, 2, 1, "", "Random"], [13, 2, 1, "", "ScaffoldSplit"], [12, 2, 1, "", "SklearnSplitter"], [12, 2, 1, "", "Splitter"], [13, 2, 1, "", "Stratified"], [13, 2, 1, "", "Temporal"], [12, 1, 1, "", "butina_cluster"], [12, 1, 1, "", "fd_bin"], [12, 1, 1, "", "stratify"]], "optunaz.utils.preprocessing.splitter.GroupingSplitter": [[12, 4, 1, "", "groups"]], "optunaz.utils.preprocessing.splitter.HistogramStratifiedShuffleSplit": [[12, 3, 1, "", "bins"], [12, 4, 1, "", "get_n_splits"], [12, 3, 1, "", "n_splits"], [12, 3, 1, "", "random_state"], [12, 4, 1, "", "split"], [12, 3, 1, "", "test_fraction"]], "optunaz.utils.preprocessing.splitter.KFold": [[12, 4, 1, "", 
"get_sklearn_splitter"], [12, 3, 1, "", "name"], [12, 3, 1, "", "random_state"], [12, 3, 1, "", "shuffle"], [12, 4, 1, "", "split"]], "optunaz.utils.preprocessing.splitter.NoSplitting": [[12, 4, 1, "", "get_sklearn_splitter"], [12, 3, 1, "", "name"], [12, 4, 1, "", "split"]], "optunaz.utils.preprocessing.splitter.Predefined": [[12, 3, 1, "", "column_name"], [12, 4, 1, "", "get_sklearn_splitter"], [12, 4, 1, "", "groups"], [12, 3, 1, "", "name"], [13, 4, 1, "", "split"]], "optunaz.utils.preprocessing.splitter.Random": [[12, 3, 1, "", "fraction"], [12, 4, 1, "", "get_sklearn_splitter"], [12, 3, 1, "", "name"], [12, 3, 1, "", "seed"]], "optunaz.utils.preprocessing.splitter.ScaffoldSplit": [[12, 3, 1, "", "bins"], [12, 3, 1, "", "butina_cluster"], [12, 4, 1, "", "get_n_splits"], [12, 4, 1, "", "get_sklearn_splitter"], [13, 4, 1, "", "groups"], [12, 3, 1, "", "make_scaffold_generic"], [12, 3, 1, "", "name"], [12, 3, 1, "", "random_state"], [13, 4, 1, "", "split"]], "optunaz.utils.preprocessing.splitter.SklearnSplitter": [[12, 4, 1, "", "get_n_splits"], [12, 4, 1, "", "split"]], "optunaz.utils.preprocessing.splitter.Splitter": [[12, 4, 1, "", "get_sklearn_splitter"], [12, 4, 1, "", "split"]], "optunaz.utils.preprocessing.splitter.Stratified": [[12, 3, 1, "", "bins"], [12, 3, 1, "", "fraction"], [12, 4, 1, "", "get_sklearn_splitter"], [12, 3, 1, "", "name"], [12, 3, 1, "", "seed"]], "optunaz.utils.preprocessing.splitter.Temporal": [[12, 3, 1, "", "fraction"], [12, 4, 1, "", "get_sklearn_splitter"], [12, 3, 1, "", "name"], [13, 4, 1, "", "split"]], "optunaz.utils.preprocessing.transform": [[12, 2, 1, "", "AuxTransformer"], [12, 2, 1, "", "DataTransform"], [12, 2, 1, "", "LogBase"], [12, 2, 1, "", "LogNegative"], [14, 2, 1, "", "ModelDataTransform"], [14, 2, 1, "", "PTRTransform"], [14, 2, 1, "", "VectorFromColumn"], [14, 2, 1, "", "ZScales"]], "optunaz.utils.preprocessing.transform.AuxTransformer": [[12, 4, 1, "", "transform"]], 
"optunaz.utils.preprocessing.transform.DataTransform": [[12, 4, 1, "", "transform"]], "optunaz.utils.preprocessing.transform.LogBase": [[12, 3, 1, "", "LOG"], [12, 3, 1, "", "LOG10"], [12, 3, 1, "", "LOG2"]], "optunaz.utils.preprocessing.transform.LogNegative": [[12, 3, 1, "", "FALSE"], [12, 3, 1, "", "TRUE"]], "optunaz.utils.preprocessing.transform.ModelDataTransform": [[14, 2, 1, "", "Parameters"], [12, 3, 1, "", "base_dict"], [12, 3, 1, "", "base_negation"], [12, 3, 1, "", "name"], [12, 3, 1, "", "parameters"], [12, 3, 1, "", "reverse_dict"], [12, 4, 1, "", "reverse_transform"], [12, 4, 1, "", "reverse_transform_df"], [12, 4, 1, "", "reverse_transform_one"], [12, 4, 1, "", "transform"], [12, 4, 1, "", "transform_df"], [12, 4, 1, "", "transform_one"]], "optunaz.utils.preprocessing.transform.ModelDataTransform.Parameters": [[12, 3, 1, "", "base"], [12, 3, 1, "", "conversion"], [12, 3, 1, "", "negation"]], "optunaz.utils.preprocessing.transform.PTRTransform": [[14, 2, 1, "", "Parameters"], [12, 3, 1, "", "name"], [12, 3, 1, "", "parameters"], [12, 4, 1, "", "reverse_transform"], [12, 4, 1, "", "transform"]], "optunaz.utils.preprocessing.transform.PTRTransform.Parameters": [[12, 3, 1, "", "std"], [12, 3, 1, "", "threshold"]], "optunaz.utils.preprocessing.transform.VectorFromColumn": [[14, 2, 1, "", "Parameters"], [12, 3, 1, "", "name"], [12, 3, 1, "", "parameters"], [12, 4, 1, "", "transform"]], "optunaz.utils.preprocessing.transform.VectorFromColumn.Parameters": [[12, 3, 1, "", "delimiter"]], "optunaz.utils.preprocessing.transform.ZScales": [[14, 2, 1, "", "Parameters"], [12, 3, 1, "", "name"], [12, 3, 1, "", "parameters"], [12, 4, 1, "", "transform"]], "optunaz.utils.schema": [[10, 1, 1, "", "add_boolean_guards_for_schema_properties"], [10, 1, 1, "", "addsibling"], [10, 1, 1, "", "addtitles"], [10, 1, 1, "", "copytitle"], [10, 1, 1, "", "create_dependency"], [10, 1, 1, "", "delsibling"], [10, 1, 1, "", "getref"], [10, 1, 1, "", "remove_schema_properties"], [10, 1, 
1, "", "replaceenum"], [10, 1, 1, "", "replacekey"], [10, 1, 1, "", "replacevalue"]], "optunaz.utils.tracking": [[10, 2, 1, "", "BuildTrackingData"], [10, 2, 1, "", "Datapoint"], [10, 2, 1, "", "InternalTrackingCallback"], [10, 2, 1, "", "TrackingData"], [10, 1, 1, "", "get_authorization_header"], [10, 1, 1, "", "removeprefix"], [10, 1, 1, "", "round_scores"], [10, 1, 1, "", "track_build"]], "optunaz.utils.tracking.BuildTrackingData": [[10, 3, 1, "", "response_column_name"], [10, 3, 1, "", "test_points"], [10, 3, 1, "", "test_scores"]], "optunaz.utils.tracking.Datapoint": [[10, 3, 1, "", "expected"], [10, 3, 1, "", "predicted"], [10, 3, 1, "", "smiles"]], "optunaz.utils.tracking.InternalTrackingCallback": [[10, 3, 1, "", "optconfig"], [10, 3, 1, "", "trial_number_offset"]], "optunaz.utils.tracking.TrackingData": [[10, 3, 1, "", "all_cv_test_scores"], [10, 3, 1, "", "buildconfig"], [10, 3, 1, "", "scoring"], [10, 3, 1, "", "trial_number"], [10, 3, 1, "", "trial_state"], [10, 3, 1, "", "trial_value"]], "optunaz.visualizer": [[8, 2, 1, "", "Visualizer"]], "optunaz.visualizer.Visualizer": [[8, 4, 1, "", "plot_by_configuration"], [8, 4, 1, "", "plot_contour"], [8, 4, 1, "", "plot_history"], [8, 4, 1, "", "plot_parallel_coordinate"], [8, 4, 1, "", "plot_slice"]]}, "objtypes": {"0": "py:module", "1": "py:function", "2": "py:class", "3": "py:attribute", "4": "py:method", "5": "py:exception"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "function", "Python function"], "2": ["py", "class", "Python class"], "3": ["py", "attribute", "Python attribute"], "4": ["py", "method", "Python method"], "5": ["py", "exception", "Python exception"]}, "titleterms": {"qsartuna": [0, 4, 6, 7], "\ud80c\udd9b": 0, "qsar": 0, "us": 0, "optim": [0, 6], "hyperparamet": 0, "tune": 0, "formerli": 0, "optuna": 0, "az": 0, "qptuna": 0, "background": [0, 6], "The": [0, 6], "three": [0, 6], "step": [0, 6], "process": [0, 6], "json": 0, "base": [0, 7], "command": 0, "line": 0, 
"interfac": 0, "configur": [0, 6], "file": [0, 6], "run": [0, 6], "via": 0, "singulart": 0, "submit": 0, "slurm": 0, "model": [0, 6], "option": [0, 6], "inspect": 0, "from": [0, 6, 7], "python": 0, "jupyt": 0, "notebook": 0, "avail": [1, 2, 3, 13, 14], "algorithm": [1, 6], "adaboostclassifi": 1, "lasso": 1, "kneighborsclassifi": 1, "kneighborsregressor": 1, "logisticregress": 1, "plsregress": 1, "randomforestclassifi": 1, "randomforestregressor": 1, "ridg": 1, "svc": 1, "svr": 1, "xgbregressor": 1, "prfclassifi": 1, "chempropregressor": 1, "chempropclassifi": 1, "chemprophyperoptclassifi": 1, "chemprophyperoptregressor": 1, "chemprophyperoptregressorpretrain": 1, "calibratedclassifiercvwithva": 1, "mapi": [1, 6], "dedupl": [2, 7, 12], "keepfirst": 2, "keeplast": 2, "keeprandom": 2, "keepmin": 2, "keepmax": 2, "keepavg": 2, "keepmedian": 2, "keepkeepallnodedupl": 2, "descriptor": [3, 6, 8], "avalon": 3, "ecfp": 3, "ecfp_count": 3, "pathfp": 3, "maccs_kei": 3, "unscaledphyschemdescriptor": 3, "unscaledjazzydescriptor": 3, "unscaledzscalesdescriptor": 3, "physchemdescriptor": 3, "jazzydescriptor": 3, "precomputeddescriptorfromfil": 3, "zscale": [3, 14], "smilesfromfil": 3, "smilesandsideinfofromfil": 3, "scaleddescriptor": 3, "compositedescriptor": 3, "welcom": 4, "document": 4, "develop": 4, "optunaz": [5, 8, 9, 10, 11, 12], "cli": 6, "tutori": 6, "thi": 6, "prepar": 6, "regress": 6, "exampl": 6, "creat": 6, "visual": [6, 8], "progress": 6, "pick": 6, "best": [6, 7], "trial": 6, "build": 6, "merg": 6, "preprocess": [6, 7, 12], "split": [6, 7], "data": [6, 7], "train": 6, "test": 6, "set": 6, "remov": 6, "duplic": [6, 7], "dataset": 6, "choos": 6, "score": 6, "function": 6, "advanc": 6, "functoinail": 6, "probabilist": [6, 7], "random": [6, 7, 13], "forest": 6, "prf": 6, "interlud": [6, 7], "cautionari": 6, "advic": 6, "y": 6, "respons": 6, "column": 6, "valid": 6, "chemprop": 6, "simpl": 6, "separ": 6, "shallow": 6, "method": 6, "default": 6, "behavior": 6, "turn": 
6, "hyperopt": 6, "within": [6, 7], "functionail": 6, "veri": 6, "larg": 6, "comput": 6, "cost": 6, "A": 6, "note": 6, "mpnn": 6, "search": 6, "space": 6, "side": 6, "inform": 6, "multi": 6, "task": 6, "learn": 6, "mtl": 6, "combin": 6, "onli": 6, "recommend": 6, "long": 6, "time": 6, "pre": 6, "adapt": 6, "transfer": 6, "fingerprint": 6, "encod": 6, "latent": 6, "represent": [6, 7], "probabl": 6, "calibr": 6, "classif": 6, "uncertainti": 6, "estim": 6, "vennab": 6, "ensembl": 6, "dropout": 6, "explain": [6, 8], "shap": 6, "interpret": 6, "log": [6, 7], "transform": [6, 7, 12, 14], "covari": 6, "one": 6, "e": 6, "g": 6, "dose": 6, "point": 6, "co": 6, "variat": 6, "proteochemometr": 6, "pcm": 6, "more": 6, "vectorfromsmil": 6, "z": 6, "scale": 6, "object": [6, 8], "priorit": 6, "perform": [6, 7], "standard": 6, "deviat": 6, "further": 6, "precomput": 6, "introduct": 7, "translat": 7, "sdf": 7, "csv": 7, "need": 7, "deal": 7, "compar": 7, "differ": 7, "unif": 7, "strategi": 7, "tempor": [7, 13], "stratifi": [7, 13], "scaffold": 7, "input": 7, "user": 7, "import": 7, "logarithm": 7, "dataread": [7, 8], "threshold": 7, "ptr": [7, 14], "experiment": 7, "error": 7, "definit": 7, "implement": 7, "conclus": 7, "calcul": 7, "evalu": [7, 8], "reproduc": 7, "practic": 7, "packag": [8, 9, 10, 11, 12], "subpackag": [8, 10], "submodul": [8, 9, 10, 11, 12], "builder": 8, "modul": [8, 9, 10, 11, 12], "metirc": 8, "model_writ": 8, "optbuild": 8, "predict": 8, "schemagen": 8, "three_step_opt_build_merg": 8, "content": [8, 9, 10, 11, 12], "config": 9, "build_from_opt": 9, "buildconfig": 9, "optconfig": 9, "util": [10, 11, 12], "files_path": 10, "load_json": 10, "mlflow": 10, "schema": 10, "track": 10, "enum": 11, "building_configuration_enum": 11, "configuration_enum": 11, "interface_enum": 11, "model_runner_enum": 11, "objective_enum": 11, "optimization_configuration_enum": 11, "prediction_configuration_enum": 11, "return_values_enum": 11, "visualization_enum": 11, "splitter": [12, 
13], "predefin": 13, "scaffoldsplit": 13, "modeldatatransform": 14, "vectorfromcolumn": 14}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "nbsphinx": 4, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 56}}) \ No newline at end of file +Search.setIndex({"docnames": ["README", "algorithms", "deduplicator", "descriptors", "index", "modules", "notebooks/QSARtuna_Tutorial", "notebooks/preprocess_data", "optunaz", "optunaz.config", "optunaz.utils", "optunaz.utils.enums", "optunaz.utils.preprocessing", "splitters", "transform"], "filenames": ["README.md", "algorithms.rst", "deduplicator.rst", "descriptors.rst", "index.rst", "modules.rst", "notebooks/QSARtuna_Tutorial.ipynb", "notebooks/preprocess_data.ipynb", "optunaz.rst", "optunaz.config.rst", "optunaz.utils.rst", "optunaz.utils.enums.rst", "optunaz.utils.preprocessing.rst", "splitters.rst", "transform.rst"], "titles": ["QSARtuna \ud80c\udd9b: QSAR using Optimization for Hyperparameter Tuning (formerly Optuna AZ and QPTUNA)", "Available algorithms", "Available deduplicators", "Available descriptors", "Welcome to QSARtuna Documentation!", "optunaz", "QSARtuna CLI Tutorial", "Preprocessing data for QSARtuna", "optunaz package", "optunaz.config package", "optunaz.utils package", "optunaz.utils.enums package", "optunaz.utils.preprocessing package", "Available splitters", "Available transform"], "terms": {"build": [0, 4, 8, 9, 10, 11], "predict": [0, 1, 3, 5, 6, 7, 9, 10, 11], "compchem": 0, "develop": [0, 3, 8], "uncertainti": [0, 1, 4, 7, 8, 9, 12, 14], "quantif": 0, "explain": [0, 1, 4, 5, 9], "mind": 0, "thi": [0, 1, 3, 4, 7, 8, 9, 10, 11, 12, 13], "librari": [0, 7], "search": [0, 1, 3, 8, 9], "best": [0, 1, 8, 9], "ml": [0, 4, 9], "algorithm": [0, 3, 4, 7, 8, 9, 10, 11, 
12, 13], "molecular": [0, 1, 3, 4, 6, 7, 8, 9], "descriptor": [0, 4, 5, 9, 11], "given": [0, 1, 3, 6, 7, 8, 9, 10], "data": [0, 1, 3, 4, 8, 9, 11, 12, 13, 14], "itself": [0, 6], "done": [0, 6], "emploi": [0, 3, 6, 7, 8], "latest": [0, 9], "state": [0, 6, 12], "art": [0, 6], "estim": [0, 1, 3, 4, 7, 8, 9, 11], "packag": [0, 1, 3, 5, 6, 7], "further": [0, 9], "document": [0, 6, 7], "github": [0, 6, 8, 12], "page": 0, "here": [0, 3, 6, 7, 8, 9, 12, 13], "public": [0, 1, 3, 4, 6, 8, 9], "avail": [0, 4, 6, 7, 8], "structur": [0, 1, 3, 6, 7, 8, 9], "around": [0, 6, 7], "train": [0, 1, 3, 4, 7, 8, 9, 11, 12, 13], "mani": [0, 1, 6, 9], "differ": [0, 1, 3, 6, 8, 9, 12], "paramet": [0, 1, 3, 6, 7, 8, 9, 11, 12, 13, 14], "onli": [0, 1, 3, 7, 8, 9, 12, 13], "dataset": [0, 1, 7, 8, 9, 11, 12], "usual": [0, 1, 6, 7, 9], "cross": [0, 1, 6, 9, 12], "valid": [0, 1, 3, 7, 8, 9, 11, 12], "pick": 0, "evalu": [0, 4, 5, 6], "its": [0, 1, 6, 9, 11], "perform": [0, 3, 8, 11, 12, 14], "test": [0, 1, 4, 7, 8, 9, 11, 12, 13], "prod": [0, 6], "re": [0, 6, 8], "merg": [0, 7, 8, 12], "ha": [0, 1, 6, 7, 8, 9, 12], "drawback": [0, 6], "left": [0, 6, 7], "result": [0, 1, 3, 6, 7, 8, 9], "big": [0, 6], "benefit": [0, 6], "final": [0, 1, 6, 9], "all": [0, 1, 3, 6, 7, 8, 9, 11, 12], "let": [0, 6, 7], "s": [0, 6, 7], "look": [0, 1, 6, 7, 9], "trivial": 0, "exampl": [0, 3, 7, 8, 9], "weight": [0, 1, 3, 6, 8, 9, 11], "set": [0, 3, 4, 7, 8, 9, 10, 11, 12, 13], "50": [0, 1, 3, 6, 8, 9], "molecul": [0, 1, 3, 6, 7, 8, 9], "we": [0, 3, 6, 7, 8, 9, 12, 13], "start": [0, 4, 6, 7], "format": [0, 3, 6, 7, 8, 10], "It": [0, 1, 3, 6, 7, 8, 9], "contain": [0, 3, 6, 8], "four": 0, "main": [0, 3, 6, 7, 8], "section": [0, 6, 7], "locat": [0, 6], "column": [0, 3, 7, 8, 11, 12, 13, 14], "detail": [0, 3, 6, 7, 8, 9, 10], "about": [0, 6, 7], "which": [0, 1, 3, 6, 7, 8, 9, 12], "below": [0, 3, 6, 7, 8], "task": [0, 3, 7, 8, 9, 11], "training_dataset_fil": [0, 6, 7, 8], "drd2": [0, 6], "subset": [0, 1, 6, 7, 9], "csv": [0, 
3, 4, 6, 8], "input_column": [0, 3, 6, 7, 8, 11], "canon": [0, 6, 8, 12], "response_column": [0, 3, 6, 7, 8, 11], "molwt": [0, 6], "mode": [0, 6, 8, 9, 11], "regress": [0, 1, 7, 9, 11, 12], "cross_valid": [0, 6, 9, 11, 12], "5": [0, 1, 3, 6, 7, 8, 9, 12], "direct": [0, 1, 6, 9, 11], "maxim": [0, 1, 6, 9], "n_trial": [0, 6, 8, 9, 11], "100": [0, 1, 3, 6, 7, 8, 9, 10], "n_startup_tri": [0, 6, 8, 9], "30": [0, 1, 6, 7, 9], "name": [0, 1, 3, 6, 7, 8, 9, 10, 11, 12, 13, 14], "ecfp": [0, 4, 6, 8, 9, 11], "radiu": [0, 3, 6, 8, 11], "3": [0, 1, 3, 4, 6, 7, 8, 9], "nbit": [0, 3, 6, 8, 9, 11], "2048": [0, 3, 6, 8], "maccs_kei": [0, 4, 6, 8, 11], "randomforestregressor": [0, 4, 6, 9, 11], "max_depth": [0, 1, 9, 11], "low": [0, 1, 3, 6, 7, 8, 9, 11], "2": [0, 1, 3, 6, 7, 8, 9, 12, 13], "high": [0, 1, 3, 6, 8, 9, 11], "32": [0, 1, 6, 9], "n_estim": [0, 1, 6, 9, 11], "10": [0, 1, 3, 6, 7, 8, 9, 12], "250": [0, 1, 6, 9], "max_featur": [0, 1, 9, 11], "auto": [0, 1, 6, 8, 9], "ridg": [0, 4, 6, 9, 11], "alpha": [0, 1, 6, 7, 8, 9, 11], "0": [0, 1, 3, 4, 6, 7, 8, 9, 12, 13], "lasso": [0, 4, 6, 9, 11], "xgbregressor": [0, 4, 6, 9, 11], "learning_r": [0, 1, 9, 11], "1": [0, 1, 3, 4, 6, 7, 8, 9, 10, 12, 13], "specifi": [0, 1, 6, 7, 8, 9, 11], "In": [0, 1, 3, 6, 7, 8, 9], "rel": [0, 6], "path": [0, 1, 3, 6, 7, 8, 9, 10], "folder": [0, 6, 7], "ar": [0, 1, 3, 6, 7, 8, 9, 11, 12, 13], "want": [0, 6, 7], "fold": [0, 1, 6, 7, 9, 12, 13], "valu": [0, 1, 3, 6, 7, 8, 9, 10, 11, 12, 13, 14], "object": [0, 1, 5, 7, 9, 10, 11, 12], "function": [0, 1, 3, 4, 7, 8, 9, 10, 11, 12, 14], "standard": [0, 1, 7, 9, 12, 14], "scikit": [0, 6, 8, 12, 13], "learn": [0, 1, 3, 7, 8, 9, 12, 13], "have": [0, 3, 6, 7, 8, 9, 12], "total": [0, 6, 7], "trial": [0, 1, 9, 10, 11], "first": [0, 1, 3, 6, 7, 8, 9, 12], "startup": [0, 6], "should": [0, 1, 3, 6, 7, 8, 9, 12], "random": [0, 1, 4, 9, 12], "explor": [0, 6], "get": [0, 6, 7, 8], "stuck": 0, "earli": [0, 1, 6, 8, 9], "one": [0, 3, 7, 8, 9, 12, 13], "local": [0, 6], 
"minimum": [0, 1, 3, 8, 9, 12, 13], "two": [0, 1, 6, 7, 9, 12, 13], "free": [0, 3, 8], "pair": [0, 6], "ani": [0, 3, 6, 7, 8, 9, 12], "when": [0, 1, 6, 7, 8, 9, 12, 13], "our": [0, 6, 7], "time": [0, 1, 3, 7, 8, 9], "can": [0, 1, 3, 6, 7, 8, 9, 12, 13], "deploi": 0, "singular": [0, 6], "To": [0, 6, 7], "insid": [0, 8], "follow": [0, 3, 6, 7, 8, 9], "syntax": 0, "exec": 0, "sif": 0, "project": [0, 1, 9], "cc": [0, 6, 7], "mai": [0, 1, 3, 6, 7, 8, 9], "qsartuna_latest": 0, "opt": 0, "venv": [0, 6], "bin": [0, 6, 7, 12, 13], "config": [0, 1, 5, 6, 8], "regression_drd2_50": 0, "buildconfig": [0, 5, 6, 8, 10], "outpath": 0, "target": [0, 1, 6, 7, 9, 12, 14], "pkl": [0, 1, 6, 9], "sinc": [0, 3, 6, 7, 8, 12], "long": [0, 7], "avoid": [0, 6, 12], "login": 0, "node": 0, "queue": 0, "instead": [0, 1, 3, 6, 7, 8, 9, 12], "script": 0, "give": [0, 6], "sbatch": 0, "sh": 0, "ntask": 0, "cpu": 0, "per": [0, 1, 7, 9], "mem": 0, "4g": 0, "partit": 0, "core": [0, 7, 9], "illustr": [0, 6, 7], "how": [0, 1, 3, 6, 7, 8, 9], "chose": 0, "chang": [0, 6], "directori": [0, 6, 10], "cd": 0, "project_fold": 0, "optunaaz": 0, "version": [0, 7, 9], "optunaaz_latest": 0, "complet": [0, 3, 6, 8], "creat": [0, 7, 10], "pickl": [0, 6], "your": [0, 6, 7], "home": 0, "under": [0, 6], "built": [0, 3, 7, 8, 9], "infer": [0, 6, 7, 8], "input": [0, 1, 3, 4, 6, 8, 9, 12, 13, 14], "smile": [0, 3, 6, 7, 8, 10, 11, 12, 13], "output": [0, 1, 6, 7, 8, 9, 12, 14], "note": [0, 1, 3, 7, 8, 9, 11, 12, 13], "_": [0, 6], "point": [0, 1, 7, 9, 12, 13], "most": [0, 6, 7], "recent": [0, 6], "legaci": 0, "requir": [0, 3, 6, 7, 8], "same": [0, 1, 6, 7, 9], "modifi": [0, 8], "abov": [0, 6, 7], "suppli": [0, 1, 3, 6, 7, 8, 9], "qsartuna_": 0, "replac": [0, 6, 10], "e": [0, 1, 3, 7, 8, 9, 12, 14], "g": [0, 3, 7, 8, 12, 14], "qsartuna_2": 0, "1_model": 0, "would": [0, 3, 6, 7, 8], "gener": [0, 1, 3, 6, 7, 8, 9, 12, 13], "tri": 0, "dure": [0, 1, 3, 6, 7, 8, 9], "mlflow": [0, 5, 8, 11], "track": [0, 3, 5, 8, 9], "ui": 0, 
"modul": [0, 5, 6], "load": [0, 1, 6, 7, 9], "Then": 0, "open": [0, 1, 6, 9], "link": 0, "browser": 0, "If": [0, 1, 3, 6, 8, 9, 12, 13], "you": [0, 1, 6, 7, 9], "scp": 0, "forward": [0, 1, 9], "port": 0, "separ": [0, 3, 7, 8], "ssh": 0, "session": 0, "non": [0, 1, 3, 6, 7, 8, 9], "machin": [0, 1, 3, 6, 7, 8, 9], "n": [0, 1, 6, 7, 9, 10], "l": 0, "localhost": 0, "5000": 0, "user": [0, 1, 3, 4, 6, 8, 9, 11, 12, 13, 14], "intranet": 0, "net": 0, "just": [0, 6], "execut": 0, "select": [0, 1, 3, 6, 7, 8, 9], "experi": [0, 7, 10], "after": [0, 1, 3, 6, 7, 8, 9, 10], "choos": [0, 4], "compar": [0, 6], "comparison": [0, 6, 7], "show": [0, 6, 7], "call": [0, 1, 3, 6, 8, 9, 12], "well": [0, 6, 7], "metric": [0, 1, 6, 8, 9, 11], "At": 0, "bottom": 0, "plot": [0, 6, 7, 9, 11], "For": [0, 1, 3, 6, 7, 8, 9, 12], "x": [0, 1, 3, 6, 7, 8, 9, 12, 13], "axi": [0, 6, 7], "number": [0, 1, 3, 6, 7, 8, 9, 10], "y": [0, 1, 7, 8, 9, 12, 13], "cvmean": 0, "r2": [0, 6, 7, 9], "more": [0, 1, 3, 7, 8, 9, 10], "click": 0, "individu": [0, 1, 6, 7, 9], "There": [0, 1, 6, 7, 9], "access": [0, 3, 8], "conda": 0, "environ": [0, 3, 6, 8], "instal": [0, 6], "purg": 0, "miniconda3": 0, "my_env_with_qsartuna": 0, "pip": [0, 6], "activ": [0, 1, 6, 7, 9, 11, 12, 14], "case": [0, 1, 6, 7, 8, 9], "check": [0, 6, 8, 11, 12], "m": [0, 6, 8], "http": [0, 3, 6, 7, 8, 9, 12], "com": [0, 6, 7, 8, 12], "molecularai": [0, 6], "releas": [0, 4, 6, 7], "download": [0, 6], "tar": [0, 6], "gz": [0, 6], "three_step_opt_build_merg": [0, 5, 6], "import": [0, 6], "buildconfig_best": [0, 6, 8], "build_best": [0, 6, 8], "build_merg": [0, 6, 8], "modelmod": [0, 6, 8, 9], "optimizationdirect": [0, 6, 9], "optconfig": [0, 1, 5, 6, 8, 10, 11], "optimizationconfig": [0, 6, 8, 9], "svr": [0, 4, 6, 9, 11], "randomforest": 0, "pl": [0, 1, 9], "dataread": [0, 5, 6], "ecfp_count": [0, 4, 6, 8, 11], "prepar": [0, 4, 7], "new": [0, 1, 6, 7, 9], "studi": [0, 3, 6, 8, 9, 10, 11], "study_nam": [0, 6, 8], "my_studi": [0, 6], "make": [0, 1, 
6, 7, 9, 12, 13, 14], "write": [0, 6], "out": [0, 1, 6, 7, 9], "print": [0, 6, 7], "dump": [0, 6], "indent": [0, 6], "save": [0, 6], "class": [1, 3, 6, 7, 8, 9, 10, 11, 12, 13, 14], "optunaz": [1, 3, 6, 7, 13, 14], "sourc": [1, 3, 8, 9, 10, 11, 12, 13, 14], "adaboost": [1, 9], "classifi": [1, 6, 8, 9, 11], "an": [1, 3, 6, 7, 8, 9, 10, 12, 13], "meta": [1, 6, 9], "begin": [1, 6, 7, 9, 12, 13], "fit": [1, 3, 6, 7, 8, 9], "origin": [1, 3, 6, 7, 8, 9, 12], "addit": [1, 6, 9], "copi": [1, 7, 9, 10], "where": [1, 6, 7, 9], "incorrectli": [1, 9], "instanc": [1, 6, 7, 9], "adjust": [1, 9], "subsequ": [1, 9], "focu": [1, 9], "difficult": [1, 6, 9], "adaboostclassifierparametersnestim": [1, 9], "adaboostclassifierparameterslearningr": [1, 9], "The": [1, 3, 7, 8, 9, 10, 12, 14], "maximum": [1, 3, 6, 7, 8, 9, 12, 13], "boost": [1, 3, 8, 9], "termin": [1, 9], "perfect": [1, 9], "procedur": [1, 3, 7, 8, 9], "stop": [1, 6, 9], "titl": [1, 3, 6, 7, 8, 9, 10, 12, 13, 14], "appli": [1, 3, 6, 7, 8, 9, 12, 13, 14], "each": [1, 3, 6, 7, 8, 9, 12], "classifierat": [1, 9], "iter": [1, 6, 9, 12, 13], "A": [1, 3, 7, 8, 9], "higher": [1, 6, 9], "rateincreas": [1, 9], "contribut": [1, 3, 6, 8, 9], "trade": [1, 9], "off": [1, 6, 7, 9, 12, 13], "between": [1, 6, 7, 9], "learning_rateand": [1, 9], "int": [1, 3, 8, 9, 12, 13], "float": [1, 6, 7, 9, 10, 12, 13, 14], "linear": [1, 7, 9], "model": [1, 3, 4, 7, 8, 9, 10, 12, 13, 14], "l1": [1, 9], "prior": [1, 7, 9], "regular": [1, 6, 9], "spars": [1, 9], "coeffici": [1, 9], "tend": [1, 9], "prefer": [1, 9], "solut": [1, 6, 9], "fewer": [1, 9], "zero": [1, 3, 8, 9], "effect": [1, 6, 7, 9, 12], "reduc": [1, 6, 9], "featur": [1, 3, 6, 8, 9], "upon": [1, 6, 9], "depend": [1, 6, 7, 9], "lassoparametersalpha": [1, 9], "constant": [1, 9], "multipli": [1, 9], "term": [1, 6, 9], "control": [1, 6, 7, 9, 12], "strength": [1, 3, 8, 9], "must": [1, 6, 9], "neg": [1, 6, 7, 9], "i": [1, 3, 6, 7, 8, 9], "inf": [1, 6, 8, 9], "equival": [1, 9], "ordinari": [1, 9], 
"least": [1, 6, 9], "squar": [1, 6, 9], "solv": [1, 6, 9], "linearregress": [1, 9], "numer": [1, 7, 9], "reason": [1, 7, 9], "us": [1, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14], "advis": [1, 6, 7, 9], "implement": [1, 3, 6, 8, 9, 12], "k": [1, 6, 7, 9, 12, 13], "nearest": [1, 6, 9], "neighbor": [1, 6, 9, 12, 13], "vote": [1, 9], "principl": [1, 9], "behind": [1, 9], "method": [1, 3, 7, 8, 9, 11, 12], "find": [1, 6, 9], "predefin": [1, 4, 9, 12, 14], "sampl": [1, 6, 9, 12, 13], "closest": [1, 9], "distanc": [1, 6, 9], "label": [1, 6, 7, 8, 9, 12, 13], "from": [1, 3, 4, 8, 9, 10, 12, 13, 14], "defin": [1, 3, 6, 7, 8, 9, 10, 12], "despit": [1, 6, 9], "simplic": [1, 9], "success": [1, 6, 9], "larg": [1, 7, 9], "classif": [1, 4, 7, 8, 9, 11, 12], "problem": [1, 6, 7, 8, 9], "n_neighbor": [1, 9, 11], "kneighborsclassifierparametersn_neighbor": [1, 9], "default": [1, 7, 8, 9, 12, 13], "kneighbor": [1, 9], "queri": [1, 6, 7, 9], "list": [1, 3, 4, 6, 7, 8, 9, 12, 14], "comput": [1, 3, 7, 8, 9, 12], "minkowski": [1, 6, 9], "euclidean": [1, 9], "regressor": [1, 6, 8, 9, 11], "kneighborsregressorparametersn_neighbor": [1, 9], "logist": [1, 9], "rather": [1, 3, 6, 7, 8, 9], "than": [1, 3, 6, 7, 8, 9], "also": [1, 3, 6, 7, 8, 9, 12], "known": [1, 3, 6, 7, 8, 9], "literatur": [1, 9], "logit": [1, 9], "entropi": [1, 7, 9], "maxent": [1, 9], "log": [1, 4, 9, 10, 11, 12, 14], "probabl": [1, 4, 7, 9], "describ": [1, 3, 6, 8, 9, 12], "possibl": [1, 6, 7, 9, 12, 13], "outcom": [1, 9], "singl": [1, 6, 9], "solver": [1, 9, 11], "c": [1, 3, 6, 7, 8, 9, 11], "logisticregressionparametersparameterc": [1, 9], "try": [1, 6, 7, 9], "sag": [1, 9], "saga": [1, 9], "fast": [1, 6, 9], "converg": [1, 6, 9], "guarante": [1, 9], "approxim": [1, 6, 9], "scale": [1, 3, 7, 8, 9, 12, 14], "preprocess": [1, 4, 8, 9, 10, 13, 14], "scaler": [1, 3, 8, 9], "invers": [1, 9], "posit": [1, 3, 6, 8, 9], "like": [1, 3, 6, 7, 8, 9], "support": [1, 6, 8, 9], "vector": [1, 3, 6, 8, 9, 12, 14], "smaller": [1, 6, 9], 
"stronger": [1, 9], "decomposit": [1, 9], "partial": [1, 6, 9], "form": [1, 3, 6, 7, 8, 9, 12], "compon": [1, 3, 6, 8, 9], "fundament": [1, 9], "relat": [1, 3, 6, 7, 8, 9], "matric": [1, 9], "thei": [1, 3, 6, 7, 8, 9], "latent": [1, 9], "variabl": [1, 6, 7, 9], "approach": [1, 3, 6, 7, 8, 9], "covari": [1, 4, 8, 9], "space": [1, 7, 9, 12, 13], "multidimension": [1, 9], "varianc": [1, 6, 7, 9], "other": [1, 6, 7, 9, 12], "word": [1, 6, 9], "both": [1, 6, 7, 9], "lower": [1, 6, 9], "dimension": [1, 9], "subspac": [1, 6, 8, 9], "transform": [1, 4, 8, 9, 10], "n_compon": [1, 9, 11], "ncompon": [1, 9], "keep": [1, 6, 7, 9, 12], "min": [1, 6, 7, 9, 12], "n_sampl": [1, 9], "n_featur": [1, 9], "n_target": [1, 9], "forest": [1, 4, 7, 9], "decis": [1, 6, 7, 9, 12, 14], "tree": [1, 6, 9], "variou": [1, 6, 8, 9], "sub": [1, 6, 9], "averag": [1, 3, 6, 7, 8, 9, 12], "improv": [1, 3, 6, 8, 9], "accuraci": [1, 6, 9], "over": [1, 6, 9], "randomforestclassifierparametersmaxdepth": [1, 9], "randomforestclassifierparametersnestim": [1, 6, 9], "depth": [1, 6, 9, 11], "considerwhen": [1, 9], "split": [1, 4, 8, 9, 12, 13, 14], "thenconsid": [1, 9], "sqrt": [1, 6, 7, 9], "log2": [1, 7, 9, 12, 14], "randomforestregressorparametersmaxdepth": [1, 9], "randomforestregressorparametersnestim": [1, 9], "l2": [1, 9], "loss": [1, 3, 6, 8, 9], "norm": [1, 7, 9], "tikhonov": [1, 9], "base": [1, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14], "libsvm": [1, 9], "quadrat": [1, 9], "impract": [1, 9], "beyond": [1, 9], "ten": [1, 9], "thousand": [1, 9], "svcparametersparameterc": [1, 9], "1e": [1, 9], "gamma": [1, 9, 11], "0001": [1, 9], "proport": [1, 6, 9], "strictli": [1, 9], "penalti": [1, 9], "kernel": [1, 9], "epsilon": [1, 9], "complex": [1, 6, 9], "hard": [1, 3, 8, 9], "coupl": [1, 9], "10000": [1, 9], "svrparametersparameterc": [1, 9], "svrparametersgamma": [1, 9], "maxdepth": [1, 9], "nestim": [1, 9], "learningr": [1, 9], "xgboost": [1, 9, 11], "gradient": [1, 9], "stand": [1, 9], "extrem": [1, 6, 9], 
"paper": [1, 3, 6, 8, 9], "greedi": [1, 9], "friedman": [1, 9], "learner": [1, 9], "round": [1, 6, 7, 9], "prf": [1, 4, 7, 9], "probabilist": [1, 4, 9], "seen": [1, 6, 7, 9], "hybrid": [1, 6, 9], "similar": [1, 3, 6, 7, 8, 9, 12, 13], "take": [1, 3, 6, 7, 8, 9, 12], "real": [1, 6, 7, 9, 12, 13], "threshold": [1, 3, 4, 6, 8, 9, 12, 13, 14], "represent": [1, 3, 4, 8, 9], "ptr": [1, 4, 6, 9, 12], "howev": [1, 3, 6, 7, 8, 9], "belong": [1, 3, 6, 7, 8, 9], "inact": [1, 6, 7, 9, 12, 14], "use_py_gini": [1, 9, 11], "use_py_leaf": [1, 9, 11], "prfclassifierparametersmaxdepth": [1, 9], "prfclassifierparametersnestim": [1, 9], "min_py_sum_leaf": [1, 9, 11], "prfclassifierparametersminpysumleaf": [1, 9], "gini": [1, 9], "true": [1, 3, 6, 7, 8, 9, 12, 13], "py": [1, 6, 7, 9], "leav": [1, 9], "leaf": [1, 9], "allow": [1, 3, 6, 7, 8, 9], "prune": [1, 6, 9], "propag": [1, 9], "small": [1, 7, 9], "thu": [1, 6, 9], "pth": [1, 9], "scheme": [1, 9], "noisi": [1, 9], "chemprop": [1, 3, 4, 8, 9], "deep": [1, 6, 9], "properti": [1, 3, 6, 7, 8, 9, 10], "network": [1, 3, 6, 8, 9], "messag": [1, 6, 9], "pass": [1, 3, 6, 7, 8, 9, 12], "neural": [1, 6, 8, 9], "d": [1, 3, 6, 8, 9], "mpnn": [1, 9], "encod": [1, 9], "graph": [1, 3, 6, 8, 9], "feed": [1, 9], "ffnn": [1, 9], "multi": [1, 3, 8, 9], "layer": [1, 3, 6, 8, 9], "perceptron": [1, 9], "wa": [1, 3, 6, 7, 8, 9], "present": [1, 6, 7, 9], "analyz": [1, 6, 9], "optuna": [1, 4, 6, 8, 9, 10, 11], "optims": [1, 9], "hyperopt": [1, 9], "ensemble_s": [1, 6, 9, 11], "epoch": [1, 6, 9, 11], "aggreg": [1, 6, 7, 9, 11, 12, 13], "aggregation_norm": [1, 6, 9, 11], "chempropparametersaggregation_norm": [1, 9], "200": [1, 6, 9], "q": [1, 3, 6, 8, 9, 11], "batch_siz": [1, 6, 9, 11], "chempropparametersbatch_s": [1, 9], "chempropparametersdepth": [1, 9], "6": [1, 6, 7, 9], "dropout": [1, 9, 11], "chempropparametersdropout": [1, 9], "4": [1, 3, 6, 7, 8, 9, 12, 13], "04": [1, 6, 9], "features_gener": [1, 6, 9, 11], "ffn_hidden_s": [1, 6, 9, 11], 
"chempropparametersffn_hidden_s": [1, 9], "300": [1, 6, 9], "2400": [1, 9], "ffn_num_lay": [1, 6, 9, 11], "chempropparametersffn_num_lay": [1, 9], "final_lr_ratio_exp": [1, 6, 9, 11], "chempropparametersfinal_lr_ratio_exp": [1, 9], "hidden_s": [1, 6, 9, 11], "chempropparametershidden_s": [1, 9], "init_lr_ratio_exp": [1, 6, 9, 11], "chempropparametersinit_lr_ratio_exp": [1, 9], "max_lr_exp": [1, 6, 9, 11], "chempropparametersmax_lr_exp": [1, 9], "warmup_epochs_ratio": [1, 6, 9, 11], "chempropparameterswarmup_epochs_ratio": [1, 9], "ensembl": [1, 9, 11], "initialis": [1, 6, 9], "provid": [1, 3, 6, 7, 8, 9, 12], "size": [1, 3, 6, 7, 8, 9], "run": [1, 3, 4, 7, 8, 9, 10], "increas": [1, 6, 7, 9, 12, 13], "400": [1, 9], "sum": [1, 6, 9], "atom": [1, 3, 6, 8, 9, 12, 13], "divid": [1, 6, 9], "up": [1, 3, 6, 8, 9, 10], "batch": [1, 9, 12], "step": [1, 7, 8, 9], "visibl": [1, 9], "randomli": [1, 6, 7, 9], "some": [1, 3, 6, 7, 8, 9], "element": [1, 9, 12, 13], "tensor": [1, 9], "p": [1, 3, 8, 9], "bernoulli": [1, 9], "distribut": [1, 6, 7, 9], "channel": [1, 9], "independ": [1, 7, 9], "everi": [1, 6, 9], "proven": [1, 9], "techniqu": [1, 7, 9], "prevent": [1, 6, 9], "co": [1, 7, 9], "adapt": [1, 3, 8, 9], "neuron": [1, 9], "hidden": [1, 6, 9], "ffn": [1, 6, 9], "exponenti": [1, 7, 9], "rate": [1, 9], "bond": [1, 3, 8, 9], "ratio": [1, 9], "linearli": [1, 9], "init_lr": [1, 9], "max_lr": [1, 9], "afterward": [1, 7, 9], "decreas": [1, 7, 9], "final_lr": [1, 9], "without": [1, 6, 9, 12], "within": [1, 3, 8, 9, 12, 13], "hyperparamet": [1, 4, 6, 8, 9], "side": [1, 3, 8, 9], "inform": [1, 3, 8, 9, 10], "nb": [1, 3, 6, 7, 8, 9], "quick": [1, 4, 9], "simpl": [1, 7, 9], "sensibl": [1, 6, 7, 8, 9], "author": [1, 6, 8, 9], "do": [1, 6, 7, 9, 12], "num_it": [1, 6, 9, 11], "search_parameter_level": [1, 6, 9, 11], "dictat": [1, 9], "larger": [1, 3, 6, 7, 8, 9], "chempropregressorpretrain": [1, 6, 9], "pretrin": [1, 9], "pretrain": [1, 6, 9], "carri": [1, 9], "previous": [1, 9], 
"qsartuna": [1, 9], "chempropparametersepoch": [1, 6, 9], "frzn": [1, 9, 11], "pretrained_model": [1, 6, 9, 11], "none": [1, 3, 6, 8, 9, 10, 12, 13, 14], "fine": [1, 9], "tune": [1, 3, 4, 6, 8, 9], "decid": [1, 9], "freez": [1, 9], "transfer": [1, 9], "frozen": [1, 9], "str": [1, 3, 6, 8, 9, 10, 11, 12, 13, 14], "calibr": [1, 4, 8, 9], "isoton": [1, 6, 9], "vennab": [1, 9], "cv": [1, 6, 9], "With": [1, 3, 8, 9], "across": [1, 6, 7, 9, 12], "fals": [1, 3, 6, 7, 8, 9, 12], "obtain": [1, 6, 7, 9], "unbias": [1, 9], "offer": [1, 3, 6, 8, 9], "p0": [1, 6, 9], "vs": [1, 6, 7, 9], "p1": [1, 6, 9], "discord": [1, 6, 9], "type": [1, 3, 6, 7, 8, 9, 12], "union": [1, 3, 8, 9], "calibratedclassifiercvensembl": [1, 9], "calibratedclassifiercvmethod": [1, 9], "n_fold": [1, 6, 9, 11], "whether": [1, 3, 7, 8, 9, 12, 14], "agnost": [1, 6, 9], "interv": [1, 6, 9], "encompass": [1, 9], "aleator": [1, 9], "epistem": [1, 9], "back": [1, 6, 9], "strong": [1, 9], "theoret": [1, 6, 9], "thank": [1, 9], "conform": [1, 9], "mapie_alpha": [1, 6, 9, 11], "05": [1, 6, 7, 9], "01": [1, 6, 9], "99": [1, 6, 9], "see": [3, 6, 7, 8, 9, 12, 13], "gedeck": [3, 8], "et": [3, 7, 8], "al": [3, 7, 8], "qsar": [3, 4, 6, 7, 8], "good": [3, 6, 7, 8], "practic": [3, 8], "fingerprint": [3, 8, 9], "wai": [3, 6, 7, 8], "daylight": [3, 8], "enumer": [3, 6, 8, 9], "custom": [3, 6, 8], "ref": [3, 8, 10], "16": [3, 6, 7, 8], "hash": [3, 6, 8, 9, 10], "code": [3, 6, 7, 8], "style": [3, 6, 7, 8], "implicitli": [3, 8], "largest": [3, 6, 8], "refer": [3, 6, 8, 10, 11], "toward": [3, 6, 7, 8], "binari": [3, 6, 8, 9], "extend": [3, 6, 7, 8], "connect": [3, 6, 8], "roger": [3, 8], "circular": [3, 8], "morgan": [3, 8, 9], "invari": [3, 8], "getmorganfingerprintasbitvect": [3, 8], "rdkit": [3, 6, 7, 8, 9], "systemat": [3, 8], "record": [3, 6, 7, 8], "neighborhood": [3, 8, 9], "h": [3, 6, 7, 8], "multipl": [3, 6, 8, 12, 13], "runtim": [3, 8], "substructur": [3, 6, 8], "map": [3, 6, 7, 8], "integ": [3, 7, 8, 12, 13], 
"length": [3, 6, 8], "identifi": [3, 6, 7, 8], "diamet": [3, 8], "append": [3, 6, 7, 8, 9], "ecfp4": [3, 8], "correspond": [3, 6, 7, 8, 9], "returnrdkit": [3, 6, 8], "consid": [3, 6, 8, 12, 13], "while": [3, 6, 7, 8, 12], "bit": [3, 6, 8], "sometim": [3, 6, 7, 8], "bool": [3, 8, 9, 12, 13], "calculate_from_mol": [3, 8], "mol": [3, 6, 7, 8], "return": [3, 6, 7, 8, 10, 11, 12, 13], "numpi": [3, 6, 7, 8, 12, 13], "arrai": [3, 6, 8, 12, 13], "count": [3, 6, 8, 9], "gethashedmorganfingerprint": [3, 8], "appear": [3, 8, 11], "usefeatur": [3, 6, 8, 11], "fcfp": [3, 8], "normal": [3, 6, 7, 8, 9], "ones": [3, 6, 8], "definit": [3, 8, 12], "gobbi": [3, 8], "popping": [3, 8], "biotechnolog": [3, 8], "bioengin": [3, 8], "61": [3, 6, 8], "47": [3, 6, 8], "54": [3, 6, 8], "1998": [3, 6, 8], "lead": [3, 7, 8], "score": [3, 4, 8, 9, 10, 12, 14], "fp": [3, 6, 8], "maxpath": [3, 6, 8, 11], "fpsize": [3, 6, 8, 11], "macc": [3, 6, 8], "system": [3, 7, 8], "often": [3, 6, 8], "mdl": [3, 8], "kei": [3, 6, 7, 8, 10, 11], "compani": [3, 8], "calcul": [3, 6, 8, 12, 13, 14], "keyset": [3, 8], "construct": [3, 8, 11], "optim": [3, 4, 7, 8, 9, 11], "durant": [3, 8], "reoptim": [3, 8], "drug": [3, 8], "discoveri": [3, 8], "166": [3, 6, 8], "2d": [3, 8, 9], "essenti": [3, 7, 8], "answer": [3, 8], "fragment": [3, 8], "question": [3, 8], "explicitli": [3, 6, 7, 8], "exist": [3, 8], "sens": [3, 8], "matter": [3, 8], "becaus": [3, 6, 7, 8], "address": [3, 8], "specif": [3, 6, 7, 8, 11], "repres": [3, 6, 7, 8], "9": [3, 6, 7, 8], "1049": [3, 8], "distinct": [3, 6, 7, 8, 9], "rdkit_nam": [3, 6, 8, 11], "unscal": [3, 8], "These": [3, 6, 7, 8, 12], "physchem": [3, 6, 8], "caution": [3, 7, 8], "208": [3, 6, 8], "includ": [3, 6, 7, 8], "clogp": [3, 6, 8], "mw": [3, 6, 8], "ring": [3, 7, 8], "rotat": [3, 8], "fraction": [3, 6, 7, 8, 12, 13], "sp3": [3, 8], "kier": [3, 8], "indic": [3, 6, 7, 8, 12, 13], "etc": [3, 6, 8], "tpsa": [3, 6, 8], "slogp": [3, 8], "group": [3, 7, 8, 12, 13], "vsa": [3, 8], "moe": 
[3, 8], "charg": [3, 8], "www": [3, 8], "org": [3, 6, 8], "doc": [3, 6, 8], "gettingstartedinpython": [3, 8], "html": [3, 6, 7, 8, 9], "whose": [3, 8], "been": [3, 6, 7, 8], "level": [3, 6, 7, 8, 9], "One": [3, 7, 8], "advantag": [3, 8], "interpret": [3, 8], "mean": [3, 6, 7, 8, 9, 12], "physicochem": [3, 8], "intuit": [3, 8], "understood": [3, 8], "option": [3, 4, 7, 8, 9, 11, 12, 13, 14], "jazzy_nam": [3, 6, 8, 11], "jazzy_filt": [3, 6, 8], "jazzi": [3, 8], "hydrat": [3, 8], "energi": [3, 8], "hydrogen": [3, 8], "acceptor": [3, 6, 8], "donor": [3, 6, 8], "found": [3, 6, 8], "doi": [3, 6, 8], "1038": [3, 8], "s41598": [3, 8], "023": [3, 8], "30089": [3, 8], "mmff94": [3, 8], "minimis": [3, 8], "1000da": [3, 8], "compound": [3, 6, 7, 8, 12, 13], "dict": [3, 8, 9, 10, 11], "calculate_from_smi": [3, 6, 8], "smi": [3, 8, 10], "string": [3, 6, 7, 8, 11, 12, 13, 14], "ndarrai": [3, 8], "z": [3, 8, 12, 14], "peptid": [3, 6, 8, 12, 14], "unfittedsklearnscal": [3, 8], "mol_data": [3, 8], "moldata": [3, 8], "file_path": [3, 8], "smiles_column": [3, 8], "fittedsklearnscal": [3, 8], "alia": [3, 8, 9], "precomput": [3, 7, 8], "file": [3, 7, 8, 10, 11, 12, 13], "header": [3, 8], "line": [3, 4, 6, 7, 8, 10], "row": [3, 6, 7, 8], "comma": [3, 6, 7, 8], "respons": [3, 7, 8], "pre": [3, 7, 8], "zscalesdescriptor": [3, 8, 11], "were": [3, 6, 7, 8], "propos": [3, 8], "sandberg": [3, 8], "proteogen": [3, 8], "amino": [3, 6, 8], "acid": [3, 6, 8], "nmr": [3, 8], "thin": [3, 8], "chromatographi": [3, 8], "tlc": [3, 8], "1021": [3, 6, 8], "jm9700575": [3, 8], "captur": [3, 6, 7, 8], "lipophil": [3, 8], "steric": [3, 8], "bulk": [3, 8], "polariz": [3, 8], "electron": [3, 8], "polar": [3, 8], "electroneg": [3, 8], "heat": [3, 8], "electrophil": [3, 8], "anoth": [3, 6, 8], "optimis": [3, 6, 8, 9, 10], "either": [3, 7, 8, 9], "through": [3, 6, 8], "_and_": [3, 8], "auxiliari": [3, 6, 8, 12, 14], "continu": [3, 6, 8], "learnt": [3, 6, 8], "manner": [3, 6, 8], "intent": [3, 6, 8], "much": [3, 
8], "remain": [3, 7, 8, 12], "deriv": [3, 6, 7, 8], "cautiou": [3, 8], "upload": [3, 8], "_all_": [3, 8], "ruder": [3, 8], "io": [3, 6, 8, 9], "index": [3, 6, 7, 8], "auxiliarytask": [3, 8], "aux_weight_pc": [3, 6, 8, 9, 11], "20": [3, 6, 7, 8], "info": [3, 6, 8], "percentag": [3, 8], "wrap": [3, 6, 8], "certain": [3, 6, 7, 8], "rang": [3, 6, 7, 8, 12, 13], "scaleddescriptorparamet": [3, 8], "cach": [3, 6, 7, 8, 9], "composit": [3, 6, 8], "concaten": [3, 8], "button": [3, 8], "pleas": [3, 6, 7, 8, 10], "compat": [3, 6, 8], "intro": 4, "background": 4, "json": [4, 6, 9, 11], "command": [4, 6], "interfac": [4, 6, 8, 11, 12], "python": [4, 6, 8, 12], "jupyt": [4, 6], "notebook": [4, 6], "introduct": 4, "translat": [4, 6], "sdf": [4, 6, 8], "need": 4, "deal": [4, 12], "duplic": [4, 8, 9, 12], "dedupl": [4, 6, 8, 10], "experiment": [4, 6, 12, 14], "error": [4, 6], "cli": 4, "tutori": 4, "remov": [4, 7, 8, 9, 12, 13], "advanc": [4, 7, 8], "functoinail": 4, "adaboostclassifi": [4, 6, 9, 11], "kneighborsclassifi": [4, 6, 9, 11], "kneighborsregressor": [4, 6, 9, 11], "logisticregress": [4, 6, 9, 11], "plsregress": [4, 6, 9, 11], "randomforestclassifi": [4, 6, 9, 11], "svc": [4, 6, 9, 11], "prfclassifi": [4, 6, 9, 11], "chempropregressor": [4, 6, 9, 11], "chempropclassifi": [4, 6, 9, 11], "chemprophyperoptclassifi": [4, 6, 9, 11], "chemprophyperoptregressor": [4, 6, 9, 11], "chemprophyperoptregressorpretrain": 4, "calibratedclassifiercvwithva": [4, 6, 9, 11], "mapi": [4, 9, 11], "avalon": [4, 8, 9, 11], "pathfp": [4, 6, 8, 11], "unscaledphyschemdescriptor": [4, 6, 8, 11], "unscaledjazzydescriptor": [4, 6, 8, 11], "unscaledzscalesdescriptor": [4, 8, 11], "physchemdescriptor": [4, 8, 11], "jazzydescriptor": [4, 8, 11], "precomputeddescriptorfromfil": [4, 6, 8, 11], "zscale": [4, 6, 8, 12], "smilesfromfil": [4, 6, 8, 11], "smilesandsideinfofromfil": [4, 6, 8, 11], "scaleddescriptor": [4, 8, 11], "compositedescriptor": [4, 6, 8, 11], "tempor": [4, 6, 12], "stratifi": [4, 6, 12], 
"scaffoldsplit": [4, 7, 12], "modeldatatransform": [4, 6, 7, 8, 12], "vectorfromcolumn": [4, 6, 8, 12], "keepfirst": [4, 7, 12], "keeplast": [4, 7, 12], "keeprandom": [4, 7, 12], "keepmin": [4, 7, 12], "keepmax": [4, 7, 12], "keepavg": [4, 7, 12], "keepmedian": [4, 6, 7, 12], "keepkeepallnodedupl": 4, "report": [4, 6, 7], "coverag": 4, "subpackag": 5, "submodul": 5, "build_from_opt": [5, 8], "content": 5, "util": [5, 6, 7, 8, 13, 14], "files_path": [5, 8], "load_json": [5, 8], "schema": [5, 8, 11], "builder": 5, "metirc": 5, "model_writ": 5, "optbuild": 5, "schemagen": 5, "visual": [5, 9, 11], "intend": 6, "necessari": 6, "gui": 6, "autom": [6, 7], "reinvent": 6, "varieti": 6, "them": [6, 7, 9], "so": [6, 7, 9], "hyper": [6, 9], "influenc": [6, 7, 9], "automat": [6, 7], "idea": [6, 7], "read": [6, 7, 8], "otherwis": [6, 12], "might": [6, 7, 12], "skip": [6, 7], "toi": 6, "chosen": 6, "whole": [6, 7], "less": [6, 7], "minut": 6, "holdout": [6, 7], "readi": 6, "next": [6, 7], "few": [6, 7], "head": [6, 7], "molwt_gt_330": 6, "cc1cc": 6, "nc": [6, 7], "o": [6, 7], "c2cccc": 6, "coc3ccc": 6, "br": [6, 7], "cc3": [6, 7], "c2": [6, 7], "no1": 6, "387": 6, "233": 6, "nc1ccc": 6, "f": [6, 7, 12], "cc1f": 6, "nc1sccc1": 6, "c1nc2ccccc2s1": 6, "4360000000001": 6, "coc": 6, "c1ccccc1nc": 6, "c1cc": 6, "nn1cc1ccccc1": 6, "380": 6, "36000000000007": 6, "ccoc": 6, "sc1nc": 6, "c2ccccc2": 6, "ccc1c": 6, "312": 6, "39400000000006": 6, "ccc": [6, 7], "c1nn": [6, 7], "cc2ccccc2": 6, "c2ccccc12": 6, "349": 6, "4340000000001": 6, "brc1ccccc1occcoc1cccc2cccnc12": 6, "358": 6, "235": 6, "ccccn1c": 6, "coc2cccc": 6, "oc": 6, "nc2ccccc21": 6, "310": 6, "39700000000005": 6, "ccoc1cccc": 6, "c2sc3nc": 6, "c4ccc": 6, "cc4": 6, "ccc3c2n": 6, "c1": [6, 7], "407": 6, "4700000000001": 6, "coc1ccc": 6, "nc2ccc": 6, "cc2": 6, "c2ccc": 6, "cc1oc": 6, "454": 6, "54800000000023": 6, "sy": [6, 7], "sklearn": [6, 11], "kljk345": [6, 7], "pycharmproject": 6, "public_qptuna": 6, "lib": [6, 7], "python3": 
[6, 7], "site": [6, 7], "tqdm": 6, "21": [6, 7], "tqdmwarn": 6, "iprogress": 6, "updat": [6, 7], "ipywidget": 6, "readthedoc": [6, 9], "en": [6, 9], "stabl": [6, 8], "user_instal": 6, "autonotebook": 6, "notebook_tqdm": 6, "typic": [6, 7], "test_dataset_fil": [6, 8], "random_se": [6, 9], "42": [6, 7, 12, 13], "seed": [6, 7, 8, 12, 13], "reproduc": [6, 12, 14], "setup": [6, 9], "basic": [6, 8, 9], "importlib": 6, "reload": 6, "basicconfig": 6, "getlogg": 6, "disabl": [6, 7, 11], "np": [6, 7], "seterr": 6, "ignor": 6, "warn": [6, 7], "filterwarn": 6, "categori": 6, "futurewarn": 6, "runtimewarn": 6, "functool": 6, "partialmethod": 6, "__init__": [6, 9], "flood": 6, "decpreci": 6, "simplefilt": 6, "def": [6, 7, 9], "arg": [6, 8, 9], "kwarg": [6, 9], "stderr": 6, "render": [6, 7], "red": 6, "2024": 6, "07": 6, "02": 6, "13": [6, 7], "17": [6, 7], "26": [6, 7], "561": 6, "memori": [6, 8], "714": 6, "study_name_0": 6, "27": [6, 7], "022": 6, "finish": 6, "3594": 6, "2228073972638": 6, "39": [6, 7], "algorithm_nam": [6, 11], "randomforestregressor_algorithm_hash": 6, "f1ac01e1bba332215ccbd0c29c9ac3c3": 6, "max_depth__f1ac01e1bba332215ccbd0c29c9ac3c3": 6, "n_estimators__f1ac01e1bba332215ccbd0c29c9ac3c3": 6, "max_features__f1ac01e1bba332215ccbd0c29c9ac3c3": 6, "lt": [6, 7], "randomforestmaxfeatur": [6, 9], "gt": [6, 7], "34": [6, 7], "171": 6, "5029": 6, "734616310275": 6, "svr_algorithm_hash": 6, "ea7ccc7ef4a9329af0d4e39eb6184933": 6, "gamma__ea7ccc7ef4a9329af0d4e39eb6184933": 6, "039054412752107935": 6, "c__ea7ccc7ef4a9329af0d4e39eb6184933": 6, "1242780840717016e": 6, "429": 6, "4242": 6, "092751193529": 6, "579": 6, "3393": 6, "577488426015": 6, "ridge_algorithm_hash": 6, "cfa1990d5153c8812982f034d788d7e": 6, "alpha__cfa1990d5153c8812982f034d788d7e": 6, "06877704223043679": 6, "644": 6, "427": 6, "45250420148204": 6, "lasso_algorithm_hash": 6, "5457f609662e44f04dcc9423066d2f58": 6, "alpha__5457f609662e44f04dcc9423066d2f58": 6, "7896547008552977": 6, "698": 6, "3387": 6, 
"245629616474": 6, "plsregression_algorithm_hash": 6, "9f2f76e479633c0bf18cf2912fed9eda": 6, "n_components__9f2f76e479633c0bf18cf2912fed9eda": 6, "853": 6, "734620250011": 6, "3661540064603184": 6, "1799882524170321": 6, "28": [6, 7], "029": 6, "7": [6, 7, 9], "9650": 6, "026568221794": 6, "kneighborsregressor_algorithm_hash": 6, "1709d2c39117ae29f6c9debe7241287b": 6, "metric__1709d2c39117ae29f6c9debe7241287b": 6, "kneighborsmetr": [6, 9], "n_neighbors__1709d2c39117ae29f6c9debe7241287b": 6, "weights__1709d2c39117ae29f6c9debe7241287b": 6, "kneighborsweight": [6, 9], "uniform": [6, 9], "070": 6, "8": [6, 7, 9], "5437": 6, "151635569594": 6, "05083825348819038": 6, "336": 6, "2669": 6, "8534551928174": 6, "373": 6, "4341": 6, "586120152291": 6, "7921825998469865": 6, "532": 6, "11": [6, 7], "5514": 6, "404088878843": 6, "680": 6, "12": [6, 7], "5431": 6, "634989239215": 6, "722": 6, "3530": 6, "5496618991288": 6, "765": 6, "14": [6, 7], "3497": 6, "6833185436312": 6, "793": 6, "15": [6, 7], "4382": 6, "16208862162": 6, "831": 6, "734620031822": 6, "002825619931800395": 6, "309885135051862e": 6, "09": 6, "870": 6, "679": 6, "3109044887755": 6, "16827992999009767": 6, "932": 6, "18": [6, 7], "2550": 6, "114129318373": 6, "974": 6, "19": [6, 7], "4847": 6, "085792360169": 6, "735431606118867": 6, "29": 6, "016": 6, "268760278916": 6, "0014840820994557746": 6, "04671166881768783": 6, "096": 6, "4783": 6, "0470154796785": 6, "135": 6, "22": [6, 7], "3905": 6, "0064899852296": 6, "259": 6, "23": [6, 7], "4030": 6, "45773791647": 6, "340": 6, "24": [6, 7], "4681": 6, "602145939593": 6, "381": 6, "25": [6, 7], "4398": 6, "544034028325": 6, "6452011213193165": 6, "474": 6, "4454": 6, "143979828408": 6, "503": 6, "533": 6, "600": 6, "4397": 6, "330360587512": 6, "617": 6, "682": 6, "31": [6, 7], "2602": 6, "7561184287083": 6, "715": 6, "5267": 6, "388279961089": 6, "2015560027548533": 6, "794": 6, "33": 6, "4863": 6, "581760751052": 6, "836": 6, "388": 6, "96473594016675": 6, 
"5528259214839937": 6, "906": 6, "35": 6, "5539": 6, "698232987626": 6, "6400992020612235": 6, "962": 6, "36": 6, "5180": 6, "5533034102455": 6, "8968910439566395": 6, "005": 6, "37": 6, "4989": 6, "929984864281": 6, "04458440839692226": 6, "492108041427977": 6, "034": 6, "38": [6, 7], "103": 6, "6528": 6, "215066535042": 6, "16700143339733753": 6, "240": 6, "40": [6, 7], "4168": 6, "7955967552625": 6, "311": 6, "41": 6, "6177": 6, "060727800014": 6, "401": 6, "3963": 6, "906954658343": 6, "435": 6, "43": 6, "6805334166565": 6, "013186009009851564": 6, "001008958590140135": 6, "501": 6, "44": 6, "9300": 6, "86840721566": 6, "547": 6, "45": [6, 7], "83": 6, "87968210939489": 6, "382674443425525e": 6, "565": 6, "46": 6, "594": 6, "626": 6, "48": 6, "717": 6, "49": 6, "3660": 6, "9359502556": 6, "767": 6, "688": 6, "5244070398325": 6, "5267860995545326": 6, "813": 6, "51": 6, "690": 6, "6494438072099": 6, "8458809314722497": 6, "848": 6, "52": 6, "691": 6, "1197058420935": 6, "9167866889210807": 6, "898": 6, "53": 6, "3111710449325": 6, "945685900574672": 6, "934": 6, "9665592812149": 6, "8936837761725833": 6, "970": 6, "55": 6, "4682747008223": 6, "5183865279530455": 6, "030": 6, "56": 6, "687": 6, "5230947231512": 6, "3771771681361766": 6, "078": 6, "57": 6, "4503442069594": 6, "3663259819415374": 6, "127": 6, "58": 6, "686": 6, "9553733616618": 6, "2925652230875628": 6, "174": 6, "59": 6, "370": 6, "2038330506566": 6, "3962903248948568": 6, "222": 6, "60": 6, "377": 6, "25988028857313": 6, "45237513161879": 6, "270": 6, "379": 6, "8933285317637": 6, "4741161933311207": 6, "319": 6, "62": 6, "374": 6, "50897467366013": 6, "4290962207409417": 6, "356": 6, "63": [6, 7], "376": 6, "5588572940058": 6, "4464295711264585": 6, "416": 6, "64": 6, "237448916406": 6, "4687500034684213": 6, "65": 6, "375": 6, "7474776359051": 6, "4395650011783436": 6, "504": 6, "66": 6, "362": 6, "2834906299732": 6, "3326755354190032": 6, "542": 6, "67": 6, "357": 6, "3474880122588": 6, 
"2887212943233457": 6, "591": 6, "68": 6, "354": 6, "279045046449": 6, "2577677164664005": 6, "642": 6, "69": 6, "347": 6, "36894395697703": 6, "1672928587680225": 6, "706": 6, "70": 6, "345": 6, "17697390093394": 6, "1242367255308854": 6, "757": 6, "71": 6, "74610809299037": 6, "1728352983905301": 6, "807": 6, "72": 6, "23464281634324": 6, "1265380781508565": 6, "856": 6, "73": 6, "344": 6, "6848312222365": 6, "0829896313820404": 6, "902": 6, "74": [6, 7], "9111966504334": 6, "1070414661080543": 6, "966": 6, "75": 6, "70116419828565": 6, "0875643695329498": 6, "026": 6, "76": 6, "62647974688133": 6, "0716281620790837": 6, "089": 6, "77": 6, "6759429204596": 6, "0456289319914898": 6, "141": 6, "78": 6, "343": 6, "58131497761616": 6, "0010195360522613": 6, "193": 6, "79": 6, "342": 6, "7290581014813": 6, "9073210715005748": 6, "254": 6, "80": [6, 7], "67866114080107": 6, "9166305667100072": 6, "317": 6, "81": 6, "6440308445311": 6, "9248722692093634": 6, "367": 6, "82": 6, "02085648448934": 6, "8776928646870886": 6, "1662266300702": 6, "867592364677856": 6, "457": 6, "84": 6, "30158716569775": 6, "8599491178327108": 6, "497": 6, "85": 6, "2803074848341": 6, "8396948389352923": 6, "86": 6, "28301101884045": 6, "8396651775801683": 6, "587": 6, "87": 6, "6781906268143": 6, "8356021935129933": 6, "639": 6, "88": 6, "0405418264898": 6, "7430046191126949": 6, "677": 6, "89": 6, "77203208258476": 6, "9015965341429055": 6, "90": 6, "363": 6, "1622720320929": 6, "6746575663752555": 6, "91": 6, "7403796626193": 6, "9057564666836629": 6, "797": 6, "92": 6, "63579667712696": 6, "9332275205203372": 6, "93": [6, 7], "6886425884964": 6, "9433063264508291": 6, "94": 6, "9341048659705": 6, "884739221967487": 6, "935": 6, "95": [6, 7], "63507445779743": 6, "9381000493689634": 6, "986": 6, "96": 6, "06021011302374": 6, "963138023068903": 6, "97": 6, "9990546212019": 6, "9601651093867907": 6, "066": 6, "98": 6, "3821": 6, "2267845437514": 6, "117": 6, "6786067133016": 6, 
"721603508336166": 6, "seaborn": [6, 7], "sn": [6, 7], "set_them": 6, "darkgrid": 6, "default_reg_scor": 6, "ax": [6, 7], "scatterplot": 6, "trials_datafram": 6, "xlabel": [6, 7], "ylabel": [6, 7], "ojbect": 6, "interest": [6, 7], "neg_mean_squared_error": [6, 9], "highlight": 6, "color": [6, 7], "cv_test": 6, "user_attrs_test_scor": 6, "lambda": [6, 12], "item": [6, 10], "idx": [6, 7], "v": 6, "hue": 6, "palett": 6, "set1": 6, "inspect": 6, "apischema": [6, 8], "buildconfig_as_dict": 6, "serial": 6, "response_typ": [6, 7, 8], "deduplication_strategi": [6, 7, 8], "split_strategi": [6, 7, 8], "nosplit": [6, 12], "save_intermediate_fil": [6, 8], "log_transform": [6, 7, 8], "log_transform_bas": [6, 7, 8], "null": 6, "log_transform_neg": [6, 7, 8], "log_transform_unit_convers": [6, 7, 8], "probabilistic_threshold_represent": [6, 7, 8], "probabilistic_threshold_representation_threshold": [6, 7, 8], "probabilistic_threshold_representation_std": [6, 7, 8], "metadata": [6, 8, 9, 10, 11], "shuffl": [6, 9, 11, 12, 13], "best_trial": [6, 9, 11], "best_valu": [6, 9, 11], "tracking_rest_endpoint": [6, 9], "best_build": 6, "rb": 6, "predict_from_smil": [6, 8], "cc1": [6, 7], "43103985": 6, "177": 6, "99850936": 6, "now": [6, 7, 9], "panda": [6, 7], "pd": [6, 7], "df": [6, 12, 13], "read_csv": 6, "expect": [6, 7, 10], "matplotlib": [6, 7], "pyplot": [6, 7], "plt": [6, 7], "scatter": 6, "lim": 6, "max": [6, 7, 12], "diagon": 6, "r2_score": 6, "mean_squared_error": 6, "mean_absolute_error": 6, "y_true": [6, 8, 11], "y_pred": [6, 8, 11], "rmse": 6, "ad": [6, 7, 12, 13], "mae": 6, "absolut": 6, "8566354978126369": 6, "204909888075044": 6, "298453946973815": 6, "accept": [6, 7], "again": 6, "hopefulli": [6, 7], "littl": 6, "better": [6, 7, 8, 12], "send": 6, "strategi": [6, 8], "current": 6, "observ": [6, 7], "last": [6, 7, 12], "alreadi": [6, 7], "sort": [6, 7, 12, 13], "oldest": [6, 7, 12, 13], "newest": [6, 7, 12, 13], "end": [6, 7, 12, 13], "extern": 6, "tool": 6, "excel": 6, 
"ensur": [6, 7, 8, 9], "unballanc": 6, "work": [6, 7], "come": [6, 7], "measur": [6, 7], "fact": 6, "disregard": 6, "stereochemistri": [6, 7], "even": [6, 7], "sever": 6, "median": [6, 7, 12], "factor": [6, 7], "replic": [6, 7], "robust": [6, 7], "outlier": [6, 7], "acorss": 6, "trust": 6, "kept": 6, "splitter": [6, 7, 8, 10], "track_to_mlflow": [6, 9], "my_study_stratified_split": 6, "922": 6, "963": 6, "046": 6, "1856": 6, "4459752935309": 6, "123": 6, "1692": 6, "0451328577294": 6, "2918844591266672": 6, "592": 6, "1378": 6, "9731014410709": 6, "471164936778079": 6, "2658": 6, "13214897931": 6, "804": 6, "2059": 6, "3079659969176": 6, "330": [6, 7], "280": 6, "17777558722315": 6, "7001901522391756": 6, "422": 6, "3551": 6, "475476217507": 6, "466": 6, "2124": 6, "9660426577593": 6, "509": 6, "1686": 6, "5737716985532": 6, "9841058851292832": 6, "552": 6, "1702": 6, "174704715547": 6, "861494545249233": 6, "578": 6, "621": 6, "1204": 6, "636967895143": 6, "5238298142840006": 6, "676": 6, "228": 6, "44505332657158": 6, "9836853549192415": 6, "729": 6, "3949": 6, "499774068696": 6, "04535826280986047": 6, "012999584021838e": 6, "829": 6, "2856": 6, "917927507731": 6, "linear_model": 6, "_coordinate_desc": 6, "678": 6, "convergencewarn": 6, "did": 6, "regularis": 6, "dualiti": 6, "gap": 6, "306e": 6, "toler": 6, "824e": 6, "cd_fast": 6, "enet_coordinate_desc": 6, "882": 6, "2554": 6, "2079198900733": 6, "10588223712643852": 6, "1261": 6, "484274761188": 6, "0950442632698256": 6, "965": 6, "282": 6, "6478019258886": 6, "2920636100136971": 6, "004": 6, "1814": 6, "6019641143478": 6, "048": 6, "1284": 6, "7430070920798": 6, "1729012287538991": 6, "237": 6, "98783693000647": 6, "1721667984096773": 6, "192": 6, "2129": 6, "55317061882": 6, "4997740833423": 6, "779895470793612": 6, "260941957410989e": 6, "279": 6, "1740": 6, "8894369939983": 6, "02841448247455669": 6, "698e": 6, "280e": 6, "820e": 6, "352e": 6, "770e": 6, "3317": 6, "417858905051": 6, 
"003050380617617421": 6, "404": 6, "448": 6, "1256": 6, "7270466276807": 6, "1594144041655936": 6, "491": 6, "1245": 6, "1399766270456": 6, "336730512398918": 6, "583": 6, "2908": 6, "3563960057677": 6, "628": 6, "1775": 6, "55204856041": 6, "721": 6, "1257": 6, "9288888831513": 6, "1441514794000534": 6, "808": 6, "98174313112844": 6, "1939105579414777": 6, "900": 6, "3054": 6, "7066202193805": 6, "944": 6, "1227": 6, "082986184029": 6, "909508127148669": 6, "988": 6, "1676": 6, "7481962719485": 6, "4307837873914335": 6, "079": 6, "307965996918": 6, "168": 6, "3441": 6, "9109103644514": 6, "211": 6, "1670": 6, "5213862925175": 6, "07945856808433427": 6, "264": 6, "2756": 6, "046839500092": 6, "320": 6, "4997735530674": 6, "022099719935614482": 6, "4657380646234507e": 6, "08": 6, "0862402902634642": 6, "12519632281925502": 6, "467": 6, "3438": 6, "566583971217": 6, "524": 6, "4422556954731": 6, "19967589906728334": 6, "016e": 6, "618": 6, "359": [6, 7], "7639743940817": 6, "059252880514551576": 6, "662": 6, "1246": 6, "7813032646238": 6, "3074782262329858": 6, "755": 6, "2224": 6, "3845873049813": 6, "810": 6, "1673": 6, "9639799911165": 6, "2737740844660712": 6, "896": 6, "3163": 6, "129883232068": 6, "987": 6, "2753": 6, "414173913392": 6, "057": 6, "263": 6, "1352845182604": 6, "627030918721665": 6, "105": 6, "271": 6, "2979718788249": 6, "8548903728617034": 6, "165": 6, "277": 6, "86441431259567": 6, "9605867591283856": 6, "227": 6, "4329099850367": 6, "9537398361705693": 6, "274": 6, "3838070241422": 6, "9045589309769144": 6, "334": 6, "260": 6, "4460398258507": 6, "5589021326002044": 6, "383": 6, "257": 6, "95032410206767": 6, "5053759377103249": 6, "444": 6, "256": 6, "5958038666581": 6, "4789082433356577": 6, "495": 6, "253": 6, "4269973575198": 6, "4281024602273042": 6, "560": 6, "249": 6, "40822811603962": 6, "3546313579812586": 6, "620": 6, "245": 6, "71101688809983": 6, "2913960369109012": 6, "675": 6, "247": 6, "88538215472033": 6, "3274897484709072": 
6, "737": 6, "244": 6, "23847775159297": 6, "2647865635312279": 6, "803": 6, "59033004585282": 6, "3228443521984092": 6, "863": 6, "243": 6, "40694430653753": 6, "2489205103047292": 6, "928": 6, "223": 6, "85145692792733": 6, "8934822741396387": 6, "990": [6, 7], "221": 6, "94026043724057": 6, "8552798675517863": 6, "219": 6, "60947928367543": 6, "8149866573467666": 6, "108": 6, "84441955310717": 6, "8531301788095305": 6, "170": 6, "24134912135943": 6, "8418420411160932": 6, "232": 6, "34805357903284": 6, "883998932301903": 6, "293": 6, "99342925522842": 6, "8564564664338091": 6, "353": 6, "50886633416462": 6, "8672069097403997": 6, "415": 6, "61235541906441": 6, "8482856353268698": 6, "479": 6, "217": 6, "7749814513912": 6, "7823980442129331": 6, "538": 6, "216": 6, "00225784039503": 6, "7113129125761161": 6, "601": 6, "8736767409489": 6, "6250904023479531": 6, "666": 6, "94414119442342": 6, "6227757503715069": 6, "731": 6, "45936690929625": 6, "6343056785694773": 6, "63861804615567": 6, "6302707941523814": 6, "860": 6, "1969": 6, "3749442111905": 6, "00019861806798724335": 6, "586529041453": 6, "923": 6, "215": 6, "82051598778696": 6, "6518244359516081": 6, "06387687700067": 6, "6440087841656821": 6, "041": 6, "24994687849525": 6, "6393212787552464": 6, "106": 6, "92984604804667": 6, "6232144947646524": 6, "25506613319246": 6, "603388647930941": 6, "2733": 6, "5772576431627": 6, "287": 6, "29854648789728": 6, "5873312673596333": 6, "16592450348784": 6, "4337907998582289": 6, "410": 6, "68514116107337": 6, "6695836226711808": 6, "475": 6, "220": 6, "8939514172608": 6, "4420925048614356": 6, "535": 6, "72299797702155": 6, "6960582933068138": 6, "69285146262294": 6, "69078828949453": 6, "665": 6, "0538787714827": 6, "7144357045239296": 6, "728": 6, "4213281391621": 6, "7353090312302926": 6, "74724725664498": 6, "92653950485437e": 6, "858": 6, "12287184152592": 6, "7183304951103088": 6, "22186485689846": 6, "7234233661662641": 6, "977": 6, "2720": 6, "793752592223": 
6, "042": 6, "3855763846717": 6, "4726201914486088": 6, "By": 6, "roc_auc": [6, 9], "model_evalu": 6, "amongst": 6, "regression_scor": 6, "classification_scor": 6, "explained_vari": [6, 9], "max_error": [6, 9], "neg_mean_absolute_error": [6, 9], "neg_median_absolute_error": [6, 9], "average_precis": [6, 9], "balanced_accuraci": [6, 9], "f1": [6, 9], "f1_macro": [6, 9], "f1_micro": [6, 9], "f1_weight": [6, 9], "jaccard": [6, 9], "jaccard_macro": [6, 9], "jaccard_micro": [6, 9], "jaccard_weight": [6, 9], "neg_brier_scor": [6, 9], "precis": [6, 7, 9], "precision_macro": [6, 9], "precision_micro": [6, 9], "precision_weight": [6, 9], "recal": [6, 9], "recall_macro": [6, 9], "recall_micro": [6, 9], "recall_weight": [6, 9], "auc_pr_cal": [6, 8, 9], "bedroc": [6, 8, 9], "concordance_index": [6, 8, 9], "my_study_r2": 6, "945": 6, "947": 6, "072": 6, "011171868665159623": 6, "197": 6, "08689402230378174": 6, "283": 6, "12553701248394863": 6, "141096648805748": 6, "4893466963980463e": 6, "3039309544203818": 6, "20182749628697164": 6, "485": 6, "8187194367176578": 6, "558": 6, "4647239019719945": 6, "6574750183038587": 6, "611": 6, "8614818478547979": 6, "3974313630683448": 6, "705": 6, "12769795082909816": 6, "773": 6, "8639946428338224": 6, "2391884918766034": 6, "838": 6, "12553701248377633": 6, "00044396482429275296": 6, "3831436879125245e": 6, "892": 6, "12553700871203702": 6, "00028965395242758657": 6, "99928292425642e": 6, "2935582042429075": 6, "976": 6, "18476333152695587": 6, "8190707459213998": 6, "4060379177903557": 6, "118": 6, "12206148974315871": 6, "3105263811279067": 6, "344271094811757": 6, "3562469062424869": 6, "670604991178476": 6, "316": [6, 7], "045959695906983344": 6, "8583939656024446": 6, "5158832554303112": 6, "433": 6, "3062574078515544": 6, "487": 6, "11657354998283716": 6, "0009327650919528738": 6, "062479210472502": 6, "586": 6, "629": 6, "8498478905829554": 6, "1366172066709432": 6, "733": 6, "1276979508290982": 6, "786": 6, "13519830637607919": 
6, "92901911959232": 6, "999026012594694": 6, "839": 6, "8198078293055633": 6, "5888977841391714": 6, "878": 6, "8201573964824842": 6, "19435298754153707": 6, "958": 6, "013": 6, "6285506249643193": 6, "35441495011256785": 6, "11934070343348298": 6, "145": 6, "4374125584543907": 6, "2457809516380005": 6, "213": 6, "3625576518621392": 6, "6459129458824919": 6, "36175556871883746": 6, "8179058888285398": 6, "285": 6, "8202473217121523": 6, "0920052840435055": 6, "372": 6, "3672927879319306": 6, "8677032984759461": 6, "402": 6, "445": 6, "40076792599874356": 6, "2865764368847064": 6, "26560316846701765": 6, "632": 6, "41215254857081174": 6, "671": 6, "763": 6, "00461414372160085": 6, "27282533524183633": 6, "919": 6, "10220127407364991": 6, "975": 6, "30323404130582854": 6, "3044553805553568": 6, "6437201185807124": 6, "076": 6, "41502276709562": 6, "10978379088847677": 6, "120": 6, "36160209098547913": 6, "022707289534838138": 6, "175": 6, "2916101445983833": 6, "936e": 6, "782e": 6, "434e": 6, "977e": 6, "276": 6, "8609413020928532": 6, "04987590926279814": 6, "794e": 6, "830e": 6, "906e": 6, "578e": 6, "8610289662757457": 6, "019211413400468974": 6, "754e": 6, "843e": 6, "507e": 6, "493": 6, "8610070549049179": 6, "018492644772509947": 6, "840e": 6, "513e": 6, "924e": 6, "8569771623635769": 6, "008783442408928633": 6, "243e": 6, "014e": 6, "700": 6, "8624781673814641": 6, "05782221001517797": 6, "113e": 6, "935e": 6, "122e": 6, "798": 6, "8618589507037001": 6, "02487072255316275": 6, "886": 6, "864754359721037": 6, "2079910754941946": 6, "946": 6, "8622236413326235": 6, "333215560931422": 6, "009": 6, "861832165638517": 6, "3628098560209365": 6, "068": 6, "8620108533993581": 6, "34240779695521706": 6, "142": 6, "8638540565650902": 6, "26493714991266293": 6, "8629799500771645": 6, "30596394512914815": 6, "8621408609583922": 6, "33648829357762355": 6, "351": 6, "8638132124078156": 6, "2679814646317183": 6, "424": 6, "863983758876634": 6, "24062119162159595": 6, 
"500": 6, "8627356047945115": 6, "3141728910335158": 6, "8639203054085788": 6, "23391390640786494": 6, "8570103863991635": 6, "6124885145996103": 6, "742": 6, "8647961976727571": 6, "2059976546070975": 6, "830": 6, "8648312544921793": 6, "20266060662750784": 6, "926": 6, "8648431452862716": 6, "20027647978240445": 6, "010": 6, "8648491459660418": 6, "1968919999787333": 6, "8650873115156988": 6, "174598921162764": 6, "204": 6, "8650350577921149": 6, "16468002989641095": 6, "8649412283687147": 6, "1606717091615047": 6, "986e": 6, "396": [6, 7], "8649537211609554": 6, "14694925097689848": 6, "506": 6, "8649734575435447": 6, "147612713300643": 6, "446e": 6, "8648761002838515": 6, "14440434705706803": 6, "398e": 6, "775": 6, "8639826593122782": 6, "1265357179513065": 6, "690e": 6, "875": 6, "864435565531768": 6, "1374245525868926": 6, "938": 6, "8590221951825531": 6, "49890830155012533": 6, "8649098880804443": 6, "1573428812070292": 6, "405e": 6, "864536410656637": 6, "13886104722511608": 6, "8597401050431873": 6, "47746341180045787": 6, "8537465461603838": 6, "050e": 6, "8642643827090003": 6, "13446778921611002": 6, "175e": 6, "8641621818665252": 6, "1286796719653316": 6, "625": 6, "864182755916388": 6, "13303218726548235": 6, "693": 6, "1255357440899417": 6, "021711452917433944": 6, "559714273835951e": 6, "758": 6, "8604596648091501": 6, "43644874418279245": 6, "463e": 6, "861": 6, "8635689909135862": 6, "10940922083495383": 6, "951": 6, "8648544336551733": 6, "1912756875742137": 6, "8648496595672595": 6, "19628449928540487": 6, "8452625121122099": 6, "4324661283995224": 6, "149": 6, "8378670635846416": 6, "839206620815206": 6, "002e": 6, "082e": 6, "8649365368153895": 6, "07270781179126021": 6, "8875676754699953": 6, "0006995169897945908": 6, "586e": 6, "618e": 6, "234e": 6, "484": 6, "8730555131061773": 6, "0018186269840273495": 6, "12553508835019533": 6, "04867556317570456": 6, "0011658455138452": 6, "284e": 6, "177e": 6, "664": 6, "8586292788613132": 6, 
"005078762921098462": 6, "anyalgorithm": 6, "__args__": 6, "consider": [6, 7], "modif": [6, 12], "establish": 6, "rf": 6, "account": [6, 7, 8], "though": 6, "treat": [6, 7], "pdf": [6, 9], "denot": [6, 12, 13], "determinist": [6, 7], "quantiti": 6, "tradit": [6, 7], "discret": 6, "discretis": [6, 12, 14], "bioactiv": [6, 7], "integr": 6, "afford": [6, 7], "particularli": 6, "liklihood": 6, "membership": [6, 7], "iopscienc": 6, "iop": 6, "articl": [6, 7], "3847": 6, "1538": 6, "3881": 6, "aaf101": 6, "pxc50": [6, 7], "p24863": 6, "enabl": [6, 7], "alwai": [6, 7], "734": 6, "joblib": [6, 8], "577": 6, "joblibcollisionwarn": 6, "collis": 6, "180": 6, "self": [6, 9], "_cached_cal": 6, "shelv": 6, "00": 6, "764": 6, "08099580623289632": 6, "prfclassifier_algorithm_hash": 6, "efe0ba9870529a6cde0dd3ad22447cbb": 6, "max_depth__efe0ba9870529a6cde0dd3ad22447cbb": 6, "n_estimators__efe0ba9870529a6cde0dd3ad22447cbb": 6, "max_features__efe0ba9870529a6cde0dd3ad22447cbb": 6, "prfclassifiermaxfeatur": [6, 9], "min_py_sum_leaf__efe0ba9870529a6cde0dd3ad22447cbb": 6, "use_py_gini__efe0ba9870529a6cde0dd3ad22447cbb": 6, "use_py_leafs__efe0ba9870529a6cde0dd3ad22447cbb": 6, "408": [6, 7], "07261454017489567": 6, "780": 6, "08791063872794351": 6, "911": 6, "07114663955819509": 6, "879": 6, "06537440628140882": 6, "446": 6, "05680450487193368": 6, "968": 6, "543": 6, "0656836821774901": 6, "333": 6, "07863564862376404": 6, "329": 6, "0648840199215795": 6, "014": 6, "07861037073288182": 6, "608": 6, "06669924317660021": 6, "997": 6, "06734611679947522": 6, "526": 6, "06810559387741143": 6, "0528189695245453": 6, "best_built": 6, "demonstr": [6, 7], "purpos": [6, 7], "transduct": 6, "example_smil": 6, "get_set": [6, 7, 8], "b": [6, 7], "outsid": [6, 7, 8], "likelihood": 6, "problemat": 6, "except": [6, 8], "valueerror": 6, "As": [6, 7], "raw": [6, 7, 8, 12], "760": 6, "800": 6, "w": 6, "801": 6, "fail": 6, "traceback": 6, "_optim": 6, "196": 6, "_run_trial": 6, "value_or_valu": 6, "func": 6, 
"128": 6, "__call__": 6, "_validate_algo": 6, "rais": [6, 8], "summaris": 6, "handl": 6, "via": [6, 7, 12], "convent": [6, 7], "classic": 6, "relev": 6, "cutoff": [6, 7, 12], "ouput": 6, "reflect": [6, 7], "arguabl": 6, "mpo": 6, "pub": 6, "ac": 6, "full": [6, 9], "jcim": 6, "9b00237": 6, "slide": 6, "googl": 6, "14pbd9ltxzfpsjhyxykflxnk8q80lhvnjimg8a3wqcrm": 6, "edit": 6, "calcault": 6, "directli": [6, 7], "later": [6, 7], "smilesbaseddescriptor": 6, "architectur": [6, 7], "quickli": 6, "867": 6, "868": 6, "root": [6, 9, 10], "enqueu": [6, 8], "manual": 6, "activation__668a7428ff5cdb271b01c0925e8fea45": 6, "relu": [6, 9], "aggregation__668a7428ff5cdb271b01c0925e8fea45": 6, "aggregation_norm__668a7428ff5cdb271b01c0925e8fea45": 6, "batch_size__668a7428ff5cdb271b01c0925e8fea45": 6, "depth__668a7428ff5cdb271b01c0925e8fea45": 6, "dropout__668a7428ff5cdb271b01c0925e8fea45": 6, "features_generator__668a7428ff5cdb271b01c0925e8fea45": 6, "ffn_hidden_size__668a7428ff5cdb271b01c0925e8fea45": 6, "ffn_num_layers__668a7428ff5cdb271b01c0925e8fea45": 6, "final_lr_ratio_exp__668a7428ff5cdb271b01c0925e8fea45": 6, "hidden_size__668a7428ff5cdb271b01c0925e8fea45": 6, "init_lr_ratio_exp__668a7428ff5cdb271b01c0925e8fea45": 6, "max_lr_exp__668a7428ff5cdb271b01c0925e8fea45": 6, "warmup_epochs_ratio__668a7428ff5cdb271b01c0925e8fea45": 6, "chempropregressor_algorithm_hash": 6, "668a7428ff5cdb271b01c0925e8fea45": 6, "301": 6, "6833": 6, "034983241957": 6, "chempropactiv": [6, 9], "chempropaggreg": [6, 9], "ensemble_size__668a7428ff5cdb271b01c0925e8fea45": 6, "epochs__668a7428ff5cdb271b01c0925e8fea45": 6, "chempropfeatures_gener": [6, 9], "6445": 6, "608102397302": 6, "1700": 6, "2300": 6, "safe": 6, "nan": [6, 8, 12, 13], "prompt": 6, "due": [6, 7, 8], "deactiv": 6, "own": 6, "still": 6, "commun": 6, "facilit": [6, 7, 9], "preset": 6, "enqu": 6, "bayesian": 6, "suggest": 6, "split_chemprop": [6, 9], "flag": [6, 7, 12, 13], "n_chemprop_tri": [6, 9], "desir": 6, "undirect": 6, "shown": [6, 7], 
"aspect": [6, 8], "princip": [6, 7], "expand": 6, "differnt": 6, "rememb": 6, "unless": 6, "alter": [6, 9], "characterist": [6, 7], "too": [6, 9], "limit": [6, 7, 9], "vice": [6, 9], "versa": [6, 9], "extens": [6, 9], "trail": [6, 9], "applic": [6, 7, 9], "chanc": 6, "help": 6, "caruana": 6, "particular": 6, "achiev": [6, 7], "st": 6, "figur": [6, 7], "signal": 6, "aka": 6, "could": [6, 7], "share": [6, 12, 13], "until": [6, 9], "longer": [6, 7], "mt": 6, "onc": [6, 7, 12], "knowledg": 6, "order": [6, 7, 9, 12], "earlier": [6, 7], "accompi": 6, "train_side_info": 6, "clog": 6, "surfac": 6, "area": [6, 7], "psa": 6, "265": 6, "22475": 6, "8088": 6, "883": 6, "32297": 6, "6237": 6, "835": 6, "33334": 6, "2804": 6, "314": 6, "26075": 6, "2533": 6, "498": 6, "278": 6, "18917": 6, "5102": 6, "694": 6, "246": 6, "12575": 6, "7244": 6, "255": 6, "14831": 6, "4338": 6, "895": 6, "302": 6, "26838": 6, "2041": 6, "22298": 6, "match": 6, "train_side_info_cl": 6, "clogp_gt2": 6, "clogs_gt": 6, "acceptors_gt5": 6, "donors_gt0": 6, "area_gt250": 6, "psa_lt0": 6, "aux": [6, 8, 10], "03": 6, "350": 6, "443": 6, "5817": 6, "944008002311": 6, "chemprophyperoptregressor_algorithm_hash": 6, "db9e60f9b8f0a43eff4b41917b6293d9": 6, "ensemble_size__db9e60f9b8f0a43eff4b41917b6293d9": 6, "epochs__db9e60f9b8f0a43eff4b41917b6293d9": 6, "features_generator__db9e60f9b8f0a43eff4b41917b6293d9": 6, "num_iters__db9e60f9b8f0a43eff4b41917b6293d9": 6, "search_parameter_level__db9e60f9b8f0a43eff4b41917b6293d9": 6, "chempropsearch_parameter_level": [6, 9], "aux_weight_pc__db9e60f9b8f0a43eff4b41917b6293d9": 6, "5796": 6, "34392897437": 6, "439": 6, "5795": 6, "086720713623": 6, "470": 6, "241": 6, "5820": 6, "227555999914": 6, "322": 6, "5852": 6, "160071204277": 6, "inlfuenc": 6, "henc": [6, 7], "percent": 6, "product": 6, "user_attrs_trial_ran": 6, "drop": [6, 7, 8], "erron": 6, "__": 6, "params_aux_weight_pc": 6, "conclud": [6, 7], "produc": 6, "overrid": [6, 9], "situat": [6, 7], "along": 6, 
"potenti": [6, 7], "compris": 6, "incompat": 6, "whilst": [6, 7], "desciptor": 6, "grei": [6, 7], "tial": 6, "what": 6, "design": 6, "unpromis": 6, "why": [6, 7], "poor": 6, "sampler": 6, "incompta": 6, "repeatedli": 6, "hyerparamet": 6, "ident": 6, "9525489095524835": 6, "aux_weight_pc__cfa1990d5153c8812982f034d788d7e": 6, "777": 6, "4824": 6, "686269039228": 6, "7731425652872588": 6, "819": 6, "849": 6, "4409": 6, "946844928445": 6, "791002332112292": 6, "021": [6, 7], "167": 6, "329624779366306": 6, "00015024763718638216": 6, "269": 6, "523": 6, "4396": 6, "722635068717": 6, "559": 6, "753": 6, "4577379164707": 6, "790": 6, "960": 6, "consult": 6, "incompitbl": 6, "algo": [6, 9], "occur": 6, "assign": [6, 7], "doe": [6, 7, 8, 9, 12], "params_algorithm_nam": 6, "move_legend": 6, "upper": [6, 7], "bbox_to_anchor": [6, 7], "overview": 6, "never": 6, "successfulli": 6, "absenc": 6, "8th": 6, "miss": [6, 8, 12, 13], "associ": [6, 7], "asid": 6, "mitig": 6, "overal": [6, 7], "serv": [6, 11], "argument": [6, 8], "tl": 6, "wider": 6, "activation__e0d3a442222d4b38f3aa1434851320db": 6, "aggregation__e0d3a442222d4b38f3aa1434851320db": 6, "aggregation_norm__e0d3a442222d4b38f3aa1434851320db": 6, "batch_size__e0d3a442222d4b38f3aa1434851320db": 6, "depth__e0d3a442222d4b38f3aa1434851320db": 6, "dropout__e0d3a442222d4b38f3aa1434851320db": 6, "features_generator__e0d3a442222d4b38f3aa1434851320db": 6, "ffn_hidden_size__e0d3a442222d4b38f3aa1434851320db": 6, "ffn_num_layers__e0d3a442222d4b38f3aa1434851320db": 6, "final_lr_ratio_exp__e0d3a442222d4b38f3aa1434851320db": 6, "hidden_size__e0d3a442222d4b38f3aa1434851320db": 6, "init_lr_ratio_exp__e0d3a442222d4b38f3aa1434851320db": 6, "max_lr_exp__e0d3a442222d4b38f3aa1434851320db": 6, "warmup_epochs_ratio__e0d3a442222d4b38f3aa1434851320db": 6, "e0d3a442222d4b38f3aa1434851320db": 6, "4937": 6, "540075659691": 6, "ensemble_size__e0d3a442222d4b38f3aa1434851320db": 6, "epochs__e0d3a442222d4b38f3aa1434851320db": 6, "retrain": 6, "954": 6, 
"043": 6, "5114": 6, "7131239123555": 6, "chempropregressorpretrained_algorithm_hash": 6, "dfc518a76317f23d95e5aa5a3eac77f0": 6, "frzn__dfc518a76317f23d95e5aa5a3eac77f0": 6, "chempropfrzn": [6, 9], "epochs__dfc518a76317f23d95e5aa5a3eac77f0": 6, "cover": 6, "global": 6, "job": [6, 7], "fair": 6, "1545": 6, "tl_studi": 6, "loc": [6, 7], "params_epoch": 6, "fillna": 6, "astyp": [6, 7], "agg": 6, "join": [6, 7], "params_chempropregressor_algorithm_hash": 6, "isna": 6, "annot": 6, "172": 6, "5891": 6, "7552821093905": 6, "140": 6, "5890": 6, "94653501547": 6, "77dfc8230317e08504ed5e643243fbc2": 6, "frzn__77dfc8230317e08504ed5e643243fbc2": 6, "epochs__77dfc8230317e08504ed5e643243fbc2": 6, "181": 6, "right": [6, 7], "ncol": 6, "world": [6, 7, 12, 13], "penultim": [6, 9], "chemprop_model": 6, "heatmap": 6, "predictor": [6, 8], "chemprop_fingerprint": 6, "fingerprint_typ": 6, "mpn": 6, "cbar_kw": 6, "semi": 6, "supervis": 6, "altern": [6, 7], "last_ffn": 6, "howeev": 6, "respect": [6, 7], "kind": [6, 7, 12], "confid": 6, "calibratedclassifiercv": 6, "understand": 6, "predict_proba": [6, 8], "among": 6, "gave": 6, "close": [6, 7], "actual": [6, 7], "topic": 6, "sigmoid": [6, 9], "review": 6, "those": [6, 7, 8], "calibration_curv": 6, "collect": 6, "defaultdict": 6, "precision_scor": 6, "recall_scor": 6, "f1_score": 6, "brier_score_loss": 6, "log_loss": 6, "roc_auc_scor": 6, "n_job": [6, 9, 11], "calibrated_rf": 6, "calibrated_model": 6, "173": 6, "110": 6, "8353535353535354": 6, "calibratedclassifiercvwithva_algorithm_hash": 6, "e788dfbfc5075967acb5ddf9d971ea20": 6, "n_folds__e788dfbfc5075967acb5ddf9d971ea20": 6, "max_depth__e788dfbfc5075967acb5ddf9d971ea20": 6, "n_estimators__e788dfbfc5075967acb5ddf9d971ea20": 6, "max_features__e788dfbfc5075967acb5ddf9d971ea20": 6, "uncalibr": 6, "uncalibrated_rf": 6, "uncalibrated_model": 6, "566": 6, "915": 6, "8185858585858585": 6, "randomforestclassifier_algorithm_hash": 6, "167e1e88dd2a80133e317c78f009bdc9": 6, 
"max_depth__167e1e88dd2a80133e317c78f009bdc9": 6, "n_estimators__167e1e88dd2a80133e317c78f009bdc9": 6, "max_features__167e1e88dd2a80133e317c78f009bdc9": 6, "conserv": 6, "1000": [6, 7], "random_st": [6, 12, 13], "calibrated_predict": 6, "uncalibrated_predict": 6, "cal_df": 6, "datafram": [6, 7, 8, 11, 12], "boxplot": 6, "melt": 6, "set_ylabel": [6, 7], "behaviour": [6, 7], "curv": [6, 7], "reliabl": 6, "diagram": 6, "against": 6, "figsiz": [6, 7], "ax1": 6, "subplot2grid": 6, "rowspan": 6, "ax2": 6, "perfectli": [6, 7], "pred": 6, "fraction_of_posit": 6, "mean_predicted_valu": 6, "n_bin": 6, "brier": 6, "2f": 6, "hist": 6, "histtyp": 6, "lw": 6, "set_ylim": 6, "legend": [6, 7], "set_titl": 6, "set_xlabel": [6, 7], "center": [6, 7], "tight_layout": [6, 7], "compos": 6, "refin": 6, "notic": 6, "significantli": 6, "cell": 6, "accur": 6, "alloc": 6, "y_prob": 6, "ye": 6, "score_nam": 6, "__name__": 6, "capit": 6, "score_df": 6, "set_index": 6, "decim": 6, "roc": 6, "auc": [6, 8], "184705": 6, "547129": 6, "830565": 6, "744048": 6, "784929": 6, "716536": 6, "175297": 6, "529474": 6, "811209": 6, "818452": 6, "814815": 6, "714104": 6, "va": 6, "multipoint": 6, "0c00476": 6, "margin": [6, 7], "bounari": 6, "548": 6, "537": 6, "8213131313131313": 6, "79765fbec1586f3c917ff30de274fdb4": 6, "n_folds__79765fbec1586f3c917ff30de274fdb4": 6, "max_depth__79765fbec1586f3c917ff30de274fdb4": 6, "n_estimators__79765fbec1586f3c917ff30de274fdb4": 6, "max_features__79765fbec1586f3c917ff30de274fdb4": 6, "uncert": [6, 8], "chem": [6, 7, 8], "allchem": 6, "pandastool": [6, 7], "rdconfig": 6, "datastruct": 6, "train_df": 6, "addmoleculecolumntofram": 6, "includefingerprint": 6, "getmorganfingerprint": 6, "nn": 6, "bulktanimotosimilar": 6, "add": [6, 7, 8, 10], "va_pr": 6, "va_uncert": 6, "dtm": 6, "trelli": 6, "fig": [6, 7], "subplot": [6, 7], "sharei": 6, "regplot": 6, "referenc": 6, "boundari": [6, 7, 12, 14], "neither": 6, "nor": 6, "dissimilar": 6, "cp_pred_ensembl": 6, 
"cp_uncert_ensembl": 6, "916": 6, "959": 6, "activation__fd833c2dde0b7147e6516ea5eebb2657": 6, "aggregation__fd833c2dde0b7147e6516ea5eebb2657": 6, "aggregation_norm__fd833c2dde0b7147e6516ea5eebb2657": 6, "batch_size__fd833c2dde0b7147e6516ea5eebb2657": 6, "depth__fd833c2dde0b7147e6516ea5eebb2657": 6, "dropout__fd833c2dde0b7147e6516ea5eebb2657": 6, "features_generator__fd833c2dde0b7147e6516ea5eebb2657": 6, "ffn_hidden_size__fd833c2dde0b7147e6516ea5eebb2657": 6, "ffn_num_layers__fd833c2dde0b7147e6516ea5eebb2657": 6, "final_lr_ratio_exp__fd833c2dde0b7147e6516ea5eebb2657": 6, "hidden_size__fd833c2dde0b7147e6516ea5eebb2657": 6, "init_lr_ratio_exp__fd833c2dde0b7147e6516ea5eebb2657": 6, "max_lr_exp__fd833c2dde0b7147e6516ea5eebb2657": 6, "warmup_epochs_ratio__fd833c2dde0b7147e6516ea5eebb2657": 6, "chempropclassifier_algorithm_hash": 6, "fd833c2dde0b7147e6516ea5eebb2657": 6, "65625": 6, "ensemble_size__fd833c2dde0b7147e6516ea5eebb2657": 6, "epochs__fd833c2dde0b7147e6516ea5eebb2657": 6, "midpoint": 6, "mont": 6, "carlo": 6, "virtual": [6, 8], "468": 6, "activation__c73885c5d5a4182168b8b002d321965a": 6, "aggregation__c73885c5d5a4182168b8b002d321965a": 6, "aggregation_norm__c73885c5d5a4182168b8b002d321965a": 6, "batch_size__c73885c5d5a4182168b8b002d321965a": 6, "depth__c73885c5d5a4182168b8b002d321965a": 6, "dropout__c73885c5d5a4182168b8b002d321965a": 6, "features_generator__c73885c5d5a4182168b8b002d321965a": 6, "ffn_hidden_size__c73885c5d5a4182168b8b002d321965a": 6, "ffn_num_layers__c73885c5d5a4182168b8b002d321965a": 6, "final_lr_ratio_exp__c73885c5d5a4182168b8b002d321965a": 6, "hidden_size__c73885c5d5a4182168b8b002d321965a": 6, "init_lr_ratio_exp__c73885c5d5a4182168b8b002d321965a": 6, "max_lr_exp__c73885c5d5a4182168b8b002d321965a": 6, "warmup_epochs_ratio__c73885c5d5a4182168b8b002d321965a": 6, "c73885c5d5a4182168b8b002d321965a": 6, "46875": 6, "ensemble_size__c73885c5d5a4182168b8b002d321965a": 6, "epochs__c73885c5d5a4182168b8b002d321965a": 6, "cp_pred_dropout": 6, 
"cp_uncert_dropout": 6, "previou": 6, "proabil": 6, "va_predict": 6, "correl": [6, 7], "drouput": 6, "uncertatinti": 6, "cp_uncert_delta": 6, "categor": [6, 12], "unit": [6, 7], "parsabl": 6, "date": 6, "cast": 6, "appropri": 6, "befor": [6, 7, 12], "choic": [6, 12], "822": 6, "862": 6, "4430": 6, "271946796234": 6, "mapie_algorithm_hash": 6, "976d211e4ac64e5568d369bcddd3aeb1": 6, "mapie_alpha__976d211e4ac64e5568d369bcddd3aeb1": 6, "max_depth__976d211e4ac64e5568d369bcddd3aeb1": 6, "n_estimators__976d211e4ac64e5568d369bcddd3aeb1": 6, "max_features__976d211e4ac64e5568d369bcddd3aeb1": 6, "analysi": [6, 7, 8], "perfom": 6, "post": 6, "mapie_pr": 6, "mapie_unc": 6, "bar": 6, "visualis": 6, "errorbar": 6, "yerr": 6, "ab": 6, "fmt": 6, "black": [6, 7], "ecolor": 6, "grai": 6, "elinewidth": 6, "capsiz": 6, "move": [6, 10], "analys": [6, 7], "width": 6, "alpha_impact": 6, "ma": 6, "unc_df": 6, "unc": 6, "reset_index": [6, 7], "concat": 6, "lineplot": 6, "err_styl": 6, "se": 6, "incorpor": 6, "tradition": 6, "unsupport": 6, "kernelexplain": [6, 8], "shaplei": 6, "explan": 6, "slundberg": 6, "game": 6, "credit": 6, "theori": 6, "publish": 6, "comopsit": 6, "540": 6, "_ridg": 6, "userwarn": [6, 7], "matrix": 6, "dual": 6, "34035600917066766": 6, "676421027478709": 6, "dga": 6, "dgp": 6, "dgtot": 6, "sa": 6, "sdc": 6, "sdx": 6, "numhacceptor": 6, "numhdonor": 6, "maxabsestateindex": 6, "maxestateindex": 6, "minabsestateindex": 6, "minestateindex": 6, "qed": 6, "sp": 6, "heavyatommolwt": 6, "exactmolwt": 6, "numvalenceelectron": 6, "numradicalelectron": 6, "maxpartialcharg": 6, "minpartialcharg": 6, "maxabspartialcharg": 6, "minabspartialcharg": 6, "fpdensitymorgan1": 6, "fpdensitymorgan2": 6, "fpdensitymorgan3": 6, "bcut2d_mwhi": 6, "bcut2d_mwlow": 6, "bcut2d_chghi": 6, "bcut2d_chglo": 6, "bcut2d_logphi": 6, "bcut2d_logplow": 6, "bcut2d_mrhi": 6, "bcut2d_mrlow": 6, "avgipc": 6, "balabanj": 6, "bertzct": 6, "chi0": 6, "chi0n": 6, "chi0v": 6, "chi1": 6, "chi1n": 6, "chi1v": 6, 
"chi2n": 6, "chi2v": 6, "chi3n": 6, "chi3v": 6, "chi4n": 6, "chi4v": 6, "hallkieralpha": 6, "ipc": 6, "kappa1": 6, "kappa2": 6, "kappa3": 6, "labuteasa": 6, "peoe_vsa1": 6, "peoe_vsa10": 6, "peoe_vsa11": 6, "peoe_vsa12": 6, "peoe_vsa13": 6, "peoe_vsa14": 6, "peoe_vsa2": 6, "peoe_vsa3": 6, "peoe_vsa4": 6, "peoe_vsa5": 6, "peoe_vsa6": 6, "peoe_vsa7": 6, "peoe_vsa8": 6, "peoe_vsa9": 6, "smr_vsa1": 6, "smr_vsa10": 6, "smr_vsa2": 6, "smr_vsa3": 6, "smr_vsa4": 6, "smr_vsa5": 6, "smr_vsa6": 6, "smr_vsa7": 6, "smr_vsa8": 6, "smr_vsa9": 6, "slogp_vsa1": 6, "slogp_vsa10": 6, "slogp_vsa11": 6, "slogp_vsa12": 6, "slogp_vsa2": 6, "slogp_vsa3": 6, "slogp_vsa4": 6, "slogp_vsa5": 6, "slogp_vsa6": 6, "slogp_vsa7": 6, "slogp_vsa8": 6, "slogp_vsa9": 6, "estate_vsa1": 6, "estate_vsa10": 6, "estate_vsa11": 6, "estate_vsa2": 6, "estate_vsa3": 6, "estate_vsa4": 6, "estate_vsa5": 6, "estate_vsa6": 6, "estate_vsa7": 6, "estate_vsa8": 6, "estate_vsa9": 6, "vsa_estate1": 6, "vsa_estate10": 6, "vsa_estate2": 6, "vsa_estate3": 6, "vsa_estate4": 6, "vsa_estate5": 6, "vsa_estate6": 6, "vsa_estate7": 6, "vsa_estate8": 6, "vsa_estate9": 6, "fractioncsp3": 6, "heavyatomcount": 6, "nhohcount": 6, "nocount": 6, "numaliphaticcarbocycl": 6, "numaliphaticheterocycl": 6, "numaliphaticr": 6, "numaromaticcarbocycl": 6, "numaromaticheterocycl": 6, "numaromaticr": 6, "numheteroatom": 6, "numrotatablebond": 6, "numsaturatedcarbocycl": 6, "numsaturatedheterocycl": 6, "numsaturatedr": 6, "ringcount": 6, "mollogp": 6, "molmr": 6, "fr_al_coo": 6, "fr_al_oh": 6, "fr_al_oh_notert": 6, "fr_arn": 6, "fr_ar_coo": 6, "fr_ar_n": 6, "fr_ar_nh": 6, "fr_ar_oh": 6, "fr_coo": 6, "fr_coo2": 6, "fr_c_o": 6, "fr_c_o_nocoo": 6, "fr_c_": 6, "fr_hoccn": 6, "fr_imin": 6, "fr_nh0": 6, "fr_nh1": 6, "fr_nh2": 6, "fr_n_o": 6, "fr_ndealkylation1": 6, "fr_ndealkylation2": 6, "fr_nhpyrrol": 6, "fr_sh": 6, "fr_aldehyd": 6, "fr_alkyl_carbam": 6, "fr_alkyl_halid": 6, "fr_allylic_oxid": 6, "fr_amid": 6, "fr_amidin": 6, "fr_anilin": 6, 
"fr_aryl_methyl": 6, "fr_azid": 6, "fr_azo": 6, "fr_barbitur": 6, "fr_benzen": 6, "fr_benzodiazepin": 6, "fr_bicycl": 6, "fr_diazo": 6, "fr_dihydropyridin": 6, "fr_epoxid": 6, "fr_ester": 6, "fr_ether": 6, "fr_furan": 6, "fr_guanido": 6, "fr_halogen": 6, "fr_hdrzine": 6, "fr_hdrzone": 6, "fr_imidazol": 6, "fr_imid": 6, "fr_isocyan": 6, "fr_isothiocyan": 6, "fr_keton": 6, "fr_ketone_topliss": 6, "fr_lactam": 6, "fr_lacton": 6, "fr_methoxi": 6, "fr_morpholin": 6, "fr_nitril": 6, "fr_nitro": 6, "fr_nitro_arom": 6, "fr_nitro_arom_nonortho": 6, "fr_nitroso": 6, "fr_oxazol": 6, "fr_oxim": 6, "fr_para_hydroxyl": 6, "fr_phenol": 6, "fr_phenol_noorthohbond": 6, "fr_phos_acid": 6, "fr_phos_est": 6, "fr_piperdin": 6, "fr_piperzin": 6, "fr_priamid": 6, "fr_prisulfonamd": 6, "fr_pyridin": 6, "fr_quatn": 6, "fr_sulfid": 6, "fr_sulfonamd": 6, "fr_sulfon": 6, "fr_term_acetylen": 6, "fr_tetrazol": 6, "fr_thiazol": 6, "fr_thiocyan": 6, "fr_thiophen": 6, "fr_unbrch_alkan": 6, "fr_urea": 6, "shap_valu": 6, "2227": 6, "042023e": 6, "2229": 6, "025199e": 6, "2228": 6, "802158e": 6, "2267": 6, "387276e": 6, "2230": 6, "106653e": 6, "1784": 6, "598471e": 6, "1785": 6, "584": 6, "ns": 6, "995": 6, "996": 6, "845": 6, "846": 6, "1375": 6, "1376": 6, "s1": 6, "n1c": 6, "1570": 6, "contrinubt": 6, "datset": 6, "unscaledphyschemjazzi": 6, "rank": [6, 8], "usag": 6, "978": 6, "032": 6, "818": 6, "t": [6, 7, 9], "kekul": 6, "unkekul": 6, "rational": 6, "rationale_scor": 6, "386": 6, "097": 6, "ch3": 6, "ch": 6, "389": [6, 7], "151": 6, "c1c": 6, "c1n": 6, "ch2": 6, "nh2": 6, "nn1cc1c": 6, "384": 6, "720": 6, "c1cccc": 6, "871": 6, "854": 6, "contian": 6, "second": 6, "dummi": 6, "third": 6, "smallest": 6, "made": [6, 7], "fourth": 6, "irregularli": 6, "xc50": [6, 7], "log10": [6, 7, 12, 14], "6th": 6, "convers": [6, 7, 12, 14], "logbas": [6, 7, 12, 14], "logneg": [6, 7, 12, 14], "negat": [6, 7, 12, 14], "transformed_studi": 6, "transform_exampl": 6, "028": 6, "5959493772536109": 6, 
"6571993250300608": 6, "169": 6, "1511102853256885": 6, "2487063317112765": 6, "288": 6, "6714912461080983": 6, "2725944467796781": 6, "369": 6, "194926264155893": 6, "395": 6, "7520919188596032": 6, "469": 6, "7803723847416691": 6, "499": 6, "6397753979196248": 6, "528": 6, "151110299986041": 6, "151110111437006": 6, "5410418750776741": 6, "612": 6, "7183231137124538": 6, "640": 6, "2721824844856162": 6, "716": 6, "1900929470222508": 6, "745": 6, "774": 6, "5585323973564646": 6, "3169218304262786": 6, "980": 6, "7974925066137679": 6, "008": 6, "218395226466336": 6, "039": 6, "1474226942497083": 6, "054": 6, "083": 6, "0239005731675412": 6, "160": 6, "191": 6, "178901060853144": 6, "27137790098830755": 6, "2710284516876423": 6, "361": 6, "6273152492418945": 6, "438": 6, "496": 6, "1907041717628215": 6, "3209075619139279": 6, "545": 6, "2709423025014604": 6, "609": 6, "3133943310851415": 6, "657": 6, "257769959239938": 6, "735": 6, "40359637945134746": 6, "817": 6, "4127882135896648": 6, "905": 6, "9246005133276612": 6, "003": 6, "8908739215746116": 6, "035": 6, "107536316777608": 6, "067": 6, "098": 6, "054360360588395": 6, "129": 6, "5428179904345867": 6, "5696273642213351": 6, "194": 6, "27099769667470536": 6, "1580741708125475": 6, "2709564785634315": 6, "10900413894771653": 6, "268": 6, "2709799905898163": 6, "13705914456987853": 6, "27097230608092054": 6, "12790870116376127": 6, "337": 6, "2709499903064464": 6, "10123180962907431": 6, "2710895886052581": 6, "26565663774320425": 6, "411": 6, "2708711012023424": 6, "005637048678674678": 6, "27092322402109364": 6, "06902647427781451": 6, "482": 6, "2712140349882": 6, "4076704953178294": 6, "515": 6, "27090080367174": 6, "04187106800188596": 6, "550": 6, "27086925247190047": 6, "003371853599610078": 6, "2708933298483799": 6, "032781796328385376": 6, "623": 6, "27087205624489635": 6, "006806773659187283": 6, "658": 6, "2708869511176179": 6, "025009489814943348": 6, "695": 6, "2711465077924297": 6, 
"3311125627707556": 6, "2708756855936628": 6, "011249102380159387": 6, "766": 6, "27087301924224993": 6, "007985924302396141": 6, "802": 6, "2708685399954944": 6, "00249856291483601": 6, "27121879554836553": 6, "4130244908975993": 6, "880": 6, "2708693196600531": 6, "0034541978803366022": 6, "918": 6, "27110195265802334": 6, "27994943662091765": 6, "956": 6, "2708682582859318": 6, "0021532199144365088": 6, "27087024523986086": 6, "0045884092728113585": 6, "27087351807632193": 6, "008596600952859433": 6, "2710818633795896": 6, "2567049271070902": 6, "109": 6, "27103241786565463": 6, "1990111983307052": 6, "146": 6, "2710350879598171": 6, "20214459724424078": 6, "183": 6, "2708688328221868": 6, "00285750520671645": 6, "27100832234449684": 6, "17064008990759916": 6, "258": 6, "27268613236193845": 6, "8725420109733135": 6, "296": 6, "27119617446689237": 6, "387533542012365": 6, "2708691110831552": 6, "0031985656730512953": 6, "27086852174155146": 6, "002476186542950981": 6, "27135383618835024": 6, "5626643670396761": 6, "449": 6, "2709819654433871": 6, "1394077979875128": 6, "488": [6, 8], "2718548944510965": 6, "0858347526799794": 6, "1508084699212935": 6, "03329943145150872": 6, "00025672309762227527": 6, "27249853374634975": 6, "702026434077893": 6, "604": 6, "27095660957755363": 6, "10916094511173127": 6, "643": 6, "27102160995407715": 6, "18630665884100353": 6, "681": 6, "27095708822582026": 6, "10973377642487026": 6, "27088222008661084": 6, "019235980282946118": 6, "762": 6, "2708703086029017": 6, "004666043957133775": 6, "799": 6, "27095279044622245": 6, "1045877457096882": 6, "840": 6, "2709408288690431": 6, "09023455456986404": 6, "9289218260898663": 6, "8200088368788958": 6, "917": 6, "27086675101898655": 6, "00030502148265565063": 6, "957": 6, "2710491243757999": 6, "21858260742423916": 6, "001": 6, "1491615840508995": 6, "024725853754515203": 6, "040": 6, "2709462479577586": 6, "0967427718847167": 6, "default_studi": 6, "252": 6, "332": 6, "3501": 6, 
"942111261296": 6, "5451": 6, "207265576796": 6, "459": 6, "1049201007814": 6, "9964": 6, "541364058234": 6, "3543": 6, "953608539901": 6, "570": 6, "6837": 6, "057544630979": 6, "613": 6, "2507": 6, "1794330606067": 6, "650": 6, "21534": 6, "719219668405": 6, "726": 6, "2899": 6, "736555614694": 6, "294e": 6, "760e": 6, "21674": 6, "445000284228": 6, "1049203123567": 6, "1049192609138": 6, "877": 6, "3630": 6, "72768093756": 6, "907": 6, "3431": 6, "942816967268": 6, "6908": 6, "462045154488": 6, "5964": 6, "65935954044": 6, "036": 6, "21070": 6, "107195348774": 6, "065": 6, "4977": 6, "068508997133": 6, "133": 6, "8873": 6, "669262669626": 6, "21387": 6, "63697424318": 6, "202": 6, "9958": 6, "573006910125": 6, "5182695600183": 6, "428": 6, "20684": 6, "56412138056": 6, "544": 6, "150": 6, "3435882510586": 6, "571": 6, "7068": 6, "705383113378": 6, "599": 6, "7150": 6, "482090052133": 6, "077": 6, "203": 6, "93637462922368": 6, "2570": 6, "5111262532305": 6, "21987": 6, "659957192194": 6, "9889": 6, "493204596083": 6, "413": 6, "7172": 6, "208490771303": 6, "9804": 6, "512701665093": 6, "555": 6, "585": 6, "9165": 6, "74081120673": 6, "0280270800017": 6, "161": 6, "1602933782954": 6, "888460860864": 6, "864": 6, "8414": 6, "932694243476": 6, "2270": 6, "540799189147": 6, "10383": 6, "79559309305": 6, "20815": 6, "025469865475": 6, "206": 6, "7560385808573": 6, "5264": 6, "4700789389035": 6, "3668": 6, "255064135424": 6, "156": 6, "12174877890536": 6, "793408178086295": 6, "99902820845678": 6, "157": 6, "371632749506": 6, "88307313087517": 6, "140915461519354": 6, "218": 6, "153": 6, "66773675231477": 6, "177324126813716": 6, "77906017834145": 6, "186": 6, "52056745848623": 6, "4565714180547": 6, "6710444346508": 6, "294": 6, "30976119334312": 6, "62916671166313": 6, "023639423189294": 6, "053696900694": 6, "914617418880486": 6, "31140591484044": 6, "201": 6, "33573874994386": 6, "569769302718845": 6, "5781354926491789": 6, "412": 6, "190": 6, "1384885119049": 6, 
"87666716965626": 6, "2537791489618": 6, "451": 6, "076949848299": 6, "9559574710535281": 6, "0032830967319653665": 6, "764974036324": 6, "03910427457823": 6, "406811480459925": 6, "164": 6, "4477304958181": 6, "701690847791482": 6, "819274780536123": 6, "567": 6, "87939164358104": 6, "32187661108304": 6, "660320437878754": 6, "607": 6, "01705178481896": 6, "61397716361812": 6, "603665957830847": 6, "645": 6, "155": 6, "73257312230092": 6, "759645965959294": 6, "503212714246787": 6, "684": 6, "154": 6, "46848394144124": 6, "8546740801317": 6, "35327336610912": 6, "724": 6, "20421802817864": 6, "57596974747163": 6, "84756262407801": 6, "51233215278089": 6, "3564642040401464": 6, "5034542273159819": 6, "207": 6, "68667089892196": 6, "034895878929095": 6, "03653571911285094": 6, "842": 6, "102": 6, "52277054278186": 6, "01961499216484045": 6, "670937191883546": 6, "881": 6, "28722475694815": 6, "012434370509176538": 6, "34222704431493": 6, "921": 6, "87402050281146": 6, "008452015347522093": 6, "914863578437455": 6, "38847505937936": 6, "01573542234868893": 6, "99307522974174": 6, "999": 6, "96336195786391": 6, "009845516063879428": 6, "59422914099683": 6, "19345618324213": 6, "009382525091504246": 6, "35573659237662": 6, "080": 6, "30772721342525": 6, "010579672066291478": 6, "35550323165882": 6, "23970902543148": 6, "013369359066405863": 6, "4744102498801": 6, "34331248758777": 6, "011398351701814368": 6, "54146340620301": 6, "195": 6, "104535853341": 6, "011708779850509646": 6, "682286191624579e": 6, "0653774146952": 6, "009806826677473646": 6, "90274406278985": 6, "64646042813787": 6, "0038598153381434685": 6, "20918134828555": 6, "68420472011734": 6, "0032474576673554513": 6, "35551178979624": 6, "85985201823172": 6, "003187930738019005": 6, "29431603544847": 6, "399": 6, "21583898009355": 6, "003122319313153475": 6, "83526418992966": 6, "437": 6, "34787242859676": 6, "002781955938462633": 6, "76228981520067": 6, "478": 6, "70914272129673": 6, 
"0023173546614751305": 6, "3000082904498813": 6, "519": 6, "10492031097328": 6, "002606064524407": 6, "7861330234653922e": 6, "1049154281806": 6, "0029210589377408366": 6, "200933937391094e": 6, "10492028002287": 6, "06431564840324226": 6, "2981641934644904e": 6, "56066541774658": 6, "0010848843623839548": 6, "151493073951163": 6, "76337597039308": 6, "004134805589645341": 6, "88115336652716": 6, "58009587759925": 6, "004763418454688096": 6, "02920758025023": 6, "113": 6, "35230417583477": 6, "0009098023238189749": 6, "57100980886017": 6, "809": 6, "30807467406214": 6, "03739791555156691": 6, "12818940557025": 6, "850": 6, "44100655116532": 6, "006380481141720477": 6, "4882351186755": 6, "891": 6, "35181001564942": 6, "0036244007454981787": 6, "608797806921866": 6, "124": 6, "3719027482892": 6, "0014198536004321608": 6, "05588994284273": 6, "28568052794907": 6, "005434972462746285": 6, "215759789700954": 6, "06": 6, "018": 6, "20325": 6, "66479442037": 6, "9696417046589247": 6, "132": 6, "21507621375022": 6, "0004528978867024753": 6, "80386923876023": 6, "85570350846885": 6, "0016948043699497222": 6, "455627755557016": 6, "contrast": [6, 7], "relplot": 6, "col": [6, 7], "facet_kw": 6, "axisgrid": [6, 7], "facetgrid": 6, "0x7fb3797f6b30": 6, "noramlis": 6, "unlog": 6, "yield": [6, 7, 12, 13], "mse": 6, "1126": 6, "56968721": 6, "20237903": 6, "revers": [6, 7, 12, 14], "onto": 6, "action": 6, "importantli": 6, "easili": [6, 7], "94824194": 6, "92008694": 6, "instruct": 6, "untransform": 6, "wish": 6, "cut": [6, 7], "10um": 6, "ptr_config_log_transform": 6, "ptr_transformed_studi": 6, "ptr_and_transform_exampl": 6, "518": 6, "002341918451736245": 6, "805": 6, "0024908979029632677": 6, "847": 6, "007901407671048116": 6, "888": 6, "00496231674623194": 6, "0026848278110363512": 6, "0010872728889471893": 6, "000": 6, "008706109201510277": 6, "027": 6, "093": 6, "002999462459688867": 6, "00825680029907454": 6, "148": 6, "007901407993550248": 6, "007901405163828307": 6, 
"0021653695362066753": 6, "002869169486971014": 6, "0010855652626111146": 6, "00550533804299308": 6, "002236800860454562": 6, "006105985607235417": 6, "004846526544994462": 6, "006964668794465202": 6, "670": 6, "699": 6, "008384326901042542": 6, "730": 6, "001082194093844804": 6, "761": 6, "0010807084256204563": 6, "948": 6, "005505338042993082": 6, "979": 6, "005247934991526694": 6, "0010803393728928605": 6, "005218354425190125": 6, "138": 6, "004999207507691546": 6, "0015694919308122948": 6, "326": 6, "0019757694194001384": 6, "421": 6, "002341918451736244": 6, "453": 6, "00368328296527152": 6, "521": 6, "003412828259848677": 6, "551": 6, "004412110711416997": 6, "616": 6, "647": 6, "0021743798524909573": 6, "0022761245849848527": 6, "0010805768178458735": 6, "750": 6, "001080400188305814": 6, "784": 6, "0010805009783570441": 6, "0010804680472500541": 6, "0010803723579987025": 6, "890": 6, "001080969596032512": 6, "925": 6, "0010800333715082816": 6, "0010802574700236845": 6, "0010814994986419817": 6, "037": 6, "001080161136846237": 6, "071": 6, "0010800254136811547": 6, "107": 6, "0010801290036870739": 6, "001080037482216557": 6, "179": 6, "0010801015705851358": 6, "0010812122378841013": 6, "0010800531021304936": 6, "291": 6, "00108004162698813": 6, "328": 6, "0010800223466649803": 6, "364": 6, "0010815197263834202": 6, "0010800257029027847": 6, "0010810223438672223": 6, "0010800211339555509": 6, "513": 6, "0010800296871141684": 6, "0010800437739166451": 6, "0010809366267195716": 6, "627": 6, "001080725386603206": 6, "0010807368035830652": 6, "704": 6, "0010800236072155854": 6, "741": 6, "0010806223050773966": 6, "779": 6, "0010876516369772728": 6, "00108142358144501": 6, "857": 6, "0010800248050489667": 6, "894": 6, "001080022268085466": 6, "0010820922958715991": 6, "969": 6, "0010805094397523254": 6, "007": 6, "0010841993753324146": 6, "007899735988203994": 6, "086": 6, "0010868762004637347": 6, "001080400750193767": 6, "163": 6, "0010806791616300314": 6, 
"0010804028029753213": 6, "0010800812188506515": 6, "0010800299598580359": 6, "0010803843696362083": 6, "001080333048974234": 6, "394": [6, 7], "432": 6, "001080014645182176": 6, "473": 6, "0010807968027851892": 6, "516": 6, "007907028395366658": 6, "553": 6, "0010803563024666294": 6, "inted": 6, "opter": 6, "probabilst": 6, "lossi": 6, "anywai": 6, "intention": 6, "clip": [6, 7], "cannot": 6, "timepoint": 6, "aux_column": [6, 8], "accord": [6, 7, 8], "aux_col_config": 6, "aux_descriptors_dataset": 6, "train_with_conc": 6, "aux1": 6, "aux_col_studi": 6, "covariate_exampl": 6, "aux1_model": 6, "323": 6, "5186": 6, "767663956718": 6, "522": 6, "4679": 6, "740824270968": 6, "575": 6, "4890": 6, "6705099499995": 6, "3803": 6, "9324375833753": 6, "667": 6, "3135": 6, "6497388676926": 6, "2518812859375": 6, "778": 6, "4309": 6, "124112370974": 6, "30159424580074": 6, "897": 6, "4357": 6, "02827013125": 6, "1437929337522": 6, "45281013": 6, "shape": [6, 7], "thrown": [6, 8], "prediciton": 6, "regardless": 6, "utilis": [6, 7], "seper": 6, "vector_covariate_config": 6, "precomputed_descriptor": 6, "train_with_fp": 6, "aux_transform": [6, 8], "vector_covariate_studi": 6, "vector_aux_exampl": 6, "vector_covariate_model": 6, "2200": 6, "6817959410578": 6, "011994365911634164": 6, "95660880078": 6, "029071783512897825": 6, "5798": 6, "564494725643": 6, "022631709120790048": 6, "2198637677605415": 6, "972": 6, "2899178898048": 6, "8916194399474267": 6, "556": 6, "3336440433073": 6, "5914093983615214": 6, "614": 6, "653": 6, "3036472748931": 6, "6201811079699818": 6, "3807": 6, "8035919667395": 6, "901e": 6, "892e": 6, "914e": 6, "752": 6, "5019": 6, "459500770764": 6, "1376436589359351": 6, "4017711284796": 6, "893": 6, "771": 6, "797115414836": 6, "74340620175102": 6, "train_smil": [6, 8], "train_i": [6, 8], "train_aux": [6, 8], "test_smil": [6, 8], "test_i": [6, 8], "test_aux": [6, 8], "512": 6, "legth": 6, "39754917": 6, "465": 6, "06352766": 6, "52031134": 6, "341": 6, 
"89875316": 6, "371": 6, "5516046": 6, "85042171": 6, "436": 6, "33406203": 6, "91439129": 6, "80585907": 6, "346": 6, "48565041": 6, "protein": [6, 12, 14], "alongsid": 6, "chemic": [6, 7, 12, 13], "sequenc": [6, 12, 14], "former": 6, "wherea": 6, "latter": [6, 7], "interact": 6, "basi": [6, 7], "toxinpred3": 6, "No": [6, 9, 12], "demponstr": 6, "zscale_covariate_config": 6, "zscale_covariate_studi": 6, "zscale_aux_exampl": 6, "zscale_covariate_model": 6, "458": 6, "8886986575836505": 6, "kneighborsclassifier_algorithm_hash": 6, "e51ca55089f389fc37a736adb2aa0e42": 6, "metric__e51ca55089f389fc37a736adb2aa0e42": 6, "n_neighbors__e51ca55089f389fc37a736adb2aa0e42": 6, "weights__e51ca55089f389fc37a736adb2aa0e42": 6, "unlik": 6, "21269231": 6, "91153846": 6, "29038462": 6, "69846154": 6, "22230769": 6, "99521739": 6, "59826087": 6, "34695652": 6, "03086957": 6, "13391304": 6, "08083333": 6, "6125": 6, "82916667": 6, "05083333": 6, "56083333": 6, "02178571": 6, "91785714": 6, "45392857": 6, "37642857": 6, "03107143": 6, "93357143": 6, "78964286": 6, "62928571": 6, "50857143": 6, "50107143": 6, "1232": 6, "3364": 6, "2328": 6, "1368": 6, "2304": 6, "7062": 6, "x_": 6, "vmin": 6, "vmax": 6, "cmap": 6, "spectral": 6, "248bit": 6, "128bit": 6, "minimz": 6, "consist": 6, "generaliz": 6, "minimize_std_dev": 6, "minimise_std_dev": [6, 9], "std": [6, 7, 12, 14], "dev": [6, 7], "example_multi": 6, "parameter_analysi": 6, "set_metric_nam": 6, "740": 6, "4008740644240856": 6, "9876203329634794": 6, "331": 6, "3561484909673425": 6, "9875061220991906": 6, "472": 6, "7856521165563053": 6, "21863029956806662": 6, "525": 6, "9125905675311808": 6, "7861693342190089": 6, "603": 6, "5238765412750027": 6, "2789424384877304": 6, "5348363849100434": 6, "5741725628917808": 6, "746": 6, "0072511048320134": 6, "2786318125997387": 6, "9625764609276656": 6, "27575381401822424": 6, "1114006274062536": 6, "7647766019001522": 6, "7801680863916906": 6, "2725738454485389": 6, "121": 6, 
"785652116470164": 6, "21863029955530786": 6, "152": 6, "785651973436432": 6, "21863032832257323": 6, "6101359993004856": 6, "3011280543457062": 6, "209": 6, "5361950698070447": 6, "23560786523195643": 6, "5356113574175657": 6, "5769721187181905": 6, "543430366921729": 6, "514747412346662": 6, "508": [6, 8], "5194661889628072": 6, "40146744515282495": 6, "659": 6, "659749443628722": 6, "6659085938841998": 6, "876": 6, "1068495306229729": 6, "24457822094737378": 6, "8604898820838102": 6, "7086875504668667": 6, "949": 6, "5919869916997383": 6, "2367498627927979": 6, "2497762395862362": 6, "10124660026536195": 6, "205": 6, "study_name_1": 6, "669": 6, "0621601907738047": 6, "2749020946925899": 6, "xxx": 6, "values_neg_mean_squared_error": 6, "values_standard": 6, "twinx": 6, "r": 6, "floor": 6, "ceil": 6, "align": 6, "set_ytick": 6, "linspac": 6, "set_xtick": 6, "text": [6, 7, 8, 12], "pareto": 6, "front": 6, "plot_pareto_front": 6, "plot_param_import": 6, "dictionari": [6, 10], "ordereddict": 6, "descend": 6, "algortihm": 6, "impact": 6, "durat": 6, "total_second": 6, "target_nam": 6, "relationship": 6, "plot_parallel_coordin": [6, 8, 9, 11], "param": [6, 7, 9, 10], "taken": [6, 7], "101": 6, "precomputed_config": 6, "precomputed_studi": 6, "precomputed_exampl": 6, "precomputed_model": 6, "785": 6, "788": 6, "3014": 6, "274803630188": 6, "471088599086": 6, "03592375122963953": 6, "511": 6, "3029": 6, "113810544919": 6, "8153295905650357": 6, "4358": 6, "575772003129": 6, "unseen": 6, "caus": 6, "111": 6, "new_molecul": 6, "112": 6, "tempfil": 6, "temp": [6, 7], "store": [6, 7, 11], "temporari": [6, 10], "extract": 6, "1st": 6, "example_fp": 6, "namedtemporaryfil": 6, "temp_fil": 6, "len": [6, 7], "to_csv": 6, "292": 6, "65709987": 6, "64327077": 6, "common": 7, "proper": 7, "optuna_az": 7, "process": [7, 8], "draw": 7, "ipythonconsol": 7, "ipython": 7, "displai": 7, "os": 7, "listdir": 7, "isfil": 7, "walk": 7, "handi": 7, "var": 7, "1v": 7, 
"9y_z128d7gvcp8mf8q0pz3ch0000gq": 7, "ipykernel_82497": 7, "796203442": 7, "deprecationwarn": 7, "deprec": 7, "med": 7, "titles": 7, "fontsiz": 7, "labels": 7, "xtick": 7, "ytick": 7, "rcparam": 7, "whitegrid": 7, "set_styl": 7, "white": 7, "inlin": 7, "3336016810": 7, "matplotlibdeprecationwarn": 7, "ship": 7, "v0_8": 7, "api": [7, 8], "boolean": 7, "nomin": 7, "convert": [7, 12], "conduct": 7, "primarydf": 7, "loadsdf": 7, "inchi": 7, "skeletonspher": 7, "nm": 7, "id": 7, "romol": 7, "ic50": 7, "\u00b5m": 7, "c1ccc2c": 7, "co2": 7, "004320939": 7, "1s": 7, "c8h7no2": 7, "c10": 7, "qrcgftxrxymjo": 7, "uhfffaoysa": 7, "86075": 7, "kinas": 7, "p38": 7, "rdchem": 7, "0x7fd1c0d34040": 7, "c1ccc": 7, "ccc2ccccc2occ3ccc": 7, "882397308": 7, "c22h20o3": 7, "c23": 7, "ytdaoqyeyfcini": 7, "89637": 7, "kd": 7, "retinoid": 7, "receptor": 7, "0x7fd1f8f965e0": 7, "130299026": 7, "0x7fd1f8f96650": 7, "nh": 7, "48148606": 7, "c9h7no": 7, "c11": 7, "lisfmebwquvkpj": 7, "n93": 7, "n82": 7, "n65": 7, "n36": 7, "n33": 7, "ki": 7, "nki": 7, "carbon": 7, "anhydras": 7, "xii": 7, "ncarbon": 7, "0x7fd1f8f966c0": 7, "nc1ccccn1": 7, "c7h8n2o": 7, "h2": 7, "5h": 7, "qrokotbwfzitjz": 7, "86233": 7, "nicotin": 7, "phosphoribosyltransferas": 7, "0x7fd1f8f96730": 7, "renam": 7, "conveni": [7, 8, 9, 12], "rest": 7, "moltosmil": 7, "isomericsmil": 7, "c1coc2ccccc2n1": 7, "004321": 7, "coc2ccccc2ccc2ccccc2": 7, "882397": 7, "130299": 7, "c1ccc2ccccc2": 7, "481486": 7, "stick": 7, "engin": 7, "assum": [7, 12, 13], "stage": 7, "slightli": 7, "pose": 7, "wise": 7, "uniqu": 7, "outlin": 7, "occurr": 7, "preserv": 7, "vari": 7, "recommend": 7, "abil": 7, "being": 7, "df_po": 7, "dedup": [7, 12], "df_rnd": 7, "df_max": 7, "df_avg": 7, "df_med": 7, "deduplidc": 7, "397": 7, "indici": 7, "coc1cc2ncnc": 7, "nc3ccc": 7, "scc4ccccc4": 7, "cl": 7, "c3": 7, "c2cc1oc": 7, "282579": 7, "912929": 7, "cc1cccc": 7, "nc2ncnc3ccc": 7, "c4ccccc4": 7, "cc23": 7, "n2cc": 7, "cc2c": 7, "nc2cccc": 7, "n2": 7, "065502": 
7, "390": 7, "958607": 7, "392": 7, "oc2cccc": 7, "cn1nc": 7, "c2cnc": 7, "c3ccc": 7, "nc1c1": 7, "oc1nc": 7, "oc2cc": 7, "c3ccccc3": 7, "c3cccc": 7, "cn": 7, "936291": 7, "uniquify_by_posit": 7, "uniquify_randomli": 7, "essenc": 7, "uniquify_by_valu": 7, "highest": 7, "minim": [7, 9], "densiti": 7, "dpi": 7, "kdeplot": 7, "shade": 7, "orang": 7, "ndigit": 7, "blue": 7, "keepaverag": 7, "deeppink": 7, "hold": [7, 8], "assess": 7, "aim": 7, "simpli": 7, "sai": 7, "veri": 7, "intern": [7, 10], "resembl": 7, "easiest": 7, "especi": 7, "reli": 7, "stochast": 7, "initi": [7, 8, 9], "train_ran": 7, "test_ran": 7, "time_column": 7, "old": 7, "datapoint": [7, 10, 12, 13], "accru": 7, "entri": [7, 12, 13], "timestamp": 7, "top": 7, "df_med_tempor": 7, "fake": 7, "insert": 7, "nccccccoc1ccc2c": 7, "n1cc": 7, "cc1c": 7, "684240": 7, "1037": 7, "481464": 7, "1036": 7, "c1cn": 7, "n2c": 7, "nc3cccc": 7, "n3": 7, "cc3cc3": 7, "732625": 7, "1035": 7, "568332": 7, "1034": 7, "790259": 7, "1033": 7, "train_tempor": 7, "test_tempor": 7, "highli": 7, "skew": 7, "respcol": 7, "fd": [7, 12, 13], "histogram": [7, 12, 13], "determin": [7, 12, 13], "balanc": 7, "train_str": 7, "test_str": 7, "realist": 7, "deplopi": 7, "emul": [7, 12, 13], "hop": 7, "opportun": 7, "seri": 7, "push": 7, "domain": 7, "realibl": 7, "challeng": 7, "scaf_split": 7, "train_sca": 7, "test_sca": 7, "ground": 7, "df_val": 7, "roughli": [7, 9], "evid": 7, "pronounc": 7, "dodgerblu": 7, "middl": 7, "saffold": 7, "discuss": 7, "propreti": 7, "endpoint": 7, "routin": 7, "linearis": 7, "variat": 7, "gaussian": 7, "realiti": 7, "nearli": 7, "exhibit": 7, "superior": 7, "wide": 7, "xc50_data": 7, "keepallnodedupl": [7, 12], "pxc50_data": 7, "zip": 7, "jointplot": 7, "suptitl": 7, "plot_margin": 7, "rugplot": 7, "crimson": 7, "height": 7, "clip_on": 7, "jointgrid": 7, "0x7fd1f984f670": 7, "heteroscedast": 7, "homoscedast": 7, "transorm": 7, "0x7fd1eb924700": 7, "logartihm": 7, "power": [7, 12, 14], "um": 7, 
"dataset_transform": 7, "THe": 7, "0x7fd1db908550": 7, "had": 7, "reverse_transform": [7, 12], "0x7fd1bc05b4c0": 7, "mervin": 7, "framework": 7, "somewher": 7, "unavoid": 7, "ideal": 7, "deviat": [7, 12, 14], "\u03c3": 7, "heterogen": 7, "versu": 7, "assimil": 7, "hypothesi": 7, "degre": 7, "p_": 7, "activityt": 7, "cumul": 7, "cdf": 7, "eq": 7, "equal": [7, 9], "delta": 7, "vec": 7, "frac": 7, "erf": 7, "overrightarrow": 7, "sigma": 7, "concret": 7, "arbitrari": 7, "unknown": 7, "therefor": 7, "schemat": 7, "lookup": 7, "tabl": 7, "sd": 7, "scenario": [7, 12, 13], "philosoph": 7, "delimit": [7, 12, 14], "operand": 7, "censor": 7, "far": 7, "granular": 7, "therebi": 7, "combin": 7, "becom": 7, "jcheminf": 7, "biomedcentr": 7, "1186": 7, "s13321": 7, "00539": 7, "sec12": 7, "pxc50_threshold": 7, "pxc50_std": 7, "exmapl": 7, "ptr_data": 7, "gather": 7, "certainti": 7, "lower_reproduc": 7, "upper_reproduc": 7, "dataload": 7, "somewhat": 7, "invert": 7, "bell": 7, "kde": 7, "behav": 7, "patch": 7, "mpatch": 7, "line2d": 7, "ax_joint": 7, "axhlin": 7, "linestyl": 7, "ax_marg_x": 7, "axvlin": 7, "ax_marg_i": 7, "region": 7, "uncert_color": 7, "purpl": 7, "uncert_region": 7, "rectangl": 7, "fill": 7, "add_patch": 7, "box": 7, "nthreshold": 7, "nptr": 7, "nregion": 7, "fancybox": 7, "borderaxespad": 7, "leg": 7, "get_legend": 7, "legendhandl": 7, "set_color": 7, "set_alpha": 7, "tight": 7, "layout": 7, "subplots_adjust": 7, "set_size_inch": 7, "1725493911": 7, "attribut": 7, "minor": 7, "legend_handl": 7, "dash": 7, "histrogram": 7, "aforement": 7, "ptrtransform": [7, 12, 14], "ptr_transform": 7, "histplot": 7, "stat": 7, "scipi": 7, "resp_col": [7, 8], "pearsonr": 7, "std_df": 7, "groupbi": 7, "mdn_df": 7, "std_vs_median": 7, "suffix": 7, "_std": 7, "_median": 7, "dropna": 7, "activity_std": 7, "activity_median": 7, "088539": 7, "674782": 7, "265123": 7, "138620": 7, "157605": 7, "stdev": 7, "stat_func": 7, "plot_joint": 7, "zorder": 7, "0x7fd1def2f8b0": 7, "signific": 7, 
"assumpt": 7, "fulfil": 7, "ptr_train_ran": 7, "ptr_test_ran": 7, "ptr_train_str": 7, "ptr_test_str": 7, "ptr_train_tempor": 7, "ptr_test_tempor": 7, "ptr_train_sca": 7, "ptr_test_sca": 7, "xlim": 7, "pypoetri": 7, "virtualenv": 7, "_qsktrft": 7, "py3": 7, "warn_singular": 7, "msg": 7, "peak": 7, "extem": 7, "dsitribut": 7, "awai": 7, "bias": 7, "enum": [8, 9, 10, 12], "building_configuration_enum": [8, 10], "configuration_enum": [8, 10], "interface_enum": [8, 10], "model_runner_enum": [8, 10], "objective_enum": [8, 10], "optimization_configuration_enum": [8, 10], "prediction_configuration_enum": [8, 10], "return_values_enum": [8, 10], "visualization_enum": [8, 10], "merge_train_and_test_data": 8, "isvalid": 8, "read_data": 8, "filenam": [8, 10], "smiles_col": [8, 10, 12, 13], "aux_col": 8, "invalid": 8, "pars": [8, 9, 11], "tupl": [8, 12, 13], "ambigu": 8, "smiles_": 8, "y_": [8, 10, 12], "aux_": 8, "factori": [8, 9], "intermediate_training_dataset_fil": 8, "intermediate_test_dataset_fil": 8, "get_merged_set": 8, "check_set": 8, "scalingfittingerror": 8, "descriptor_str": 8, "insuffici": 8, "unfittedsklearnscla": 8, "novalidsmil": 8, "mol_from_smi": 8, "numpy_from_rdkit": 8, "dtype": [8, 12], "moldescriptor": 8, "nameparameterdataclass": [8, 9, 12], "abc": [8, 9, 12], "abstract": [8, 9, 12], "parallel_compute_descriptor": 8, "n_core": 8, "parallel": 8, "rdkitdescriptor": 8, "liter": [8, 9, 12, 13], "pathlib": 8, "get_fitted_scaler_for_fp": 8, "saved_param": 8, "get_fitted_scal": 8, "set_unfitted_scaler_data": 8, "fp_info": 8, "canonicalsmil": 8, "scaffold": [8, 12, 13], "genericscaffold": 8, "validdescriptor": 8, "descriptor_from_config": 8, "return_failed_idx": 8, "configur": [8, 9, 10, 11], "score_al": 8, "get_scor": 8, "score_all_smil": 8, "get_train_test_scor": 8, "get_merged_train_scor": 8, "get_ecfp_fpinfo": 8, "get_ecfpcount_fpinfo": 8, "explain_ecfp": 8, "len_feat": 8, "get_fp_info": 8, "exp_df": 8, "descript": [8, 9], "fp_idx": 8, "strt_idx": 8, 
"runshap": 8, "x_pred": 8, "shap": 8, "shapexplain": 8, "popul": 8, "explainpr": 8, "shallow": 8, "validate_cls_input": 8, "pi_zero": 8, "pr": 8, "melloddi": 8, "sparsechem": 8, "imbal": 8, "bedroc_scor": [8, 9], "truchon": 8, "j": 8, "bayli": 8, "screen": 8, "bad": 8, "recognit": 8, "2007": 8, "concord": 8, "statist": 8, "qualiti": 8, "harald": 8, "On": 8, "surviv": 8, "bound": 8, "2008": 8, "1209": 8, "1216": 8, "posterior": 8, "mark": 8, "abstractmethod": [8, 9], "predict_uncert": 8, "quantifi": 8, "qsartunamodel": 8, "nonetyp": [8, 9], "sent": 8, "get_metadata": 8, "train_scor": [8, 11], "test_scor": [8, 10, 11], "get_transform": 8, "perform_ptr": 8, "wrap_model": 8, "save_model": 8, "novaliddescriptor": 8, "null_scor": 8, "predict_pl": 8, "model_path": 8, "inference_path": 8, "argserror": 8, "issu": [8, 12], "uncertaintyerror": 8, "correctli": 8, "auxcovariatemiss": 8, "precomputederror": 8, "validate_arg": 8, "validate_uncertainti": 8, "check_precomp_arg": 8, "validate_set_precomput": 8, "validate_aux": 8, "doctitl": 8, "docstr": 8, "type_base_schema": 8, "tp": 8, "wyfo": 8, "json_schema": 8, "patch_schema_gener": 8, "patch_schema_optunaz": 8, "split_optim": 8, "base_chemprop_param": 8, "alg": [8, 9], "pop": 8, "fix": [8, 9], "run_studi": 8, "storag": [8, 11], "trial_number_offset": [8, 10], "log_scor": 8, "main_scor": 8, "outfnam": 8, "plot_by_configur": 8, "conf": 8, "plot_slic": [8, 9, 11], "folder_path": 8, "file_format": [8, 9, 11], "png": [8, 9], "plot_contour": [8, 9, 11], "static": 8, "plot_histori": [8, 9, 11], "set_build_cach": 9, "preexist": 9, "remove_algo_hash": 9, "buildconfig_from_tri": 9, "encode_nam": 9, "cenam": 9, "suggest_alg_param": 9, "suggest_aux_param": 9, "desc": 9, "check_invalid_descriptor_param": 9, "adaboostclassifierparamet": 9, "lassoparamet": 9, "kneighborsclassifierparamet": 9, "kneighborsregressorparamet": 9, "logisticregressionparamet": 9, "plsparamet": 9, "randomforestparamet": 9, "ridgeparamet": 9, "svcparamet": 9, 
"svrparamet": 9, "xgbregressorparamet": 9, "prfclassifierparamet": 9, "bootstrap": 9, "new_syn_data_frac": 9, "chempropregressorparamet": 9, "chempropclassifierparamet": 9, "chempropregressorpretrainedparamet": 9, "chemprophyperoptclassifierparamet": 9, "chemprophyperoptregressorparamet": 9, "calibratedclassifiercvparamet": 9, "mapieparamet": 9, "regressionscor": 9, "classificationscor": 9, "closer": 9, "greater": 9, "manhattan": 9, "trainarg": 9, "tanh": 9, "leakyrelu": 9, "prelu": 9, "selu": 9, "elu": 9, "turn": [9, 12, 13], "morgan_count": 9, "rdkit_2d": 9, "rdkit_2d_norm": 9, "mpnn_first_ffn": 9, "mpnn_last_ffn": 9, "linked_hidden_s": 9, "constrain": 9, "l3": 9, "init_lr_exp": 9, "final_lr_exp": 9, "warmup_epoch": 9, "l4": 9, "l5": 9, "l6": 9, "l7": 9, "l8": 9, "isanyof": 9, "obj": [9, 10], "detect_mode_from_alg": 9, "copy_path_for_scaled_descriptor": 9, "cv_split_strategi": 9, "use_cach": 9, "optuna_storag": 9, "set_cach": 9, "set_algo_hash": 9, "declar": 9, "pydant": 9, "don": 9, "classmethod": 9, "1024": 9, "shorter": 9, "output_fold": [9, 11], "use_xvfb": [9, 11], "imagefileformat": 9, "jpeg": 9, "jpg": 9, "svg": 9, "move_up_directori": 10, "attach_root_path": 10, "attach": 10, "loadjson": 10, "add_ellipsi": 10, "max_length": 10, "shorten_nam": 10, "mlflowcallback": 10, "tracking_uri": 10, "callback": 10, "uri": 10, "server": 10, "set_tracking_uri": 10, "prepare_tag": 10, "tag": 10, "tmp_buildconfig": 10, "create_depend": 10, "remove_schema_properti": 10, "add_boolean_guards_for_schema_properti": 10, "replacekei": 10, "input_": 10, "replacevalu": 10, "addsibl": 10, "delsibl": 10, "sibl": 10, "getref": 10, "context": 10, "recurs": 10, "nest": 10, "copytitl": 10, "oneof": 10, "replaceenum": 10, "singleton": 10, "const": 10, "addtitl": 10, "get_authorization_head": 10, "trackingdata": 10, "trial_numb": [10, 11], "trial_valu": 10, "trial_stat": 10, "all_cv_test_scor": 10, "dataclass": 10, "removeprefix": 10, "prefix": 10, "round_scor": 10, 
"internaltrackingcallback": 10, "progress": 10, "buildtrackingdata": 10, "response_column_nam": 10, "test_point": 10, "track_build": 10, "mkdict": 10, "load_df_from_fil": 10, "remove_failed_idx": 10, "failed_idx": 10, "md5_hash": 10, "md5": 10, "buildingconfigurationenum": 11, "configurationenum": 11, "general_hyperparamet": 11, "hyper_paramet": 11, "general_regressor": 11, "general_classifi": 11, "metadata_besttri": 11, "metadata_bestvalu": 11, "general_dis": 11, "general_paramet": 11, "task_optim": 11, "task_build": 11, "data_inputcolumn": 11, "data_responsecolumn": 11, "data_train": 11, "data_test": 11, "descriptors_avalon": 11, "descriptors_avalon_nbit": 11, "descriptors_ecfp": 11, "descriptors_ecfp_radiu": 11, "descriptors_ecfp_nbit": 11, "descriptors_ecfpcount": 11, "descriptors_ecfpcounts_radiu": 11, "descriptors_ecfpcounts_usefeatur": 11, "descriptors_pathfp": 11, "descriptors_pathfp_maxpath": 11, "descriptors_pathfp_fps": 11, "descriptors_maccskei": 11, "descriptors_unsc_physchem": 11, "descriptors_physchem": 11, "descriptors_physchem_rdkitnam": 11, "descriptors_unsc_jazzi": 11, "descriptors_jazzi": 11, "descriptors_jazzy_jazzynam": 11, "descriptors_precomput": 11, "descriptors_precomputed_fil": 11, "descriptors_precomputed_input_columnn": 11, "descriptors_precomputed_response_column": 11, "descriptors_unsc_zscal": 11, "descriptors_zscal": 11, "descriptors_smil": 11, "descriptors_smiles_and_si": 11, "descriptors_smiles_and_si_fil": 11, "descriptors_smiles_and_si_input_column": 11, "descriptors_smiles_and_si_aux_weight_pc": 11, "descriptors_sc": 11, "descriptors_scaled_descriptor": 11, "descriptors_scaled_descriptor_paramet": 11, "descriptors_composit": 11, "settings_mod": 11, "settings_mode_regress": 11, "settings_mode_classif": 11, "settings_cross_valid": 11, "settings_direct": 11, "settings_n_tri": 11, "settings_n_job": 11, "settings_shuffl": 11, "algorithms_low": 11, "algorithms_high": 11, "algorithms_q": 11, "algorithms_interface_sklearn": 11, 
"algorithms_interface_xgboost": 11, "algorithms_rfregressor": 11, "algorithms_rfclassifi": 11, "algorithms_rf_max_featur": 11, "algorithms_rf_max_depth": 11, "algorithms_rf_n_estim": 11, "algorithms_svr": 11, "algorithms_svr_c": 11, "algorithms_svr_gamma": 11, "algorithms_svc": 11, "algorithms_svc_c": 11, "algorithms_svc_gamma": 11, "algorithms_lasso": 11, "algorithms_lasso_alpha": 11, "algorithms_kneighborsclassifi": 11, "algorithms_kneighborsregressor": 11, "algorithms_kneighbors_n_neighbor": 11, "algorithms_kneighbors_metr": 11, "algorithms_kneighbors_weight": 11, "algorithms_ridg": 11, "algorithms_ridge_alpha": 11, "algorithms_plsregress": 11, "algorithms_plsregression_n_compon": 11, "algorithms_logisticregress": 11, "algorithms_logisticregression_solv": 11, "algorithms_logisticregression_c": 11, "algorithms_adaboostclassifi": 11, "algorithms_adaboostclassifier_n_estim": 11, "algorithms_adaboostclassifier_learning_r": 11, "algorithms_xgbregressor": 11, "algorithms_xgbregressor_max_depth": 11, "algorithms_xgbregressor_n_estim": 11, "algorithms_xgbregressor_learning_r": 11, "algorithms_prf": 11, "algorithms_prf_max_featur": 11, "algorithms_prf_max_depth": 11, "algorithms_prf_n_estim": 11, "algorithms_prf_minpysumleaf": 11, "algorithms_prf_use_py_gini": 11, "algorithms_prf_use_py_leaf": 11, "algorithms_chemprop": 11, "basechemprop": 11, "algorithms_chemprop_regressor": 11, "algorithms_chemprop_hyperopt_regressor": 11, "algorithms_chemprop_classifi": 11, "algorithms_chemprop_hyperopt_classifi": 11, "algorithms_chemprop_activ": 11, "algorithms_chemprop_aggreg": 11, "algorithms_chemprop_aggregation_norm": 11, "algorithms_chemprop_batch_s": 11, "algorithms_chemprop_depth": 11, "algorithms_chemprop_dropout": 11, "algorithms_chemprop_epoch": 11, "algorithms_chemprop_ensemble_s": 11, "algorithms_chemprop_features_gener": 11, "algorithms_chemprop_ffn_hidden_s": 11, "algorithms_chemprop_ffn_num_lay": 11, "algorithms_chemprop_frzn": 11, 
"algorithms_chemprop_final_lr_ratio_exp": 11, "algorithms_chemprop_hidden_s": 11, "algorithms_chemprop_num_it": 11, "algorithms_chemprop_init_lr_ratio_exp": 11, "algorithms_chemprop_max_lr_exp": 11, "algorithms_chemprop_pretrained_model": 11, "algorithms_chemprop_search_parameter_level": 11, "algorithms_chemprop_startup_random_it": 11, "startup_random_it": 11, "algorithms_chemprop_warmup_epochs_ratio": 11, "algorithms_calibratedclassifiercv": 11, "algorithms_calibratedclassifiercv_ensembl": 11, "algorithms_calibratedclassifiercv_estim": 11, "algorithms_calibratedclassifiercv_method": 11, "algorithms_calibratedclassifiercv_n_fold": 11, "algorithms_calibratedclassifiercv_param": 11, "calibrated_param": 11, "algorithms_mapi": 11, "algorithms_mapie_alpha": 11, "interfaceenum": 11, "sklearn_set": 11, "xgboost_set": 11, "chemprop_set": 11, "prf_set": 11, "calibrated_set": 11, "modelrunnerdataframeenum": 11, "modelrunn": 11, "objectiveenum": 11, "attribute_trial_train_scor": 11, "extra_column_besthit": 11, "besthit": 11, "optimizationconfigurationenum": 11, "predictionconfigurationenum": 11, "data_dataset": 11, "sklearnreturnvalueenum": 11, "cross_validate_fit_tim": 11, "fit_tim": 11, "cross_validate_score_tim": 11, "score_tim": 11, "cross_validate_test_scor": 11, "cross_validate_train_scor": 11, "xgboostreturnvalueenum": 11, "visualizationenum": 11, "visualization_regressor": 11, "visualization_classifi": 11, "visualization_use_xvfb": 11, "visualization_output_fold": 11, "visualization_file_format": 11, "visualization_plot": 11, "visualization_plots_histori": 11, "visualization_plots_contour": 11, "visualization_plots_parallel_coordin": 11, "visualization_plots_slic": 11, "optuna_system_attrs_numb": 11, "_number": 11, "optuna_system_attrs_intermediate_valu": 11, "intermediate_valu": 11, "optuna_system_attrs_trial_id": 11, "trial_id": 11, "studyuserattr": 11, "trialuserattr": 11, "trialparam": 11, "algorithm_hash": 11, "mlflowlogparam": 11, "sklearnsplitt": 12, 
"get_n_split": 12, "cvsplitter": 12, "although": 12, "underli": 12, "get_sklearn_splitt": 12, "n_split": 12, "repeat": [12, 13], "edg": [12, 13], "stratifiedshufflesplit": [12, 13], "revert": [12, 13], "empti": 12, "kfold": 12, "affect": 12, "consecut": 12, "fd_bin": 12, "11879": 12, "10297": 12, "adjac": 12, "downstream": 12, "nativ": 12, "histogramstratifiedshufflesplit": 12, "test_fract": 12, "groupingsplitt": 12, "column_nam": [12, 13], "butina_clust": [12, 13], "cluster": [12, 13], "butina": [12, 13], "make_scaffold_gener": [12, 13], "murcko": [12, 13], "hetero": [12, 13], "difficulti": [12, 13], "novel": [12, 13], "datatransform": 12, "logarithm": [12, 14], "base_dict": 12, "ufunc": 12, "base_neg": 12, "reverse_dict": 12, "exp": 12, "transform_df": 12, "transform_on": 12, "reverse_transform_df": 12, "reverse_transform_on": 12, "auxtransform": 12, "transfor": 12, "auxiliary_data": 12, "usabl": [12, 14]}, "objects": {"": [[8, 0, 0, "-", "optunaz"]], "optunaz": [[8, 0, 0, "-", "builder"], [9, 0, 0, "-", "config"], [8, 0, 0, "-", "datareader"], [8, 0, 0, "-", "descriptors"], [8, 0, 0, "-", "evaluate"], [8, 0, 0, "-", "explainability"], [8, 0, 0, "-", "metircs"], [8, 0, 0, "-", "model_writer"], [8, 0, 0, "-", "objective"], [8, 0, 0, "-", "optbuild"], [8, 0, 0, "-", "predict"], [8, 0, 0, "-", "schemagen"], [8, 0, 0, "-", "three_step_opt_build_merge"], [10, 0, 0, "-", "utils"], [8, 0, 0, "-", "visualizer"]], "optunaz.builder": [[8, 1, 1, "", "build"]], "optunaz.config": [[9, 2, 1, "", "Algorithm"], [9, 2, 1, "", "ModelMode"], [9, 2, 1, "", "NameParameterDataclass"], [9, 2, 1, "", "OptimizationDirection"], [9, 2, 1, "", "Task"], [9, 2, 1, "", "Visualization"], [9, 0, 0, "-", "build_from_opt"], [9, 0, 0, "-", "buildconfig"], [9, 0, 0, "-", "optconfig"]], "optunaz.config.ModelMode": [[9, 3, 1, "", "CLASSIFICATION"], [9, 3, 1, "", "REGRESSION"]], "optunaz.config.NameParameterDataclass": [[9, 4, 1, "", "new"]], "optunaz.config.OptimizationDirection": [[9, 3, 1, "", 
"MAXIMIZATION"], [9, 3, 1, "", "MINIMIZATION"]], "optunaz.config.Task": [[9, 3, 1, "", "BUILDING"], [9, 3, 1, "", "OPTIMIZATION"], [9, 3, 1, "", "PREDICTION"]], "optunaz.config.Visualization": [[9, 2, 1, "", "ImageFileFormat"], [9, 2, 1, "", "Plots"], [9, 3, 1, "", "file_format"], [9, 3, 1, "", "output_folder"], [9, 3, 1, "", "plots"], [9, 3, 1, "", "use_xvfb"]], "optunaz.config.Visualization.ImageFileFormat": [[9, 3, 1, "", "JPEG"], [9, 3, 1, "", "JPG"], [9, 3, 1, "", "PDF"], [9, 3, 1, "", "PNG"], [9, 3, 1, "", "SVG"]], "optunaz.config.Visualization.Plots": [[9, 3, 1, "", "plot_contour"], [9, 3, 1, "", "plot_history"], [9, 3, 1, "", "plot_parallel_coordinate"], [9, 3, 1, "", "plot_slice"]], "optunaz.config.build_from_opt": [[9, 1, 1, "", "buildconfig_from_trial"], [9, 1, 1, "", "check_invalid_descriptor_param"], [9, 1, 1, "", "encode_name"], [9, 1, 1, "", "remove_algo_hash"], [9, 1, 1, "", "set_build_cache"], [9, 1, 1, "", "suggest_alg_params"], [9, 1, 1, "", "suggest_aux_params"]], "optunaz.config.buildconfig": [[9, 2, 1, "", "AdaBoostClassifier"], [9, 2, 1, "", "Algorithm"], [9, 2, 1, "", "BuildConfig"], [9, 2, 1, "", "CalibratedClassifierCVWithVA"], [9, 2, 1, "", "ChemPropClassifier"], [9, 2, 1, "", "ChemPropHyperoptClassifier"], [9, 2, 1, "", "ChemPropHyperoptRegressor"], [9, 2, 1, "", "ChemPropRegressor"], [9, 2, 1, "", "ChemPropRegressorPretrained"], [9, 2, 1, "", "KNeighborsClassifier"], [9, 2, 1, "", "KNeighborsRegressor"], [9, 2, 1, "", "Lasso"], [9, 2, 1, "", "LogisticRegression"], [9, 2, 1, "", "Mapie"], [9, 2, 1, "", "PLSRegression"], [9, 2, 1, "", "PRFClassifier"], [9, 2, 1, "", "RandomForestClassifier"], [9, 2, 1, "", "RandomForestRegressor"], [9, 2, 1, "", "Ridge"], [9, 2, 1, "", "SVC"], [9, 2, 1, "", "SVR"], [9, 2, 1, "", "XGBRegressor"]], "optunaz.config.buildconfig.AdaBoostClassifier": [[9, 2, 1, "", "AdaBoostClassifierParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], 
"optunaz.config.buildconfig.AdaBoostClassifier.AdaBoostClassifierParameters": [[9, 3, 1, "", "learning_rate"], [9, 3, 1, "", "n_estimators"]], "optunaz.config.buildconfig.Algorithm": [[9, 4, 1, "", "estimator"]], "optunaz.config.buildconfig.BuildConfig": [[9, 2, 1, "", "Metadata"], [9, 2, 1, "", "Settings"], [9, 3, 1, "", "algorithm"], [9, 3, 1, "", "data"], [9, 3, 1, "", "descriptor"], [9, 3, 1, "", "metadata"], [9, 3, 1, "", "settings"], [9, 3, 1, "", "task"]], "optunaz.config.buildconfig.BuildConfig.Metadata": [[9, 3, 1, "", "best_trial"], [9, 3, 1, "", "best_value"], [9, 3, 1, "", "cross_validation"], [9, 3, 1, "", "n_trials"], [9, 3, 1, "", "name"], [9, 3, 1, "", "shuffle"], [9, 3, 1, "", "visualization"]], "optunaz.config.buildconfig.BuildConfig.Settings": [[9, 3, 1, "", "direction"], [9, 3, 1, "", "mode"], [9, 3, 1, "", "n_trials"], [9, 3, 1, "", "scoring"], [9, 3, 1, "", "tracking_rest_endpoint"]], "optunaz.config.buildconfig.CalibratedClassifierCVWithVA": [[9, 2, 1, "", "CalibratedClassifierCVParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.CalibratedClassifierCVWithVA.CalibratedClassifierCVParameters": [[9, 3, 1, "", "ensemble"], [9, 3, 1, "", "estimator"], [9, 3, 1, "", "method"], [9, 3, 1, "", "n_folds"]], "optunaz.config.buildconfig.ChemPropClassifier": [[9, 2, 1, "", "ChemPropClassifierParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.ChemPropClassifier.ChemPropClassifierParameters": [[9, 3, 1, "", "activation"], [9, 3, 1, "", "aggregation"], [9, 3, 1, "", "aggregation_norm"], [9, 3, 1, "", "aux_weight_pc"], [9, 3, 1, "", "batch_size"], [9, 3, 1, "", "depth"], [9, 3, 1, "", "dropout"], [9, 3, 1, "", "ensemble_size"], [9, 3, 1, "", "epochs"], [9, 3, 1, "", "features_generator"], [9, 3, 1, "", "ffn_hidden_size"], [9, 3, 1, "", "ffn_num_layers"], [9, 3, 1, "", "final_lr_ratio_exp"], [9, 3, 1, "", 
"hidden_size"], [9, 3, 1, "", "init_lr_ratio_exp"], [9, 3, 1, "", "max_lr_exp"], [9, 3, 1, "", "warmup_epochs_ratio"]], "optunaz.config.buildconfig.ChemPropHyperoptClassifier": [[9, 2, 1, "", "ChemPropHyperoptClassifierParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.ChemPropHyperoptClassifier.ChemPropHyperoptClassifierParameters": [[9, 3, 1, "", "aux_weight_pc"], [9, 3, 1, "", "ensemble_size"], [9, 3, 1, "", "epochs"], [9, 3, 1, "", "features_generator"], [9, 3, 1, "", "num_iters"], [9, 3, 1, "", "search_parameter_level"]], "optunaz.config.buildconfig.ChemPropHyperoptRegressor": [[9, 2, 1, "", "ChemPropHyperoptRegressorParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.ChemPropHyperoptRegressor.ChemPropHyperoptRegressorParameters": [[9, 3, 1, "", "aux_weight_pc"], [9, 3, 1, "", "ensemble_size"], [9, 3, 1, "", "epochs"], [9, 3, 1, "", "features_generator"], [9, 3, 1, "", "num_iters"], [9, 3, 1, "", "search_parameter_level"]], "optunaz.config.buildconfig.ChemPropRegressor": [[9, 2, 1, "", "ChemPropRegressorParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.ChemPropRegressor.ChemPropRegressorParameters": [[9, 3, 1, "", "activation"], [9, 3, 1, "", "aggregation"], [9, 3, 1, "", "aggregation_norm"], [9, 3, 1, "", "aux_weight_pc"], [9, 3, 1, "", "batch_size"], [9, 3, 1, "", "depth"], [9, 3, 1, "", "dropout"], [9, 3, 1, "", "ensemble_size"], [9, 3, 1, "", "epochs"], [9, 3, 1, "", "features_generator"], [9, 3, 1, "", "ffn_hidden_size"], [9, 3, 1, "", "ffn_num_layers"], [9, 3, 1, "", "final_lr_ratio_exp"], [9, 3, 1, "", "hidden_size"], [9, 3, 1, "", "init_lr_ratio_exp"], [9, 3, 1, "", "max_lr_exp"], [9, 3, 1, "", "warmup_epochs_ratio"]], "optunaz.config.buildconfig.ChemPropRegressorPretrained": [[9, 2, 1, "", "ChemPropRegressorPretrainedParameters"], [9, 4, 
1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.ChemPropRegressorPretrained.ChemPropRegressorPretrainedParameters": [[9, 3, 1, "", "epochs"], [9, 3, 1, "", "frzn"], [9, 3, 1, "", "pretrained_model"]], "optunaz.config.buildconfig.KNeighborsClassifier": [[9, 2, 1, "", "KNeighborsClassifierParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.KNeighborsClassifier.KNeighborsClassifierParameters": [[9, 3, 1, "", "metric"], [9, 3, 1, "", "n_neighbors"], [9, 3, 1, "", "weights"]], "optunaz.config.buildconfig.KNeighborsRegressor": [[9, 2, 1, "", "KNeighborsRegressorParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.KNeighborsRegressor.KNeighborsRegressorParameters": [[9, 3, 1, "", "metric"], [9, 3, 1, "", "n_neighbors"], [9, 3, 1, "", "weights"]], "optunaz.config.buildconfig.Lasso": [[9, 2, 1, "", "LassoParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.Lasso.LassoParameters": [[9, 3, 1, "", "alpha"]], "optunaz.config.buildconfig.LogisticRegression": [[9, 2, 1, "", "LogisticRegressionParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.LogisticRegression.LogisticRegressionParameters": [[9, 3, 1, "", "C"], [9, 3, 1, "", "solver"]], "optunaz.config.buildconfig.Mapie": [[9, 2, 1, "", "MapieParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.Mapie.MapieParameters": [[9, 3, 1, "", "estimator"], [9, 3, 1, "", "mapie_alpha"]], "optunaz.config.buildconfig.PLSRegression": [[9, 2, 1, "", "PLSParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.PLSRegression.PLSParameters": [[9, 3, 1, "", "n_components"]], 
"optunaz.config.buildconfig.PRFClassifier": [[9, 2, 1, "", "PRFClassifierParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.PRFClassifier.PRFClassifierParameters": [[9, 3, 1, "", "bootstrap"], [9, 3, 1, "", "max_depth"], [9, 3, 1, "", "max_features"], [9, 3, 1, "", "min_py_sum_leaf"], [9, 3, 1, "", "n_estimators"], [9, 3, 1, "", "new_syn_data_frac"], [9, 3, 1, "", "use_py_gini"], [9, 3, 1, "", "use_py_leafs"]], "optunaz.config.buildconfig.RandomForestClassifier": [[9, 2, 1, "", "RandomForestParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.RandomForestClassifier.RandomForestParameters": [[9, 3, 1, "", "max_depth"], [9, 3, 1, "", "max_features"], [9, 3, 1, "", "n_estimators"]], "optunaz.config.buildconfig.RandomForestRegressor": [[9, 2, 1, "", "RandomForestParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.RandomForestRegressor.RandomForestParameters": [[9, 3, 1, "", "max_depth"], [9, 3, 1, "", "max_features"], [9, 3, 1, "", "n_estimators"]], "optunaz.config.buildconfig.Ridge": [[9, 2, 1, "", "RidgeParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.Ridge.RidgeParameters": [[9, 3, 1, "", "alpha"]], "optunaz.config.buildconfig.SVC": [[9, 2, 1, "", "SVCParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.SVC.SVCParameters": [[9, 3, 1, "", "C"], [9, 3, 1, "", "gamma"]], "optunaz.config.buildconfig.SVR": [[9, 2, 1, "", "SVRParameters"], [9, 4, 1, "", "estimator"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.SVR.SVRParameters": [[9, 3, 1, "", "C"], [9, 3, 1, "", "gamma"]], "optunaz.config.buildconfig.XGBRegressor": [[9, 2, 1, "", "XGBRegressorParameters"], [9, 4, 1, "", "estimator"], 
[9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.buildconfig.XGBRegressor.XGBRegressorParameters": [[9, 3, 1, "", "learning_rate"], [9, 3, 1, "", "max_depth"], [9, 3, 1, "", "n_estimators"]], "optunaz.config.optconfig": [[9, 2, 1, "", "AdaBoostClassifier"], [9, 2, 1, "", "Algorithm"], [9, 2, 1, "", "CalibratedClassifierCVEnsemble"], [9, 2, 1, "", "CalibratedClassifierCVMethod"], [9, 2, 1, "", "CalibratedClassifierCVWithVA"], [9, 2, 1, "", "ChemPropActivation"], [9, 2, 1, "", "ChemPropAggregation"], [9, 2, 1, "", "ChemPropClassifier"], [9, 2, 1, "", "ChemPropFeatures_Generator"], [9, 2, 1, "", "ChemPropFrzn"], [9, 2, 1, "", "ChemPropHyperoptClassifier"], [9, 2, 1, "", "ChemPropHyperoptRegressor"], [9, 2, 1, "", "ChemPropRegressor"], [9, 2, 1, "", "ChemPropRegressorPretrained"], [9, 2, 1, "", "ChemPropSearch_Parameter_Level"], [9, 2, 1, "", "ClassificationScore"], [9, 2, 1, "", "KNeighborsClassifier"], [9, 2, 1, "", "KNeighborsMetric"], [9, 2, 1, "", "KNeighborsRegressor"], [9, 2, 1, "", "KNeighborsWeights"], [9, 2, 1, "", "Lasso"], [9, 2, 1, "", "LogisticRegression"], [9, 2, 1, "", "Mapie"], [9, 2, 1, "", "OptimizationConfig"], [9, 2, 1, "", "PLSRegression"], [9, 2, 1, "", "PRFClassifier"], [9, 2, 1, "", "PRFClassifierMaxFeatures"], [9, 2, 1, "", "RandomForestClassifier"], [9, 2, 1, "", "RandomForestMaxFeatures"], [9, 2, 1, "", "RandomForestRegressor"], [9, 2, 1, "", "RegressionScore"], [9, 2, 1, "", "Ridge"], [9, 2, 1, "", "SVC"], [9, 2, 1, "", "SVR"], [9, 2, 1, "", "XGBRegressor"], [9, 1, 1, "", "copy_path_for_scaled_descriptor"], [9, 1, 1, "", "detect_mode_from_algs"], [9, 1, 1, "", "isanyof"]], "optunaz.config.optconfig.AdaBoostClassifier": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.AdaBoostClassifier.Parameters": [[9, 2, 1, "", "AdaBoostClassifierParametersLearningRate"], [9, 2, 1, "", "AdaBoostClassifierParametersNEstimators"], [9, 3, 1, "", "learning_rate"], [9, 3, 1, "", 
"n_estimators"]], "optunaz.config.optconfig.AdaBoostClassifier.Parameters.AdaBoostClassifierParametersLearningRate": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.AdaBoostClassifier.Parameters.AdaBoostClassifierParametersNEstimators": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.CalibratedClassifierCVEnsemble": [[9, 3, 1, "", "FALSE"], [9, 3, 1, "", "TRUE"]], "optunaz.config.optconfig.CalibratedClassifierCVMethod": [[9, 3, 1, "", "ISOTONIC"], [9, 3, 1, "", "SIGMOID"], [9, 3, 1, "", "VENNABERS"]], "optunaz.config.optconfig.CalibratedClassifierCVWithVA": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.CalibratedClassifierCVWithVA.Parameters": [[9, 3, 1, "", "ensemble"], [9, 3, 1, "", "estimator"], [9, 3, 1, "", "method"], [9, 3, 1, "", "n_folds"]], "optunaz.config.optconfig.ChemPropActivation": [[9, 3, 1, "", "ELU"], [9, 3, 1, "", "LEAKYRELU"], [9, 3, 1, "", "PRELU"], [9, 3, 1, "", "RELU"], [9, 3, 1, "", "SELU"], [9, 3, 1, "", "TANH"]], "optunaz.config.optconfig.ChemPropAggregation": [[9, 3, 1, "", "MEAN"], [9, 3, 1, "", "NORM"], [9, 3, 1, "", "SUM"]], "optunaz.config.optconfig.ChemPropClassifier": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.ChemPropClassifier.Parameters": [[9, 2, 1, "", "ChemPropParametersAggregation_Norm"], [9, 2, 1, "", "ChemPropParametersBatch_Size"], [9, 2, 1, "", "ChemPropParametersDepth"], [9, 2, 1, "", "ChemPropParametersDropout"], [9, 2, 1, "", "ChemPropParametersFFN_Hidden_Size"], [9, 2, 1, "", "ChemPropParametersFFN_Num_Layers"], [9, 2, 1, "", "ChemPropParametersFinal_Lr_Ratio_Exp"], [9, 2, 1, "", "ChemPropParametersHidden_Size"], [9, 2, 1, "", "ChemPropParametersInit_Lr_Ratio_Exp"], [9, 2, 1, "", "ChemPropParametersMax_Lr_Exp"], [9, 2, 1, "", "ChemPropParametersWarmup_Epochs_Ratio"], [9, 3, 1, "", "activation"], [9, 3, 1, "", "aggregation"], [9, 3, 1, "", 
"aggregation_norm"], [9, 3, 1, "", "batch_size"], [9, 3, 1, "", "depth"], [9, 3, 1, "", "dropout"], [9, 3, 1, "", "ensemble_size"], [9, 3, 1, "", "epochs"], [9, 3, 1, "", "features_generator"], [9, 3, 1, "", "ffn_hidden_size"], [9, 3, 1, "", "ffn_num_layers"], [9, 3, 1, "", "final_lr_ratio_exp"], [9, 3, 1, "", "hidden_size"], [9, 3, 1, "", "init_lr_ratio_exp"], [9, 3, 1, "", "max_lr_exp"], [9, 3, 1, "", "warmup_epochs_ratio"]], "optunaz.config.optconfig.ChemPropClassifier.Parameters.ChemPropParametersAggregation_Norm": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropClassifier.Parameters.ChemPropParametersBatch_Size": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropClassifier.Parameters.ChemPropParametersDepth": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropClassifier.Parameters.ChemPropParametersDropout": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropClassifier.Parameters.ChemPropParametersFFN_Hidden_Size": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropClassifier.Parameters.ChemPropParametersFFN_Num_Layers": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropClassifier.Parameters.ChemPropParametersFinal_Lr_Ratio_Exp": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.ChemPropClassifier.Parameters.ChemPropParametersHidden_Size": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropClassifier.Parameters.ChemPropParametersInit_Lr_Ratio_Exp": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.ChemPropClassifier.Parameters.ChemPropParametersMax_Lr_Exp": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], 
"optunaz.config.optconfig.ChemPropClassifier.Parameters.ChemPropParametersWarmup_Epochs_Ratio": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropFeatures_Generator": [[9, 3, 1, "", "MORGAN"], [9, 3, 1, "", "MORGAN_COUNT"], [9, 3, 1, "", "NONE"], [9, 3, 1, "", "RDKIT_2D"], [9, 3, 1, "", "RDKIT_2D_NORMALIZED"]], "optunaz.config.optconfig.ChemPropFrzn": [[9, 3, 1, "", "MPNN"], [9, 3, 1, "", "MPNN_FIRST_FFN"], [9, 3, 1, "", "MPNN_LAST_FFN"], [9, 3, 1, "", "NONE"]], "optunaz.config.optconfig.ChemPropHyperoptClassifier": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.ChemPropHyperoptClassifier.Parameters": [[9, 3, 1, "", "ensemble_size"], [9, 3, 1, "", "epochs"], [9, 3, 1, "", "features_generator"], [9, 3, 1, "", "num_iters"], [9, 3, 1, "", "search_parameter_level"]], "optunaz.config.optconfig.ChemPropHyperoptRegressor": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.ChemPropHyperoptRegressor.Parameters": [[9, 3, 1, "", "ensemble_size"], [9, 3, 1, "", "epochs"], [9, 3, 1, "", "features_generator"], [9, 3, 1, "", "num_iters"], [9, 3, 1, "", "search_parameter_level"]], "optunaz.config.optconfig.ChemPropRegressor": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.ChemPropRegressor.Parameters": [[9, 2, 1, "", "ChemPropParametersAggregation_Norm"], [9, 2, 1, "", "ChemPropParametersBatch_Size"], [9, 2, 1, "", "ChemPropParametersDepth"], [9, 2, 1, "", "ChemPropParametersDropout"], [9, 2, 1, "", "ChemPropParametersFFN_Hidden_Size"], [9, 2, 1, "", "ChemPropParametersFFN_Num_Layers"], [9, 2, 1, "", "ChemPropParametersFinal_Lr_Ratio_Exp"], [9, 2, 1, "", "ChemPropParametersHidden_Size"], [9, 2, 1, "", "ChemPropParametersInit_Lr_Ratio_Exp"], [9, 2, 1, "", "ChemPropParametersMax_Lr_Exp"], [9, 2, 1, "", "ChemPropParametersWarmup_Epochs_Ratio"], [9, 3, 1, 
"", "activation"], [9, 3, 1, "", "aggregation"], [9, 3, 1, "", "aggregation_norm"], [9, 3, 1, "", "batch_size"], [9, 3, 1, "", "depth"], [9, 3, 1, "", "dropout"], [9, 3, 1, "", "ensemble_size"], [9, 3, 1, "", "epochs"], [9, 3, 1, "", "features_generator"], [9, 3, 1, "", "ffn_hidden_size"], [9, 3, 1, "", "ffn_num_layers"], [9, 3, 1, "", "final_lr_ratio_exp"], [9, 3, 1, "", "hidden_size"], [9, 3, 1, "", "init_lr_ratio_exp"], [9, 3, 1, "", "max_lr_exp"], [9, 3, 1, "", "warmup_epochs_ratio"]], "optunaz.config.optconfig.ChemPropRegressor.Parameters.ChemPropParametersAggregation_Norm": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropRegressor.Parameters.ChemPropParametersBatch_Size": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropRegressor.Parameters.ChemPropParametersDepth": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropRegressor.Parameters.ChemPropParametersDropout": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropRegressor.Parameters.ChemPropParametersFFN_Hidden_Size": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropRegressor.Parameters.ChemPropParametersFFN_Num_Layers": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropRegressor.Parameters.ChemPropParametersFinal_Lr_Ratio_Exp": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.ChemPropRegressor.Parameters.ChemPropParametersHidden_Size": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropRegressor.Parameters.ChemPropParametersInit_Lr_Ratio_Exp": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.ChemPropRegressor.Parameters.ChemPropParametersMax_Lr_Exp": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], 
"optunaz.config.optconfig.ChemPropRegressor.Parameters.ChemPropParametersWarmup_Epochs_Ratio": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropRegressorPretrained": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.ChemPropRegressorPretrained.Parameters": [[9, 2, 1, "", "ChemPropParametersEpochs"], [9, 3, 1, "", "epochs"], [9, 3, 1, "", "frzn"], [9, 3, 1, "", "pretrained_model"]], "optunaz.config.optconfig.ChemPropRegressorPretrained.Parameters.ChemPropParametersEpochs": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"], [9, 3, 1, "", "q"]], "optunaz.config.optconfig.ChemPropSearch_Parameter_Level": [[9, 3, 1, "", "AUTO"], [9, 3, 1, "", "L1"], [9, 3, 1, "", "L2"], [9, 3, 1, "", "L3"], [9, 3, 1, "", "L4"], [9, 3, 1, "", "L5"], [9, 3, 1, "", "L6"], [9, 3, 1, "", "L7"], [9, 3, 1, "", "L8"]], "optunaz.config.optconfig.ClassificationScore": [[9, 3, 1, "", "ACCURACY"], [9, 3, 1, "", "AUC_PR_CAL"], [9, 3, 1, "", "AVERAGE_PRECISION"], [9, 3, 1, "", "BALANCED_ACCURACY"], [9, 3, 1, "", "BEDROC"], [9, 3, 1, "", "CONCORDANCE_INDEX"], [9, 3, 1, "", "F1"], [9, 3, 1, "", "F1_MACRO"], [9, 3, 1, "", "F1_MICRO"], [9, 3, 1, "", "F1_WEIGHTED"], [9, 3, 1, "", "JACCARD"], [9, 3, 1, "", "JACCARD_MACRO"], [9, 3, 1, "", "JACCARD_MICRO"], [9, 3, 1, "", "JACCARD_WEIGHTED"], [9, 3, 1, "", "NEG_BRIER_SCORE"], [9, 3, 1, "", "PRECISION"], [9, 3, 1, "", "PRECISION_MACRO"], [9, 3, 1, "", "PRECISION_MICRO"], [9, 3, 1, "", "PRECISION_WEIGHTED"], [9, 3, 1, "", "RECALL"], [9, 3, 1, "", "RECALL_MACRO"], [9, 3, 1, "", "RECALL_MICRO"], [9, 3, 1, "", "RECALL_WEIGHTED"], [9, 3, 1, "", "ROC_AUC"]], "optunaz.config.optconfig.KNeighborsClassifier": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.KNeighborsClassifier.Parameters": [[9, 2, 1, "", "KNeighborsClassifierParametersN_Neighbors"], [9, 3, 1, "", "metric"], [9, 3, 1, "", "n_neighbors"], 
[9, 3, 1, "", "weights"]], "optunaz.config.optconfig.KNeighborsClassifier.Parameters.KNeighborsClassifierParametersN_Neighbors": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.KNeighborsMetric": [[9, 3, 1, "", "EUCLIDEAN"], [9, 3, 1, "", "MANHATTAN"], [9, 3, 1, "", "MINKOWSKI"]], "optunaz.config.optconfig.KNeighborsRegressor": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.KNeighborsRegressor.Parameters": [[9, 2, 1, "", "KNeighborsRegressorParametersN_Neighbors"], [9, 3, 1, "", "metric"], [9, 3, 1, "", "n_neighbors"], [9, 3, 1, "", "weights"]], "optunaz.config.optconfig.KNeighborsRegressor.Parameters.KNeighborsRegressorParametersN_Neighbors": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.KNeighborsWeights": [[9, 3, 1, "", "DISTANCE"], [9, 3, 1, "", "UNIFORM"]], "optunaz.config.optconfig.Lasso": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.Lasso.Parameters": [[9, 2, 1, "", "LassoParametersAlpha"], [9, 3, 1, "", "alpha"]], "optunaz.config.optconfig.Lasso.Parameters.LassoParametersAlpha": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.LogisticRegression": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.LogisticRegression.Parameters": [[9, 3, 1, "", "C"], [9, 2, 1, "", "LogisticRegressionParametersParameterC"], [9, 3, 1, "", "solver"]], "optunaz.config.optconfig.LogisticRegression.Parameters.LogisticRegressionParametersParameterC": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.Mapie": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.Mapie.Parameters": [[9, 3, 1, "", "estimator"], [9, 3, 1, "", "mapie_alpha"]], "optunaz.config.optconfig.OptimizationConfig": [[9, 2, 1, "", "Settings"], [9, 3, 1, "", "algorithms"], [9, 3, 1, 
"", "data"], [9, 3, 1, "", "description"], [9, 3, 1, "", "descriptors"], [9, 3, 1, "", "mode"], [9, 3, 1, "", "name"], [9, 4, 1, "", "set_algo_hashes"], [9, 4, 1, "", "set_cache"], [9, 3, 1, "", "settings"], [9, 3, 1, "", "task"], [9, 3, 1, "", "visualization"]], "optunaz.config.optconfig.OptimizationConfig.Settings": [[9, 3, 1, "", "cross_validation"], [9, 3, 1, "", "cv_split_strategy"], [9, 3, 1, "", "direction"], [9, 3, 1, "", "minimise_std_dev"], [9, 3, 1, "", "mode"], [9, 3, 1, "", "n_chemprop_trials"], [9, 3, 1, "", "n_jobs"], [9, 3, 1, "", "n_startup_trials"], [9, 3, 1, "", "n_trials"], [9, 3, 1, "", "optuna_storage"], [9, 3, 1, "", "random_seed"], [9, 3, 1, "", "scoring"], [9, 3, 1, "", "shuffle"], [9, 3, 1, "", "split_chemprop"], [9, 3, 1, "", "track_to_mlflow"], [9, 3, 1, "", "tracking_rest_endpoint"], [9, 3, 1, "", "use_cache"]], "optunaz.config.optconfig.PLSRegression": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.PLSRegression.Parameters": [[9, 2, 1, "", "NComponents"], [9, 3, 1, "", "n_components"]], "optunaz.config.optconfig.PLSRegression.Parameters.NComponents": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.PRFClassifier": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.PRFClassifier.Parameters": [[9, 2, 1, "", "PRFClassifierParametersMaxDepth"], [9, 2, 1, "", "PRFClassifierParametersMinPySumLeaf"], [9, 2, 1, "", "PRFClassifierParametersNEstimators"], [9, 3, 1, "", "max_depth"], [9, 3, 1, "", "max_features"], [9, 3, 1, "", "min_py_sum_leaf"], [9, 3, 1, "", "n_estimators"], [9, 3, 1, "", "use_py_gini"], [9, 3, 1, "", "use_py_leafs"]], "optunaz.config.optconfig.PRFClassifier.Parameters.PRFClassifierParametersMaxDepth": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.PRFClassifier.Parameters.PRFClassifierParametersMinPySumLeaf": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], 
"optunaz.config.optconfig.PRFClassifier.Parameters.PRFClassifierParametersNEstimators": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.PRFClassifierMaxFeatures": [[9, 3, 1, "", "AUTO"], [9, 3, 1, "", "LOG2"], [9, 3, 1, "", "SQRT"]], "optunaz.config.optconfig.RandomForestClassifier": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.RandomForestClassifier.Parameters": [[9, 2, 1, "", "RandomForestClassifierParametersMaxDepth"], [9, 2, 1, "", "RandomForestClassifierParametersNEstimators"], [9, 3, 1, "", "max_depth"], [9, 3, 1, "", "max_features"], [9, 3, 1, "", "n_estimators"]], "optunaz.config.optconfig.RandomForestClassifier.Parameters.RandomForestClassifierParametersMaxDepth": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.RandomForestClassifier.Parameters.RandomForestClassifierParametersNEstimators": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.RandomForestMaxFeatures": [[9, 3, 1, "", "AUTO"], [9, 3, 1, "", "LOG2"], [9, 3, 1, "", "SQRT"]], "optunaz.config.optconfig.RandomForestRegressor": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.RandomForestRegressor.Parameters": [[9, 2, 1, "", "RandomForestRegressorParametersMaxDepth"], [9, 2, 1, "", "RandomForestRegressorParametersNEstimators"], [9, 3, 1, "", "max_depth"], [9, 3, 1, "", "max_features"], [9, 3, 1, "", "n_estimators"]], "optunaz.config.optconfig.RandomForestRegressor.Parameters.RandomForestRegressorParametersMaxDepth": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.RandomForestRegressor.Parameters.RandomForestRegressorParametersNEstimators": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.RegressionScore": [[9, 3, 1, "", "EXPLAINED_VARIANCE"], [9, 3, 1, "", "MAX_ERROR"], [9, 3, 1, "", "NEG_MEAN_ABSOLUTE_ERROR"], [9, 3, 1, "", "NEG_MEAN_SQUARED_ERROR"], [9, 3, 
1, "", "NEG_MEDIAN_ABSOLUTE_ERROR"], [9, 3, 1, "", "R2"]], "optunaz.config.optconfig.Ridge": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.Ridge.Parameters": [[9, 2, 1, "", "Alpha"], [9, 3, 1, "", "alpha"]], "optunaz.config.optconfig.Ridge.Parameters.Alpha": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.SVC": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.SVC.Parameters": [[9, 3, 1, "", "C"], [9, 2, 1, "", "Gamma"], [9, 2, 1, "", "SVCParametersParameterC"], [9, 3, 1, "", "gamma"]], "optunaz.config.optconfig.SVC.Parameters.Gamma": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.SVC.Parameters.SVCParametersParameterC": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.SVR": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.SVR.Parameters": [[9, 3, 1, "", "C"], [9, 2, 1, "", "SVRParametersGamma"], [9, 2, 1, "", "SVRParametersParameterC"], [9, 3, 1, "", "gamma"]], "optunaz.config.optconfig.SVR.Parameters.SVRParametersGamma": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.SVR.Parameters.SVRParametersParameterC": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.XGBRegressor": [[9, 2, 1, "", "Parameters"], [9, 3, 1, "", "name"], [9, 3, 1, "", "parameters"]], "optunaz.config.optconfig.XGBRegressor.Parameters": [[9, 2, 1, "", "LearningRate"], [9, 2, 1, "", "MaxDepth"], [9, 2, 1, "", "NEstimators"], [9, 3, 1, "", "learning_rate"], [9, 3, 1, "", "max_depth"], [9, 3, 1, "", "n_estimators"]], "optunaz.config.optconfig.XGBRegressor.Parameters.LearningRate": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.XGBRegressor.Parameters.MaxDepth": [[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.config.optconfig.XGBRegressor.Parameters.NEstimators": 
[[9, 3, 1, "", "high"], [9, 3, 1, "", "low"]], "optunaz.datareader": [[8, 2, 1, "", "Dataset"], [8, 1, 1, "", "deduplicate"], [8, 1, 1, "", "isvalid"], [8, 1, 1, "", "merge"], [8, 1, 1, "", "read_data"], [8, 1, 1, "", "split"], [8, 1, 1, "", "transform"]], "optunaz.datareader.Dataset": [[8, 3, 1, "", "aux_column"], [8, 3, 1, "", "aux_transform"], [8, 4, 1, "", "check_sets"], [8, 3, 1, "", "deduplication_strategy"], [8, 4, 1, "", "get_merged_sets"], [8, 4, 1, "", "get_sets"], [8, 3, 1, "", "input_column"], [8, 3, 1, "", "intermediate_test_dataset_file"], [8, 3, 1, "", "intermediate_training_dataset_file"], [8, 3, 1, "", "log_transform"], [8, 3, 1, "", "log_transform_base"], [8, 3, 1, "", "log_transform_negative"], [8, 3, 1, "", "log_transform_unit_conversion"], [8, 3, 1, "", "probabilistic_threshold_representation"], [8, 3, 1, "", "probabilistic_threshold_representation_std"], [8, 3, 1, "", "probabilistic_threshold_representation_threshold"], [8, 3, 1, "", "response_column"], [8, 3, 1, "", "response_type"], [8, 3, 1, "", "save_intermediate_files"], [8, 3, 1, "", "split_strategy"], [8, 3, 1, "", "test_dataset_file"], [8, 3, 1, "", "training_dataset_file"]], "optunaz.descriptors": [[8, 2, 1, "", "Avalon"], [8, 2, 1, "", "CanonicalSmiles"], [8, 2, 1, "", "CompositeDescriptor"], [8, 2, 1, "", "ECFP"], [8, 2, 1, "", "ECFP_counts"], [8, 2, 1, "", "FittedSklearnScaler"], [8, 2, 1, "", "GenericScaffold"], [8, 2, 1, "", "JazzyDescriptors"], [8, 2, 1, "", "MACCS_keys"], [8, 2, 1, "", "MolDescriptor"], [8, 5, 1, "", "NoValidSmiles"], [8, 2, 1, "", "PathFP"], [8, 2, 1, "", "PhyschemDescriptors"], [8, 2, 1, "", "PrecomputedDescriptorFromFile"], [8, 2, 1, "", "RdkitDescriptor"], [8, 2, 1, "", "Scaffold"], [8, 2, 1, "", "ScaledDescriptor"], [8, 5, 1, "", "ScalingFittingError"], [8, 2, 1, "", "SmilesAndSideInfoFromFile"], [8, 2, 1, "", "SmilesFromFile"], [8, 2, 1, "", "UnfittedSklearnScaler"], [8, 2, 1, "", "UnscaledJazzyDescriptors"], [8, 2, 1, "", "UnscaledPhyschemDescriptors"], 
[8, 2, 1, "", "UnscaledZScalesDescriptors"], [8, 2, 1, "", "ValidDescriptor"], [8, 2, 1, "", "ZScalesDescriptors"], [8, 1, 1, "", "descriptor_from_config"], [8, 1, 1, "", "mol_from_smi"], [8, 1, 1, "", "numpy_from_rdkit"]], "optunaz.descriptors.Avalon": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_mol"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.Avalon.Parameters": [[8, 3, 1, "", "nBits"]], "optunaz.descriptors.CanonicalSmiles": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_smi"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.CompositeDescriptor": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_smi"], [8, 4, 1, "", "fp_info"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.CompositeDescriptor.Parameters": [[8, 3, 1, "", "descriptors"]], "optunaz.descriptors.ECFP": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_mol"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.ECFP.Parameters": [[8, 3, 1, "", "nBits"], [8, 3, 1, "", "radius"], [8, 3, 1, "", "returnRdkit"]], "optunaz.descriptors.ECFP_counts": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_mol"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.ECFP_counts.Parameters": [[8, 3, 1, "", "nBits"], [8, 3, 1, "", "radius"], [8, 3, 1, "", "useFeatures"]], "optunaz.descriptors.FittedSklearnScaler": [[8, 4, 1, "", "get_fitted_scaler"], [8, 3, 1, "", "name"], [8, 3, 1, "", "saved_params"]], "optunaz.descriptors.GenericScaffold": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_smi"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.JazzyDescriptors": [[8, 2, 1, "", "Parameters"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.JazzyDescriptors.Parameters": [[8, 3, 1, "", "descriptor"], [8, 3, 1, "", "jazzy_filters"], [8, 3, 1, "", "jazzy_names"], [8, 3, 1, "", 
"scaler"]], "optunaz.descriptors.MACCS_keys": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_mol"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.MolDescriptor": [[8, 4, 1, "", "calculate_from_smi"], [8, 4, 1, "", "parallel_compute_descriptor"]], "optunaz.descriptors.PathFP": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_mol"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.PathFP.Parameters": [[8, 3, 1, "", "fpSize"], [8, 3, 1, "", "maxPath"]], "optunaz.descriptors.PhyschemDescriptors": [[8, 2, 1, "", "Parameters"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.PhyschemDescriptors.Parameters": [[8, 3, 1, "", "descriptor"], [8, 3, 1, "", "rdkit_names"], [8, 3, 1, "", "scaler"]], "optunaz.descriptors.PrecomputedDescriptorFromFile": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_smi"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.PrecomputedDescriptorFromFile.Parameters": [[8, 3, 1, "", "file"], [8, 3, 1, "", "input_column"], [8, 3, 1, "", "response_column"]], "optunaz.descriptors.RdkitDescriptor": [[8, 4, 1, "", "calculate_from_mol"], [8, 4, 1, "", "calculate_from_smi"]], "optunaz.descriptors.Scaffold": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_smi"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.ScaledDescriptor": [[8, 2, 1, "", "ScaledDescriptorParameters"], [8, 4, 1, "", "calculate_from_smi"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"], [8, 4, 1, "", "set_unfitted_scaler_data"]], "optunaz.descriptors.ScaledDescriptor.ScaledDescriptorParameters": [[8, 3, 1, "", "descriptor"], [8, 3, 1, "", "scaler"]], "optunaz.descriptors.SmilesAndSideInfoFromFile": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_smi"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.SmilesAndSideInfoFromFile.Parameters": [[8, 2, 1, "", "Aux_Weight_Pc"], [8, 
3, 1, "", "aux_weight_pc"], [8, 3, 1, "", "file"], [8, 3, 1, "", "input_column"]], "optunaz.descriptors.SmilesAndSideInfoFromFile.Parameters.Aux_Weight_Pc": [[8, 3, 1, "", "high"], [8, 3, 1, "", "low"], [8, 3, 1, "", "q"]], "optunaz.descriptors.SmilesFromFile": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_smi"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.UnfittedSklearnScaler": [[8, 2, 1, "", "MolData"], [8, 4, 1, "", "get_fitted_scaler_for_fp"], [8, 3, 1, "", "mol_data"], [8, 3, 1, "", "name"]], "optunaz.descriptors.UnfittedSklearnScaler.MolData": [[8, 3, 1, "", "file_path"], [8, 3, 1, "", "smiles_column"]], "optunaz.descriptors.UnscaledJazzyDescriptors": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_smi"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.UnscaledJazzyDescriptors.Parameters": [[8, 3, 1, "", "jazzy_filters"], [8, 3, 1, "", "jazzy_names"]], "optunaz.descriptors.UnscaledPhyschemDescriptors": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_mol"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.UnscaledPhyschemDescriptors.Parameters": [[8, 3, 1, "", "rdkit_names"]], "optunaz.descriptors.UnscaledZScalesDescriptors": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_smi"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.ValidDescriptor": [[8, 2, 1, "", "Parameters"], [8, 4, 1, "", "calculate_from_smi"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.ZScalesDescriptors": [[8, 2, 1, "", "Parameters"], [8, 3, 1, "", "name"], [8, 3, 1, "", "parameters"]], "optunaz.descriptors.ZScalesDescriptors.Parameters": [[8, 3, 1, "", "descriptor"], [8, 3, 1, "", "scaler"]], "optunaz.evaluate": [[8, 1, 1, "", "get_merged_train_score"], [8, 1, 1, "", "get_scores"], [8, 1, 1, "", "get_train_test_scores"], [8, 1, 1, "", "score_all"], [8, 1, 1, "", "score_all_smiles"]], 
"optunaz.explainability": [[8, 1, 1, "", "ExplainPreds"], [8, 1, 1, "", "ShapExplainer"], [8, 1, 1, "", "explain_ECFP"], [8, 1, 1, "", "get_ecfp_fpinfo"], [8, 1, 1, "", "get_ecfpcount_fpinfo"], [8, 1, 1, "", "get_fp_info"], [8, 1, 1, "", "runShap"]], "optunaz.metircs": [[8, 1, 1, "", "auc_pr_cal"], [8, 1, 1, "", "bedroc_score"], [8, 1, 1, "", "concordance_index"], [8, 1, 1, "", "validate_cls_input"]], "optunaz.model_writer": [[8, 2, 1, "", "Predictor"], [8, 2, 1, "", "QSARtunaModel"], [8, 1, 1, "", "get_metadata"], [8, 1, 1, "", "get_transform"], [8, 1, 1, "", "perform_ptr"], [8, 1, 1, "", "save_model"], [8, 1, 1, "", "wrap_model"]], "optunaz.model_writer.Predictor": [[8, 4, 1, "", "explain"], [8, 4, 1, "", "predict"], [8, 4, 1, "", "predict_proba"], [8, 4, 1, "", "predict_uncert"]], "optunaz.model_writer.QSARtunaModel": [[8, 3, 1, "", "aux_transform"], [8, 3, 1, "", "descriptor"], [8, 3, 1, "", "metadata"], [8, 3, 1, "", "mode"], [8, 4, 1, "", "predict_from_smiles"], [8, 3, 1, "", "predictor"], [8, 3, 1, "", "transform"]], "optunaz.objective": [[8, 5, 1, "", "NoValidDescriptors"], [8, 2, 1, "", "Objective"], [8, 1, 1, "", "null_scores"]], "optunaz.objective.Objective": [[8, 3, 1, "", "cache"], [8, 3, 1, "", "optconfig"], [8, 3, 1, "", "train_aux"], [8, 3, 1, "", "train_smiles"], [8, 3, 1, "", "train_y"]], "optunaz.optbuild": [[8, 1, 1, "", "main"], [8, 1, 1, "", "predict_pls"]], "optunaz.predict": [[8, 5, 1, "", "ArgsError"], [8, 5, 1, "", "AuxCovariateMissing"], [8, 5, 1, "", "PrecomputedError"], [8, 5, 1, "", "UncertaintyError"], [8, 1, 1, "", "check_precomp_args"], [8, 1, 1, "", "main"], [8, 1, 1, "", "validate_args"], [8, 1, 1, "", "validate_aux"], [8, 1, 1, "", "validate_set_precomputed"], [8, 1, 1, "", "validate_uncertainty"]], "optunaz.schemagen": [[8, 1, 1, "", "doctitle"], [8, 1, 1, "", "main"], [8, 1, 1, "", "patch_schema_generic"], [8, 1, 1, "", "patch_schema_optunaz"], [8, 1, 1, "", "type_base_schema"]], "optunaz.three_step_opt_build_merge": [[8, 1, 1, 
"", "base_chemprop_params"], [8, 1, 1, "", "build_best"], [8, 1, 1, "", "build_merged"], [8, 1, 1, "", "buildconfig_best"], [8, 1, 1, "", "log_scores"], [8, 1, 1, "", "optimize"], [8, 1, 1, "", "run_study"], [8, 1, 1, "", "split_optimize"]], "optunaz.utils": [[11, 0, 0, "-", "enums"], [10, 0, 0, "-", "files_paths"], [10, 1, 1, "", "load_df_from_file"], [10, 0, 0, "-", "load_json"], [10, 1, 1, "", "md5_hash"], [10, 1, 1, "", "mkdict"], [10, 0, 0, "-", "mlflow"], [12, 0, 0, "-", "preprocessing"], [10, 1, 1, "", "remove_failed_idx"], [10, 0, 0, "-", "schema"], [10, 0, 0, "-", "tracking"]], "optunaz.utils.enums": [[11, 2, 1, "", "MlflowLogParams"], [11, 2, 1, "", "StudyUserAttrs"], [11, 2, 1, "", "TrialParams"], [11, 2, 1, "", "TrialUserAttrs"], [11, 0, 0, "-", "building_configuration_enum"], [11, 0, 0, "-", "configuration_enum"], [11, 0, 0, "-", "interface_enum"], [11, 0, 0, "-", "model_runner_enum"], [11, 0, 0, "-", "objective_enum"], [11, 0, 0, "-", "optimization_configuration_enum"], [11, 0, 0, "-", "prediction_configuration_enum"], [11, 0, 0, "-", "return_values_enum"], [11, 0, 0, "-", "visualization_enum"]], "optunaz.utils.enums.MlflowLogParams": [[11, 3, 1, "", "TRIAL_NUMBER"]], "optunaz.utils.enums.StudyUserAttrs": [[11, 3, 1, "", "OPTCONFIG"]], "optunaz.utils.enums.TrialParams": [[11, 3, 1, "", "ALGORITHM_HASH"], [11, 3, 1, "", "ALGORITHM_NAME"], [11, 3, 1, "", "DESCRIPTOR"]], "optunaz.utils.enums.TrialUserAttrs": [[11, 3, 1, "", "TEST_SCORES"], [11, 3, 1, "", "TRAIN_SCORES"]], "optunaz.utils.enums.building_configuration_enum": [[11, 2, 1, "", "BuildingConfigurationEnum"]], "optunaz.utils.enums.building_configuration_enum.BuildingConfigurationEnum": [[11, 3, 1, "", "GENERAL_CLASSIFIER"], [11, 3, 1, "", "GENERAL_HYPERPARAMETERS"], [11, 3, 1, "", "GENERAL_REGRESSOR"], [11, 3, 1, "", "METADATA"], [11, 3, 1, "", "METADATA_BESTTRIAL"], [11, 3, 1, "", "METADATA_BESTVALUE"]], "optunaz.utils.enums.configuration_enum": [[11, 2, 1, "", "ConfigurationEnum"]], 
"optunaz.utils.enums.configuration_enum.ConfigurationEnum": [[11, 3, 1, "", "ALGORITHMS"], [11, 3, 1, "", "ALGORITHMS_ADABOOSTCLASSIFIER"], [11, 3, 1, "", "ALGORITHMS_ADABOOSTCLASSIFIER_LEARNING_RATE"], [11, 3, 1, "", "ALGORITHMS_ADABOOSTCLASSIFIER_N_ESTIMATORS"], [11, 3, 1, "", "ALGORITHMS_CALIBRATEDCLASSIFIERCV"], [11, 3, 1, "", "ALGORITHMS_CALIBRATEDCLASSIFIERCV_ENSEMBLE"], [11, 3, 1, "", "ALGORITHMS_CALIBRATEDCLASSIFIERCV_ESTIMATOR"], [11, 3, 1, "", "ALGORITHMS_CALIBRATEDCLASSIFIERCV_METHOD"], [11, 3, 1, "", "ALGORITHMS_CALIBRATEDCLASSIFIERCV_N_FOLDS"], [11, 3, 1, "", "ALGORITHMS_CALIBRATEDCLASSIFIERCV_PARAMS"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_ACTIVATION"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_AGGREGATION"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_AGGREGATION_NORM"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_BATCH_SIZE"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_CLASSIFIER"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_DEPTH"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_DROPOUT"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_ENSEMBLE_SIZE"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_EPOCHS"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_FEATURES_GENERATOR"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_FFN_HIDDEN_SIZE"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_FFN_NUM_LAYERS"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_FINAL_LR_RATIO_EXP"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_FRZN"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_HIDDEN_SIZE"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_HYPEROPT_CLASSIFIER"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_HYPEROPT_REGRESSOR"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_INIT_LR_RATIO_EXP"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_MAX_LR_EXP"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_NUM_ITERS"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_PRETRAINED_MODEL"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_REGRESSOR"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_SEARCH_PARAMETER_LEVEL"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_STARTUP_RANDOM_ITERS"], [11, 3, 1, "", "ALGORITHMS_CHEMPROP_WARMUP_EPOCHS_RATIO"], [11, 3, 
1, "", "ALGORITHMS_HIGH"], [11, 3, 1, "", "ALGORITHMS_INTERFACE_SKLEARN"], [11, 3, 1, "", "ALGORITHMS_INTERFACE_XGBOOST"], [11, 3, 1, "", "ALGORITHMS_KNEIGHBORSCLASSIFIER"], [11, 3, 1, "", "ALGORITHMS_KNEIGHBORSREGRESSOR"], [11, 3, 1, "", "ALGORITHMS_KNEIGHBORS_METRIC"], [11, 3, 1, "", "ALGORITHMS_KNEIGHBORS_N_NEIGHBORS"], [11, 3, 1, "", "ALGORITHMS_KNEIGHBORS_WEIGHTS"], [11, 3, 1, "", "ALGORITHMS_LASSO"], [11, 3, 1, "", "ALGORITHMS_LASSO_ALPHA"], [11, 3, 1, "", "ALGORITHMS_LOGISTICREGRESSION"], [11, 3, 1, "", "ALGORITHMS_LOGISTICREGRESSION_C"], [11, 3, 1, "", "ALGORITHMS_LOGISTICREGRESSION_SOLVER"], [11, 3, 1, "", "ALGORITHMS_LOW"], [11, 3, 1, "", "ALGORITHMS_MAPIE"], [11, 3, 1, "", "ALGORITHMS_MAPIE_ALPHA"], [11, 3, 1, "", "ALGORITHMS_PLSREGRESSION"], [11, 3, 1, "", "ALGORITHMS_PLSREGRESSION_N_COMPONENTS"], [11, 3, 1, "", "ALGORITHMS_PRF"], [11, 3, 1, "", "ALGORITHMS_PRF_MAX_DEPTH"], [11, 3, 1, "", "ALGORITHMS_PRF_MAX_FEATURES"], [11, 3, 1, "", "ALGORITHMS_PRF_MINPYSUMLEAF"], [11, 3, 1, "", "ALGORITHMS_PRF_N_ESTIMATORS"], [11, 3, 1, "", "ALGORITHMS_PRF_USE_PY_GINI"], [11, 3, 1, "", "ALGORITHMS_PRF_USE_PY_LEAFS"], [11, 3, 1, "", "ALGORITHMS_Q"], [11, 3, 1, "", "ALGORITHMS_RFCLASSIFIER"], [11, 3, 1, "", "ALGORITHMS_RFREGRESSOR"], [11, 3, 1, "", "ALGORITHMS_RF_MAX_DEPTH"], [11, 3, 1, "", "ALGORITHMS_RF_MAX_FEATURES"], [11, 3, 1, "", "ALGORITHMS_RF_N_ESTIMATORS"], [11, 3, 1, "", "ALGORITHMS_RIDGE"], [11, 3, 1, "", "ALGORITHMS_RIDGE_ALPHA"], [11, 3, 1, "", "ALGORITHMS_SVC"], [11, 3, 1, "", "ALGORITHMS_SVC_C"], [11, 3, 1, "", "ALGORITHMS_SVC_GAMMA"], [11, 3, 1, "", "ALGORITHMS_SVR"], [11, 3, 1, "", "ALGORITHMS_SVR_C"], [11, 3, 1, "", "ALGORITHMS_SVR_GAMMA"], [11, 3, 1, "", "ALGORITHMS_XGBREGRESSOR"], [11, 3, 1, "", "ALGORITHMS_XGBREGRESSOR_LEARNING_RATE"], [11, 3, 1, "", "ALGORITHMS_XGBREGRESSOR_MAX_DEPTH"], [11, 3, 1, "", "ALGORITHMS_XGBREGRESSOR_N_ESTIMATORS"], [11, 3, 1, "", "DATA"], [11, 3, 1, "", "DATA_INPUTCOLUMN"], [11, 3, 1, "", "DATA_RESPONSECOLUMN"], [11, 3, 
1, "", "DATA_TEST"], [11, 3, 1, "", "DATA_TRAINING"], [11, 3, 1, "", "DESCRIPTORS"], [11, 3, 1, "", "DESCRIPTORS_AVALON"], [11, 3, 1, "", "DESCRIPTORS_AVALON_NBITS"], [11, 3, 1, "", "DESCRIPTORS_COMPOSITE"], [11, 3, 1, "", "DESCRIPTORS_ECFP"], [11, 3, 1, "", "DESCRIPTORS_ECFPCOUNTS"], [11, 3, 1, "", "DESCRIPTORS_ECFPCOUNTS_RADIUS"], [11, 3, 1, "", "DESCRIPTORS_ECFPCOUNTS_USEFEATURES"], [11, 3, 1, "", "DESCRIPTORS_ECFP_NBITS"], [11, 3, 1, "", "DESCRIPTORS_ECFP_RADIUS"], [11, 3, 1, "", "DESCRIPTORS_JAZZY"], [11, 3, 1, "", "DESCRIPTORS_JAZZY_JAZZYNAMES"], [11, 3, 1, "", "DESCRIPTORS_MACCSKEYS"], [11, 3, 1, "", "DESCRIPTORS_PATHFP"], [11, 3, 1, "", "DESCRIPTORS_PATHFP_FPSIZE"], [11, 3, 1, "", "DESCRIPTORS_PATHFP_MAXPATH"], [11, 3, 1, "", "DESCRIPTORS_PHYSCHEM"], [11, 3, 1, "", "DESCRIPTORS_PHYSCHEM_RDKITNAMES"], [11, 3, 1, "", "DESCRIPTORS_PRECOMPUTED"], [11, 3, 1, "", "DESCRIPTORS_PRECOMPUTED_FILE"], [11, 3, 1, "", "DESCRIPTORS_PRECOMPUTED_INPUT_COLUMNN"], [11, 3, 1, "", "DESCRIPTORS_PRECOMPUTED_RESPONSE_COLUMN"], [11, 3, 1, "", "DESCRIPTORS_SCALED"], [11, 3, 1, "", "DESCRIPTORS_SCALED_DESCRIPTOR"], [11, 3, 1, "", "DESCRIPTORS_SCALED_DESCRIPTOR_PARAMETERS"], [11, 3, 1, "", "DESCRIPTORS_SMILES"], [11, 3, 1, "", "DESCRIPTORS_SMILES_AND_SI"], [11, 3, 1, "", "DESCRIPTORS_SMILES_AND_SI_AUX_WEIGHT_PC"], [11, 3, 1, "", "DESCRIPTORS_SMILES_AND_SI_FILE"], [11, 3, 1, "", "DESCRIPTORS_SMILES_AND_SI_INPUT_COLUMN"], [11, 3, 1, "", "DESCRIPTORS_UNSC_JAZZY"], [11, 3, 1, "", "DESCRIPTORS_UNSC_PHYSCHEM"], [11, 3, 1, "", "DESCRIPTORS_UNSC_ZSCALES"], [11, 3, 1, "", "DESCRIPTORS_ZSCALES"], [11, 3, 1, "", "GENERAL_DISABLED"], [11, 3, 1, "", "GENERAL_PARAMETERS"], [11, 3, 1, "", "SETTINGS"], [11, 3, 1, "", "SETTINGS_CROSS_VALIDATION"], [11, 3, 1, "", "SETTINGS_DIRECTION"], [11, 3, 1, "", "SETTINGS_MODE"], [11, 3, 1, "", "SETTINGS_MODE_CLASSIFICATION"], [11, 3, 1, "", "SETTINGS_MODE_REGRESSION"], [11, 3, 1, "", "SETTINGS_N_JOBS"], [11, 3, 1, "", "SETTINGS_N_TRIALS"], [11, 3, 1, "", 
"SETTINGS_SHUFFLE"], [11, 3, 1, "", "TASK"], [11, 3, 1, "", "TASK_BUILDING"], [11, 3, 1, "", "TASK_OPTIMIZATION"]], "optunaz.utils.enums.interface_enum": [[11, 2, 1, "", "InterfaceEnum"]], "optunaz.utils.enums.interface_enum.InterfaceEnum": [[11, 3, 1, "", "CALIBRATED_SET"], [11, 3, 1, "", "CHEMPROP_SET"], [11, 3, 1, "", "PRF_SET"], [11, 3, 1, "", "SKLEARN_SET"], [11, 3, 1, "", "XGBOOST_SET"]], "optunaz.utils.enums.model_runner_enum": [[11, 2, 1, "", "ModelRunnerDataframeEnum"]], "optunaz.utils.enums.model_runner_enum.ModelRunnerDataframeEnum": [[11, 3, 1, "", "SET"], [11, 3, 1, "", "SMILES"], [11, 3, 1, "", "TEST"], [11, 3, 1, "", "TRAIN"], [11, 3, 1, "", "Y_PRED"], [11, 3, 1, "", "Y_TRUE"]], "optunaz.utils.enums.objective_enum": [[11, 2, 1, "", "ObjectiveEnum"]], "optunaz.utils.enums.objective_enum.ObjectiveEnum": [[11, 3, 1, "", "ATTRIBUTE_TRIAL_TRAIN_SCORE"], [11, 3, 1, "", "EXTRA_COLUMN_BESTHIT"]], "optunaz.utils.enums.optimization_configuration_enum": [[11, 2, 1, "", "OptimizationConfigurationEnum"]], "optunaz.utils.enums.prediction_configuration_enum": [[11, 2, 1, "", "PredictionConfigurationEnum"]], "optunaz.utils.enums.prediction_configuration_enum.PredictionConfigurationEnum": [[11, 3, 1, "", "DATA_DATASET"]], "optunaz.utils.enums.return_values_enum": [[11, 2, 1, "", "SklearnReturnValueEnum"], [11, 2, 1, "", "XGBoostReturnValueEnum"]], "optunaz.utils.enums.return_values_enum.SklearnReturnValueEnum": [[11, 3, 1, "", "CROSS_VALIDATE_FIT_TIME"], [11, 3, 1, "", "CROSS_VALIDATE_SCORE_TIME"], [11, 3, 1, "", "CROSS_VALIDATE_TEST_SCORE"], [11, 3, 1, "", "CROSS_VALIDATE_TRAIN_SCORE"]], "optunaz.utils.enums.return_values_enum.XGBoostReturnValueEnum": [[11, 3, 1, "", "CROSS_VALIDATE_FIT_TIME"], [11, 3, 1, "", "CROSS_VALIDATE_SCORE_TIME"], [11, 3, 1, "", "CROSS_VALIDATE_TEST_SCORE"], [11, 3, 1, "", "CROSS_VALIDATE_TRAIN_SCORE"]], "optunaz.utils.enums.visualization_enum": [[11, 2, 1, "", "VisualizationEnum"]], 
"optunaz.utils.enums.visualization_enum.VisualizationEnum": [[11, 3, 1, "", "OPTUNA_SYSTEM_ATTRS_INTERMEDIATE_VALUES"], [11, 3, 1, "", "OPTUNA_SYSTEM_ATTRS_NUMBER"], [11, 3, 1, "", "OPTUNA_SYSTEM_ATTRS_TRIAL_ID"], [11, 3, 1, "", "VISUALIZATION"], [11, 3, 1, "", "VISUALIZATION_CLASSIFIER"], [11, 3, 1, "", "VISUALIZATION_FILE_FORMAT"], [11, 3, 1, "", "VISUALIZATION_OUTPUT_FOLDER"], [11, 3, 1, "", "VISUALIZATION_PLOTS"], [11, 3, 1, "", "VISUALIZATION_PLOTS_CONTOUR"], [11, 3, 1, "", "VISUALIZATION_PLOTS_HISTORY"], [11, 3, 1, "", "VISUALIZATION_PLOTS_PARALLEL_COORDINATE"], [11, 3, 1, "", "VISUALIZATION_PLOTS_SLICE"], [11, 3, 1, "", "VISUALIZATION_REGRESSOR"], [11, 3, 1, "", "VISUALIZATION_USE_XVFB"]], "optunaz.utils.files_paths": [[10, 1, 1, "", "attach_root_path"], [10, 1, 1, "", "move_up_directory"]], "optunaz.utils.load_json": [[10, 1, 1, "", "loadJSON"]], "optunaz.utils.mlflow": [[10, 2, 1, "", "MLflowCallback"], [10, 1, 1, "", "add_ellipsis"], [10, 1, 1, "", "shorten_names"]], "optunaz.utils.mlflow.MLflowCallback": [[10, 3, 1, "", "optconfig"], [10, 4, 1, "", "prepare_tags"], [10, 4, 1, "", "tmp_buildconfig"], [10, 3, 1, "", "tracking_uri"], [10, 3, 1, "", "trial_number_offset"]], "optunaz.utils.preprocessing": [[12, 0, 0, "-", "deduplicator"], [12, 0, 0, "-", "splitter"], [12, 0, 0, "-", "transform"]], "optunaz.utils.preprocessing.deduplicator": [[12, 2, 1, "", "Deduplicator"], [12, 2, 1, "", "KeepAllNoDeduplication"], [12, 2, 1, "", "KeepAvg"], [12, 2, 1, "", "KeepFirst"], [12, 2, 1, "", "KeepLast"], [12, 2, 1, "", "KeepMax"], [12, 2, 1, "", "KeepMedian"], [12, 2, 1, "", "KeepMin"], [12, 2, 1, "", "KeepRandom"]], "optunaz.utils.preprocessing.deduplicator.Deduplicator": [[12, 4, 1, "", "dedup"]], "optunaz.utils.preprocessing.deduplicator.KeepAllNoDeduplication": [[12, 4, 1, "", "dedup"], [12, 3, 1, "", "name"]], "optunaz.utils.preprocessing.deduplicator.KeepAvg": [[12, 4, 1, "", "dedup"], [12, 3, 1, "", "name"]], 
"optunaz.utils.preprocessing.deduplicator.KeepFirst": [[12, 4, 1, "", "dedup"], [12, 3, 1, "", "name"]], "optunaz.utils.preprocessing.deduplicator.KeepLast": [[12, 4, 1, "", "dedup"], [12, 3, 1, "", "name"]], "optunaz.utils.preprocessing.deduplicator.KeepMax": [[12, 4, 1, "", "dedup"], [12, 3, 1, "", "name"]], "optunaz.utils.preprocessing.deduplicator.KeepMedian": [[12, 4, 1, "", "dedup"], [12, 3, 1, "", "name"]], "optunaz.utils.preprocessing.deduplicator.KeepMin": [[12, 4, 1, "", "dedup"], [12, 3, 1, "", "name"]], "optunaz.utils.preprocessing.deduplicator.KeepRandom": [[12, 4, 1, "", "dedup"], [12, 3, 1, "", "name"], [12, 3, 1, "", "seed"]], "optunaz.utils.preprocessing.splitter": [[12, 2, 1, "", "GroupingSplitter"], [12, 2, 1, "", "HistogramStratifiedShuffleSplit"], [12, 2, 1, "", "KFold"], [12, 2, 1, "", "NoSplitting"], [13, 2, 1, "", "Predefined"], [13, 2, 1, "", "Random"], [13, 2, 1, "", "ScaffoldSplit"], [12, 2, 1, "", "SklearnSplitter"], [12, 2, 1, "", "Splitter"], [13, 2, 1, "", "Stratified"], [13, 2, 1, "", "Temporal"], [12, 1, 1, "", "butina_cluster"], [12, 1, 1, "", "fd_bin"], [12, 1, 1, "", "stratify"]], "optunaz.utils.preprocessing.splitter.GroupingSplitter": [[12, 4, 1, "", "groups"]], "optunaz.utils.preprocessing.splitter.HistogramStratifiedShuffleSplit": [[12, 3, 1, "", "bins"], [12, 4, 1, "", "get_n_splits"], [12, 3, 1, "", "n_splits"], [12, 3, 1, "", "random_state"], [12, 4, 1, "", "split"], [12, 3, 1, "", "test_fraction"]], "optunaz.utils.preprocessing.splitter.KFold": [[12, 4, 1, "", "get_sklearn_splitter"], [12, 3, 1, "", "name"], [12, 3, 1, "", "random_state"], [12, 3, 1, "", "shuffle"], [12, 4, 1, "", "split"]], "optunaz.utils.preprocessing.splitter.NoSplitting": [[12, 4, 1, "", "get_sklearn_splitter"], [12, 3, 1, "", "name"], [12, 4, 1, "", "split"]], "optunaz.utils.preprocessing.splitter.Predefined": [[12, 3, 1, "", "column_name"], [12, 4, 1, "", "get_sklearn_splitter"], [12, 4, 1, "", "groups"], [12, 3, 1, "", "name"], [13, 4, 1, "", 
"split"]], "optunaz.utils.preprocessing.splitter.Random": [[12, 3, 1, "", "fraction"], [12, 4, 1, "", "get_sklearn_splitter"], [12, 3, 1, "", "name"], [12, 3, 1, "", "seed"]], "optunaz.utils.preprocessing.splitter.ScaffoldSplit": [[12, 3, 1, "", "bins"], [12, 3, 1, "", "butina_cluster"], [12, 4, 1, "", "get_n_splits"], [12, 4, 1, "", "get_sklearn_splitter"], [13, 4, 1, "", "groups"], [12, 3, 1, "", "make_scaffold_generic"], [12, 3, 1, "", "name"], [12, 3, 1, "", "random_state"], [13, 4, 1, "", "split"]], "optunaz.utils.preprocessing.splitter.SklearnSplitter": [[12, 4, 1, "", "get_n_splits"], [12, 4, 1, "", "split"]], "optunaz.utils.preprocessing.splitter.Splitter": [[12, 4, 1, "", "get_sklearn_splitter"], [12, 4, 1, "", "split"]], "optunaz.utils.preprocessing.splitter.Stratified": [[12, 3, 1, "", "bins"], [12, 3, 1, "", "fraction"], [12, 4, 1, "", "get_sklearn_splitter"], [12, 3, 1, "", "name"], [12, 3, 1, "", "seed"]], "optunaz.utils.preprocessing.splitter.Temporal": [[12, 3, 1, "", "fraction"], [12, 4, 1, "", "get_sklearn_splitter"], [12, 3, 1, "", "name"], [13, 4, 1, "", "split"]], "optunaz.utils.preprocessing.transform": [[12, 2, 1, "", "AuxTransformer"], [12, 2, 1, "", "DataTransform"], [12, 2, 1, "", "LogBase"], [12, 2, 1, "", "LogNegative"], [14, 2, 1, "", "ModelDataTransform"], [14, 2, 1, "", "PTRTransform"], [14, 2, 1, "", "VectorFromColumn"], [14, 2, 1, "", "ZScales"]], "optunaz.utils.preprocessing.transform.AuxTransformer": [[12, 4, 1, "", "transform"]], "optunaz.utils.preprocessing.transform.DataTransform": [[12, 4, 1, "", "transform"]], "optunaz.utils.preprocessing.transform.LogBase": [[12, 3, 1, "", "LOG"], [12, 3, 1, "", "LOG10"], [12, 3, 1, "", "LOG2"]], "optunaz.utils.preprocessing.transform.LogNegative": [[12, 3, 1, "", "FALSE"], [12, 3, 1, "", "TRUE"]], "optunaz.utils.preprocessing.transform.ModelDataTransform": [[14, 2, 1, "", "Parameters"], [12, 3, 1, "", "base_dict"], [12, 3, 1, "", "base_negation"], [12, 3, 1, "", "name"], [12, 3, 1, "", 
"parameters"], [12, 3, 1, "", "reverse_dict"], [12, 4, 1, "", "reverse_transform"], [12, 4, 1, "", "reverse_transform_df"], [12, 4, 1, "", "reverse_transform_one"], [12, 4, 1, "", "transform"], [12, 4, 1, "", "transform_df"], [12, 4, 1, "", "transform_one"]], "optunaz.utils.preprocessing.transform.ModelDataTransform.Parameters": [[12, 3, 1, "", "base"], [12, 3, 1, "", "conversion"], [12, 3, 1, "", "negation"]], "optunaz.utils.preprocessing.transform.PTRTransform": [[14, 2, 1, "", "Parameters"], [12, 3, 1, "", "name"], [12, 3, 1, "", "parameters"], [12, 4, 1, "", "reverse_transform"], [12, 4, 1, "", "transform"]], "optunaz.utils.preprocessing.transform.PTRTransform.Parameters": [[12, 3, 1, "", "std"], [12, 3, 1, "", "threshold"]], "optunaz.utils.preprocessing.transform.VectorFromColumn": [[14, 2, 1, "", "Parameters"], [12, 3, 1, "", "name"], [12, 3, 1, "", "parameters"], [12, 4, 1, "", "transform"]], "optunaz.utils.preprocessing.transform.VectorFromColumn.Parameters": [[12, 3, 1, "", "delimiter"]], "optunaz.utils.preprocessing.transform.ZScales": [[14, 2, 1, "", "Parameters"], [12, 3, 1, "", "name"], [12, 3, 1, "", "parameters"], [12, 4, 1, "", "transform"]], "optunaz.utils.schema": [[10, 1, 1, "", "add_boolean_guards_for_schema_properties"], [10, 1, 1, "", "addsibling"], [10, 1, 1, "", "addtitles"], [10, 1, 1, "", "copytitle"], [10, 1, 1, "", "create_dependency"], [10, 1, 1, "", "delsibling"], [10, 1, 1, "", "getref"], [10, 1, 1, "", "remove_schema_properties"], [10, 1, 1, "", "replaceenum"], [10, 1, 1, "", "replacekey"], [10, 1, 1, "", "replacevalue"]], "optunaz.utils.tracking": [[10, 2, 1, "", "BuildTrackingData"], [10, 2, 1, "", "Datapoint"], [10, 2, 1, "", "InternalTrackingCallback"], [10, 2, 1, "", "TrackingData"], [10, 1, 1, "", "get_authorization_header"], [10, 1, 1, "", "removeprefix"], [10, 1, 1, "", "round_scores"], [10, 1, 1, "", "track_build"]], "optunaz.utils.tracking.BuildTrackingData": [[10, 3, 1, "", "response_column_name"], [10, 3, 1, "", 
"test_points"], [10, 3, 1, "", "test_scores"]], "optunaz.utils.tracking.Datapoint": [[10, 3, 1, "", "expected"], [10, 3, 1, "", "predicted"], [10, 3, 1, "", "smiles"]], "optunaz.utils.tracking.InternalTrackingCallback": [[10, 3, 1, "", "optconfig"], [10, 3, 1, "", "trial_number_offset"]], "optunaz.utils.tracking.TrackingData": [[10, 3, 1, "", "all_cv_test_scores"], [10, 3, 1, "", "buildconfig"], [10, 3, 1, "", "scoring"], [10, 3, 1, "", "trial_number"], [10, 3, 1, "", "trial_state"], [10, 3, 1, "", "trial_value"]], "optunaz.visualizer": [[8, 2, 1, "", "Visualizer"]], "optunaz.visualizer.Visualizer": [[8, 4, 1, "", "plot_by_configuration"], [8, 4, 1, "", "plot_contour"], [8, 4, 1, "", "plot_history"], [8, 4, 1, "", "plot_parallel_coordinate"], [8, 4, 1, "", "plot_slice"]]}, "objtypes": {"0": "py:module", "1": "py:function", "2": "py:class", "3": "py:attribute", "4": "py:method", "5": "py:exception"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "function", "Python function"], "2": ["py", "class", "Python class"], "3": ["py", "attribute", "Python attribute"], "4": ["py", "method", "Python method"], "5": ["py", "exception", "Python exception"]}, "titleterms": {"qsartuna": [0, 4, 6, 7], "\ud80c\udd9b": 0, "qsar": 0, "us": 0, "optim": [0, 6], "hyperparamet": 0, "tune": 0, "formerli": 0, "optuna": 0, "az": 0, "qptuna": 0, "background": [0, 6], "The": [0, 6], "three": [0, 6], "step": [0, 6], "process": [0, 6], "json": 0, "base": [0, 7], "command": 0, "line": 0, "interfac": 0, "configur": [0, 6], "file": [0, 6], "run": [0, 6], "via": 0, "singulart": 0, "submit": 0, "slurm": 0, "model": [0, 6], "option": [0, 6], "inspect": 0, "from": [0, 6, 7], "python": 0, "jupyt": 0, "notebook": 0, "avail": [1, 2, 3, 13, 14], "algorithm": [1, 6], "adaboostclassifi": 1, "lasso": 1, "kneighborsclassifi": 1, "kneighborsregressor": 1, "logisticregress": 1, "plsregress": 1, "randomforestclassifi": 1, "randomforestregressor": 1, "ridg": 1, "svc": 1, "svr": 1, "xgbregressor": 
1, "prfclassifi": 1, "chempropregressor": 1, "chempropclassifi": 1, "chemprophyperoptclassifi": 1, "chemprophyperoptregressor": 1, "chemprophyperoptregressorpretrain": 1, "calibratedclassifiercvwithva": 1, "mapi": [1, 6], "dedupl": [2, 7, 12], "keepfirst": 2, "keeplast": 2, "keeprandom": 2, "keepmin": 2, "keepmax": 2, "keepavg": 2, "keepmedian": 2, "keepkeepallnodedupl": 2, "descriptor": [3, 6, 8], "avalon": 3, "ecfp": 3, "ecfp_count": 3, "pathfp": 3, "maccs_kei": 3, "unscaledphyschemdescriptor": 3, "unscaledjazzydescriptor": 3, "unscaledzscalesdescriptor": 3, "physchemdescriptor": 3, "jazzydescriptor": 3, "precomputeddescriptorfromfil": 3, "zscale": [3, 14], "smilesfromfil": 3, "smilesandsideinfofromfil": 3, "scaleddescriptor": 3, "compositedescriptor": 3, "welcom": 4, "document": 4, "develop": 4, "optunaz": [5, 8, 9, 10, 11, 12], "cli": 6, "tutori": 6, "thi": 6, "prepar": 6, "regress": 6, "exampl": 6, "creat": 6, "visual": [6, 8], "progress": 6, "pick": 6, "best": [6, 7], "trial": 6, "build": 6, "merg": 6, "preprocess": [6, 7, 12], "split": [6, 7], "data": [6, 7], "train": 6, "test": 6, "set": 6, "remov": 6, "duplic": [6, 7], "dataset": 6, "choos": 6, "score": 6, "function": 6, "advanc": 6, "functoinail": 6, "probabilist": [6, 7], "random": [6, 7, 13], "forest": 6, "prf": 6, "interlud": [6, 7], "cautionari": 6, "advic": 6, "y": 6, "respons": 6, "column": 6, "valid": 6, "chemprop": 6, "simpl": 6, "separ": 6, "shallow": 6, "method": 6, "default": 6, "behavior": 6, "turn": 6, "hyperopt": 6, "within": [6, 7], "functionail": 6, "veri": 6, "larg": 6, "comput": 6, "cost": 6, "A": 6, "note": 6, "mpnn": 6, "search": 6, "space": 6, "side": 6, "inform": 6, "multi": 6, "task": 6, "learn": 6, "mtl": 6, "combin": 6, "onli": 6, "recommend": 6, "long": 6, "time": 6, "pre": 6, "adapt": 6, "transfer": 6, "fingerprint": 6, "encod": 6, "latent": 6, "represent": [6, 7], "probabl": 6, "calibr": 6, "classif": 6, "uncertainti": 6, "estim": 6, "vennab": 6, "ensembl": 6, "dropout": 6, 
"explain": [6, 8], "shap": 6, "interpret": 6, "log": [6, 7], "transform": [6, 7, 12, 14], "covari": 6, "one": 6, "e": 6, "g": 6, "dose": 6, "point": 6, "co": 6, "variat": 6, "proteochemometr": 6, "pcm": 6, "more": 6, "vectorfromsmil": 6, "z": 6, "scale": 6, "object": [6, 8], "priorit": 6, "perform": [6, 7], "standard": 6, "deviat": 6, "further": 6, "precomput": 6, "introduct": 7, "translat": 7, "sdf": 7, "csv": 7, "need": 7, "deal": 7, "compar": 7, "differ": 7, "unif": 7, "strategi": 7, "tempor": [7, 13], "stratifi": [7, 13], "scaffold": 7, "input": 7, "user": 7, "import": 7, "logarithm": 7, "dataread": [7, 8], "threshold": 7, "ptr": [7, 14], "experiment": 7, "error": 7, "definit": 7, "implement": 7, "conclus": 7, "calcul": 7, "evalu": [7, 8], "reproduc": 7, "practic": 7, "packag": [8, 9, 10, 11, 12], "subpackag": [8, 10], "submodul": [8, 9, 10, 11, 12], "builder": 8, "modul": [8, 9, 10, 11, 12], "metirc": 8, "model_writ": 8, "optbuild": 8, "predict": 8, "schemagen": 8, "three_step_opt_build_merg": 8, "content": [8, 9, 10, 11, 12], "config": 9, "build_from_opt": 9, "buildconfig": 9, "optconfig": 9, "util": [10, 11, 12], "files_path": 10, "load_json": 10, "mlflow": 10, "schema": 10, "track": 10, "enum": 11, "building_configuration_enum": 11, "configuration_enum": 11, "interface_enum": 11, "model_runner_enum": 11, "objective_enum": 11, "optimization_configuration_enum": 11, "prediction_configuration_enum": 11, "return_values_enum": 11, "visualization_enum": 11, "splitter": [12, 13], "predefin": 13, "scaffoldsplit": 13, "modeldatatransform": 14, "vectorfromcolumn": 14}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "nbsphinx": 4, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 56}}) \ No newline at end of file