From 9b65cb26274ce73f7bf14906143e239e428fda2a Mon Sep 17 00:00:00 2001 From: perib Date: Mon, 12 Aug 2024 11:57:54 -0700 Subject: [PATCH 01/12] allow genetic feature selection to work with nan values --- tpot2/search_spaces/nodes/genetic_feature_selection.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tpot2/search_spaces/nodes/genetic_feature_selection.py b/tpot2/search_spaces/nodes/genetic_feature_selection.py index f9c4892a..97418cd1 100644 --- a/tpot2/search_spaces/nodes/genetic_feature_selection.py +++ b/tpot2/search_spaces/nodes/genetic_feature_selection.py @@ -21,6 +21,10 @@ def __init__(self, mask): def fit(self, X, y=None): return self + def _get_tags(self): + tags = {"allow_nan": True, "requires_y": False} + return tags + def _get_support_mask(self): return np.array(self.mask) From 7906c44c71638aa3ebc5185302f9695f76fac473 Mon Sep 17 00:00:00 2001 From: perib Date: Mon, 12 Aug 2024 12:04:28 -0700 Subject: [PATCH 02/12] fss tag to allow nans --- tpot2/builtin_modules/feature_set_selector.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tpot2/builtin_modules/feature_set_selector.py b/tpot2/builtin_modules/feature_set_selector.py index 9a36dc5d..ec6ab1f1 100644 --- a/tpot2/builtin_modules/feature_set_selector.py +++ b/tpot2/builtin_modules/feature_set_selector.py @@ -92,6 +92,9 @@ def fit(self, X, y=None): # def transform(self, X): + def _get_tags(self): + tags = {"allow_nan": True, "requires_y": False} + return tags def _get_support_mask(self): """ From 2b19c72e764462f808ec4d836e17889d3c4088bd Mon Sep 17 00:00:00 2001 From: perib Date: Mon, 12 Aug 2024 13:20:09 -0700 Subject: [PATCH 03/12] added an attribute so sklearn knows estimator has been fitted. also keep track of feature names --- tpot2/search_spaces/nodes/genetic_feature_selection.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tpot2/search_spaces/nodes/genetic_feature_selection.py b/tpot2/search_spaces/nodes/genetic_feature_selection.py index 97418cd1..28482120 100644 --- a/tpot2/search_spaces/nodes/genetic_feature_selection.py +++ b/tpot2/search_spaces/nodes/genetic_feature_selection.py @@ -19,6 +19,11 @@ def __init__(self, mask): self.mask = mask def fit(self, X, y=None): + self.n_features_in_ = X.shape[1] + if isinstance(X, pd.DataFrame): + self.feature_names_in_ = X.columns + # self.set_output(transform="pandas") + self.is_fitted_ = True #so sklearn knows it's fitted return self def _get_tags(self): @@ -28,6 +33,8 @@ def _get_tags(self): def _get_support_mask(self): return np.array(self.mask) + def get_feature_names_out(self, input_features=None): + return self.feature_names_in_[self.get_support()] class GeneticFeatureSelectorIndividual(SklearnIndividual): def __init__( self, From 2e5f90bd878ee968967b56f9e5cb6040616f0a79 Mon Sep 17 00:00:00 2001 From: perib Date: Mon, 12 Aug 2024 14:02:18 -0700 Subject: [PATCH 04/12] easier pandas output --- tpot2/search_spaces/nodes/genetic_feature_selection.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tpot2/search_spaces/nodes/genetic_feature_selection.py b/tpot2/search_spaces/nodes/genetic_feature_selection.py index 28482120..0bea039a 100644 --- a/tpot2/search_spaces/nodes/genetic_feature_selection.py +++ b/tpot2/search_spaces/nodes/genetic_feature_selection.py @@ -15,8 +15,11 @@ class MaskSelector(BaseEstimator, SelectorMixin): """Select predefined feature subsets.""" - def __init__(self, mask): + def __init__(self, mask, set_output_transform=None): self.mask = mask + self.set_output_transform = set_output_transform + if set_output_transform is not None: + self.set_output(transform=set_output_transform) def fit(self, X, y=None): self.n_features_in_ = X.shape[1] From 7af5b0b591fab00817e57a1b4a45f49703aba25f Mon Sep 17 00:00:00 2001 From: nickotto Date: Thu, 22 Aug 2024 13:58:50 -0700 Subject: [PATCH 05/12] Update docs.yml --- .github/workflows/docs.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 2d56cfaf..cc0a696f 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -22,6 +22,7 @@ jobs: pip install --upgrade pip pip install . pip install -r docs/requirements_docs.txt + pip install mkdocstrings[python] griffe # - name: Convert notebooks to HTML # # if: ${{ github.event_name == 'push' && contains(github.event.head_commit.modified, 'Tutorial/') && contains(github.event.head_commit.modified, '.ipynb') }} From 9fbbad5ba7641d1f37ac8854803b5ec189cd9a1f Mon Sep 17 00:00:00 2001 From: perib Date: Mon, 9 Sep 2024 11:54:54 -0700 Subject: [PATCH 06/12] added option to change the base nodetype for hyperparameter mutation/crossover --- tpot2/config/get_configspace.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tpot2/config/get_configspace.py b/tpot2/config/get_configspace.py index 4a5cc997..118d4412 100644 --- a/tpot2/config/get_configspace.py +++ b/tpot2/config/get_configspace.py @@ -401,12 +401,12 @@ def get_configspace(name, n_classes=3, n_samples=1000, n_features=100, random_st raise ValueError(f"Could not find configspace for {name}") -def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_state=None, return_choice_pipeline=True): +def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_state=None, return_choice_pipeline=True, base_node=EstimatorNode): #if list of names, return a list of EstimatorNodes if isinstance(name, list) or isinstance(name, np.ndarray): - search_spaces = [get_search_space(n, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=False) for n in name] + search_spaces = [get_search_space(n, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=False, base_node=base_node) for n in name] #remove Nones search_spaces = [s for s in search_spaces if s is not None] @@ -417,12 +417,12 @@ def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_st if name in GROUPNAMES: name_list = GROUPNAMES[name] - return get_search_space(name_list, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=return_choice_pipeline) + return get_search_space(name_list, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=return_choice_pipeline, base_node=base_node) - return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state) + return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, base_node=base_node) -def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None): +def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None, base_node=EstimatorNode): #these are wrappers that take in another estimator as a parameter # TODO Add AdaBoostRegressor, AdaBoostClassifier as wrappers? wrap a decision tree with different params? @@ -447,34 +447,34 @@ def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None #these are nodes that have special search spaces which require custom parsing of the hyperparameters if name == "RobustScaler": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.robust_scaler_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.robust_scaler_hyperparameter_parser) if name == "GradientBoostingClassifier": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GradientBoostingClassifier_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GradientBoostingClassifier_hyperparameter_parser) if name == "HistGradientBoostingClassifier": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.HistGradientBoostingClassifier_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.HistGradientBoostingClassifier_hyperparameter_parser) if name == "GradientBoostingRegressor": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GradientBoostingRegressor_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GradientBoostingRegressor_hyperparameter_parser) if name == "HistGradientBoostingRegressor": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.HistGradientBoostingRegressor_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.HistGradientBoostingRegressor_hyperparameter_parser) if name == "MLPClassifier": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.MLPClassifier_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.MLPClassifier_hyperparameter_parser) if name == "MLPRegressor": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.MLPRegressor_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.MLPRegressor_hyperparameter_parser) if name == "GaussianProcessRegressor": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GaussianProcessRegressor_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GaussianProcessRegressor_hyperparameter_parser) if name == "GaussianProcessClassifier": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GaussianProcessClassifier_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GaussianProcessClassifier_hyperparameter_parser) if name == "FeatureAgglomeration": configspace = get_configspace(name, n_features=n_features) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.FeatureAgglomeration_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.FeatureAgglomeration_hyperparameter_parser) configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state) if configspace is None: @@ -482,4 +482,4 @@ def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None warnings.warn(f"Could not find configspace for {name}") return None - return EstimatorNode(STRING_TO_CLASS[name], configspace) \ No newline at end of file + return base_node(STRING_TO_CLASS[name], configspace) \ No newline at end of file From 190a0f60565f83d401175a8bd2c43b0170557adf Mon Sep 17 00:00:00 2001 From: perib Date: Mon, 9 Sep 2024 12:06:50 -0700 Subject: [PATCH 07/12] cleanup and gradual estimator node example --- tpot2/search_spaces/base.py | 8 - .../nodes/estimator_node_gradual.py | 146 ++++++++++++++++++ 2 files changed, 146 insertions(+), 8 deletions(-) create mode 100644 tpot2/search_spaces/nodes/estimator_node_gradual.py diff --git a/tpot2/search_spaces/base.py b/tpot2/search_spaces/base.py index 3133057e..6dc2c76a 100644 --- a/tpot2/search_spaces/base.py +++ b/tpot2/search_spaces/base.py @@ -1,18 +1,10 @@ import tpot2 -import numpy as np -import pandas as pd import sklearn -from tpot2 import config -from typing import Generator, List, Tuple, Union -import random from sklearn.base import BaseEstimator import sklearn import networkx as nx from . import graph_utils from typing import final -from abc import ABC, abstractmethod - - diff --git a/tpot2/search_spaces/nodes/estimator_node_gradual.py b/tpot2/search_spaces/nodes/estimator_node_gradual.py new file mode 100644 index 00000000..f2e8cf81 --- /dev/null +++ b/tpot2/search_spaces/nodes/estimator_node_gradual.py @@ -0,0 +1,146 @@ +# try https://automl.github.io/ConfigSpace/main/api/hyperparameters.html + +import numpy as np +from tpot2.search_spaces.base import SklearnIndividual, SklearnIndividualGenerator +from ConfigSpace import ConfigurationSpace +from typing import final +import ConfigSpace + + +NONE_SPECIAL_STRING = "" +TRUE_SPECIAL_STRING = "" +FALSE_SPECIAL_STRING = "" + + +def default_hyperparameter_parser(params:dict) -> dict: + return params + + +# NOTE: This is not the default, currently experimental +class EstimatorNodeIndividual_gradual(SklearnIndividual): + """ + Note that ConfigurationSpace does not support None as a parameter. Instead, use the special string "". TPOT will automatically replace instances of this string with the Python None. + + Parameters + ---------- + method : type + The class of the estimator to be used + + space : ConfigurationSpace|dict + The hyperparameter space to be used. If a dict is passed, hyperparameters are fixed and not learned. + + """ + def __init__(self, method: type, + space: ConfigurationSpace|dict, #TODO If a dict is passed, hyperparameters are fixed and not learned. Is this confusing? Should we make a second node type? + hyperparameter_parser: callable = None, + rng=None) -> None: + super().__init__() + self.method = method + self.space = space + + if hyperparameter_parser is None: + self.hyperparameter_parser = default_hyperparameter_parser + else: + self.hyperparameter_parser = hyperparameter_parser + + if isinstance(space, dict): + self.hyperparameters = space + else: + rng = np.random.default_rng(rng) + self.space.seed(rng.integers(0, 2**32)) + self.hyperparameters = dict(self.space.sample_configuration()) + + self.check_hyperparameters_for_None() + + def mutate(self, rng=None): + if isinstance(self.space, dict): + return False + self.hyperparameters = gradual_hyperparameter_update(params=self.hyperparameters, configspace=self.space, rng=rng) + self.check_hyperparameters_for_None() + return True + + def crossover(self, other, rng=None): + if isinstance(self.space, dict): + return False + + rng = np.random.default_rng(rng) + if self.method != other.method: + return False + + #loop through hyperparameters, randomly swap items in self.hyperparameters with items in other.hyperparameters + for hyperparameter in self.space: + if rng.choice([True, False]): + if hyperparameter in other.hyperparameters: + self.hyperparameters[hyperparameter] = other.hyperparameters[hyperparameter] + + self.check_hyperparameters_for_None() + + return True + + def check_hyperparameters_for_None(self): + for key, value in self.hyperparameters.items(): + #if string + if isinstance(value, str): + if value == NONE_SPECIAL_STRING: + self.hyperparameters[key] = None + elif value == TRUE_SPECIAL_STRING: + self.hyperparameters[key] = True + elif value == FALSE_SPECIAL_STRING: + self.hyperparameters[key] = False + + @final #this method should not be overridden, instead override hyperparameter_parser + def export_pipeline(self, **kwargs): + return self.method(**self.hyperparameter_parser(self.hyperparameters)) + + def unique_id(self): + #return a dictionary of the method and the hyperparameters + method_str = self.method.__name__ + params = list(self.hyperparameters.keys()) + params = sorted(params) + + id_str = f"{method_str}({', '.join([f'{param}={self.hyperparameters[param]}' for param in params])})" + + return id_str + +def gradual_hyperparameter_update(params:dict, configspace:ConfigurationSpace, rng=None): + rng = np.random.default_rng(rng) + configspace.seed(rng.integers(0, 2**32)) + new_params = dict(configspace.sample_configuration()) + for param in list(new_params.keys()): + #if parameter is float, multiply by normal distribution + if param not in params: + continue + try: + if issubclass(type(configspace[param]), ConfigSpace.hyperparameters.hyperparameter.FloatHyperparameter): + + if configspace[param].log: + new_params[param] = params[param] * rng.lognormal(0, 1) + else: + new_params[param] = params[param] + rng.normal(0, .1)* (configspace[param].upper-configspace[param].lower) + # if check if above or below min and cap + if new_params[param] < configspace[param].lower: + new_params[param] = configspace[param].lower + elif new_params[param] > configspace[param].upper: + new_params[param] = configspace[param].upper + #if parameter is integer, add normal distribution + elif issubclass(type(configspace[param]), ConfigSpace.hyperparameters.hyperparameter.IntegerHyperparameter): + new_params[param] = params[param] * np.random.normal(0, 1) + # if check if above or below min and cap + if new_params[param] < configspace[param].lower: + new_params[param] = configspace[param].lower + elif new_params[param] > configspace[param].upper: + new_params[param] = configspace[param].upper + new_params[param] = int(new_params[param]) + except: + pass + + return new_params + +class EstimatorNode_gradual(SklearnIndividualGenerator): + def __init__(self, method, space, hyperparameter_parser=default_hyperparameter_parser): + self.method = method + self.space = space + self.hyperparameter_parser = hyperparameter_parser + + def generate(self, rng=None): + return EstimatorNodeIndividual_gradual(self.method, self.space, hyperparameter_parser=self.hyperparameter_parser, rng=rng) \ No newline at end of file From 45aaa56cddebedf37b175b64b89ec0ef099095b0 Mon Sep 17 00:00:00 2001 From: perib Date: Mon, 9 Sep 2024 12:34:22 -0700 Subject: [PATCH 08/12] change param range to prevent error and fix failing test --- tpot2/config/classifiers.py | 2 +- tpot2/config/regressors.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tpot2/config/classifiers.py b/tpot2/config/classifiers.py index 49b714ac..2fb09e41 100644 --- a/tpot2/config/classifiers.py +++ b/tpot2/config/classifiers.py @@ -535,7 +535,7 @@ def MLPClassifier_hyperparameter_parser(params): def get_GaussianProcessClassifier_ConfigurationSpace(n_features, random_state): space = { 'n_features': n_features, - 'alpha': Float("alpha", bounds=(1e-14, 1.0), log=True), + 'alpha': Float("alpha", bounds=(1e-10, 1.0), log=True), 'thetaL': Float("thetaL", bounds=(1e-10, 1e-3), log=True), 'thetaU': Float("thetaU", bounds=(1.0, 100000), log=True), } diff --git a/tpot2/config/regressors.py b/tpot2/config/regressors.py index d1b9343d..ab14a7ea 100644 --- a/tpot2/config/regressors.py +++ b/tpot2/config/regressors.py @@ -354,7 +354,7 @@ def get_ExtraTreesRegressor_ConfigurationSpace(random_state): def get_GaussianProcessRegressor_ConfigurationSpace(n_features, random_state): space = { 'n_features': n_features, - 'alpha': Float("alpha", bounds=(1e-14, 1.0), log=True), + 'alpha': Float("alpha", bounds=(1e-10, 1.0), log=True), 'thetaL': Float("thetaL", bounds=(1e-10, 1e-3), log=True), 'thetaU': Float("thetaU", bounds=(1.0, 100000), log=True), } From 5a12031bfa36e7e3498af1cf1b111612de56ccda Mon Sep 17 00:00:00 2001 From: perib Date: Mon, 9 Sep 2024 17:25:13 -0700 Subject: [PATCH 09/12] added amltk parser --- .../amltk_search_space_parser_example.ipynb | 1945 +++++++++++++++++ setup.py | 3 +- tpot2/__init__.py | 2 +- tpot2/utils/__init__.py | 11 +- tpot2/utils/amltk_parser.py | 72 + 5 files changed, 2030 insertions(+), 3 deletions(-) create mode 100644 Tutorial/amltk_search_space_parser_example.ipynb create mode 100644 tpot2/utils/amltk_parser.py diff --git a/Tutorial/amltk_search_space_parser_example.ipynb b/Tutorial/amltk_search_space_parser_example.ipynb new file mode 100644 index 00000000..9dc62527 --- /dev/null +++ b/Tutorial/amltk_search_space_parser_example.ipynb @@ -0,0 +1,1945 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The AMLTK (https://github.com/automl/amltk) provides a framework for developing AutoML systems. One component of this system is the search space definitions. \n", + "\n", + "TPOT2 provides a function called tpot2.utils.tpot2_parser which can convert a search space defined in the AMLTK API into the search space class used by TPOT2. This allows users to define a single search space to be used by both algorithms, facilitating better comparisons. Below is an example of a few search spaces defined in AMLTK and how to use them in TPOT2.\n", + "\n", + "Note: this feature is still experimental and not all features present in the AMLTK API are fully supported in TPOT2 yet. (For example, automated splitting based on categorical vs numeric with amltk.pipeline.Split is not currently implemented in the parser.)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
╭─ Split(split_imputation) ───────────────────────────────────────────────────────────────────────────────────────╮\n",
+       " ╭─ Sequential(categories) ───────────────────────────╮ ╭─ Sequential(numerics) ───────────────────────────────╮ \n",
+       "  ╭─ Fixed(ColumnTransformer) ─────────────────────╮   ╭─ Fixed(ColumnTransformer) ───────────────────────╮  \n",
+       "   item ColumnTransformer(transformers=[('passth…     item ColumnTransformer(transformers=[('passthro…   \n",
+       "        'passthrough',                                     'passthrough',                                \n",
+       "                                         <sklear…                                           <sklearn.…   \n",
+       "        object at 0x7ab9ec119d20>)])                       object at 0x7ab994db4c40>)])                  \n",
+       "  ╰────────────────────────────────────────────────╯   ╰──────────────────────────────────────────────────╯  \n",
+       "       \n",
+       "  ╭─ Fixed(SimpleImputer) ─────────────────────────╮   ╭─ Component(SimpleImputer) ─────────────╮            \n",
+       "   item SimpleImputer(fill_value='missing',           item  class SimpleImputer(...)                     \n",
+       "        strategy='constant')                          space {'strategy': ['mean', 'median']}             \n",
+       "  ╰────────────────────────────────────────────────╯   ╰────────────────────────────────────────╯            \n",
+       "    ╰──────────────────────────────────────────────────────╯ \n",
+       "  ╭─ Fixed(OneHotEncoder) ─────────────────────────╮                                                           \n",
+       "   item OneHotEncoder(drop='first',                                                                          \n",
+       "        sparse_output=False)                                                                                 \n",
+       "  ╰────────────────────────────────────────────────╯                                                           \n",
+       " ╰────────────────────────────────────────────────────╯                                                          \n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[38;2;119;125;167m╭─\u001b[0m\u001b[38;2;119;125;167m \u001b[0m\u001b[1;38;2;119;125;167mSplit\u001b[0m\u001b[38;2;119;125;167m(\u001b[0m\u001b[3;38;2;119;125;167msplit_imputation\u001b[0m\u001b[38;2;119;125;167m) \u001b[0m\u001b[38;2;119;125;167m──────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;119;125;167m─╮\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mcategories\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m──────────────────────────\u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mnumerics\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m──────────────────────────────\u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mColumnTransformer\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mColumnTransformer\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m──────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mColumnTransformer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mtransformers\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;39m(\u001b[0m\u001b[32m'passth…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mColumnTransformer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mtransformers\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;39m(\u001b[0m\u001b[32m'passthro…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[32m'passthrough'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[32m'passthrough'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m<\u001b[0m\u001b[1;95msklear…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m<\u001b[0m\u001b[1;95msklearn.…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7ab9ec119d20\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7ab994db4c40\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰──────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mSimpleImputer\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m────────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mSimpleImputer\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────────────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mSimpleImputer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mfill_value\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'missing'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=202447;https://www.scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html\u001b\\\u001b[4;39mSimpleImputer\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[33mstrategy\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'constant'\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'strategy'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m[\u001b[0m\u001b[32m'mean'\u001b[0m\u001b[39m, \u001b[0m\u001b[32m'median'\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m}\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╰────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m╰──────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mOneHotEncoder\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m────────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mOneHotEncoder\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mdrop\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'first'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[33msparse_output\u001b[0m\u001b[39m=\u001b[0m\u001b[3;91mFalse\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╰────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [], + "text/plain": [ + "Split(name='split_imputation', item=None, nodes=(Sequential(name='categories', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", + " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='SimpleImputer', item=SimpleImputer(fill_value='missing', strategy='constant'), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='OneHotEncoder', item=OneHotEncoder(drop='first', sparse_output=False), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='numerics', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", + " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Component(name='SimpleImputer', item=, nodes=(), config=None, space={'strategy': ['mean', 'median']}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.compose import make_column_selector\n", + "import numpy as np\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "from sklearn.svm import SVC\n", + "from amltk.pipeline import Choice, Component, Sequential, Split\n", + "import tpot2\n", + "from sklearn.preprocessing import FunctionTransformer\n", + "from sklearn.compose import make_column_transformer\n", + "import tpot2\n", + "import numpy as np\n", + "import sklearn\n", + "import sklearn.datasets\n", + "import pandas as pd\n", + "# create dummy pandas dataset with both categorical and numerical columns\n", + "X, y = sklearn.datasets.make_classification(n_samples=100, n_features=5, n_informative=3, n_classes=2, random_state=42)\n", + "X = pd.DataFrame(X, columns=[f\"num_{i}\" for i in range(5)])\n", + "# add 5 categorical columns\n", + "for i in range(5):\n", + " X[f\"cat_{i}\"] = np.random.choice([\"A\", \"B\", \"C\"], size=100)\n", + "y = y.flatten()\n", + "# train test split\n", + "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size=0.5)\n", + "\n", + "# TODO: implement support for this condition\n", + "# select_categories = make_column_selector(dtype_include=object)\n", + "# select_numerical = make_column_selector(dtype_include=np.number)\n", + "\n", + "# split_imputation = Split(\n", + "# {\n", + "# \"categories\": [SimpleImputer(strategy=\"constant\", fill_value=\"missing\"), OneHotEncoder(drop=\"first\")],\n", + "# \"numerics\": Component(SimpleImputer, space={\"strategy\": [\"mean\", \"median\"]}),\n", + "# },\n", + "# config={\"categories\": select_categories, \"numerics\": select_numerical}, #not yet supported\n", + "# name=\"feature_preprocessing\",\n", + "# )\n", + "# split_imputation\n", + "\n", + "select_categories = make_column_selector(dtype_include=object)\n", + "select_numerical = make_column_selector(dtype_include=np.number)\n", + "\n", + "cat_selector = make_column_transformer((\"passthrough\", select_categories))\n", + "num_selector = make_column_transformer((\"passthrough\", select_numerical))\n", + "\n", + "\n", + "split_imputation = Split(\n", + " {\n", + " \"categories\": [cat_selector,SimpleImputer(strategy=\"constant\", fill_value=\"missing\"), OneHotEncoder(drop=\"first\", sparse_output=False)],\n", + " \"numerics\": [num_selector, Component(SimpleImputer, space={\"strategy\": [\"mean\", \"median\"]})],\n", + " },\n", + " name=\"split_imputation\",\n", + ")\n", + "split_imputation" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
╭─ Sequential(my_pipeline) ───────────────────────────────────────────────────────────────────────────────────────╮\n",
+       " ╭─ Split(split_imputation) ───────────────────────────────────────────────────────────────────────────────────╮ \n",
+       "  ╭─ Sequential(categories) ─────────────────────────╮ ╭─ Sequential(numerics) ─────────────────────────────╮  \n",
+       "   ╭─ Fixed(ColumnTransformer) ───────────────────╮   ╭─ Fixed(ColumnTransformer) ─────────────────────╮   \n",
+       "    item ColumnTransformer(transformers=[('pass…     item ColumnTransformer(transformers=[('passth…    \n",
+       "         'passthrough',                                   'passthrough',                               \n",
+       "                                          <skle…                                           <sklear…    \n",
+       "         object at 0x7ab9ec119d20>)])                     object at 0x7ab994db4c40>)])                 \n",
+       "   ╰──────────────────────────────────────────────╯   ╰────────────────────────────────────────────────╯   \n",
+       "         \n",
+       "   ╭─ Fixed(SimpleImputer) ───────────────────────╮   ╭─ Component(SimpleImputer) ─────────────╮           \n",
+       "    item SimpleImputer(fill_value='missing',         item  class SimpleImputer(...)                    \n",
+       "         strategy='constant')                        space {'strategy': ['mean', 'median']}            \n",
+       "   ╰──────────────────────────────────────────────╯   ╰────────────────────────────────────────╯           \n",
+       "     ╰────────────────────────────────────────────────────╯  \n",
+       "   ╭─ Fixed(OneHotEncoder) ───────────────────────╮                                                          \n",
+       "    item OneHotEncoder(drop='first',                                                                       \n",
+       "         sparse_output=False)                                                                              \n",
+       "   ╰──────────────────────────────────────────────╯                                                          \n",
+       "  ╰──────────────────────────────────────────────────╯                                                         \n",
+       " ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ \n",
+       "  \n",
+       " ╭─ Choice(selectors) ─────────────────────────────────────────────────────╮                                     \n",
+       "  ╭─ Component(SelectKBest) ─────╮ ╭─ Component(VarianceThreshold) ─────╮                                      \n",
+       "   item  class SelectKBest(...)   item  class VarianceThreshold(...)                                       \n",
+       "   space {'k': (1, 10)}           space {'threshold': (0.1, 1)}                                            \n",
+       "  ╰──────────────────────────────╯ ╰────────────────────────────────────╯                                      \n",
+       " ╰─────────────────────────────────────────────────────────────────────────╯                                     \n",
+       "  \n",
+       " ╭─ Split(transformers) ─────────────────────────────────────────────────────────────────────────────────╮       \n",
+       "  ╭─ Sequential(passthrough) ─╮ ╭─ Sequential(polynomial) ────────────────╮ ╭─ Sequential(zerocount) ─╮        \n",
+       "   ╭─ Fixed(Passthrough) ─╮    ╭─ Component(PolynomialFeatures) ─────╮   ╭─ Fixed(ZeroCount) ─╮          \n",
+       "    item Passthrough()        item  class PolynomialFeatures(...)     item ZeroCount()             \n",
+       "   ╰──────────────────────╯     space {'degree': [2, 3]}               ╰────────────────────╯          \n",
+       "  ╰───────────────────────────╯  ╰─────────────────────────────────────╯  ╰─────────────────────────╯        \n",
+       "                                ╰─────────────────────────────────────────╯                                    \n",
+       " ╰───────────────────────────────────────────────────────────────────────────────────────────────────────╯       \n",
+       "  \n",
+       " ╭─ Choice(estimator) ─────────────────────────────────────────────────────────────────────────────────────────╮ \n",
+       "  ╭─ Component(RandomForestClassifier) ──────────╮ ╭─ Component(SVC) ────────────────────────────╮             \n",
+       "   item   class RandomForestClassifier(...)       item  class SVC(...)                                     \n",
+       "   config {'max_depth': 3}                        space {'kernel': ['linear', 'rbf', 'poly']}              \n",
+       "   space  {                                      ╰─────────────────────────────────────────────╯             \n",
+       "              'n_estimators': (10, 100),                                                                     \n",
+       "              'criterion': [                                                                                 \n",
+       "                  'gini',                                                                                    \n",
+       "                  'log_loss'                                                                                 \n",
+       "              ]                                                                                              \n",
+       "          }                                                                                                  \n",
+       "  ╰──────────────────────────────────────────────╯                                                             \n",
+       " ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ \n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mmy_pipeline\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m──────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m╭─\u001b[0m\u001b[38;2;119;125;167m \u001b[0m\u001b[1;38;2;119;125;167mSplit\u001b[0m\u001b[38;2;119;125;167m(\u001b[0m\u001b[3;38;2;119;125;167msplit_imputation\u001b[0m\u001b[38;2;119;125;167m) \u001b[0m\u001b[38;2;119;125;167m──────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;119;125;167m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mcategories\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m────────────────────────\u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mnumerics\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m────────────────────────────\u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mColumnTransformer\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m──────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mColumnTransformer\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mColumnTransformer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mtransformers\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;39m(\u001b[0m\u001b[32m'pass…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mColumnTransformer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mtransformers\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;39m(\u001b[0m\u001b[32m'passth…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[32m'passthrough'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[32m'passthrough'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m<\u001b[0m\u001b[1;95mskle…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m<\u001b[0m\u001b[1;95msklear…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7ab9ec119d20\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7ab994db4c40\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰──────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mSimpleImputer\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m──────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mSimpleImputer\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────────────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mSimpleImputer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mfill_value\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'missing'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=27035;https://www.scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html\u001b\\\u001b[4;39mSimpleImputer\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[33mstrategy\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'constant'\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'strategy'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m[\u001b[0m\u001b[32m'mean'\u001b[0m\u001b[39m, \u001b[0m\u001b[32m'median'\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m}\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰──────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╰────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m╰────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mOneHotEncoder\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m──────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mOneHotEncoder\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mdrop\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'first'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[33msparse_output\u001b[0m\u001b[39m=\u001b[0m\u001b[3;91mFalse\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰──────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╰──────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m╭─\u001b[0m\u001b[38;2;255;69;0m \u001b[0m\u001b[1;38;2;255;69;0mChoice\u001b[0m\u001b[38;2;255;69;0m(\u001b[0m\u001b[3;38;2;255;69;0mselectors\u001b[0m\u001b[38;2;255;69;0m) \u001b[0m\u001b[38;2;255;69;0m────────────────────────────────────────────────────\u001b[0m\u001b[38;2;255;69;0m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mSelectKBest\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mVarianceThreshold\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=924552;https://www.scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectKBest.html\u001b\\\u001b[4;39mSelectKBest\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=534283;https://www.scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html\u001b\\\u001b[4;39mVarianceThreshold\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'k'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m10\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'threshold'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;36m0.1\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m╰──────────────────────────────╯\u001b[0m \u001b[38;2;230;175;46m╰────────────────────────────────────╯\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m╰─────────────────────────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m╭─\u001b[0m\u001b[38;2;119;125;167m \u001b[0m\u001b[1;38;2;119;125;167mSplit\u001b[0m\u001b[38;2;119;125;167m(\u001b[0m\u001b[3;38;2;119;125;167mtransformers\u001b[0m\u001b[38;2;119;125;167m) \u001b[0m\u001b[38;2;119;125;167m────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;119;125;167m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mpassthrough\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mpolynomial\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m───────────────\u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mzerocount\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mPassthrough\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mPolynomialFeatures\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mZeroCount\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mPassthrough\u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=160272;https://www.scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html\u001b\\\u001b[4;39mPolynomialFeatures\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mZeroCount\u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰──────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'degree'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;36m2\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╰───────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╰─────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m╰─────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╰─────────────────────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m╰───────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m╭─\u001b[0m\u001b[38;2;255;69;0m \u001b[0m\u001b[1;38;2;255;69;0mChoice\u001b[0m\u001b[38;2;255;69;0m(\u001b[0m\u001b[3;38;2;255;69;0mestimator\u001b[0m\u001b[38;2;255;69;0m) \u001b[0m\u001b[38;2;255;69;0m────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;255;69;0m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mRandomForestClassifier\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m─────────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mSVC\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m───────────────────────────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=399181;https://www.scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html\u001b\\\u001b[4;39mRandomForestClassifier\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=945901;https://www.scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html\u001b\\\u001b[4;39mSVC\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mconfig\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'max_depth'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'kernel'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m[\u001b[0m\u001b[32m'linear'\u001b[0m\u001b[39m, \u001b[0m\u001b[32m'rbf'\u001b[0m\u001b[39m, \u001b[0m\u001b[32m'poly'\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m}\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m╰─────────────────────────────────────────────╯\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[32m'n_estimators'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;36m10\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m100\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[32m'criterion'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m[\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[32m'gini'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[32m'log_loss'\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m]\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39m \u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m╰──────────────────────────────────────────────╯\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [], + "text/plain": [ + "Sequential(name='my_pipeline', item=None, nodes=(Split(name='split_imputation', item=None, nodes=(Sequential(name='categories', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", + " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='SimpleImputer', item=SimpleImputer(fill_value='missing', strategy='constant'), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='OneHotEncoder', item=OneHotEncoder(drop='first', sparse_output=False), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='numerics', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", + " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Component(name='SimpleImputer', item=, nodes=(), config=None, space={'strategy': ['mean', 'median']}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Choice(name='selectors', item=None, nodes=(Component(name='SelectKBest', item=, nodes=(), config=None, space={'k': (1, 10)}, fidelities=None, config_transform=None, meta=None), Component(name='VarianceThreshold', item=, nodes=(), config=None, space={'threshold': (0.1, 1)}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Split(name='transformers', item=None, nodes=(Sequential(name='passthrough', item=None, nodes=(Fixed(name='Passthrough', item=Passthrough(), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None),), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='polynomial', item=None, nodes=(Component(name='PolynomialFeatures', item=, nodes=(), config=None, space={'degree': [2, 3]}, fidelities=None, config_transform=None, meta=None),), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='zerocount', item=None, nodes=(Fixed(name='ZeroCount', item=ZeroCount(), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None),), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Choice(name='estimator', item=None, nodes=(Component(name='RandomForestClassifier', item=, nodes=(), config={'max_depth': 3}, space={'n_estimators': (10, 100), 'criterion': ['gini', 'log_loss']}, fidelities=None, config_transform=None, meta=None), Component(name='SVC', item=, nodes=(), config=None, space={'kernel': ['linear', 'rbf', 'poly']}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from tpot2.builtin_modules import Passthrough, ZeroCount\n", + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.decomposition import PCA\n", + "\n", + "from sklearn.feature_selection import VarianceThreshold, SelectKBest\n", + "\n", + "selectors = Choice(\n", + " Component(VarianceThreshold, space={\"threshold\": (0.1,1)}),\n", + " Component(SelectKBest, space={\"k\": (1, 10)}),\n", + " name=\"selectors\",\n", + ")\n", + "\n", + "\n", + "transformers = Split(\n", + " {\n", + " \"passthrough\": Passthrough(),\n", + " \"polynomial\": Component(PolynomialFeatures, space={\"degree\": [2, 3]}),\n", + " \"zerocount\" : ZeroCount(),\n", + " },\n", + " # config={\"categories\": select_categories, \"numerics\": select_numerical},\n", + " name=\"transformers\",\n", + ")\n", + "\n", + "pipeline = (\n", + " Sequential(name=\"my_pipeline\")\n", + " >> split_imputation\n", + " # >> Component(SimpleImputer, space={\"strategy\": [\"mean\", \"median\"]}) # Choose either mean or median\n", + " \n", + " >> selectors\n", + " >> transformers\n", + " >> Choice(\n", + " # Our pipeline can choose between two different estimators\n", + " Component(\n", + " RandomForestClassifier,\n", + " space={\"n_estimators\": (10, 100), \"criterion\": [\"gini\", \"log_loss\"]},\n", + " config={\"max_depth\": 3},\n", + " ),\n", + " Component(SVC, space={\"kernel\": [\"linear\", \"rbf\", \"poly\"]}),\n", + " name=\"estimator\",\n", + " )\n", + ")\n", + "\n", + "# Display the amltk Pipeline\n", + "pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Pipeline(steps=[('featureunion-1',\n",
+       "                 FeatureUnion(transformer_list=[('pipeline-1',\n",
+       "                                                 Pipeline(steps=[('columntransformer',\n",
+       "                                                                  ColumnTransformer(transformers=[('passthrough',\n",
+       "                                                                                                   'passthrough',\n",
+       "                                                                                                   <sklearn.compose._column_transformer.make_column_selector object at 0x7ab9ec119d20>)])),\n",
+       "                                                                 ('simpleimputer',\n",
+       "                                                                  SimpleImputer(fill_value='missing',\n",
+       "                                                                                strategy='constant')),\n",
+       "                                                                 ('onehotencode...\n",
+       "                ('selectkbest', SelectKBest(k=4)),\n",
+       "                ('featureunion-2',\n",
+       "                 FeatureUnion(transformer_list=[('pipeline-1',\n",
+       "                                                 Pipeline(steps=[('passthrough',\n",
+       "                                                                  Passthrough())])),\n",
+       "                                                ('pipeline-2',\n",
+       "                                                 Pipeline(steps=[('polynomialfeatures',\n",
+       "                                                                  PolynomialFeatures())])),\n",
+       "                                                ('pipeline-3',\n",
+       "                                                 Pipeline(steps=[('zerocount',\n",
+       "                                                                  ZeroCount())]))])),\n",
+       "                ('randomforestclassifier',\n",
+       "                 RandomForestClassifier(n_estimators=24))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "Pipeline(steps=[('featureunion-1',\n", + " FeatureUnion(transformer_list=[('pipeline-1',\n", + " Pipeline(steps=[('columntransformer',\n", + " ColumnTransformer(transformers=[('passthrough',\n", + " 'passthrough',\n", + " )])),\n", + " ('simpleimputer',\n", + " SimpleImputer(fill_value='missing',\n", + " strategy='constant')),\n", + " ('onehotencode...\n", + " ('selectkbest', SelectKBest(k=4)),\n", + " ('featureunion-2',\n", + " FeatureUnion(transformer_list=[('pipeline-1',\n", + " Pipeline(steps=[('passthrough',\n", + " Passthrough())])),\n", + " ('pipeline-2',\n", + " Pipeline(steps=[('polynomialfeatures',\n", + " PolynomialFeatures())])),\n", + " ('pipeline-3',\n", + " Pipeline(steps=[('zerocount',\n", + " ZeroCount())]))])),\n", + " ('randomforestclassifier',\n", + " RandomForestClassifier(n_estimators=24))])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#convert to tpot search space\n", + "tpot_search_space = tpot2.utils.tpot2_parser(pipeline)\n", + "\n", + "# sample a pipeline from the tpot search space\n", + "tpot_search_space.generate().export_pipeline()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generation: 50%|█████ | 1/2 [00:03<00:03, 3.26s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generation: 1\n", + "Best roc_auc_score score: 0.9423333333333334\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generation: 100%|██████████| 2/2 [00:03<00:00, 1.86s/it]\n", + "2024-09-09 17:18:39,054 - distributed.worker - ERROR - Failed to communicate with scheduler during heartbeat.\n", + "Traceback (most recent call last):\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 225, in read\n", + " frames_nosplit_nbytes_bin = await stream.read_bytes(fmt_size)\n", + "tornado.iostream.StreamClosedError: Stream is closed\n", + "\n", + "The above exception was the direct cause of the following exception:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/worker.py\", line 1250, in heartbeat\n", + " response = await retry_operation(\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/utils_comm.py\", line 459, in retry_operation\n", + " return await retry(\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/utils_comm.py\", line 438, in retry\n", + " return await coro()\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/core.py\", line 1254, in send_recv_from_rpc\n", + " return await send_recv(comm=comm, op=key, **kwargs)\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/core.py\", line 1013, in send_recv\n", + " response = await comm.read(deserializers=deserializers)\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 236, in read\n", + " convert_stream_closed_error(self, e)\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 142, in convert_stream_closed_error\n", + " raise CommClosedError(f\"in {obj}: {exc}\") from exc\n", + "distributed.comm.core.CommClosedError: in : Stream is closed\n", + "2024-09-09 17:18:39,055 - distributed.worker - ERROR - Failed to communicate with scheduler during heartbeat.\n", + "Traceback (most recent call last):\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 225, in read\n", + " frames_nosplit_nbytes_bin = await stream.read_bytes(fmt_size)\n", + "tornado.iostream.StreamClosedError: Stream is closed\n", + "\n", + "The above exception was the direct cause of the following exception:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/worker.py\", line 1250, in heartbeat\n", + " response = await retry_operation(\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/utils_comm.py\", line 459, in retry_operation\n", + " return await retry(\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/utils_comm.py\", line 438, in retry\n", + " return await coro()\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/core.py\", line 1254, in send_recv_from_rpc\n", + " return await send_recv(comm=comm, op=key, **kwargs)\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/core.py\", line 1013, in send_recv\n", + " response = await comm.read(deserializers=deserializers)\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 236, in read\n", + " convert_stream_closed_error(self, e)\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 142, in convert_stream_closed_error\n", + " raise CommClosedError(f\"in {obj}: {exc}\") from exc\n", + "distributed.comm.core.CommClosedError: in : Stream is closed\n", + "2024-09-09 17:18:39,062 - distributed.scheduler - ERROR - Removing worker 'tcp://127.0.0.1:39033' caused the cluster to lose scattered data, which can't be recovered: {'ndarray-4295957c2613499053c4412f415dedb8', 'DataFrame-ee6ff64644f78f1c23d469116500dd47'} (stimulus_id='handle-worker-cleanup-1725927519.0628352')\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generation: 2\n", + "Best roc_auc_score score: 0.96\n" + ] + }, + { + "data": { + "text/html": [ + "
TPOTEstimator(classification=True, generations=2, max_eval_time_seconds=300,\n",
+       "              n_jobs=10, population_size=10, scorers=['roc_auc'],\n",
+       "              scorers_weights=[1],\n",
+       "              search_space=<tpot2.search_spaces.pipelines.sequential.SequentialPipeline object at 0x7ab9928d8f40>,\n",
+       "              verbose=5)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "TPOTEstimator(classification=True, generations=2, max_eval_time_seconds=300,\n", + " n_jobs=10, population_size=10, scorers=['roc_auc'],\n", + " scorers_weights=[1],\n", + " search_space=,\n", + " verbose=5)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "\n", + "\n", + "est = tpot2.TPOTEstimator(\n", + " scorers = [\"roc_auc\"],\n", + " scorers_weights = [1],\n", + " classification = True,\n", + " cv = 5,\n", + " search_space = tpot_search_space, #converted search space goes here\n", + " population_size= 10,\n", + " generations = 2,\n", + " max_eval_time_seconds = 60*5,\n", + " verbose = 5,\n", + " n_jobs=10,\n", + ")\n", + "\n", + "est.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Pipeline(steps=[('featureunion-1',\n",
+       "                 FeatureUnion(transformer_list=[('pipeline-1',\n",
+       "                                                 Pipeline(steps=[('columntransformer',\n",
+       "                                                                  ColumnTransformer(transformers=[('passthrough',\n",
+       "                                                                                                   'passthrough',\n",
+       "                                                                                                   <sklearn.compose._column_transformer.make_column_selector object at 0x7ab98dbdb100>)])),\n",
+       "                                                                 ('simpleimputer',\n",
+       "                                                                  SimpleImputer(fill_value='missing',\n",
+       "                                                                                strategy='constant')),\n",
+       "                                                                 ('onehotencode...\n",
+       "                 VarianceThreshold(threshold=0.6396211247532)),\n",
+       "                ('featureunion-2',\n",
+       "                 FeatureUnion(transformer_list=[('pipeline-1',\n",
+       "                                                 Pipeline(steps=[('passthrough',\n",
+       "                                                                  Passthrough())])),\n",
+       "                                                ('pipeline-2',\n",
+       "                                                 Pipeline(steps=[('polynomialfeatures',\n",
+       "                                                                  PolynomialFeatures())])),\n",
+       "                                                ('pipeline-3',\n",
+       "                                                 Pipeline(steps=[('zerocount',\n",
+       "                                                                  ZeroCount())]))])),\n",
+       "                ('randomforestclassifier',\n",
+       "                 RandomForestClassifier(criterion='log_loss',\n",
+       "                                        n_estimators=47))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "Pipeline(steps=[('featureunion-1',\n", + " FeatureUnion(transformer_list=[('pipeline-1',\n", + " Pipeline(steps=[('columntransformer',\n", + " ColumnTransformer(transformers=[('passthrough',\n", + " 'passthrough',\n", + " )])),\n", + " ('simpleimputer',\n", + " SimpleImputer(fill_value='missing',\n", + " strategy='constant')),\n", + " ('onehotencode...\n", + " VarianceThreshold(threshold=0.6396211247532)),\n", + " ('featureunion-2',\n", + " FeatureUnion(transformer_list=[('pipeline-1',\n", + " Pipeline(steps=[('passthrough',\n", + " Passthrough())])),\n", + " ('pipeline-2',\n", + " Pipeline(steps=[('polynomialfeatures',\n", + " PolynomialFeatures())])),\n", + " ('pipeline-3',\n", + " Pipeline(steps=[('zerocount',\n", + " ZeroCount())]))])),\n", + " ('randomforestclassifier',\n", + " RandomForestClassifier(criterion='log_loss',\n", + " n_estimators=47))])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.fitted_pipeline_" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0,\n", + " 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1,\n", + " 1, 0, 0, 1, 1, 0])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.predict(X_test)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "myenv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/setup.py b/setup.py index 0a404280..8586dbe7 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,8 @@ def calculate_version(): extras_require={ 'skrebate': ['skrebate>=0.3.4'], 'mdr': ['scikit-mdr>=0.4.4'], - 'sklearnex' : ['scikit-learn-intelex>=2023.2.1'] + 'sklearnex' : ['scikit-learn-intelex>=2023.2.1'], + 'amltk' : ['amltk>=1.12.1'], }, classifiers=[ 'Intended Audience :: Science/Research', diff --git a/tpot2/__init__.py b/tpot2/__init__.py index 62290884..f7014a29 100644 --- a/tpot2/__init__.py +++ b/tpot2/__init__.py @@ -8,9 +8,9 @@ from .population import Population from . import builtin_modules -from . import utils from . import config from . import search_spaces +from . import utils from . import evolvers from . import objectives from . import selectors diff --git a/tpot2/utils/__init__.py b/tpot2/utils/__init__.py index e9c795a3..12231446 100644 --- a/tpot2/utils/__init__.py +++ b/tpot2/utils/__init__.py @@ -1,2 +1,11 @@ from . import eval_utils -from .utils import * \ No newline at end of file +from .utils import * + +# If amltk is installed, import the parser +try: + from .amltk_parser import tpot2_parser +except ImportError: + # Handle the case when amltk is not installed + pass + # print("amltk is not installed. Please install it to use tpot2_parser.") + # Optional: raise an exception or provide alternative functionality \ No newline at end of file diff --git a/tpot2/utils/amltk_parser.py b/tpot2/utils/amltk_parser.py new file mode 100644 index 00000000..c147dbd8 --- /dev/null +++ b/tpot2/utils/amltk_parser.py @@ -0,0 +1,72 @@ +from amltk.pipeline import Choice, Component, Sequential, Node, Fixed, Split, Join, Searchable +from tpot2.search_spaces.pipelines import SequentialPipeline, ChoicePipeline, UnionPipeline +from tpot2.search_spaces.nodes import EstimatorNode +from ConfigSpace import ConfigurationSpace + +def component_to_estimatornode(component: Component) -> EstimatorNode: + method = component.item + space_dict = {} + if component.space is not None: + space_dict.update(component.space) + if component.config is not None: + space_dict.update(component.config) + space = ConfigurationSpace(component.space) + + tpot2_sp = EstimatorNode(method=method, space=space) + return tpot2_sp + +def fixed_to_estimatornode(node: Fixed) -> EstimatorNode: + method = node.item + #check if method is a class or an object + if not isinstance(method, type): + method = type(method) + + #if baseestimator, get params + if hasattr(node.item, 'get_params'): + space_dict = node.item.get_params(deep=False) + else: + space_dict = {} + if node.space is not None: + space_dict.update(node.space) + if node.config is not None: + space_dict.update(node.config) + + tpot2_sp = EstimatorNode(method=method, space=space_dict) + return tpot2_sp + +def sequential_to_sequentialpipeline(sequential: Sequential) -> SequentialPipeline: + nodes = [tpot2_parser(node) for node in sequential.nodes] + tpot2_sp = SequentialPipeline(search_spaces=nodes) + return tpot2_sp + +def choice_to_choicepipeline(choice: Choice) -> ChoicePipeline: + nodes = [tpot2_parser(node) for node in choice.nodes] + tpot2_sp = ChoicePipeline(search_spaces=nodes) + return tpot2_sp + + +def split_to_unionpipeline(split: Split) -> UnionPipeline: + nodes = [tpot2_parser(node) for node in split.nodes] + tpot2_sp = UnionPipeline(search_spaces=nodes) + return tpot2_sp + +def tpot2_parser( + node: Node, + # *, + # flat: bool = False, + # conditionals: bool = False, + # delim: str = ":", + ): + + if isinstance(node, Component): + return component_to_estimatornode(node) + elif isinstance(node, Sequential): + return sequential_to_sequentialpipeline(node) + elif isinstance(node, Choice): + return choice_to_choicepipeline(node) + elif isinstance(node, Fixed): + return fixed_to_estimatornode(node) + elif isinstance(node, Split): + return split_to_unionpipeline(node) + else: + raise ValueError(f"Node type {type(node)} not supported") From abd4ac6b73f63b0e2ab8ff504355495f750c68e3 Mon Sep 17 00:00:00 2001 From: perib Date: Mon, 9 Sep 2024 17:26:55 -0700 Subject: [PATCH 10/12] rerun --- .../amltk_search_space_parser_example.ipynb | 176 +++++++----------- 1 file changed, 64 insertions(+), 112 deletions(-) diff --git a/Tutorial/amltk_search_space_parser_example.ipynb b/Tutorial/amltk_search_space_parser_example.ipynb index 9dc62527..fe2038df 100644 --- a/Tutorial/amltk_search_space_parser_example.ipynb +++ b/Tutorial/amltk_search_space_parser_example.ipynb @@ -25,7 +25,7 @@ " item ColumnTransformer(transformers=[('passth… item ColumnTransformer(transformers=[('passthro… \n", " 'passthrough', 'passthrough', \n", " <sklear… <sklearn.… \n", - " object at 0x7ab9ec119d20>)]) object at 0x7ab994db4c40>)]) \n", + " object at 0x7d354d946290>)]) object at 0x7d34edf94fa0>)]) \n", " ╰────────────────────────────────────────────────╯ ╰──────────────────────────────────────────────────╯ \n", " \n", " ╭─ Fixed(SimpleImputer) ─────────────────────────╮ ╭─ Component(SimpleImputer) ─────────────╮ \n", @@ -48,11 +48,11 @@ "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mColumnTransformer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mtransformers\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;39m(\u001b[0m\u001b[32m'passth…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mColumnTransformer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mtransformers\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;39m(\u001b[0m\u001b[32m'passthro…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[32m'passthrough'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[32m'passthrough'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m<\u001b[0m\u001b[1;95msklear…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m<\u001b[0m\u001b[1;95msklearn.…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", - "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7ab9ec119d20\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7ab994db4c40\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7d354d946290\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7d34edf94fa0\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰──────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mSimpleImputer\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m────────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mSimpleImputer\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────────────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", - "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mSimpleImputer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mfill_value\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'missing'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=202447;https://www.scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html\u001b\\\u001b[4;39mSimpleImputer\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mSimpleImputer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mfill_value\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'missing'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=861007;https://www.scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html\u001b\\\u001b[4;39mSimpleImputer\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[33mstrategy\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'constant'\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'strategy'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m[\u001b[0m\u001b[32m'mean'\u001b[0m\u001b[39m, \u001b[0m\u001b[32m'median'\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m}\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╰────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m╰──────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", @@ -72,8 +72,8 @@ "text/html": [], "text/plain": [ "Split(name='split_imputation', item=None, nodes=(Sequential(name='categories', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", - " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='SimpleImputer', item=SimpleImputer(fill_value='missing', strategy='constant'), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='OneHotEncoder', item=OneHotEncoder(drop='first', sparse_output=False), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='numerics', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", - " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Component(name='SimpleImputer', item=, nodes=(), config=None, space={'strategy': ['mean', 'median']}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)" + " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='SimpleImputer', item=SimpleImputer(fill_value='missing', strategy='constant'), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='OneHotEncoder', item=OneHotEncoder(drop='first', sparse_output=False), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='numerics', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", + " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Component(name='SimpleImputer', item=, nodes=(), config=None, space={'strategy': ['mean', 'median']}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)" ] }, "execution_count": 1, @@ -153,7 +153,7 @@ " item ColumnTransformer(transformers=[('pass… item ColumnTransformer(transformers=[('passth… \n", " 'passthrough', 'passthrough', \n", " <skle… <sklear… \n", - " object at 0x7ab9ec119d20>)]) object at 0x7ab994db4c40>)]) \n", + " object at 0x7d354d946290>)]) object at 0x7d34edf94fa0>)]) \n", " ╰──────────────────────────────────────────────╯ ╰────────────────────────────────────────────────╯ \n", " \n", " ╭─ Fixed(SimpleImputer) ───────────────────────╮ ╭─ Component(SimpleImputer) ─────────────╮ \n", @@ -208,11 +208,11 @@ "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mColumnTransformer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mtransformers\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;39m(\u001b[0m\u001b[32m'pass…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mColumnTransformer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mtransformers\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;39m(\u001b[0m\u001b[32m'passth…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[32m'passthrough'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[32m'passthrough'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m<\u001b[0m\u001b[1;95mskle…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m<\u001b[0m\u001b[1;95msklear…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", - "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7ab9ec119d20\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7ab994db4c40\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7d354d946290\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7d34edf94fa0\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰──────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mSimpleImputer\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m──────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mSimpleImputer\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────────────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", - "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mSimpleImputer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mfill_value\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'missing'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=27035;https://www.scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html\u001b\\\u001b[4;39mSimpleImputer\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mSimpleImputer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mfill_value\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'missing'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=178888;https://www.scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html\u001b\\\u001b[4;39mSimpleImputer\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[33mstrategy\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'constant'\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'strategy'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m[\u001b[0m\u001b[32m'mean'\u001b[0m\u001b[39m, \u001b[0m\u001b[32m'median'\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m}\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰──────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╰────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m╰────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", @@ -225,7 +225,7 @@ "\u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m╭─\u001b[0m\u001b[38;2;255;69;0m \u001b[0m\u001b[1;38;2;255;69;0mChoice\u001b[0m\u001b[38;2;255;69;0m(\u001b[0m\u001b[3;38;2;255;69;0mselectors\u001b[0m\u001b[38;2;255;69;0m) \u001b[0m\u001b[38;2;255;69;0m────────────────────────────────────────────────────\u001b[0m\u001b[38;2;255;69;0m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mSelectKBest\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mVarianceThreshold\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", - "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=924552;https://www.scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectKBest.html\u001b\\\u001b[4;39mSelectKBest\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=534283;https://www.scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html\u001b\\\u001b[4;39mVarianceThreshold\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=870666;https://www.scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectKBest.html\u001b\\\u001b[4;39mSelectKBest\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=23174;https://www.scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html\u001b\\\u001b[4;39mVarianceThreshold\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'k'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m10\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'threshold'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;36m0.1\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m╰──────────────────────────────╯\u001b[0m \u001b[38;2;230;175;46m╰────────────────────────────────────╯\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m╰─────────────────────────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", @@ -233,7 +233,7 @@ "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m╭─\u001b[0m\u001b[38;2;119;125;167m \u001b[0m\u001b[1;38;2;119;125;167mSplit\u001b[0m\u001b[38;2;119;125;167m(\u001b[0m\u001b[3;38;2;119;125;167mtransformers\u001b[0m\u001b[38;2;119;125;167m) \u001b[0m\u001b[38;2;119;125;167m────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;119;125;167m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mpassthrough\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mpolynomial\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m───────────────\u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mzerocount\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mPassthrough\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mPolynomialFeatures\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mZeroCount\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", - "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mPassthrough\u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=160272;https://www.scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html\u001b\\\u001b[4;39mPolynomialFeatures\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mZeroCount\u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mPassthrough\u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=605509;https://www.scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html\u001b\\\u001b[4;39mPolynomialFeatures\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mZeroCount\u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰──────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'degree'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;36m2\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╰───────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╰─────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m╰─────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╰─────────────────────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", @@ -241,7 +241,7 @@ "\u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m╭─\u001b[0m\u001b[38;2;255;69;0m \u001b[0m\u001b[1;38;2;255;69;0mChoice\u001b[0m\u001b[38;2;255;69;0m(\u001b[0m\u001b[3;38;2;255;69;0mestimator\u001b[0m\u001b[38;2;255;69;0m) \u001b[0m\u001b[38;2;255;69;0m────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;255;69;0m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mRandomForestClassifier\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m─────────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mSVC\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m───────────────────────────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", - "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=399181;https://www.scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html\u001b\\\u001b[4;39mRandomForestClassifier\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=945901;https://www.scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html\u001b\\\u001b[4;39mSVC\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=470078;https://www.scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html\u001b\\\u001b[4;39mRandomForestClassifier\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=315827;https://www.scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html\u001b\\\u001b[4;39mSVC\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mconfig\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'max_depth'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'kernel'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m[\u001b[0m\u001b[32m'linear'\u001b[0m\u001b[39m, \u001b[0m\u001b[32m'rbf'\u001b[0m\u001b[39m, \u001b[0m\u001b[32m'poly'\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m}\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m╰─────────────────────────────────────────────╯\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[32m'n_estimators'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;36m10\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m100\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", @@ -263,8 +263,8 @@ "text/html": [], "text/plain": [ "Sequential(name='my_pipeline', item=None, nodes=(Split(name='split_imputation', item=None, nodes=(Sequential(name='categories', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", - " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='SimpleImputer', item=SimpleImputer(fill_value='missing', strategy='constant'), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='OneHotEncoder', item=OneHotEncoder(drop='first', sparse_output=False), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='numerics', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", - " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Component(name='SimpleImputer', item=, nodes=(), config=None, space={'strategy': ['mean', 'median']}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Choice(name='selectors', item=None, nodes=(Component(name='SelectKBest', item=, nodes=(), config=None, space={'k': (1, 10)}, fidelities=None, config_transform=None, meta=None), Component(name='VarianceThreshold', item=, nodes=(), config=None, space={'threshold': (0.1, 1)}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Split(name='transformers', item=None, nodes=(Sequential(name='passthrough', item=None, nodes=(Fixed(name='Passthrough', item=Passthrough(), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None),), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='polynomial', item=None, nodes=(Component(name='PolynomialFeatures', item=, nodes=(), config=None, space={'degree': [2, 3]}, fidelities=None, config_transform=None, meta=None),), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='zerocount', item=None, nodes=(Fixed(name='ZeroCount', item=ZeroCount(), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None),), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Choice(name='estimator', item=None, nodes=(Component(name='RandomForestClassifier', item=, nodes=(), config={'max_depth': 3}, space={'n_estimators': (10, 100), 'criterion': ['gini', 'log_loss']}, fidelities=None, config_transform=None, meta=None), Component(name='SVC', item=, nodes=(), config=None, space={'kernel': ['linear', 'rbf', 'poly']}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)" + " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='SimpleImputer', item=SimpleImputer(fill_value='missing', strategy='constant'), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='OneHotEncoder', item=OneHotEncoder(drop='first', sparse_output=False), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='numerics', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", + " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Component(name='SimpleImputer', item=, nodes=(), config=None, space={'strategy': ['mean', 'median']}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Choice(name='selectors', item=None, nodes=(Component(name='SelectKBest', item=, nodes=(), config=None, space={'k': (1, 10)}, fidelities=None, config_transform=None, meta=None), Component(name='VarianceThreshold', item=, nodes=(), config=None, space={'threshold': (0.1, 1)}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Split(name='transformers', item=None, nodes=(Sequential(name='passthrough', item=None, nodes=(Fixed(name='Passthrough', item=Passthrough(), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None),), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='polynomial', item=None, nodes=(Component(name='PolynomialFeatures', item=, nodes=(), config=None, space={'degree': [2, 3]}, fidelities=None, config_transform=None, meta=None),), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='zerocount', item=None, nodes=(Fixed(name='ZeroCount', item=ZeroCount(), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None),), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Choice(name='estimator', item=None, nodes=(Component(name='RandomForestClassifier', item=, nodes=(), config={'max_depth': 3}, space={'n_estimators': (10, 100), 'criterion': ['gini', 'log_loss']}, fidelities=None, config_transform=None, meta=None), Component(name='SVC', item=, nodes=(), config=None, space={'kernel': ['linear', 'rbf', 'poly']}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)" ] }, "execution_count": 2, @@ -321,7 +321,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -736,50 +736,50 @@ " Pipeline(steps=[('columntransformer',\n", " ColumnTransformer(transformers=[('passthrough',\n", " 'passthrough',\n", - " <sklearn.compose._column_transformer.make_column_selector object at 0x7ab9ec119d20>)])),\n", + " <sklearn.compose._column_transformer.make_column_selector object at 0x7d354d946290>)])),\n", " ('simpleimputer',\n", " SimpleImputer(fill_value='missing',\n", " strategy='constant')),\n", " ('onehotencode...\n", - " ('selectkbest', SelectKBest(k=4)),\n", + " VarianceThreshold(threshold=0.6738938110936)),\n", " ('featureunion-2',\n", " FeatureUnion(transformer_list=[('pipeline-1',\n", " Pipeline(steps=[('passthrough',\n", " Passthrough())])),\n", " ('pipeline-2',\n", " Pipeline(steps=[('polynomialfeatures',\n", - " PolynomialFeatures())])),\n", + " PolynomialFeatures(degree=3))])),\n", " ('pipeline-3',\n", " Pipeline(steps=[('zerocount',\n", " ZeroCount())]))])),\n", " ('randomforestclassifier',\n", - " RandomForestClassifier(n_estimators=24))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + " Pipeline(steps=[('zerocount', ZeroCount())]))])
Passthrough()
PolynomialFeatures(degree=3)
ZeroCount()
RandomForestClassifier(n_estimators=16)
" ], "text/plain": [ "Pipeline(steps=[('featureunion-1',\n", @@ -809,27 +809,27 @@ " Pipeline(steps=[('columntransformer',\n", " ColumnTransformer(transformers=[('passthrough',\n", " 'passthrough',\n", - " )])),\n", + " )])),\n", " ('simpleimputer',\n", " SimpleImputer(fill_value='missing',\n", " strategy='constant')),\n", " ('onehotencode...\n", - " ('selectkbest', SelectKBest(k=4)),\n", + " VarianceThreshold(threshold=0.6738938110936)),\n", " ('featureunion-2',\n", " FeatureUnion(transformer_list=[('pipeline-1',\n", " Pipeline(steps=[('passthrough',\n", " Passthrough())])),\n", " ('pipeline-2',\n", " Pipeline(steps=[('polynomialfeatures',\n", - " PolynomialFeatures())])),\n", + " PolynomialFeatures(degree=3))])),\n", " ('pipeline-3',\n", " Pipeline(steps=[('zerocount',\n", " ZeroCount())]))])),\n", " ('randomforestclassifier',\n", - " RandomForestClassifier(n_estimators=24))])" + " RandomForestClassifier(n_estimators=16))])" ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -844,14 +844,14 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Generation: 50%|█████ | 1/2 [00:03<00:03, 3.26s/it]" + "Generation: 50%|█████ | 1/2 [00:02<00:02, 2.60s/it]" ] }, { @@ -859,63 +859,15 @@ "output_type": "stream", "text": [ "Generation: 1\n", - "Best roc_auc_score score: 0.9423333333333334\n" + "Best roc_auc_score score: 0.976\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "Generation: 100%|██████████| 2/2 [00:03<00:00, 1.86s/it]\n", - "2024-09-09 17:18:39,054 - distributed.worker - ERROR - Failed to communicate with scheduler during heartbeat.\n", - "Traceback (most recent call last):\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 225, in read\n", - " frames_nosplit_nbytes_bin = await stream.read_bytes(fmt_size)\n", - "tornado.iostream.StreamClosedError: Stream is closed\n", - "\n", - "The above exception was the direct cause of the following exception:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/worker.py\", line 1250, in heartbeat\n", - " response = await retry_operation(\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/utils_comm.py\", line 459, in retry_operation\n", - " return await retry(\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/utils_comm.py\", line 438, in retry\n", - " return await coro()\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/core.py\", line 1254, in send_recv_from_rpc\n", - " return await send_recv(comm=comm, op=key, **kwargs)\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/core.py\", line 1013, in send_recv\n", - " response = await comm.read(deserializers=deserializers)\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 236, in read\n", - " convert_stream_closed_error(self, e)\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 142, in convert_stream_closed_error\n", - " raise CommClosedError(f\"in {obj}: {exc}\") from exc\n", - "distributed.comm.core.CommClosedError: in : Stream is closed\n", - "2024-09-09 17:18:39,055 - distributed.worker - ERROR - Failed to communicate with scheduler during heartbeat.\n", - "Traceback (most recent call last):\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 225, in read\n", - " frames_nosplit_nbytes_bin = await stream.read_bytes(fmt_size)\n", - "tornado.iostream.StreamClosedError: Stream is closed\n", - "\n", - "The above exception was the direct cause of the following exception:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/worker.py\", line 1250, in heartbeat\n", - " response = await retry_operation(\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/utils_comm.py\", line 459, in retry_operation\n", - " return await retry(\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/utils_comm.py\", line 438, in retry\n", - " return await coro()\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/core.py\", line 1254, in send_recv_from_rpc\n", - " return await send_recv(comm=comm, op=key, **kwargs)\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/core.py\", line 1013, in send_recv\n", - " response = await comm.read(deserializers=deserializers)\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 236, in read\n", - " convert_stream_closed_error(self, e)\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 142, in convert_stream_closed_error\n", - " raise CommClosedError(f\"in {obj}: {exc}\") from exc\n", - "distributed.comm.core.CommClosedError: in : Stream is closed\n", - "2024-09-09 17:18:39,062 - distributed.scheduler - ERROR - Removing worker 'tcp://127.0.0.1:39033' caused the cluster to lose scattered data, which can't be recovered: {'ndarray-4295957c2613499053c4412f415dedb8', 'DataFrame-ee6ff64644f78f1c23d469116500dd47'} (stimulus_id='handle-worker-cleanup-1725927519.0628352')\n" + "Generation: 100%|██████████| 2/2 [00:03<00:00, 1.57s/it]\n", + "2024-09-09 17:25:40,301 - distributed.scheduler - ERROR - Removing worker 'tcp://127.0.0.1:39897' caused the cluster to lose scattered data, which can't be recovered: {'ndarray-3f2f44921e6e9cc40ef07cfcd8ae90fb', 'DataFrame-5551f84174fd651642ff10eb71e30b22'} (stimulus_id='handle-worker-cleanup-1725927940.3010821')\n" ] }, { @@ -923,7 +875,7 @@ "output_type": "stream", "text": [ "Generation: 2\n", - "Best roc_auc_score score: 0.96\n" + "Best roc_auc_score score: 0.984\n" ] }, { @@ -1336,22 +1288,22 @@ "
TPOTEstimator(classification=True, generations=2, max_eval_time_seconds=300,\n",
        "              n_jobs=10, population_size=10, scorers=['roc_auc'],\n",
        "              scorers_weights=[1],\n",
-       "              search_space=<tpot2.search_spaces.pipelines.sequential.SequentialPipeline object at 0x7ab9928d8f40>,\n",
+       "              search_space=<tpot2.search_spaces.pipelines.sequential.SequentialPipeline object at 0x7d34ec1efbb0>,\n",
        "              verbose=5)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "TPOTEstimator(classification=True, generations=2, max_eval_time_seconds=300,\n", " n_jobs=10, population_size=10, scorers=['roc_auc'],\n", " scorers_weights=[1],\n", - " search_space=,\n", + " search_space=,\n", " verbose=5)" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -1378,7 +1330,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -1793,12 +1745,12 @@ " Pipeline(steps=[('columntransformer',\n", " ColumnTransformer(transformers=[('passthrough',\n", " 'passthrough',\n", - " <sklearn.compose._column_transformer.make_column_selector object at 0x7ab98dbdb100>)])),\n", + " <sklearn.compose._column_transformer.make_column_selector object at 0x7d34eb307cd0>)])),\n", " ('simpleimputer',\n", " SimpleImputer(fill_value='missing',\n", " strategy='constant')),\n", " ('onehotencode...\n", - " VarianceThreshold(threshold=0.6396211247532)),\n", + " VarianceThreshold(threshold=0.1557560591318)),\n", " ('featureunion-2',\n", " FeatureUnion(transformer_list=[('pipeline-1',\n", " Pipeline(steps=[('passthrough',\n", @@ -1811,17 +1763,17 @@ " ZeroCount())]))])),\n", " ('randomforestclassifier',\n", " RandomForestClassifier(criterion='log_loss',\n", - " n_estimators=47))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + " Pipeline(steps=[('zerocount', ZeroCount())]))])
Passthrough()
PolynomialFeatures()
ZeroCount()
RandomForestClassifier(criterion='log_loss', n_estimators=80)
" ], "text/plain": [ "Pipeline(steps=[('featureunion-1',\n", @@ -1868,12 +1820,12 @@ " Pipeline(steps=[('columntransformer',\n", " ColumnTransformer(transformers=[('passthrough',\n", " 'passthrough',\n", - " )])),\n", + " )])),\n", " ('simpleimputer',\n", " SimpleImputer(fill_value='missing',\n", " strategy='constant')),\n", " ('onehotencode...\n", - " VarianceThreshold(threshold=0.6396211247532)),\n", + " VarianceThreshold(threshold=0.1557560591318)),\n", " ('featureunion-2',\n", " FeatureUnion(transformer_list=[('pipeline-1',\n", " Pipeline(steps=[('passthrough',\n", @@ -1886,10 +1838,10 @@ " ZeroCount())]))])),\n", " ('randomforestclassifier',\n", " RandomForestClassifier(criterion='log_loss',\n", - " n_estimators=47))])" + " n_estimators=80))])" ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -1900,18 +1852,18 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0,\n", - " 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1,\n", - " 1, 0, 0, 1, 1, 0])" + "array([1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1,\n", + " 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0,\n", + " 1, 0, 0, 0, 0, 0])" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } From 419c7080f8e048125e6022ec436216a95b1ad8ff Mon Sep 17 00:00:00 2001 From: perib Date: Tue, 10 Sep 2024 10:14:04 -0700 Subject: [PATCH 11/12] fix seletors lists --- tpot2/config/get_configspace.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tpot2/config/get_configspace.py b/tpot2/config/get_configspace.py index 46b13b60..51061b93 100644 --- a/tpot2/config/get_configspace.py +++ b/tpot2/config/get_configspace.py @@ -114,8 +114,8 @@ GROUPNAMES = { "selectors": ["SelectFwe", "SelectPercentile", "VarianceThreshold",], - "selectors_classification": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_classification", "SelectFromModel_classification"], - "selectors_regression": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_regression", "SelectFromModel_regression"], + "selectors_classification": ["SelectFwe", "SelectPercentile", "RFE_classification", "SelectFromModel_classification"], + "selectors_regression": ["SelectFwe", "SelectPercentile", "RFE_regression", "SelectFromModel_regression"], "classifiers" : ["LGBMClassifier", "BaggingClassifier", 'AdaBoostClassifier', 'BernoulliNB', 'DecisionTreeClassifier', 'ExtraTreesClassifier', 'GaussianNB', 'HistGradientBoostingClassifier', 'KNeighborsClassifier','LinearDiscriminantAnalysis', 'LogisticRegression', "LinearSVC", "SVC", 'MLPClassifier', 'MultinomialNB', "QuadraticDiscriminantAnalysis", 'RandomForestClassifier', 'SGDClassifier', 'XGBClassifier'], "regressors" : ["LGBMRegressor", 'AdaBoostRegressor', "ARDRegression", 'DecisionTreeRegressor', 'ExtraTreesRegressor', 'HistGradientBoostingRegressor', 'KNeighborsRegressor', 'LinearSVR', "MLPRegressor", 'RandomForestRegressor', 'SGDRegressor', 'SVR', 'XGBRegressor'], From db614d84c3e949afa244e66e45acc0d86b4ba789 Mon Sep 17 00:00:00 2001 From: perib Date: Tue, 10 Sep 2024 10:15:36 -0700 Subject: [PATCH 12/12] undo --- tpot2/config/get_configspace.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tpot2/config/get_configspace.py b/tpot2/config/get_configspace.py index 51061b93..46b13b60 100644 --- a/tpot2/config/get_configspace.py +++ b/tpot2/config/get_configspace.py @@ -114,8 +114,8 @@ GROUPNAMES = { "selectors": ["SelectFwe", "SelectPercentile", "VarianceThreshold",], - "selectors_classification": ["SelectFwe", "SelectPercentile", "RFE_classification", "SelectFromModel_classification"], - "selectors_regression": ["SelectFwe", "SelectPercentile", "RFE_regression", "SelectFromModel_regression"], + "selectors_classification": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_classification", "SelectFromModel_classification"], + "selectors_regression": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_regression", "SelectFromModel_regression"], "classifiers" : ["LGBMClassifier", "BaggingClassifier", 'AdaBoostClassifier', 'BernoulliNB', 'DecisionTreeClassifier', 'ExtraTreesClassifier', 'GaussianNB', 'HistGradientBoostingClassifier', 'KNeighborsClassifier','LinearDiscriminantAnalysis', 'LogisticRegression', "LinearSVC", "SVC", 'MLPClassifier', 'MultinomialNB', "QuadraticDiscriminantAnalysis", 'RandomForestClassifier', 'SGDClassifier', 'XGBClassifier'], "regressors" : ["LGBMRegressor", 'AdaBoostRegressor', "ARDRegression", 'DecisionTreeRegressor', 'ExtraTreesRegressor', 'HistGradientBoostingRegressor', 'KNeighborsRegressor', 'LinearSVR', "MLPRegressor", 'RandomForestRegressor', 'SGDRegressor', 'SVR', 'XGBRegressor'],