From 2f224417736a579636348f96e76e4b86d0e23d76 Mon Sep 17 00:00:00 2001
From: peiyanpan <1065112771@qq.com>
Date: Tue, 3 Dec 2024 00:24:19 +0800
Subject: [PATCH 1/6] Add the new feature of customized initial population

---
 tpot2/tests/test_customized_iniPop.py | 62 +++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 tpot2/tests/test_customized_iniPop.py

diff --git a/tpot2/tests/test_customized_iniPop.py b/tpot2/tests/test_customized_iniPop.py
new file mode 100644
index 00000000..ab55a8f6
--- /dev/null
+++ b/tpot2/tests/test_customized_iniPop.py
@@ -0,0 +1,62 @@
+import pytest
+
+
+# Deliberately not decorated with @pytest.fixture: pytest never collects a fixture as a test, so the body would not run.
+def test_customized_iniPop():
+    """Seed TPOT's initial population with one generated pipeline individual, fit, and check the held-out score is valid."""
+    import tpot2
+    import sklearn.datasets
+    import sklearn.metrics
+    import sklearn.model_selection
+    scorer = sklearn.metrics.get_scorer('roc_auc_ovo')
+    X, y = sklearn.datasets.load_iris(return_X_y=True)
+
+    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)
+
+    from tpot2.config.get_configspace import set_node
+    from tpot2.search_spaces.pipelines.union import UnionPipeline
+    from tpot2.search_spaces.pipelines.choice import ChoicePipeline
+    from tpot2.search_spaces.pipelines.sequential import SequentialPipeline
+    from tpot2.config.get_configspace import get_search_space
+
+    scalers = set_node("MinMaxScaler", {})
+    selectors = set_node("SelectFwe", {'alpha': 0.0002381268562})
+    transformers_layer =UnionPipeline([
+                            ChoicePipeline([
+                                set_node("SkipTransformer", {})
+                            ]),
+                            get_search_space("Passthrough",)
+                            ]
+                        )
+
+    inner_estimators_layer = UnionPipeline([
+                                get_search_space("Passthrough",)]
+                            )
+    estimators = set_node("HistGradientBoostingClassifier", 
+                        {'early_stop': 'valid', 
+                        'l2_regularization': 0.0011074158219, 
+                        'learning_rate': 0.0050792320068, 
+                        'max_depth': None, 
+                        'max_features': 0.3430178535213, 
+                        'max_leaf_nodes': 237, 
+                        'min_samples_leaf': 63, 
+                        'tol': 0.0001, 
+                        'n_iter_no_change': 14, 
+                        'validation_fraction': 0.2343285974496})
+
+    pipeline = SequentialPipeline(search_spaces=[
+                                        scalers,
+                                        selectors, 
+                                        transformers_layer,
+                                        inner_estimators_layer,
+                                        estimators,
+                                        ])
+    ind = pipeline.generate()
+
+    est = tpot2.TPOTClassifier(search_space="linear", n_jobs=40, verbose=5, generations=1, population_size=5, customized_initial_population=[ind])
+
+    est.fit(X_train, y_train)
+
+    print(str(est.fitted_pipeline_))
+    score = scorer(est, X_test, y_test)
+    assert 0.0 <= score <= 1.0, f"roc_auc_ovo out of range: {score}"
\ No newline at end of file

From 4878b065a750bd268830200551a48ab5b4b7c299 Mon Sep 17 00:00:00 2001
From: peiyanpan <1065112771@qq.com>
Date: Tue, 3 Dec 2024 00:24:38 +0800
Subject: [PATCH 2/6] Add the new feature of customized initial population

---
 PULL_REQUEST_TEMPLATE.md          | 102 +++++++++++++++++++++++++++---
 tpot2/config/get_configspace.py   |   4 ++
 tpot2/evolvers/base_evolver.py    |  11 ++++
 tpot2/tpot_estimator/estimator.py |   5 ++
 4 files changed, 112 insertions(+), 10 deletions(-)

diff --git a/PULL_REQUEST_TEMPLATE.md b/PULL_REQUEST_TEMPLATE.md
index 365ff3e5..07a9da06 100644
--- a/PULL_REQUEST_TEMPLATE.md
+++ b/PULL_REQUEST_TEMPLATE.md
@@ -1,30 +1,112 @@
-[please review the [Contribution Guidelines](http://epistasislab.github.io/tpot/contributing/) prior to submitting your pull request. go ahead and delete this line if you've already reviewed said guidelines.]
-
 ## What does this PR do?
 
-
+Add the new feature of allowing users to specify customized initial pipeline population for TPOT2.
 
 ## Where should the reviewer start?
 
-
+- tpot2/tests/test_customized_iniPop.py
+- tpot2/config/get_configspace.py
+- tpot2/evolvers/base_evolver.py
+- tpot2/tpot_estimator/estimator.py
 
 ## How should this PR be tested?
 
+The test code is at tpot2/tests/test_customized_iniPop.py:
+
+**pytest test_customized_iniPop.py**
+
+```
+import pytest
+
+
+@pytest.fixture
+def test_customized_iniPop():
+    import tpot2
+    import sklearn
+    import sklearn.datasets
+
+    scorer = sklearn.metrics.get_scorer('roc_auc_ovo')
+
+    X, y = sklearn.datasets.load_iris(return_X_y=True)
+
+    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)
+
+    from tpot2.config.get_configspace import set_node
+    from tpot2.search_spaces.pipelines.union import UnionPipeline
+    from tpot2.search_spaces.pipelines.choice import ChoicePipeline
+    from tpot2.search_spaces.pipelines.sequential import SequentialPipeline
+    from tpot2.config.get_configspace import get_search_space
 
+    scalers = set_node("MinMaxScaler", {})
+    selectors = set_node("SelectFwe", {'alpha': 0.0002381268562})
+    transformers_layer =UnionPipeline([
+                            ChoicePipeline([
+                                set_node("SkipTransformer", {})
+                            ]),
+                            get_search_space("Passthrough",)
+                            ]
+                        )
+
+    inner_estimators_layer = UnionPipeline([
+                                get_search_space("Passthrough",)]
+                            )
+    estimators = set_node("HistGradientBoostingClassifier", 
+                        {'early_stop': 'valid', 
+                        'l2_regularization': 0.0011074158219, 
+                        'learning_rate': 0.0050792320068, 
+                        'max_depth': None, 
+                        'max_features': 0.3430178535213, 
+                        'max_leaf_nodes': 237, 
+                        'min_samples_leaf': 63, 
+                        'tol': 0.0001, 
+                        'n_iter_no_change': 14, 
+                        'validation_fraction': 0.2343285974496})
+
+    pipeline = SequentialPipeline(search_spaces=[
+                                        scalers,
+                                        selectors, 
+                                        transformers_layer,
+                                        inner_estimators_layer,
+                                        estimators,
+                                        ])
+    ind = pipeline.generate()
+
+    est = tpot2.TPOTClassifier(search_space="linear", n_jobs=40, verbose=5, generations=1, population_size=5, customized_initial_population=[ind])
+
+    est.fit(X_train, y_train)
+
+    print(str(est.fitted_pipeline_))
+
+    print(scorer(est, X_test, y_test))
+```
 
 ## Any background context you want to provide?
 
+Under this version, users can specify well-defined initial pipeline population in SequentialPipeline type pipeline. This update has the potential to enhance the algorithm's performance and reduce evolutionary time.
 
+Several Tips:
 
-## What are the relevant issues?
+1. These SequentialPipeline pipelines can be obtained:
+
+Referencing the examples in customized_initial_population.py and modifying them according to TPOT2's config_dict.
 
-[you can link directly to issues by entering # then the number of the issue]
+2. We consider the relationship between #customized initial pipelines and #population as follows:
 
-## Screenshots (if appropriate)
+```
+init_population_size = len(customized_initial_population)
+if self.cur_population_size <= init_population_size:
+    initial_population = customized_initial_population[:self.cur_population_size]
+else:
+    initial_population = [next(self.individual_generator) for _ in range(self.cur_population_size - init_population_size)]
+    initial_population = customized_initial_population + initial_population
+```
+3. The current version only supports the case where search_space is "linear" and the supplied pipelines are of type SequentialPipeline. If this approach looks appropriate, we will extend it to the case where search_space is "graph" and the pipelines are of type GraphPipeline.
 
 
+## What are the relevant issues?
+
+[issue-61](https://github.com/EpistasisLab/tpot2/issues/61)
 
-## Questions:
+## Main Contributors
 
-- Do the docs need to be updated?
-- Does this PR add new (Python) dependencies?
+@peiyanpan @t-harden
\ No newline at end of file
diff --git a/tpot2/config/get_configspace.py b/tpot2/config/get_configspace.py
index 72eac9a6..6d2a5921 100644
--- a/tpot2/config/get_configspace.py
+++ b/tpot2/config/get_configspace.py
@@ -546,6 +546,10 @@ def get_search_space(name, n_classes=3, n_samples=1000, n_features=100, random_s
     
     return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, base_node=base_node, n_jobs=n_jobs)
 
+def set_node(name, params):
+    node = get_node(name)
+    node.space = params
+    return node
 
 def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None, base_node=EstimatorNode, n_jobs=1):
     """
diff --git a/tpot2/evolvers/base_evolver.py b/tpot2/evolvers/base_evolver.py
index 8e4958aa..f50cf7d1 100644
--- a/tpot2/evolvers/base_evolver.py
+++ b/tpot2/evolvers/base_evolver.py
@@ -144,6 +144,8 @@ def __init__(   self,
                     callback = None,
                     rng=None,
 
+                    customized_initial_population=[]
+
                     ) -> None:
         """
         Uses mutation, crossover, and optimization functions to evolve a population of individuals towards the given objective functions.
@@ -433,6 +435,15 @@ def __init__(   self,
             init_names = init_names + ["Budget"]
         if self.population is None:
             self.population = tpot2.Population(column_names=init_names)
+            init_population_size = len(customized_initial_population)
+            if self.cur_population_size <= init_population_size:
+                initial_population = customized_initial_population[:self.cur_population_size]
+            else:
+                initial_population = [next(self.individual_generator) for _ in range(self.cur_population_size - init_population_size)]
+                initial_population = customized_initial_population + initial_population
+            # initial_population = [next(self.individual_generator) for _ in range(self.cur_population_size)]
+            # for individual in initial_population:
+            #     print(individual.unique_id())
             initial_population = [next(self.individual_generator) for _ in range(self.cur_population_size)]
             self.population.add_to_population(initial_population, self.rng)
             self.population.update_column(self.population.population, column_names="Generation", data=self.generation)
diff --git a/tpot2/tpot_estimator/estimator.py b/tpot2/tpot_estimator/estimator.py
index f7848f09..cb259fd4 100644
--- a/tpot2/tpot_estimator/estimator.py
+++ b/tpot2/tpot_estimator/estimator.py
@@ -135,6 +135,8 @@ def __init__(self,
                          # random seed for random number generator (rng)
                         random_state = None,
 
+                        customized_initial_population=[]
+
                         ):
 
         '''
@@ -508,6 +510,7 @@ def __init__(self,
 
         self.label_encoder_ = None
 
+        self.customized_initial_population = customized_initial_population
 
         set_dask_settings()
 
@@ -757,6 +760,8 @@ def ind_generator(rng):
                                             crossover_then_mutate_probability= self.crossover_then_mutate_probability,
 
                                             rng=self.rng,
+
+                                            customized_initial_population=self.customized_initial_population,
                                             )
 
 

From 1a74df36b6595203b119899c59ce51fc46b95d29 Mon Sep 17 00:00:00 2001
From: gygb <xssjdgy@163.com>
Date: Tue, 3 Dec 2024 11:25:48 +0100
Subject: [PATCH 3/6] some changes

---
 PULL_REQUEST_TEMPLATE.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/PULL_REQUEST_TEMPLATE.md b/PULL_REQUEST_TEMPLATE.md
index 07a9da06..37c756fa 100644
--- a/PULL_REQUEST_TEMPLATE.md
+++ b/PULL_REQUEST_TEMPLATE.md
@@ -82,7 +82,7 @@ def test_customized_iniPop():
 
 ## Any background context you want to provide?
 
-Under this version, users can specify well-defined initial pipeline population in SequentialPipeline type pipeline. This update has the potential to enhance the algorithm's performance and reduce evolutionary time.
+In this version, users can specify a well-defined initial pipeline population, currently limited to the *SequentialPipeline* type. This update has the potential to improve algorithm performance and reduce evolutionary time.
 
 Several Tips:
 
@@ -90,7 +90,7 @@ Several Tips:
 
 Referencing the examples in customized_initial_population.py and modifying them according to TPOT2's config_dict.
 
-2. We consider the relationship between #customized initial pipelines and #population as follows:
+2. We consider the relationship between #customized initial pipelines and #population_size as follows:
 
 ```
 init_population_size = len(customized_initial_population)

From c983018e2e38d83a73e84aaa04b92af9531beb02 Mon Sep 17 00:00:00 2001
From: peiyanpan <1065112771@qq.com>
Date: Tue, 3 Dec 2024 19:15:27 +0800
Subject: [PATCH 4/6] Add some detail

---
 PULL_REQUEST_TEMPLATE.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/PULL_REQUEST_TEMPLATE.md b/PULL_REQUEST_TEMPLATE.md
index 37c756fa..6dde84eb 100644
--- a/PULL_REQUEST_TEMPLATE.md
+++ b/PULL_REQUEST_TEMPLATE.md
@@ -5,9 +5,13 @@ Add the new feature of allowing users to specify customized initial pipeline pop
 ## Where should the reviewer start?
 
 - tpot2/tests/test_customized_iniPop.py
+Shows how to build a SequentialPipeline from scalers, selectors, transformers_layer, inner_estimators_layer and estimators, and how to pass an individual generated from it to TPOTClassifier through the customized_initial_population parameter.
 - tpot2/config/get_configspace.py
+Adds set_node(), which returns the search-space node for a named method with its hyperparameters fixed to user-supplied values.
 - tpot2/evolvers/base_evolver.py
+Adds the logic that reconciles the customized initial population with the population size: when fewer individuals are supplied than the population size, the remainder is drawn from the individual generator.
 - tpot2/tpot_estimator/estimator.py
+Add passing of customized_initial_population parameter
 
 ## How should this PR be tested?
 

From 0e3f4bab4c56561bf3dd2cdb95a2840cb3ccef23 Mon Sep 17 00:00:00 2001
From: peiyanpan <1065112771@qq.com>
Date: Tue, 3 Dec 2024 23:43:30 +0800
Subject: [PATCH 5/6] fix some bugs

---
 tpot2/config/get_configspace.py       |  4 ----
 tpot2/evolvers/base_evolver.py        |  4 ++--
 tpot2/tests/test_customized_iniPop.py | 10 +++++-----
 3 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/tpot2/config/get_configspace.py b/tpot2/config/get_configspace.py
index 6d2a5921..72eac9a6 100644
--- a/tpot2/config/get_configspace.py
+++ b/tpot2/config/get_configspace.py
@@ -546,10 +546,6 @@ def get_search_space(name, n_classes=3, n_samples=1000, n_features=100, random_s
     
     return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, base_node=base_node, n_jobs=n_jobs)
 
-def set_node(name, params):
-    node = get_node(name)
-    node.space = params
-    return node
 
 def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None, base_node=EstimatorNode, n_jobs=1):
     """
diff --git a/tpot2/evolvers/base_evolver.py b/tpot2/evolvers/base_evolver.py
index f50cf7d1..310341cd 100644
--- a/tpot2/evolvers/base_evolver.py
+++ b/tpot2/evolvers/base_evolver.py
@@ -437,14 +437,14 @@ def __init__(   self,
             self.population = tpot2.Population(column_names=init_names)
             init_population_size = len(customized_initial_population)
             if self.cur_population_size <= init_population_size:
-                initial_population = customized_initial_population[:self.cur_population_size]
+                initial_population = customized_initial_population
+                # initial_population = customized_initial_population[:self.cur_population_size]
             else:
                 initial_population = [next(self.individual_generator) for _ in range(self.cur_population_size - init_population_size)]
                 initial_population = customized_initial_population + initial_population
             # initial_population = [next(self.individual_generator) for _ in range(self.cur_population_size)]
             # for individual in initial_population:
             #     print(individual.unique_id())
-            initial_population = [next(self.individual_generator) for _ in range(self.cur_population_size)]
             self.population.add_to_population(initial_population, self.rng)
             self.population.update_column(self.population.population, column_names="Generation", data=self.generation)
 
diff --git a/tpot2/tests/test_customized_iniPop.py b/tpot2/tests/test_customized_iniPop.py
index ab55a8f6..cc12845d 100644
--- a/tpot2/tests/test_customized_iniPop.py
+++ b/tpot2/tests/test_customized_iniPop.py
@@ -13,17 +13,17 @@ def test_customized_iniPop():
 
     X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)
 
-    from tpot2.config.get_configspace import set_node
+    from tpot2.search_spaces.nodes.estimator_node import EstimatorNodeIndividual
     from tpot2.search_spaces.pipelines.union import UnionPipeline
     from tpot2.search_spaces.pipelines.choice import ChoicePipeline
     from tpot2.search_spaces.pipelines.sequential import SequentialPipeline
     from tpot2.config.get_configspace import get_search_space
 
-    scalers = set_node("MinMaxScaler", {})
-    selectors = set_node("SelectFwe", {'alpha': 0.0002381268562})
+    scalers = EstimatorNodeIndividual("MinMaxScaler", {})
+    selectors = EstimatorNodeIndividual("SelectFwe", {'alpha': 0.0002381268562})
     transformers_layer =UnionPipeline([
                             ChoicePipeline([
-                                set_node("SkipTransformer", {})
+                                EstimatorNodeIndividual("SkipTransformer", {})
                             ]),
                             get_search_space("Passthrough",)
                             ]
@@ -32,7 +32,7 @@ def test_customized_iniPop():
     inner_estimators_layer = UnionPipeline([
                                 get_search_space("Passthrough",)]
                             )
-    estimators = set_node("HistGradientBoostingClassifier", 
+    estimators = EstimatorNodeIndividual("HistGradientBoostingClassifier", 
                         {'early_stop': 'valid', 
                         'l2_regularization': 0.0011074158219, 
                         'learning_rate': 0.0050792320068, 

From 9ca211617b0d85c7bdc711da93ebef0d6aeb53e7 Mon Sep 17 00:00:00 2001
From: peiyanpan <1065112771@qq.com>
Date: Sat, 7 Dec 2024 16:52:14 +0800
Subject: [PATCH 6/6] enhance set_node

---
 Tutorial/1_Using_TPOT.ipynb     | 212 ++++++++++++++++++++++++++++++--
 tpot2/config/get_configspace.py |  12 ++
 2 files changed, 216 insertions(+), 8 deletions(-)

diff --git a/Tutorial/1_Using_TPOT.ipynb b/Tutorial/1_Using_TPOT.ipynb
index 92821ca1..f520926a 100644
--- a/Tutorial/1_Using_TPOT.ipynb
+++ b/Tutorial/1_Using_TPOT.ipynb
@@ -23,6 +23,207 @@
     "This is actually an advantage over fixed grid search techniques: TPOT is meant to be an assistant that gives you ideas on how to solve a particular machine learning problem by exploring pipeline configurations that you might have never considered, then leaves the fine-tuning to more constrained parameter tuning techniques such as grid search or bayesian optimization."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n",
+      "/Users/pyp/Desktop/code/tpot2/tpot2/tpot_estimator/estimator.py:457: UserWarning: Both generations and max_time_mins are set. TPOT will terminate when the first condition is met.\n",
+      "  warnings.warn(\"Both generations and max_time_mins are set. TPOT will terminate when the first condition is met.\")\n",
+      "Generation:   0%|          | 0/1 [00:00<?, ?it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING THIS INDIVIDUAL CAUSED AND EXCEPTION \n",
+      " <tpot2.search_spaces.pipelines.sequential.SequentialPipelineIndividual object at 0x15bc6da50> \n",
+      " X has 13 features, but BernoulliNB is expecting 11 features as input. \n",
+      " Traceback (most recent call last):\n",
+      "  File \"/Users/pyp/Desktop/code/tpot2/tpot2/utils/eval_utils.py\", line 87, in objective_nan_wrapper\n",
+      "    value = func_timeout.func_timeout(timeout, objective_function, args=[individual], kwargs=objective_kwargs)\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/func_timeout/dafunc.py\", line 108, in func_timeout\n",
+      "    raise_exception(exception)\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/func_timeout/py3_raise.py\", line 7, in raise_exception\n",
+      "    raise exception[0] from None\n",
+      "  File \"/Users/pyp/Desktop/code/tpot2/tpot2/tpot_estimator/estimator.py\", line 644, in objective_function\n",
+      "    return objective_function_generator(\n",
+      "  File \"/Users/pyp/Desktop/code/tpot2/tpot2/tpot_estimator/estimator_utils.py\", line 162, in objective_function_generator\n",
+      "    cv_obj_scores = cross_val_score_objective(sklearn.base.clone(pipeline),x,y,scorers=scorers, cv=cv , fold=step)\n",
+      "  File \"/Users/pyp/Desktop/code/tpot2/tpot2/tpot_estimator/cross_val_utils.py\", line 93, in cross_val_score_objective\n",
+      "    this_fold_scores = [sklearn.metrics.get_scorer(scorer)(this_fold_estimator, X_test, y_test) for scorer in scorers]\n",
+      "  File \"/Users/pyp/Desktop/code/tpot2/tpot2/tpot_estimator/cross_val_utils.py\", line 93, in <listcomp>\n",
+      "    this_fold_scores = [sklearn.metrics.get_scorer(scorer)(this_fold_estimator, X_test, y_test) for scorer in scorers]\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/sklearn/metrics/_scorer.py\", line 279, in __call__\n",
+      "    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/sklearn/metrics/_scorer.py\", line 371, in _score\n",
+      "    y_pred = method_caller(\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/sklearn/metrics/_scorer.py\", line 89, in _cached_call\n",
+      "    result, _ = _get_response_values(\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/sklearn/utils/_response.py\", line 211, in _get_response_values\n",
+      "    y_pred = prediction_method(X)\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/sklearn/pipeline.py\", line 721, in predict_proba\n",
+      "    return self.steps[-1][1].predict_proba(Xt, **params)\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/sklearn/naive_bayes.py\", line 144, in predict_proba\n",
+      "    return np.exp(self.predict_log_proba(X))\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/sklearn/naive_bayes.py\", line 122, in predict_log_proba\n",
+      "    X = self._check_X(X)\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/sklearn/naive_bayes.py\", line 1178, in _check_X\n",
+      "    X = super()._check_X(X)\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/sklearn/naive_bayes.py\", line 574, in _check_X\n",
+      "    return self._validate_data(X, accept_sparse=\"csr\", reset=False)\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/sklearn/base.py\", line 654, in _validate_data\n",
+      "    self._check_n_features(X, reset=reset)\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/sklearn/base.py\", line 443, in _check_n_features\n",
+      "    raise ValueError(\n",
+      "ValueError: X has 13 features, but BernoulliNB is expecting 11 features as input.\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Generation: 100%|██████████| 1/1 [00:02<00:00,  2.56s/it]\n",
+      "2024-12-07 16:43:46,992 - distributed.scheduler - ERROR - Removing worker 'tcp://127.0.0.1:60611' caused the cluster to lose scattered data, which can't be recovered: {'ndarray-0597151fa3bea6002bd2596e80d2f9ac', 'ndarray-6fcb6e5015d408ff90b146c00ff2c93b'} (stimulus_id='handle-worker-cleanup-1733561026.9929442')\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING THIS INDIVIDUAL CAUSED AND EXCEPTION \n",
+      " <tpot2.search_spaces.pipelines.sequential.SequentialPipelineIndividual object at 0x11e5b5ab0> \n",
+      " X has 18 features, but QuadraticDiscriminantAnalysis is expecting 14 features as input. \n",
+      " Traceback (most recent call last):\n",
+      "  File \"/Users/pyp/Desktop/code/tpot2/tpot2/utils/eval_utils.py\", line 87, in objective_nan_wrapper\n",
+      "    value = func_timeout.func_timeout(timeout, objective_function, args=[individual], kwargs=objective_kwargs)\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/func_timeout/dafunc.py\", line 108, in func_timeout\n",
+      "    raise_exception(exception)\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/func_timeout/py3_raise.py\", line 7, in raise_exception\n",
+      "    raise exception[0] from None\n",
+      "  File \"/Users/pyp/Desktop/code/tpot2/tpot2/tpot_estimator/estimator.py\", line 644, in objective_function\n",
+      "    return objective_function_generator(\n",
+      "  File \"/Users/pyp/Desktop/code/tpot2/tpot2/tpot_estimator/estimator_utils.py\", line 162, in objective_function_generator\n",
+      "    cv_obj_scores = cross_val_score_objective(sklearn.base.clone(pipeline),x,y,scorers=scorers, cv=cv , fold=step)\n",
+      "  File \"/Users/pyp/Desktop/code/tpot2/tpot2/tpot_estimator/cross_val_utils.py\", line 93, in cross_val_score_objective\n",
+      "    this_fold_scores = [sklearn.metrics.get_scorer(scorer)(this_fold_estimator, X_test, y_test) for scorer in scorers]\n",
+      "  File \"/Users/pyp/Desktop/code/tpot2/tpot2/tpot_estimator/cross_val_utils.py\", line 93, in <listcomp>\n",
+      "    this_fold_scores = [sklearn.metrics.get_scorer(scorer)(this_fold_estimator, X_test, y_test) for scorer in scorers]\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/sklearn/metrics/_scorer.py\", line 279, in __call__\n",
+      "    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/sklearn/metrics/_scorer.py\", line 371, in _score\n",
+      "    y_pred = method_caller(\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/sklearn/metrics/_scorer.py\", line 89, in _cached_call\n",
+      "    result, _ = _get_response_values(\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/sklearn/utils/_response.py\", line 211, in _get_response_values\n",
+      "    y_pred = prediction_method(X)\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/sklearn/pipeline.py\", line 721, in predict_proba\n",
+      "    return self.steps[-1][1].predict_proba(Xt, **params)\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/sklearn/discriminant_analysis.py\", line 1037, in predict_proba\n",
+      "    values = self._decision_function(X)\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/sklearn/discriminant_analysis.py\", line 966, in _decision_function\n",
+      "    X = self._validate_data(X, reset=False)\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/sklearn/base.py\", line 654, in _validate_data\n",
+      "    self._check_n_features(X, reset=reset)\n",
+      "  File \"/Users/pyp/miniconda3/envs/tpot2env3/lib/python3.10/site-packages/sklearn/base.py\", line 443, in _check_n_features\n",
+      "    raise ValueError(\n",
+      "ValueError: X has 18 features, but QuadraticDiscriminantAnalysis is expecting 14 features as input.\n",
+      "\n",
+      "Generation:  1\n",
+      "Best roc_auc_score score: 0.9898313492063491\n",
+      "Pipeline(steps=[('normalizer', Normalizer(norm='l1')),\n",
+      "                ('passthrough', Passthrough()),\n",
+      "                ('featureunion-1',\n",
+      "                 FeatureUnion(transformer_list=[('featureunion',\n",
+      "                                                 FeatureUnion(transformer_list=[('quantiletransformer',\n",
+      "                                                                                 QuantileTransformer(n_quantiles=65,\n",
+      "                                                                                                     output_distribution='normal')),\n",
+      "                                                                                ('pca',\n",
+      "                                                                                 PCA(n_components=0.748157093073))])),\n",
+      "                                                ('passthrough',\n",
+      "                                                 Passthrough())])),\n",
+      "                ('featureunion-2',\n",
+      "                 FeatureUnion(transformer_list=[('skiptransformer',\n",
+      "                                                 SkipTransformer()),\n",
+      "                                                ('passthrough',\n",
+      "                                                 Passthrough())])),\n",
+      "                ('decisiontreeclassifier',\n",
+      "                 DecisionTreeClassifier(class_weight='balanced', max_depth=4,\n",
+      "                                        max_features='log2', min_samples_leaf=4,\n",
+      "                                        min_samples_split=12))])\n",
+      "0.9976851851851851\n"
+     ]
+    }
+   ],
+   "source": [
+    "import tpot2\n",
+    "import sklearn\n",
+    "import sklearn.datasets\n",
+    "\n",
+    "scorer = sklearn.metrics.get_scorer('roc_auc_ovo')\n",
+    "\n",
+    "X, y = sklearn.datasets.load_iris(return_X_y=True)\n",
+    "\n",
+    "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n",
+    "\n",
+    "from tpot2.search_spaces.nodes.estimator_node import EstimatorNode\n",
+    "from tpot2.search_spaces.pipelines.union import UnionPipeline\n",
+    "from tpot2.search_spaces.pipelines.choice import ChoicePipeline\n",
+    "from tpot2.search_spaces.pipelines.sequential import SequentialPipeline\n",
+    "from tpot2.config.get_configspace import get_search_space, set_node, get_node\n",
+    "\n",
+    "scalers = set_node(\"MinMaxScaler\", {})\n",
+    "selectors = set_node(\"SelectFwe\", {'alpha': 0.0002381268562})\n",
+    "transformers_layer =UnionPipeline([\n",
+    "                        ChoicePipeline([\n",
+    "                            set_node(\"SkipTransformer\", {})\n",
+    "                        ]),\n",
+    "                        get_search_space(\"Passthrough\",)\n",
+    "                        ]\n",
+    "                    )\n",
+    "\n",
+    "inner_estimators_layer = UnionPipeline([\n",
+    "                            get_search_space(\"Passthrough\",)]\n",
+    "                        )\n",
+    "estimators = set_node(\"HistGradientBoostingClassifier\", \n",
+    "                    {'early_stop': 'valid', \n",
+    "                    'l2_regularization': 0.0011074158219, \n",
+    "                    'learning_rate': 0.0050792320068, \n",
+    "                    'max_depth': None, \n",
+    "                    'max_features': 0.3430178535213, \n",
+    "                    'max_leaf_nodes': 237, \n",
+    "                    'min_samples_leaf': 63, \n",
+    "                    'tol': 0.0001, \n",
+    "                    'n_iter_no_change': 14, \n",
+    "                    'validation_fraction': 0.2343285974496})\n",
+    "\n",
+    "pipeline = SequentialPipeline(search_spaces=[\n",
+    "                                    scalers,\n",
+    "                                    selectors, \n",
+    "                                    transformers_layer,\n",
+    "                                    inner_estimators_layer,\n",
+    "                                    estimators,\n",
+    "                                    ])\n",
+    "ind = pipeline.generate()\n",
+    "\n",
+    "est = tpot2.TPOTClassifier(search_space=\"linear\", n_jobs=40, verbose=5, generations=1, population_size=5, customized_initial_population=[ind])\n",
+    "# for ind in est.evaluated_individuals.iterrows():\n",
+    "#     print(ind[1]['Instance'])\n",
+    "est.fit(X_train, y_train)\n",
+    "\n",
+    "print(str(est.fitted_pipeline_))\n",
+    "\n",
+    "print(scorer(est, X_test, y_test))"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -2414,7 +2615,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "tpot_dev",
+   "display_name": "tpot2env3",
    "language": "python",
    "name": "python3"
   },
@@ -2428,14 +2629,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.14"
+   "version": "3.10.15"
   },
-  "orig_nbformat": 4,
-  "vscode": {
-   "interpreter": {
-    "hash": "7fe1fe9ef32cd5efd76326a08046147513534f0dd2318301a1a96ae9071c1c4e"
-   }
-  }
+  "orig_nbformat": 4
  },
  "nbformat": 4,
  "nbformat_minor": 2
diff --git a/tpot2/config/get_configspace.py b/tpot2/config/get_configspace.py
index 72eac9a6..8943ac5f 100644
--- a/tpot2/config/get_configspace.py
+++ b/tpot2/config/get_configspace.py
@@ -546,6 +546,18 @@ def get_search_space(name, n_classes=3, n_samples=1000, n_features=100, random_s
     
     return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, base_node=base_node, n_jobs=n_jobs)
 
+def set_node(name, params, rng=None):
+    """Return the node for `name` with a fixed hyperparameter dict as its space; `params` override values sampled using `rng`."""
+    node = get_node(name)
+    if isinstance(node.space, dict):
+        # The space is already a plain dict, so the caller's values replace it wholesale.
+        node.space = params
+    else:
+        # `rng` used to be read before assignment here (UnboundLocalError); it is now an optional seed/Generator argument.
+        node.space.seed(np.random.default_rng(rng).integers(0, 2**32))
+        node.hyperparameters = {**dict(node.space.sample_configuration()), **params}
+        node.space = node.hyperparameters
+    return node
 
 def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None, base_node=EstimatorNode, n_jobs=1):
     """