From 83468fbf72cbd0663c856c4a3b5c045218df3d99 Mon Sep 17 00:00:00 2001 From: nicl-nno Date: Fri, 17 Nov 2023 22:46:47 +0300 Subject: [PATCH 01/12] Custom image classification exapmle --- .../image_classification_problem.py | 116 +++++++++++++++++- 1 file changed, 110 insertions(+), 6 deletions(-) diff --git a/examples/simple/classification/image_classification_problem.py b/examples/simple/classification/image_classification_problem.py index 178768dacf..e7ca539160 100644 --- a/examples/simple/classification/image_classification_problem.py +++ b/examples/simple/classification/image_classification_problem.py @@ -1,14 +1,15 @@ -from golem.utilities.requirements_notificator import warn_requirement - -try: - import tensorflow as tf -except ModuleNotFoundError: - warn_requirement('tensorflow', 'fedot[extra]') +from typing import Any +import numpy as np +import tensorflow as tf +from sklearn import preprocessing from sklearn.metrics import roc_auc_score as roc_auc from examples.simple.classification.classification_pipelines import cnn_composite_pipeline from fedot.core.data.data import InputData, OutputData +from fedot.core.operations.evaluation.operation_implementations.models.keras import check_input_array +from fedot.core.pipelines.node import PipelineNode +from fedot.core.pipelines.pipeline import Pipeline from fedot.core.repository.tasks import Task, TaskTypesEnum from fedot.core.utils import set_random_seed @@ -21,6 +22,107 @@ def calculate_validation_metric(predicted: OutputData, dataset_to_validate: Inpu return roc_auc_value +def cnn_model_fit(idx: np.array, features: np.array, target: np.array, params: dict): + x_train, y_train = features, target + transformed_x_train, transform_flag = check_input_array(x_train) + + if transform_flag: + print('Train data set was not scaled. The data was divided by 255.') + + if len(x_train.shape) == 3: + transformed_x_train = np.expand_dims(x_train, -1) + + if len(target.shape) < 2: + le = preprocessing.OneHotEncoder() + y_train = le.fit_transform(y_train.reshape(-1, 1)).toarray() + + optimizer_params = {'loss': "categorical_crossentropy", + 'optimizer': "adam", + 'metrics': ["accuracy"]} + + model = tf.keras.Sequential( + [ + tf.keras.layers.InputLayer(input_shape=[28, 28, 1]), + tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation="relu"), + tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"), + tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + tf.keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"), + tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + tf.keras.layers.Flatten(), + tf.keras.layers.Dropout(0.5), + tf.keras.layers.Dense(10, activation="softmax"), + ]) + + model.compile(**optimizer_params) + model.num_classes = 10 + + model.fit(transformed_x_train, y_train, batch_size=1, epochs=1, + validation_split=0.1) + + return model + + +# +# +def cnn_model_predict(fitted_model: Any, idx: np.array, features: np.array, params: dict): + x_test = features + transformed_x_test, transform_flag = check_input_array(x_test) + + if np.max(transformed_x_test) > 1: + print('Test data set was not scaled. 
The data was divided by 255.') + + if len(x_test.shape) == 3: + transformed_x_test = np.expand_dims(x_test, -1) + + # if output_mode == 'labels': + # prediction = np.round(trained_model.predict(transformed_x_test)) + # elif output_mode in ['probs', 'full_probs', 'default']: + prediction = fitted_model.predict(transformed_x_test) + # if trained_model.num_classes < 2: + # print('Data set contain only 1 target class. Please reformat your data.') + # raise ValueError('Data set contain only 1 target class. Please reformat your data.') + # elif trained_model.num_classes == 2 and output_mode != 'full_probs' and len(prediction.shape) > 1: + # prediction = prediction[:, 1] + # else: + # raise ValueError(f'Output model {output_mode} is not supported') + + return prediction, 'table' + + +# + +def preproc_predict(fitted_model: Any, idx: np.array, features: np.array, params: dict): + # example of custom data pre-processing for predict state + for i in range(features.shape[0]): + features[i, :, :] = features[i, :, :] + np.random.normal(0, 30) + return features, 'image' + + +def cnn_composite_pipeline(composite_flag: bool = True) -> Pipeline: + """ + Returns pipeline with the following structure: + + .. image:: img_classification_pipelines/cnn_composite_pipeline.png + :width: 55% + + Where cnn - convolutional neural network, rf - random forest + + :param composite_flag: add additional random forest estimator + """ + node_first = PipelineNode('custom/preproc_image') + node_first.parameters = {'model_predict': preproc_predict} + + node_second = PipelineNode('custom/cnn_1', nodes_from=[node_first]) + node_second.parameters = {'model_predict': cnn_model_predict, + 'model_fit': cnn_model_fit} + + node_final = PipelineNode('rf', nodes_from=[node_second]) + + pipeline = Pipeline(node_final) + return pipeline + + def run_image_classification_problem(train_dataset: tuple, test_dataset: tuple, composite_flag: bool = True): @@ -51,3 +153,5 @@ def run_image_classification_problem(train_dataset: tuple, roc_auc_on_valid, dataset_to_train, dataset_to_validate = run_image_classification_problem( train_dataset=training_set, test_dataset=testing_set) + + print(roc_auc_on_valid) From 148dd665d0384e4fe3ae31f41f21abbed40389f1 Mon Sep 17 00:00:00 2001 From: nicl-nno Date: Wed, 22 Nov 2023 14:57:05 +0300 Subject: [PATCH 02/12] Upd --- .../simple/classification/cust/__init__.py | 0 .../simple/classification/cust/cnn_impls.py | 222 ++++++++ .../cust/data_operation_repository.json | 346 +++++++++++++ .../cust/default_operation_params.json | 96 ++++ .../simple/classification/cust/image_class.py | 59 +++ .../image_classification_problem.py | 4 +- .../image_classification_problem_with_opt.py | 230 +++++++++ .../image_classification_problem_with_opt2.py | 175 +++++++ .../classification/cust/image_preproc.py | 84 +++ .../classification/cust/model_repository.json | 487 ++++++++++++++++++ .../classification/cust/preproc_impls.py | 43 ++ .../operations/evaluation/classification.py | 3 - .../operation_implementations/models/keras.py | 6 +- test/unit/tasks/test_classification.py | 2 +- 14 files changed, 1748 insertions(+), 9 deletions(-) create mode 100644 examples/simple/classification/cust/__init__.py create mode 100644 examples/simple/classification/cust/cnn_impls.py create mode 100644 examples/simple/classification/cust/data_operation_repository.json create mode 100644 examples/simple/classification/cust/default_operation_params.json create mode 100644 examples/simple/classification/cust/image_class.py rename 
examples/simple/classification/{ => cust}/image_classification_problem.py (97%) create mode 100644 examples/simple/classification/cust/image_classification_problem_with_opt.py create mode 100644 examples/simple/classification/cust/image_classification_problem_with_opt2.py create mode 100644 examples/simple/classification/cust/image_preproc.py create mode 100644 examples/simple/classification/cust/model_repository.json create mode 100644 examples/simple/classification/cust/preproc_impls.py diff --git a/examples/simple/classification/cust/__init__.py b/examples/simple/classification/cust/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/examples/simple/classification/cust/cnn_impls.py b/examples/simple/classification/cust/cnn_impls.py new file mode 100644 index 0000000000..3655d72584 --- /dev/null +++ b/examples/simple/classification/cust/cnn_impls.py @@ -0,0 +1,222 @@ +import logging +import os +import random +from typing import Optional + +import numpy as np +from golem.utilities.requirements_notificator import warn_requirement + +from fedot.core.operations.operation_parameters import OperationParameters + +try: + import tensorflow as tf +except ModuleNotFoundError: + warn_requirement('tensorflow', 'fedot[extra]') + tf = None + +from fedot.core.data.data import InputData, OutputData +from golem.core.log import LoggerAdapter, default_log +from fedot.core.operations.evaluation.operation_implementations.implementation_interfaces import ModelImplementation +from sklearn import preprocessing + +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' + + +def check_input_array(x_train): + if np.max(x_train) > 1: + transformed_x_train = x_train.astype("float32") / 255 + transform_flag = True + else: + transformed_x_train = x_train + transform_flag = False + + return transformed_x_train, transform_flag + + +def create_deep_cnn(input_shape: tuple, + num_classes: int): + model = tf.keras.Sequential( + [ + tf.keras.layers.InputLayer(input_shape=input_shape), + tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation="relu"), + tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"), + tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + tf.keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"), + tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + tf.keras.layers.Flatten(), + tf.keras.layers.Dropout(0.5), + tf.keras.layers.Dense(num_classes, activation="softmax"), + ] + ) + return model + + +def create_simple_cnn(input_shape: tuple, + num_classes: int): + model = tf.keras.Sequential( + [ + tf.keras.layers.InputLayer(input_shape=input_shape), + tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation="relu"), + tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + tf.keras.layers.Flatten(), + tf.keras.layers.Dropout(0.5), + tf.keras.layers.Dense(num_classes, activation="softmax"), + ] + ) + + return model + + +def create_vgg16(input_shape: tuple, + num_classes: int): + model = tf.keras.applications.vgg16.VGG16(include_top=True, + weights=None, + input_shape=input_shape, + classes=num_classes, + classifier_activation='sigmoid') + return model + + +def fit_cnn(train_data: InputData, + model, + epochs: int = 10, + batch_size: int = 128, + optimizer_params: dict = None, + logger: Optional[LoggerAdapter] = None): + x_train, y_train = train_data.features, train_data.target + transformed_x_train, transform_flag = check_input_array(x_train) + + if logger is None: + logger = default_log(prefix=__name__) + + if transform_flag: + 
logger.debug('Train data set was not scaled. The data was divided by 255.') + + if len(x_train.shape) == 3: + transformed_x_train = np.expand_dims(x_train, -1) + + if len(train_data.target.shape) < 2: + le = preprocessing.OneHotEncoder() + y_train = le.fit_transform(y_train.reshape(-1, 1)).toarray() + + if optimizer_params is None: + optimizer_params = {'loss': "categorical_crossentropy", + 'optimizer': "adam", + 'metrics': ["accuracy"]} + + model.compile(**optimizer_params) + model.num_classes = train_data.num_classes + if logger is None: + logger = default_log(prefix=__name__) + + if logger.logging_level > logging.DEBUG: + verbose = 0 + else: + verbose = 2 + + if epochs is None: + logger.warning('The number of training epochs was not set. The selected number of epochs is 10.') + + model.fit(transformed_x_train, y_train, batch_size=batch_size, epochs=epochs, + validation_split=0.1, verbose=verbose) + + return model + + +def predict_cnn(trained_model, predict_data: InputData, output_mode: str = 'labels', logger=None) -> OutputData: + # x_test = predict_data.features + # transformed_x_test, transform_flag = check_input_array(x_test) + # + # if logger is None: + # logger = default_log(prefix=__name__) + # + # if np.max(transformed_x_test) > 1: + # logger.warning('Test data set was not scaled. The data was divided by 255.') + # + # if len(x_test.shape) == 3: + # transformed_x_test = np.expand_dims(x_test, -1) + # + # if output_mode == 'labels': + # prediction = np.round(trained_model.predict(transformed_x_test)) + # elif output_mode in ['probs', 'full_probs', 'default']: + # prediction = trained_model.predict(transformed_x_test) + # if trained_model.num_classes < 2: + # logger.error('Data set contain only 1 target class. Please reformat your data.') + # raise ValueError('Data set contain only 1 target class. 
Please reformat your data.') + # elif trained_model.num_classes == 2 and output_mode != 'full_probs' and len(prediction.shape) > 1: + # prediction = prediction[:, 1] + # else: + # raise ValueError(f'Output model {output_mode} is not supported') + prediction = np.asarray([[random.random()] for j in range(predict_data.features.shape[0])]) + return prediction + + +cnn_model_dict = {'deep': create_deep_cnn, + 'simplified': create_simple_cnn, + 'vgg16': create_vgg16} + + +class MyCNNImplementation(ModelImplementation): + def __init__(self, params: Optional[OperationParameters] = None): + super().__init__(params) + + default_params = {'log': default_log(prefix=__name__), + 'epochs': 10, + 'batch_size': 32, + 'output_mode': 'labels', + 'architecture_type': 'simplified', + 'optimizer_parameters': {'loss': "categorical_crossentropy", + 'optimizer': "adam", + 'metrics': ["accuracy"]}} + + complete_params = {**default_params, **self.params.to_dict()} + self.params.update(**complete_params) + + def fit(self, train_data): + """ Method fit model on a dataset + + :param train_data: data to train the model + """ + + # TODO make case for multiclass multioutput task + # check for multioutput target + if len(train_data.target.shape) < 2: + self.classes = np.unique(train_data.target) + else: + self.classes = np.arange(train_data.target.shape[1]) + + self.model = cnn_model_dict[self.params.get('architecture_type')](input_shape=train_data.features.shape[1:4], + num_classes=len(self.classes)) + + # self.model = fit_cnn(train_data=train_data, model=self.model, epochs=self.params.get('epochs'), + # batch_size=self.params.get('batch_size'), + # optimizer_params=self.params.get('optimizer_parameters'), logger=self.params.get('log')) + return self.model + + def predict(self, input_data): + """ Method make prediction with labels of classes for predict stage + + :param input_data: data with features to process + """ + + return predict_cnn(trained_model=self.model, predict_data=input_data, + output_mode='labels', logger=self.params['log']) + + def predict_proba(self, input_data): + """ Method make prediction with probabilities of classes + + :param input_data: data with features to process + """ + + return predict_cnn(trained_model=self.model, predict_data=input_data, output_mode='probs') + + @property + def classes_(self): + return self.classes + + def __deepcopy__(self, memo=None): + clone_model = tf.keras.models.clone_model(self.model) + clone_model.compile(optimizer=self.model.optimizer, loss=self.model.loss, metrics=self.model.metrics) + clone_model.set_weights(self.model.get_weights()) + return clone_model diff --git a/examples/simple/classification/cust/data_operation_repository.json b/examples/simple/classification/cust/data_operation_repository.json new file mode 100644 index 0000000000..769edc9a63 --- /dev/null +++ b/examples/simple/classification/cust/data_operation_repository.json @@ -0,0 +1,346 @@ +{ + "metadata": { + "data_sources": { + "tasks": "[TaskTypesEnum.classification, TaskTypesEnum.regression, TaskTypesEnum.clustering, TaskTypesEnum.ts_forecasting]", + "accepted_node_types": [ + "PrimaryNode" + ], + "forbidden_node_types": "[]", + "strategies": [ + "fedot.core.operations.evaluation.data_source", + "DataSourceStrategy" + ], + "tags": [ + "non-default", + "data_source" + ], + "description": "Implementations of data sources in multi-modal pipelines" + }, + "custom_preprocessing": { + "tasks": "[TaskTypesEnum.classification, TaskTypesEnum.regression]", + "input_type": "[DataTypesEnum.table]", + 
"output_type": "[DataTypesEnum.table]", + "accepted_node_types": [ + "any" + ], + "forbidden_node_types": "[]", + "strategies": [ + "fedot.core.operations.evaluation.common_preprocessing", + "FedotPreprocessingStrategy" + ], + "tags": [ + "sklearn" + ], + "description": "Implementations of the preprocessing from scikit-learn framework" + }, + "image_preprocessing": { + "tasks": "[TaskTypesEnum.classification, TaskTypesEnum.regression]", + "input_type": "[DataTypesEnum.image]", + "output_type": "[DataTypesEnum.image]", + "accepted_node_types": [ + "PrimaryNode" + ], + "forbidden_node_types": "[]", + "strategies": [ + "examples.simple.classification.cust.image_preproc", + "ImagePreprocessingStrategy" + ], + "tags": [ + "custom" + ], + "description": "Implementations of the preprocessing from scikit-learn framework" + }, + "sklearn_categorical": { + "tasks": "[TaskTypesEnum.classification, TaskTypesEnum.regression, TaskTypesEnum.clustering]", + "input_type": "[DataTypesEnum.table]", + "output_type": "[DataTypesEnum.table]", + "accepted_node_types": [ + "primary" + ], + "forbidden_node_types": "[]", + "strategies": [ + "fedot.core.operations.evaluation.common_preprocessing", + "FedotPreprocessingStrategy" + ], + "tags": [ + "sklearn" + ], + "description": "Implementations of OneHot encoding (etc.) from scikit-learn framework" + }, + "dimension_transformation": { + "tasks": "[TaskTypesEnum.classification, TaskTypesEnum.regression, TaskTypesEnum.clustering, TaskTypesEnum.ts_forecasting]", + "input_type": "[DataTypesEnum.table]", + "output_type": "[DataTypesEnum.table]", + "accepted_node_types": [ + "any" + ], + "forbidden_node_types": "[]", + "strategies": [ + "fedot.core.operations.evaluation.common_preprocessing", + "FedotPreprocessingStrategy" + ], + "tags": [ + "sklearn" + ], + "description": "Implementations of the dimensionality transformation operations (e.g. 
PCA) from scikit-learn framework" + }, + "regression_preprocessing": { + "tasks": "[TaskTypesEnum.regression, TaskTypesEnum.ts_forecasting]", + "input_type": "[DataTypesEnum.table]", + "output_type": "[DataTypesEnum.table]", + "accepted_node_types": [ + "any" + ], + "forbidden_node_types": "[]", + "strategies": [ + "fedot.core.operations.evaluation.regression", + "FedotRegressionPreprocessingStrategy" + ], + "tags": [ + "sklearn" + ], + "description": "Implementations of the feature selection operations and robust data filtering from scikit-learn framework for regression task" + }, + "classification_preprocessing": { + "tasks": "[TaskTypesEnum.classification]", + "input_type": "[DataTypesEnum.table]", + "output_type": "[DataTypesEnum.table]", + "accepted_node_types": [ + "any" + ], + "forbidden_node_types": "[]", + "strategies": [ + "fedot.core.operations.evaluation.classification", + "FedotClassificationPreprocessingStrategy" + ], + "tags": [ + "sklearn" + ], + "description": "Implementations of the feature selection operations and robust data filtering from scikit-learn framework for classification task" + } + }, + "operations": { + "data_source_img": { + "meta": "data_sources", + "input_type": "[DataTypesEnum.image]", + "output_type": "[DataTypesEnum.image]", + "tags": [ + "data_source_img", + "nans-ignore", + "categorical-ignore" + ] + }, + "scaling": { + "meta": "custom_preprocessing", + "presets": [ + "fast_train", + "ts", + "*tree" + ], + "tags": [ + "simple", + "feature_scaling" + ] + }, + "normalization": { + "meta": "custom_preprocessing", + "presets": [ + "fast_train", + "ts", + "*tree" + ], + "tags": [ + "simple", + "feature_scaling", + "non_applicable_for_ts" + ] + }, + "simple_imputation": { + "meta": "custom_preprocessing", + "presets": [ + "fast_train", + "*tree" + ], + "tags": [ + "simple", + "imputation", + "categorical-ignore", + "non-default" + ] + }, + "pca": { + "meta": "dimension_transformation", + "presets": [ + "fast_train", + "ts", + "*tree" + ], + "tags": [ + "linear", + "dimensionality_transforming", + "correct_params", + "non_applicable_for_ts", + "feature_reduction" + ] + }, + "kernel_pca": { + "meta": "dimension_transformation", + "presets": [ + "ts", + "*tree" + ], + "tags": [ + "non_linear", + "dimensionality_transforming", + "correct_params", + "non_applicable_for_ts", + "non-default", + "feature_reduction" + ] + }, + "fast_ica": { + "meta": "dimension_transformation", + "presets": [ + "ts", + "*tree" + ], + "tags": [ + "non_linear", + "dimensionality_transforming", + "correct_params", + "non_applicable_for_ts", + "feature_reduction" + ] + }, + "poly_features": { + "meta": "dimension_transformation", + "tags": [ + "non_linear", + "dimensionality_transforming", + "non_applicable_for_ts", + "feature_engineering" + ] + }, + "ransac_lin_reg": { + "meta": "regression_preprocessing", + "presets": [ + "fast_train", + "*tree" + ], + "tags": [ + "affects_target", + "linear", + "filtering", + "correct_params", + "non_applicable_for_ts" + ] + }, + "ransac_non_lin_reg": { + "meta": "regression_preprocessing", + "presets": [ + "fast_train", + "*tree" + ], + "tags": [ + "affects_target", + "non_linear", + "filtering", + "correct_params", + "non_applicable_for_ts", + "non-default" + ] + }, + "isolation_forest_reg": { + "meta": "regression_preprocessing", + "tags": [ + "non_linear", + "filtering", + "non_applicable_for_ts" + ] + }, + "isolation_forest_class": { + "meta": "classification_preprocessing", + "tags": [ + "non_linear", + "filtering" + ] + }, + "rfe_lin_reg": { 
+ "meta": "regression_preprocessing", + "tags": [ + "linear", + "feature_selection", + "non_applicable_for_ts", + "non-default", + "non_applicable_for_ts" + ] + }, + "rfe_non_lin_reg": { + "meta": "regression_preprocessing", + "tags": [ + "non_linear", + "feature_selection", + "non_applicable_for_ts", + "non-default", + "non_applicable_for_ts" + ] + }, + "rfe_lin_class": { + "meta": "classification_preprocessing", + "tags": [ + "linear", + "feature_selection", + "non-default" + ] + }, + "rfe_non_lin_class": { + "meta": "classification_preprocessing", + "tags": [ + "non_linear", + "feature_selection", + "non-default" + ] + }, + "decompose": { + "meta": "regression_preprocessing", + "presets": [ + "fast_train", + "ts", + "*tree" + ], + "tags": [ + "non-default", + "decompose" + ] + }, + "class_decompose": { + "meta": "classification_preprocessing", + "presets": [ + "fast_train", + "*tree" + ], + "tags": [ + "non-default", + "decompose" + ] + }, + "resample": { + "meta": "classification_preprocessing", + "tags": [ + "imbalanced" + ] + }, + "gamma_filt": { + "meta": "image_preprocessing", + "tags": [ + "filtering" + ] + }, + "negamma_filt": { + "meta": "image_preprocessing", + "tags": [ + "filtering" + ] + } + } +} \ No newline at end of file diff --git a/examples/simple/classification/cust/default_operation_params.json b/examples/simple/classification/cust/default_operation_params.json new file mode 100644 index 0000000000..275612443d --- /dev/null +++ b/examples/simple/classification/cust/default_operation_params.json @@ -0,0 +1,96 @@ +{ + "rf": { + "n_jobs": 1 + }, + "rfr": { + "n_jobs": 1 + }, + "xgboost": { + "eval_metric": "mlogloss", + "nthread": 1, + "n_jobs": 1, + "verbose": 0 + }, + "catboost": { + "allow_writing_files": false, + "verbose": false, + "iterations": 1000, + "use_eval_set": false, + "use_best_model": false, + "early_stopping_rounds": null, + "n_jobs": 1 + }, + "catboostreg": { + "allow_writing_files": false, + "verbose": false, + "iterations": 1000, + "use_eval_set": false, + "use_best_model": false, + "early_stopping_rounds": null, + "n_jobs": 1 + }, + "lgbm": { + "num_leaves": 32, + "colsample_bytree": 0.8, + "subsample": 0.8, + "subsample_freq": 10, + "learning_rate": 0.03, + "n_estimators": 100, + "n_jobs": 1, + "verbose": -1 + }, + "lgbmreg": { + "num_leaves": 32, + "colsample_bytree": 0.8, + "subsample": 0.8, + "subsample_freq": 10, + "learning_rate": 0.03, + "n_estimators": 100, + "n_jobs": 1, + "verbose": -1 + }, + "ransac_lin_reg": { + "min_samples": 0.4, + "residual_threshold": 10, + "max_trials": 100, + "max_skips": 1000 + }, + "ransac_non_lin_reg": { + "min_samples": 0.4, + "residual_threshold": 10, + "max_trials": 100, + "max_skips": 1000 + }, + "h2o_regr": { + "timeout": 20, + "seed": 42, + "max_models": 3 + }, + "h2o_class": { + "timeout": 20, + "seed": 42, + "max_models": 3 + }, + "tpot_class": { + "timeout": 2, + "generations": 3, + "population_size": 3 + }, + "tpot_regr": { + "timeout": 2, + "generations": 3, + "population_size": 3 + }, + "resample": { + "balance": "expand_minority", + "replace": false, + "balance_ratio": 1 + }, + "pca": { + "svd_solver": "full", + "n_components": 0.7 + }, + "fast_ica": { + "whiten": "unit-variance" + } +} \ No newline at end of file diff --git a/examples/simple/classification/cust/image_class.py b/examples/simple/classification/cust/image_class.py new file mode 100644 index 0000000000..882a745085 --- /dev/null +++ b/examples/simple/classification/cust/image_class.py @@ -0,0 +1,59 @@ +import warnings +from typing import 
Optional + +from examples.simple.classification.cust.cnn_impls import MyCNNImplementation +from fedot.core.data.data import InputData, OutputData +from fedot.core.operations.evaluation.evaluation_interfaces import EvaluationStrategy +from fedot.core.operations.operation_parameters import OperationParameters +from fedot.utilities.random import ImplementationRandomStateHandler + +warnings.filterwarnings("ignore", category=UserWarning) + + +class ImageClassificationStrategy(EvaluationStrategy): + _operations_by_types = { + 'cnn_1': MyCNNImplementation + } + + def __init__(self, operation_type: str, params: Optional[OperationParameters] = None): + self.operation_impl = self._convert_to_operation(operation_type) + super().__init__(operation_type, params) + + def fit(self, train_data: InputData): + """ + This method is used for operation training with the data provided + :param InputData train_data: data used for operation training + :return: trained data operation + """ + + warnings.filterwarnings("ignore", category=RuntimeWarning) + + operation_implementation = self.operation_impl(self.params_for_fit) + + with ImplementationRandomStateHandler(implementation=operation_implementation): + operation_implementation.fit(train_data) + return operation_implementation + + def predict(self, trained_operation, predict_data: InputData) -> OutputData: + """ + Predict method for classification task for predict stage + + :param trained_operation: model object + :param predict_data: data used for prediction + :return: prediction target + """ + n_classes = len(trained_operation.classes_) + if self.output_mode == 'labels': + prediction = trained_operation.predict(predict_data) + elif self.output_mode in ['probs', 'full_probs', 'default']: + prediction = trained_operation.predict_proba(predict_data) + if n_classes < 2: + raise ValueError('Data set contain only 1 target class. 
Please reformat your data.') + elif n_classes == 2 and self.output_mode != 'full_probs' and len(prediction.shape) > 1: + prediction = prediction[:, 1] + else: + raise ValueError(f'Output model {self.output_mode} is not supported') + + # Convert prediction to output (if it is required) + converted = self._convert_to_output(prediction, predict_data) + return converted diff --git a/examples/simple/classification/image_classification_problem.py b/examples/simple/classification/cust/image_classification_problem.py similarity index 97% rename from examples/simple/classification/image_classification_problem.py rename to examples/simple/classification/cust/image_classification_problem.py index e7ca539160..08f20b2703 100644 --- a/examples/simple/classification/image_classification_problem.py +++ b/examples/simple/classification/cust/image_classification_problem.py @@ -110,10 +110,10 @@ def cnn_composite_pipeline(composite_flag: bool = True) -> Pipeline: :param composite_flag: add additional random forest estimator """ - node_first = PipelineNode('custom/preproc_image') + node_first = PipelineNode('gamma') node_first.parameters = {'model_predict': preproc_predict} - node_second = PipelineNode('custom/cnn_1', nodes_from=[node_first]) + node_second = PipelineNode('cnn_1', nodes_from=[node_first]) node_second.parameters = {'model_predict': cnn_model_predict, 'model_fit': cnn_model_fit} diff --git a/examples/simple/classification/cust/image_classification_problem_with_opt.py b/examples/simple/classification/cust/image_classification_problem_with_opt.py new file mode 100644 index 0000000000..91a866061a --- /dev/null +++ b/examples/simple/classification/cust/image_classification_problem_with_opt.py @@ -0,0 +1,230 @@ +import datetime +import random +from typing import Any + +import numpy as np +import tensorflow as tf +from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters +from golem.core.optimisers.genetic.operators.base_mutations import MutationTypesEnum +from golem.core.optimisers.genetic.operators.inheritance import GeneticSchemeTypesEnum +from golem.core.optimisers.genetic.operators.selection import SelectionTypesEnum +from hyperopt import hp +from sklearn.metrics import roc_auc_score as roc_auc + +from examples.simple.classification.classification_pipelines import cnn_composite_pipeline +from fedot.core.composer.composer_builder import ComposerBuilder +from fedot.core.composer.gp_composer.specific_operators import parameter_change_mutation +from fedot.core.data.data import InputData, OutputData +from fedot.core.operations.evaluation.operation_implementations.models.keras import check_input_array +from fedot.core.pipelines.node import PipelineNode +from fedot.core.pipelines.pipeline import Pipeline +from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements +from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum, ComplexityMetricsEnum +from fedot.core.repository.tasks import Task, TaskTypesEnum +from fedot.core.utils import set_random_seed + +custom_search_space = {'gamma_filt': { + 'r': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [-254, 254], + 'type': 'discrete'}, + 'g': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [-254, 254], + 'type': 'discrete'}, + 'b': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [-254, 254], + 'type': 'discrete'}, + 'ksize': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [0, 20], + 'type': 'discrete'} +} + +} + + +def 
calculate_validation_metric(predicted: OutputData, dataset_to_validate: InputData) -> float: + # the quality assessment for the simulation results + roc_auc_value = roc_auc(y_true=dataset_to_validate.target, + y_score=predicted.predict, + multi_class="ovo") + return roc_auc_value + + +def cnn_model_fit(idx: np.array, features: np.array, target: np.array, params: dict): + # x_train, y_train = features, target + # transformed_x_train, transform_flag = check_input_array(x_train) + # + # if transform_flag: + # print('Train data set was not scaled. The data was divided by 255.') + # + # if len(x_train.shape) == 3: + # transformed_x_train = np.expand_dims(x_train, -1) + # + # if len(target.shape) < 2: + # le = preprocessing.OneHotEncoder() + # y_train = le.fit_transform(y_train.reshape(-1, 1)).toarray() + # + # optimizer_params = {'loss': "categorical_crossentropy", + # 'optimizer': "adam", + # 'metrics': ["accuracy"]} + # + # model = tf.keras.Sequential( + # [ + # tf.keras.layers.InputLayer(input_shape=[28, 28, 1]), + # tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation="relu"), + # tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + # tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"), + # tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + # tf.keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"), + # tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + # tf.keras.layers.Flatten(), + # tf.keras.layers.Dropout(0.5), + # tf.keras.layers.Dense(10, activation="softmax"), + # ]) + # + # model.compile(**optimizer_params) + # model.num_classes = 10 + + # model.fit(transformed_x_train, y_train, batch_size=1, epochs=1, + # validation_split=0.1) + model = None + return model + + +# +# +def cnn_model_predict(fitted_model: Any, idx: np.array, features: np.array, params: dict): + x_test = features + transformed_x_test, transform_flag = check_input_array(x_test) + + if np.max(transformed_x_test) > 1: + print('Test data set was not scaled. The data was divided by 255.') + + if len(x_test.shape) == 3: + transformed_x_test = np.expand_dims(x_test, -1) + + # prediction = fitted_model.predict(transformed_x_test) + + prediction = np.asarray([[random.random()] for j in range(features.shape[0])]) + + return prediction, 'table' + + +# + +def preproc_predict(fitted_model: Any, idx: np.array, features: np.array, params: dict): + # example of custom data pre-processing for predict state + for i in range(features.shape[0]): + features[i, :, :] = features[i, :, :] + np.random.normal(0, 30) + return features, 'image' + + +def cnn_composite_pipeline(composite_flag: bool = True) -> Pipeline: + """ + Returns pipeline with the following structure: + + .. 
image:: img_classification_pipelines/cnn_composite_pipeline.png + :width: 55% + + Where cnn - convolutional neural network, rf - random forest + + :param composite_flag: add additional random forest estimator + """ + node_first = PipelineNode('gamma_filt') + node_first.parameters = {'model_predict': preproc_predict} + + node_second = PipelineNode('cnn_1', nodes_from=[node_first]) + node_second.parameters = {'model_predict': cnn_model_predict, + 'model_fit': cnn_model_fit} + + node_final = PipelineNode('rf', nodes_from=[node_second]) + + pipeline = Pipeline(node_final) + return pipeline + + +def run_image_classification_problem(train_dataset: tuple, + test_dataset: tuple, + composite_flag: bool = True): + task = Task(TaskTypesEnum.classification) + + x_train, y_train = train_dataset[0], train_dataset[1] + x_test, y_test = test_dataset[0], test_dataset[1] + + dataset_to_train = InputData.from_image(images=x_train, + labels=y_train, + task=task) + dataset_to_validate = InputData.from_image(images=x_test, + labels=y_test, + task=task) + + dataset_to_train = dataset_to_train.subset_range(0, 100) + + initial_pipeline = cnn_composite_pipeline(composite_flag) + + # the choice of the metric for the pipeline quality assessment during composition + quality_metric = ClassificationMetricsEnum.f1 + complexity_metric = ComplexityMetricsEnum.node_number + metrics = [quality_metric, complexity_metric] + # the choice and initialisation of the GP search + composer_requirements = PipelineComposerRequirements( + primary=['custom/preproc_image1', 'custom/preproc_image2'], + secondary=['custom/cnn_1', 'custom/cnn_2'], + timeout=datetime.timedelta(minutes=10), + num_of_generations=20, n_jobs=1 + ) + + pop_size = 5 + params = GPAlgorithmParameters( + selection_types=[SelectionTypesEnum.spea2], + genetic_scheme_type=GeneticSchemeTypesEnum.parameter_free, + mutation_types=[MutationTypesEnum.single_change, parameter_change_mutation], + pop_size=pop_size + ) + + # Create composer and with required composer params + composer = ( + ComposerBuilder(task=task) + .with_optimizer_params(params) + .with_requirements(composer_requirements) + .with_metrics(metrics) + .with_initial_pipelines(initial_pipelines=[initial_pipeline] * pop_size) + .build() + ) + + # the optimal pipeline generation by composition - the most time-consuming task + pipeline_evo_composed = composer.compose_pipeline(data=dataset_to_train)[0] + + pipeline_evo_composed.show() + print(pipeline_evo_composed.descriptive_id) + + pipeline_evo_composed.fit(input_data=dataset_to_train) + + # auto_model = Fedot(problem='classification', timeout=1, n_jobs=-1, preset='best_quality', + # metric=['f1'], with_tuning=True, initial_assumption = pipeline, + # available_models=[]) + # + # auto_model.fit(features=dataset_to_train) + + # auto_model.predict(dataset_to_validate) + # predictions = auto_model.prediction + + predictions = pipeline_evo_composed.predict(dataset_to_validate) + + roc_auc_on_valid = calculate_validation_metric(predictions, + dataset_to_validate) + return roc_auc_on_valid, dataset_to_train, dataset_to_validate + + +if __name__ == '__main__': + set_random_seed(1) + + training_set, testing_set = tf.keras.datasets.mnist.load_data(path='mnist.npz') + roc_auc_on_valid, dataset_to_train, dataset_to_validate = run_image_classification_problem( + train_dataset=training_set, + test_dataset=testing_set) + + print(roc_auc_on_valid) diff --git a/examples/simple/classification/cust/image_classification_problem_with_opt2.py 
b/examples/simple/classification/cust/image_classification_problem_with_opt2.py new file mode 100644 index 0000000000..db5f332d8f --- /dev/null +++ b/examples/simple/classification/cust/image_classification_problem_with_opt2.py @@ -0,0 +1,175 @@ +import datetime +from pathlib import Path + +import tensorflow as tf +from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters +from golem.core.optimisers.genetic.operators.base_mutations import MutationTypesEnum +from golem.core.optimisers.genetic.operators.inheritance import GeneticSchemeTypesEnum +from golem.core.optimisers.genetic.operators.selection import SelectionTypesEnum +from hyperopt import hp +from sklearn.metrics import roc_auc_score as roc_auc + +from examples.simple.classification.classification_pipelines import cnn_composite_pipeline +from fedot.core.composer.composer_builder import ComposerBuilder +from fedot.core.composer.gp_composer.specific_operators import parameter_change_mutation +from fedot.core.data.data import InputData, OutputData +from fedot.core.pipelines.node import PipelineNode +from fedot.core.pipelines.pipeline import Pipeline +from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements +from fedot.core.repository.operation_types_repository import get_operations_for_task, OperationTypesRepository +from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum, ComplexityMetricsEnum +from fedot.core.repository.tasks import Task, TaskTypesEnum +from fedot.core.utils import set_random_seed, fedot_project_root + +custom_search_space = {'gamma_filt': { + 'r': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [-254, 254], + 'type': 'discrete'}, + 'g': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [-254, 254], + 'type': 'discrete'}, + 'b': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [-254, 254], + 'type': 'discrete'}, + 'ksize': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [0, 20], + 'type': 'discrete'} +} + +} + + +def calculate_validation_metric(predicted: OutputData, dataset_to_validate: InputData) -> float: + # the quality assessment for the simulation results + roc_auc_value = roc_auc(y_true=dataset_to_validate.target, + y_score=predicted.predict, + multi_class="ovo") + return roc_auc_value + + +def cnn_composite_pipeline(composite_flag: bool = True) -> Pipeline: + """ + Returns pipeline with the following structure: + + .. 
image:: img_classification_pipelines/cnn_composite_pipeline.png + :width: 55% + + Where cnn - convolutional neural network, rf - random forest + + :param composite_flag: add additional random forest estimator + """ + node_first = PipelineNode('gamma_filt') + + node_second = PipelineNode('cnn_1', nodes_from=[node_first]) + + node_final = PipelineNode('rf', nodes_from=[node_second]) + + pipeline = Pipeline(node_final) + return pipeline + + +def setup_repository(): + OperationTypesRepository.__repository_dict__ = { + 'model': {'file': Path(fedot_project_root(), 'examples', 'simple', 'classification', 'cust', + 'model_repository.json'), 'initialized_repo': None, 'default_tags': []}, + 'data_operation': {'file': Path(fedot_project_root(), + 'examples', 'simple', 'classification', 'cust', + 'data_operation_repository.json'), + 'initialized_repo': None, 'default_tags': []} + } + + +def run_image_classification_problem(train_dataset: tuple, + test_dataset: tuple, + composite_flag: bool = True): + task = Task(TaskTypesEnum.classification) + + setup_repository() + + x_train, y_train = train_dataset[0], train_dataset[1] + x_test, y_test = test_dataset[0], test_dataset[1] + + dataset_to_train = InputData.from_image(images=x_train, + labels=y_train, + task=task) + dataset_to_validate = InputData.from_image(images=x_test, + labels=y_test, + task=task) + + dataset_to_train = dataset_to_train.subset_range(0, 100) + + initial_pipeline = cnn_composite_pipeline(composite_flag) + initial_pipeline.show() + initial_pipeline.fit(dataset_to_train) + predictions = initial_pipeline.predict(dataset_to_validate) + roc_auc_on_valid = calculate_validation_metric(predictions, + dataset_to_validate) + + print(roc_auc_on_valid) + + # the choice of the metric for the pipeline quality assessment during composition + quality_metric = ClassificationMetricsEnum.f1 + complexity_metric = ComplexityMetricsEnum.node_number + metrics = [quality_metric, complexity_metric] + # the choice and initialisation of the GP search + composer_requirements = PipelineComposerRequirements( + primary=get_operations_for_task(task=task, mode='all'), + timeout=datetime.timedelta(minutes=3), + num_of_generations=20, n_jobs=1 + ) + + pop_size = 5 + params = GPAlgorithmParameters( + selection_types=[SelectionTypesEnum.spea2], + genetic_scheme_type=GeneticSchemeTypesEnum.parameter_free, + mutation_types=[MutationTypesEnum.single_change, parameter_change_mutation], + pop_size=pop_size + ) + + # Create composer and with required composer params + composer = ( + ComposerBuilder(task=task) + .with_optimizer_params(params) + .with_requirements(composer_requirements) + .with_metrics(metrics) + .with_initial_pipelines(initial_pipelines=[initial_pipeline] * pop_size) + .build() + ) + + # the optimal pipeline generation by composition - the most time-consuming task + pipeline_evo_composed = composer.compose_pipeline(data=dataset_to_train)[0] + + pipeline_evo_composed.show() + print(pipeline_evo_composed.descriptive_id) + + pipeline_evo_composed.fit(input_data=dataset_to_train) + + # auto_model = Fedot(problem='classification', timeout=1, n_jobs=-1, preset='best_quality', + # metric=['f1'], with_tuning=True, initial_assumption = pipeline, + # available_models=[]) + # + # auto_model.fit(features=dataset_to_train) + + # auto_model.predict(dataset_to_validate) + # predictions = auto_model.prediction + + predictions = pipeline_evo_composed.predict(dataset_to_validate) + + roc_auc_on_valid = calculate_validation_metric(predictions, + dataset_to_validate) + return 
roc_auc_on_valid, dataset_to_train, dataset_to_validate + + +if __name__ == '__main__': + set_random_seed(1) + + training_set, testing_set = tf.keras.datasets.mnist.load_data(path='mnist.npz') + roc_auc_on_valid, dataset_to_train, dataset_to_validate = run_image_classification_problem( + train_dataset=training_set, + test_dataset=testing_set) + + print(roc_auc_on_valid) diff --git a/examples/simple/classification/cust/image_preproc.py b/examples/simple/classification/cust/image_preproc.py new file mode 100644 index 0000000000..aba8c1aa0d --- /dev/null +++ b/examples/simple/classification/cust/image_preproc.py @@ -0,0 +1,84 @@ +import warnings +from typing import Optional + +from examples.simple.classification.cust.preproc_impls import GammaFiltImplementation +from fedot.core.data.data import InputData, OutputData +from fedot.core.operations.evaluation.evaluation_interfaces import EvaluationStrategy +from fedot.core.operations.operation_parameters import OperationParameters +from fedot.utilities.random import ImplementationRandomStateHandler + + +class ImagePreprocessingStrategy(EvaluationStrategy): + """ + Args: + operation_type: ``str`` of the operation defined in operation or data operation repositories + + .. details:: possible operations: + + - ``scaling``-> ScalingImplementation, + - ``normalization``-> NormalizationImplementation, + - ``simple_imputation``-> ImputationImplementation, + - ``pca``-> PCAImplementation, + - ``kernel_pca``-> KernelPCAImplementation, + - ``poly_features``-> PolyFeaturesImplementation, + - ``one_hot_encoding``-> OneHotEncodingImplementation, + - ``label_encoding``-> LabelEncodingImplementation, + - ``fast_ica``-> FastICAImplementation + + params: hyperparameters to fit the operation with + + """ + + _operations_by_types = { + 'gamma_filt': GammaFiltImplementation, + 'negamma_filt': GammaFiltImplementation + } + + def __init__(self, operation_type: str, params: Optional[OperationParameters] = None): + self.operation_impl = self._convert_to_operation(operation_type) + super().__init__(operation_type, params) + + def fit(self, train_data: InputData): + """This method is used for operation training with the data provided + + Args: + train_data: data used for operation training + + Returns: + trained Sklearn operation + """ + + warnings.filterwarnings("ignore", category=RuntimeWarning) + operation_implementation = self.operation_impl(self.params_for_fit) + with ImplementationRandomStateHandler(implementation=operation_implementation): + operation_implementation.fit(train_data) + return operation_implementation + + def predict(self, trained_operation, predict_data: InputData) -> OutputData: + """Transform method for preprocessing task + + Args: + trained_operation: model object + predict_data: data used for prediction + + Returns: + prediction + """ + prediction = trained_operation.transform(predict_data) + # Convert prediction to output (if it is required) + converted = self._convert_to_output(prediction, predict_data) + return converted + + def predict_for_fit(self, trained_operation, predict_data: InputData) -> OutputData: + """ + Transform method for preprocessing task for fit stage + + Args: + trained_operation: model object + predict_data: data used for prediction + Returns: + OutputData: + """ + prediction = trained_operation.transform_for_fit(predict_data) + converted = self._convert_to_output(prediction, predict_data) + return converted diff --git a/examples/simple/classification/cust/model_repository.json 
b/examples/simple/classification/cust/model_repository.json new file mode 100644 index 0000000000..0db4df4924 --- /dev/null +++ b/examples/simple/classification/cust/model_repository.json @@ -0,0 +1,487 @@ +{ + "metadata": { + "image_class": { + "accepted_node_types": [ + "any" + ], + "description": "Implementations of the custom classification models", + "forbidden_node_types": "[]", + "input_type": "[DataTypesEnum.image]", + "output_type": "[DataTypesEnum.table]", + "strategies": [ + "examples.simple.classification.cust.image_class", + "ImageClassificationStrategy" + ], + "tags": [ + "ml", + "custom" + ], + "tasks": "[TaskTypesEnum.classification]" + }, + "custom_class": { + "accepted_node_types": [ + "any" + ], + "description": "Implementations of the custom classification models", + "forbidden_node_types": "[]", + "input_type": "[DataTypesEnum.table]", + "output_type": "[DataTypesEnum.table]", + "strategies": [ + "fedot.core.operations.evaluation.classification", + "FedotClassificationStrategy" + ], + "tags": [ + "ml", + "custom" + ], + "tasks": "[TaskTypesEnum.classification]" + }, + "custom_regr": { + "accepted_node_types": [ + "any" + ], + "description": "Implementations of the custom regression models", + "forbidden_node_types": "[]", + "input_type": "[DataTypesEnum.table]", + "output_type": "[DataTypesEnum.table]", + "strategies": [ + "fedot.core.operations.evaluation.regression", + "FedotRegressionStrategy" + ], + "tags": [ + "ml", + "custom" + ], + "tasks": "[TaskTypesEnum.regression]" + }, + "sklearn_class": { + "accepted_node_types": [ + "any" + ], + "description": "Implementations of the classification models from scikit-learn framework", + "forbidden_node_types": "[]", + "input_type": "[DataTypesEnum.table]", + "output_type": "[DataTypesEnum.table]", + "strategies": [ + "fedot.core.operations.evaluation.classification", + "SkLearnClassificationStrategy" + ], + "tags": [ + "ml", + "sklearn" + ], + "tasks": "[TaskTypesEnum.classification]" + }, + "sklearn_clust": { + "accepted_node_types": [ + "any" + ], + "description": "Implementations of the clustering models from scikit-learn framework", + "forbidden_node_types": "[]", + "input_type": "[DataTypesEnum.table]", + "output_type": "[DataTypesEnum.table]", + "strategies": [ + "fedot.core.operations.evaluation.clustering", + "SkLearnClusteringStrategy" + ], + "tags": [ + "ml", + "sklearn" + ], + "tasks": "[TaskTypesEnum.clustering]" + }, + "sklearn_regr": { + "accepted_node_types": [ + "any" + ], + "description": "Implementations of the regression models from scikit-learn framework", + "forbidden_node_types": "[]", + "input_type": "[DataTypesEnum.table]", + "output_type": "[DataTypesEnum.table]", + "strategies": [ + "fedot.core.operations.evaluation.regression", + "SkLearnRegressionStrategy" + ], + "tags": [ + "ml", + "sklearn", + "composition" + ], + "tasks": "[TaskTypesEnum.regression]" + }, + "boosting_class": { + "accepted_node_types": [ + "any" + ], + "description": "Boosting models implementations for classification problems", + "forbidden_node_types": "[]", + "input_type": "[DataTypesEnum.table]", + "output_type": "[DataTypesEnum.table]", + "strategies": [ + "fedot.core.operations.evaluation.boostings", + "BoostingClassificationStrategy" + ], + "tags": [ + "ml", + "boosting", + "composition" + ], + "tasks": "[TaskTypesEnum.classification]" + }, + "boosting_regr": { + "accepted_node_types": [ + "any" + ], + "description": "Boosting models implementations for regression problems", + "forbidden_node_types": "[]", + 
"input_type": "[DataTypesEnum.table]", + "output_type": "[DataTypesEnum.table]", + "strategies": [ + "fedot.core.operations.evaluation.boostings", + "BoostingRegressionStrategy" + ], + "tags": [ + "ml", + "boosting", + "composition" + ], + "tasks": "[TaskTypesEnum.regression]" + }, + "custom_model": { + "description": "Implementations of the models specified by user with external code source", + "input_type": "[DataTypesEnum.ts, DataTypesEnum.table, DataTypesEnum.text]", + "output_type": "[DataTypesEnum.table]", + "strategies": [ + "fedot.core.operations.evaluation.custom", + "CustomModelStrategy" + ], + "tags": [ + "non-default" + ], + "tasks": "[TaskTypesEnum.regression, TaskTypesEnum.ts_forecasting, TaskTypesEnum.classification, TaskTypesEnum.clustering]" + } + }, + "operations": { + "adareg": { + "meta": "sklearn_regr", + "presets": [ + "fast_train", + "ts", + "*tree" + ], + "tags": [ + "boosting", + "non_multi", + "non_linear" + ] + }, + "bernb": { + "meta": "sklearn_class", + "presets": [ + "fast_train" + ], + "tags": [ + "simple", + "bayesian", + "non_multi", + "linear" + ] + }, + "catboost": { + "meta": "boosting_class", + "presets": [ + "*tree" + ], + "tags": [ + "simple", + "boosting" + ] + }, + "catboostreg": { + "meta": "boosting_regr", + "presets": [ + "*tree" + ], + "tags": [ + "simple", + "boosting" + ] + }, + "dt": { + "meta": "sklearn_class", + "presets": [ + "fast_train", + "*tree" + ], + "tags": [ + "simple", + "tree", + "interpretable", + "non_linear" + ] + }, + "dtreg": { + "meta": "sklearn_regr", + "presets": [ + "fast_train", + "ts", + "*tree" + ], + "tags": [ + "tree", + "interpretable", + "non_linear" + ] + }, + "gbr": { + "meta": "sklearn_regr", + "presets": [ + "*tree" + ], + "tags": [ + "boosting", + "non_multi", + "non_linear", + "non-default" + ] + }, + "kmeans": { + "meta": "sklearn_clust", + "presets": [ + "fast_train" + ], + "tags": [ + "linear" + ] + }, + "knn": { + "meta": "custom_class", + "presets": [ + "fast_train" + ], + "tags": [ + "simple", + "correct_params", + "non_linear" + ] + }, + "knnreg": { + "meta": "custom_regr", + "presets": [ + "fast_train", + "ts" + ], + "tags": [ + "simple", + "correct_params", + "non_linear" + ] + }, + "lasso": { + "meta": "sklearn_regr", + "presets": [ + "fast_train", + "ts" + ], + "tags": [ + "simple", + "linear", + "interpretable" + ] + }, + "lda": { + "meta": "custom_class", + "presets": [ + "fast_train" + ], + "tags": [ + "discriminant", + "linear", + "correct_params", + "non-default" + ] + }, + "lgbm": { + "meta": "sklearn_class", + "tags": [ + "tree", + "non_linear" + ] + }, + "lgbmreg": { + "meta": "sklearn_regr", + "presets": [ + "*tree" + ], + "tags": [ + "tree", + "non_multi", + "non_linear" + ] + }, + "linear": { + "meta": "sklearn_regr", + "presets": [ + "fast_train", + "ts" + ], + "tags": [ + "simple", + "linear", + "interpretable" + ] + }, + "logit": { + "meta": "sklearn_class", + "presets": [ + "fast_train" + ], + "tags": [ + "simple", + "linear", + "interpretable", + "non_multi" + ] + }, + "mlp": { + "meta": "sklearn_class", + "tags": [ + "neural", + "non_linear" + ] + }, + "multinb": { + "meta": "sklearn_class", + "presets": [ + "fast_train" + ], + "tags": [ + "non-default", + "bayesian", + "non_multi", + "linear" + ] + }, + "qda": { + "meta": "custom_class", + "presets": [ + "fast_train" + ], + "tags": [ + "discriminant", + "quadratic", + "non_linear" + ] + }, + "rf": { + "meta": "sklearn_class", + "presets": [ + "fast_train", + "*tree" + ], + "tags": [ + "tree", + "non_linear" + ] + }, + "rfr": { + 
"meta": "sklearn_regr", + "presets": [ + "fast_train", + "*tree" + ], + "tags": [ + "tree", + "non_linear" + ] + }, + "ridge": { + "meta": "sklearn_regr", + "presets": [ + "fast_train", + "ts" + ], + "tags": [ + "simple", + "linear", + "interpretable" + ] + }, + "sgdr": { + "meta": "sklearn_regr", + "presets": [ + "fast_train", + "ts" + ], + "tags": [ + "non_multi", + "non_linear" + ] + }, + "svc": { + "meta": "custom_class", + "tags": [ + "no_prob", + "expensive", + "non_linear" + ] + }, + "svr": { + "meta": "sklearn_regr", + "tags": [ + "non_multi", + "non_linear" + ] + }, + "treg": { + "meta": "sklearn_regr", + "presets": [ + "*tree" + ], + "tags": [ + "tree", + "non_linear", + "non-default" + ] + }, + "xgboost": { + "meta": "sklearn_class", + "presets": [ + "*tree" + ], + "tags": [ + "tree", + "non-default", + "non_linear" + ] + }, + "xgbreg": { + "meta": "sklearn_regr", + "presets": [ + "*tree" + ], + "tags": [ + "tree", + "non_multi", + "non-default", + "non_linear" + ] + }, + "cnn_1": { + "meta": "image_class", + "tags": [ + "deep", + "non_linear" + ], + "input_type": "[DataTypesEnum.image]", + "output_type": "[DataTypesEnum.table]" + }, + "custom": { + "meta": "custom_model", + "tags": [ + "custom_model", + "non-default" + ] + } + } +} \ No newline at end of file diff --git a/examples/simple/classification/cust/preproc_impls.py b/examples/simple/classification/cust/preproc_impls.py new file mode 100644 index 0000000000..383052dc2d --- /dev/null +++ b/examples/simple/classification/cust/preproc_impls.py @@ -0,0 +1,43 @@ +from copy import deepcopy +from typing import Optional + +import numpy as np + +from fedot.core.data.data import InputData, OutputData +from fedot.core.operations.evaluation.operation_implementations.implementation_interfaces import \ + DataOperationImplementation +from fedot.core.operations.operation_parameters import OperationParameters +from fedot.core.repository.dataset_types import DataTypesEnum + + +class GammaFiltImplementation(DataOperationImplementation): + """ Class for application of :obj:`PolynomialFeatures` operation on data, + where only not encoded features (were not converted from categorical using + ``OneHot encoding``) are used + + Args: + params: OperationParameters with the arguments + """ + + def __init__(self, params: Optional[OperationParameters]): + super().__init__(params) + if not self.params: + # Default parameters + pass + else: + # Checking the appropriate params are using or not + pass + + def fit(self, input_data: InputData): + return None + + def transform(self, input_data: InputData) -> OutputData: + # example of custom data pre-processing for predict state + transformed_features = deepcopy(input_data.features) + for i in range(transformed_features.shape[0]): + transformed_features[i, :, :] = transformed_features[i, :, :] + np.random.normal(0, 30) + + output_data = self._convert_to_output(input_data, + transformed_features, data_type=DataTypesEnum.image) + + return output_data diff --git a/fedot/core/operations/evaluation/classification.py b/fedot/core/operations/evaluation/classification.py index a6bdf15069..0b90b49648 100644 --- a/fedot/core/operations/evaluation/classification.py +++ b/fedot/core/operations/evaluation/classification.py @@ -13,8 +13,6 @@ data_operations.sklearn_selectors import LinearClassFSImplementation, NonLinearClassFSImplementation from fedot.core.operations.evaluation.operation_implementations.models. 
\ discriminant_analysis import LDAImplementation, QDAImplementation -from fedot.core.operations.evaluation.operation_implementations.models. \ - keras import FedotCNNImplementation from fedot.core.operations.evaluation.operation_implementations.models.knn import FedotKnnClassImplementation from fedot.core.operations.evaluation.operation_implementations.models.svc import FedotSVCImplementation from fedot.core.operations.operation_parameters import OperationParameters @@ -46,7 +44,6 @@ class FedotClassificationStrategy(EvaluationStrategy): 'lda': LDAImplementation, 'qda': QDAImplementation, 'svc': FedotSVCImplementation, - 'cnn': FedotCNNImplementation, 'knn': FedotKnnClassImplementation } diff --git a/fedot/core/operations/evaluation/operation_implementations/models/keras.py b/fedot/core/operations/evaluation/operation_implementations/models/keras.py index 2b307c7a20..faabc00e4e 100644 --- a/fedot/core/operations/evaluation/operation_implementations/models/keras.py +++ b/fedot/core/operations/evaluation/operation_implementations/models/keras.py @@ -194,9 +194,9 @@ def fit(self, train_data): self.model = cnn_model_dict[self.params.get('architecture_type')](input_shape=train_data.features.shape[1:4], num_classes=len(self.classes)) - self.model = fit_cnn(train_data=train_data, model=self.model, epochs=self.params.get('epochs'), - batch_size=self.params.get('batch_size'), - optimizer_params=self.params.get('optimizer_parameters'), logger=self.params.get('log')) + # self.model = fit_cnn(train_data=train_data, model=self.model, epochs=self.params.get('epochs'), + # batch_size=self.params.get('batch_size'), + # optimizer_params=self.params.get('optimizer_parameters'), logger=self.params.get('log')) return self.model def predict(self, input_data): diff --git a/test/unit/tasks/test_classification.py b/test/unit/tasks/test_classification.py index 7373f758be..467c213469 100644 --- a/test/unit/tasks/test_classification.py +++ b/test/unit/tasks/test_classification.py @@ -4,7 +4,7 @@ from sklearn.datasets import load_iris, make_classification from sklearn.metrics import roc_auc_score as roc_auc -from examples.simple.classification.image_classification_problem import run_image_classification_problem +from examples.simple.classification.cust.image_classification_problem import run_image_classification_problem from fedot.core.data.data import InputData from fedot.core.data.data_split import train_test_data_setup from fedot.core.data.supplementary_data import SupplementaryData From 96bdd04a2feaf0191499d7a665ee79df59bd7e84 Mon Sep 17 00:00:00 2001 From: nicl-nno Date: Wed, 22 Nov 2023 15:20:50 +0300 Subject: [PATCH 03/12] Upd 2 --- .../cust/data_operation_repository.json | 200 ------------------ .../simple/classification/cust/image_class.py | 2 +- .../image_classification_problem_with_opt.py | 1 - .../image_classification_problem_with_opt2.py | 53 +++-- .../classification/cust/image_preproc.py | 2 +- .../classification/cust/model_repository.json | 24 +-- .../classification/cust/preproc_impls.py | 2 +- .../objective/data_source_splitter.py | 4 +- fedot/core/pipelines/pipeline_advisor.py | 5 + fedot/core/pipelines/tuning/hyperparams.py | 5 +- 10 files changed, 57 insertions(+), 241 deletions(-) diff --git a/examples/simple/classification/cust/data_operation_repository.json b/examples/simple/classification/cust/data_operation_repository.json index 769edc9a63..0b152905b0 100644 --- a/examples/simple/classification/cust/data_operation_repository.json +++ 
b/examples/simple/classification/cust/data_operation_repository.json @@ -130,206 +130,6 @@ "categorical-ignore" ] }, - "scaling": { - "meta": "custom_preprocessing", - "presets": [ - "fast_train", - "ts", - "*tree" - ], - "tags": [ - "simple", - "feature_scaling" - ] - }, - "normalization": { - "meta": "custom_preprocessing", - "presets": [ - "fast_train", - "ts", - "*tree" - ], - "tags": [ - "simple", - "feature_scaling", - "non_applicable_for_ts" - ] - }, - "simple_imputation": { - "meta": "custom_preprocessing", - "presets": [ - "fast_train", - "*tree" - ], - "tags": [ - "simple", - "imputation", - "categorical-ignore", - "non-default" - ] - }, - "pca": { - "meta": "dimension_transformation", - "presets": [ - "fast_train", - "ts", - "*tree" - ], - "tags": [ - "linear", - "dimensionality_transforming", - "correct_params", - "non_applicable_for_ts", - "feature_reduction" - ] - }, - "kernel_pca": { - "meta": "dimension_transformation", - "presets": [ - "ts", - "*tree" - ], - "tags": [ - "non_linear", - "dimensionality_transforming", - "correct_params", - "non_applicable_for_ts", - "non-default", - "feature_reduction" - ] - }, - "fast_ica": { - "meta": "dimension_transformation", - "presets": [ - "ts", - "*tree" - ], - "tags": [ - "non_linear", - "dimensionality_transforming", - "correct_params", - "non_applicable_for_ts", - "feature_reduction" - ] - }, - "poly_features": { - "meta": "dimension_transformation", - "tags": [ - "non_linear", - "dimensionality_transforming", - "non_applicable_for_ts", - "feature_engineering" - ] - }, - "ransac_lin_reg": { - "meta": "regression_preprocessing", - "presets": [ - "fast_train", - "*tree" - ], - "tags": [ - "affects_target", - "linear", - "filtering", - "correct_params", - "non_applicable_for_ts" - ] - }, - "ransac_non_lin_reg": { - "meta": "regression_preprocessing", - "presets": [ - "fast_train", - "*tree" - ], - "tags": [ - "affects_target", - "non_linear", - "filtering", - "correct_params", - "non_applicable_for_ts", - "non-default" - ] - }, - "isolation_forest_reg": { - "meta": "regression_preprocessing", - "tags": [ - "non_linear", - "filtering", - "non_applicable_for_ts" - ] - }, - "isolation_forest_class": { - "meta": "classification_preprocessing", - "tags": [ - "non_linear", - "filtering" - ] - }, - "rfe_lin_reg": { - "meta": "regression_preprocessing", - "tags": [ - "linear", - "feature_selection", - "non_applicable_for_ts", - "non-default", - "non_applicable_for_ts" - ] - }, - "rfe_non_lin_reg": { - "meta": "regression_preprocessing", - "tags": [ - "non_linear", - "feature_selection", - "non_applicable_for_ts", - "non-default", - "non_applicable_for_ts" - ] - }, - "rfe_lin_class": { - "meta": "classification_preprocessing", - "tags": [ - "linear", - "feature_selection", - "non-default" - ] - }, - "rfe_non_lin_class": { - "meta": "classification_preprocessing", - "tags": [ - "non_linear", - "feature_selection", - "non-default" - ] - }, - "decompose": { - "meta": "regression_preprocessing", - "presets": [ - "fast_train", - "ts", - "*tree" - ], - "tags": [ - "non-default", - "decompose" - ] - }, - "class_decompose": { - "meta": "classification_preprocessing", - "presets": [ - "fast_train", - "*tree" - ], - "tags": [ - "non-default", - "decompose" - ] - }, - "resample": { - "meta": "classification_preprocessing", - "tags": [ - "imbalanced" - ] - }, "gamma_filt": { "meta": "image_preprocessing", "tags": [ diff --git a/examples/simple/classification/cust/image_class.py b/examples/simple/classification/cust/image_class.py index 
882a745085..60e8676637 100644 --- a/examples/simple/classification/cust/image_class.py +++ b/examples/simple/classification/cust/image_class.py @@ -56,4 +56,4 @@ def predict(self, trained_operation, predict_data: InputData) -> OutputData: # Convert prediction to output (if it is required) converted = self._convert_to_output(prediction, predict_data) - return converted + return converted \ No newline at end of file diff --git a/examples/simple/classification/cust/image_classification_problem_with_opt.py b/examples/simple/classification/cust/image_classification_problem_with_opt.py index 91a866061a..c3397a9c7b 100644 --- a/examples/simple/classification/cust/image_classification_problem_with_opt.py +++ b/examples/simple/classification/cust/image_classification_problem_with_opt.py @@ -44,7 +44,6 @@ } - def calculate_validation_metric(predicted: OutputData, dataset_to_validate: InputData) -> float: # the quality assessment for the simulation results roc_auc_value = roc_auc(y_true=dataset_to_validate.target, diff --git a/examples/simple/classification/cust/image_classification_problem_with_opt2.py b/examples/simple/classification/cust/image_classification_problem_with_opt2.py index db5f332d8f..c7e56e126d 100644 --- a/examples/simple/classification/cust/image_classification_problem_with_opt2.py +++ b/examples/simple/classification/cust/image_classification_problem_with_opt2.py @@ -6,6 +6,7 @@ from golem.core.optimisers.genetic.operators.base_mutations import MutationTypesEnum from golem.core.optimisers.genetic.operators.inheritance import GeneticSchemeTypesEnum from golem.core.optimisers.genetic.operators.selection import SelectionTypesEnum +from golem.core.tuning.simultaneous import SimultaneousTuner from hyperopt import hp from sklearn.metrics import roc_auc_score as roc_auc @@ -16,6 +17,9 @@ from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements +from fedot.core.pipelines.tuning.hyperparams import ParametersChanger +from fedot.core.pipelines.tuning.search_space import PipelineSearchSpace +from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder from fedot.core.repository.operation_types_repository import get_operations_for_task, OperationTypesRepository from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum, ComplexityMetricsEnum from fedot.core.repository.tasks import Task, TaskTypesEnum @@ -25,21 +29,14 @@ 'r': { 'hyperopt-dist': hp.uniformint, 'sampling-scope': [-254, 254], - 'type': 'discrete'}, - 'g': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [-254, 254], - 'type': 'discrete'}, - 'b': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [-254, 254], - 'type': 'discrete'}, - 'ksize': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [0, 20], 'type': 'discrete'} -} - +}, + 'negamma_filt': { + 'r': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [-254, 254], + 'type': 'discrete'}, + } } @@ -123,9 +120,13 @@ def run_image_classification_problem(train_dataset: tuple, ) pop_size = 5 + + # search space for hyper-parametric mutation + ParametersChanger.custom_search_space = custom_search_space + params = GPAlgorithmParameters( selection_types=[SelectionTypesEnum.spea2], - genetic_scheme_type=GeneticSchemeTypesEnum.parameter_free, + genetic_scheme_type=GeneticSchemeTypesEnum.steady_state, mutation_types=[MutationTypesEnum.single_change, parameter_change_mutation], pop_size=pop_size ) @@ 
-148,14 +149,22 @@ def run_image_classification_problem(train_dataset: tuple, pipeline_evo_composed.fit(input_data=dataset_to_train) - # auto_model = Fedot(problem='classification', timeout=1, n_jobs=-1, preset='best_quality', - # metric=['f1'], with_tuning=True, initial_assumption = pipeline, - # available_models=[]) - # - # auto_model.fit(features=dataset_to_train) + replace_default_search_space = True + cv_folds = 1 + search_space = PipelineSearchSpace(custom_search_space=custom_search_space, + replace_default_search_space=replace_default_search_space) + + predictions = pipeline_evo_composed.predict(dataset_to_validate) + + # .with_cv_folds(cv_folds) \ + pipeline_tuner = TunerBuilder(dataset_to_train.task) \ + .with_tuner(SimultaneousTuner) \ + .with_metric(ClassificationMetricsEnum.ROCAUC) \ + .with_cv_folds(cv_folds) \ + .with_iterations(50) \ + .with_search_space(search_space).build(dataset_to_train) - # auto_model.predict(dataset_to_validate) - # predictions = auto_model.prediction + pipeline = pipeline_tuner.tune(pipeline_evo_composed) predictions = pipeline_evo_composed.predict(dataset_to_validate) diff --git a/examples/simple/classification/cust/image_preproc.py b/examples/simple/classification/cust/image_preproc.py index aba8c1aa0d..2472453696 100644 --- a/examples/simple/classification/cust/image_preproc.py +++ b/examples/simple/classification/cust/image_preproc.py @@ -81,4 +81,4 @@ def predict_for_fit(self, trained_operation, predict_data: InputData) -> OutputD """ prediction = trained_operation.transform_for_fit(predict_data) converted = self._convert_to_output(prediction, predict_data) - return converted + return converted \ No newline at end of file diff --git a/examples/simple/classification/cust/model_repository.json b/examples/simple/classification/cust/model_repository.json index 0db4df4924..8491f981a5 100644 --- a/examples/simple/classification/cust/model_repository.json +++ b/examples/simple/classification/cust/model_repository.json @@ -1,17 +1,17 @@ { "metadata": { - "image_class": { - "accepted_node_types": [ - "any" - ], - "description": "Implementations of the custom classification models", - "forbidden_node_types": "[]", - "input_type": "[DataTypesEnum.image]", - "output_type": "[DataTypesEnum.table]", - "strategies": [ - "examples.simple.classification.cust.image_class", - "ImageClassificationStrategy" - ], + "image_class": { + "accepted_node_types": [ + "any" + ], + "description": "Implementations of the custom classification models", + "forbidden_node_types": "[]", + "input_type": "[DataTypesEnum.image]", + "output_type": "[DataTypesEnum.table]", + "strategies": [ + "examples.simple.classification.cust.image_class", + "ImageClassificationStrategy" + ], "tags": [ "ml", "custom" diff --git a/examples/simple/classification/cust/preproc_impls.py b/examples/simple/classification/cust/preproc_impls.py index 383052dc2d..e4afc4bd63 100644 --- a/examples/simple/classification/cust/preproc_impls.py +++ b/examples/simple/classification/cust/preproc_impls.py @@ -40,4 +40,4 @@ def transform(self, input_data: InputData) -> OutputData: output_data = self._convert_to_output(input_data, transformed_features, data_type=DataTypesEnum.image) - return output_data + return output_data \ No newline at end of file diff --git a/fedot/core/optimisers/objective/data_source_splitter.py b/fedot/core/optimisers/objective/data_source_splitter.py index 70e5dd7e30..bfc05c9c2a 100644 --- a/fedot/core/optimisers/objective/data_source_splitter.py +++ 
b/fedot/core/optimisers/objective/data_source_splitter.py @@ -4,13 +4,13 @@ from golem.core.log import default_log from fedot.core.constants import default_data_split_ratio_by_task +from fedot.core.data.cv_folds import cv_generator from fedot.core.data.data import InputData from fedot.core.data.data_split import train_test_data_setup, _are_stratification_allowed from fedot.core.data.multi_modal import MultiModalData from fedot.core.optimisers.objective.data_objective_eval import DataSource from fedot.core.repository.tasks import TaskTypesEnum from fedot.remote.remote_evaluator import RemoteEvaluator, init_data_for_remote_execution -from fedot.core.data.cv_folds import cv_generator class DataSourceSplitter: @@ -59,7 +59,7 @@ def build(self, data: Union[InputData, MultiModalData]) -> DataSource: raise ValueError(f"cv_folds is not integer: {self.cv_folds}") if self.cv_folds < 2: self.cv_folds = None - if self.cv_folds > data.target.shape[0] - 1: + elif self.cv_folds > data.target.shape[0] - 1: raise ValueError((f"cv_folds ({self.cv_folds}) is greater than" f" the maximum allowed count {data.target.shape[0] - 1}")) diff --git a/fedot/core/pipelines/pipeline_advisor.py b/fedot/core/pipelines/pipeline_advisor.py index 7f38d056f9..cfe8b37ffc 100644 --- a/fedot/core/pipelines/pipeline_advisor.py +++ b/fedot/core/pipelines/pipeline_advisor.py @@ -30,6 +30,7 @@ def propose_change(self, node: OptNode, possible_operations: List[str]) -> List[ :return: list of candidates with str operations """ operation_id = node.content['name'] + # data source, exog_ts and custom models replacement is useless if check_for_specific_operations(operation_id): return [] @@ -43,9 +44,13 @@ def propose_change(self, node: OptNode, possible_operations: List[str]) -> List[ # lagged transform can be replaced only to lagged candidates = set.intersection({'lagged', 'sparse_lagged'}, set(possible_operations)) + if 'cnn' in operation_id: + candidates = [c for c in candidates if 'cnn' in candidates] + if operation_id in candidates: # the change to the same node is not meaningful candidates.remove(operation_id) + return candidates def propose_parent(self, node: OptNode, possible_operations: List[str]) -> List[str]: diff --git a/fedot/core/pipelines/tuning/hyperparams.py b/fedot/core/pipelines/tuning/hyperparams.py index 7800d0fca7..7a64efbeda 100644 --- a/fedot/core/pipelines/tuning/hyperparams.py +++ b/fedot/core/pipelines/tuning/hyperparams.py @@ -16,6 +16,8 @@ class ParametersChanger: :param current_params: current parameters value """ + custom_search_space = None + def __init__(self, operation_name, current_params): self.operation_name = operation_name self.current_params = current_params @@ -25,7 +27,8 @@ def get_new_operation_params(self): """ Function return a dictionary with new parameters values """ # Get available parameters for operation - params_list = PipelineSearchSpace().get_parameters_for_operation(self.operation_name) + params_list = \ + PipelineSearchSpace(ParametersChanger.custom_search_space).get_parameters_for_operation(self.operation_name) if not params_list: params_dict = None From 5b04dff0a0c079ac3690cda1e211480f35448700 Mon Sep 17 00:00:00 2001 From: nicl-nno Date: Thu, 23 Nov 2023 12:45:03 +0300 Subject: [PATCH 04/12] Upd 3 --- .../classification/cust/data_operation_repository.json | 6 ++++++ .../cust/image_classification_problem_with_opt2.py | 4 ++++ examples/simple/classification/cust/image_preproc.py | 4 +++- ...eration_params.json => my_default_operation_params.json} | 3 +++ 
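A note on the tuning hooks introduced above: a custom operation only becomes tunable once its hyperparameters are described in a hyperopt-style search space, and that space has to reach two places, the hyper-parametric mutation (through the new ParametersChanger.custom_search_space attribute) and the tuner that is built after composing. The sketch below condenses that wiring using the gamma_filt ranges from this patch; dataset_to_train and pipeline_evo_composed are assumed to be prepared as in the example script, so treat it as an illustration of the intended call order rather than a standalone snippet.

    from hyperopt import hp
    from golem.core.tuning.simultaneous import SimultaneousTuner

    from fedot.core.pipelines.tuning.hyperparams import ParametersChanger
    from fedot.core.pipelines.tuning.search_space import PipelineSearchSpace
    from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
    from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum

    # one entry per operation; each tunable parameter gets a hyperopt distribution
    custom_search_space = {
        'gamma_filt': {
            'r': {'hyperopt-dist': hp.uniformint,
                  'sampling-scope': [-254, 254],
                  'type': 'discrete'}}}

    # ranges used by parameter_change_mutation while the composer is running
    ParametersChanger.custom_search_space = custom_search_space

    # the same ranges handed to the tuner that runs after composing
    search_space = PipelineSearchSpace(custom_search_space=custom_search_space,
                                       replace_default_search_space=True)
    pipeline_tuner = TunerBuilder(dataset_to_train.task) \
        .with_tuner(SimultaneousTuner) \
        .with_metric(ClassificationMetricsEnum.ROCAUC) \
        .with_iterations(50) \
        .with_search_space(search_space) \
        .build(dataset_to_train)
    tuned_pipeline = pipeline_tuner.tune(pipeline_evo_composed)

A later commit in this series (Example refactored) renames the metrics import to fedot.core.repository.metrics_repository and the operations to filter_1 and filter_2; the wiring itself stays the same.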
fedot/core/operations/operation_parameters.py | 4 +++- fedot/core/repository/default_params_repository.py | 6 +++++- 6 files changed, 24 insertions(+), 3 deletions(-) rename examples/simple/classification/cust/{default_operation_params.json => my_default_operation_params.json} (97%) diff --git a/examples/simple/classification/cust/data_operation_repository.json b/examples/simple/classification/cust/data_operation_repository.json index 0b152905b0..72740450ce 100644 --- a/examples/simple/classification/cust/data_operation_repository.json +++ b/examples/simple/classification/cust/data_operation_repository.json @@ -141,6 +141,12 @@ "tags": [ "filtering" ] + }, + "yet_anothe_filt": { + "meta": "image_preprocessing", + "tags": [ + "filtering" + ] } } } \ No newline at end of file diff --git a/examples/simple/classification/cust/image_classification_problem_with_opt2.py b/examples/simple/classification/cust/image_classification_problem_with_opt2.py index c7e56e126d..b93c0c8bf6 100644 --- a/examples/simple/classification/cust/image_classification_problem_with_opt2.py +++ b/examples/simple/classification/cust/image_classification_problem_with_opt2.py @@ -14,6 +14,7 @@ from fedot.core.composer.composer_builder import ComposerBuilder from fedot.core.composer.gp_composer.specific_operators import parameter_change_mutation from fedot.core.data.data import InputData, OutputData +from fedot.core.operations.operation_parameters import OperationParameters from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements @@ -86,6 +87,9 @@ def run_image_classification_problem(train_dataset: tuple, task = Task(TaskTypesEnum.classification) setup_repository() + OperationParameters.custom_default_params_path = Path(fedot_project_root(), + 'examples', 'simple', 'classification', 'cust', + 'my_default_operation_params.json') x_train, y_train = train_dataset[0], train_dataset[1] x_test, y_test = test_dataset[0], test_dataset[1] diff --git a/examples/simple/classification/cust/image_preproc.py b/examples/simple/classification/cust/image_preproc.py index 2472453696..4b35ce4927 100644 --- a/examples/simple/classification/cust/image_preproc.py +++ b/examples/simple/classification/cust/image_preproc.py @@ -31,7 +31,9 @@ class ImagePreprocessingStrategy(EvaluationStrategy): _operations_by_types = { 'gamma_filt': GammaFiltImplementation, - 'negamma_filt': GammaFiltImplementation + 'negamma_filt': GammaFiltImplementation, + 'yet_anothe_filt': GammaFiltImplementation + } def __init__(self, operation_type: str, params: Optional[OperationParameters] = None): diff --git a/examples/simple/classification/cust/default_operation_params.json b/examples/simple/classification/cust/my_default_operation_params.json similarity index 97% rename from examples/simple/classification/cust/default_operation_params.json rename to examples/simple/classification/cust/my_default_operation_params.json index 275612443d..3109f307b2 100644 --- a/examples/simple/classification/cust/default_operation_params.json +++ b/examples/simple/classification/cust/my_default_operation_params.json @@ -92,5 +92,8 @@ }, "fast_ica": { "whiten": "unit-variance" + }, + "gamma_filt": { + "a": "42" } } \ No newline at end of file diff --git a/fedot/core/operations/operation_parameters.py b/fedot/core/operations/operation_parameters.py index 1936c965ce..2ddf6b69e5 100644 --- a/fedot/core/operations/operation_parameters.py +++ 
b/fedot/core/operations/operation_parameters.py @@ -18,6 +18,8 @@ class OperationParameters: """ + custom_default_params_path = 'default_operation_params.json' + def __init__(self, **parameters): self._parameters = parameters self._changed_keys: list = [] @@ -65,5 +67,5 @@ def get_default_params(operation_type: str) -> dict: :return: default repository parameters for the model name """ - with DefaultOperationParamsRepository() as default_params_repo: + with DefaultOperationParamsRepository(OperationParameters.custom_default_params_path) as default_params_repo: return default_params_repo.get_default_params_for_operation(operation_type) diff --git a/fedot/core/repository/default_params_repository.py b/fedot/core/repository/default_params_repository.py index 4cfd248e85..040a53733c 100644 --- a/fedot/core/repository/default_params_repository.py +++ b/fedot/core/repository/default_params_repository.py @@ -7,7 +7,11 @@ def __init__(self, repository_name: str = 'default_operation_params.json'): repo_folder_path = str(os.path.dirname(__file__)) file = os.path.join('data', repository_name) self._repo_path = os.path.join(repo_folder_path, file) - self._repo = self._initialise_repo() + try: + self._repo = self._initialise_repo() + except FileNotFoundError: + self._repo_path = repository_name + self._repo = self._initialise_repo() def __enter__(self): return self From 80ad6e270b47c208837fb49e21be6db0d24a6a09 Mon Sep 17 00:00:00 2001 From: nicl-nno Date: Wed, 13 Dec 2023 18:26:28 +0300 Subject: [PATCH 05/12] Upd 4 --- fedot/core/pipelines/tuning/search_space.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fedot/core/pipelines/tuning/search_space.py b/fedot/core/pipelines/tuning/search_space.py index b44a02b977..b1a59d364a 100644 --- a/fedot/core/pipelines/tuning/search_space.py +++ b/fedot/core/pipelines/tuning/search_space.py @@ -791,6 +791,9 @@ def get_parameters_dict(self): parameters_per_operation.update(self.custom_search_space) else: for operation_name, operation_dct in self.custom_search_space.items(): - parameters_per_operation[operation_name].update(operation_dct) + if operation_name in parameters_per_operation.keys(): + parameters_per_operation[operation_name].update(operation_dct) + else: + parameters_per_operation[operation_name] = operation_dct return parameters_per_operation From dfad64d723ef64008ff6d691c6c305d94f8cf2ee Mon Sep 17 00:00:00 2001 From: nicl-nno Date: Mon, 25 Dec 2023 17:15:20 +0300 Subject: [PATCH 06/12] Example refactored --- .../customization}/__init__.py | 0 ...mage_classification_with_custom_models.py} | 33 +-- .../customization/implementations/__init__.py | 0 .../implementations}/cnn_impls.py | 56 +---- .../implementations}/preproc_impls.py | 10 +- .../my_data_operation_repository.json} | 8 +- .../my_default_operation_params.json | 2 +- .../repositories/my_model_repository.json} | 2 +- .../customization/strategies/__init__.py | 0 .../customization/strategies}/image_class.py | 2 +- .../customization/strategies/image_preproc.py | 39 +++ .../cust/image_classification_problem.py | 157 ------------ .../image_classification_problem_with_opt.py | 229 ------------------ .../classification/cust/image_preproc.py | 86 ------- test/unit/tasks/test_classification.py | 2 +- 15 files changed, 60 insertions(+), 566 deletions(-) rename examples/{simple/classification/cust => advanced/customization}/__init__.py (100%) rename examples/{simple/classification/cust/image_classification_problem_with_opt2.py => 
advanced/customization/image_classification_with_custom_models.py} (85%) create mode 100644 examples/advanced/customization/implementations/__init__.py rename examples/{simple/classification/cust => advanced/customization/implementations}/cnn_impls.py (70%) rename examples/{simple/classification/cust => advanced/customization/implementations}/preproc_impls.py (81%) rename examples/{simple/classification/cust/data_operation_repository.json => advanced/customization/repositories/my_data_operation_repository.json} (97%) rename examples/{simple/classification/cust => advanced/customization/repositories}/my_default_operation_params.json (99%) rename examples/{simple/classification/cust/model_repository.json => advanced/customization/repositories/my_model_repository.json} (99%) create mode 100644 examples/advanced/customization/strategies/__init__.py rename examples/{simple/classification/cust => advanced/customization/strategies}/image_class.py (96%) create mode 100644 examples/advanced/customization/strategies/image_preproc.py delete mode 100644 examples/simple/classification/cust/image_classification_problem.py delete mode 100644 examples/simple/classification/cust/image_classification_problem_with_opt.py delete mode 100644 examples/simple/classification/cust/image_preproc.py diff --git a/examples/simple/classification/cust/__init__.py b/examples/advanced/customization/__init__.py similarity index 100% rename from examples/simple/classification/cust/__init__.py rename to examples/advanced/customization/__init__.py diff --git a/examples/simple/classification/cust/image_classification_problem_with_opt2.py b/examples/advanced/customization/image_classification_with_custom_models.py similarity index 85% rename from examples/simple/classification/cust/image_classification_problem_with_opt2.py rename to examples/advanced/customization/image_classification_with_custom_models.py index b93c0c8bf6..c3f6f9806f 100644 --- a/examples/simple/classification/cust/image_classification_problem_with_opt2.py +++ b/examples/advanced/customization/image_classification_with_custom_models.py @@ -21,18 +21,18 @@ from fedot.core.pipelines.tuning.hyperparams import ParametersChanger from fedot.core.pipelines.tuning.search_space import PipelineSearchSpace from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder +from fedot.core.repository.metrics_repository import ClassificationMetricsEnum, ComplexityMetricsEnum from fedot.core.repository.operation_types_repository import get_operations_for_task, OperationTypesRepository -from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum, ComplexityMetricsEnum from fedot.core.repository.tasks import Task, TaskTypesEnum from fedot.core.utils import set_random_seed, fedot_project_root -custom_search_space = {'gamma_filt': { +custom_search_space = {'filter_1': { 'r': { 'hyperopt-dist': hp.uniformint, 'sampling-scope': [-254, 254], 'type': 'discrete'} }, - 'negamma_filt': { + 'filter_2': { 'r': { 'hyperopt-dist': hp.uniformint, 'sampling-scope': [-254, 254], @@ -50,17 +50,7 @@ def calculate_validation_metric(predicted: OutputData, dataset_to_validate: Inpu def cnn_composite_pipeline(composite_flag: bool = True) -> Pipeline: - """ - Returns pipeline with the following structure: - - .. 
image:: img_classification_pipelines/cnn_composite_pipeline.png - :width: 55% - - Where cnn - convolutional neural network, rf - random forest - - :param composite_flag: add additional random forest estimator - """ - node_first = PipelineNode('gamma_filt') + node_first = PipelineNode('filter_1') node_second = PipelineNode('cnn_1', nodes_from=[node_first]) @@ -71,15 +61,17 @@ def cnn_composite_pipeline(composite_flag: bool = True) -> Pipeline: def setup_repository(): + repo_folder = Path(fedot_project_root(), 'examples', 'advanced', 'customization', + 'repositories') OperationTypesRepository.__repository_dict__ = { - 'model': {'file': Path(fedot_project_root(), 'examples', 'simple', 'classification', 'cust', - 'model_repository.json'), 'initialized_repo': None, 'default_tags': []}, - 'data_operation': {'file': Path(fedot_project_root(), - 'examples', 'simple', 'classification', 'cust', - 'data_operation_repository.json'), + 'model': {'file': Path(repo_folder, 'my_model_repository.json'), 'initialized_repo': None, 'default_tags': []}, + 'data_operation': {'file': Path(repo_folder, 'my_data_operation_repository.json'), 'initialized_repo': None, 'default_tags': []} } + OperationParameters.custom_default_params_path = Path(repo_folder, + 'my_default_operation_params.json') + def run_image_classification_problem(train_dataset: tuple, test_dataset: tuple, @@ -87,9 +79,6 @@ def run_image_classification_problem(train_dataset: tuple, task = Task(TaskTypesEnum.classification) setup_repository() - OperationParameters.custom_default_params_path = Path(fedot_project_root(), - 'examples', 'simple', 'classification', 'cust', - 'my_default_operation_params.json') x_train, y_train = train_dataset[0], train_dataset[1] x_test, y_test = test_dataset[0], test_dataset[1] diff --git a/examples/advanced/customization/implementations/__init__.py b/examples/advanced/customization/implementations/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/examples/simple/classification/cust/cnn_impls.py b/examples/advanced/customization/implementations/cnn_impls.py similarity index 70% rename from examples/simple/classification/cust/cnn_impls.py rename to examples/advanced/customization/implementations/cnn_impls.py index 3655d72584..24b5b11444 100644 --- a/examples/simple/classification/cust/cnn_impls.py +++ b/examples/advanced/customization/implementations/cnn_impls.py @@ -33,25 +33,6 @@ def check_input_array(x_train): return transformed_x_train, transform_flag -def create_deep_cnn(input_shape: tuple, - num_classes: int): - model = tf.keras.Sequential( - [ - tf.keras.layers.InputLayer(input_shape=input_shape), - tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation="relu"), - tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), - tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"), - tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), - tf.keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"), - tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), - tf.keras.layers.Flatten(), - tf.keras.layers.Dropout(0.5), - tf.keras.layers.Dense(num_classes, activation="softmax"), - ] - ) - return model - - def create_simple_cnn(input_shape: tuple, num_classes: int): model = tf.keras.Sequential( @@ -68,16 +49,6 @@ def create_simple_cnn(input_shape: tuple, return model -def create_vgg16(input_shape: tuple, - num_classes: int): - model = tf.keras.applications.vgg16.VGG16(include_top=True, - weights=None, - input_shape=input_shape, - classes=num_classes, - classifier_activation='sigmoid') - return model - 
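Once setup_repository() above has pointed OperationTypesRepository and the default-parameter path at the repositories folder of this example, the custom operations are addressed like any built-in one. A minimal usage sketch, assuming dataset_to_train and dataset_to_validate were built with InputData.from_image as in the example script:

    from fedot.core.pipelines.node import PipelineNode
    from fedot.core.pipelines.pipeline import Pipeline

    setup_repository()  # route FEDOT to my_model_repository.json and my_data_operation_repository.json

    # custom preprocessing -> custom CNN -> stock random forest, mirroring cnn_composite_pipeline()
    node_filter = PipelineNode('filter_1')
    node_cnn = PipelineNode('cnn_1', nodes_from=[node_filter])
    node_final = PipelineNode('rf', nodes_from=[node_cnn])
    pipeline = Pipeline(node_final)

    pipeline.fit(input_data=dataset_to_train)
    predictions = pipeline.predict(dataset_to_validate)

The same pipeline is what the example passes to the composer as its initial assumption, so the manual build here is only a sanity check of the repository wiring.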
- def fit_cnn(train_data: InputData, model, epochs: int = 10, @@ -125,36 +96,11 @@ def fit_cnn(train_data: InputData, def predict_cnn(trained_model, predict_data: InputData, output_mode: str = 'labels', logger=None) -> OutputData: - # x_test = predict_data.features - # transformed_x_test, transform_flag = check_input_array(x_test) - # - # if logger is None: - # logger = default_log(prefix=__name__) - # - # if np.max(transformed_x_test) > 1: - # logger.warning('Test data set was not scaled. The data was divided by 255.') - # - # if len(x_test.shape) == 3: - # transformed_x_test = np.expand_dims(x_test, -1) - # - # if output_mode == 'labels': - # prediction = np.round(trained_model.predict(transformed_x_test)) - # elif output_mode in ['probs', 'full_probs', 'default']: - # prediction = trained_model.predict(transformed_x_test) - # if trained_model.num_classes < 2: - # logger.error('Data set contain only 1 target class. Please reformat your data.') - # raise ValueError('Data set contain only 1 target class. Please reformat your data.') - # elif trained_model.num_classes == 2 and output_mode != 'full_probs' and len(prediction.shape) > 1: - # prediction = prediction[:, 1] - # else: - # raise ValueError(f'Output model {output_mode} is not supported') prediction = np.asarray([[random.random()] for j in range(predict_data.features.shape[0])]) return prediction -cnn_model_dict = {'deep': create_deep_cnn, - 'simplified': create_simple_cnn, - 'vgg16': create_vgg16} +cnn_model_dict = {'simplified': create_simple_cnn} class MyCNNImplementation(ModelImplementation): diff --git a/examples/simple/classification/cust/preproc_impls.py b/examples/advanced/customization/implementations/preproc_impls.py similarity index 81% rename from examples/simple/classification/cust/preproc_impls.py rename to examples/advanced/customization/implementations/preproc_impls.py index e4afc4bd63..ee1d99fa1b 100644 --- a/examples/simple/classification/cust/preproc_impls.py +++ b/examples/advanced/customization/implementations/preproc_impls.py @@ -11,14 +11,6 @@ class GammaFiltImplementation(DataOperationImplementation): - """ Class for application of :obj:`PolynomialFeatures` operation on data, - where only not encoded features (were not converted from categorical using - ``OneHot encoding``) are used - - Args: - params: OperationParameters with the arguments - """ - def __init__(self, params: Optional[OperationParameters]): super().__init__(params) if not self.params: @@ -40,4 +32,4 @@ def transform(self, input_data: InputData) -> OutputData: output_data = self._convert_to_output(input_data, transformed_features, data_type=DataTypesEnum.image) - return output_data \ No newline at end of file + return output_data diff --git a/examples/simple/classification/cust/data_operation_repository.json b/examples/advanced/customization/repositories/my_data_operation_repository.json similarity index 97% rename from examples/simple/classification/cust/data_operation_repository.json rename to examples/advanced/customization/repositories/my_data_operation_repository.json index 72740450ce..c7556462c7 100644 --- a/examples/simple/classification/cust/data_operation_repository.json +++ b/examples/advanced/customization/repositories/my_data_operation_repository.json @@ -42,7 +42,7 @@ ], "forbidden_node_types": "[]", "strategies": [ - "examples.simple.classification.cust.image_preproc", + "examples.advanced.customization.strategies.image_preproc", "ImagePreprocessingStrategy" ], "tags": [ @@ -130,19 +130,19 @@ "categorical-ignore" ] }, - 
"gamma_filt": { + "filter_1": { "meta": "image_preprocessing", "tags": [ "filtering" ] }, - "negamma_filt": { + "filter_2": { "meta": "image_preprocessing", "tags": [ "filtering" ] }, - "yet_anothe_filt": { + "filter_3": { "meta": "image_preprocessing", "tags": [ "filtering" diff --git a/examples/simple/classification/cust/my_default_operation_params.json b/examples/advanced/customization/repositories/my_default_operation_params.json similarity index 99% rename from examples/simple/classification/cust/my_default_operation_params.json rename to examples/advanced/customization/repositories/my_default_operation_params.json index 3109f307b2..37e4fac670 100644 --- a/examples/simple/classification/cust/my_default_operation_params.json +++ b/examples/advanced/customization/repositories/my_default_operation_params.json @@ -93,7 +93,7 @@ "fast_ica": { "whiten": "unit-variance" }, - "gamma_filt": { + "filter_1": { "a": "42" } } \ No newline at end of file diff --git a/examples/simple/classification/cust/model_repository.json b/examples/advanced/customization/repositories/my_model_repository.json similarity index 99% rename from examples/simple/classification/cust/model_repository.json rename to examples/advanced/customization/repositories/my_model_repository.json index 8491f981a5..c63d43c690 100644 --- a/examples/simple/classification/cust/model_repository.json +++ b/examples/advanced/customization/repositories/my_model_repository.json @@ -9,7 +9,7 @@ "input_type": "[DataTypesEnum.image]", "output_type": "[DataTypesEnum.table]", "strategies": [ - "examples.simple.classification.cust.image_class", + "examples.advanced.customization.strategies.image_class", "ImageClassificationStrategy" ], "tags": [ diff --git a/examples/advanced/customization/strategies/__init__.py b/examples/advanced/customization/strategies/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/examples/simple/classification/cust/image_class.py b/examples/advanced/customization/strategies/image_class.py similarity index 96% rename from examples/simple/classification/cust/image_class.py rename to examples/advanced/customization/strategies/image_class.py index 60e8676637..9af908f226 100644 --- a/examples/simple/classification/cust/image_class.py +++ b/examples/advanced/customization/strategies/image_class.py @@ -1,7 +1,7 @@ import warnings from typing import Optional -from examples.simple.classification.cust.cnn_impls import MyCNNImplementation +from examples.advanced.customization.implementations.cnn_impls import MyCNNImplementation from fedot.core.data.data import InputData, OutputData from fedot.core.operations.evaluation.evaluation_interfaces import EvaluationStrategy from fedot.core.operations.operation_parameters import OperationParameters diff --git a/examples/advanced/customization/strategies/image_preproc.py b/examples/advanced/customization/strategies/image_preproc.py new file mode 100644 index 0000000000..b5f4a9b725 --- /dev/null +++ b/examples/advanced/customization/strategies/image_preproc.py @@ -0,0 +1,39 @@ +import warnings +from typing import Optional + +from examples.advanced.customization.implementations.preproc_impls import GammaFiltImplementation +from fedot.core.data.data import InputData, OutputData +from fedot.core.operations.evaluation.evaluation_interfaces import EvaluationStrategy +from fedot.core.operations.operation_parameters import OperationParameters +from fedot.utilities.random import ImplementationRandomStateHandler + + +class ImagePreprocessingStrategy(EvaluationStrategy): + 
_operations_by_types = { + 'filter_1': GammaFiltImplementation, + 'filter_2': GammaFiltImplementation, + 'filter_3': GammaFiltImplementation + } + + def __init__(self, operation_type: str, params: Optional[OperationParameters] = None): + self.operation_impl = self._convert_to_operation(operation_type) + super().__init__(operation_type, params) + + def fit(self, train_data: InputData): + + warnings.filterwarnings("ignore", category=RuntimeWarning) + operation_implementation = self.operation_impl(self.params_for_fit) + with ImplementationRandomStateHandler(implementation=operation_implementation): + operation_implementation.fit(train_data) + return operation_implementation + + def predict(self, trained_operation, predict_data: InputData) -> OutputData: + prediction = trained_operation.transform(predict_data) + # Convert prediction to output (if it is required) + converted = self._convert_to_output(prediction, predict_data) + return converted + + def predict_for_fit(self, trained_operation, predict_data: InputData) -> OutputData: + prediction = trained_operation.transform_for_fit(predict_data) + converted = self._convert_to_output(prediction, predict_data) + return converted diff --git a/examples/simple/classification/cust/image_classification_problem.py b/examples/simple/classification/cust/image_classification_problem.py deleted file mode 100644 index 08f20b2703..0000000000 --- a/examples/simple/classification/cust/image_classification_problem.py +++ /dev/null @@ -1,157 +0,0 @@ -from typing import Any - -import numpy as np -import tensorflow as tf -from sklearn import preprocessing -from sklearn.metrics import roc_auc_score as roc_auc - -from examples.simple.classification.classification_pipelines import cnn_composite_pipeline -from fedot.core.data.data import InputData, OutputData -from fedot.core.operations.evaluation.operation_implementations.models.keras import check_input_array -from fedot.core.pipelines.node import PipelineNode -from fedot.core.pipelines.pipeline import Pipeline -from fedot.core.repository.tasks import Task, TaskTypesEnum -from fedot.core.utils import set_random_seed - - -def calculate_validation_metric(predicted: OutputData, dataset_to_validate: InputData) -> float: - # the quality assessment for the simulation results - roc_auc_value = roc_auc(y_true=dataset_to_validate.target, - y_score=predicted.predict, - multi_class="ovo") - return roc_auc_value - - -def cnn_model_fit(idx: np.array, features: np.array, target: np.array, params: dict): - x_train, y_train = features, target - transformed_x_train, transform_flag = check_input_array(x_train) - - if transform_flag: - print('Train data set was not scaled. 
The data was divided by 255.') - - if len(x_train.shape) == 3: - transformed_x_train = np.expand_dims(x_train, -1) - - if len(target.shape) < 2: - le = preprocessing.OneHotEncoder() - y_train = le.fit_transform(y_train.reshape(-1, 1)).toarray() - - optimizer_params = {'loss': "categorical_crossentropy", - 'optimizer': "adam", - 'metrics': ["accuracy"]} - - model = tf.keras.Sequential( - [ - tf.keras.layers.InputLayer(input_shape=[28, 28, 1]), - tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation="relu"), - tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), - tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"), - tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), - tf.keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"), - tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), - tf.keras.layers.Flatten(), - tf.keras.layers.Dropout(0.5), - tf.keras.layers.Dense(10, activation="softmax"), - ]) - - model.compile(**optimizer_params) - model.num_classes = 10 - - model.fit(transformed_x_train, y_train, batch_size=1, epochs=1, - validation_split=0.1) - - return model - - -# -# -def cnn_model_predict(fitted_model: Any, idx: np.array, features: np.array, params: dict): - x_test = features - transformed_x_test, transform_flag = check_input_array(x_test) - - if np.max(transformed_x_test) > 1: - print('Test data set was not scaled. The data was divided by 255.') - - if len(x_test.shape) == 3: - transformed_x_test = np.expand_dims(x_test, -1) - - # if output_mode == 'labels': - # prediction = np.round(trained_model.predict(transformed_x_test)) - # elif output_mode in ['probs', 'full_probs', 'default']: - prediction = fitted_model.predict(transformed_x_test) - # if trained_model.num_classes < 2: - # print('Data set contain only 1 target class. Please reformat your data.') - # raise ValueError('Data set contain only 1 target class. Please reformat your data.') - # elif trained_model.num_classes == 2 and output_mode != 'full_probs' and len(prediction.shape) > 1: - # prediction = prediction[:, 1] - # else: - # raise ValueError(f'Output model {output_mode} is not supported') - - return prediction, 'table' - - -# - -def preproc_predict(fitted_model: Any, idx: np.array, features: np.array, params: dict): - # example of custom data pre-processing for predict state - for i in range(features.shape[0]): - features[i, :, :] = features[i, :, :] + np.random.normal(0, 30) - return features, 'image' - - -def cnn_composite_pipeline(composite_flag: bool = True) -> Pipeline: - """ - Returns pipeline with the following structure: - - .. 
image:: img_classification_pipelines/cnn_composite_pipeline.png - :width: 55% - - Where cnn - convolutional neural network, rf - random forest - - :param composite_flag: add additional random forest estimator - """ - node_first = PipelineNode('gamma') - node_first.parameters = {'model_predict': preproc_predict} - - node_second = PipelineNode('cnn_1', nodes_from=[node_first]) - node_second.parameters = {'model_predict': cnn_model_predict, - 'model_fit': cnn_model_fit} - - node_final = PipelineNode('rf', nodes_from=[node_second]) - - pipeline = Pipeline(node_final) - return pipeline - - -def run_image_classification_problem(train_dataset: tuple, - test_dataset: tuple, - composite_flag: bool = True): - task = Task(TaskTypesEnum.classification) - - x_train, y_train = train_dataset[0], train_dataset[1] - x_test, y_test = test_dataset[0], test_dataset[1] - - dataset_to_train = InputData.from_image(images=x_train, - labels=y_train, - task=task) - dataset_to_validate = InputData.from_image(images=x_test, - labels=y_test, - task=task) - - pipeline = cnn_composite_pipeline(composite_flag) - pipeline.fit(input_data=dataset_to_train) - predictions = pipeline.predict(dataset_to_validate) - roc_auc_on_valid = calculate_validation_metric(predictions, - dataset_to_validate) - return roc_auc_on_valid, dataset_to_train, dataset_to_validate - - -if __name__ == '__main__': - set_random_seed(1) - - training_set, testing_set = tf.keras.datasets.mnist.load_data(path='mnist.npz') - roc_auc_on_valid, dataset_to_train, dataset_to_validate = run_image_classification_problem( - train_dataset=training_set, - test_dataset=testing_set) - - print(roc_auc_on_valid) diff --git a/examples/simple/classification/cust/image_classification_problem_with_opt.py b/examples/simple/classification/cust/image_classification_problem_with_opt.py deleted file mode 100644 index c3397a9c7b..0000000000 --- a/examples/simple/classification/cust/image_classification_problem_with_opt.py +++ /dev/null @@ -1,229 +0,0 @@ -import datetime -import random -from typing import Any - -import numpy as np -import tensorflow as tf -from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters -from golem.core.optimisers.genetic.operators.base_mutations import MutationTypesEnum -from golem.core.optimisers.genetic.operators.inheritance import GeneticSchemeTypesEnum -from golem.core.optimisers.genetic.operators.selection import SelectionTypesEnum -from hyperopt import hp -from sklearn.metrics import roc_auc_score as roc_auc - -from examples.simple.classification.classification_pipelines import cnn_composite_pipeline -from fedot.core.composer.composer_builder import ComposerBuilder -from fedot.core.composer.gp_composer.specific_operators import parameter_change_mutation -from fedot.core.data.data import InputData, OutputData -from fedot.core.operations.evaluation.operation_implementations.models.keras import check_input_array -from fedot.core.pipelines.node import PipelineNode -from fedot.core.pipelines.pipeline import Pipeline -from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements -from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum, ComplexityMetricsEnum -from fedot.core.repository.tasks import Task, TaskTypesEnum -from fedot.core.utils import set_random_seed - -custom_search_space = {'gamma_filt': { - 'r': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [-254, 254], - 'type': 'discrete'}, - 'g': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [-254, 254], 
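The implementation and strategy pair kept by this refactor (GammaFiltImplementation together with ImagePreprocessingStrategy) is the whole contract a custom preprocessing operation has to satisfy: a constructor that accepts OperationParameters, a fit that may simply return None for a stateless filter, and a transform that wraps its result via _convert_to_output with the image data type. Purely for illustration, a hypothetical extra filter written against the same interface is sketched below; the name InvertFiltImplementation and the inversion logic are not part of this patch, and to be usable it would also have to be added to _operations_by_types and to my_data_operation_repository.json (for instance under the already registered filter_3 entry).

    from typing import Optional

    from fedot.core.data.data import InputData, OutputData
    from fedot.core.operations.evaluation.operation_implementations.implementation_interfaces import \
        DataOperationImplementation
    from fedot.core.operations.operation_parameters import OperationParameters
    from fedot.core.repository.dataset_types import DataTypesEnum


    class InvertFiltImplementation(DataOperationImplementation):
        """Hypothetical image filter following the same contract as GammaFiltImplementation."""

        def __init__(self, params: Optional[OperationParameters]):
            super().__init__(params)

        def fit(self, input_data: InputData):
            # stateless filter, nothing to learn at fit time
            return None

        def transform(self, input_data: InputData) -> OutputData:
            # invert 8-bit pixel intensities; a new array is created, the input stays untouched
            inverted = 255 - input_data.features
            return self._convert_to_output(input_data, inverted,
                                           data_type=DataTypesEnum.image)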
- 'type': 'discrete'}, - 'b': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [-254, 254], - 'type': 'discrete'}, - 'ksize': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [0, 20], - 'type': 'discrete'} -} - -} - -def calculate_validation_metric(predicted: OutputData, dataset_to_validate: InputData) -> float: - # the quality assessment for the simulation results - roc_auc_value = roc_auc(y_true=dataset_to_validate.target, - y_score=predicted.predict, - multi_class="ovo") - return roc_auc_value - - -def cnn_model_fit(idx: np.array, features: np.array, target: np.array, params: dict): - # x_train, y_train = features, target - # transformed_x_train, transform_flag = check_input_array(x_train) - # - # if transform_flag: - # print('Train data set was not scaled. The data was divided by 255.') - # - # if len(x_train.shape) == 3: - # transformed_x_train = np.expand_dims(x_train, -1) - # - # if len(target.shape) < 2: - # le = preprocessing.OneHotEncoder() - # y_train = le.fit_transform(y_train.reshape(-1, 1)).toarray() - # - # optimizer_params = {'loss': "categorical_crossentropy", - # 'optimizer': "adam", - # 'metrics': ["accuracy"]} - # - # model = tf.keras.Sequential( - # [ - # tf.keras.layers.InputLayer(input_shape=[28, 28, 1]), - # tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation="relu"), - # tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), - # tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"), - # tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), - # tf.keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"), - # tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), - # tf.keras.layers.Flatten(), - # tf.keras.layers.Dropout(0.5), - # tf.keras.layers.Dense(10, activation="softmax"), - # ]) - # - # model.compile(**optimizer_params) - # model.num_classes = 10 - - # model.fit(transformed_x_train, y_train, batch_size=1, epochs=1, - # validation_split=0.1) - model = None - return model - - -# -# -def cnn_model_predict(fitted_model: Any, idx: np.array, features: np.array, params: dict): - x_test = features - transformed_x_test, transform_flag = check_input_array(x_test) - - if np.max(transformed_x_test) > 1: - print('Test data set was not scaled. The data was divided by 255.') - - if len(x_test.shape) == 3: - transformed_x_test = np.expand_dims(x_test, -1) - - # prediction = fitted_model.predict(transformed_x_test) - - prediction = np.asarray([[random.random()] for j in range(features.shape[0])]) - - return prediction, 'table' - - -# - -def preproc_predict(fitted_model: Any, idx: np.array, features: np.array, params: dict): - # example of custom data pre-processing for predict state - for i in range(features.shape[0]): - features[i, :, :] = features[i, :, :] + np.random.normal(0, 30) - return features, 'image' - - -def cnn_composite_pipeline(composite_flag: bool = True) -> Pipeline: - """ - Returns pipeline with the following structure: - - .. 
image:: img_classification_pipelines/cnn_composite_pipeline.png - :width: 55% - - Where cnn - convolutional neural network, rf - random forest - - :param composite_flag: add additional random forest estimator - """ - node_first = PipelineNode('gamma_filt') - node_first.parameters = {'model_predict': preproc_predict} - - node_second = PipelineNode('cnn_1', nodes_from=[node_first]) - node_second.parameters = {'model_predict': cnn_model_predict, - 'model_fit': cnn_model_fit} - - node_final = PipelineNode('rf', nodes_from=[node_second]) - - pipeline = Pipeline(node_final) - return pipeline - - -def run_image_classification_problem(train_dataset: tuple, - test_dataset: tuple, - composite_flag: bool = True): - task = Task(TaskTypesEnum.classification) - - x_train, y_train = train_dataset[0], train_dataset[1] - x_test, y_test = test_dataset[0], test_dataset[1] - - dataset_to_train = InputData.from_image(images=x_train, - labels=y_train, - task=task) - dataset_to_validate = InputData.from_image(images=x_test, - labels=y_test, - task=task) - - dataset_to_train = dataset_to_train.subset_range(0, 100) - - initial_pipeline = cnn_composite_pipeline(composite_flag) - - # the choice of the metric for the pipeline quality assessment during composition - quality_metric = ClassificationMetricsEnum.f1 - complexity_metric = ComplexityMetricsEnum.node_number - metrics = [quality_metric, complexity_metric] - # the choice and initialisation of the GP search - composer_requirements = PipelineComposerRequirements( - primary=['custom/preproc_image1', 'custom/preproc_image2'], - secondary=['custom/cnn_1', 'custom/cnn_2'], - timeout=datetime.timedelta(minutes=10), - num_of_generations=20, n_jobs=1 - ) - - pop_size = 5 - params = GPAlgorithmParameters( - selection_types=[SelectionTypesEnum.spea2], - genetic_scheme_type=GeneticSchemeTypesEnum.parameter_free, - mutation_types=[MutationTypesEnum.single_change, parameter_change_mutation], - pop_size=pop_size - ) - - # Create composer and with required composer params - composer = ( - ComposerBuilder(task=task) - .with_optimizer_params(params) - .with_requirements(composer_requirements) - .with_metrics(metrics) - .with_initial_pipelines(initial_pipelines=[initial_pipeline] * pop_size) - .build() - ) - - # the optimal pipeline generation by composition - the most time-consuming task - pipeline_evo_composed = composer.compose_pipeline(data=dataset_to_train)[0] - - pipeline_evo_composed.show() - print(pipeline_evo_composed.descriptive_id) - - pipeline_evo_composed.fit(input_data=dataset_to_train) - - # auto_model = Fedot(problem='classification', timeout=1, n_jobs=-1, preset='best_quality', - # metric=['f1'], with_tuning=True, initial_assumption = pipeline, - # available_models=[]) - # - # auto_model.fit(features=dataset_to_train) - - # auto_model.predict(dataset_to_validate) - # predictions = auto_model.prediction - - predictions = pipeline_evo_composed.predict(dataset_to_validate) - - roc_auc_on_valid = calculate_validation_metric(predictions, - dataset_to_validate) - return roc_auc_on_valid, dataset_to_train, dataset_to_validate - - -if __name__ == '__main__': - set_random_seed(1) - - training_set, testing_set = tf.keras.datasets.mnist.load_data(path='mnist.npz') - roc_auc_on_valid, dataset_to_train, dataset_to_validate = run_image_classification_problem( - train_dataset=training_set, - test_dataset=testing_set) - - print(roc_auc_on_valid) diff --git a/examples/simple/classification/cust/image_preproc.py b/examples/simple/classification/cust/image_preproc.py deleted file 
mode 100644 index 4b35ce4927..0000000000 --- a/examples/simple/classification/cust/image_preproc.py +++ /dev/null @@ -1,86 +0,0 @@ -import warnings -from typing import Optional - -from examples.simple.classification.cust.preproc_impls import GammaFiltImplementation -from fedot.core.data.data import InputData, OutputData -from fedot.core.operations.evaluation.evaluation_interfaces import EvaluationStrategy -from fedot.core.operations.operation_parameters import OperationParameters -from fedot.utilities.random import ImplementationRandomStateHandler - - -class ImagePreprocessingStrategy(EvaluationStrategy): - """ - Args: - operation_type: ``str`` of the operation defined in operation or data operation repositories - - .. details:: possible operations: - - - ``scaling``-> ScalingImplementation, - - ``normalization``-> NormalizationImplementation, - - ``simple_imputation``-> ImputationImplementation, - - ``pca``-> PCAImplementation, - - ``kernel_pca``-> KernelPCAImplementation, - - ``poly_features``-> PolyFeaturesImplementation, - - ``one_hot_encoding``-> OneHotEncodingImplementation, - - ``label_encoding``-> LabelEncodingImplementation, - - ``fast_ica``-> FastICAImplementation - - params: hyperparameters to fit the operation with - - """ - - _operations_by_types = { - 'gamma_filt': GammaFiltImplementation, - 'negamma_filt': GammaFiltImplementation, - 'yet_anothe_filt': GammaFiltImplementation - - } - - def __init__(self, operation_type: str, params: Optional[OperationParameters] = None): - self.operation_impl = self._convert_to_operation(operation_type) - super().__init__(operation_type, params) - - def fit(self, train_data: InputData): - """This method is used for operation training with the data provided - - Args: - train_data: data used for operation training - - Returns: - trained Sklearn operation - """ - - warnings.filterwarnings("ignore", category=RuntimeWarning) - operation_implementation = self.operation_impl(self.params_for_fit) - with ImplementationRandomStateHandler(implementation=operation_implementation): - operation_implementation.fit(train_data) - return operation_implementation - - def predict(self, trained_operation, predict_data: InputData) -> OutputData: - """Transform method for preprocessing task - - Args: - trained_operation: model object - predict_data: data used for prediction - - Returns: - prediction - """ - prediction = trained_operation.transform(predict_data) - # Convert prediction to output (if it is required) - converted = self._convert_to_output(prediction, predict_data) - return converted - - def predict_for_fit(self, trained_operation, predict_data: InputData) -> OutputData: - """ - Transform method for preprocessing task for fit stage - - Args: - trained_operation: model object - predict_data: data used for prediction - Returns: - OutputData: - """ - prediction = trained_operation.transform_for_fit(predict_data) - converted = self._convert_to_output(prediction, predict_data) - return converted \ No newline at end of file diff --git a/test/unit/tasks/test_classification.py b/test/unit/tasks/test_classification.py index 467c213469..f80be27713 100644 --- a/test/unit/tasks/test_classification.py +++ b/test/unit/tasks/test_classification.py @@ -1,10 +1,10 @@ import os import numpy as np +from examples.advanced.cust import run_image_classification_problem from sklearn.datasets import load_iris, make_classification from sklearn.metrics import roc_auc_score as roc_auc -from examples.simple.classification.cust.image_classification_problem import 
run_image_classification_problem from fedot.core.data.data import InputData from fedot.core.data.data_split import train_test_data_setup from fedot.core.data.supplementary_data import SupplementaryData From a6325bd23a743119351fee15e445b215679347e8 Mon Sep 17 00:00:00 2001 From: nicl-nno Date: Mon, 25 Dec 2023 17:28:18 +0300 Subject: [PATCH 07/12] Fix --- ...image_classification_with_custom_models.py | 16 +++--- .../customization/strategies/image_class.py | 2 +- .../image_classifcation_problem.py | 53 +++++++++++++++++++ test/unit/tasks/test_classification.py | 2 +- 4 files changed, 62 insertions(+), 11 deletions(-) create mode 100644 examples/simple/classification/image_classifcation_problem.py diff --git a/examples/advanced/customization/image_classification_with_custom_models.py b/examples/advanced/customization/image_classification_with_custom_models.py index c3f6f9806f..aae44eca22 100644 --- a/examples/advanced/customization/image_classification_with_custom_models.py +++ b/examples/advanced/customization/image_classification_with_custom_models.py @@ -10,7 +10,6 @@ from hyperopt import hp from sklearn.metrics import roc_auc_score as roc_auc -from examples.simple.classification.classification_pipelines import cnn_composite_pipeline from fedot.core.composer.composer_builder import ComposerBuilder from fedot.core.composer.gp_composer.specific_operators import parameter_change_mutation from fedot.core.data.data import InputData, OutputData @@ -49,7 +48,7 @@ def calculate_validation_metric(predicted: OutputData, dataset_to_validate: Inpu return roc_auc_value -def cnn_composite_pipeline(composite_flag: bool = True) -> Pipeline: +def cnn_composite_pipeline() -> Pipeline: node_first = PipelineNode('filter_1') node_second = PipelineNode('cnn_1', nodes_from=[node_first]) @@ -73,9 +72,8 @@ def setup_repository(): 'my_default_operation_params.json') -def run_image_classification_problem(train_dataset: tuple, - test_dataset: tuple, - composite_flag: bool = True): +def run_image_classification_automl(train_dataset: tuple, + test_dataset: tuple): task = Task(TaskTypesEnum.classification) setup_repository() @@ -92,7 +90,7 @@ def run_image_classification_problem(train_dataset: tuple, dataset_to_train = dataset_to_train.subset_range(0, 100) - initial_pipeline = cnn_composite_pipeline(composite_flag) + initial_pipeline = cnn_composite_pipeline() initial_pipeline.show() initial_pipeline.fit(dataset_to_train) predictions = initial_pipeline.predict(dataset_to_validate) @@ -147,7 +145,7 @@ def run_image_classification_problem(train_dataset: tuple, search_space = PipelineSearchSpace(custom_search_space=custom_search_space, replace_default_search_space=replace_default_search_space) - predictions = pipeline_evo_composed.predict(dataset_to_validate) + pipeline_evo_composed.predict(dataset_to_validate) # .with_cv_folds(cv_folds) \ pipeline_tuner = TunerBuilder(dataset_to_train.task) \ @@ -157,7 +155,7 @@ def run_image_classification_problem(train_dataset: tuple, .with_iterations(50) \ .with_search_space(search_space).build(dataset_to_train) - pipeline = pipeline_tuner.tune(pipeline_evo_composed) + pipeline_tuner.tune(pipeline_evo_composed) predictions = pipeline_evo_composed.predict(dataset_to_validate) @@ -170,7 +168,7 @@ def run_image_classification_problem(train_dataset: tuple, set_random_seed(1) training_set, testing_set = tf.keras.datasets.mnist.load_data(path='mnist.npz') - roc_auc_on_valid, dataset_to_train, dataset_to_validate = run_image_classification_problem( + roc_auc_on_valid, dataset_to_train, 
diff --git a/examples/advanced/customization/strategies/image_class.py b/examples/advanced/customization/strategies/image_class.py
index 9af908f226..def060bec8 100644
--- a/examples/advanced/customization/strategies/image_class.py
+++ b/examples/advanced/customization/strategies/image_class.py
@@ -56,4 +56,4 @@ def predict(self, trained_operation, predict_data: InputData) -> OutputData:

         # Convert prediction to output (if it is required)
         converted = self._convert_to_output(prediction, predict_data)
-        return converted
\ No newline at end of file
+        return converted
diff --git a/examples/simple/classification/image_classifcation_problem.py b/examples/simple/classification/image_classifcation_problem.py
new file mode 100644
index 0000000000..178768dacf
--- /dev/null
+++ b/examples/simple/classification/image_classifcation_problem.py
@@ -0,0 +1,53 @@
+from golem.utilities.requirements_notificator import warn_requirement
+
+try:
+    import tensorflow as tf
+except ModuleNotFoundError:
+    warn_requirement('tensorflow', 'fedot[extra]')
+
+from sklearn.metrics import roc_auc_score as roc_auc
+
+from examples.simple.classification.classification_pipelines import cnn_composite_pipeline
+from fedot.core.data.data import InputData, OutputData
+from fedot.core.repository.tasks import Task, TaskTypesEnum
+from fedot.core.utils import set_random_seed
+
+
+def calculate_validation_metric(predicted: OutputData, dataset_to_validate: InputData) -> float:
+    # the quality assessment for the simulation results
+    roc_auc_value = roc_auc(y_true=dataset_to_validate.target,
+                            y_score=predicted.predict,
+                            multi_class="ovo")
+    return roc_auc_value
+
+
+def run_image_classification_problem(train_dataset: tuple,
+                                     test_dataset: tuple,
+                                     composite_flag: bool = True):
+    task = Task(TaskTypesEnum.classification)
+
+    x_train, y_train = train_dataset[0], train_dataset[1]
+    x_test, y_test = test_dataset[0], test_dataset[1]
+
+    dataset_to_train = InputData.from_image(images=x_train,
+                                            labels=y_train,
+                                            task=task)
+    dataset_to_validate = InputData.from_image(images=x_test,
+                                               labels=y_test,
+                                               task=task)
+
+    pipeline = cnn_composite_pipeline(composite_flag)
+    pipeline.fit(input_data=dataset_to_train)
+    predictions = pipeline.predict(dataset_to_validate)
+    roc_auc_on_valid = calculate_validation_metric(predictions,
+                                                   dataset_to_validate)
+    return roc_auc_on_valid, dataset_to_train, dataset_to_validate
+
+
+if __name__ == '__main__':
+    set_random_seed(1)
+
+    training_set, testing_set = tf.keras.datasets.mnist.load_data(path='mnist.npz')
+    roc_auc_on_valid, dataset_to_train, dataset_to_validate = run_image_classification_problem(
+        train_dataset=training_set,
+        test_dataset=testing_set)
diff --git a/test/unit/tasks/test_classification.py b/test/unit/tasks/test_classification.py
index f80be27713..d228c143b6 100644
--- a/test/unit/tasks/test_classification.py
+++ b/test/unit/tasks/test_classification.py
@@ -1,10 +1,10 @@
 import os

 import numpy as np
-from examples.advanced.cust import run_image_classification_problem
 from sklearn.datasets import load_iris, make_classification
 from sklearn.metrics import roc_auc_score as roc_auc

+from examples.simple.classification.image_classifcation_problem import run_image_classification_problem
 from fedot.core.data.data import InputData
 from fedot.core.data.data_split import train_test_data_setup
 from fedot.core.data.supplementary_data import SupplementaryData
From 2a41490730fa69e810aa7b2922152a3e4207281d Mon Sep 17 00:00:00 2001
From: nicl-nno
Date: Mon, 25 Dec 2023 17:31:27 +0300
Subject: [PATCH 08/12] Fix 2

---
 test/integration/customization/__init__.py     |  0
 .../customization/test_custom_automl.py        | 24 +++++++++++++++++++
 test/unit/tasks/test_classification.py         |  2 +-
 3 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100644 test/integration/customization/__init__.py
 create mode 100644 test/integration/customization/test_custom_automl.py

diff --git a/test/integration/customization/__init__.py b/test/integration/customization/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/integration/customization/test_custom_automl.py b/test/integration/customization/test_custom_automl.py
new file mode 100644
index 0000000000..f4b0f95063
--- /dev/null
+++ b/test/integration/customization/test_custom_automl.py
@@ -0,0 +1,24 @@
+import os
+
+from examples.advanced.customization.image_classification_with_custom_models import run_image_classification_automl
+
+
+def test_image_classification_automl():
+    test_data_path = '../../data/test_data.npy'
+    test_labels_path = '../../data/test_labels.npy'
+    train_data_path = '../../data/training_data.npy'
+    train_labels_path = '../../data/training_labels.npy'
+
+    test_file_path = str(os.path.dirname(__file__))
+    training_path_features = os.path.join(test_file_path, train_data_path)
+    training_path_labels = os.path.join(test_file_path, train_labels_path)
+    test_path_features = os.path.join(test_file_path, test_data_path)
+    test_path_labels = os.path.join(test_file_path, test_labels_path)
+
+    roc_auc_on_valid, dataset_to_train, dataset_to_validate = run_image_classification_automl(
+        train_dataset=(training_path_features,
+                       training_path_labels),
+        test_dataset=(test_path_features,
+                      test_path_labels))
+
+    return roc_auc_on_valid, dataset_to_train, dataset_to_validate
diff --git a/test/unit/tasks/test_classification.py b/test/unit/tasks/test_classification.py
index d228c143b6..c2f5ac9906 100644
--- a/test/unit/tasks/test_classification.py
+++ b/test/unit/tasks/test_classification.py
@@ -166,4 +166,4 @@ def test_output_mode_full_probs():
     assert not np.array_equal(results_probs.predict, results.predict)
     assert np.array_equal(results_probs.predict, results_default.predict)
     assert results.predict.shape == (len(test_data.target), 2)
-    assert results_probs.predict.shape == (len(test_data.target), 1)
+    assert results_probs.predict.shape == (len(test_data.target), 1)
\ No newline at end of file

From 10d1be43866852526f0276a66df283bdf79abbf6 Mon Sep 17 00:00:00 2001
From: nicl-nno
Date: Mon, 25 Dec 2023 18:04:21 +0300
Subject: [PATCH 09/12] Fix 3

---
 test/unit/tasks/test_classification.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/unit/tasks/test_classification.py b/test/unit/tasks/test_classification.py
index c2f5ac9906..d228c143b6 100644
--- a/test/unit/tasks/test_classification.py
+++ b/test/unit/tasks/test_classification.py
@@ -166,4 +166,4 @@ def test_output_mode_full_probs():
     assert not np.array_equal(results_probs.predict, results.predict)
     assert np.array_equal(results_probs.predict, results_default.predict)
     assert results.predict.shape == (len(test_data.target), 2)
-    assert results_probs.predict.shape == (len(test_data.target), 1)
\ No newline at end of file
+    assert results_probs.predict.shape == (len(test_data.target), 1)
From d05b51d8da40b16a5d6d4fdb2c28263307bd62a7 Mon Sep 17 00:00:00 2001
From: nicl-nno
Date: Mon, 26 Feb 2024 18:48:39 +0300
Subject: [PATCH 10/12] Code review fixes

---
 .../image_classification_with_custom_models.py             |  3 +--
 .../advanced/customization/implementations/cnn_impls.py    |  6 +++---
 ...ifcation_problem.py => image_classification_problem.py} |  0
 fedot/core/pipelines/pipeline_advisor.py                   |  2 +-
 fedot/core/pipelines/tuning/hyperparams.py                 |  5 +----
 fedot/core/pipelines/tuning/search_space.py                |  7 +++++--
 test/unit/tasks/test_classification.py                     |  2 +-
 7 files changed, 12 insertions(+), 13 deletions(-)
 rename examples/simple/classification/{image_classifcation_problem.py => image_classification_problem.py} (100%)

diff --git a/examples/advanced/customization/image_classification_with_custom_models.py b/examples/advanced/customization/image_classification_with_custom_models.py
index aae44eca22..6cbc72ded5 100644
--- a/examples/advanced/customization/image_classification_with_custom_models.py
+++ b/examples/advanced/customization/image_classification_with_custom_models.py
@@ -17,7 +17,6 @@
 from fedot.core.pipelines.node import PipelineNode
 from fedot.core.pipelines.pipeline import Pipeline
 from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements
-from fedot.core.pipelines.tuning.hyperparams import ParametersChanger
 from fedot.core.pipelines.tuning.search_space import PipelineSearchSpace
 from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
 from fedot.core.repository.metrics_repository import ClassificationMetricsEnum, ComplexityMetricsEnum
@@ -113,7 +112,7 @@ def run_image_classification_automl(train_dataset: tuple,
     pop_size = 5

     # search space for hyper-parametric mutation
-    ParametersChanger.custom_search_space = custom_search_space
+    PipelineSearchSpace.pre_defined_custom_search_space = custom_search_space

     params = GPAlgorithmParameters(
         selection_types=[SelectionTypesEnum.spea2],
diff --git a/examples/advanced/customization/implementations/cnn_impls.py b/examples/advanced/customization/implementations/cnn_impls.py
index 24b5b11444..c1ad5be98e 100644
--- a/examples/advanced/customization/implementations/cnn_impls.py
+++ b/examples/advanced/customization/implementations/cnn_impls.py
@@ -135,9 +135,9 @@ def fit(self, train_data):
         self.model = cnn_model_dict[self.params.get('architecture_type')](input_shape=train_data.features.shape[1:4],
                                                                           num_classes=len(self.classes))

-        # self.model = fit_cnn(train_data=train_data, model=self.model, epochs=self.params.get('epochs'),
-        #                      batch_size=self.params.get('batch_size'),
-        #                      optimizer_params=self.params.get('optimizer_parameters'), logger=self.params.get('log'))
+        self.model = fit_cnn(train_data=train_data, model=self.model, epochs=self.params.get('epochs'),
+                             batch_size=self.params.get('batch_size'),
+                             optimizer_params=self.params.get('optimizer_parameters'), logger=self.params.get('log'))
         return self.model

     def predict(self, input_data):
diff --git a/examples/simple/classification/image_classifcation_problem.py b/examples/simple/classification/image_classification_problem.py
similarity index 100%
rename from examples/simple/classification/image_classifcation_problem.py
rename to examples/simple/classification/image_classification_problem.py
diff --git a/fedot/core/pipelines/pipeline_advisor.py b/fedot/core/pipelines/pipeline_advisor.py
index cfe8b37ffc..2708dd673d 100644
--- a/fedot/core/pipelines/pipeline_advisor.py
+++ b/fedot/core/pipelines/pipeline_advisor.py
@@ -45,7 +45,7 @@ def propose_change(self, node: OptNode, possible_operations: List[str]) -> List[
             candidates = set.intersection({'lagged', 'sparse_lagged'},
                                           set(possible_operations))
         if 'cnn' in operation_id:
-            candidates = [c for c in candidates if 'cnn' in candidates]
+            candidates = [cand for cand in candidates if 'cnn' in cand]

         if operation_id in candidates:
             # the change to the same node is not meaningful
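For context on the pipeline_advisor.py hunk above: the original comprehension tested `'cnn' in candidates`, i.e. membership of the literal string 'cnn' in the whole list, so the filter never looked at the individual candidate names. A minimal sketch of the behaviour before and after the fix, using made-up operation names rather than anything taken from the FEDOT repositories:

# Hypothetical candidate operations; only names that themselves contain 'cnn' should remain.
candidates = ['cnn_1', 'rf', 'cnn_custom', 'logit']

# Old form: "'cnn' in candidates" asks whether the list contains the exact string 'cnn',
# so the result is either empty or the unfiltered list.
buggy = [c for c in candidates if 'cnn' in candidates]    # -> []

# Fixed form from the patch: test each element.
fixed = [cand for cand in candidates if 'cnn' in cand]    # -> ['cnn_1', 'cnn_custom']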
diff --git a/fedot/core/pipelines/tuning/hyperparams.py b/fedot/core/pipelines/tuning/hyperparams.py
index 7a64efbeda..5499200a54 100644
--- a/fedot/core/pipelines/tuning/hyperparams.py
+++ b/fedot/core/pipelines/tuning/hyperparams.py
@@ -11,13 +11,10 @@ class ParametersChanger:
     """
     Class for the hyperparameters changing in the operation
-
     :param operation_name: name of operation to get hyperparameters for
     :param current_params: current parameters value
     """

-    custom_search_space = None
-
     def __init__(self, operation_name, current_params):
         self.operation_name = operation_name
         self.current_params = current_params
@@ -28,7 +25,7 @@ def get_new_operation_params(self):

         # Get available parameters for operation
         params_list = \
-            PipelineSearchSpace(ParametersChanger.custom_search_space).get_parameters_for_operation(self.operation_name)
+            PipelineSearchSpace().get_parameters_for_operation(self.operation_name)

         if not params_list:
             params_dict = None
diff --git a/fedot/core/pipelines/tuning/search_space.py b/fedot/core/pipelines/tuning/search_space.py
index b1a59d364a..f65b1d568a 100644
--- a/fedot/core/pipelines/tuning/search_space.py
+++ b/fedot/core/pipelines/tuning/search_space.py
@@ -9,17 +9,20 @@ class PipelineSearchSpace(SearchSpace):
     """
-    Class for extracting searching space
+    Class for extracting searching space for hyperparameters of pipeline

     :param custom_search_space: dictionary of dictionaries of tuples (hyperopt expression (e.g. hp.choice), *params)
       for applying custom hyperparameters search space
     :param replace_default_search_space: whether replace default dictionary (False) or append it (True)
     """

+    pre_defined_custom_search_space = None  # workaround to modify search space globally
+
     def __init__(self,
                  custom_search_space: Optional[OperationParametersMapping] = None,
                  replace_default_search_space: bool = False):
-        self.custom_search_space = custom_search_space
+        self.custom_search_space = custom_search_space if PipelineSearchSpace.pre_defined_custom_search_space is None \
+            else PipelineSearchSpace.pre_defined_custom_search_space
         self.replace_default_search_space = replace_default_search_space
         parameters_per_operation = self.get_parameters_dict()
         super().__init__(parameters_per_operation)
diff --git a/test/unit/tasks/test_classification.py b/test/unit/tasks/test_classification.py
index d228c143b6..7373f758be 100644
--- a/test/unit/tasks/test_classification.py
+++ b/test/unit/tasks/test_classification.py
@@ -4,7 +4,7 @@
 from sklearn.datasets import load_iris, make_classification
 from sklearn.metrics import roc_auc_score as roc_auc

-from examples.simple.classification.image_classifcation_problem import run_image_classification_problem
+from examples.simple.classification.image_classification_problem import run_image_classification_problem
 from fedot.core.data.data import InputData
 from fedot.core.data.data_split import train_test_data_setup
 from fedot.core.data.supplementary_data import SupplementaryData
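Taken together, the hyperparams.py and search_space.py changes in this patch move the global override from ParametersChanger.custom_search_space to PipelineSearchSpace.pre_defined_custom_search_space, so any PipelineSearchSpace() built without arguments (as in get_new_operation_params above) still sees the custom space. A rough usage sketch, reusing the dictionary format that appears earlier in this patch series; the 'gamma_filt' operation and its 'sigma' parameter are illustrative assumptions, not built-in FEDOT operations:

from hyperopt import hp

from fedot.core.pipelines.tuning.search_space import PipelineSearchSpace

# Assumed custom operation with a single tunable hyperparameter.
custom_search_space = {
    'gamma_filt': {
        'sigma': {
            'hyperopt-dist': hp.uniformint,  # hyperopt expression
            'sampling-scope': [-254, 254],   # arguments passed to that expression
            'type': 'discrete'}
    }
}

# The workaround introduced here: set the space once, globally, so code paths that
# construct PipelineSearchSpace() with no arguments (e.g. hyper-parametric mutation) pick it up too.
PipelineSearchSpace.pre_defined_custom_search_space = custom_search_space

# Explicit construction, as used for tuning in the example script of this patch series.
search_space = PipelineSearchSpace(custom_search_space=custom_search_space,
                                   replace_default_search_space=True)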
From 93606a3a0fe74f0fc7bf721216e0984ae4cc458f Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Mon, 26 Feb 2024 19:59:13 +0000
Subject: [PATCH 11/12] Automated autopep8 fixes

---
 .../customization/image_classification_with_custom_models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/advanced/customization/image_classification_with_custom_models.py b/examples/advanced/customization/image_classification_with_custom_models.py
index 6cbc72ded5..03f0a5a82a 100644
--- a/examples/advanced/customization/image_classification_with_custom_models.py
+++ b/examples/advanced/customization/image_classification_with_custom_models.py
@@ -35,7 +35,7 @@
         'hyperopt-dist': hp.uniformint,
         'sampling-scope': [-254, 254],
         'type': 'discrete'},
-    }
+}
 }

From dcc87a3e1735d540c575a7bd6adc255bfb53ff20 Mon Sep 17 00:00:00 2001
From: nicl-nno
Date: Mon, 26 Feb 2024 23:59:49 +0300
Subject: [PATCH 12/12] Code review fixes 2

---
 ...image_classification_with_custom_models.py |  4 +--
 .../customization/strategies/image_class.py   | 30 ++----------------
 .../operations/evaluation/classification.py   |  2 +-
 test/data/test_labels.npy                     | Bin 138 -> 168 bytes
 test/data/training_labels.npy                 | Bin 138 -> 168 bytes
 5 files changed, 6 insertions(+), 30 deletions(-)

diff --git a/examples/advanced/customization/image_classification_with_custom_models.py b/examples/advanced/customization/image_classification_with_custom_models.py
index 03f0a5a82a..87f4008bcc 100644
--- a/examples/advanced/customization/image_classification_with_custom_models.py
+++ b/examples/advanced/customization/image_classification_with_custom_models.py
@@ -87,7 +87,7 @@ def run_image_classification_automl(train_dataset: tuple,
                                                labels=y_test,
                                                task=task)

-    dataset_to_train = dataset_to_train.subset_range(0, 100)
+    dataset_to_train = dataset_to_train.subset_range(0, min(100, dataset_to_train.features.shape[0]))

     initial_pipeline = cnn_composite_pipeline()
     initial_pipeline.show()
@@ -106,7 +106,7 @@ def run_image_classification_automl(train_dataset: tuple,
     composer_requirements = PipelineComposerRequirements(
         primary=get_operations_for_task(task=task, mode='all'),
         timeout=datetime.timedelta(minutes=3),
-        num_of_generations=20, n_jobs=1
+        num_of_generations=20, n_jobs=1, cv_folds=None
     )

     pop_size = 5
diff --git a/examples/advanced/customization/strategies/image_class.py b/examples/advanced/customization/strategies/image_class.py
index def060bec8..947915c3a4 100644
--- a/examples/advanced/customization/strategies/image_class.py
+++ b/examples/advanced/customization/strategies/image_class.py
@@ -2,15 +2,15 @@
 from typing import Optional

 from examples.advanced.customization.implementations.cnn_impls import MyCNNImplementation
-from fedot.core.data.data import InputData, OutputData
-from fedot.core.operations.evaluation.evaluation_interfaces import EvaluationStrategy
+from fedot.core.data.data import InputData
+from fedot.core.operations.evaluation.classification import FedotClassificationStrategy
 from fedot.core.operations.operation_parameters import OperationParameters
 from fedot.utilities.random import ImplementationRandomStateHandler

 warnings.filterwarnings("ignore", category=UserWarning)


-class ImageClassificationStrategy(EvaluationStrategy):
+class ImageClassificationStrategy(FedotClassificationStrategy):
     _operations_by_types = {
         'cnn_1': MyCNNImplementation
     }
@@ -33,27 +33,3 @@ def fit(self, train_data: InputData):
         with ImplementationRandomStateHandler(implementation=operation_implementation):
             operation_implementation.fit(train_data)
         return operation_implementation
-
-    def predict(self, trained_operation, predict_data: InputData) -> OutputData:
-        """
-        Predict method for classification task for predict stage
-
-        :param trained_operation: model object
-        :param predict_data: data used for prediction
-        :return: prediction target
-        """
-        n_classes = len(trained_operation.classes_)
-        if self.output_mode == 'labels':
-            prediction = trained_operation.predict(predict_data)
-        elif self.output_mode in ['probs', 'full_probs', 'default']:
-            prediction = trained_operation.predict_proba(predict_data)
-            if n_classes < 2:
-                raise ValueError('Data set contain only 1 target class. Please reformat your data.')
-            elif n_classes == 2 and self.output_mode != 'full_probs' and len(prediction.shape) > 1:
-                prediction = prediction[:, 1]
-        else:
-            raise ValueError(f'Output model {self.output_mode} is not supported')
-
-        # Convert prediction to output (if it is required)
-        converted = self._convert_to_output(prediction, predict_data)
-        return converted
diff --git a/fedot/core/operations/evaluation/classification.py b/fedot/core/operations/evaluation/classification.py
index 0b90b49648..5262e915bd 100644
--- a/fedot/core/operations/evaluation/classification.py
+++ b/fedot/core/operations/evaluation/classification.py
@@ -82,7 +82,7 @@ def predict(self, trained_operation, predict_data: InputData) -> OutputData:
             if n_classes < 2:
                 raise ValueError('Data set contain only 1 target class. Please reformat your data.')
             elif n_classes == 2 and self.output_mode != 'full_probs' and len(prediction.shape) > 1:
-                prediction = prediction[:, 1]
+                prediction = prediction[:, prediction.shape[1] - 1]
         else:
             raise ValueError(f'Output model {self.output_mode} is not supported')

diff --git a/test/data/test_labels.npy b/test/data/test_labels.npy
index 815f538382ca1151827c2e28478a31919340b346..b3408a2e618d42a348dd92210cdcaef201f4d31a 100644
GIT binary patch
delta 54
ecmeBTT){X&l-VZJWTHfd1_KZ
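The classification.py change above stops hard-coding column 1 in the binary 'probs' branch and takes the last probability column instead, presumably so the indexing also holds when predict_proba returns a single-column array. A small NumPy illustration of the two cases, independent of FEDOT and written only to show the difference in indexing:

import numpy as np

two_col = np.array([[0.3, 0.7],
                    [0.9, 0.1]])   # typical binary predict_proba output
one_col = np.array([[0.7],
                    [0.1]])        # degenerate single-column output

# Old indexing: two_col[:, 1] works, but one_col[:, 1] would raise IndexError.
# New indexing from the patch: always the last available column.
assert np.allclose(two_col[:, two_col.shape[1] - 1], [0.7, 0.1])
assert np.allclose(one_col[:, one_col.shape[1] - 1], [0.7, 0.1])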