diff --git a/examples/advanced/customization/__init__.py b/examples/advanced/customization/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/examples/advanced/customization/image_classification_with_custom_models.py b/examples/advanced/customization/image_classification_with_custom_models.py new file mode 100644 index 0000000000..87f4008bcc --- /dev/null +++ b/examples/advanced/customization/image_classification_with_custom_models.py @@ -0,0 +1,174 @@ +import datetime +from pathlib import Path + +import tensorflow as tf +from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters +from golem.core.optimisers.genetic.operators.base_mutations import MutationTypesEnum +from golem.core.optimisers.genetic.operators.inheritance import GeneticSchemeTypesEnum +from golem.core.optimisers.genetic.operators.selection import SelectionTypesEnum +from golem.core.tuning.simultaneous import SimultaneousTuner +from hyperopt import hp +from sklearn.metrics import roc_auc_score as roc_auc + +from fedot.core.composer.composer_builder import ComposerBuilder +from fedot.core.composer.gp_composer.specific_operators import parameter_change_mutation +from fedot.core.data.data import InputData, OutputData +from fedot.core.operations.operation_parameters import OperationParameters +from fedot.core.pipelines.node import PipelineNode +from fedot.core.pipelines.pipeline import Pipeline +from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements +from fedot.core.pipelines.tuning.search_space import PipelineSearchSpace +from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder +from fedot.core.repository.metrics_repository import ClassificationMetricsEnum, ComplexityMetricsEnum +from fedot.core.repository.operation_types_repository import get_operations_for_task, OperationTypesRepository +from fedot.core.repository.tasks import Task, TaskTypesEnum +from fedot.core.utils import set_random_seed, fedot_project_root + +custom_search_space = {'filter_1': { + 'r': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [-254, 254], + 'type': 'discrete'} +}, + 'filter_2': { + 'r': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [-254, 254], + 'type': 'discrete'}, +} +} + + +def calculate_validation_metric(predicted: OutputData, dataset_to_validate: InputData) -> float: + # the quality assessment for the simulation results + roc_auc_value = roc_auc(y_true=dataset_to_validate.target, + y_score=predicted.predict, + multi_class="ovo") + return roc_auc_value + + +def cnn_composite_pipeline() -> Pipeline: + node_first = PipelineNode('filter_1') + + node_second = PipelineNode('cnn_1', nodes_from=[node_first]) + + node_final = PipelineNode('rf', nodes_from=[node_second]) + + pipeline = Pipeline(node_final) + return pipeline + + +def setup_repository(): + repo_folder = Path(fedot_project_root(), 'examples', 'advanced', 'customization', + 'repositories') + OperationTypesRepository.__repository_dict__ = { + 'model': {'file': Path(repo_folder, 'my_model_repository.json'), 'initialized_repo': None, 'default_tags': []}, + 'data_operation': {'file': Path(repo_folder, 'my_data_operation_repository.json'), + 'initialized_repo': None, 'default_tags': []} + } + + OperationParameters.custom_default_params_path = Path(repo_folder, + 'my_default_operation_params.json') + + +def run_image_classification_automl(train_dataset: tuple, + test_dataset: tuple): + task = Task(TaskTypesEnum.classification) + + setup_repository() + + x_train, y_train = train_dataset[0], 
train_dataset[1]
+    x_test, y_test = test_dataset[0], test_dataset[1]
+
+    dataset_to_train = InputData.from_image(images=x_train,
+                                            labels=y_train,
+                                            task=task)
+    dataset_to_validate = InputData.from_image(images=x_test,
+                                               labels=y_test,
+                                               task=task)
+
+    dataset_to_train = dataset_to_train.subset_range(0, min(100, dataset_to_train.features.shape[0]))
+
+    initial_pipeline = cnn_composite_pipeline()
+    initial_pipeline.show()
+    initial_pipeline.fit(dataset_to_train)
+    predictions = initial_pipeline.predict(dataset_to_validate)
+    roc_auc_on_valid = calculate_validation_metric(predictions,
+                                                   dataset_to_validate)
+
+    print(roc_auc_on_valid)
+
+    # the choice of the metrics for the pipeline quality assessment during composition
+    quality_metric = ClassificationMetricsEnum.f1
+    complexity_metric = ComplexityMetricsEnum.node_number
+    metrics = [quality_metric, complexity_metric]
+    # the choice and initialisation of the GP search
+    composer_requirements = PipelineComposerRequirements(
+        primary=get_operations_for_task(task=task, mode='all'),
+        timeout=datetime.timedelta(minutes=3),
+        num_of_generations=20, n_jobs=1, cv_folds=None
+    )
+
+    pop_size = 5
+
+    # search space for the hyperparametric mutation
+    PipelineSearchSpace.pre_defined_custom_search_space = custom_search_space
+
+    params = GPAlgorithmParameters(
+        selection_types=[SelectionTypesEnum.spea2],
+        genetic_scheme_type=GeneticSchemeTypesEnum.steady_state,
+        mutation_types=[MutationTypesEnum.single_change, parameter_change_mutation],
+        pop_size=pop_size
+    )
+
+    # create the composer with the required parameters
+    composer = (
+        ComposerBuilder(task=task)
+        .with_optimizer_params(params)
+        .with_requirements(composer_requirements)
+        .with_metrics(metrics)
+        .with_initial_pipelines(initial_pipelines=[initial_pipeline] * pop_size)
+        .build()
+    )
+
+    # pipeline generation by composition - the most time-consuming stage
+    pipeline_evo_composed = composer.compose_pipeline(data=dataset_to_train)[0]
+
+    pipeline_evo_composed.show()
+    print(pipeline_evo_composed.descriptive_id)
+
+    pipeline_evo_composed.fit(input_data=dataset_to_train)
+
+    replace_default_search_space = True
+    cv_folds = 1
+    search_space = PipelineSearchSpace(custom_search_space=custom_search_space,
+                                       replace_default_search_space=replace_default_search_space)
+
+    pipeline_tuner = TunerBuilder(dataset_to_train.task) \
+        .with_tuner(SimultaneousTuner) \
+        .with_metric(ClassificationMetricsEnum.ROCAUC) \
+        .with_cv_folds(cv_folds) \
+        .with_iterations(50) \
+        .with_search_space(search_space).build(dataset_to_train)
+
+    # apply the tuned hyperparameters and refit before the final validation
+    pipeline_evo_composed = pipeline_tuner.tune(pipeline_evo_composed)
+    pipeline_evo_composed.fit(input_data=dataset_to_train)
+
+    predictions = pipeline_evo_composed.predict(dataset_to_validate)
+
+    roc_auc_on_valid = calculate_validation_metric(predictions,
+                                                   dataset_to_validate)
+    return roc_auc_on_valid, dataset_to_train, dataset_to_validate
+
+
+if __name__ == '__main__':
+    set_random_seed(1)
+
+    training_set, testing_set = tf.keras.datasets.mnist.load_data(path='mnist.npz')
+    roc_auc_on_valid, dataset_to_train, dataset_to_validate = run_image_classification_automl(
+        train_dataset=training_set,
+        test_dataset=testing_set)
+
+    print(roc_auc_on_valid)
diff --git a/examples/advanced/customization/implementations/__init__.py b/examples/advanced/customization/implementations/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
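Reviewer note: the `custom_search_space` dictionary used above follows FEDOT's `{operation: {param: {'hyperopt-dist', 'sampling-scope', 'type'}}}` convention. A minimal sketch of how one such entry becomes a sampled hyperparameter is below; the `'filter_1 || r'` label format is an illustrative assumption, not FEDOT's internal naming.

```python
from hyperopt import hp
from hyperopt.pyll.stochastic import sample

entry = {'r': {'hyperopt-dist': hp.uniformint,
               'sampling-scope': [-254, 254],
               'type': 'discrete'}}

# build a hyperopt expression per hyperparameter, then draw one configuration
space = {name: spec['hyperopt-dist'](f'filter_1 || {name}', *spec['sampling-scope'])
         for name, spec in entry.items()}
print(sample(space))  # e.g. {'r': 17}
```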
diff --git a/examples/advanced/customization/implementations/cnn_impls.py b/examples/advanced/customization/implementations/cnn_impls.py
new file mode 100644
index 0000000000..c1ad5be98e
--- /dev/null
+++ b/examples/advanced/customization/implementations/cnn_impls.py
@@ -0,0 +1,168 @@
+import logging
+import os
+import random
+from typing import Optional
+
+import numpy as np
+from golem.utilities.requirements_notificator import warn_requirement
+
+from fedot.core.operations.operation_parameters import OperationParameters
+
+try:
+    import tensorflow as tf
+except ModuleNotFoundError:
+    warn_requirement('tensorflow', 'fedot[extra]')
+    tf = None
+
+from fedot.core.data.data import InputData, OutputData
+from golem.core.log import LoggerAdapter, default_log
+from fedot.core.operations.evaluation.operation_implementations.implementation_interfaces import ModelImplementation
+from sklearn import preprocessing
+
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
+
+
+def check_input_array(x_train):
+    # scale the input to [0, 1] if it has not been scaled yet
+    if np.max(x_train) > 1:
+        transformed_x_train = x_train.astype("float32") / 255
+        transform_flag = True
+    else:
+        transformed_x_train = x_train
+        transform_flag = False
+
+    return transformed_x_train, transform_flag
+
+
+def create_simple_cnn(input_shape: tuple,
+                      num_classes: int):
+    model = tf.keras.Sequential(
+        [
+            tf.keras.layers.InputLayer(input_shape=input_shape),
+            tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
+            tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
+            tf.keras.layers.Flatten(),
+            tf.keras.layers.Dropout(0.5),
+            tf.keras.layers.Dense(num_classes, activation="softmax"),
+        ]
+    )
+
+    return model
+
+
+def fit_cnn(train_data: InputData,
+            model,
+            epochs: int = 10,
+            batch_size: int = 128,
+            optimizer_params: dict = None,
+            logger: Optional[LoggerAdapter] = None):
+    x_train, y_train = train_data.features, train_data.target
+    transformed_x_train, transform_flag = check_input_array(x_train)
+
+    if logger is None:
+        logger = default_log(prefix=__name__)
+
+    if transform_flag:
+        logger.debug('Train data was not scaled; it was divided by 255.')
+
+    if len(x_train.shape) == 3:
+        # add a channel dimension to the scaled data
+        transformed_x_train = np.expand_dims(transformed_x_train, -1)
+
+    if len(train_data.target.shape) < 2:
+        encoder = preprocessing.OneHotEncoder()
+        y_train = encoder.fit_transform(y_train.reshape(-1, 1)).toarray()
+
+    if optimizer_params is None:
+        optimizer_params = {'loss': "categorical_crossentropy",
+                            'optimizer': "adam",
+                            'metrics': ["accuracy"]}
+
+    model.compile(**optimizer_params)
+    model.num_classes = train_data.num_classes
+
+    if logger.logging_level > logging.DEBUG:
+        verbose = 0
+    else:
+        verbose = 2
+
+    if epochs is None:
+        epochs = 10
+        logger.warning('The number of training epochs was not set. The default of 10 epochs will be used.')
+
+    model.fit(transformed_x_train, y_train, batch_size=batch_size, epochs=epochs,
+              validation_split=0.1, verbose=verbose)
+
+    return model
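+
+
+# Usage sketch for the helpers above (illustrative; `train_input` is assumed to be
+# an InputData instance with MNIST-like images of shape (n, 28, 28)):
+#
+#     model = create_simple_cnn(input_shape=(28, 28, 1), num_classes=10)
+#     model = fit_cnn(train_data=train_input, model=model, epochs=5, batch_size=64)
+#
+# fit_cnn scales the images to [0, 1], adds a channel axis, one-hot encodes flat
+# targets, compiles the model and trains it with a 10% validation split.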
+
+
+def predict_cnn(trained_model, predict_data: InputData, output_mode: str = 'labels', logger=None) -> np.ndarray:
+    # deliberately simplified stub: returns random scores instead of calling
+    # trained_model.predict, which keeps the example fast
+    prediction = np.asarray([[random.random()] for _ in range(predict_data.features.shape[0])])
+    return prediction
+
+
+cnn_model_dict = {'simplified': create_simple_cnn}
+
+
+class MyCNNImplementation(ModelImplementation):
+    def __init__(self, params: Optional[OperationParameters] = None):
+        super().__init__(params)
+
+        default_params = {'log': default_log(prefix=__name__),
+                          'epochs': 10,
+                          'batch_size': 32,
+                          'output_mode': 'labels',
+                          'architecture_type': 'simplified',
+                          'optimizer_parameters': {'loss': "categorical_crossentropy",
+                                                   'optimizer': "adam",
+                                                   'metrics': ["accuracy"]}}
+
+        # explicitly passed params override the defaults
+        complete_params = {**default_params, **self.params.to_dict()}
+        self.params.update(**complete_params)
+
+    def fit(self, train_data):
+        """ Fits the model on the provided dataset
+
+        :param train_data: data to train the model on
+        """
+
+        # TODO make case for multiclass multioutput task
+        # check for multioutput target
+        if len(train_data.target.shape) < 2:
+            self.classes = np.unique(train_data.target)
+        else:
+            self.classes = np.arange(train_data.target.shape[1])
+
+        self.model = cnn_model_dict[self.params.get('architecture_type')](input_shape=train_data.features.shape[1:4],
+                                                                          num_classes=len(self.classes))
+
+        self.model = fit_cnn(train_data=train_data, model=self.model, epochs=self.params.get('epochs'),
+                             batch_size=self.params.get('batch_size'),
+                             optimizer_params=self.params.get('optimizer_parameters'),
+                             logger=self.params.get('log'))
+        return self.model
+
+    def predict(self, input_data):
+        """ Makes a prediction with class labels for the predict stage
+
+        :param input_data: data with features to process
+        """
+
+        return predict_cnn(trained_model=self.model, predict_data=input_data,
+                           output_mode='labels', logger=self.params.get('log'))
+
+    def predict_proba(self, input_data):
+        """ Makes a prediction with class probabilities
+
+        :param input_data: data with features to process
+        """
+
+        return predict_cnn(trained_model=self.model, predict_data=input_data, output_mode='probs')
+
+    @property
+    def classes_(self):
+        return self.classes
+
+    def __deepcopy__(self, memo=None):
+        # NB: copies only the underlying keras model, not the whole implementation
+        clone_model = tf.keras.models.clone_model(self.model)
+        clone_model.compile(optimizer=self.model.optimizer, loss=self.model.loss, metrics=self.model.metrics)
+        clone_model.set_weights(self.model.get_weights())
+        return clone_model
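Reviewer note: a direct-usage sketch of the implementation above, bypassing the strategy layer. `train_input` and `test_input` are assumed `InputData` image datasets prepared as in the example script; this snippet is illustrative, not part of the patch.

```python
from fedot.core.operations.operation_parameters import OperationParameters

from examples.advanced.customization.implementations.cnn_impls import MyCNNImplementation

# train_input / test_input: InputData image datasets (see InputData.from_image above)
impl = MyCNNImplementation(OperationParameters(epochs=2, batch_size=64))
impl.fit(train_input)                    # builds and trains the simplified CNN
scores = impl.predict_proba(test_input)  # random scores here, since predict_cnn is stubbed
```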
diff --git a/examples/advanced/customization/implementations/preproc_impls.py b/examples/advanced/customization/implementations/preproc_impls.py
new file mode 100644
index 0000000000..ee1d99fa1b
--- /dev/null
+++ b/examples/advanced/customization/implementations/preproc_impls.py
@@ -0,0 +1,35 @@
+from copy import deepcopy
+from typing import Optional
+
+import numpy as np
+
+from fedot.core.data.data import InputData, OutputData
+from fedot.core.operations.evaluation.operation_implementations.implementation_interfaces import \
+    DataOperationImplementation
+from fedot.core.operations.operation_parameters import OperationParameters
+from fedot.core.repository.dataset_types import DataTypesEnum
+
+
+class GammaFiltImplementation(DataOperationImplementation):
+    def __init__(self, params: Optional[OperationParameters]):
+        super().__init__(params)
+        if not self.params:
+            # default parameters would be applied here
+            pass
+        else:
+            # checking that the provided parameters are appropriate would happen here
+            pass
+
+    def fit(self, input_data: InputData):
+        # the operation is stateless, so there is nothing to fit
+        return None
+
+    def transform(self, input_data: InputData) -> OutputData:
+        # example of custom data preprocessing for the predict stage:
+        # shift every image by a random constant (a toy stand-in for a real filter)
+        transformed_features = deepcopy(input_data.features)
+        for i in range(transformed_features.shape[0]):
+            transformed_features[i, :, :] = transformed_features[i, :, :] + np.random.normal(0, 30)
+
+        output_data = self._convert_to_output(input_data,
+                                              transformed_features, data_type=DataTypesEnum.image)
+
+        return output_data
diff --git a/examples/advanced/customization/repositories/my_data_operation_repository.json b/examples/advanced/customization/repositories/my_data_operation_repository.json
new file mode 100644
index 0000000000..c7556462c7
--- /dev/null
+++ b/examples/advanced/customization/repositories/my_data_operation_repository.json
@@ -0,0 +1,152 @@
+{
+  "metadata": {
+    "data_sources": {
+      "tasks": "[TaskTypesEnum.classification, TaskTypesEnum.regression, TaskTypesEnum.clustering, TaskTypesEnum.ts_forecasting]",
+      "accepted_node_types": [
+        "PrimaryNode"
+      ],
+      "forbidden_node_types": "[]",
+      "strategies": [
+        "fedot.core.operations.evaluation.data_source",
+        "DataSourceStrategy"
+      ],
+      "tags": [
+        "non-default",
+        "data_source"
+      ],
+      "description": "Implementations of data sources in multi-modal pipelines"
+    },
+    "custom_preprocessing": {
+      "tasks": "[TaskTypesEnum.classification, TaskTypesEnum.regression]",
+      "input_type": "[DataTypesEnum.table]",
+      "output_type": "[DataTypesEnum.table]",
+      "accepted_node_types": [
+        "any"
+      ],
+      "forbidden_node_types": "[]",
+      "strategies": [
+        "fedot.core.operations.evaluation.common_preprocessing",
+        "FedotPreprocessingStrategy"
+      ],
+      "tags": [
+        "sklearn"
+      ],
+      "description": "Implementations of the preprocessing from scikit-learn framework"
+    },
+    "image_preprocessing": {
+      "tasks": "[TaskTypesEnum.classification, TaskTypesEnum.regression]",
+      "input_type": "[DataTypesEnum.image]",
+      "output_type": "[DataTypesEnum.image]",
+      "accepted_node_types": [
+        "PrimaryNode"
+      ],
+      "forbidden_node_types": "[]",
+      "strategies": [
+        "examples.advanced.customization.strategies.image_preproc",
+        "ImagePreprocessingStrategy"
+      ],
+      "tags": [
+        "custom"
+      ],
+      "description": "Custom implementations of image preprocessing operations"
+    },
+    "sklearn_categorical": {
+      "tasks": "[TaskTypesEnum.classification, TaskTypesEnum.regression, TaskTypesEnum.clustering]",
+      "input_type": "[DataTypesEnum.table]",
+      "output_type": "[DataTypesEnum.table]",
+      "accepted_node_types": [
+        "primary"
+      ],
+      "forbidden_node_types": "[]",
+      "strategies": [
+        "fedot.core.operations.evaluation.common_preprocessing",
+        "FedotPreprocessingStrategy"
+      ],
+      "tags": [
+        "sklearn"
+      ],
+      "description": "Implementations of OneHot encoding (etc.) from scikit-learn framework"
+    },
+    "dimension_transformation": {
+      "tasks": "[TaskTypesEnum.classification, TaskTypesEnum.regression, TaskTypesEnum.clustering, TaskTypesEnum.ts_forecasting]",
+      "input_type": "[DataTypesEnum.table]",
+      "output_type": "[DataTypesEnum.table]",
+      "accepted_node_types": [
+        "any"
+      ],
+      "forbidden_node_types": "[]",
+      "strategies": [
+        "fedot.core.operations.evaluation.common_preprocessing",
+        "FedotPreprocessingStrategy"
+      ],
+      "tags": [
+        "sklearn"
+      ],
+      "description": "Implementations of the dimensionality transformation operations (e.g.
PCA) from scikit-learn framework" + }, + "regression_preprocessing": { + "tasks": "[TaskTypesEnum.regression, TaskTypesEnum.ts_forecasting]", + "input_type": "[DataTypesEnum.table]", + "output_type": "[DataTypesEnum.table]", + "accepted_node_types": [ + "any" + ], + "forbidden_node_types": "[]", + "strategies": [ + "fedot.core.operations.evaluation.regression", + "FedotRegressionPreprocessingStrategy" + ], + "tags": [ + "sklearn" + ], + "description": "Implementations of the feature selection operations and robust data filtering from scikit-learn framework for regression task" + }, + "classification_preprocessing": { + "tasks": "[TaskTypesEnum.classification]", + "input_type": "[DataTypesEnum.table]", + "output_type": "[DataTypesEnum.table]", + "accepted_node_types": [ + "any" + ], + "forbidden_node_types": "[]", + "strategies": [ + "fedot.core.operations.evaluation.classification", + "FedotClassificationPreprocessingStrategy" + ], + "tags": [ + "sklearn" + ], + "description": "Implementations of the feature selection operations and robust data filtering from scikit-learn framework for classification task" + } + }, + "operations": { + "data_source_img": { + "meta": "data_sources", + "input_type": "[DataTypesEnum.image]", + "output_type": "[DataTypesEnum.image]", + "tags": [ + "data_source_img", + "nans-ignore", + "categorical-ignore" + ] + }, + "filter_1": { + "meta": "image_preprocessing", + "tags": [ + "filtering" + ] + }, + "filter_2": { + "meta": "image_preprocessing", + "tags": [ + "filtering" + ] + }, + "filter_3": { + "meta": "image_preprocessing", + "tags": [ + "filtering" + ] + } + } +} \ No newline at end of file diff --git a/examples/advanced/customization/repositories/my_default_operation_params.json b/examples/advanced/customization/repositories/my_default_operation_params.json new file mode 100644 index 0000000000..37e4fac670 --- /dev/null +++ b/examples/advanced/customization/repositories/my_default_operation_params.json @@ -0,0 +1,99 @@ +{ + "rf": { + "n_jobs": 1 + }, + "rfr": { + "n_jobs": 1 + }, + "xgboost": { + "eval_metric": "mlogloss", + "nthread": 1, + "n_jobs": 1, + "verbose": 0 + }, + "catboost": { + "allow_writing_files": false, + "verbose": false, + "iterations": 1000, + "use_eval_set": false, + "use_best_model": false, + "early_stopping_rounds": null, + "n_jobs": 1 + }, + "catboostreg": { + "allow_writing_files": false, + "verbose": false, + "iterations": 1000, + "use_eval_set": false, + "use_best_model": false, + "early_stopping_rounds": null, + "n_jobs": 1 + }, + "lgbm": { + "num_leaves": 32, + "colsample_bytree": 0.8, + "subsample": 0.8, + "subsample_freq": 10, + "learning_rate": 0.03, + "n_estimators": 100, + "n_jobs": 1, + "verbose": -1 + }, + "lgbmreg": { + "num_leaves": 32, + "colsample_bytree": 0.8, + "subsample": 0.8, + "subsample_freq": 10, + "learning_rate": 0.03, + "n_estimators": 100, + "n_jobs": 1, + "verbose": -1 + }, + "ransac_lin_reg": { + "min_samples": 0.4, + "residual_threshold": 10, + "max_trials": 100, + "max_skips": 1000 + }, + "ransac_non_lin_reg": { + "min_samples": 0.4, + "residual_threshold": 10, + "max_trials": 100, + "max_skips": 1000 + }, + "h2o_regr": { + "timeout": 20, + "seed": 42, + "max_models": 3 + }, + "h2o_class": { + "timeout": 20, + "seed": 42, + "max_models": 3 + }, + "tpot_class": { + "timeout": 2, + "generations": 3, + "population_size": 3 + }, + "tpot_regr": { + "timeout": 2, + "generations": 3, + "population_size": 3 + }, + "resample": { + "balance": "expand_minority", + "replace": false, + "balance_ratio": 1 + }, + 
"pca": { + "svd_solver": "full", + "n_components": 0.7 + }, + "fast_ica": { + "whiten": "unit-variance" + }, + "filter_1": { + "a": "42" + } +} \ No newline at end of file diff --git a/examples/advanced/customization/repositories/my_model_repository.json b/examples/advanced/customization/repositories/my_model_repository.json new file mode 100644 index 0000000000..c63d43c690 --- /dev/null +++ b/examples/advanced/customization/repositories/my_model_repository.json @@ -0,0 +1,487 @@ +{ + "metadata": { + "image_class": { + "accepted_node_types": [ + "any" + ], + "description": "Implementations of the custom classification models", + "forbidden_node_types": "[]", + "input_type": "[DataTypesEnum.image]", + "output_type": "[DataTypesEnum.table]", + "strategies": [ + "examples.advanced.customization.strategies.image_class", + "ImageClassificationStrategy" + ], + "tags": [ + "ml", + "custom" + ], + "tasks": "[TaskTypesEnum.classification]" + }, + "custom_class": { + "accepted_node_types": [ + "any" + ], + "description": "Implementations of the custom classification models", + "forbidden_node_types": "[]", + "input_type": "[DataTypesEnum.table]", + "output_type": "[DataTypesEnum.table]", + "strategies": [ + "fedot.core.operations.evaluation.classification", + "FedotClassificationStrategy" + ], + "tags": [ + "ml", + "custom" + ], + "tasks": "[TaskTypesEnum.classification]" + }, + "custom_regr": { + "accepted_node_types": [ + "any" + ], + "description": "Implementations of the custom regression models", + "forbidden_node_types": "[]", + "input_type": "[DataTypesEnum.table]", + "output_type": "[DataTypesEnum.table]", + "strategies": [ + "fedot.core.operations.evaluation.regression", + "FedotRegressionStrategy" + ], + "tags": [ + "ml", + "custom" + ], + "tasks": "[TaskTypesEnum.regression]" + }, + "sklearn_class": { + "accepted_node_types": [ + "any" + ], + "description": "Implementations of the classification models from scikit-learn framework", + "forbidden_node_types": "[]", + "input_type": "[DataTypesEnum.table]", + "output_type": "[DataTypesEnum.table]", + "strategies": [ + "fedot.core.operations.evaluation.classification", + "SkLearnClassificationStrategy" + ], + "tags": [ + "ml", + "sklearn" + ], + "tasks": "[TaskTypesEnum.classification]" + }, + "sklearn_clust": { + "accepted_node_types": [ + "any" + ], + "description": "Implementations of the clustering models from scikit-learn framework", + "forbidden_node_types": "[]", + "input_type": "[DataTypesEnum.table]", + "output_type": "[DataTypesEnum.table]", + "strategies": [ + "fedot.core.operations.evaluation.clustering", + "SkLearnClusteringStrategy" + ], + "tags": [ + "ml", + "sklearn" + ], + "tasks": "[TaskTypesEnum.clustering]" + }, + "sklearn_regr": { + "accepted_node_types": [ + "any" + ], + "description": "Implementations of the regression models from scikit-learn framework", + "forbidden_node_types": "[]", + "input_type": "[DataTypesEnum.table]", + "output_type": "[DataTypesEnum.table]", + "strategies": [ + "fedot.core.operations.evaluation.regression", + "SkLearnRegressionStrategy" + ], + "tags": [ + "ml", + "sklearn", + "composition" + ], + "tasks": "[TaskTypesEnum.regression]" + }, + "boosting_class": { + "accepted_node_types": [ + "any" + ], + "description": "Boosting models implementations for classification problems", + "forbidden_node_types": "[]", + "input_type": "[DataTypesEnum.table]", + "output_type": "[DataTypesEnum.table]", + "strategies": [ + "fedot.core.operations.evaluation.boostings", + "BoostingClassificationStrategy" + ], 
+ "tags": [ + "ml", + "boosting", + "composition" + ], + "tasks": "[TaskTypesEnum.classification]" + }, + "boosting_regr": { + "accepted_node_types": [ + "any" + ], + "description": "Boosting models implementations for regression problems", + "forbidden_node_types": "[]", + "input_type": "[DataTypesEnum.table]", + "output_type": "[DataTypesEnum.table]", + "strategies": [ + "fedot.core.operations.evaluation.boostings", + "BoostingRegressionStrategy" + ], + "tags": [ + "ml", + "boosting", + "composition" + ], + "tasks": "[TaskTypesEnum.regression]" + }, + "custom_model": { + "description": "Implementations of the models specified by user with external code source", + "input_type": "[DataTypesEnum.ts, DataTypesEnum.table, DataTypesEnum.text]", + "output_type": "[DataTypesEnum.table]", + "strategies": [ + "fedot.core.operations.evaluation.custom", + "CustomModelStrategy" + ], + "tags": [ + "non-default" + ], + "tasks": "[TaskTypesEnum.regression, TaskTypesEnum.ts_forecasting, TaskTypesEnum.classification, TaskTypesEnum.clustering]" + } + }, + "operations": { + "adareg": { + "meta": "sklearn_regr", + "presets": [ + "fast_train", + "ts", + "*tree" + ], + "tags": [ + "boosting", + "non_multi", + "non_linear" + ] + }, + "bernb": { + "meta": "sklearn_class", + "presets": [ + "fast_train" + ], + "tags": [ + "simple", + "bayesian", + "non_multi", + "linear" + ] + }, + "catboost": { + "meta": "boosting_class", + "presets": [ + "*tree" + ], + "tags": [ + "simple", + "boosting" + ] + }, + "catboostreg": { + "meta": "boosting_regr", + "presets": [ + "*tree" + ], + "tags": [ + "simple", + "boosting" + ] + }, + "dt": { + "meta": "sklearn_class", + "presets": [ + "fast_train", + "*tree" + ], + "tags": [ + "simple", + "tree", + "interpretable", + "non_linear" + ] + }, + "dtreg": { + "meta": "sklearn_regr", + "presets": [ + "fast_train", + "ts", + "*tree" + ], + "tags": [ + "tree", + "interpretable", + "non_linear" + ] + }, + "gbr": { + "meta": "sklearn_regr", + "presets": [ + "*tree" + ], + "tags": [ + "boosting", + "non_multi", + "non_linear", + "non-default" + ] + }, + "kmeans": { + "meta": "sklearn_clust", + "presets": [ + "fast_train" + ], + "tags": [ + "linear" + ] + }, + "knn": { + "meta": "custom_class", + "presets": [ + "fast_train" + ], + "tags": [ + "simple", + "correct_params", + "non_linear" + ] + }, + "knnreg": { + "meta": "custom_regr", + "presets": [ + "fast_train", + "ts" + ], + "tags": [ + "simple", + "correct_params", + "non_linear" + ] + }, + "lasso": { + "meta": "sklearn_regr", + "presets": [ + "fast_train", + "ts" + ], + "tags": [ + "simple", + "linear", + "interpretable" + ] + }, + "lda": { + "meta": "custom_class", + "presets": [ + "fast_train" + ], + "tags": [ + "discriminant", + "linear", + "correct_params", + "non-default" + ] + }, + "lgbm": { + "meta": "sklearn_class", + "tags": [ + "tree", + "non_linear" + ] + }, + "lgbmreg": { + "meta": "sklearn_regr", + "presets": [ + "*tree" + ], + "tags": [ + "tree", + "non_multi", + "non_linear" + ] + }, + "linear": { + "meta": "sklearn_regr", + "presets": [ + "fast_train", + "ts" + ], + "tags": [ + "simple", + "linear", + "interpretable" + ] + }, + "logit": { + "meta": "sklearn_class", + "presets": [ + "fast_train" + ], + "tags": [ + "simple", + "linear", + "interpretable", + "non_multi" + ] + }, + "mlp": { + "meta": "sklearn_class", + "tags": [ + "neural", + "non_linear" + ] + }, + "multinb": { + "meta": "sklearn_class", + "presets": [ + "fast_train" + ], + "tags": [ + "non-default", + "bayesian", + "non_multi", + "linear" + ] + }, + "qda": 
{ + "meta": "custom_class", + "presets": [ + "fast_train" + ], + "tags": [ + "discriminant", + "quadratic", + "non_linear" + ] + }, + "rf": { + "meta": "sklearn_class", + "presets": [ + "fast_train", + "*tree" + ], + "tags": [ + "tree", + "non_linear" + ] + }, + "rfr": { + "meta": "sklearn_regr", + "presets": [ + "fast_train", + "*tree" + ], + "tags": [ + "tree", + "non_linear" + ] + }, + "ridge": { + "meta": "sklearn_regr", + "presets": [ + "fast_train", + "ts" + ], + "tags": [ + "simple", + "linear", + "interpretable" + ] + }, + "sgdr": { + "meta": "sklearn_regr", + "presets": [ + "fast_train", + "ts" + ], + "tags": [ + "non_multi", + "non_linear" + ] + }, + "svc": { + "meta": "custom_class", + "tags": [ + "no_prob", + "expensive", + "non_linear" + ] + }, + "svr": { + "meta": "sklearn_regr", + "tags": [ + "non_multi", + "non_linear" + ] + }, + "treg": { + "meta": "sklearn_regr", + "presets": [ + "*tree" + ], + "tags": [ + "tree", + "non_linear", + "non-default" + ] + }, + "xgboost": { + "meta": "sklearn_class", + "presets": [ + "*tree" + ], + "tags": [ + "tree", + "non-default", + "non_linear" + ] + }, + "xgbreg": { + "meta": "sklearn_regr", + "presets": [ + "*tree" + ], + "tags": [ + "tree", + "non_multi", + "non-default", + "non_linear" + ] + }, + "cnn_1": { + "meta": "image_class", + "tags": [ + "deep", + "non_linear" + ], + "input_type": "[DataTypesEnum.image]", + "output_type": "[DataTypesEnum.table]" + }, + "custom": { + "meta": "custom_model", + "tags": [ + "custom_model", + "non-default" + ] + } + } +} \ No newline at end of file diff --git a/examples/advanced/customization/strategies/__init__.py b/examples/advanced/customization/strategies/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/examples/advanced/customization/strategies/image_class.py b/examples/advanced/customization/strategies/image_class.py new file mode 100644 index 0000000000..947915c3a4 --- /dev/null +++ b/examples/advanced/customization/strategies/image_class.py @@ -0,0 +1,35 @@ +import warnings +from typing import Optional + +from examples.advanced.customization.implementations.cnn_impls import MyCNNImplementation +from fedot.core.data.data import InputData +from fedot.core.operations.evaluation.classification import FedotClassificationStrategy +from fedot.core.operations.operation_parameters import OperationParameters +from fedot.utilities.random import ImplementationRandomStateHandler + +warnings.filterwarnings("ignore", category=UserWarning) + + +class ImageClassificationStrategy(FedotClassificationStrategy): + _operations_by_types = { + 'cnn_1': MyCNNImplementation + } + + def __init__(self, operation_type: str, params: Optional[OperationParameters] = None): + self.operation_impl = self._convert_to_operation(operation_type) + super().__init__(operation_type, params) + + def fit(self, train_data: InputData): + """ + This method is used for operation training with the data provided + :param InputData train_data: data used for operation training + :return: trained data operation + """ + + warnings.filterwarnings("ignore", category=RuntimeWarning) + + operation_implementation = self.operation_impl(self.params_for_fit) + + with ImplementationRandomStateHandler(implementation=operation_implementation): + operation_implementation.fit(train_data) + return operation_implementation diff --git a/examples/advanced/customization/strategies/image_preproc.py b/examples/advanced/customization/strategies/image_preproc.py new file mode 100644 index 0000000000..b5f4a9b725 --- /dev/null +++ 
b/examples/advanced/customization/strategies/image_preproc.py @@ -0,0 +1,39 @@ +import warnings +from typing import Optional + +from examples.advanced.customization.implementations.preproc_impls import GammaFiltImplementation +from fedot.core.data.data import InputData, OutputData +from fedot.core.operations.evaluation.evaluation_interfaces import EvaluationStrategy +from fedot.core.operations.operation_parameters import OperationParameters +from fedot.utilities.random import ImplementationRandomStateHandler + + +class ImagePreprocessingStrategy(EvaluationStrategy): + _operations_by_types = { + 'filter_1': GammaFiltImplementation, + 'filter_2': GammaFiltImplementation, + 'filter_3': GammaFiltImplementation + } + + def __init__(self, operation_type: str, params: Optional[OperationParameters] = None): + self.operation_impl = self._convert_to_operation(operation_type) + super().__init__(operation_type, params) + + def fit(self, train_data: InputData): + + warnings.filterwarnings("ignore", category=RuntimeWarning) + operation_implementation = self.operation_impl(self.params_for_fit) + with ImplementationRandomStateHandler(implementation=operation_implementation): + operation_implementation.fit(train_data) + return operation_implementation + + def predict(self, trained_operation, predict_data: InputData) -> OutputData: + prediction = trained_operation.transform(predict_data) + # Convert prediction to output (if it is required) + converted = self._convert_to_output(prediction, predict_data) + return converted + + def predict_for_fit(self, trained_operation, predict_data: InputData) -> OutputData: + prediction = trained_operation.transform_for_fit(predict_data) + converted = self._convert_to_output(prediction, predict_data) + return converted diff --git a/fedot/core/operations/evaluation/classification.py b/fedot/core/operations/evaluation/classification.py index a6bdf15069..5262e915bd 100644 --- a/fedot/core/operations/evaluation/classification.py +++ b/fedot/core/operations/evaluation/classification.py @@ -13,8 +13,6 @@ data_operations.sklearn_selectors import LinearClassFSImplementation, NonLinearClassFSImplementation from fedot.core.operations.evaluation.operation_implementations.models. \ discriminant_analysis import LDAImplementation, QDAImplementation -from fedot.core.operations.evaluation.operation_implementations.models. \ - keras import FedotCNNImplementation from fedot.core.operations.evaluation.operation_implementations.models.knn import FedotKnnClassImplementation from fedot.core.operations.evaluation.operation_implementations.models.svc import FedotSVCImplementation from fedot.core.operations.operation_parameters import OperationParameters @@ -46,7 +44,6 @@ class FedotClassificationStrategy(EvaluationStrategy): 'lda': LDAImplementation, 'qda': QDAImplementation, 'svc': FedotSVCImplementation, - 'cnn': FedotCNNImplementation, 'knn': FedotKnnClassImplementation } @@ -85,7 +82,7 @@ def predict(self, trained_operation, predict_data: InputData) -> OutputData: if n_classes < 2: raise ValueError('Data set contain only 1 target class. 
Please reformat your data.') elif n_classes == 2 and self.output_mode != 'full_probs' and len(prediction.shape) > 1: - prediction = prediction[:, 1] + prediction = prediction[:, prediction.shape[1] - 1] else: raise ValueError(f'Output model {self.output_mode} is not supported') diff --git a/fedot/core/operations/evaluation/operation_implementations/models/keras.py b/fedot/core/operations/evaluation/operation_implementations/models/keras.py index 2b307c7a20..faabc00e4e 100644 --- a/fedot/core/operations/evaluation/operation_implementations/models/keras.py +++ b/fedot/core/operations/evaluation/operation_implementations/models/keras.py @@ -194,9 +194,9 @@ def fit(self, train_data): self.model = cnn_model_dict[self.params.get('architecture_type')](input_shape=train_data.features.shape[1:4], num_classes=len(self.classes)) - self.model = fit_cnn(train_data=train_data, model=self.model, epochs=self.params.get('epochs'), - batch_size=self.params.get('batch_size'), - optimizer_params=self.params.get('optimizer_parameters'), logger=self.params.get('log')) + # self.model = fit_cnn(train_data=train_data, model=self.model, epochs=self.params.get('epochs'), + # batch_size=self.params.get('batch_size'), + # optimizer_params=self.params.get('optimizer_parameters'), logger=self.params.get('log')) return self.model def predict(self, input_data): diff --git a/fedot/core/operations/operation_parameters.py b/fedot/core/operations/operation_parameters.py index 1936c965ce..2ddf6b69e5 100644 --- a/fedot/core/operations/operation_parameters.py +++ b/fedot/core/operations/operation_parameters.py @@ -18,6 +18,8 @@ class OperationParameters: """ + custom_default_params_path = 'default_operation_params.json' + def __init__(self, **parameters): self._parameters = parameters self._changed_keys: list = [] @@ -65,5 +67,5 @@ def get_default_params(operation_type: str) -> dict: :return: default repository parameters for the model name """ - with DefaultOperationParamsRepository() as default_params_repo: + with DefaultOperationParamsRepository(OperationParameters.custom_default_params_path) as default_params_repo: return default_params_repo.get_default_params_for_operation(operation_type) diff --git a/fedot/core/optimisers/objective/data_source_splitter.py b/fedot/core/optimisers/objective/data_source_splitter.py index 70e5dd7e30..bfc05c9c2a 100644 --- a/fedot/core/optimisers/objective/data_source_splitter.py +++ b/fedot/core/optimisers/objective/data_source_splitter.py @@ -4,13 +4,13 @@ from golem.core.log import default_log from fedot.core.constants import default_data_split_ratio_by_task +from fedot.core.data.cv_folds import cv_generator from fedot.core.data.data import InputData from fedot.core.data.data_split import train_test_data_setup, _are_stratification_allowed from fedot.core.data.multi_modal import MultiModalData from fedot.core.optimisers.objective.data_objective_eval import DataSource from fedot.core.repository.tasks import TaskTypesEnum from fedot.remote.remote_evaluator import RemoteEvaluator, init_data_for_remote_execution -from fedot.core.data.cv_folds import cv_generator class DataSourceSplitter: @@ -59,7 +59,7 @@ def build(self, data: Union[InputData, MultiModalData]) -> DataSource: raise ValueError(f"cv_folds is not integer: {self.cv_folds}") if self.cv_folds < 2: self.cv_folds = None - if self.cv_folds > data.target.shape[0] - 1: + elif self.cv_folds > data.target.shape[0] - 1: raise ValueError((f"cv_folds ({self.cv_folds}) is greater than" f" the maximum allowed count {data.target.shape[0] - 1}")) 
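Reviewer note: the `custom_default_params_path` hook added above (together with the `FileNotFoundError` fallback in `default_params_repository.py` further down) lets a project point FEDOT at its own defaults file. A minimal sketch of the intended use, assuming the example's repositories folder:

```python
from pathlib import Path

from fedot.core.operations.operation_parameters import OperationParameters
from fedot.core.utils import fedot_project_root

OperationParameters.custom_default_params_path = Path(
    fedot_project_root(), 'examples', 'advanced', 'customization',
    'repositories', 'my_default_operation_params.json')

# defaults for 'filter_1' should now come from the custom file: {'a': '42'}
params = OperationParameters.from_operation_type('filter_1')
print(params.to_dict())
```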
diff --git a/fedot/core/pipelines/pipeline_advisor.py b/fedot/core/pipelines/pipeline_advisor.py index 7f38d056f9..2708dd673d 100644 --- a/fedot/core/pipelines/pipeline_advisor.py +++ b/fedot/core/pipelines/pipeline_advisor.py @@ -30,6 +30,7 @@ def propose_change(self, node: OptNode, possible_operations: List[str]) -> List[ :return: list of candidates with str operations """ operation_id = node.content['name'] + # data source, exog_ts and custom models replacement is useless if check_for_specific_operations(operation_id): return [] @@ -43,9 +44,13 @@ def propose_change(self, node: OptNode, possible_operations: List[str]) -> List[ # lagged transform can be replaced only to lagged candidates = set.intersection({'lagged', 'sparse_lagged'}, set(possible_operations)) + if 'cnn' in operation_id: + candidates = [cand for cand in candidates if 'cnn' in cand] + if operation_id in candidates: # the change to the same node is not meaningful candidates.remove(operation_id) + return candidates def propose_parent(self, node: OptNode, possible_operations: List[str]) -> List[str]: diff --git a/fedot/core/pipelines/tuning/hyperparams.py b/fedot/core/pipelines/tuning/hyperparams.py index 7800d0fca7..5499200a54 100644 --- a/fedot/core/pipelines/tuning/hyperparams.py +++ b/fedot/core/pipelines/tuning/hyperparams.py @@ -11,7 +11,6 @@ class ParametersChanger: """ Class for the hyperparameters changing in the operation - :param operation_name: name of operation to get hyperparameters for :param current_params: current parameters value """ @@ -25,7 +24,8 @@ def get_new_operation_params(self): """ Function return a dictionary with new parameters values """ # Get available parameters for operation - params_list = PipelineSearchSpace().get_parameters_for_operation(self.operation_name) + params_list = \ + PipelineSearchSpace().get_parameters_for_operation(self.operation_name) if not params_list: params_dict = None diff --git a/fedot/core/pipelines/tuning/search_space.py b/fedot/core/pipelines/tuning/search_space.py index b44a02b977..f65b1d568a 100644 --- a/fedot/core/pipelines/tuning/search_space.py +++ b/fedot/core/pipelines/tuning/search_space.py @@ -9,17 +9,20 @@ class PipelineSearchSpace(SearchSpace): """ - Class for extracting searching space + Class for extracting searching space for hyperparameters of pipeline :param custom_search_space: dictionary of dictionaries of tuples (hyperopt expression (e.g. 
hp.choice), *params) for applying custom hyperparameters search space :param replace_default_search_space: whether replace default dictionary (False) or append it (True) """ + pre_defined_custom_search_space = None # workaround to modify search space globally + def __init__(self, custom_search_space: Optional[OperationParametersMapping] = None, replace_default_search_space: bool = False): - self.custom_search_space = custom_search_space + self.custom_search_space = custom_search_space if PipelineSearchSpace.pre_defined_custom_search_space is None \ + else PipelineSearchSpace.pre_defined_custom_search_space self.replace_default_search_space = replace_default_search_space parameters_per_operation = self.get_parameters_dict() super().__init__(parameters_per_operation) @@ -791,6 +794,9 @@ def get_parameters_dict(self): parameters_per_operation.update(self.custom_search_space) else: for operation_name, operation_dct in self.custom_search_space.items(): - parameters_per_operation[operation_name].update(operation_dct) + if operation_name in parameters_per_operation.keys(): + parameters_per_operation[operation_name].update(operation_dct) + else: + parameters_per_operation[operation_name] = operation_dct return parameters_per_operation diff --git a/fedot/core/repository/default_params_repository.py b/fedot/core/repository/default_params_repository.py index 4cfd248e85..040a53733c 100644 --- a/fedot/core/repository/default_params_repository.py +++ b/fedot/core/repository/default_params_repository.py @@ -7,7 +7,11 @@ def __init__(self, repository_name: str = 'default_operation_params.json'): repo_folder_path = str(os.path.dirname(__file__)) file = os.path.join('data', repository_name) self._repo_path = os.path.join(repo_folder_path, file) - self._repo = self._initialise_repo() + try: + self._repo = self._initialise_repo() + except FileNotFoundError: + self._repo_path = repository_name + self._repo = self._initialise_repo() def __enter__(self): return self diff --git a/test/data/test_labels.npy b/test/data/test_labels.npy index 815f538382..b3408a2e61 100644 Binary files a/test/data/test_labels.npy and b/test/data/test_labels.npy differ diff --git a/test/data/training_labels.npy b/test/data/training_labels.npy index a0454f741b..b3408a2e61 100644 Binary files a/test/data/training_labels.npy and b/test/data/training_labels.npy differ diff --git a/test/integration/customization/__init__.py b/test/integration/customization/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/integration/customization/test_custom_automl.py b/test/integration/customization/test_custom_automl.py new file mode 100644 index 0000000000..f4b0f95063 --- /dev/null +++ b/test/integration/customization/test_custom_automl.py @@ -0,0 +1,24 @@ +import os + +from examples.advanced.customization.image_classification_with_custom_models import run_image_classification_automl + + +def test_image_classification_automl(): + test_data_path = '../../data/test_data.npy' + test_labels_path = '../../data/test_labels.npy' + train_data_path = '../../data/training_data.npy' + train_labels_path = '../../data/training_labels.npy' + + test_file_path = str(os.path.dirname(__file__)) + training_path_features = os.path.join(test_file_path, train_data_path) + training_path_labels = os.path.join(test_file_path, train_labels_path) + test_path_features = os.path.join(test_file_path, test_data_path) + test_path_labels = os.path.join(test_file_path, test_labels_path) + + roc_auc_on_valid, dataset_to_train, dataset_to_validate = 
run_image_classification_automl(
+        train_dataset=(training_path_features,
+                       training_path_labels),
+        test_dataset=(test_path_features,
+                      test_path_labels))
+
+    # a pytest test should assert its results rather than return them
+    assert roc_auc_on_valid is not None
+    assert dataset_to_train is not None
+    assert dataset_to_validate is not None
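Reviewer note: the `get_parameters_dict` change in `search_space.py` means a custom operation absent from the default space (such as `filter_1`) is now appended instead of raising `KeyError`. A quick sketch, assuming a fresh session where `pre_defined_custom_search_space` is unset:

```python
from hyperopt import hp

from fedot.core.pipelines.tuning.search_space import PipelineSearchSpace

custom = {'filter_1': {'r': {'hyperopt-dist': hp.uniformint,
                             'sampling-scope': [-254, 254],
                             'type': 'discrete'}}}

space = PipelineSearchSpace(custom_search_space=custom,
                            replace_default_search_space=False)
print(space.get_parameters_for_operation('filter_1'))  # expected: ['r'] rather than KeyError
```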