From e989f1c2cbf9c1f572a9e01d8329992bbc81f335 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Mon, 18 Sep 2023 16:24:30 +0300 Subject: [PATCH 01/65] Add multiprocessing. Not finished yet. --- golem/core/optimisers/genetic/gp_optimizer.py | 30 ++--- .../optimisers/genetic/operators/crossover.py | 15 +-- .../optimisers/genetic/operators/mutation.py | 8 +- .../genetic/operators/regularization.py | 1 + .../genetic/operators/reproduction.py | 109 ++++++------------ .../core/optimisers/populational_optimizer.py | 1 + 6 files changed, 67 insertions(+), 97 deletions(-) diff --git a/golem/core/optimisers/genetic/gp_optimizer.py b/golem/core/optimisers/genetic/gp_optimizer.py index eec6e4ae1..ce16fd046 100644 --- a/golem/core/optimisers/genetic/gp_optimizer.py +++ b/golem/core/optimisers/genetic/gp_optimizer.py @@ -1,7 +1,10 @@ +import time from copy import deepcopy from random import choice from typing import Sequence, Union, Any +from joblib import Parallel, delayed + from golem.core.constants import MAX_GRAPH_GEN_ATTEMPTS from golem.core.dag.graph import Graph from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters @@ -73,25 +76,26 @@ def _initial_population(self, evaluator: EvaluationOperator): self._update_population(evaluator(self.initial_individuals), 'extended_initial_assumptions') def _extend_population(self, pop: PopulationT, target_pop_size: int) -> PopulationT: - verifier = self.graph_generation_params.verifier - extended_pop = list(pop) - pop_graphs = [ind.graph for ind in extended_pop] - # Set mutation probabilities to 1.0 initial_req = deepcopy(self.requirements) initial_req.mutation_prob = 1.0 self.mutation.update_requirements(requirements=initial_req) - for iter_num in range(MAX_GRAPH_GEN_ATTEMPTS): - if len(extended_pop) == target_pop_size: - break - new_ind = self.mutation(choice(pop)) - if new_ind: - new_graph = new_ind.graph - if new_graph not in pop_graphs and verifier(new_graph): + extended_pop = list(pop) + pop_graphs = [ind.graph for ind in extended_pop] + verifier = self.graph_generation_params.verifier + with Parallel(n_jobs=self.requirements.n_jobs, prefer='processes', return_as='generator') as parallel: + new_ind_generator = parallel(delayed(lambda ind: self.mutation(ind))(ind) + for ind in (choice(pop) for _ in range(MAX_GRAPH_GEN_ATTEMPTS))) + + for new_ind in new_ind_generator: + if new_ind and new_ind.graph not in pop_graphs and verifier(new_ind.graph): extended_pop.append(new_ind) - pop_graphs.append(new_graph) - else: + pop_graphs.append(new_ind.graph) + if len(extended_pop) == target_pop_size: + break + + if len(extended_pop) != target_pop_size: self.log.warning(f'Exceeded max number of attempts for extending initial graphs, stopping.' f'Current size {len(pop)}, required {target_pop_size} graphs.') diff --git a/golem/core/optimisers/genetic/operators/crossover.py b/golem/core/optimisers/genetic/operators/crossover.py index 866b0d279..3e127fe4c 100644 --- a/golem/core/optimisers/genetic/operators/crossover.py +++ b/golem/core/optimisers/genetic/operators/crossover.py @@ -4,6 +4,8 @@ from random import choice, random, sample from typing import Callable, Union, Iterable, Tuple, TYPE_CHECKING +from joblib import Parallel, delayed + from golem.core.adapter import register_native from golem.core.dag.graph_utils import nodes_from_layer, node_depth from golem.core.optimisers.genetic.gp_operators import equivalent_subtree, replace_subtrees @@ -40,13 +42,12 @@ def __init__(self, self.graph_generation_params = graph_generation_params def __call__(self, population: PopulationT) -> PopulationT: - if len(population) == 1: - new_population = population - else: - new_population = [] - for ind_1, ind_2 in Crossover.crossover_parents_selection(population): - new_population += self._crossover(ind_1, ind_2) - return new_population + if len(population) > 1: + parallel = Parallel(n_jobs=self.requirements.n_jobs, prefer='processes') + population = parallel(delayed(self._crossover)(ind_1, ind_2) + for ind_1, ind_2 in Crossover.crossover_parents_selection(population)) + population = list(chain(*population)) + return population @staticmethod def crossover_parents_selection(population: PopulationT) -> Iterable[Tuple[Individual, Individual]]: diff --git a/golem/core/optimisers/genetic/operators/mutation.py b/golem/core/optimisers/genetic/operators/mutation.py index 2005377e9..b5713690b 100644 --- a/golem/core/optimisers/genetic/operators/mutation.py +++ b/golem/core/optimisers/genetic/operators/mutation.py @@ -3,6 +3,7 @@ from typing import Callable, Union, Tuple, TYPE_CHECKING, Mapping, Hashable, Optional import numpy as np +from joblib import Parallel, delayed from golem.core.dag.graph import Graph from golem.core.optimisers.adaptive.mab_agents.contextual_mab_agent import ContextualMultiArmedBanditAgent @@ -79,9 +80,12 @@ def agent(self) -> OperatorAgent: def __call__(self, population: Union[Individual, PopulationT]) -> Union[Individual, PopulationT]: if isinstance(population, Individual): + mutation_result = [[x] for x in self._mutation(population)] population = [population] - - final_population, mutations_applied, application_attempts = tuple(zip(*map(self._mutation, population))) + else: + parallel = Parallel(n_jobs=self.requirements.n_jobs, prefer='processes') + mutation_result = tuple(zip(*parallel(delayed(self._mutation)(ind) for ind in population))) + final_population, mutations_applied, application_attempts = mutation_result # drop individuals to which mutations could not be applied final_population = [ind for ind, init_ind, attempt in zip(final_population, population, application_attempts) diff --git a/golem/core/optimisers/genetic/operators/regularization.py b/golem/core/optimisers/genetic/operators/regularization.py index 7d8e7dbe2..8fe45d825 100644 --- a/golem/core/optimisers/genetic/operators/regularization.py +++ b/golem/core/optimisers/genetic/operators/regularization.py @@ -34,6 +34,7 @@ def __call__(self, population: PopulationT, evaluator: EvaluationOperator) -> Po raise ValueError(f'Required regularization type not found: {regularization_type}') def _decremental_regularization(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: + # TODO: do it in parallel if it can be done size = self.parameters.pop_size additional_inds = [] prev_nodes_ids = set() diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index bbaacde4c..32c78c81e 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -1,6 +1,9 @@ +from itertools import cycle +from random import choice from typing import Optional import numpy as np +from joblib import Parallel, delayed from golem.core.constants import MIN_POP_SIZE, EVALUATION_ATTEMPTS_NUMBER from golem.core.log import default_log @@ -9,6 +12,7 @@ from golem.core.optimisers.genetic.operators.mutation import Mutation from golem.core.optimisers.genetic.operators.operator import PopulationT, EvaluationOperator from golem.core.optimisers.genetic.operators.selection import Selection +from golem.core.optimisers.opt_history_objects.individual import Individual from golem.core.optimisers.populational_optimizer import EvaluationAttemptsError from golem.utilities.data_structures import ensure_wrapped_in_sequence @@ -18,26 +22,11 @@ class ReproductionController: Task of the Reproduction Controller is to reproduce population while keeping population size as specified in optimizer settings. - It implements a simple proportional controller that compensates for - invalid results each generation by computing average ratio of valid results. - Invalid results include cases when Operators, Evaluator or GraphVerifier - return output population that's smaller than the input population. - - Example. - Let's say we need a population of size 50. Let's say about 20% of individuals - are *usually* evaluated with an error. If we take select only 50 for the new population, - we will get about 40 valid ones. Not enough. Therefore, we need to take more. - How much more? Approximately by `target_pop_size / mean_success_rate = 50 / 0.8 ~= 62'. - Here `mean_success_rate` estimates number of successfully evaluated individuals. - Then we request 62, then approximately 62*0.8~=50 of them are valid in the end, - and we achieve target size more reliably. This runs in a loop to control stochasticity. - Args: parameters: genetic algorithm parameters. selection: operator used in reproduction. mutation: operator used in reproduction. crossover: operator used in reproduction. - window_size: size in iterations of the moving window to compute reproduction success rate. """ def __init__(self, @@ -45,7 +34,6 @@ def __init__(self, selection: Selection, mutation: Mutation, crossover: Crossover, - window_size: int = 10, ): self.parameters = parameters self.selection = selection @@ -53,38 +41,9 @@ def __init__(self, self.crossover = crossover self._minimum_valid_ratio = parameters.required_valid_ratio * 0.5 - self._window_size = window_size - self._success_rate_window = np.full(self._window_size, 1.0) self._log = default_log(self) - @property - def mean_success_rate(self) -> float: - """Returns mean success rate of reproduction + evaluation, - fraction of how many individuals were reproduced and mutated successfully. - Computed as average fraction for the last N iterations (N = window size param)""" - return float(np.mean(self._success_rate_window)) - - def reproduce_uncontrolled(self, - population: PopulationT, - evaluator: EvaluationOperator, - pop_size: Optional[int] = None, - ) -> PopulationT: - """Reproduces and evaluates population (select, crossover, mutate). - Doesn't implement any additional checks on population. - """ - # If operators can return unchanged individuals from previous population - # (e.g. both Mutation & Crossover are not applied with some probability) - # then there's a probability that duplicate individuals can appear - - # TODO: it can't choose more than len(population)! - # It can be faster if it could. - selected_individuals = self.selection(population, pop_size) - new_population = self.crossover(selected_individuals) - new_population = ensure_wrapped_in_sequence(self.mutation(new_population)) - new_population = evaluator(new_population) - return new_population - def reproduce(self, population: PopulationT, evaluator: EvaluationOperator @@ -93,42 +52,42 @@ def reproduce(self, Implements additional checks on population to ensure that population size follows required population size. """ - total_target_size = self.parameters.pop_size # next population size - collected_next_population = {} - for i in range(EVALUATION_ATTEMPTS_NUMBER): - # Estimate how many individuals we need to complete new population - # based on average success rate of valid results - residual_size = total_target_size - len(collected_next_population) - residual_size = max(MIN_POP_SIZE, - int(residual_size / self.mean_success_rate)) - residual_size = min(len(population), residual_size) - - # Reproduce the required number of individuals that equals residual size - partial_next_population = self.reproduce_uncontrolled(population, evaluator, residual_size) - # Avoid duplicate individuals that can come unchanged from previous population - collected_next_population.update({ind.uid: ind for ind in partial_next_population}) - - # Keep running average of transform success rate (if sample is big enough) - if len(partial_next_population) >= MIN_POP_SIZE: - valid_ratio = len(partial_next_population) / residual_size - self._success_rate_window = np.roll(self._success_rate_window, shift=1) - self._success_rate_window[0] = valid_ratio - - # Successful return: got enough individuals - if len(collected_next_population) >= total_target_size * self.parameters.required_valid_ratio: - self._log.info(f'Reproduction achieved pop size {len(collected_next_population)}' - f' using {i+1} attempt(s) with success rate {self.mean_success_rate:.3f}') - return list(collected_next_population.values())[:total_target_size] + selected_individuals = self.selection(population, self.parameters.pop_size) + population_after_crossover = self.crossover(selected_individuals) + + def mutation_n_evaluation(individual: Individual): + individual = self.mutation(individual) + if individual: + individuals = evaluator([individual]) + if individuals: + individual = individuals[0] + return individual + + with Parallel(n_jobs=self.mutation.requirements.n_jobs, prefer='processes', return_as='generator') as parallel: + new_ind_generator = parallel(delayed(mutation_n_evaluation)(ind) + for ind in population_after_crossover * EVALUATION_ATTEMPTS_NUMBER) + + new_population, pop_graphs = [], [] + for new_ind in new_ind_generator: + if new_ind and new_ind.graph not in pop_graphs: + new_population.append(new_ind) + pop_graphs.append(new_ind.graph) + if len(new_population) == self.parameters.pop_size: + break + + if len(new_population) >= self.parameters.pop_size * self.parameters.required_valid_ratio: + self._log.info(f'Reproduction achieved pop size {len(new_population)}') + return new_population else: # If number of evaluation attempts is exceeded return a warning or raise exception helpful_msg = ('Check objective, constraints and evo operators. ' 'Possibly they return too few valid individuals.') - if len(collected_next_population) >= total_target_size * self._minimum_valid_ratio: + if len(new_population) >= self.parameters.pop_size * self._minimum_valid_ratio: self._log.warning(f'Could not achieve required population size: ' - f'have {len(collected_next_population)},' - f' required {total_target_size}!\n' + helpful_msg) - return list(collected_next_population.values()) + f'have {len(new_population)},' + f' required {self.parameters.pop_size}!\n' + helpful_msg) + return new_population else: raise EvaluationAttemptsError('Could not collect valid individuals' ' for next population.' + helpful_msg) diff --git a/golem/core/optimisers/populational_optimizer.py b/golem/core/optimisers/populational_optimizer.py index c95be9ce2..fd5e0e1f6 100644 --- a/golem/core/optimisers/populational_optimizer.py +++ b/golem/core/optimisers/populational_optimizer.py @@ -104,6 +104,7 @@ def optimise(self, objective: ObjectiveFunction) -> Sequence[Graph]: break # Adding of new population to history self._update_population(new_population) + pbar.update() pbar.close() self._update_population(self.best_individuals, 'final_choices') return [ind.graph for ind in self.best_individuals] From 9107d68655e6e01c3ac28bfc1043de7c6c6088ec Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Mon, 18 Sep 2023 17:32:19 +0300 Subject: [PATCH 02/65] Refactoring. Not tested yet. --- golem/core/constants.py | 1 + golem/core/optimisers/genetic/gp_optimizer.py | 84 +++++++++++++------ 2 files changed, 60 insertions(+), 25 deletions(-) diff --git a/golem/core/constants.py b/golem/core/constants.py index 4104ead3a..7dd6484f2 100644 --- a/golem/core/constants.py +++ b/golem/core/constants.py @@ -1,5 +1,6 @@ import numpy as np +MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER = 3 MAX_GRAPH_GEN_ATTEMPTS = 1000 MAX_TUNING_METRIC_VALUE = np.inf MIN_TIME_FOR_TUNING_IN_SEC = 3 diff --git a/golem/core/optimisers/genetic/gp_optimizer.py b/golem/core/optimisers/genetic/gp_optimizer.py index ce16fd046..f1603a3a7 100644 --- a/golem/core/optimisers/genetic/gp_optimizer.py +++ b/golem/core/optimisers/genetic/gp_optimizer.py @@ -1,11 +1,12 @@ import time from copy import deepcopy +from itertools import cycle from random import choice from typing import Sequence, Union, Any from joblib import Parallel, delayed -from golem.core.constants import MAX_GRAPH_GEN_ATTEMPTS +from golem.core.constants import MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER from golem.core.dag.graph import Graph from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters from golem.core.optimisers.genetic.operators.crossover import Crossover @@ -14,7 +15,6 @@ from golem.core.optimisers.genetic.operators.mutation import Mutation from golem.core.optimisers.genetic.operators.operator import PopulationT, EvaluationOperator from golem.core.optimisers.genetic.operators.regularization import Regularization -from golem.core.optimisers.genetic.operators.reproduction import ReproductionController from golem.core.optimisers.genetic.operators.selection import Selection from golem.core.optimisers.genetic.parameters.graph_depth import AdaptiveGraphDepth from golem.core.optimisers.genetic.parameters.operators_prob import init_adaptive_operators_prob @@ -23,7 +23,7 @@ from golem.core.optimisers.opt_history_objects.individual import Individual from golem.core.optimisers.optimization_parameters import GraphRequirements from golem.core.optimisers.optimizer import GraphGenerationParams -from golem.core.optimisers.populational_optimizer import PopulationalOptimizer +from golem.core.optimisers.populational_optimizer import PopulationalOptimizer, EvaluationAttemptsError class EvoGraphOptimizer(PopulationalOptimizer): @@ -47,7 +47,6 @@ def __init__(self, self.elitism = Elitism(graph_optimizer_params) self.operators = [self.regularization, self.selection, self.crossover, self.mutation, self.inheritance, self.elitism] - self.reproducer = ReproductionController(graph_optimizer_params, self.selection, self.mutation, self.crossover) # Define adaptive parameters self._pop_size: PopulationSize = init_adaptive_pop_size(graph_optimizer_params, self.generations) @@ -71,33 +70,17 @@ def _initial_population(self, evaluator: EvaluationOperator): pop_size = self.graph_optimizer_params.pop_size if len(self.initial_individuals) < pop_size: - self.initial_individuals = self._extend_population(self.initial_individuals, pop_size) + self.initial_individuals = self._extend_population(self.initial_individuals, pop_size, evaluator) # Adding of extended population to history - self._update_population(evaluator(self.initial_individuals), 'extended_initial_assumptions') + self._update_population(self.initial_individuals, 'extended_initial_assumptions') - def _extend_population(self, pop: PopulationT, target_pop_size: int) -> PopulationT: + def _extend_population(self, pop: PopulationT, target_pop_size: int, evaluator: EvaluationOperator) -> PopulationT: # Set mutation probabilities to 1.0 initial_req = deepcopy(self.requirements) initial_req.mutation_prob = 1.0 self.mutation.update_requirements(requirements=initial_req) - extended_pop = list(pop) - pop_graphs = [ind.graph for ind in extended_pop] - verifier = self.graph_generation_params.verifier - with Parallel(n_jobs=self.requirements.n_jobs, prefer='processes', return_as='generator') as parallel: - new_ind_generator = parallel(delayed(lambda ind: self.mutation(ind))(ind) - for ind in (choice(pop) for _ in range(MAX_GRAPH_GEN_ATTEMPTS))) - - for new_ind in new_ind_generator: - if new_ind and new_ind.graph not in pop_graphs and verifier(new_ind.graph): - extended_pop.append(new_ind) - pop_graphs.append(new_ind.graph) - if len(extended_pop) == target_pop_size: - break - - if len(extended_pop) != target_pop_size: - self.log.warning(f'Exceeded max number of attempts for extending initial graphs, stopping.' - f'Current size {len(pop)}, required {target_pop_size} graphs.') + extended_pop = self._mutation_n_evaluation_in_parallel(population=list(pop), evaluator=evaluator) # Reset mutation probabilities to default self.mutation.update_requirements(requirements=self.requirements) @@ -113,7 +96,7 @@ def _evolve_population(self, evaluator: EvaluationOperator) -> PopulationT: # Regularize previous population individuals_to_select = self.regularization(self.population, evaluator) # Reproduce from previous pop to get next population - new_population = self.reproducer.reproduce(individuals_to_select, evaluator) + new_population = self._reproduce(individuals_to_select, evaluator) # Adaptive agent experience collection & learning # Must be called after reproduction (that collects the new experience) @@ -143,3 +126,54 @@ def _update_requirements(self): # update requirements in operators for operator in self.operators: operator.update_requirements(self.graph_optimizer_params, self.requirements) + + def _reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: + selected_individuals = self.selection(population, self.graph_optimizer_params.pop_size) + new_population = self.crossover(selected_individuals) + + new_population = self._mutation_n_evaluation_in_parallel(population=new_population, + evaluator=evaluator, + include_population=False) + + self._log.info(f'Reproduction achieved pop size {len(new_population)}') + return new_population + + def _mutation_n_evaluation_in_parallel(self, + population: PopulationT, + evaluator: EvaluationOperator, + include_population: bool = True) -> PopulationT: + def mutation_n_evaluation(individual: Individual): + individual = self.mutation(individual) + if individual: + individuals = evaluator([individual]) + if individuals: + individual = individuals[0] + return individual + + target_pop_size = self.graph_optimizer_params.pop_size + max_tries = target_pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER + verifier = self.graph_generation_params.verifier + + _population = cycle(population) + _population = [next(_population) for _ in range(max_tries)] + + if include_population: + new_population, pop_graphs = population, [ind.graph for ind in population] + else: + new_population, pop_graphs = [], [] + + with Parallel(n_jobs=self.mutation.requirements.n_jobs, prefer='processes', return_as='generator') as parallel: + new_ind_generator = parallel(delayed(mutation_n_evaluation)(ind) for ind in _population) + for new_ind in new_ind_generator: + if new_ind and new_ind.graph not in pop_graphs and verifier(new_ind.graph): + new_population.append(new_ind) + pop_graphs.append(new_ind.graph) + if len(new_population) == target_pop_size: + break + + if len(new_population) == 0: + helpful_msg = ('Check objective, constraints and evo operators. ' + 'Possibly they return too few valid individuals.') + raise EvaluationAttemptsError('Could not collect valid individuals' + ' for population.' + helpful_msg) + return new_population From 25681d46b838ad56bd60bccae2d233b441818962 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Tue, 19 Sep 2023 10:22:32 +0300 Subject: [PATCH 03/65] Some fixes --- golem/core/optimisers/genetic/gp_optimizer.py | 16 ++++++++-------- .../optimisers/genetic/operators/crossover.py | 8 ++++---- .../optimisers/genetic/operators/mutation.py | 4 ++-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/golem/core/optimisers/genetic/gp_optimizer.py b/golem/core/optimisers/genetic/gp_optimizer.py index f1603a3a7..95353f8ac 100644 --- a/golem/core/optimisers/genetic/gp_optimizer.py +++ b/golem/core/optimisers/genetic/gp_optimizer.py @@ -130,7 +130,6 @@ def _update_requirements(self): def _reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: selected_individuals = self.selection(population, self.graph_optimizer_params.pop_size) new_population = self.crossover(selected_individuals) - new_population = self._mutation_n_evaluation_in_parallel(population=new_population, evaluator=evaluator, include_population=False) @@ -144,11 +143,10 @@ def _mutation_n_evaluation_in_parallel(self, include_population: bool = True) -> PopulationT: def mutation_n_evaluation(individual: Individual): individual = self.mutation(individual) - if individual: + if individual and verifier(new_ind.graph): individuals = evaluator([individual]) if individuals: - individual = individuals[0] - return individual + return individuals[0] target_pop_size = self.graph_optimizer_params.pop_size max_tries = target_pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER @@ -157,15 +155,17 @@ def mutation_n_evaluation(individual: Individual): _population = cycle(population) _population = [next(_population) for _ in range(max_tries)] + new_population, pop_graphs = [], [] if include_population: new_population, pop_graphs = population, [ind.graph for ind in population] - else: - new_population, pop_graphs = [], [] - with Parallel(n_jobs=self.mutation.requirements.n_jobs, prefer='processes', return_as='generator') as parallel: + with Parallel(n_jobs=self.mutation.requirements.n_jobs, return_as='generator') as parallel: new_ind_generator = parallel(delayed(mutation_n_evaluation)(ind) for ind in _population) + # TODO: `new_ind.graph not in pop_graphs` in cycle has complexity ~N^2 (right?) + # maybe the right way is to calculate and compare hash for set? + # does graph have hash? for new_ind in new_ind_generator: - if new_ind and new_ind.graph not in pop_graphs and verifier(new_ind.graph): + if new_ind and new_ind.graph not in pop_graphs: new_population.append(new_ind) pop_graphs.append(new_ind.graph) if len(new_population) == target_pop_size: diff --git a/golem/core/optimisers/genetic/operators/crossover.py b/golem/core/optimisers/genetic/operators/crossover.py index 3e127fe4c..ccc585f16 100644 --- a/golem/core/optimisers/genetic/operators/crossover.py +++ b/golem/core/optimisers/genetic/operators/crossover.py @@ -43,10 +43,10 @@ def __init__(self, def __call__(self, population: PopulationT) -> PopulationT: if len(population) > 1: - parallel = Parallel(n_jobs=self.requirements.n_jobs, prefer='processes') - population = parallel(delayed(self._crossover)(ind_1, ind_2) - for ind_1, ind_2 in Crossover.crossover_parents_selection(population)) - population = list(chain(*population)) + with Parallel(n_jobs=self.requirements.n_jobs) as parallel: + population = parallel(delayed(self._crossover)(ind_1, ind_2) + for ind_1, ind_2 in Crossover.crossover_parents_selection(population)) + population = list(chain(*population)) return population @staticmethod diff --git a/golem/core/optimisers/genetic/operators/mutation.py b/golem/core/optimisers/genetic/operators/mutation.py index b5713690b..6a0658f1f 100644 --- a/golem/core/optimisers/genetic/operators/mutation.py +++ b/golem/core/optimisers/genetic/operators/mutation.py @@ -83,8 +83,8 @@ def __call__(self, population: Union[Individual, PopulationT]) -> Union[Individu mutation_result = [[x] for x in self._mutation(population)] population = [population] else: - parallel = Parallel(n_jobs=self.requirements.n_jobs, prefer='processes') - mutation_result = tuple(zip(*parallel(delayed(self._mutation)(ind) for ind in population))) + with Parallel(n_jobs=self.requirements.n_jobs) as parallel: + mutation_result = tuple(zip(*parallel(delayed(self._mutation)(ind) for ind in population))) final_population, mutations_applied, application_attempts = mutation_result # drop individuals to which mutations could not be applied From 9be885d65ce7c31c9f85c1dd814227450624948d Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Tue, 19 Sep 2023 10:24:07 +0300 Subject: [PATCH 04/65] Some fixes --- golem/core/optimisers/genetic/gp_optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/golem/core/optimisers/genetic/gp_optimizer.py b/golem/core/optimisers/genetic/gp_optimizer.py index 95353f8ac..9848c7b4d 100644 --- a/golem/core/optimisers/genetic/gp_optimizer.py +++ b/golem/core/optimisers/genetic/gp_optimizer.py @@ -143,7 +143,7 @@ def _mutation_n_evaluation_in_parallel(self, include_population: bool = True) -> PopulationT: def mutation_n_evaluation(individual: Individual): individual = self.mutation(individual) - if individual and verifier(new_ind.graph): + if individual and verifier(individual.graph): individuals = evaluator([individual]) if individuals: return individuals[0] From 47fc3809ed570ee0a598a362283a4f1e9ed9a698 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Tue, 19 Sep 2023 10:25:19 +0300 Subject: [PATCH 05/65] Some fixes --- golem/core/optimisers/genetic/gp_optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/golem/core/optimisers/genetic/gp_optimizer.py b/golem/core/optimisers/genetic/gp_optimizer.py index 9848c7b4d..0e5813449 100644 --- a/golem/core/optimisers/genetic/gp_optimizer.py +++ b/golem/core/optimisers/genetic/gp_optimizer.py @@ -134,7 +134,7 @@ def _reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> evaluator=evaluator, include_population=False) - self._log.info(f'Reproduction achieved pop size {len(new_population)}') + # self._log.info(f'Reproduction achieved pop size {len(new_population)}') return new_population def _mutation_n_evaluation_in_parallel(self, From 39101cbc60e0757df66337af078c167a9a411b5c Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Tue, 19 Sep 2023 10:58:39 +0300 Subject: [PATCH 06/65] Some fixes --- golem/core/optimisers/genetic/gp_optimizer.py | 35 ++++++++++--------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/golem/core/optimisers/genetic/gp_optimizer.py b/golem/core/optimisers/genetic/gp_optimizer.py index 0e5813449..24e3e726b 100644 --- a/golem/core/optimisers/genetic/gp_optimizer.py +++ b/golem/core/optimisers/genetic/gp_optimizer.py @@ -1,8 +1,6 @@ -import time from copy import deepcopy -from itertools import cycle -from random import choice from typing import Sequence, Union, Any +from math import ceil from joblib import Parallel, delayed @@ -70,16 +68,17 @@ def _initial_population(self, evaluator: EvaluationOperator): pop_size = self.graph_optimizer_params.pop_size if len(self.initial_individuals) < pop_size: - self.initial_individuals = self._extend_population(self.initial_individuals, pop_size, evaluator) + self.initial_individuals = self._extend_population(self.initial_individuals, evaluator) # Adding of extended population to history self._update_population(self.initial_individuals, 'extended_initial_assumptions') - def _extend_population(self, pop: PopulationT, target_pop_size: int, evaluator: EvaluationOperator) -> PopulationT: + def _extend_population(self, pop: PopulationT, evaluator: EvaluationOperator) -> PopulationT: # Set mutation probabilities to 1.0 initial_req = deepcopy(self.requirements) initial_req.mutation_prob = 1.0 self.mutation.update_requirements(requirements=initial_req) + # Make mutations extended_pop = self._mutation_n_evaluation_in_parallel(population=list(pop), evaluator=evaluator) # Reset mutation probabilities to default @@ -133,7 +132,7 @@ def _reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> new_population = self._mutation_n_evaluation_in_parallel(population=new_population, evaluator=evaluator, include_population=False) - + # TODO: are there need for log? # self._log.info(f'Reproduction achieved pop size {len(new_population)}') return new_population @@ -141,19 +140,20 @@ def _mutation_n_evaluation_in_parallel(self, population: PopulationT, evaluator: EvaluationOperator, include_population: bool = True) -> PopulationT: - def mutation_n_evaluation(individual: Individual): - individual = self.mutation(individual) - if individual and verifier(individual.graph): - individuals = evaluator([individual]) - if individuals: - return individuals[0] - target_pop_size = self.graph_optimizer_params.pop_size max_tries = target_pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER verifier = self.graph_generation_params.verifier + _population = (list(population) * ceil(max_tries / len(population)))[:max_tries] - _population = cycle(population) - _population = [next(_population) for _ in range(max_tries)] + def mutation_n_evaluation(individual: Individual, + mutation=self.mutation, + verifier=verifier, + evaluator=evaluator): + individual = mutation(individual) + if individual and verifier(individual.graph): + individuals = evaluator([individual]) + if individuals: + return individuals[0] new_population, pop_graphs = [], [] if include_population: @@ -162,8 +162,9 @@ def mutation_n_evaluation(individual: Individual): with Parallel(n_jobs=self.mutation.requirements.n_jobs, return_as='generator') as parallel: new_ind_generator = parallel(delayed(mutation_n_evaluation)(ind) for ind in _population) # TODO: `new_ind.graph not in pop_graphs` in cycle has complexity ~N^2 (right?) - # maybe the right way is to calculate and compare hash for set? - # does graph have hash? + # maybe the right way is to calculate and compare + # graph hash (not by the `__hash__`, by any appropriate func) with set of hashes? + # does graph have hash? are there way to do it for random operation? for new_ind in new_ind_generator: if new_ind and new_ind.graph not in pop_graphs: new_population.append(new_ind) From f3d9907fc66ebe123073908398d92d2866c49b06 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Tue, 19 Sep 2023 11:06:41 +0300 Subject: [PATCH 07/65] Some fixes --- golem/core/optimisers/genetic/gp_optimizer.py | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/golem/core/optimisers/genetic/gp_optimizer.py b/golem/core/optimisers/genetic/gp_optimizer.py index 24e3e726b..629bd805e 100644 --- a/golem/core/optimisers/genetic/gp_optimizer.py +++ b/golem/core/optimisers/genetic/gp_optimizer.py @@ -6,6 +6,7 @@ from golem.core.constants import MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER from golem.core.dag.graph import Graph +from golem.core.log import default_log from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters from golem.core.optimisers.genetic.operators.crossover import Crossover from golem.core.optimisers.genetic.operators.elitism import Elitism @@ -36,6 +37,7 @@ def __init__(self, graph_generation_params: GraphGenerationParams, graph_optimizer_params: GPAlgorithmParameters): super().__init__(objective, initial_graphs, requirements, graph_generation_params, graph_optimizer_params) + self._log = default_log(self) # Define genetic operators self.regularization = Regularization(graph_optimizer_params, graph_generation_params) self.selection = Selection(graph_optimizer_params) @@ -131,15 +133,14 @@ def _reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> new_population = self.crossover(selected_individuals) new_population = self._mutation_n_evaluation_in_parallel(population=new_population, evaluator=evaluator, - include_population=False) - # TODO: are there need for log? - # self._log.info(f'Reproduction achieved pop size {len(new_population)}') + include_population_to_new_population=False) + self._log.info(f'Reproduction achieved pop size {len(new_population)}') return new_population def _mutation_n_evaluation_in_parallel(self, population: PopulationT, evaluator: EvaluationOperator, - include_population: bool = True) -> PopulationT: + include_population_to_new_population: bool = True) -> PopulationT: target_pop_size = self.graph_optimizer_params.pop_size max_tries = target_pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER verifier = self.graph_generation_params.verifier @@ -156,14 +157,14 @@ def mutation_n_evaluation(individual: Individual, return individuals[0] new_population, pop_graphs = [], [] - if include_population: + if include_population_to_new_population: new_population, pop_graphs = population, [ind.graph for ind in population] with Parallel(n_jobs=self.mutation.requirements.n_jobs, return_as='generator') as parallel: new_ind_generator = parallel(delayed(mutation_n_evaluation)(ind) for ind in _population) # TODO: `new_ind.graph not in pop_graphs` in cycle has complexity ~N^2 (right?) - # maybe the right way is to calculate and compare - # graph hash (not by the `__hash__`, by any appropriate func) with set of hashes? + # maybe the right way is to calculate and compare graph hash with set of hashes? + # not by the `__hash__`, by any appropriate func # does graph have hash? are there way to do it for random operation? for new_ind in new_ind_generator: if new_ind and new_ind.graph not in pop_graphs: @@ -172,9 +173,13 @@ def mutation_n_evaluation(individual: Individual, if len(new_population) == target_pop_size: break + helpful_msg = ('Check objective, constraints and evo operators. ' + 'Possibly they return too few valid individuals.') + if len(new_population) != target_pop_size: + self._log.warning(f'Could not achieve required population size: ' + f'have {len(new_population)},' + f' required {target_pop_size}!\n' + helpful_msg) if len(new_population) == 0: - helpful_msg = ('Check objective, constraints and evo operators. ' - 'Possibly they return too few valid individuals.') raise EvaluationAttemptsError('Could not collect valid individuals' ' for population.' + helpful_msg) return new_population From db34ad128d2d4420929b766a3dc0ad2f93ae3f6c Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Tue, 19 Sep 2023 11:07:16 +0300 Subject: [PATCH 08/65] Some fixes --- golem/core/optimisers/genetic/gp_optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/golem/core/optimisers/genetic/gp_optimizer.py b/golem/core/optimisers/genetic/gp_optimizer.py index 629bd805e..c8970c0fa 100644 --- a/golem/core/optimisers/genetic/gp_optimizer.py +++ b/golem/core/optimisers/genetic/gp_optimizer.py @@ -175,7 +175,7 @@ def mutation_n_evaluation(individual: Individual, helpful_msg = ('Check objective, constraints and evo operators. ' 'Possibly they return too few valid individuals.') - if len(new_population) != target_pop_size: + if 0 < len(new_population) < target_pop_size: self._log.warning(f'Could not achieve required population size: ' f'have {len(new_population)},' f' required {target_pop_size}!\n' + helpful_msg) From 106fe0acf09ab70f6b23089abad200d70a4075fb Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Tue, 19 Sep 2023 11:07:57 +0300 Subject: [PATCH 09/65] Some fixes --- golem/core/optimisers/genetic/gp_optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/golem/core/optimisers/genetic/gp_optimizer.py b/golem/core/optimisers/genetic/gp_optimizer.py index c8970c0fa..333083077 100644 --- a/golem/core/optimisers/genetic/gp_optimizer.py +++ b/golem/core/optimisers/genetic/gp_optimizer.py @@ -179,7 +179,7 @@ def mutation_n_evaluation(individual: Individual, self._log.warning(f'Could not achieve required population size: ' f'have {len(new_population)},' f' required {target_pop_size}!\n' + helpful_msg) - if len(new_population) == 0: + elif len(new_population) == 0: raise EvaluationAttemptsError('Could not collect valid individuals' ' for population.' + helpful_msg) return new_population From 4a67f817f30c95c4e5eccb1913fa16869f3328c5 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Tue, 19 Sep 2023 11:35:52 +0300 Subject: [PATCH 10/65] Some fixes --- golem/core/optimisers/populational_optimizer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/golem/core/optimisers/populational_optimizer.py b/golem/core/optimisers/populational_optimizer.py index fd5e0e1f6..c95be9ce2 100644 --- a/golem/core/optimisers/populational_optimizer.py +++ b/golem/core/optimisers/populational_optimizer.py @@ -104,7 +104,6 @@ def optimise(self, objective: ObjectiveFunction) -> Sequence[Graph]: break # Adding of new population to history self._update_population(new_population) - pbar.update() pbar.close() self._update_population(self.best_individuals, 'final_choices') return [ind.graph for ind in self.best_individuals] From 83f3c0b9f027378d268e0c5576449c88564398b3 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Fri, 27 Oct 2023 10:35:53 +0300 Subject: [PATCH 11/65] Fix `MultiprocessingDispatcher` --- golem/core/optimisers/genetic/evaluation.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/golem/core/optimisers/genetic/evaluation.py b/golem/core/optimisers/genetic/evaluation.py index 14ee73f82..8b23a20d4 100644 --- a/golem/core/optimisers/genetic/evaluation.py +++ b/golem/core/optimisers/genetic/evaluation.py @@ -241,15 +241,18 @@ def dispatch(self, objective: ObjectiveFunction, timer: Optional[Timer] = None) def evaluate_population(self, individuals: PopulationT) -> PopulationT: individuals_to_evaluate, individuals_to_skip = self.split_individuals_to_evaluate(individuals) - # Evaluate individuals without valid fitness in parallel. - n_jobs = determine_n_jobs(self._n_jobs, self.logger) - parallel = Parallel(n_jobs=n_jobs, verbose=0, pre_dispatch="2*n_jobs") + # Evaluate individuals without valid fitness eval_func = partial(self.evaluate_single, logs_initializer=Log().get_parameters()) - evaluation_results = parallel(delayed(eval_func)(ind.graph, ind.uid) for ind in individuals_to_evaluate) + + if len(individuals_to_evaluate) == 1 or self._n_jobs == 1: + evaluation_results = [eval_func(ind.graph, ind.uid) for ind in individuals_to_evaluate] + else: + n_jobs = determine_n_jobs(self._n_jobs, self.logger) + parallel = Parallel(n_jobs=n_jobs) + evaluation_results = parallel(delayed(eval_func)(ind.graph, ind.uid) for ind in individuals_to_evaluate) + individuals_evaluated = self.apply_evaluation_results(individuals_to_evaluate, evaluation_results) - # If there were no successful evals then try once again getting at least one, - # even if time limit was reached successful_evals = individuals_evaluated + individuals_to_skip self.population_evaluation_info(evaluated_pop_size=len(successful_evals), pop_size=len(individuals)) From 0d19cb74769bea8a08bb02497e5d619a76d5e0e2 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Tue, 26 Sep 2023 15:56:11 +0300 Subject: [PATCH 12/65] Fix `EvoGraphOptimizer` --- golem/core/optimisers/genetic/gp_optimizer.py | 39 ++++++++++--------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/golem/core/optimisers/genetic/gp_optimizer.py b/golem/core/optimisers/genetic/gp_optimizer.py index 333083077..a9129f194 100644 --- a/golem/core/optimisers/genetic/gp_optimizer.py +++ b/golem/core/optimisers/genetic/gp_optimizer.py @@ -1,4 +1,5 @@ from copy import deepcopy +from time import perf_counter from typing import Sequence, Union, Any from math import ceil @@ -6,6 +7,7 @@ from golem.core.constants import MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER from golem.core.dag.graph import Graph +from golem.core.dag.graph_verifier import GraphVerifier from golem.core.log import default_log from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters from golem.core.optimisers.genetic.operators.crossover import Crossover @@ -141,37 +143,36 @@ def _mutation_n_evaluation_in_parallel(self, population: PopulationT, evaluator: EvaluationOperator, include_population_to_new_population: bool = True) -> PopulationT: - target_pop_size = self.graph_optimizer_params.pop_size - max_tries = target_pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER - verifier = self.graph_generation_params.verifier - _population = (list(population) * ceil(max_tries / len(population)))[:max_tries] - def mutation_n_evaluation(individual: Individual, - mutation=self.mutation, - verifier=verifier, - evaluator=evaluator): + mutation: Mutation = self.mutation, + verifier: GraphVerifier = self.graph_generation_params.verifier, + evaluator: callable = evaluator): individual = mutation(individual) if individual and verifier(individual.graph): individuals = evaluator([individual]) if individuals: return individuals[0] + target_pop_size = self.graph_optimizer_params.pop_size + max_tries = target_pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER + _population = (list(population) * ceil(max_tries / len(population) + 1)) + new_population, pop_graphs = [], [] if include_population_to_new_population: - new_population, pop_graphs = population, [ind.graph for ind in population] + new_population, pop_graphs = population[:], [ind.graph.descriptive_id for ind in population] with Parallel(n_jobs=self.mutation.requirements.n_jobs, return_as='generator') as parallel: new_ind_generator = parallel(delayed(mutation_n_evaluation)(ind) for ind in _population) - # TODO: `new_ind.graph not in pop_graphs` in cycle has complexity ~N^2 (right?) - # maybe the right way is to calculate and compare graph hash with set of hashes? - # not by the `__hash__`, by any appropriate func - # does graph have hash? are there way to do it for random operation? - for new_ind in new_ind_generator: - if new_ind and new_ind.graph not in pop_graphs: - new_population.append(new_ind) - pop_graphs.append(new_ind.graph) - if len(new_population) == target_pop_size: - break + for try_num, new_ind in enumerate(new_ind_generator): + if new_ind: + descriptive_id = new_ind.graph.descriptive_id + if descriptive_id not in pop_graphs: + new_population.append(new_ind) + pop_graphs.append(descriptive_id) + if len(new_population) >= target_pop_size: + break + if try_num >= max_tries: + break helpful_msg = ('Check objective, constraints and evo operators. ' 'Possibly they return too few valid individuals.') From b5abd1a2ce97e97492e3b58f3e2762dbfcadd039 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Tue, 26 Sep 2023 17:22:34 +0300 Subject: [PATCH 13/65] Add all graph comparison --- golem/core/optimisers/genetic/gp_optimizer.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/golem/core/optimisers/genetic/gp_optimizer.py b/golem/core/optimisers/genetic/gp_optimizer.py index a9129f194..9a497f213 100644 --- a/golem/core/optimisers/genetic/gp_optimizer.py +++ b/golem/core/optimisers/genetic/gp_optimizer.py @@ -65,6 +65,9 @@ def __init__(self, self.initial_individuals = [Individual(graph, metadata=requirements.static_individual_metadata) for graph in self.initial_graphs] + # All individuals graphs + self._graphs = set() + def _initial_population(self, evaluator: EvaluationOperator): """ Initializes the initial population """ # Adding of initial assumptions to history as zero generation @@ -75,6 +78,8 @@ def _initial_population(self, evaluator: EvaluationOperator): self.initial_individuals = self._extend_population(self.initial_individuals, evaluator) # Adding of extended population to history self._update_population(self.initial_individuals, 'extended_initial_assumptions') + # Save graphs + self._save_graphs(self.initial_individuals) def _extend_population(self, pop: PopulationT, evaluator: EvaluationOperator) -> PopulationT: # Set mutation probabilities to 1.0 @@ -110,7 +115,8 @@ def _evolve_population(self, evaluator: EvaluationOperator) -> PopulationT: # Use some part of previous pop in the next pop new_population = self.inheritance(self.population, new_population) new_population = self.elitism(self.generations.best_individuals, new_population) - + # Save graphs + self._save_graphs(new_population) return new_population def _update_requirements(self): @@ -157,18 +163,18 @@ def mutation_n_evaluation(individual: Individual, max_tries = target_pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER _population = (list(population) * ceil(max_tries / len(population) + 1)) - new_population, pop_graphs = [], [] + new_population, pop_graphs = [], set() if include_population_to_new_population: - new_population, pop_graphs = population[:], [ind.graph.descriptive_id for ind in population] + new_population, pop_graphs = population[:], set([ind.graph.descriptive_id for ind in population]) with Parallel(n_jobs=self.mutation.requirements.n_jobs, return_as='generator') as parallel: new_ind_generator = parallel(delayed(mutation_n_evaluation)(ind) for ind in _population) for try_num, new_ind in enumerate(new_ind_generator): if new_ind: descriptive_id = new_ind.graph.descriptive_id - if descriptive_id not in pop_graphs: + if descriptive_id not in pop_graphs and descriptive_id not in self._graphs: new_population.append(new_ind) - pop_graphs.append(descriptive_id) + pop_graphs.add(descriptive_id) if len(new_population) >= target_pop_size: break if try_num >= max_tries: @@ -184,3 +190,6 @@ def mutation_n_evaluation(individual: Individual, raise EvaluationAttemptsError('Could not collect valid individuals' ' for population.' + helpful_msg) return new_population + + def _save_graphs(self, population: PopulationT): + self._graphs |= set(ind.graph.descriptive_id for ind in population) From 9cf88de5ef9f3974fca7ced6fa5cbb326fd37ff3 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Fri, 27 Oct 2023 10:38:49 +0300 Subject: [PATCH 14/65] wip --- golem/core/optimisers/genetic/gp_optimizer.py | 99 ++-------- .../optimisers/genetic/operators/mutation.py | 44 ++--- .../genetic/operators/reproduction.py | 177 ++++++++++++------ 3 files changed, 157 insertions(+), 163 deletions(-) diff --git a/golem/core/optimisers/genetic/gp_optimizer.py b/golem/core/optimisers/genetic/gp_optimizer.py index 9a497f213..9ebcf84c4 100644 --- a/golem/core/optimisers/genetic/gp_optimizer.py +++ b/golem/core/optimisers/genetic/gp_optimizer.py @@ -1,14 +1,9 @@ from copy import deepcopy -from time import perf_counter from typing import Sequence, Union, Any -from math import ceil -from joblib import Parallel, delayed +from golem.core.optimisers.genetic.operators.reproduction import ReproductionController -from golem.core.constants import MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER from golem.core.dag.graph import Graph -from golem.core.dag.graph_verifier import GraphVerifier -from golem.core.log import default_log from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters from golem.core.optimisers.genetic.operators.crossover import Crossover from golem.core.optimisers.genetic.operators.elitism import Elitism @@ -24,7 +19,7 @@ from golem.core.optimisers.opt_history_objects.individual import Individual from golem.core.optimisers.optimization_parameters import GraphRequirements from golem.core.optimisers.optimizer import GraphGenerationParams -from golem.core.optimisers.populational_optimizer import PopulationalOptimizer, EvaluationAttemptsError +from golem.core.optimisers.populational_optimizer import PopulationalOptimizer class EvoGraphOptimizer(PopulationalOptimizer): @@ -39,7 +34,6 @@ def __init__(self, graph_generation_params: GraphGenerationParams, graph_optimizer_params: GPAlgorithmParameters): super().__init__(objective, initial_graphs, requirements, graph_generation_params, graph_optimizer_params) - self._log = default_log(self) # Define genetic operators self.regularization = Regularization(graph_optimizer_params, graph_generation_params) self.selection = Selection(graph_optimizer_params) @@ -50,6 +44,12 @@ def __init__(self, self.operators = [self.regularization, self.selection, self.crossover, self.mutation, self.inheritance, self.elitism] + self.reproducer = ReproductionController(parameters=graph_optimizer_params, + selection=self.selection, + mutation=self.mutation, + crossover=self.crossover, + verifier=self.graph_generation_params.verifier) + # Define adaptive parameters self._pop_size: PopulationSize = init_adaptive_pop_size(graph_optimizer_params, self.generations) self._operators_prob = init_adaptive_operators_prob(graph_optimizer_params) @@ -65,9 +65,6 @@ def __init__(self, self.initial_individuals = [Individual(graph, metadata=requirements.static_individual_metadata) for graph in self.initial_graphs] - # All individuals graphs - self._graphs = set() - def _initial_population(self, evaluator: EvaluationOperator): """ Initializes the initial population """ # Adding of initial assumptions to history as zero generation @@ -75,24 +72,10 @@ def _initial_population(self, evaluator: EvaluationOperator): pop_size = self.graph_optimizer_params.pop_size if len(self.initial_individuals) < pop_size: - self.initial_individuals = self._extend_population(self.initial_individuals, evaluator) + self.initial_individuals += self.reproducer._mutate_over_population(population=self.initial_individuals, + evaluator=evaluator) # Adding of extended population to history self._update_population(self.initial_individuals, 'extended_initial_assumptions') - # Save graphs - self._save_graphs(self.initial_individuals) - - def _extend_population(self, pop: PopulationT, evaluator: EvaluationOperator) -> PopulationT: - # Set mutation probabilities to 1.0 - initial_req = deepcopy(self.requirements) - initial_req.mutation_prob = 1.0 - self.mutation.update_requirements(requirements=initial_req) - - # Make mutations - extended_pop = self._mutation_n_evaluation_in_parallel(population=list(pop), evaluator=evaluator) - - # Reset mutation probabilities to default - self.mutation.update_requirements(requirements=self.requirements) - return extended_pop def _evolve_population(self, evaluator: EvaluationOperator) -> PopulationT: """ Method realizing full evolution cycle """ @@ -104,7 +87,7 @@ def _evolve_population(self, evaluator: EvaluationOperator) -> PopulationT: # Regularize previous population individuals_to_select = self.regularization(self.population, evaluator) # Reproduce from previous pop to get next population - new_population = self._reproduce(individuals_to_select, evaluator) + new_population = self.reproducer.reproduce(individuals_to_select, evaluator) # Adaptive agent experience collection & learning # Must be called after reproduction (that collects the new experience) @@ -115,8 +98,6 @@ def _evolve_population(self, evaluator: EvaluationOperator) -> PopulationT: # Use some part of previous pop in the next pop new_population = self.inheritance(self.population, new_population) new_population = self.elitism(self.generations.best_individuals, new_population) - # Save graphs - self._save_graphs(new_population) return new_population def _update_requirements(self): @@ -135,61 +116,3 @@ def _update_requirements(self): # update requirements in operators for operator in self.operators: operator.update_requirements(self.graph_optimizer_params, self.requirements) - - def _reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: - selected_individuals = self.selection(population, self.graph_optimizer_params.pop_size) - new_population = self.crossover(selected_individuals) - new_population = self._mutation_n_evaluation_in_parallel(population=new_population, - evaluator=evaluator, - include_population_to_new_population=False) - self._log.info(f'Reproduction achieved pop size {len(new_population)}') - return new_population - - def _mutation_n_evaluation_in_parallel(self, - population: PopulationT, - evaluator: EvaluationOperator, - include_population_to_new_population: bool = True) -> PopulationT: - def mutation_n_evaluation(individual: Individual, - mutation: Mutation = self.mutation, - verifier: GraphVerifier = self.graph_generation_params.verifier, - evaluator: callable = evaluator): - individual = mutation(individual) - if individual and verifier(individual.graph): - individuals = evaluator([individual]) - if individuals: - return individuals[0] - - target_pop_size = self.graph_optimizer_params.pop_size - max_tries = target_pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER - _population = (list(population) * ceil(max_tries / len(population) + 1)) - - new_population, pop_graphs = [], set() - if include_population_to_new_population: - new_population, pop_graphs = population[:], set([ind.graph.descriptive_id for ind in population]) - - with Parallel(n_jobs=self.mutation.requirements.n_jobs, return_as='generator') as parallel: - new_ind_generator = parallel(delayed(mutation_n_evaluation)(ind) for ind in _population) - for try_num, new_ind in enumerate(new_ind_generator): - if new_ind: - descriptive_id = new_ind.graph.descriptive_id - if descriptive_id not in pop_graphs and descriptive_id not in self._graphs: - new_population.append(new_ind) - pop_graphs.add(descriptive_id) - if len(new_population) >= target_pop_size: - break - if try_num >= max_tries: - break - - helpful_msg = ('Check objective, constraints and evo operators. ' - 'Possibly they return too few valid individuals.') - if 0 < len(new_population) < target_pop_size: - self._log.warning(f'Could not achieve required population size: ' - f'have {len(new_population)},' - f' required {target_pop_size}!\n' + helpful_msg) - elif len(new_population) == 0: - raise EvaluationAttemptsError('Could not collect valid individuals' - ' for population.' + helpful_msg) - return new_population - - def _save_graphs(self, population: PopulationT): - self._graphs |= set(ind.graph.descriptive_id for ind in population) diff --git a/golem/core/optimisers/genetic/operators/mutation.py b/golem/core/optimisers/genetic/operators/mutation.py index 6a0658f1f..eb89fa454 100644 --- a/golem/core/optimisers/genetic/operators/mutation.py +++ b/golem/core/optimisers/genetic/operators/mutation.py @@ -24,6 +24,7 @@ from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters MutationFunc = Callable[[Graph, GraphRequirements, GraphGenerationParams, AlgorithmParameters], Graph] +MutationType = Union[MutationTypesEnum, Callable] MutationIdType = Hashable MutationRepo = Mapping[MutationIdType, MutationFunc] @@ -78,14 +79,15 @@ def _init_operator_agent(graph_gen_params: GraphGenerationParams, def agent(self) -> OperatorAgent: return self._operator_agent - def __call__(self, population: Union[Individual, PopulationT]) -> Union[Individual, PopulationT]: + def __call__(self, + population: Union[Individual, PopulationT], + mutation_type: Union[None, MutationTypesEnum, Callable] = None, + ) -> Union[Individual, PopulationT]: if isinstance(population, Individual): - mutation_result = [[x] for x in self._mutation(population)] population = [population] - else: - with Parallel(n_jobs=self.requirements.n_jobs) as parallel: - mutation_result = tuple(zip(*parallel(delayed(self._mutation)(ind) for ind in population))) - final_population, mutations_applied, application_attempts = mutation_result + + final_population, _, application_attempts = \ + tuple(zip(*map(lambda individual: self._mutation(individual, mutation_type=mutation_type), population))) # drop individuals to which mutations could not be applied final_population = [ind for ind, init_ind, attempt in zip(final_population, population, application_attempts) @@ -96,25 +98,23 @@ def __call__(self, population: Union[Individual, PopulationT]) -> Union[Individu return final_population - def _mutation(self, individual: Individual) -> Tuple[Individual, Optional[MutationIdType], bool]: + def _mutation(self, + individual: Individual, + mutation_type: Union[None, MutationTypesEnum, Callable] = None, + ) -> Tuple[Individual, Union[MutationTypesEnum, Callable], bool]: """ Function applies mutation operator to graph """ - application_attempt = False - mutation_applied = None - for _ in range(self.parameters.max_num_of_operator_attempts): - new_graph = deepcopy(individual.graph) - - new_graph, mutation_applied = self._apply_mutations(new_graph) - if mutation_applied is None: - continue - application_attempt = True + new_graph = deepcopy(individual.graph) + mutation_type = mutation_type or self._operator_agent.choose_action(new_graph) + applied = self._will_mutation_be_applied(mutation_type) + if applied: + new_graph = self._apply_mutations(new_graph, mutation_type=mutation_type) is_correct_graph = self.graph_generation_params.verifier(new_graph) if is_correct_graph: parent_operator = ParentOperator(type_='mutation', - operators=mutation_applied, + operators=mutation_type, parent_individuals=individual) individual = Individual(new_graph, parent_operator, metadata=self.requirements.static_individual_metadata) - break else: # Collect invalid actions self.agent_experience.collect_experience(individual, mutation_applied, reward=-1.0) @@ -123,9 +123,11 @@ def _mutation(self, individual: Individual) -> Tuple[Individual, Optional[Mutati 'Please check optimization parameters for correctness.') return individual, mutation_applied, application_attempt - def _sample_num_of_mutations(self) -> int: + def _sample_num_of_mutations(self, mutation_type: Union[MutationTypesEnum, Callable]) -> int: # most of the time returns 1 or rarely several mutations - if self.parameters.variable_mutation_num: + # if mutation is custom apply it only once + is_custom_mutation = isinstance(mutation_type, Callable) + if not is_custom_mutation and self.parameters.variable_mutation_num: num_mut = max(int(round(np.random.lognormal(0, sigma=0.5))), 1) else: num_mut = 1 @@ -152,7 +154,7 @@ def _adapt_and_apply_mutation(self, new_graph: Graph, mutation_type) -> Tuple[Gr new_graph = mutation_func(new_graph, requirements=self.requirements, graph_gen_params=self.graph_generation_params, parameters=self.parameters) - return new_graph, applied + return new_graph def _will_mutation_be_applied(self, mutation_type: Union[MutationTypesEnum, Callable]) -> bool: return random() <= self.parameters.mutation_prob and mutation_type is not MutationTypesEnum.none diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 32c78c81e..5e885bf63 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -1,20 +1,14 @@ -from itertools import cycle -from random import choice -from typing import Optional - -import numpy as np -from joblib import Parallel, delayed - -from golem.core.constants import MIN_POP_SIZE, EVALUATION_ATTEMPTS_NUMBER +from golem.core.constants import MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER +from golem.core.dag.graph_verifier import GraphVerifier from golem.core.log import default_log from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters +from golem.core.optimisers.genetic.operators.base_mutations import MutationTypesEnum from golem.core.optimisers.genetic.operators.crossover import Crossover -from golem.core.optimisers.genetic.operators.mutation import Mutation +from golem.core.optimisers.genetic.operators.mutation import Mutation, MutationType from golem.core.optimisers.genetic.operators.operator import PopulationT, EvaluationOperator from golem.core.optimisers.genetic.operators.selection import Selection -from golem.core.optimisers.opt_history_objects.individual import Individual from golem.core.optimisers.populational_optimizer import EvaluationAttemptsError -from golem.utilities.data_structures import ensure_wrapped_in_sequence +from golem.core.optimisers.opt_history_objects.individual import Individual class ReproductionController: @@ -27,6 +21,7 @@ class ReproductionController: selection: operator used in reproduction. mutation: operator used in reproduction. crossover: operator used in reproduction. + window_size: size in iterations of the moving window to compute reproduction success rate. """ def __init__(self, @@ -34,60 +29,134 @@ def __init__(self, selection: Selection, mutation: Mutation, crossover: Crossover, - ): + verifier: GraphVerifier): self.parameters = parameters self.selection = selection self.mutation = mutation self.crossover = crossover - self._minimum_valid_ratio = parameters.required_valid_ratio * 0.5 self._log = default_log(self) - def reproduce(self, - population: PopulationT, - evaluator: EvaluationOperator - ) -> PopulationT: + def reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: """Reproduces and evaluates population (select, crossover, mutate). - Implements additional checks on population to ensure that population size - follows required population size. + Implements additional checks on population to ensure that population size + follows required population size. """ selected_individuals = self.selection(population, self.parameters.pop_size) - population_after_crossover = self.crossover(selected_individuals) + new_population = self.crossover(selected_individuals) + new_population = self._mutate_over_population(new_population, evaluator) + return new_population + + def _mutate_over_population(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: + target_pop_size = self.parameters.pop_size + max_tries = target_pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER + max_attempts_count = self.parameters.max_num_of_operator_attempts + mutation_fun = partial(self._mutation_n_evaluation, evaluator=evaluator) + population_descriptive_ids_mapping = {ind.graph.descriptive_id: ind for ind in population} + + # mutations counters + mutation_types = self.parameters.mutation_types + mutation_count = {mutation_type: 0 for mutation_type in mutation_types} + mutation_count_for_each_ind = {descriptive_id: copy(mutation_count) + for descriptive_id in population_descriptive_ids_mapping} + mutation_tries_for_each_ind = {descriptive_id: copy(mutation_count) + for descriptive_id in population_descriptive_ids_mapping} + + + # prepare one mutation for each individual in population + # mutation_type is None, let Mutation() choose + mutation_queue = Queue() + for descriptive_id, individual in zip(population_descriptive_ids_mapping, population): + mutation_queue.put((descriptive_id, individual, None)) + + # run infinite cycle with evaluation in parallel + new_population = list() + with Parallel(n_jobs=self.mutation.requirements.n_jobs, return_as='generator') as parallel: + ind_generator = parallel(delayed(mutation_fun)(mutation_queue) for _ in cycle([1])) + for try_num, (parent_descriptive_id, mutation_type, new_ind) in enumerate(ind_generator): + mutation_tries_for_each_ind[parent_descriptive_id][mutation_type] += 1 + if new_ind: + descriptive_id = new_ind.graph.descriptive_id + if descriptive_id not in self._pop_graph_descriptive_ids: + new_population.append(new_ind) + self._pop_graph_descriptive_ids.add(descriptive_id) + if len(new_population) >= target_pop_size: + break + + # count mutations + mutation_count_for_each_ind[parent_descriptive_id][mutation_type] += 1 + mutation_count[mutation_type] += 1 + + # choose new mutation for individual, rely on check probabilities + mutation_probabilities = self.mutation._operator_agent.get_action_probs() + # potential place for error if order in + # mutation_probabilities and mutation_real_propabilities differ + # needs to fix with all mabs and random agent + mutation_real_probabilities = [mutation_count / self.parameters.pop_size + for mutation_count in mutation_count.values()] + + allowed_mutation_types = [mutation_type + for mutation_type, prob, real_prob in + zip(mutation_types, + mutation_probabilities, + mutation_real_probabilities) + if prob > real_prob] + + if not allowed_mutation_types: + raise ValueError(f"Sum of mutation_probabilities is not equal to 1." + f"Check _operator_agent in mutation.") + + # get the most rare mutation for all inds + stop = False + lowest_mutation_count = (None, None, 0) + for _graph_id, _graph_id_mutation_count in mutation_count_for_each_ind.items(): + for _mutation_type, _mutation_count in _graph_id_mutation_count.items(): + if _mutation_type in allowed_mutation_types: + if _mutation_count == 0: + lowest_mutation_count = (_graph_id, _mutation_type, _mutation_count) + stop = True + elif _mutation_count < lowest_mutation_count[-1]: + lowest_mutation_count = (_graph_id, _mutation_type, _mutation_count) + if stop: + break + if stop: + break + graph_id_to_mutate, mutation_type, _ = lowest_mutation_count + mutation_queue.put((graph_id_to_mutate, + population_descriptive_ids_mapping[graph_id_to_mutate], + mutation_type)) + if try_num >= max_tries: + break + + self._check_final_population(new_population) + + return new_population + + def _check_final_population(self, population: PopulationT) -> None: + """ If population do not achieve required length return a warning or raise exception """ + target_pop_size = self.parameters.pop_size + helpful_msg = ('Check objective, constraints and evo operators. ' + 'Possibly they return too few valid individuals.') + + if len(population) < target_pop_size * self._minimum_valid_ratio: + raise EvaluationAttemptsError('Could not collect valid individuals' + ' for population.' + helpful_msg) + elif len(population) < target_pop_size: + self._log.warning(f'Could not achieve required population size: ' + f'have {len(population)},' + f' required {target_pop_size}!\n' + helpful_msg) - def mutation_n_evaluation(individual: Individual): - individual = self.mutation(individual) - if individual: + def _mutation_n_evaluation(self, mutation_queue: Queue, + evaluator: EvaluationOperator): + try: + # wait timeout in seconds for new task to reduce probability of process flooding + descriptive_id, individual, mutation_type = mutation_queue.get(timeout=1) + individual, mutation_type, applied = self.mutation._mutation(individual, mutation_type=mutation_type) + if applied and individual and self.verifier(individual.graph): individuals = evaluator([individual]) if individuals: - individual = individuals[0] - return individual - - with Parallel(n_jobs=self.mutation.requirements.n_jobs, prefer='processes', return_as='generator') as parallel: - new_ind_generator = parallel(delayed(mutation_n_evaluation)(ind) - for ind in population_after_crossover * EVALUATION_ATTEMPTS_NUMBER) - - new_population, pop_graphs = [], [] - for new_ind in new_ind_generator: - if new_ind and new_ind.graph not in pop_graphs: - new_population.append(new_ind) - pop_graphs.append(new_ind.graph) - if len(new_population) == self.parameters.pop_size: - break - - if len(new_population) >= self.parameters.pop_size * self.parameters.required_valid_ratio: - self._log.info(f'Reproduction achieved pop size {len(new_population)}') - return new_population - else: - # If number of evaluation attempts is exceeded return a warning or raise exception - helpful_msg = ('Check objective, constraints and evo operators. ' - 'Possibly they return too few valid individuals.') - - if len(new_population) >= self.parameters.pop_size * self._minimum_valid_ratio: - self._log.warning(f'Could not achieve required population size: ' - f'have {len(new_population)},' - f' required {self.parameters.pop_size}!\n' + helpful_msg) - return new_population - else: - raise EvaluationAttemptsError('Could not collect valid individuals' - ' for next population.' + helpful_msg) + return descriptive_id, mutation_type, individuals[0] + except queue.Empty: + pass + return descriptive_id, mutation_type, None From d19bcde0241876bb98d02d4a1882fef3b644dae3 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Fri, 20 Oct 2023 18:22:31 +0300 Subject: [PATCH 15/65] wip --- golem/core/optimisers/genetic/gp_optimizer.py | 3 +-- golem/core/optimisers/genetic/operators/reproduction.py | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/golem/core/optimisers/genetic/gp_optimizer.py b/golem/core/optimisers/genetic/gp_optimizer.py index 9ebcf84c4..ed09d6f24 100644 --- a/golem/core/optimisers/genetic/gp_optimizer.py +++ b/golem/core/optimisers/genetic/gp_optimizer.py @@ -1,8 +1,6 @@ from copy import deepcopy from typing import Sequence, Union, Any -from golem.core.optimisers.genetic.operators.reproduction import ReproductionController - from golem.core.dag.graph import Graph from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters from golem.core.optimisers.genetic.operators.crossover import Crossover @@ -11,6 +9,7 @@ from golem.core.optimisers.genetic.operators.mutation import Mutation from golem.core.optimisers.genetic.operators.operator import PopulationT, EvaluationOperator from golem.core.optimisers.genetic.operators.regularization import Regularization +from golem.core.optimisers.genetic.operators.reproduction import ReproductionController from golem.core.optimisers.genetic.operators.selection import Selection from golem.core.optimisers.genetic.parameters.graph_depth import AdaptiveGraphDepth from golem.core.optimisers.genetic.parameters.operators_prob import init_adaptive_operators_prob diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 5e885bf63..2ec66031f 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -63,7 +63,6 @@ def _mutate_over_population(self, population: PopulationT, evaluator: Evaluation mutation_tries_for_each_ind = {descriptive_id: copy(mutation_count) for descriptive_id in population_descriptive_ids_mapping} - # prepare one mutation for each individual in population # mutation_type is None, let Mutation() choose mutation_queue = Queue() @@ -73,7 +72,7 @@ def _mutate_over_population(self, population: PopulationT, evaluator: Evaluation # run infinite cycle with evaluation in parallel new_population = list() with Parallel(n_jobs=self.mutation.requirements.n_jobs, return_as='generator') as parallel: - ind_generator = parallel(delayed(mutation_fun)(mutation_queue) for _ in cycle([1])) + ind_generator = parallel(delayed(mutation_fun)(mutation_queue) for _ in [1] * 5) # cycle([1])) for try_num, (parent_descriptive_id, mutation_type, new_ind) in enumerate(ind_generator): mutation_tries_for_each_ind[parent_descriptive_id][mutation_type] += 1 if new_ind: @@ -147,7 +146,7 @@ def _check_final_population(self, population: PopulationT) -> None: f'have {len(population)},' f' required {target_pop_size}!\n' + helpful_msg) - def _mutation_n_evaluation(self, mutation_queue: Queue, + def _mutation_n_evaluation(self, i, mutation_queue: Queue, evaluator: EvaluationOperator): try: # wait timeout in seconds for new task to reduce probability of process flooding From 835d3260df757908b8707b25e40407bd72011383 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Mon, 23 Oct 2023 12:45:21 +0300 Subject: [PATCH 16/65] wip --- golem/core/constants.py | 2 +- .../genetic/operators/reproduction.py | 107 ++++++++---------- 2 files changed, 50 insertions(+), 59 deletions(-) diff --git a/golem/core/constants.py b/golem/core/constants.py index 7dd6484f2..73214eb21 100644 --- a/golem/core/constants.py +++ b/golem/core/constants.py @@ -1,6 +1,6 @@ import numpy as np -MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER = 3 +MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER = 5 MAX_GRAPH_GEN_ATTEMPTS = 1000 MAX_TUNING_METRIC_VALUE = np.inf MIN_TIME_FOR_TUNING_IN_SEC = 3 diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 2ec66031f..60c2ce547 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -1,3 +1,4 @@ + from golem.core.constants import MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER from golem.core.dag.graph_verifier import GraphVerifier from golem.core.log import default_log @@ -57,79 +58,65 @@ def _mutate_over_population(self, population: PopulationT, evaluator: Evaluation # mutations counters mutation_types = self.parameters.mutation_types - mutation_count = {mutation_type: 0 for mutation_type in mutation_types} - mutation_count_for_each_ind = {descriptive_id: copy(mutation_count) + mutation_count_for_each_ind = {descriptive_id: {mutation_type: 0 for mutation_type in mutation_types} for descriptive_id in population_descriptive_ids_mapping} - mutation_tries_for_each_ind = {descriptive_id: copy(mutation_count) + all_mutations_count_for_each_ind = {descriptive_id: 0 for descriptive_id in population_descriptive_ids_mapping} + mutation_tries_for_each_ind = {descriptive_id: {mutation_type: 0 for mutation_type in mutation_types} for descriptive_id in population_descriptive_ids_mapping} # prepare one mutation for each individual in population - # mutation_type is None, let Mutation() choose - mutation_queue = Queue() + mutation_queue = Manager().Queue() for descriptive_id, individual in zip(population_descriptive_ids_mapping, population): + # mutation_type is None, let Mutation() choose mutation_queue.put((descriptive_id, individual, None)) - # run infinite cycle with evaluation in parallel + # run cycle with evaluation in parallel + # made with joblib.Parallel due to + # it is simple + # it is reliable (joblib/loky solves some problems with multiprocessing) + # joblib is in requirements new_population = list() with Parallel(n_jobs=self.mutation.requirements.n_jobs, return_as='generator') as parallel: - ind_generator = parallel(delayed(mutation_fun)(mutation_queue) for _ in [1] * 5) # cycle([1])) + ind_generator = parallel(delayed(mutation_fun)(mutation_queue) for _ in range(max_tries * 2)) for try_num, (parent_descriptive_id, mutation_type, new_ind) in enumerate(ind_generator): + if parent_descriptive_id is None: + continue mutation_tries_for_each_ind[parent_descriptive_id][mutation_type] += 1 if new_ind: + mutation_count_for_each_ind[parent_descriptive_id][mutation_type] += 1 + all_mutations_count_for_each_ind[parent_descriptive_id] += 1 + descriptive_id = new_ind.graph.descriptive_id if descriptive_id not in self._pop_graph_descriptive_ids: + # add ind to new population new_population.append(new_ind) self._pop_graph_descriptive_ids.add(descriptive_id) if len(new_population) >= target_pop_size: break - # count mutations - mutation_count_for_each_ind[parent_descriptive_id][mutation_type] += 1 - mutation_count[mutation_type] += 1 - - # choose new mutation for individual, rely on check probabilities + # filter mutations for individual, rely on probabilities + # place for error if mutation_types order in _operator_agent and in mutation_types is differ + allowed_mutation_types = [] mutation_probabilities = self.mutation._operator_agent.get_action_probs() - # potential place for error if order in - # mutation_probabilities and mutation_real_propabilities differ - # needs to fix with all mabs and random agent - mutation_real_probabilities = [mutation_count / self.parameters.pop_size - for mutation_count in mutation_count.values()] - - allowed_mutation_types = [mutation_type - for mutation_type, prob, real_prob in - zip(mutation_types, - mutation_probabilities, - mutation_real_probabilities) - if prob > real_prob] - - if not allowed_mutation_types: - raise ValueError(f"Sum of mutation_probabilities is not equal to 1." - f"Check _operator_agent in mutation.") - - # get the most rare mutation for all inds - stop = False - lowest_mutation_count = (None, None, 0) - for _graph_id, _graph_id_mutation_count in mutation_count_for_each_ind.items(): - for _mutation_type, _mutation_count in _graph_id_mutation_count.items(): - if _mutation_type in allowed_mutation_types: - if _mutation_count == 0: - lowest_mutation_count = (_graph_id, _mutation_type, _mutation_count) - stop = True - elif _mutation_count < lowest_mutation_count[-1]: - lowest_mutation_count = (_graph_id, _mutation_type, _mutation_count) - if stop: - break - if stop: - break - graph_id_to_mutate, mutation_type, _ = lowest_mutation_count - mutation_queue.put((graph_id_to_mutate, - population_descriptive_ids_mapping[graph_id_to_mutate], - mutation_type)) + for mutation_type, mutation_probability in zip(mutation_types, mutation_probabilities): + real_prob = (mutation_count_for_each_ind[parent_descriptive_id][mutation_type] / + all_mutations_count_for_each_ind[parent_descriptive_id]) + if real_prob < mutation_probability: + allowed_mutation_types.append(mutation_type) + + if allowed_mutation_types: + # choose next mutation with lowest tries count + next_mutation_type = min(allowed_mutation_types, + key=lambda mutation_type: + mutation_tries_for_each_ind[parent_descriptive_id][mutation_type]) + + mutation_queue.put((parent_descriptive_id, + population_descriptive_ids_mapping[parent_descriptive_id], + next_mutation_type)) if try_num >= max_tries: break self._check_final_population(new_population) - return new_population def _check_final_population(self, population: PopulationT) -> None: @@ -146,16 +133,20 @@ def _check_final_population(self, population: PopulationT) -> None: f'have {len(population)},' f' required {target_pop_size}!\n' + helpful_msg) - def _mutation_n_evaluation(self, i, mutation_queue: Queue, + def _mutation_n_evaluation(self, + mutation_queue: Queue, evaluator: EvaluationOperator): try: - # wait timeout in seconds for new task to reduce probability of process flooding - descriptive_id, individual, mutation_type = mutation_queue.get(timeout=1) - individual, mutation_type, applied = self.mutation._mutation(individual, mutation_type=mutation_type) - if applied and individual and self.verifier(individual.graph): - individuals = evaluator([individual]) - if individuals: - return descriptive_id, mutation_type, individuals[0] + descriptive_id, individual, mutation_type = mutation_queue.get(timeout=0.1) except queue.Empty: - pass + # is there is no task, then return nothing + return None, None, None + + individual, mutation_type, applied = self.mutation._mutation(individual, mutation_type=mutation_type) + if applied and individual and self.verifier(individual.graph): + individuals = evaluator([individual]) + if individuals: + # if all is ok return all data + return descriptive_id, mutation_type, individuals[0] + # if something go wrong do not return new individual return descriptive_id, mutation_type, None From 13f0ac1855214776fb74bca6313a03cdee85fe1f Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Mon, 23 Oct 2023 15:48:42 +0300 Subject: [PATCH 17/65] wip --- .../optimisers/genetic/operators/mutation.py | 1 - .../genetic/operators/reproduction.py | 147 +++++++++++------- 2 files changed, 92 insertions(+), 56 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/mutation.py b/golem/core/optimisers/genetic/operators/mutation.py index eb89fa454..dd767ade7 100644 --- a/golem/core/optimisers/genetic/operators/mutation.py +++ b/golem/core/optimisers/genetic/operators/mutation.py @@ -3,7 +3,6 @@ from typing import Callable, Union, Tuple, TYPE_CHECKING, Mapping, Hashable, Optional import numpy as np -from joblib import Parallel, delayed from golem.core.dag.graph import Graph from golem.core.optimisers.adaptive.mab_agents.contextual_mab_agent import ContextualMultiArmedBanditAgent diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 60c2ce547..8093bfc19 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -1,3 +1,14 @@ +ctions import deque +from functools import partial +from itertools import cycle, chain +from typing import Callable, Dict, Union, List, Optional +from multiprocessing import Queue, Manager +import queue +from copy import copy, deepcopy + +import numpy as np +from joblib import Parallel, delayed +from joblib.externals.loky import get_reusable_executor from golem.core.constants import MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER from golem.core.dag.graph_verifier import GraphVerifier @@ -35,6 +46,9 @@ def __init__(self, self.selection = selection self.mutation = mutation self.crossover = crossover + self.verifier = verifier + + self._pop_graph_descriptive_ids = set() self._minimum_valid_ratio = parameters.required_valid_ratio * 0.5 self._log = default_log(self) @@ -53,68 +67,95 @@ def _mutate_over_population(self, population: PopulationT, evaluator: Evaluation target_pop_size = self.parameters.pop_size max_tries = target_pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER max_attempts_count = self.parameters.max_num_of_operator_attempts - mutation_fun = partial(self._mutation_n_evaluation, evaluator=evaluator) + multiplier = target_pop_size / len(population) population_descriptive_ids_mapping = {ind.graph.descriptive_id: ind for ind in population} + finished_initial_individuals = {descriptive_id: False for descriptive_id in population_descriptive_ids_mapping} # mutations counters - mutation_types = self.parameters.mutation_types + mutation_types = self.mutation._operator_agent.actions mutation_count_for_each_ind = {descriptive_id: {mutation_type: 0 for mutation_type in mutation_types} for descriptive_id in population_descriptive_ids_mapping} all_mutations_count_for_each_ind = {descriptive_id: 0 for descriptive_id in population_descriptive_ids_mapping} mutation_tries_for_each_ind = {descriptive_id: {mutation_type: 0 for mutation_type in mutation_types} for descriptive_id in population_descriptive_ids_mapping} - # prepare one mutation for each individual in population - mutation_queue = Manager().Queue() - for descriptive_id, individual in zip(population_descriptive_ids_mapping, population): - # mutation_type is None, let Mutation() choose - mutation_queue.put((descriptive_id, individual, None)) - - # run cycle with evaluation in parallel - # made with joblib.Parallel due to - # it is simple - # it is reliable (joblib/loky solves some problems with multiprocessing) - # joblib is in requirements - new_population = list() - with Parallel(n_jobs=self.mutation.requirements.n_jobs, return_as='generator') as parallel: - ind_generator = parallel(delayed(mutation_fun)(mutation_queue) for _ in range(max_tries * 2)) - for try_num, (parent_descriptive_id, mutation_type, new_ind) in enumerate(ind_generator): - if parent_descriptive_id is None: - continue - mutation_tries_for_each_ind[parent_descriptive_id][mutation_type] += 1 - if new_ind: - mutation_count_for_each_ind[parent_descriptive_id][mutation_type] += 1 - all_mutations_count_for_each_ind[parent_descriptive_id] += 1 + # increase probability of mutation + initial_parameters = deepcopy(self.parameters) + initial_parameters.mutation_prob = 1.0 + self.mutation.update_requirements(parameters=initial_parameters) + + executor = get_reusable_executor(max_workers=self.mutation.requirements.n_jobs) - descriptive_id = new_ind.graph.descriptive_id - if descriptive_id not in self._pop_graph_descriptive_ids: - # add ind to new population - new_population.append(new_ind) - self._pop_graph_descriptive_ids.add(descriptive_id) - if len(new_population) >= target_pop_size: - break + def try_mutation(descriptive_id: str, individual: Individual, mutation_type: Optional[MutationType] = None): + return executor.submit(self._mutation_n_evaluation, descriptive_id, individual, mutation_type, evaluator) - # filter mutations for individual, rely on probabilities + def add_new_individual_to_new_population(new_individual): + mutation_tries_for_each_ind[parent_descriptive_id][mutation_type] += 1 + if new_individual: + descriptive_id = new_individual.graph.descriptive_id + if descriptive_id not in self._pop_graph_descriptive_ids: + mutation_count_for_each_ind[parent_descriptive_id][mutation_type] += 1 + all_mutations_count_for_each_ind[parent_descriptive_id] += 1 + new_population.append(new_individual) + self._pop_graph_descriptive_ids.add(descriptive_id) + + def get_next_parent_descriptive_id_with_allowed_operations(): + for parent_descriptive_id, is_finished in finished_initial_individuals.items(): + if not is_finished: + if all_mutations_count_for_each_ind[parent_descriptive_id] == 0: + # if there are no mutations then make any mutation + allowed_mutation_types = mutation_types + else: + # filter mutations for individual, rely on probabilities and mutation_count # place for error if mutation_types order in _operator_agent and in mutation_types is differ allowed_mutation_types = [] mutation_probabilities = self.mutation._operator_agent.get_action_probs() - for mutation_type, mutation_probability in zip(mutation_types, mutation_probabilities): - real_prob = (mutation_count_for_each_ind[parent_descriptive_id][mutation_type] / - all_mutations_count_for_each_ind[parent_descriptive_id]) - if real_prob < mutation_probability: - allowed_mutation_types.append(mutation_type) - - if allowed_mutation_types: - # choose next mutation with lowest tries count - next_mutation_type = min(allowed_mutation_types, - key=lambda mutation_type: - mutation_tries_for_each_ind[parent_descriptive_id][mutation_type]) - - mutation_queue.put((parent_descriptive_id, - population_descriptive_ids_mapping[parent_descriptive_id], - next_mutation_type)) - if try_num >= max_tries: - break + allowed_mutations_count = [max(1, round(multiplier * x)) for x in mutation_probabilities] + for mutation_type, mutation_probability, allowed_count in zip(mutation_types, + mutation_probabilities, + allowed_mutations_count): + if allowed_count > mutation_count_for_each_ind[parent_descriptive_id][mutation_type]: + real_prob = (mutation_count_for_each_ind[parent_descriptive_id][mutation_type] / + all_mutations_count_for_each_ind[parent_descriptive_id]) + if real_prob < mutation_probability: + allowed_mutation_types.append(mutation_type) + if not allowed_mutation_types: + finished_initial_individuals[parent_descriptive_id] = True + return parent_descriptive_id, allowed_mutation_types + return None, None + + # set up len(mutation_types) // 2 evaluations for each ind + results = deque(try_mutation(*args) + for args in list(population_descriptive_ids_mapping.items()) * int(len(mutation_types) // 2)) + new_population = list() + print(f"{len(results)}") + for _ in range(max_tries - len(results)): + if len(new_population) >= target_pop_size or (not results): + break + + parent_descriptive_id, mutation_type, new_ind = results.popleft().result() + add_new_individual_to_new_population(new_ind) + + parent_descriptive_id, allowed_mutation_types = get_next_parent_descriptive_id_with_allowed_operations() + if allowed_mutation_types: + # choose next mutation with lowest tries count and run it + next_mutation_type = min(allowed_mutation_types, + key=lambda mutation_type: + mutation_tries_for_each_ind[parent_descriptive_id][mutation_type]) + new_res = try_mutation(parent_descriptive_id, + population_descriptive_ids_mapping[parent_descriptive_id], + next_mutation_type) + results.append(new_res) + print(f"{len(results)}: {sum(future._state == 'FINISHED' for future in results)}") + + # if there are any feature then process it and add new_ind to new_population if it is ready + for future in results: + if future._state == 'FINISHED': + add_new_individual_to_new_population(future.result()[-1]) + executor.shutdown(wait=False) + + # Reset mutation probabilities to default + self.mutation.update_requirements(requirements=self.parameters) self._check_final_population(new_population) return new_population @@ -134,14 +175,10 @@ def _check_final_population(self, population: PopulationT) -> None: f' required {target_pop_size}!\n' + helpful_msg) def _mutation_n_evaluation(self, - mutation_queue: Queue, + descriptive_id: str, + individual: Individual, + mutation_type: Optional[MutationType], evaluator: EvaluationOperator): - try: - descriptive_id, individual, mutation_type = mutation_queue.get(timeout=0.1) - except queue.Empty: - # is there is no task, then return nothing - return None, None, None - individual, mutation_type, applied = self.mutation._mutation(individual, mutation_type=mutation_type) if applied and individual and self.verifier(individual.graph): individuals = evaluator([individual]) From fb39d9549549a30a20e2ee599b1a3aaf164e8eb4 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Mon, 23 Oct 2023 17:42:10 +0300 Subject: [PATCH 18/65] wip --- .../genetic/operators/reproduction.py | 79 ++++++++++--------- 1 file changed, 43 insertions(+), 36 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 8093bfc19..263cd775a 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -66,10 +66,8 @@ def reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> P def _mutate_over_population(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: target_pop_size = self.parameters.pop_size max_tries = target_pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER - max_attempts_count = self.parameters.max_num_of_operator_attempts multiplier = target_pop_size / len(population) population_descriptive_ids_mapping = {ind.graph.descriptive_id: ind for ind in population} - finished_initial_individuals = {descriptive_id: False for descriptive_id in population_descriptive_ids_mapping} # mutations counters mutation_types = self.mutation._operator_agent.actions @@ -78,6 +76,8 @@ def _mutate_over_population(self, population: PopulationT, evaluator: Evaluation all_mutations_count_for_each_ind = {descriptive_id: 0 for descriptive_id in population_descriptive_ids_mapping} mutation_tries_for_each_ind = {descriptive_id: {mutation_type: 0 for mutation_type in mutation_types} for descriptive_id in population_descriptive_ids_mapping} + individuals_order = cycle(mutation_count_for_each_ind) + mutations_order = cycle(mutation_types) # increase probability of mutation initial_parameters = deepcopy(self.parameters) @@ -90,7 +90,6 @@ def try_mutation(descriptive_id: str, individual: Individual, mutation_type: Opt return executor.submit(self._mutation_n_evaluation, descriptive_id, individual, mutation_type, evaluator) def add_new_individual_to_new_population(new_individual): - mutation_tries_for_each_ind[parent_descriptive_id][mutation_type] += 1 if new_individual: descriptive_id = new_individual.graph.descriptive_id if descriptive_id not in self._pop_graph_descriptive_ids: @@ -99,34 +98,44 @@ def add_new_individual_to_new_population(new_individual): new_population.append(new_individual) self._pop_graph_descriptive_ids.add(descriptive_id) - def get_next_parent_descriptive_id_with_allowed_operations(): - for parent_descriptive_id, is_finished in finished_initial_individuals.items(): - if not is_finished: - if all_mutations_count_for_each_ind[parent_descriptive_id] == 0: - # if there are no mutations then make any mutation - allowed_mutation_types = mutation_types - else: - # filter mutations for individual, rely on probabilities and mutation_count - # place for error if mutation_types order in _operator_agent and in mutation_types is differ - allowed_mutation_types = [] - mutation_probabilities = self.mutation._operator_agent.get_action_probs() - allowed_mutations_count = [max(1, round(multiplier * x)) for x in mutation_probabilities] - for mutation_type, mutation_probability, allowed_count in zip(mutation_types, - mutation_probabilities, - allowed_mutations_count): - if allowed_count > mutation_count_for_each_ind[parent_descriptive_id][mutation_type]: - real_prob = (mutation_count_for_each_ind[parent_descriptive_id][mutation_type] / - all_mutations_count_for_each_ind[parent_descriptive_id]) - if real_prob < mutation_probability: - allowed_mutation_types.append(mutation_type) - if not allowed_mutation_types: - finished_initial_individuals[parent_descriptive_id] = True - return parent_descriptive_id, allowed_mutation_types + def get_next_parent_descriptive_id_with_next_mutation(): + min_mutation_count = min(all_mutations_count_for_each_ind.values()) + # for parent_descriptive_id, is_finished in finished_initial_individuals.items(): + for _ in range(len(population)): + parent_descriptive_id = next(individuals_order) + if all_mutations_count_for_each_ind[parent_descriptive_id] <= min_mutation_count: + for _ in range(len(mutation_types)): + mutation_type = next(mutations_order) + if mutation_tries_for_each_ind[parent_descriptive_id][mutation_type] == 0: + return parent_descriptive_id, mutation_type + + # place for error if mutation_types order in _operator_agent and in mutation_types is differ + # mutations_shares = dict() + # all_tries = max(1, sum(mutation_tries_for_each_ind[parent_descriptive_id].values())) + # for mutation_type, prob in zip(mutation_types, self.mutation._operator_agent.get_action_probs()): + # shares = (mutation_count_for_each_ind[parent_descriptive_id][mutation_type] / + # max(1, multiplier * prob), + # mutation_tries_for_each_ind[parent_descriptive_id][mutation_type] / all_tries) + # if shares[0] < 1: + # mutations_shares[mutation_type] = shares[0] * 2 + shares[1] + # return parent_descriptive_id, min(mutations_shares.items(), key=lambda x: x[1])[0] + + guessed_counts = dict() + for mutation_type, prob in zip(mutation_types, self.mutation._operator_agent.get_action_probs()): + allowed_count = max(1, multiplier * prob) + if (allowed_count <= mutation_count_for_each_ind[parent_descriptive_id][mutation_type]): + successful_rate = (mutation_count_for_each_ind[parent_descriptive_id][mutation_type] / + mutation_tries_for_each_ind[parent_descriptive_id][mutation_type]) + guess_next_count = (mutation_count_for_each_ind[parent_descriptive_id][mutation_type] + + successful_rate) + guessed_counts[mutation_type] = guess_next_count / allowed_count + return parent_descriptive_id, min(guessed_counts.items(), key=lambda x: x[1])[0] return None, None - # set up len(mutation_types) // 2 evaluations for each ind + # set up some mutations for each ind results = deque(try_mutation(*args) - for args in list(population_descriptive_ids_mapping.items()) * int(len(mutation_types) // 2)) + for args in (list(population_descriptive_ids_mapping.items()) + * self.mutation.requirements.n_jobs)[:self.mutation.requirements.n_jobs]) new_population = list() print(f"{len(results)}") for _ in range(max_tries - len(results)): @@ -136,17 +145,15 @@ def get_next_parent_descriptive_id_with_allowed_operations(): parent_descriptive_id, mutation_type, new_ind = results.popleft().result() add_new_individual_to_new_population(new_ind) - parent_descriptive_id, allowed_mutation_types = get_next_parent_descriptive_id_with_allowed_operations() - if allowed_mutation_types: - # choose next mutation with lowest tries count and run it - next_mutation_type = min(allowed_mutation_types, - key=lambda mutation_type: - mutation_tries_for_each_ind[parent_descriptive_id][mutation_type]) + parent_descriptive_id, next_mutation = get_next_parent_descriptive_id_with_next_mutation() + if next_mutation: + print(f"next_mutation: {next_mutation} / {mutation_tries_for_each_ind[parent_descriptive_id][next_mutation]}") + print(f"other mutations: {list(mutation_tries_for_each_ind[parent_descriptive_id].values())} / {list(mutation_count_for_each_ind[parent_descriptive_id].values())}") + mutation_tries_for_each_ind[parent_descriptive_id][next_mutation] += 1 new_res = try_mutation(parent_descriptive_id, population_descriptive_ids_mapping[parent_descriptive_id], - next_mutation_type) + next_mutation) results.append(new_res) - print(f"{len(results)}: {sum(future._state == 'FINISHED' for future in results)}") # if there are any feature then process it and add new_ind to new_population if it is ready for future in results: From 0052e5212c9891d6f4fa5a0387072659c88d0905 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Tue, 24 Oct 2023 12:16:05 +0300 Subject: [PATCH 19/65] wip, drive to 3 stages of controlling --- golem/core/constants.py | 2 +- .../genetic/operators/reproduction.py | 119 +++++++++++++++--- 2 files changed, 103 insertions(+), 18 deletions(-) diff --git a/golem/core/constants.py b/golem/core/constants.py index 73214eb21..78ff9e16f 100644 --- a/golem/core/constants.py +++ b/golem/core/constants.py @@ -1,6 +1,6 @@ import numpy as np -MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER = 5 +MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER = 20 MAX_GRAPH_GEN_ATTEMPTS = 1000 MAX_TUNING_METRIC_VALUE = np.inf MIN_TIME_FOR_TUNING_IN_SEC = 3 diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 263cd775a..8d7d2185f 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -1,6 +1,8 @@ -ctions import deque +from collections import deque +from concurrent.futures import as_completed from functools import partial from itertools import cycle, chain +from math import ceil from typing import Callable, Dict, Union, List, Optional from multiprocessing import Queue, Manager import queue @@ -60,36 +62,65 @@ def reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> P """ selected_individuals = self.selection(population, self.parameters.pop_size) new_population = self.crossover(selected_individuals) + + # increase probability of mutation + initial_parameters = deepcopy(self.parameters) + initial_parameters.mutation_prob = 1.0 + self.mutation.update_requirements(parameters=initial_parameters) + + # create new populatin with mutations new_population = self._mutate_over_population(new_population, evaluator) + + # Reset mutation probabilities to default + self.mutation.update_requirements(requirements=self.parameters) + return new_population def _mutate_over_population(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: + # some params + n_jobs = self.mutation.requirements.n_jobs + tasks_queue_length = n_jobs + 1 target_pop_size = self.parameters.pop_size - max_tries = target_pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER multiplier = target_pop_size / len(population) population_descriptive_ids_mapping = {ind.graph.descriptive_id: ind for ind in population} - - # mutations counters mutation_types = self.mutation._operator_agent.actions + + # counters and limits + max_tries = target_pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER + tries = [0, max_tries] # [current count, max count] + max_tries_for_each_mutation = 2 * ceil(max_tries / target_pop_size / len(mutation_types)) mutation_count_for_each_ind = {descriptive_id: {mutation_type: 0 for mutation_type in mutation_types} for descriptive_id in population_descriptive_ids_mapping} all_mutations_count_for_each_ind = {descriptive_id: 0 for descriptive_id in population_descriptive_ids_mapping} mutation_tries_for_each_ind = {descriptive_id: {mutation_type: 0 for mutation_type in mutation_types} for descriptive_id in population_descriptive_ids_mapping} - individuals_order = cycle(mutation_count_for_each_ind) - mutations_order = cycle(mutation_types) + guessed_mutation_count = {descriptive_id: {mutation_type: 0 for mutation_type in mutation_types} + for descriptive_id in population_descriptive_ids_mapping} - # increase probability of mutation - initial_parameters = deepcopy(self.parameters) - initial_parameters.mutation_prob = 1.0 - self.mutation.update_requirements(parameters=initial_parameters) + # queues + descriptive_id_queue = cycle(mutation_count_for_each_ind) + mutation_type_queue = cycle(mutation_types) + forbidden_mutations = {descriptive_id: set() for descriptive_id in population_descriptive_ids_mapping} + + def iterate_over_descriptive_ids(count: int = len(population_descriptive_ids_mapping)): + for _ in range(count): + yield next(descriptive_id_queue) - executor = get_reusable_executor(max_workers=self.mutation.requirements.n_jobs) + def iterate_over_mutations(descriptive_id: str, count: int = len(mutation_types)): + for _ in range(count): + mutation_type = next(mutation_type_queue) + if mutation_type not in forbidden_mutations[descriptive_id]: + yield mutation_type - def try_mutation(descriptive_id: str, individual: Individual, mutation_type: Optional[MutationType] = None): - return executor.submit(self._mutation_n_evaluation, descriptive_id, individual, mutation_type, evaluator) + # additional functions + def try_mutation(descriptive_id: str, mutation_type: Optional[MutationType] = None): + tries[0] += 1 + mutation_tries_for_each_ind[descriptive_id][mutation_type] += 1 + return executor.submit(self._mutation_n_evaluation, descriptive_id, + population_descriptive_ids_mapping[descriptive_id], + mutation_type, evaluator) - def add_new_individual_to_new_population(new_individual): + def add_new_individual_to_new_population(parent_descriptive_id, mutation_type, new_individual): if new_individual: descriptive_id = new_individual.graph.descriptive_id if descriptive_id not in self._pop_graph_descriptive_ids: @@ -97,6 +128,63 @@ def add_new_individual_to_new_population(new_individual): all_mutations_count_for_each_ind[parent_descriptive_id] += 1 new_population.append(new_individual) self._pop_graph_descriptive_ids.add(descriptive_id) + if len(new_population) == target_pop_size: + return True + return False + + # start reproducing + new_population = [] + executor = get_reusable_executor(max_workers=n_jobs) + + # stage 1 + # set up each type of mutation for each individual + futures = deque + for descriptive_id in np.random.permutation(list(population_descriptive_ids_mapping)): + for mutation_type in np.random.permutation(mutation_types): + futures.append(try_mutation(descriptive_id, mutation_type)) + # get some results from parallel computation for reducing calculation queue + for future in as_completed(futures): + population_is_prepared = add_new_individual_to_new_population(*future.result()) + if population_is_prepared: return new_population + if len(futures) <= tasks_queue_length: break + + + # stage 2 + # set up mutations until of all them will be applied once + # if mutation does not work for some times, then do not use it + + while True: + # get next finished future + for _ in range(len(futures)): + future = futures.popleft() + if future._state == 'FINISHED': break + futures.append(future) + + # add new individual to new population + parent_descriptive_id, mutation_type, new_ind = future.result() + population_is_prepared = add_new_individual_to_new_population(parent_descriptive_id, mutation_type, new_ind) + if population_is_prepared: return new_population + # if there are a lot of tasks go to next task + if len(futures) > tasks_queue_length: continue + + # create new futures with mutation if mutation is not applied yet and not forbidden + for descriptive_id in iterate_over_descriptive_ids(): + for mutation_type in iterate_over_mutations(descriptive_id): + if mutation_tries_for_each_ind[descriptive_id][mutation_type] < max_tries_for_each_mutation: + if mutation_count_for_each_ind[descriptive_id][mutation_type] == 0: + new_future = try_mutation(descriptive_id, mutation_type) + futures.append(new_future) + else: + # forbidd mutation if it not works + forbidden_mutations[descriptive_id].add(mutation_type) + + print(1) + # check that forbidden_mutations works + # check that as_completed(futures) works with list + + + + def get_next_parent_descriptive_id_with_next_mutation(): min_mutation_count = min(all_mutations_count_for_each_ind.values()) @@ -161,9 +249,6 @@ def get_next_parent_descriptive_id_with_next_mutation(): add_new_individual_to_new_population(future.result()[-1]) executor.shutdown(wait=False) - # Reset mutation probabilities to default - self.mutation.update_requirements(requirements=self.parameters) - self._check_final_population(new_population) return new_population From 72ee29a5025481db677c61916dafc5ce4e2dafda Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Tue, 24 Oct 2023 13:57:35 +0300 Subject: [PATCH 20/65] wip, back to simple version --- .../genetic/operators/reproduction.py | 208 ++++++------------ 1 file changed, 64 insertions(+), 144 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 8d7d2185f..35af25ce5 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -1,3 +1,4 @@ +import time from collections import deque from concurrent.futures import as_completed from functools import partial @@ -62,65 +63,54 @@ def reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> P """ selected_individuals = self.selection(population, self.parameters.pop_size) new_population = self.crossover(selected_individuals) - - # increase probability of mutation - initial_parameters = deepcopy(self.parameters) - initial_parameters.mutation_prob = 1.0 - self.mutation.update_requirements(parameters=initial_parameters) - - # create new populatin with mutations new_population = self._mutate_over_population(new_population, evaluator) - - # Reset mutation probabilities to default - self.mutation.update_requirements(requirements=self.parameters) - + self._check_final_population(new_population) return new_population def _mutate_over_population(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: # some params n_jobs = self.mutation.requirements.n_jobs - tasks_queue_length = n_jobs + 1 target_pop_size = self.parameters.pop_size - multiplier = target_pop_size / len(population) population_descriptive_ids_mapping = {ind.graph.descriptive_id: ind for ind in population} mutation_types = self.mutation._operator_agent.actions - - # counters and limits - max_tries = target_pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER - tries = [0, max_tries] # [current count, max count] - max_tries_for_each_mutation = 2 * ceil(max_tries / target_pop_size / len(mutation_types)) + left_tries = [target_pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER] + mutations_per_individual = left_tries[0] / len(population) + all_mutations_count_for_each_ind = {descriptive_id: 0 + for descriptive_id in population_descriptive_ids_mapping} mutation_count_for_each_ind = {descriptive_id: {mutation_type: 0 for mutation_type in mutation_types} for descriptive_id in population_descriptive_ids_mapping} - all_mutations_count_for_each_ind = {descriptive_id: 0 for descriptive_id in population_descriptive_ids_mapping} - mutation_tries_for_each_ind = {descriptive_id: {mutation_type: 0 for mutation_type in mutation_types} - for descriptive_id in population_descriptive_ids_mapping} - guessed_mutation_count = {descriptive_id: {mutation_type: 0 for mutation_type in mutation_types} - for descriptive_id in population_descriptive_ids_mapping} - # queues - descriptive_id_queue = cycle(mutation_count_for_each_ind) - mutation_type_queue = cycle(mutation_types) - forbidden_mutations = {descriptive_id: set() for descriptive_id in population_descriptive_ids_mapping} - - def iterate_over_descriptive_ids(count: int = len(population_descriptive_ids_mapping)): - for _ in range(count): - yield next(descriptive_id_queue) + # increase probability of mutation + initial_parameters = deepcopy(self.parameters) + initial_parameters.mutation_prob = 1.0 + self.mutation.update_requirements(parameters=initial_parameters) - def iterate_over_mutations(descriptive_id: str, count: int = len(mutation_types)): - for _ in range(count): - mutation_type = next(mutation_type_queue) - if mutation_type not in forbidden_mutations[descriptive_id]: - yield mutation_type # additional functions def try_mutation(descriptive_id: str, mutation_type: Optional[MutationType] = None): - tries[0] += 1 - mutation_tries_for_each_ind[descriptive_id][mutation_type] += 1 + left_tries[0] -= 1 return executor.submit(self._mutation_n_evaluation, descriptive_id, population_descriptive_ids_mapping[descriptive_id], mutation_type, evaluator) - def add_new_individual_to_new_population(parent_descriptive_id, mutation_type, new_individual): + def check_and_try_mutation(parent_descriptive_id: str, + mutation_type: Optional[MutationType] = None, + count: int = 1): + # probs should be the same order as mutation_types + probs = dict(zip(mutation_types, self.mutation._operator_agent.get_action_probs())) + # check probability allows to make mutations + if (probs[mutation_type] > (mutation_count_for_each_ind[parent_descriptive_id][mutation_type] / + all_mutations_count_for_each_ind[parent_descriptive_id])): + # check that there is not enough mutations + if all_mutations_count_for_each_ind[parent_descriptive_id] < mutations_per_individual: + for _ in range(count): + try_mutation(parent_descriptive_id, mutation_type) + return True + return False + + def add_new_individual_to_new_population(parent_descriptive_id: str, + mutation_type: MutationType, + new_individual: Individual): if new_individual: descriptive_id = new_individual.graph.descriptive_id if descriptive_id not in self._pop_graph_descriptive_ids: @@ -128,8 +118,7 @@ def add_new_individual_to_new_population(parent_descriptive_id, mutation_type, n all_mutations_count_for_each_ind[parent_descriptive_id] += 1 new_population.append(new_individual) self._pop_graph_descriptive_ids.add(descriptive_id) - if len(new_population) == target_pop_size: - return True + return True return False # start reproducing @@ -138,118 +127,49 @@ def add_new_individual_to_new_population(parent_descriptive_id, mutation_type, n # stage 1 # set up each type of mutation for each individual - futures = deque - for descriptive_id in np.random.permutation(list(population_descriptive_ids_mapping)): - for mutation_type in np.random.permutation(mutation_types): - futures.append(try_mutation(descriptive_id, mutation_type)) - # get some results from parallel computation for reducing calculation queue - for future in as_completed(futures): - population_is_prepared = add_new_individual_to_new_population(*future.result()) - if population_is_prepared: return new_population - if len(futures) <= tasks_queue_length: break - + futures = deque(try_mutation(descriptive_id, mutation_type) + for mutation_type in np.random.permutation(mutation_types) + for descriptive_id in np.random.permutation(list(population_descriptive_ids_mapping))) # stage 2 - # set up mutations until of all them will be applied once - # if mutation does not work for some times, then do not use it - - while True: - # get next finished future - for _ in range(len(futures)): - future = futures.popleft() - if future._state == 'FINISHED': break - futures.append(future) - - # add new individual to new population - parent_descriptive_id, mutation_type, new_ind = future.result() - population_is_prepared = add_new_individual_to_new_population(parent_descriptive_id, mutation_type, new_ind) - if population_is_prepared: return new_population - # if there are a lot of tasks go to next task - if len(futures) > tasks_queue_length: continue - - # create new futures with mutation if mutation is not applied yet and not forbidden - for descriptive_id in iterate_over_descriptive_ids(): - for mutation_type in iterate_over_mutations(descriptive_id): - if mutation_tries_for_each_ind[descriptive_id][mutation_type] < max_tries_for_each_mutation: - if mutation_count_for_each_ind[descriptive_id][mutation_type] == 0: - new_future = try_mutation(descriptive_id, mutation_type) - futures.append(new_future) - else: - # forbidd mutation if it not works - forbidden_mutations[descriptive_id].add(mutation_type) - - print(1) - # check that forbidden_mutations works - # check that as_completed(futures) works with list - - - - - - def get_next_parent_descriptive_id_with_next_mutation(): - min_mutation_count = min(all_mutations_count_for_each_ind.values()) - # for parent_descriptive_id, is_finished in finished_initial_individuals.items(): - for _ in range(len(population)): - parent_descriptive_id = next(individuals_order) - if all_mutations_count_for_each_ind[parent_descriptive_id] <= min_mutation_count: - for _ in range(len(mutation_types)): - mutation_type = next(mutations_order) - if mutation_tries_for_each_ind[parent_descriptive_id][mutation_type] == 0: - return parent_descriptive_id, mutation_type - - # place for error if mutation_types order in _operator_agent and in mutation_types is differ - # mutations_shares = dict() - # all_tries = max(1, sum(mutation_tries_for_each_ind[parent_descriptive_id].values())) - # for mutation_type, prob in zip(mutation_types, self.mutation._operator_agent.get_action_probs()): - # shares = (mutation_count_for_each_ind[parent_descriptive_id][mutation_type] / - # max(1, multiplier * prob), - # mutation_tries_for_each_ind[parent_descriptive_id][mutation_type] / all_tries) - # if shares[0] < 1: - # mutations_shares[mutation_type] = shares[0] * 2 + shares[1] - # return parent_descriptive_id, min(mutations_shares.items(), key=lambda x: x[1])[0] - - guessed_counts = dict() - for mutation_type, prob in zip(mutation_types, self.mutation._operator_agent.get_action_probs()): - allowed_count = max(1, multiplier * prob) - if (allowed_count <= mutation_count_for_each_ind[parent_descriptive_id][mutation_type]): - successful_rate = (mutation_count_for_each_ind[parent_descriptive_id][mutation_type] / - mutation_tries_for_each_ind[parent_descriptive_id][mutation_type]) - guess_next_count = (mutation_count_for_each_ind[parent_descriptive_id][mutation_type] + - successful_rate) - guessed_counts[mutation_type] = guess_next_count / allowed_count - return parent_descriptive_id, min(guessed_counts.items(), key=lambda x: x[1])[0] - return None, None - - # set up some mutations for each ind - results = deque(try_mutation(*args) - for args in (list(population_descriptive_ids_mapping.items()) - * self.mutation.requirements.n_jobs)[:self.mutation.requirements.n_jobs]) - new_population = list() - print(f"{len(results)}") - for _ in range(max_tries - len(results)): - if len(new_population) >= target_pop_size or (not results): + delayed_mutations = deque() + excessive_mutation_count = 0 + optimal_future_length = n_jobs + 4 + while futures: + if len(new_population) == target_pop_size or left_tries[0] == 0: break - parent_descriptive_id, mutation_type, new_ind = results.popleft().result() - add_new_individual_to_new_population(new_ind) - - parent_descriptive_id, next_mutation = get_next_parent_descriptive_id_with_next_mutation() - if next_mutation: - print(f"next_mutation: {next_mutation} / {mutation_tries_for_each_ind[parent_descriptive_id][next_mutation]}") - print(f"other mutations: {list(mutation_tries_for_each_ind[parent_descriptive_id].values())} / {list(mutation_count_for_each_ind[parent_descriptive_id].values())}") - mutation_tries_for_each_ind[parent_descriptive_id][next_mutation] += 1 - new_res = try_mutation(parent_descriptive_id, - population_descriptive_ids_mapping[parent_descriptive_id], - next_mutation) - results.append(new_res) + # add new individual to new population + parent_descriptive_id, mutation_type, new_ind = futures.popleft().result() + added = add_new_individual_to_new_population(parent_descriptive_id, mutation_type, new_ind) + + # skip new mutation + if added and excessive_mutation_count > 0 and len(futures) >= optimal_future_length: + excessive_mutation_count -= 1 + continue + + # create new future with same mutation and same individual + count = min(2, max(1, optimal_future_length - len(futures))) + applied = check_and_try_mutation(parent_descriptive_id, mutation_type, count) + if not applied or len(futures) < optimal_future_length: + if len(futures) < optimal_future_length: + print(1) + delayed_mutations.append((parent_descriptive_id, mutation_type)) + for _ in range(len(delayed_mutations) - 1): + parent_descriptive_id, mutation_type = delayed_mutations.popleft() + applied = check_and_try_mutation(parent_descriptive_id, mutation_type, count) + if applied: break + delayed_mutations.append((parent_descriptive_id, mutation_type)) + excessive_mutation_count += count - 1 if applied else 0 # if there are any feature then process it and add new_ind to new_population if it is ready - for future in results: + for future in futures: if future._state == 'FINISHED': - add_new_individual_to_new_population(future.result()[-1]) + add_new_individual_to_new_population(*future.result()) executor.shutdown(wait=False) - self._check_final_population(new_population) + # Reset mutation probabilities to default + self.mutation.update_requirements(requirements=self.parameters) return new_population def _check_final_population(self, population: PopulationT) -> None: From 0845c11f19ee99f10f7006a8fee5ac8cbb3f0041 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Tue, 24 Oct 2023 16:27:42 +0300 Subject: [PATCH 21/65] simple version is finished but not polished --- .../genetic/operators/reproduction.py | 56 ++++++++++++------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 35af25ce5..0d22d436b 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -68,15 +68,15 @@ def reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> P return new_population def _mutate_over_population(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: - # some params n_jobs = self.mutation.requirements.n_jobs target_pop_size = self.parameters.pop_size population_descriptive_ids_mapping = {ind.graph.descriptive_id: ind for ind in population} mutation_types = self.mutation._operator_agent.actions left_tries = [target_pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER] - mutations_per_individual = left_tries[0] / len(population) + mutations_per_individual = ceil(target_pop_size / len(population)) all_mutations_count_for_each_ind = {descriptive_id: 0 for descriptive_id in population_descriptive_ids_mapping} + mutations_count = {mutation_type: 0 for mutation_type in mutation_types} mutation_count_for_each_ind = {descriptive_id: {mutation_type: 0 for mutation_type in mutation_types} for descriptive_id in population_descriptive_ids_mapping} @@ -85,7 +85,6 @@ def _mutate_over_population(self, population: PopulationT, evaluator: Evaluation initial_parameters.mutation_prob = 1.0 self.mutation.update_requirements(parameters=initial_parameters) - # additional functions def try_mutation(descriptive_id: str, mutation_type: Optional[MutationType] = None): left_tries[0] -= 1 @@ -98,13 +97,17 @@ def check_and_try_mutation(parent_descriptive_id: str, count: int = 1): # probs should be the same order as mutation_types probs = dict(zip(mutation_types, self.mutation._operator_agent.get_action_probs())) + real_probs = {mutation_type: mutation_count_for_each_ind[parent_descriptive_id][mutation_type] / + max(1, all_mutations_count_for_each_ind[parent_descriptive_id]) + for mutation_type in mutation_types} # check probability allows to make mutations - if (probs[mutation_type] > (mutation_count_for_each_ind[parent_descriptive_id][mutation_type] / - all_mutations_count_for_each_ind[parent_descriptive_id])): + if (all_mutations_count_for_each_ind[parent_descriptive_id] == 0 or + probs[mutation_type] > real_probs[mutation_type] or + len(set(real_probs.values())) == 1): # check that there is not enough mutations if all_mutations_count_for_each_ind[parent_descriptive_id] < mutations_per_individual: for _ in range(count): - try_mutation(parent_descriptive_id, mutation_type) + futures.append(try_mutation(parent_descriptive_id, mutation_type)) return True return False @@ -116,6 +119,8 @@ def add_new_individual_to_new_population(parent_descriptive_id: str, if descriptive_id not in self._pop_graph_descriptive_ids: mutation_count_for_each_ind[parent_descriptive_id][mutation_type] += 1 all_mutations_count_for_each_ind[parent_descriptive_id] += 1 + mutations_count[mutation_type] += 1 + new_population.append(new_individual) self._pop_graph_descriptive_ids.add(descriptive_id) return True @@ -133,34 +138,45 @@ def add_new_individual_to_new_population(parent_descriptive_id: str, # stage 2 delayed_mutations = deque() - excessive_mutation_count = 0 - optimal_future_length = n_jobs + 4 + individual_id_with_lowest_mutations, rarest_mutation_type = None, None while futures: - if len(new_population) == target_pop_size or left_tries[0] == 0: + if len(new_population) == target_pop_size or left_tries[0] <= 0: break # add new individual to new population parent_descriptive_id, mutation_type, new_ind = futures.popleft().result() added = add_new_individual_to_new_population(parent_descriptive_id, mutation_type, new_ind) - # skip new mutation - if added and excessive_mutation_count > 0 and len(futures) >= optimal_future_length: - excessive_mutation_count -= 1 - continue + # define rarest ind and mutation + if added: + key_fun = lambda x: x[1] + frequent_mutation_type = max(mutations_count.items(), key=key_fun)[0] + rarest_mutation_type = min(mutations_count.items(), key=key_fun)[0] + individual_id_with_lowest_mutations = min(all_mutations_count_for_each_ind.items(), key=key_fun)[0] # create new future with same mutation and same individual - count = min(2, max(1, optimal_future_length - len(futures))) + count = (1 + + (individual_id_with_lowest_mutations == parent_descriptive_id) + + (rarest_mutation_type == mutation_type)) applied = check_and_try_mutation(parent_descriptive_id, mutation_type, count) - if not applied or len(futures) < optimal_future_length: - if len(futures) < optimal_future_length: - print(1) + + + + # if there is no need in parent_descriptive_id & mutation_type mutation + # then try to find new mutation + if not applied: delayed_mutations.append((parent_descriptive_id, mutation_type)) for _ in range(len(delayed_mutations) - 1): parent_descriptive_id, mutation_type = delayed_mutations.popleft() - applied = check_and_try_mutation(parent_descriptive_id, mutation_type, count) - if applied: break + if ((individual_id_with_lowest_mutations == parent_descriptive_id or + mutation_type == rarest_mutation_type) and + mutation_type != frequent_mutation_type): + futures.append(try_mutation(parent_descriptive_id, mutation_type)) + break + else: + applied = check_and_try_mutation(parent_descriptive_id, mutation_type) + if applied: break delayed_mutations.append((parent_descriptive_id, mutation_type)) - excessive_mutation_count += count - 1 if applied else 0 # if there are any feature then process it and add new_ind to new_population if it is ready for future in futures: From 1b19d34ce43c589572b0045b7992a508c8371ce4 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Tue, 24 Oct 2023 17:59:41 +0300 Subject: [PATCH 22/65] some fixes --- golem/core/constants.py | 2 +- .../core/optimisers/genetic/operators/reproduction.py | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/golem/core/constants.py b/golem/core/constants.py index 78ff9e16f..0d6d89958 100644 --- a/golem/core/constants.py +++ b/golem/core/constants.py @@ -1,6 +1,6 @@ import numpy as np -MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER = 20 +MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER = 10 MAX_GRAPH_GEN_ATTEMPTS = 1000 MAX_TUNING_METRIC_VALUE = np.inf MIN_TIME_FOR_TUNING_IN_SEC = 3 diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 0d22d436b..4fb203ddb 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -68,7 +68,9 @@ def reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> P return new_population def _mutate_over_population(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: + # TODO n_jobs may be -1, should be fixed n_jobs = self.mutation.requirements.n_jobs + n_jobs = 8 target_pop_size = self.parameters.pop_size population_descriptive_ids_mapping = {ind.graph.descriptive_id: ind for ind in population} mutation_types = self.mutation._operator_agent.actions @@ -139,12 +141,19 @@ def add_new_individual_to_new_population(parent_descriptive_id: str, # stage 2 delayed_mutations = deque() individual_id_with_lowest_mutations, rarest_mutation_type = None, None + times = [] while futures: if len(new_population) == target_pop_size or left_tries[0] <= 0: break + # get next finished future + for _ in range(int(len(futures) * 3)): + future = futures.popleft() + if future._state == 'FINISHED': break + futures.append(future) + # add new individual to new population - parent_descriptive_id, mutation_type, new_ind = futures.popleft().result() + parent_descriptive_id, mutation_type, new_ind = future.result() added = add_new_individual_to_new_population(parent_descriptive_id, mutation_type, new_ind) # define rarest ind and mutation From 97d6d9ce1c6c608fa1a98a0c06cb5ec966ee9697 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Wed, 25 Oct 2023 16:04:03 +0300 Subject: [PATCH 23/65] some fixes after tests --- golem/core/constants.py | 2 +- .../genetic/operators/reproduction.py | 26 ++++++++++++------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/golem/core/constants.py b/golem/core/constants.py index 0d6d89958..73214eb21 100644 --- a/golem/core/constants.py +++ b/golem/core/constants.py @@ -1,6 +1,6 @@ import numpy as np -MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER = 10 +MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER = 5 MAX_GRAPH_GEN_ATTEMPTS = 1000 MAX_TUNING_METRIC_VALUE = np.inf MIN_TIME_FOR_TUNING_IN_SEC = 3 diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 4fb203ddb..642d33383 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -147,10 +147,11 @@ def add_new_individual_to_new_population(parent_descriptive_id: str, break # get next finished future - for _ in range(int(len(futures) * 3)): + while True: future = futures.popleft() if future._state == 'FINISHED': break futures.append(future) + time.sleep(0.01) # to prevent flooding # add new individual to new population parent_descriptive_id, mutation_type, new_ind = future.result() @@ -158,10 +159,15 @@ def add_new_individual_to_new_population(parent_descriptive_id: str, # define rarest ind and mutation if added: - key_fun = lambda x: x[1] - frequent_mutation_type = max(mutations_count.items(), key=key_fun)[0] - rarest_mutation_type = min(mutations_count.items(), key=key_fun)[0] - individual_id_with_lowest_mutations = min(all_mutations_count_for_each_ind.items(), key=key_fun)[0] + all_mutations = sum(mutations_count.values()) + probs = dict(zip(mutation_types, self.mutation._operator_agent.get_action_probs())) + real_probs = {mutation_type: mutations_count[mutation_type] / (all_mutations * probs[mutation_type]) + for mutation_type in mutation_types} + + frequent_mutation_type = max(real_probs.items(), key=lambda x: x[1])[0] + rarest_mutation_type = min(real_probs.items(), key=lambda x: x[1])[0] + individual_id_with_lowest_mutations = min(all_mutations_count_for_each_ind.items(), + key=lambda x: x[1])[0] # create new future with same mutation and same individual count = (1 + @@ -169,10 +175,9 @@ def add_new_individual_to_new_population(parent_descriptive_id: str, (rarest_mutation_type == mutation_type)) applied = check_and_try_mutation(parent_descriptive_id, mutation_type, count) - - # if there is no need in parent_descriptive_id & mutation_type mutation # then try to find new mutation + count = n_jobs + 1 - len(futures) if not applied: delayed_mutations.append((parent_descriptive_id, mutation_type)) for _ in range(len(delayed_mutations) - 1): @@ -181,11 +186,12 @@ def add_new_individual_to_new_population(parent_descriptive_id: str, mutation_type == rarest_mutation_type) and mutation_type != frequent_mutation_type): futures.append(try_mutation(parent_descriptive_id, mutation_type)) - break + applied = True else: applied = check_and_try_mutation(parent_descriptive_id, mutation_type) - if applied: break - delayed_mutations.append((parent_descriptive_id, mutation_type)) + count -= applied + if count <= 0: break + if not applied: delayed_mutations.append((parent_descriptive_id, mutation_type)) # if there are any feature then process it and add new_ind to new_population if it is ready for future in futures: From d14272f2a572acb083c6044d87e513f17285c465 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Fri, 27 Oct 2023 10:41:07 +0300 Subject: [PATCH 24/65] Some fixes --- golem/core/optimisers/populational_optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/golem/core/optimisers/populational_optimizer.py b/golem/core/optimisers/populational_optimizer.py index c95be9ce2..23b05d348 100644 --- a/golem/core/optimisers/populational_optimizer.py +++ b/golem/core/optimisers/populational_optimizer.py @@ -175,4 +175,4 @@ def __init__(self, *args): self.message = args[0] or None def __str__(self): - return self.message or 'Too many fitness evaluation errors.' + return self.message or 'Too many fitness evaluation errors.' \ No newline at end of file From 1a4e5e1ccd5f45cd2d01e8e03a68635dfbce3596 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 26 Oct 2023 18:37:29 +0300 Subject: [PATCH 25/65] wip --- .../optimisers/genetic/operators/mutation.py | 67 +++++--- .../genetic/operators/reproduction.py | 160 +++--------------- 2 files changed, 65 insertions(+), 162 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/mutation.py b/golem/core/optimisers/genetic/operators/mutation.py index dd767ade7..f09cd4de9 100644 --- a/golem/core/optimisers/genetic/operators/mutation.py +++ b/golem/core/optimisers/genetic/operators/mutation.py @@ -22,8 +22,8 @@ if TYPE_CHECKING: from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters -MutationFunc = Callable[[Graph, GraphRequirements, GraphGenerationParams, AlgorithmParameters], Graph] MutationType = Union[MutationTypesEnum, Callable] +MutationFunc = Callable[[Graph, GraphRequirements, GraphGenerationParams, AlgorithmParameters], Graph] MutationIdType = Hashable MutationRepo = Mapping[MutationIdType, MutationFunc] @@ -78,42 +78,40 @@ def _init_operator_agent(graph_gen_params: GraphGenerationParams, def agent(self) -> OperatorAgent: return self._operator_agent - def __call__(self, - population: Union[Individual, PopulationT], - mutation_type: Union[None, MutationTypesEnum, Callable] = None, - ) -> Union[Individual, PopulationT]: + def __call__(self, population: Union[Individual, PopulationT]) -> Union[Individual, PopulationT]: if isinstance(population, Individual): population = [population] - final_population, _, application_attempts = \ - tuple(zip(*map(lambda individual: self._mutation(individual, mutation_type=mutation_type), population))) - - # drop individuals to which mutations could not be applied - final_population = [ind for ind, init_ind, attempt in zip(final_population, population, application_attempts) - if not attempt or ind.graph != init_ind.graph] + final_population = [] + for individual in population: + new_ind, _, applied = self._mutation(individual) + if not applied or new_ind.graph != individual.graph: + final_population.append(new_ind) if len(population) == 1: return final_population[0] if final_population else final_population return final_population - def _mutation(self, - individual: Individual, - mutation_type: Union[None, MutationTypesEnum, Callable] = None, - ) -> Tuple[Individual, Union[MutationTypesEnum, Callable], bool]: + def _mutation(self, individual: Individual) -> Tuple[Individual, Optional[MutationIdType], bool]: """ Function applies mutation operator to graph """ - new_graph = deepcopy(individual.graph) - mutation_type = mutation_type or self._operator_agent.choose_action(new_graph) - applied = self._will_mutation_be_applied(mutation_type) - if applied: - new_graph = self._apply_mutations(new_graph, mutation_type=mutation_type) + application_attempt = False + mutation_applied = None + for _ in range(self.parameters.max_num_of_operator_attempts): + new_graph = deepcopy(individual.graph) + + new_graph, mutation_applied = self._apply_mutations(new_graph) + if mutation_applied is None: + continue + application_attempt = True is_correct_graph = self.graph_generation_params.verifier(new_graph) if is_correct_graph: parent_operator = ParentOperator(type_='mutation', - operators=mutation_type, + operators=mutation_applied, parent_individuals=individual) individual = Individual(new_graph, parent_operator, metadata=self.requirements.static_individual_metadata) + break else: # Collect invalid actions self.agent_experience.collect_experience(individual, mutation_applied, reward=-1.0) @@ -122,11 +120,9 @@ def _mutation(self, 'Please check optimization parameters for correctness.') return individual, mutation_applied, application_attempt - def _sample_num_of_mutations(self, mutation_type: Union[MutationTypesEnum, Callable]) -> int: + def _sample_num_of_mutations(self) -> int: # most of the time returns 1 or rarely several mutations - # if mutation is custom apply it only once - is_custom_mutation = isinstance(mutation_type, Callable) - if not is_custom_mutation and self.parameters.variable_mutation_num: + if self.parameters.variable_mutation_num: num_mut = max(int(round(np.random.lognormal(0, sigma=0.5))), 1) else: num_mut = 1 @@ -153,7 +149,7 @@ def _adapt_and_apply_mutation(self, new_graph: Graph, mutation_type) -> Tuple[Gr new_graph = mutation_func(new_graph, requirements=self.requirements, graph_gen_params=self.graph_generation_params, parameters=self.parameters) - return new_graph + return new_graph, applied def _will_mutation_be_applied(self, mutation_type: Union[MutationTypesEnum, Callable]) -> bool: return random() <= self.parameters.mutation_prob and mutation_type is not MutationTypesEnum.none @@ -165,3 +161,22 @@ def _get_mutation_func(self, mutation_type: Union[MutationTypesEnum, Callable]) mutation_func = self._mutations_repo[mutation_type] adapted_mutation_func = self.graph_generation_params.adapter.adapt_func(mutation_func) return adapted_mutation_func + + +class FastSingleMutation(Mutation): + def __call__(self, individual: Individual) -> Individual: + new_graph = deepcopy(individual.graph) + + mutation_type = self._operator_agent.choose_action(new_graph) + mutation_func = self._get_mutation_func(mutation_type) + + new_graph = mutation_func(new_graph, requirements=self.requirements, + graph_gen_params=self.graph_generation_params, + parameters=self.parameters) + + parent_operator = ParentOperator(type_='mutation', + operators=mutation_type, + parent_individuals=individual) + individual = Individual(new_graph, parent_operator, + metadata=self.requirements.static_individual_metadata) + return individual diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 642d33383..253dca610 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -68,136 +68,30 @@ def reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> P return new_population def _mutate_over_population(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: - # TODO n_jobs may be -1, should be fixed - n_jobs = self.mutation.requirements.n_jobs - n_jobs = 8 - target_pop_size = self.parameters.pop_size - population_descriptive_ids_mapping = {ind.graph.descriptive_id: ind for ind in population} - mutation_types = self.mutation._operator_agent.actions - left_tries = [target_pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER] - mutations_per_individual = ceil(target_pop_size / len(population)) - all_mutations_count_for_each_ind = {descriptive_id: 0 - for descriptive_id in population_descriptive_ids_mapping} - mutations_count = {mutation_type: 0 for mutation_type in mutation_types} - mutation_count_for_each_ind = {descriptive_id: {mutation_type: 0 for mutation_type in mutation_types} - for descriptive_id in population_descriptive_ids_mapping} - - # increase probability of mutation + # increase probability of mutation to not spend tries for no mutations initial_parameters = deepcopy(self.parameters) initial_parameters.mutation_prob = 1.0 self.mutation.update_requirements(parameters=initial_parameters) - # additional functions - def try_mutation(descriptive_id: str, mutation_type: Optional[MutationType] = None): - left_tries[0] -= 1 - return executor.submit(self._mutation_n_evaluation, descriptive_id, - population_descriptive_ids_mapping[descriptive_id], - mutation_type, evaluator) - - def check_and_try_mutation(parent_descriptive_id: str, - mutation_type: Optional[MutationType] = None, - count: int = 1): - # probs should be the same order as mutation_types - probs = dict(zip(mutation_types, self.mutation._operator_agent.get_action_probs())) - real_probs = {mutation_type: mutation_count_for_each_ind[parent_descriptive_id][mutation_type] / - max(1, all_mutations_count_for_each_ind[parent_descriptive_id]) - for mutation_type in mutation_types} - # check probability allows to make mutations - if (all_mutations_count_for_each_ind[parent_descriptive_id] == 0 or - probs[mutation_type] > real_probs[mutation_type] or - len(set(real_probs.values())) == 1): - # check that there is not enough mutations - if all_mutations_count_for_each_ind[parent_descriptive_id] < mutations_per_individual: - for _ in range(count): - futures.append(try_mutation(parent_descriptive_id, mutation_type)) - return True - return False - - def add_new_individual_to_new_population(parent_descriptive_id: str, - mutation_type: MutationType, - new_individual: Individual): - if new_individual: - descriptive_id = new_individual.graph.descriptive_id - if descriptive_id not in self._pop_graph_descriptive_ids: - mutation_count_for_each_ind[parent_descriptive_id][mutation_type] += 1 - all_mutations_count_for_each_ind[parent_descriptive_id] += 1 - mutations_count[mutation_type] += 1 - - new_population.append(new_individual) - self._pop_graph_descriptive_ids.add(descriptive_id) - return True - return False - - # start reproducing + max_tries = self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER + mutation_fun = partial(self._mutation_n_evaluation, evaluator=evaluator) new_population = [] - executor = get_reusable_executor(max_workers=n_jobs) - - # stage 1 - # set up each type of mutation for each individual - futures = deque(try_mutation(descriptive_id, mutation_type) - for mutation_type in np.random.permutation(mutation_types) - for descriptive_id in np.random.permutation(list(population_descriptive_ids_mapping))) - - # stage 2 - delayed_mutations = deque() - individual_id_with_lowest_mutations, rarest_mutation_type = None, None - times = [] - while futures: - if len(new_population) == target_pop_size or left_tries[0] <= 0: - break - - # get next finished future - while True: - future = futures.popleft() - if future._state == 'FINISHED': break - futures.append(future) - time.sleep(0.01) # to prevent flooding - - # add new individual to new population - parent_descriptive_id, mutation_type, new_ind = future.result() - added = add_new_individual_to_new_population(parent_descriptive_id, mutation_type, new_ind) - - # define rarest ind and mutation - if added: - all_mutations = sum(mutations_count.values()) - probs = dict(zip(mutation_types, self.mutation._operator_agent.get_action_probs())) - real_probs = {mutation_type: mutations_count[mutation_type] / (all_mutations * probs[mutation_type]) - for mutation_type in mutation_types} - - frequent_mutation_type = max(real_probs.items(), key=lambda x: x[1])[0] - rarest_mutation_type = min(real_probs.items(), key=lambda x: x[1])[0] - individual_id_with_lowest_mutations = min(all_mutations_count_for_each_ind.items(), - key=lambda x: x[1])[0] - - # create new future with same mutation and same individual - count = (1 + - (individual_id_with_lowest_mutations == parent_descriptive_id) + - (rarest_mutation_type == mutation_type)) - applied = check_and_try_mutation(parent_descriptive_id, mutation_type, count) - - # if there is no need in parent_descriptive_id & mutation_type mutation - # then try to find new mutation - count = n_jobs + 1 - len(futures) - if not applied: - delayed_mutations.append((parent_descriptive_id, mutation_type)) - for _ in range(len(delayed_mutations) - 1): - parent_descriptive_id, mutation_type = delayed_mutations.popleft() - if ((individual_id_with_lowest_mutations == parent_descriptive_id or - mutation_type == rarest_mutation_type) and - mutation_type != frequent_mutation_type): - futures.append(try_mutation(parent_descriptive_id, mutation_type)) - applied = True - else: - applied = check_and_try_mutation(parent_descriptive_id, mutation_type) - count -= applied - if count <= 0: break - if not applied: delayed_mutations.append((parent_descriptive_id, mutation_type)) - - # if there are any feature then process it and add new_ind to new_population if it is ready - for future in futures: - if future._state == 'FINISHED': - add_new_individual_to_new_population(*future.result()) - executor.shutdown(wait=False) + + new_population = list(map(mutation_fun, cycle(population))) + + with Parallel(n_jobs=self.mutation.requirements.n_jobs, return_as='generator') as parallel: + new_ind_generator = parallel(delayed(mutation_fun)(ind) + for ind, _ in zip(cycle(population), range(max_tries))) + for new_ind, mutation_type, applied in new_ind_generator: + if applied: + descriptive_id = new_ind.graph.descriptive_id + if descriptive_id not in self._pop_graph_descriptive_ids: + new_population.append(new_ind) + self._pop_graph_descriptive_ids.add(descriptive_id) + if len(new_population) >= self.parameters.pop_size: + break + else: + self.mutation.agent_experience.collect_experience(new_ind.graph, mutation_type, reward=-1.0) # Reset mutation probabilities to default self.mutation.update_requirements(requirements=self.parameters) @@ -217,16 +111,10 @@ def _check_final_population(self, population: PopulationT) -> None: f'have {len(population)},' f' required {target_pop_size}!\n' + helpful_msg) - def _mutation_n_evaluation(self, - descriptive_id: str, - individual: Individual, - mutation_type: Optional[MutationType], - evaluator: EvaluationOperator): - individual, mutation_type, applied = self.mutation._mutation(individual, mutation_type=mutation_type) - if applied and individual and self.verifier(individual.graph): + def _mutation_n_evaluation(self, individual: Individual, evaluator: EvaluationOperator): + individual, mutation_type, applied = self.mutation._mutation(individual) + if individual and self.verifier(individual.graph): individuals = evaluator([individual]) if individuals: - # if all is ok return all data - return descriptive_id, mutation_type, individuals[0] - # if something go wrong do not return new individual - return descriptive_id, mutation_type, None + return individuals[0], mutation_type, applied + return individual, mutation_type, False From cdbef7d542a02fb2fb33e44df12b6eddc5769520 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Fri, 27 Oct 2023 10:32:15 +0300 Subject: [PATCH 26/65] wip --- .../optimisers/genetic/operators/mutation.py | 17 ++++- .../genetic/operators/reproduction.py | 69 ++++++++++--------- 2 files changed, 52 insertions(+), 34 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/mutation.py b/golem/core/optimisers/genetic/operators/mutation.py index f09cd4de9..b1aaa30cd 100644 --- a/golem/core/optimisers/genetic/operators/mutation.py +++ b/golem/core/optimisers/genetic/operators/mutation.py @@ -163,7 +163,22 @@ def _get_mutation_func(self, mutation_type: Union[MutationTypesEnum, Callable]) return adapted_mutation_func -class FastSingleMutation(Mutation): +class SpecialSingleMutation(Mutation): + def __init__(self, + parameters: 'GPAlgorithmParameters', + requirements: GraphRequirements, + graph_gen_params: GraphGenerationParams, + mutations_repo: MutationRepo, + operator_agent: OperatorAgent, + agent_experience: ExperienceBuffer, + ): + super().__init__(parameters=parameters, + requirements=requirements, + graph_gen_params=graph_gen_params, + mutations_repo=mutations_repo) + self._operator_agent = operator_agent + self.agent_experience = agent_experience + def __call__(self, individual: Individual) -> Individual: new_graph = deepcopy(individual.graph) diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 253dca610..b82070d52 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -19,7 +19,7 @@ from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters from golem.core.optimisers.genetic.operators.base_mutations import MutationTypesEnum from golem.core.optimisers.genetic.operators.crossover import Crossover -from golem.core.optimisers.genetic.operators.mutation import Mutation, MutationType +from golem.core.optimisers.genetic.operators.mutation import Mutation, MutationType, SpecialSingleMutation from golem.core.optimisers.genetic.operators.operator import PopulationT, EvaluationOperator from golem.core.optimisers.genetic.operators.selection import Selection from golem.core.optimisers.populational_optimizer import EvaluationAttemptsError @@ -68,34 +68,38 @@ def reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> P return new_population def _mutate_over_population(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: - # increase probability of mutation to not spend tries for no mutations - initial_parameters = deepcopy(self.parameters) - initial_parameters.mutation_prob = 1.0 - self.mutation.update_requirements(parameters=initial_parameters) - - max_tries = self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER - mutation_fun = partial(self._mutation_n_evaluation, evaluator=evaluator) - new_population = [] - - new_population = list(map(mutation_fun, cycle(population))) - - with Parallel(n_jobs=self.mutation.requirements.n_jobs, return_as='generator') as parallel: - new_ind_generator = parallel(delayed(mutation_fun)(ind) - for ind, _ in zip(cycle(population), range(max_tries))) - for new_ind, mutation_type, applied in new_ind_generator: - if applied: - descriptive_id = new_ind.graph.descriptive_id - if descriptive_id not in self._pop_graph_descriptive_ids: - new_population.append(new_ind) - self._pop_graph_descriptive_ids.add(descriptive_id) - if len(new_population) >= self.parameters.pop_size: - break - else: - self.mutation.agent_experience.collect_experience(new_ind.graph, mutation_type, reward=-1.0) - - # Reset mutation probabilities to default - self.mutation.update_requirements(requirements=self.parameters) - return new_population + # create common objects for parallel use + with Manager() as manager: + # create new mutation that suitable for parallel evaluation + initial_parameters = deepcopy(self.parameters) + initial_parameters.mutation_prob = 1.0 + + operator_agent = manager.Value('operator_agent', self.mutation._operator_agent) + agent_experience = manager.Value('agent_experience', self.mutation.agent_experience) + + mutation = SpecialSingleMutation(parameters=initial_parameters, + requirements=self.mutation.requirements, + graph_gen_params=self.mutation.graph_generation_params, + mutations_repo=self.mutation._mutations_repo, + operator_agent=operator_agent, + agent_experience=agent_experience) + mutation_fun = partial(self._mutation_n_evaluation, mutation=mutation, evaluator=evaluator) + + max_tries = self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER + new_population = [] + + with Parallel(n_jobs=self.mutation.requirements.n_jobs, return_as='generator') as parallel: + new_ind_generator = parallel(delayed(mutation_fun)(ind) + for ind, _ in zip(cycle(population), range(max_tries))) + for new_ind in new_ind_generator: + if new_ind: + descriptive_id = new_ind.graph.descriptive_id + if descriptive_id not in self._pop_graph_descriptive_ids: + new_population.append(new_ind) + self._pop_graph_descriptive_ids.add(descriptive_id) + if len(new_population) >= self.parameters.pop_size: + break + return new_population def _check_final_population(self, population: PopulationT) -> None: """ If population do not achieve required length return a warning or raise exception """ @@ -111,10 +115,9 @@ def _check_final_population(self, population: PopulationT) -> None: f'have {len(population)},' f' required {target_pop_size}!\n' + helpful_msg) - def _mutation_n_evaluation(self, individual: Individual, evaluator: EvaluationOperator): - individual, mutation_type, applied = self.mutation._mutation(individual) + def _mutation_n_evaluation(self, individual: Individual, mutation: Mutation, evaluator: EvaluationOperator): + individual = mutation(individual) if individual and self.verifier(individual.graph): individuals = evaluator([individual]) if individuals: - return individuals[0], mutation_type, applied - return individual, mutation_type, False + return individuals[0] From c3f8cf43ab380faa4f7e98c1a5b24a03793174bb Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Fri, 27 Oct 2023 11:48:06 +0300 Subject: [PATCH 27/65] new approach with shared memory objects --- .../optimisers/genetic/operators/mutation.py | 11 ++-- .../genetic/operators/reproduction.py | 57 +++++++++++-------- 2 files changed, 37 insertions(+), 31 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/mutation.py b/golem/core/optimisers/genetic/operators/mutation.py index b1aaa30cd..bc4e14201 100644 --- a/golem/core/optimisers/genetic/operators/mutation.py +++ b/golem/core/optimisers/genetic/operators/mutation.py @@ -1,4 +1,5 @@ from copy import deepcopy +from multiprocessing.managers import ValueProxy from random import random from typing import Callable, Union, Tuple, TYPE_CHECKING, Mapping, Hashable, Optional @@ -169,8 +170,8 @@ def __init__(self, requirements: GraphRequirements, graph_gen_params: GraphGenerationParams, mutations_repo: MutationRepo, - operator_agent: OperatorAgent, - agent_experience: ExperienceBuffer, + operator_agent: ValueProxy, + agent_experience: ValueProxy, ): super().__init__(parameters=parameters, requirements=requirements, @@ -182,16 +183,14 @@ def __init__(self, def __call__(self, individual: Individual) -> Individual: new_graph = deepcopy(individual.graph) - mutation_type = self._operator_agent.choose_action(new_graph) + mutation_type = self._operator_agent.value.choose_action(new_graph) mutation_func = self._get_mutation_func(mutation_type) new_graph = mutation_func(new_graph, requirements=self.requirements, graph_gen_params=self.graph_generation_params, parameters=self.parameters) - parent_operator = ParentOperator(type_='mutation', - operators=mutation_type, - parent_individuals=individual) + parent_operator = ParentOperator(type_='mutation', operators=mutation_type, parent_individuals=individual) individual = Individual(new_graph, parent_operator, metadata=self.requirements.static_individual_metadata) return individual diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index b82070d52..95bb7ee5e 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -4,6 +4,7 @@ from functools import partial from itertools import cycle, chain from math import ceil +from multiprocessing.managers import ValueProxy, DictProxy from typing import Callable, Dict, Union, List, Optional from multiprocessing import Queue, Manager import queue @@ -68,37 +69,38 @@ def reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> P return new_population def _mutate_over_population(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: - # create common objects for parallel use - with Manager() as manager: - # create new mutation that suitable for parallel evaluation + with (Manager() as manager, + Parallel(n_jobs=self.mutation.requirements.n_jobs, return_as='generator') as parallel): + initial_parameters = deepcopy(self.parameters) initial_parameters.mutation_prob = 1.0 operator_agent = manager.Value('operator_agent', self.mutation._operator_agent) agent_experience = manager.Value('agent_experience', self.mutation.agent_experience) - mutation = SpecialSingleMutation(parameters=initial_parameters, - requirements=self.mutation.requirements, - graph_gen_params=self.mutation.graph_generation_params, - mutations_repo=self.mutation._mutations_repo, - operator_agent=operator_agent, - agent_experience=agent_experience) - mutation_fun = partial(self._mutation_n_evaluation, mutation=mutation, evaluator=evaluator) + requirements=self.mutation.requirements, + graph_gen_params=self.mutation.graph_generation_params, + mutations_repo=self.mutation._mutations_repo, + operator_agent=operator_agent, + agent_experience=agent_experience) + pop_graph_descriptive_ids = manager.dict(zip(self._pop_graph_descriptive_ids, + range(len(self._pop_graph_descriptive_ids)))) + mutation_fun = partial(self._mutation_n_evaluation, + pop_graph_descriptive_ids=pop_graph_descriptive_ids, + mutation=mutation, + evaluator=evaluator) max_tries = self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER new_population = [] - with Parallel(n_jobs=self.mutation.requirements.n_jobs, return_as='generator') as parallel: - new_ind_generator = parallel(delayed(mutation_fun)(ind) - for ind, _ in zip(cycle(population), range(max_tries))) - for new_ind in new_ind_generator: - if new_ind: - descriptive_id = new_ind.graph.descriptive_id - if descriptive_id not in self._pop_graph_descriptive_ids: - new_population.append(new_ind) - self._pop_graph_descriptive_ids.add(descriptive_id) - if len(new_population) >= self.parameters.pop_size: - break + new_ind_generator = parallel(delayed(mutation_fun)(ind) + for ind, _ in zip(cycle(population), range(max_tries))) + for new_ind in new_ind_generator: + if new_ind: + new_population.append(new_ind) + if len(new_population) >= self.parameters.pop_size: + break + self._pop_graph_descriptive_ids |= set(pop_graph_descriptive_ids) return new_population def _check_final_population(self, population: PopulationT) -> None: @@ -115,9 +117,14 @@ def _check_final_population(self, population: PopulationT) -> None: f'have {len(population)},' f' required {target_pop_size}!\n' + helpful_msg) - def _mutation_n_evaluation(self, individual: Individual, mutation: Mutation, evaluator: EvaluationOperator): + def _mutation_n_evaluation(self, individual: Individual, pop_graph_descriptive_ids: DictProxy, + mutation: Mutation, evaluator: EvaluationOperator): individual = mutation(individual) if individual and self.verifier(individual.graph): - individuals = evaluator([individual]) - if individuals: - return individuals[0] + descriptive_id = individual.graph.descriptive_id + if descriptive_id not in pop_graph_descriptive_ids: + individuals = evaluator([individual]) + if individuals: + pop_graph_descriptive_ids[descriptive_id] = True + return individuals[0] + pop_graph_descriptive_ids[descriptive_id] = False From 74bebbf05bd70de3578e26712ace68b509551b2d Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Fri, 27 Oct 2023 13:53:08 +0300 Subject: [PATCH 28/65] simple way to repeat mutations --- golem/core/optimisers/genetic/gp_params.py | 1 + .../optimisers/genetic/operators/mutation.py | 6 ++-- .../genetic/operators/reproduction.py | 31 ++++++++++--------- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/golem/core/optimisers/genetic/gp_params.py b/golem/core/optimisers/genetic/gp_params.py index 7579dc965..7a8457637 100644 --- a/golem/core/optimisers/genetic/gp_params.py +++ b/golem/core/optimisers/genetic/gp_params.py @@ -76,6 +76,7 @@ class GPAlgorithmParameters(AlgorithmParameters): mutation_prob: float = 0.8 variable_mutation_num: bool = True max_num_of_operator_attempts: int = 100 + max_num_of_mutation_attempts: int = 3 mutation_strength: MutationStrengthEnum = MutationStrengthEnum.mean min_pop_size_with_elitism: int = 5 required_valid_ratio: float = 0.9 diff --git a/golem/core/optimisers/genetic/operators/mutation.py b/golem/core/optimisers/genetic/operators/mutation.py index bc4e14201..8f8db4630 100644 --- a/golem/core/optimisers/genetic/operators/mutation.py +++ b/golem/core/optimisers/genetic/operators/mutation.py @@ -180,10 +180,10 @@ def __init__(self, self._operator_agent = operator_agent self.agent_experience = agent_experience - def __call__(self, individual: Individual) -> Individual: + def __call__(self, individual: Individual, mutation_type: Optional[MutationType] = None) -> Individual: new_graph = deepcopy(individual.graph) - mutation_type = self._operator_agent.value.choose_action(new_graph) + mutation_type = mutation_type or self._operator_agent.value.choose_action(new_graph) mutation_func = self._get_mutation_func(mutation_type) new_graph = mutation_func(new_graph, requirements=self.requirements, @@ -193,4 +193,4 @@ def __call__(self, individual: Individual) -> Individual: parent_operator = ParentOperator(type_='mutation', operators=mutation_type, parent_individuals=individual) individual = Individual(new_graph, parent_operator, metadata=self.requirements.static_individual_metadata) - return individual + return individual, mutation_type diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 95bb7ee5e..95ff19d51 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -72,12 +72,9 @@ def _mutate_over_population(self, population: PopulationT, evaluator: Evaluation with (Manager() as manager, Parallel(n_jobs=self.mutation.requirements.n_jobs, return_as='generator') as parallel): - initial_parameters = deepcopy(self.parameters) - initial_parameters.mutation_prob = 1.0 - operator_agent = manager.Value('operator_agent', self.mutation._operator_agent) agent_experience = manager.Value('agent_experience', self.mutation.agent_experience) - mutation = SpecialSingleMutation(parameters=initial_parameters, + mutation = SpecialSingleMutation(parameters=self.mutation.parameters, requirements=self.mutation.requirements, graph_gen_params=self.mutation.graph_generation_params, mutations_repo=self.mutation._mutations_repo, @@ -86,6 +83,7 @@ def _mutate_over_population(self, population: PopulationT, evaluator: Evaluation pop_graph_descriptive_ids = manager.dict(zip(self._pop_graph_descriptive_ids, range(len(self._pop_graph_descriptive_ids)))) mutation_fun = partial(self._mutation_n_evaluation, + count=self.parameters.max_num_of_mutation_attempts, pop_graph_descriptive_ids=pop_graph_descriptive_ids, mutation=mutation, evaluator=evaluator) @@ -117,14 +115,19 @@ def _check_final_population(self, population: PopulationT) -> None: f'have {len(population)},' f' required {target_pop_size}!\n' + helpful_msg) - def _mutation_n_evaluation(self, individual: Individual, pop_graph_descriptive_ids: DictProxy, - mutation: Mutation, evaluator: EvaluationOperator): - individual = mutation(individual) - if individual and self.verifier(individual.graph): - descriptive_id = individual.graph.descriptive_id - if descriptive_id not in pop_graph_descriptive_ids: - individuals = evaluator([individual]) - if individuals: + def _mutation_n_evaluation(self, + individual: Individual, + count: int, + pop_graph_descriptive_ids: DictProxy, + mutation: SpecialSingleMutation, + evaluator: EvaluationOperator): + origin, mutation_type = individual, None + for _ in range(count): + individual, mutation_type = mutation(origin, mutation_type=mutation_type) + if individual and self.verifier(individual.graph): + descriptive_id = individual.graph.descriptive_id + if descriptive_id not in pop_graph_descriptive_ids: pop_graph_descriptive_ids[descriptive_id] = True - return individuals[0] - pop_graph_descriptive_ids[descriptive_id] = False + individuals = evaluator([individual]) + if individuals: + return individuals[0] From 70af4c605228cf4e417c362a397be5115071759b Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Fri, 27 Oct 2023 14:18:25 +0300 Subject: [PATCH 29/65] a little bit complex way to repeat mutations --- .../genetic/operators/reproduction.py | 74 +++++++++++++------ 1 file changed, 50 insertions(+), 24 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 95ff19d51..4be498b6f 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -69,9 +69,7 @@ def reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> P return new_population def _mutate_over_population(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: - with (Manager() as manager, - Parallel(n_jobs=self.mutation.requirements.n_jobs, return_as='generator') as parallel): - + with Manager() as manager: operator_agent = manager.Value('operator_agent', self.mutation._operator_agent) agent_experience = manager.Value('agent_experience', self.mutation.agent_experience) mutation = SpecialSingleMutation(parameters=self.mutation.parameters, @@ -82,22 +80,50 @@ def _mutate_over_population(self, population: PopulationT, evaluator: Evaluation agent_experience=agent_experience) pop_graph_descriptive_ids = manager.dict(zip(self._pop_graph_descriptive_ids, range(len(self._pop_graph_descriptive_ids)))) - mutation_fun = partial(self._mutation_n_evaluation, - count=self.parameters.max_num_of_mutation_attempts, - pop_graph_descriptive_ids=pop_graph_descriptive_ids, - mutation=mutation, - evaluator=evaluator) - max_tries = self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER + left_tries = [self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER] + executor = get_reusable_executor(max_workers=self.mutation.requirements.n_jobs) + cycled_population = cycle(population) new_population = [] + futures = deque() + + def try_mutation(ind, mutation_type=None, count=self.parameters.max_num_of_mutation_attempts): + return executor.submit(self._mutation_n_evaluation, + individual=ind, + count=count, + mutation_type=mutation_type, + pop_graph_descriptive_ids=pop_graph_descriptive_ids, + mutation=mutation, + evaluator=evaluator) + + while True: + if left_tries[0] <= 0: + break + + # create new tasks if there is not enough load + if len(futures) < self.mutation.requirements.n_jobs + 2: + futures.append(try_mutation(next(cycled_population))) + continue + + # get next finished future + while True: + future = futures.popleft() + if future._state == 'FINISHED': + left_tries[0] -= 1 + break + futures.append(future) + time.sleep(0.01) # to prevent flooding - new_ind_generator = parallel(delayed(mutation_fun)(ind) - for ind, _ in zip(cycle(population), range(max_tries))) - for new_ind in new_ind_generator: - if new_ind: - new_population.append(new_ind) + # process result + applied, ind, mutation_type, count = future.result() + if applied: + new_population.append(ind) if len(new_population) >= self.parameters.pop_size: break + elif count > 0: + futures.append(try_mutation(ind, mutation_type, count)) + + executor.shutdown(wait=False) self._pop_graph_descriptive_ids |= set(pop_graph_descriptive_ids) return new_population @@ -118,16 +144,16 @@ def _check_final_population(self, population: PopulationT) -> None: def _mutation_n_evaluation(self, individual: Individual, count: int, + mutation_type: MutationType, pop_graph_descriptive_ids: DictProxy, mutation: SpecialSingleMutation, evaluator: EvaluationOperator): - origin, mutation_type = individual, None - for _ in range(count): - individual, mutation_type = mutation(origin, mutation_type=mutation_type) - if individual and self.verifier(individual.graph): - descriptive_id = individual.graph.descriptive_id - if descriptive_id not in pop_graph_descriptive_ids: - pop_graph_descriptive_ids[descriptive_id] = True - individuals = evaluator([individual]) - if individuals: - return individuals[0] + new_ind, mutation_type = mutation(individual, mutation_type=mutation_type) + if new_ind and self.verifier(new_ind.graph): + descriptive_id = new_ind.graph.descriptive_id + if descriptive_id not in pop_graph_descriptive_ids: + pop_graph_descriptive_ids[descriptive_id] = True + new_inds = evaluator([new_ind]) + if new_inds: + return True, new_inds[0], mutation_type, count - 1 + return False, individual, mutation_type, count - 1 From f3c1014cbe5ddba670278c33358decbfa8b0a98d Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Fri, 27 Oct 2023 17:15:02 +0300 Subject: [PATCH 30/65] delete structural diversity check and polish reproducer --- golem/core/optimisers/genetic/gp_params.py | 2 +- .../optimisers/genetic/operators/mutation.py | 21 +--- .../genetic/operators/reproduction.py | 115 +++++++++++------- golem/core/optimisers/optimizer.py | 3 - .../core/optimisers/populational_optimizer.py | 20 +-- test/integration/test_structural_diversity.py | 6 +- 6 files changed, 77 insertions(+), 90 deletions(-) diff --git a/golem/core/optimisers/genetic/gp_params.py b/golem/core/optimisers/genetic/gp_params.py index 7a8457637..e0df526bd 100644 --- a/golem/core/optimisers/genetic/gp_params.py +++ b/golem/core/optimisers/genetic/gp_params.py @@ -76,7 +76,7 @@ class GPAlgorithmParameters(AlgorithmParameters): mutation_prob: float = 0.8 variable_mutation_num: bool = True max_num_of_operator_attempts: int = 100 - max_num_of_mutation_attempts: int = 3 + max_num_of_mutation_attempts: int = 1 mutation_strength: MutationStrengthEnum = MutationStrengthEnum.mean min_pop_size_with_elitism: int = 5 required_valid_ratio: float = 0.9 diff --git a/golem/core/optimisers/genetic/operators/mutation.py b/golem/core/optimisers/genetic/operators/mutation.py index 8f8db4630..38a26a4ab 100644 --- a/golem/core/optimisers/genetic/operators/mutation.py +++ b/golem/core/optimisers/genetic/operators/mutation.py @@ -164,26 +164,9 @@ def _get_mutation_func(self, mutation_type: Union[MutationTypesEnum, Callable]) return adapted_mutation_func -class SpecialSingleMutation(Mutation): - def __init__(self, - parameters: 'GPAlgorithmParameters', - requirements: GraphRequirements, - graph_gen_params: GraphGenerationParams, - mutations_repo: MutationRepo, - operator_agent: ValueProxy, - agent_experience: ValueProxy, - ): - super().__init__(parameters=parameters, - requirements=requirements, - graph_gen_params=graph_gen_params, - mutations_repo=mutations_repo) - self._operator_agent = operator_agent - self.agent_experience = agent_experience - - def __call__(self, individual: Individual, mutation_type: Optional[MutationType] = None) -> Individual: +class SinglePredefinedMutation(Mutation): + def __call__(self, individual: Individual, mutation_type: MutationType) -> Individual: new_graph = deepcopy(individual.graph) - - mutation_type = mutation_type or self._operator_agent.value.choose_action(new_graph) mutation_func = self._get_mutation_func(mutation_type) new_graph = mutation_func(new_graph, requirements=self.requirements, diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 4be498b6f..90aa00002 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -20,7 +20,7 @@ from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters from golem.core.optimisers.genetic.operators.base_mutations import MutationTypesEnum from golem.core.optimisers.genetic.operators.crossover import Crossover -from golem.core.optimisers.genetic.operators.mutation import Mutation, MutationType, SpecialSingleMutation +from golem.core.optimisers.genetic.operators.mutation import Mutation, MutationType, SinglePredefinedMutation from golem.core.optimisers.genetic.operators.operator import PopulationT, EvaluationOperator from golem.core.optimisers.genetic.operators.selection import Selection from golem.core.optimisers.populational_optimizer import EvaluationAttemptsError @@ -37,7 +37,6 @@ class ReproductionController: selection: operator used in reproduction. mutation: operator used in reproduction. crossover: operator used in reproduction. - window_size: size in iterations of the moving window to compute reproduction success rate. """ def __init__(self, @@ -59,8 +58,6 @@ def __init__(self, def reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: """Reproduces and evaluates population (select, crossover, mutate). - Implements additional checks on population to ensure that population size - follows required population size. """ selected_individuals = self.selection(population, self.parameters.pop_size) new_population = self.crossover(selected_individuals) @@ -69,25 +66,26 @@ def reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> P return new_population def _mutate_over_population(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: + """Generate new individuals by mutation in parallel. + Implements additional checks on population to ensure that population size follows + required population size. Also controls uniqueness of population. + """ with Manager() as manager: - operator_agent = manager.Value('operator_agent', self.mutation._operator_agent) - agent_experience = manager.Value('agent_experience', self.mutation.agent_experience) - mutation = SpecialSingleMutation(parameters=self.mutation.parameters, - requirements=self.mutation.requirements, - graph_gen_params=self.mutation.graph_generation_params, - mutations_repo=self.mutation._mutations_repo, - operator_agent=operator_agent, - agent_experience=agent_experience) + mutation = SinglePredefinedMutation(parameters=self.mutation.parameters, + requirements=self.mutation.requirements, + graph_gen_params=self.mutation.graph_generation_params, + mutations_repo=self.mutation._mutations_repo) pop_graph_descriptive_ids = manager.dict(zip(self._pop_graph_descriptive_ids, range(len(self._pop_graph_descriptive_ids)))) - left_tries = [self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER] + left_tries = self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER executor = get_reusable_executor(max_workers=self.mutation.requirements.n_jobs) cycled_population = cycle(population) new_population = [] futures = deque() def try_mutation(ind, mutation_type=None, count=self.parameters.max_num_of_mutation_attempts): + mutation_type = mutation_type or self.mutation.agent.choose_action(ind.graph) return executor.submit(self._mutation_n_evaluation, individual=ind, count=count, @@ -96,10 +94,7 @@ def try_mutation(ind, mutation_type=None, count=self.parameters.max_num_of_mutat mutation=mutation, evaluator=evaluator) - while True: - if left_tries[0] <= 0: - break - + while left_tries > 0: # create new tasks if there is not enough load if len(futures) < self.mutation.requirements.n_jobs + 2: futures.append(try_mutation(next(cycled_population))) @@ -108,25 +103,74 @@ def try_mutation(ind, mutation_type=None, count=self.parameters.max_num_of_mutat # get next finished future while True: future = futures.popleft() - if future._state == 'FINISHED': - left_tries[0] -= 1 - break + if future._state == 'FINISHED': break futures.append(future) time.sleep(0.01) # to prevent flooding # process result - applied, ind, mutation_type, count = future.result() - if applied: - new_population.append(ind) + left_tries -= 1 + failed_stage, individual, mutation_type, retained_count = future.result() + if failed_stage is None: + new_population.append(individual) if len(new_population) >= self.parameters.pop_size: break - elif count > 0: - futures.append(try_mutation(ind, mutation_type, count)) - + else: + if failed_stage == 'verification': + # add experience to mutation + self.mutation.agent_experience.collect_experience(individual, mutation_type, reward=-1.0) + if retained_count > 0: + futures.append(try_mutation(individual, mutation_type, retained_count)) + + # get finished mutations + for future in futures: + if future._state == 'FINISHED': + applied, ind, *_ = future.result() + if applied: new_population.append(ind) + + # shutdown workers and add pop_graph_descriptive_ids to self._pop_graph_descriptive_ids executor.shutdown(wait=False) - self._pop_graph_descriptive_ids |= set(pop_graph_descriptive_ids) + for _ in range(10): + try: + self._pop_graph_descriptive_ids |= set(pop_graph_descriptive_ids) + break + except RuntimeError as exception: + time.sleep(0.1) # time for finish all processes + else: + raise exception + return new_population + def _mutation_n_evaluation(self, + individual: Individual, + count: int, + mutation_type: MutationType, + pop_graph_descriptive_ids: DictProxy, + mutation: SinglePredefinedMutation, + evaluator: EvaluationOperator): + # mutation + new_ind, mutation_type = mutation(individual, mutation_type=mutation_type) + if not new_ind: + return 'mutation', individual, mutation_type, count - 1 + + # verification + if not self.verifier(new_ind.graph): + return 'verification', individual, mutation_type, count - 1 + + # unique check + descriptive_id = new_ind.graph.descriptive_id + if descriptive_id in pop_graph_descriptive_ids: + # worker can't send nonempty string! wtf? + return '', individual, mutation_type, count - 1 + pop_graph_descriptive_ids[descriptive_id] = True + + # evaluation + new_inds = evaluator([new_ind]) + if not new_inds: + # worker can't send nonempty string! wtf? + return '', individual, mutation_type, count - 1 + + return None, new_inds[0], mutation_type, count - 1 + def _check_final_population(self, population: PopulationT) -> None: """ If population do not achieve required length return a warning or raise exception """ target_pop_size = self.parameters.pop_size @@ -140,20 +184,3 @@ def _check_final_population(self, population: PopulationT) -> None: self._log.warning(f'Could not achieve required population size: ' f'have {len(population)},' f' required {target_pop_size}!\n' + helpful_msg) - - def _mutation_n_evaluation(self, - individual: Individual, - count: int, - mutation_type: MutationType, - pop_graph_descriptive_ids: DictProxy, - mutation: SpecialSingleMutation, - evaluator: EvaluationOperator): - new_ind, mutation_type = mutation(individual, mutation_type=mutation_type) - if new_ind and self.verifier(new_ind.graph): - descriptive_id = new_ind.graph.descriptive_id - if descriptive_id not in pop_graph_descriptive_ids: - pop_graph_descriptive_ids[descriptive_id] = True - new_inds = evaluator([new_ind]) - if new_inds: - return True, new_inds[0], mutation_type, count - 1 - return False, individual, mutation_type, count - 1 diff --git a/golem/core/optimisers/optimizer.py b/golem/core/optimisers/optimizer.py index 5a54bb501..d67b2394a 100644 --- a/golem/core/optimisers/optimizer.py +++ b/golem/core/optimisers/optimizer.py @@ -20,8 +20,6 @@ from golem.core.optimisers.random_graph_factory import RandomGraphFactory, RandomGrowthGraphFactory from golem.utilities.random import RandomStateHandler -STRUCTURAL_DIVERSITY_FREQUENCY_CHECK = 5 - def do_nothing_callback(*args, **kwargs): pass @@ -46,7 +44,6 @@ class AlgorithmParameters: max_pop_size: Optional[int] = 55 adaptive_depth: bool = False adaptive_depth_max_stagnation: int = 3 - structural_diversity_frequency_check: int = STRUCTURAL_DIVERSITY_FREQUENCY_CHECK @dataclass diff --git a/golem/core/optimisers/populational_optimizer.py b/golem/core/optimisers/populational_optimizer.py index 23b05d348..fa6eba7b0 100644 --- a/golem/core/optimisers/populational_optimizer.py +++ b/golem/core/optimisers/populational_optimizer.py @@ -69,10 +69,7 @@ def __init__(self, 'Optimisation finished: Early stopping iterations criteria was satisfied' ).add_condition( lambda: self.generations.stagnation_time_duration >= max_stagnation_time, - 'Optimisation finished: Early stopping timeout criteria was satisfied' - ) - # in how many generations structural diversity check should be performed - self.gen_structural_diversity_check = self.graph_optimizer_params.structural_diversity_frequency_check + 'Optimisation finished: Early stopping timeout criteria was satisfied') @property def current_generation_num(self) -> int: @@ -94,10 +91,6 @@ def optimise(self, objective: ObjectiveFunction) -> Sequence[Graph]: while not self.stop_optimization(): try: new_population = self._evolve_population(evaluator) - if self.gen_structural_diversity_check != -1 \ - and self.generations.generation_num % self.gen_structural_diversity_check == 0 \ - and self.generations.generation_num != 0: - new_population = self.get_structure_unique_population(new_population, evaluator) pbar.update() except EvaluationAttemptsError as ex: self.log.warning(f'Composition process was stopped due to: {ex}') @@ -150,17 +143,6 @@ def _log_to_history(self, population: PopulationT, label: Optional[str] = None, if self.requirements.history_dir: self.history.save_current_results(self.requirements.history_dir) - def get_structure_unique_population(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: - """ Increases structurally uniqueness of population to prevent stagnation in optimization process. - Returned population may be not entirely unique, if the size of unique population is lower than MIN_POP_SIZE. """ - unique_population_with_ids = {ind.graph.descriptive_id: ind for ind in population} - unique_population = list(unique_population_with_ids.values()) - - # if size of unique population is too small, then extend it to MIN_POP_SIZE by repeating individuals - if len(unique_population) < MIN_POP_SIZE: - unique_population = self._extend_population(pop=unique_population, target_pop_size=MIN_POP_SIZE) - return evaluator(unique_population) - # TODO: remove this hack (e.g. provide smth like FitGraph with fit/unfit interface) def _try_unfit_graph(graph: Any): diff --git a/test/integration/test_structural_diversity.py b/test/integration/test_structural_diversity.py index c5f4fe6a2..4c05213af 100644 --- a/test/integration/test_structural_diversity.py +++ b/test/integration/test_structural_diversity.py @@ -8,14 +8,13 @@ from golem.core.optimisers.genetic.operators.base_mutations import MutationTypesEnum from golem.core.optimisers.genetic.operators.crossover import CrossoverTypesEnum from golem.core.optimisers.objective import Objective -from golem.core.optimisers.optimizer import STRUCTURAL_DIVERSITY_FREQUENCY_CHECK from golem.metrics.edit_distance import tree_edit_dist from golem.metrics.graph_metrics import degree_distance DIVERSITY_THRESHOLD = 0.5 -def set_up_params(gen_structural_check: int): +def set_up_params(gen_structural_check: int = -1): """ It is possible to run test with and without structural check. To run test without structural test set `gen_structural_check` to -1, otherwise it has to be set to positive integer value. """ @@ -32,7 +31,6 @@ def set_up_params(gen_structural_check: int): MutationTypesEnum.single_drop, ], crossover_types=[CrossoverTypesEnum.none], - structural_diversity_frequency_check=gen_structural_check ) return gp_params @@ -41,7 +39,7 @@ def test_structural_diversity(): """ Checks population's structural diversity. Diversity should not be lower than DIVERSITY_THRESHOLD. """ target_graph = generate_labeled_graph('tree', 4, node_labels=['x']) node_types = ['x', 'y', 'z', 'w', 'v', 'u'] - gen_structural_check = STRUCTURAL_DIVERSITY_FREQUENCY_CHECK + gen_structural_check = -1 reset_diversity_threshold = 0.90 gp_params = set_up_params(gen_structural_check=gen_structural_check) From 696e6e6c8789c3a67ea6e709cfbf3191efe96189 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Fri, 27 Oct 2023 17:33:51 +0300 Subject: [PATCH 31/65] add some reqiured changes --- golem/core/constants.py | 2 +- golem/core/optimisers/genetic/operators/crossover.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/golem/core/constants.py b/golem/core/constants.py index 73214eb21..70cbf0c26 100644 --- a/golem/core/constants.py +++ b/golem/core/constants.py @@ -1,6 +1,6 @@ import numpy as np -MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER = 5 +MAX_GRAPH_GEN_ATTEMPTS_PER_IND = 5 MAX_GRAPH_GEN_ATTEMPTS = 1000 MAX_TUNING_METRIC_VALUE = np.inf MIN_TIME_FOR_TUNING_IN_SEC = 3 diff --git a/golem/core/optimisers/genetic/operators/crossover.py b/golem/core/optimisers/genetic/operators/crossover.py index ccc585f16..61c47370c 100644 --- a/golem/core/optimisers/genetic/operators/crossover.py +++ b/golem/core/optimisers/genetic/operators/crossover.py @@ -44,10 +44,12 @@ def __init__(self, def __call__(self, population: PopulationT) -> PopulationT: if len(population) > 1: with Parallel(n_jobs=self.requirements.n_jobs) as parallel: - population = parallel(delayed(self._crossover)(ind_1, ind_2) + new_population = parallel(delayed(self._crossover)(ind_1, ind_2) for ind_1, ind_2 in Crossover.crossover_parents_selection(population)) - population = list(chain(*population)) - return population + new_population = list(chain(*new_population)) + else: + new_population = population[:] + return new_population @staticmethod def crossover_parents_selection(population: PopulationT) -> Iterable[Tuple[Individual, Individual]]: From ebad0d6bda461cda6822c665d62b386d1a15741c Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Fri, 27 Oct 2023 18:22:55 +0300 Subject: [PATCH 32/65] strange problem with workers --- golem/core/optimisers/genetic/gp_params.py | 2 +- .../genetic/operators/reproduction.py | 81 ++++++++----------- .../core/optimisers/populational_optimizer.py | 6 +- 3 files changed, 37 insertions(+), 52 deletions(-) diff --git a/golem/core/optimisers/genetic/gp_params.py b/golem/core/optimisers/genetic/gp_params.py index e0df526bd..7a8457637 100644 --- a/golem/core/optimisers/genetic/gp_params.py +++ b/golem/core/optimisers/genetic/gp_params.py @@ -76,7 +76,7 @@ class GPAlgorithmParameters(AlgorithmParameters): mutation_prob: float = 0.8 variable_mutation_num: bool = True max_num_of_operator_attempts: int = 100 - max_num_of_mutation_attempts: int = 1 + max_num_of_mutation_attempts: int = 3 mutation_strength: MutationStrengthEnum = MutationStrengthEnum.mean min_pop_size_with_elitism: int = 5 required_valid_ratio: float = 0.9 diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 90aa00002..11c6a7a0e 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -1,24 +1,14 @@ import time -from collections import deque -from concurrent.futures import as_completed -from functools import partial -from itertools import cycle, chain -from math import ceil -from multiprocessing.managers import ValueProxy, DictProxy -from typing import Callable, Dict, Union, List, Optional -from multiprocessing import Queue, Manager -import queue -from copy import copy, deepcopy - -import numpy as np -from joblib import Parallel, delayed +from itertools import cycle +from multiprocessing.managers import DictProxy +from multiprocessing import Manager + from joblib.externals.loky import get_reusable_executor -from golem.core.constants import MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER +from golem.core.constants import MAX_GRAPH_GEN_ATTEMPTS_PER_IND from golem.core.dag.graph_verifier import GraphVerifier from golem.core.log import default_log from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters -from golem.core.optimisers.genetic.operators.base_mutations import MutationTypesEnum from golem.core.optimisers.genetic.operators.crossover import Crossover from golem.core.optimisers.genetic.operators.mutation import Mutation, MutationType, SinglePredefinedMutation from golem.core.optimisers.genetic.operators.operator import PopulationT, EvaluationOperator @@ -75,25 +65,22 @@ def _mutate_over_population(self, population: PopulationT, evaluator: Evaluation requirements=self.mutation.requirements, graph_gen_params=self.mutation.graph_generation_params, mutations_repo=self.mutation._mutations_repo) - pop_graph_descriptive_ids = manager.dict(zip(self._pop_graph_descriptive_ids, - range(len(self._pop_graph_descriptive_ids)))) - - left_tries = self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_AS_POP_SIZE_MULTIPLIER + pop_graph_descriptive_ids = manager.dict([(ids, True) for ids in self._pop_graph_descriptive_ids]) executor = get_reusable_executor(max_workers=self.mutation.requirements.n_jobs) - cycled_population = cycle(population) - new_population = [] - futures = deque() - def try_mutation(ind, mutation_type=None, count=self.parameters.max_num_of_mutation_attempts): + def try_mutation(ind, mutation_type=None, tries=self.parameters.max_num_of_mutation_attempts): mutation_type = mutation_type or self.mutation.agent.choose_action(ind.graph) return executor.submit(self._mutation_n_evaluation, individual=ind, - count=count, + tries=tries, mutation_type=mutation_type, pop_graph_descriptive_ids=pop_graph_descriptive_ids, mutation=mutation, evaluator=evaluator) + left_tries = self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_PER_IND + cycled_population = cycle(population) + new_population, futures = list(), list() while left_tries > 0: # create new tasks if there is not enough load if len(futures) < self.mutation.requirements.n_jobs + 2: @@ -101,15 +88,15 @@ def try_mutation(ind, mutation_type=None, count=self.parameters.max_num_of_mutat continue # get next finished future - while True: - future = futures.popleft() - if future._state == 'FINISHED': break - futures.append(future) + for i in cycle(range(len(futures))): time.sleep(0.01) # to prevent flooding + if futures[i]._state == 'FINISHED': + future = futures.pop(i) + left_tries -= 1 + break # process result - left_tries -= 1 - failed_stage, individual, mutation_type, retained_count = future.result() + failed_stage, individual, mutation_type, retained_tries = future.result() if failed_stage is None: new_population.append(individual) if len(new_population) >= self.parameters.pop_size: @@ -118,8 +105,8 @@ def try_mutation(ind, mutation_type=None, count=self.parameters.max_num_of_mutat if failed_stage == 'verification': # add experience to mutation self.mutation.agent_experience.collect_experience(individual, mutation_type, reward=-1.0) - if retained_count > 0: - futures.append(try_mutation(individual, mutation_type, retained_count)) + if retained_tries > 0: + futures.append(try_mutation(individual, mutation_type, retained_tries)) # get finished mutations for future in futures: @@ -129,20 +116,14 @@ def try_mutation(ind, mutation_type=None, count=self.parameters.max_num_of_mutat # shutdown workers and add pop_graph_descriptive_ids to self._pop_graph_descriptive_ids executor.shutdown(wait=False) - for _ in range(10): - try: - self._pop_graph_descriptive_ids |= set(pop_graph_descriptive_ids) - break - except RuntimeError as exception: - time.sleep(0.1) # time for finish all processes - else: - raise exception + time.sleep(0.1) # time for finish all processes, otherwise may crash + self._pop_graph_descriptive_ids |= set(pop_graph_descriptive_ids) return new_population def _mutation_n_evaluation(self, individual: Individual, - count: int, + tries: int, mutation_type: MutationType, pop_graph_descriptive_ids: DictProxy, mutation: SinglePredefinedMutation, @@ -150,26 +131,28 @@ def _mutation_n_evaluation(self, # mutation new_ind, mutation_type = mutation(individual, mutation_type=mutation_type) if not new_ind: - return 'mutation', individual, mutation_type, count - 1 + return 'mutation', individual, mutation_type, tries - 1 # verification if not self.verifier(new_ind.graph): - return 'verification', individual, mutation_type, count - 1 + return 'verification', individual, mutation_type, tries - 1 # unique check descriptive_id = new_ind.graph.descriptive_id - if descriptive_id in pop_graph_descriptive_ids: - # worker can't send nonempty string! wtf? - return '', individual, mutation_type, count - 1 + lock = pop_graph_descriptive_ids._mutex._lock + lock.acquire() + not_unique = descriptive_id in pop_graph_descriptive_ids + lock.release() + if not_unique: + return 'non unique', individual, mutation_type, tries - 1 pop_graph_descriptive_ids[descriptive_id] = True # evaluation new_inds = evaluator([new_ind]) if not new_inds: - # worker can't send nonempty string! wtf? - return '', individual, mutation_type, count - 1 + return 'evaluation', individual, mutation_type, tries - 1 - return None, new_inds[0], mutation_type, count - 1 + return None, new_inds[0], mutation_type, tries - 1 def _check_final_population(self, population: PopulationT) -> None: """ If population do not achieve required length return a warning or raise exception """ diff --git a/golem/core/optimisers/populational_optimizer.py b/golem/core/optimisers/populational_optimizer.py index fa6eba7b0..424466d25 100644 --- a/golem/core/optimisers/populational_optimizer.py +++ b/golem/core/optimisers/populational_optimizer.py @@ -44,8 +44,10 @@ def __init__(self, self.generations = GenerationKeeper(self.objective, keep_n_best=requirements.keep_n_best) self.timer = OptimisationTimer(timeout=self.requirements.timeout) - dispatcher_type = MultiprocessingDispatcher if self.requirements.parallelization_mode == 'populational' else \ - SequentialDispatcher + # dispatcher_type = MultiprocessingDispatcher if self.requirements.parallelization_mode == 'populational' else \ + # SequentialDispatcher + + dispatcher_type = SequentialDispatcher self.eval_dispatcher = dispatcher_type(adapter=graph_generation_params.adapter, n_jobs=requirements.n_jobs, From 3c77cfa18a07cce1663a6baff2296b5b4cd52e4e Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Mon, 30 Oct 2023 14:24:14 +0300 Subject: [PATCH 33/65] annoying problem with workers is solved, woohoo --- .../genetic/operators/reproduction.py | 34 +++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 11c6a7a0e..8fde4e564 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -1,7 +1,9 @@ import time +from copy import deepcopy from itertools import cycle from multiprocessing.managers import DictProxy from multiprocessing import Manager +import pickle from joblib.externals.loky import get_reusable_executor @@ -13,6 +15,7 @@ from golem.core.optimisers.genetic.operators.mutation import Mutation, MutationType, SinglePredefinedMutation from golem.core.optimisers.genetic.operators.operator import PopulationT, EvaluationOperator from golem.core.optimisers.genetic.operators.selection import Selection +from golem.core.optimisers.opt_history_objects.parent_operator import ParentOperator from golem.core.optimisers.populational_optimizer import EvaluationAttemptsError from golem.core.optimisers.opt_history_objects.individual import Individual @@ -97,14 +100,21 @@ def try_mutation(ind, mutation_type=None, tries=self.parameters.max_num_of_mutat # process result failed_stage, individual, mutation_type, retained_tries = future.result() - if failed_stage is None: + if failed_stage == 0: new_population.append(individual) if len(new_population) >= self.parameters.pop_size: break else: - if failed_stage == 'verification': + if failed_stage == 2: # add experience to mutation - self.mutation.agent_experience.collect_experience(individual, mutation_type, reward=-1.0) + # need to create new individual due to problems with parallel workers + # they cannot receive old individual after experience collection + parent_operator = ParentOperator(type_='mutation', operators=mutation_type, + parent_individuals=individual) + new_individual = Individual(deepcopy(individual.graph), + parent_operator, + metadata=self.mutation.requirements.static_individual_metadata) + self.mutation.agent_experience.collect_experience(new_individual, mutation_type, reward=-1.0) if retained_tries > 0: futures.append(try_mutation(individual, mutation_type, retained_tries)) @@ -131,28 +141,24 @@ def _mutation_n_evaluation(self, # mutation new_ind, mutation_type = mutation(individual, mutation_type=mutation_type) if not new_ind: - return 'mutation', individual, mutation_type, tries - 1 + return 1, individual, mutation_type, tries - 1 # verification if not self.verifier(new_ind.graph): - return 'verification', individual, mutation_type, tries - 1 + return 2, individual, mutation_type, tries - 1 # unique check descriptive_id = new_ind.graph.descriptive_id - lock = pop_graph_descriptive_ids._mutex._lock - lock.acquire() - not_unique = descriptive_id in pop_graph_descriptive_ids - lock.release() - if not_unique: - return 'non unique', individual, mutation_type, tries - 1 + if descriptive_id in pop_graph_descriptive_ids: + return 3, individual, mutation_type, tries - 1 pop_graph_descriptive_ids[descriptive_id] = True # evaluation new_inds = evaluator([new_ind]) - if not new_inds: - return 'evaluation', individual, mutation_type, tries - 1 + if not new_inds or new_inds[0].fitness.value is None: + return 4, individual, mutation_type, tries - 1 - return None, new_inds[0], mutation_type, tries - 1 + return 0, new_inds[0], mutation_type, tries - 1 def _check_final_population(self, population: PopulationT) -> None: """ If population do not achieve required length return a warning or raise exception """ From 461bfe80161ae02c9033e78818e51ba09260cac4 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Mon, 30 Oct 2023 16:04:50 +0300 Subject: [PATCH 34/65] fix tests and reproducer --- .../genetic/operators/reproduction.py | 7 ++++--- .../test_reproduction_controller.py | 21 +++---------------- 2 files changed, 7 insertions(+), 21 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 8fde4e564..06398fdbc 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -4,6 +4,7 @@ from multiprocessing.managers import DictProxy from multiprocessing import Manager import pickle +from typing import Optional from joblib.externals.loky import get_reusable_executor @@ -37,12 +38,12 @@ def __init__(self, selection: Selection, mutation: Mutation, crossover: Crossover, - verifier: GraphVerifier): + verifier: Optional[GraphVerifier] = None): self.parameters = parameters self.selection = selection self.mutation = mutation self.crossover = crossover - self.verifier = verifier + self.verifier = verifier or self.mutation.graph_generation_params.verifier self._pop_graph_descriptive_ids = set() self._minimum_valid_ratio = parameters.required_valid_ratio * 0.5 @@ -155,7 +156,7 @@ def _mutation_n_evaluation(self, # evaluation new_inds = evaluator([new_ind]) - if not new_inds or new_inds[0].fitness.value is None: + if not new_inds:# or new_inds[0].fitness.value is None: return 4, individual, mutation_type, tries - 1 return 0, new_inds[0], mutation_type, tries - 1 diff --git a/test/unit/optimizers/gp_operators/test_reproduction_controller.py b/test/unit/optimizers/gp_operators/test_reproduction_controller.py index e1034f453..8560235a7 100644 --- a/test/unit/optimizers/gp_operators/test_reproduction_controller.py +++ b/test/unit/optimizers/gp_operators/test_reproduction_controller.py @@ -2,7 +2,6 @@ from math import ceil from typing import Optional -import numpy as np import pytest from examples.synthetic_graph_evolution.generators import generate_labeled_graph @@ -69,21 +68,7 @@ def reproducer() -> ReproductionController: return reproduction -@pytest.mark.parametrize('success_rate', [0.4, 0.5, 0.9, 1.0]) -def test_mean_success_rate(reproducer: ReproductionController, success_rate: float): - """Tests that Reproducer correctly estimates average success rate""" - assert np.isclose(reproducer.mean_success_rate, 1.0) - - evaluator = MockEvaluator(success_rate) - pop = get_rand_population(reproducer.parameters.pop_size) - num_iters = 50 - for i in range(num_iters): - pop = reproducer.reproduce(pop, evaluator) - - assert np.isclose(reproducer.mean_success_rate, success_rate, rtol=0.1) - - -@pytest.mark.parametrize('success_rate', [0.0, 0.1]) +@pytest.mark.parametrize('success_rate', [0.0]) def test_too_little_valid_evals(reproducer: ReproductionController, success_rate: float): evaluator = MockEvaluator(success_rate) pop = get_rand_population(reproducer.parameters.pop_size) @@ -101,7 +86,7 @@ def test_minimal_valid_evals(reproducer: ReproductionController, success_rate: f for i in range(num_iters): pop = reproducer.reproduce(pop, evaluator) actual_valid_ratio = len(pop) / parameters.pop_size - assert parameters.required_valid_ratio > actual_valid_ratio >= reproducer._minimum_valid_ratio + assert actual_valid_ratio >= reproducer._minimum_valid_ratio @pytest.mark.parametrize('success_rate', [0.4, 0.9, 1.0]) @@ -125,7 +110,7 @@ def test_pop_size_progression(reproducer: ReproductionController, success_rate: assert (actual_pop_size > len(prev_pop) or actual_pop_size >= parameters.max_pop_size * required_valid) # and that this increase follows the one from parameters - assert 1.0 >= (actual_pop_size / parameters.pop_size) >= required_valid + assert (actual_pop_size / parameters.pop_size) >= required_valid # update pop size parameters.pop_size = pop_size_progress.next(pop) From eb876ac7ac70aa029aa91069581008a015297a86 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Tue, 31 Oct 2023 11:27:41 +0300 Subject: [PATCH 35/65] fix problem with inds but not with workers stopping --- .../genetic/operators/reproduction.py | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 06398fdbc..33d6abd76 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -1,6 +1,6 @@ import time from copy import deepcopy -from itertools import cycle +from itertools import cycle, chain from multiprocessing.managers import DictProxy from multiprocessing import Manager import pickle @@ -130,7 +130,24 @@ def try_mutation(ind, mutation_type=None, tries=self.parameters.max_num_of_mutat time.sleep(0.1) # time for finish all processes, otherwise may crash self._pop_graph_descriptive_ids |= set(pop_graph_descriptive_ids) - return new_population + # rebuild population due to problem with changing id of individuals in parallel individuals building + to_add = chain(*[ind.parents + ind.parents_from_prev_generation + [ind] for ind in population]) + population_uid_map = {ind.uid: ind for ind in to_add} + rebuilded_population = [] + for individual in new_population: + if individual.parent_operator: + parent_operator = ParentOperator(type_=individual.parent_operator.type_, + operators=individual.parent_operator.operators, + parent_individuals=population_uid_map[ + individual.parent_operator.parent_individuals[0].uid]) + else: + parent_operator = None + individual = Individual(deepcopy(individual.graph), + parent_operator, + fitness=individual.fitness, + metadata=self.mutation.requirements.static_individual_metadata) + rebuilded_population.append(individual) + return rebuilded_population def _mutation_n_evaluation(self, individual: Individual, From cb948333bf072ff96ac5473232388713517399b4 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Tue, 31 Oct 2023 11:33:39 +0300 Subject: [PATCH 36/65] may be fix --- .../genetic/operators/reproduction.py | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 33d6abd76..52aec884c 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -85,6 +85,7 @@ def try_mutation(ind, mutation_type=None, tries=self.parameters.max_num_of_mutat left_tries = self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_PER_IND cycled_population = cycle(population) new_population, futures = list(), list() + inds_for_experience = [] while left_tries > 0: # create new tasks if there is not enough load if len(futures) < self.mutation.requirements.n_jobs + 2: @@ -107,15 +108,7 @@ def try_mutation(ind, mutation_type=None, tries=self.parameters.max_num_of_mutat break else: if failed_stage == 2: - # add experience to mutation - # need to create new individual due to problems with parallel workers - # they cannot receive old individual after experience collection - parent_operator = ParentOperator(type_='mutation', operators=mutation_type, - parent_individuals=individual) - new_individual = Individual(deepcopy(individual.graph), - parent_operator, - metadata=self.mutation.requirements.static_individual_metadata) - self.mutation.agent_experience.collect_experience(new_individual, mutation_type, reward=-1.0) + inds_for_experience.append((individual, mutation_type)) if retained_tries > 0: futures.append(try_mutation(individual, mutation_type, retained_tries)) @@ -127,12 +120,17 @@ def try_mutation(ind, mutation_type=None, tries=self.parameters.max_num_of_mutat # shutdown workers and add pop_graph_descriptive_ids to self._pop_graph_descriptive_ids executor.shutdown(wait=False) - time.sleep(0.1) # time for finish all processes, otherwise may crash + + # add experience for agent + for individual, mutation_type in inds_for_experience: + self.mutation.agent_experience.collect_experience(individual, mutation_type, reward=-1.0) + + # update looked graphs self._pop_graph_descriptive_ids |= set(pop_graph_descriptive_ids) # rebuild population due to problem with changing id of individuals in parallel individuals building - to_add = chain(*[ind.parents + ind.parents_from_prev_generation + [ind] for ind in population]) - population_uid_map = {ind.uid: ind for ind in to_add} + population_uid_map = {ind.uid: ind + for ind in chain(*[ind.parents + [ind] for ind in population])} rebuilded_population = [] for individual in new_population: if individual.parent_operator: From 80450857ba7c339454e31bdecebf782df8683d2f Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Tue, 31 Oct 2023 12:36:40 +0300 Subject: [PATCH 37/65] small fixes --- golem/core/optimisers/genetic/operators/reproduction.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 52aec884c..547368bcd 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -84,8 +84,7 @@ def try_mutation(ind, mutation_type=None, tries=self.parameters.max_num_of_mutat left_tries = self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_PER_IND cycled_population = cycle(population) - new_population, futures = list(), list() - inds_for_experience = [] + new_population, futures, inds_for_experience = list(), list(), list() while left_tries > 0: # create new tasks if there is not enough load if len(futures) < self.mutation.requirements.n_jobs + 2: @@ -126,7 +125,7 @@ def try_mutation(ind, mutation_type=None, tries=self.parameters.max_num_of_mutat self.mutation.agent_experience.collect_experience(individual, mutation_type, reward=-1.0) # update looked graphs - self._pop_graph_descriptive_ids |= set(pop_graph_descriptive_ids) + self._pop_graph_descriptive_ids |= set(pop_graph_descriptive_ids.keys()) # rebuild population due to problem with changing id of individuals in parallel individuals building population_uid_map = {ind.uid: ind From 14a458b5fdabd482f918ccb4def8935da5c0fadc Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Tue, 31 Oct 2023 13:23:37 +0300 Subject: [PATCH 38/65] delete tests for deleted functional --- .../gp_operators/test_gp_operators.py | 31 ------------------- 1 file changed, 31 deletions(-) diff --git a/test/unit/optimizers/gp_operators/test_gp_operators.py b/test/unit/optimizers/gp_operators/test_gp_operators.py index 77ff0a6be..a204dac09 100644 --- a/test/unit/optimizers/gp_operators/test_gp_operators.py +++ b/test/unit/optimizers/gp_operators/test_gp_operators.py @@ -128,34 +128,3 @@ def test_graphs_with_multi_root_equivalent_subtree(): similar_nodes_first_and_second = equivalent_subtree(graph_first=graph_first, graph_second=graph_second, with_primary_nodes=True) assert len(similar_nodes_first_and_second) == 8 - - -def test_structural_diversity(): - """ Checks if `get_structure_unique_population` method returns population without structural duplicates. """ - operations = ['a', 'b', 'c', 'd', 'e'] - population_with_reps = population_with_structural_duplicates(operations=operations) - optimizer, objective = set_up_optimizer(operations=operations) - - adapter = DirectAdapter() - evaluator = SequentialDispatcher(adapter).dispatch(objective) - new_population = optimizer.get_structure_unique_population(population_with_reps, evaluator) - - target_new_population = [] - for op in operations: - target_new_population += [Individual(adapter.adapt(get_graph_with_operation(operation=op)))] - - for i in range(len(target_new_population)): - assert graphs_same(new_population[i].graph, target_new_population[i].graph) - - -def test_recover_pop_size_after_structure_check(): - """ Checks that `get_structure_unique_population` extends population - if after structural check there sre less than MIN_POP_SIZE individuals in population. """ - operations = ['a', 'b', 'c'] - population_with_reps = population_with_structural_duplicates(operations=operations) - optimizer, objective = set_up_optimizer(operations=operations) - adapter = DirectAdapter() - evaluator = SequentialDispatcher(adapter).dispatch(objective) - new_population = optimizer.get_structure_unique_population(population_with_reps, evaluator) - - assert len(new_population) == MIN_POP_SIZE From ce755921f45a3328eddf78275ba36e44a0ecd4d5 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Tue, 31 Oct 2023 13:28:00 +0300 Subject: [PATCH 39/65] small fixes --- golem/core/optimisers/genetic/operators/reproduction.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 547368bcd..240df5702 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -3,7 +3,6 @@ from itertools import cycle, chain from multiprocessing.managers import DictProxy from multiprocessing import Manager -import pickle from typing import Optional from joblib.externals.loky import get_reusable_executor @@ -72,7 +71,9 @@ def _mutate_over_population(self, population: PopulationT, evaluator: Evaluation pop_graph_descriptive_ids = manager.dict([(ids, True) for ids in self._pop_graph_descriptive_ids]) executor = get_reusable_executor(max_workers=self.mutation.requirements.n_jobs) - def try_mutation(ind, mutation_type=None, tries=self.parameters.max_num_of_mutation_attempts): + def try_mutation(ind: Individual, + mutation_type: Optional[MutationType] = None, + tries: int = self.parameters.max_num_of_mutation_attempts): mutation_type = mutation_type or self.mutation.agent.choose_action(ind.graph) return executor.submit(self._mutation_n_evaluation, individual=ind, @@ -170,7 +171,7 @@ def _mutation_n_evaluation(self, # evaluation new_inds = evaluator([new_ind]) - if not new_inds:# or new_inds[0].fitness.value is None: + if not new_inds: return 4, individual, mutation_type, tries - 1 return 0, new_inds[0], mutation_type, tries - 1 From 063826e9d92973a72d3086f1a7613338d38beb3c Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Tue, 31 Oct 2023 14:03:00 +0300 Subject: [PATCH 40/65] small fixes --- golem/core/optimisers/genetic/operators/reproduction.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 240df5702..99d700667 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -60,7 +60,7 @@ def reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> P def _mutate_over_population(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: """Generate new individuals by mutation in parallel. - Implements additional checks on population to ensure that population size follows + Implements additional checks on population to ensure that population size is greater or equal to required population size. Also controls uniqueness of population. """ with Manager() as manager: @@ -118,7 +118,7 @@ def try_mutation(ind: Individual, applied, ind, *_ = future.result() if applied: new_population.append(ind) - # shutdown workers and add pop_graph_descriptive_ids to self._pop_graph_descriptive_ids + # shutdown workers executor.shutdown(wait=False) # add experience for agent From 5c95a71e3b8ee0c320af4def66b3aafb0431792b Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 2 Nov 2023 15:05:36 +0300 Subject: [PATCH 41/65] make requested changes --- .../genetic/operators/reproduction.py | 34 ++++++++++++------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 99d700667..924efc045 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -1,5 +1,6 @@ import time from copy import deepcopy +from enum import Enum from itertools import cycle, chain from multiprocessing.managers import DictProxy from multiprocessing import Manager @@ -20,6 +21,14 @@ from golem.core.optimisers.opt_history_objects.individual import Individual +class FailedStageEnum(Enum): + NONE = 0 + MUTATION = 1 + VERIFICATION = 2 + UNIQUENESS_CHECK = 3 + EVALUATION = 4 + + class ReproductionController: """ Task of the Reproduction Controller is to reproduce population @@ -63,7 +72,7 @@ def _mutate_over_population(self, population: PopulationT, evaluator: Evaluation Implements additional checks on population to ensure that population size is greater or equal to required population size. Also controls uniqueness of population. """ - with Manager() as manager: + with (Manager() as manager): mutation = SinglePredefinedMutation(parameters=self.mutation.parameters, requirements=self.mutation.requirements, graph_gen_params=self.mutation.graph_generation_params, @@ -71,12 +80,13 @@ def _mutate_over_population(self, population: PopulationT, evaluator: Evaluation pop_graph_descriptive_ids = manager.dict([(ids, True) for ids in self._pop_graph_descriptive_ids]) executor = get_reusable_executor(max_workers=self.mutation.requirements.n_jobs) - def try_mutation(ind: Individual, + def try_mutation(individual: Individual, mutation_type: Optional[MutationType] = None, - tries: int = self.parameters.max_num_of_mutation_attempts): - mutation_type = mutation_type or self.mutation.agent.choose_action(ind.graph) + tries: int = self.parameters.max_num_of_mutation_attempts + ) -> (FailedStageEnum, Individual, MutationType, int): + mutation_type = mutation_type or self.mutation.agent.choose_action(individual.graph) return executor.submit(self._mutation_n_evaluation, - individual=ind, + individual=individual, tries=tries, mutation_type=mutation_type, pop_graph_descriptive_ids=pop_graph_descriptive_ids, @@ -102,12 +112,12 @@ def try_mutation(ind: Individual, # process result failed_stage, individual, mutation_type, retained_tries = future.result() - if failed_stage == 0: + if failed_stage is FailedStageEnum.NONE: new_population.append(individual) if len(new_population) >= self.parameters.pop_size: break else: - if failed_stage == 2: + if failed_stage is FailedStageEnum.VERIFICATION: inds_for_experience.append((individual, mutation_type)) if retained_tries > 0: futures.append(try_mutation(individual, mutation_type, retained_tries)) @@ -157,24 +167,24 @@ def _mutation_n_evaluation(self, # mutation new_ind, mutation_type = mutation(individual, mutation_type=mutation_type) if not new_ind: - return 1, individual, mutation_type, tries - 1 + return FailedStageEnum.MUTATION, individual, mutation_type, tries - 1 # verification if not self.verifier(new_ind.graph): - return 2, individual, mutation_type, tries - 1 + return FailedStageEnum.VERIFICATION, individual, mutation_type, tries - 1 # unique check descriptive_id = new_ind.graph.descriptive_id if descriptive_id in pop_graph_descriptive_ids: - return 3, individual, mutation_type, tries - 1 + return FailedStageEnum.UNIQUENESS_CHECK, individual, mutation_type, tries - 1 pop_graph_descriptive_ids[descriptive_id] = True # evaluation new_inds = evaluator([new_ind]) if not new_inds: - return 4, individual, mutation_type, tries - 1 + return FailedStageEnum.EVALUATION, individual, mutation_type, tries - 1 - return 0, new_inds[0], mutation_type, tries - 1 + return FailedStageEnum.NONE, new_inds[0], mutation_type, tries - 1 def _check_final_population(self, population: PopulationT) -> None: """ If population do not achieve required length return a warning or raise exception """ From 7ed6107d5c1114dee707ffdd6c3cbd887c60b371 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 2 Nov 2023 15:33:45 +0300 Subject: [PATCH 42/65] pep8 --- golem/core/optimisers/genetic/gp_optimizer.py | 1 - golem/core/optimisers/genetic/operators/crossover.py | 2 +- golem/core/optimisers/genetic/operators/mutation.py | 1 - golem/core/optimisers/genetic/operators/reproduction.py | 3 ++- golem/core/optimisers/populational_optimizer.py | 3 +-- test/unit/optimizers/gp_operators/test_gp_operators.py | 4 +--- 6 files changed, 5 insertions(+), 9 deletions(-) diff --git a/golem/core/optimisers/genetic/gp_optimizer.py b/golem/core/optimisers/genetic/gp_optimizer.py index ed09d6f24..8e768e8f0 100644 --- a/golem/core/optimisers/genetic/gp_optimizer.py +++ b/golem/core/optimisers/genetic/gp_optimizer.py @@ -1,4 +1,3 @@ -from copy import deepcopy from typing import Sequence, Union, Any from golem.core.dag.graph import Graph diff --git a/golem/core/optimisers/genetic/operators/crossover.py b/golem/core/optimisers/genetic/operators/crossover.py index 61c47370c..e4f28ea19 100644 --- a/golem/core/optimisers/genetic/operators/crossover.py +++ b/golem/core/optimisers/genetic/operators/crossover.py @@ -45,7 +45,7 @@ def __call__(self, population: PopulationT) -> PopulationT: if len(population) > 1: with Parallel(n_jobs=self.requirements.n_jobs) as parallel: new_population = parallel(delayed(self._crossover)(ind_1, ind_2) - for ind_1, ind_2 in Crossover.crossover_parents_selection(population)) + for ind_1, ind_2 in Crossover.crossover_parents_selection(population)) new_population = list(chain(*new_population)) else: new_population = population[:] diff --git a/golem/core/optimisers/genetic/operators/mutation.py b/golem/core/optimisers/genetic/operators/mutation.py index 38a26a4ab..d24de3252 100644 --- a/golem/core/optimisers/genetic/operators/mutation.py +++ b/golem/core/optimisers/genetic/operators/mutation.py @@ -1,5 +1,4 @@ from copy import deepcopy -from multiprocessing.managers import ValueProxy from random import random from typing import Callable, Union, Tuple, TYPE_CHECKING, Mapping, Hashable, Optional diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 924efc045..fb0784117 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -126,7 +126,8 @@ def try_mutation(individual: Individual, for future in futures: if future._state == 'FINISHED': applied, ind, *_ = future.result() - if applied: new_population.append(ind) + if applied: + new_population.append(ind) # shutdown workers executor.shutdown(wait=False) diff --git a/golem/core/optimisers/populational_optimizer.py b/golem/core/optimisers/populational_optimizer.py index 424466d25..2144f19d3 100644 --- a/golem/core/optimisers/populational_optimizer.py +++ b/golem/core/optimisers/populational_optimizer.py @@ -2,7 +2,6 @@ from random import choice from typing import Any, Optional, Sequence, Dict -from golem.core.constants import MIN_POP_SIZE from golem.core.dag.graph import Graph from golem.core.optimisers.archive import GenerationKeeper from golem.core.optimisers.genetic.evaluation import MultiprocessingDispatcher, SequentialDispatcher @@ -159,4 +158,4 @@ def __init__(self, *args): self.message = args[0] or None def __str__(self): - return self.message or 'Too many fitness evaluation errors.' \ No newline at end of file + return self.message or 'Too many fitness evaluation errors.' diff --git a/test/unit/optimizers/gp_operators/test_gp_operators.py b/test/unit/optimizers/gp_operators/test_gp_operators.py index a204dac09..33b4cb5fe 100644 --- a/test/unit/optimizers/gp_operators/test_gp_operators.py +++ b/test/unit/optimizers/gp_operators/test_gp_operators.py @@ -4,13 +4,11 @@ import pytest from golem.core.adapter import DirectAdapter -from golem.core.constants import MIN_POP_SIZE from golem.core.dag.graph_utils import nodes_from_layer from golem.core.dag.linked_graph import LinkedGraph from golem.core.dag.linked_graph_node import LinkedGraphNode from golem.core.optimisers.archive import ParetoFront from golem.core.optimisers.fitness.multi_objective_fitness import MultiObjFitness -from golem.core.optimisers.genetic.evaluation import SequentialDispatcher from golem.core.optimisers.genetic.gp_operators import filter_duplicates, replace_subtrees, equivalent_subtree from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters from golem.core.optimisers.objective import Objective @@ -19,7 +17,7 @@ from golem.core.optimisers.optimizer import GraphGenerationParams from golem.core.optimisers.populational_optimizer import PopulationalOptimizer from test.unit.utils import graph_first, graph_second, graph_third, graph_fourth, graph_with_multi_roots_second, \ - graph_with_multi_roots_first, graphs_same, RandomMetric + graph_with_multi_roots_first, RandomMetric def get_graph_with_operation(operation: str) -> LinkedGraph: From cfe8e553322034de296c5b0c1da1578dd615ae62 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 2 Nov 2023 15:58:11 +0300 Subject: [PATCH 43/65] fix python 3.8 error --- golem/core/optimisers/genetic/operators/reproduction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index fb0784117..acb9ed460 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -72,7 +72,7 @@ def _mutate_over_population(self, population: PopulationT, evaluator: Evaluation Implements additional checks on population to ensure that population size is greater or equal to required population size. Also controls uniqueness of population. """ - with (Manager() as manager): + with Manager() as manager: mutation = SinglePredefinedMutation(parameters=self.mutation.parameters, requirements=self.mutation.requirements, graph_gen_params=self.mutation.graph_generation_params, From 1a1aa6122e469e442bc5ad121af6260f590ac421 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Fri, 3 Nov 2023 13:19:53 +0300 Subject: [PATCH 44/65] small future extracting --- golem/core/optimisers/genetic/operators/reproduction.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index acb9ed460..253474e3f 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -1,4 +1,5 @@ import time +from concurrent.futures import as_completed from copy import deepcopy from enum import Enum from itertools import cycle, chain @@ -103,12 +104,8 @@ def try_mutation(individual: Individual, continue # get next finished future - for i in cycle(range(len(futures))): - time.sleep(0.01) # to prevent flooding - if futures[i]._state == 'FINISHED': - future = futures.pop(i) - left_tries -= 1 - break + future = next(as_completed(futures)) + futures.remove(future) # process result failed_stage, individual, mutation_type, retained_tries = future.result() From cbc9f92d8f6b16e38f653ed87f72937d55b55c8a Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Fri, 3 Nov 2023 13:24:03 +0300 Subject: [PATCH 45/65] small fix --- golem/core/optimisers/genetic/operators/reproduction.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 253474e3f..5d5422156 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -99,9 +99,8 @@ def try_mutation(individual: Individual, new_population, futures, inds_for_experience = list(), list(), list() while left_tries > 0: # create new tasks if there is not enough load - if len(futures) < self.mutation.requirements.n_jobs + 2: + while len(futures) < self.mutation.requirements.n_jobs + 1: futures.append(try_mutation(next(cycled_population))) - continue # get next finished future future = next(as_completed(futures)) From 2112b4c31d6b43201b0b65d00d65b2684f652d52 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Fri, 3 Nov 2023 15:28:34 +0300 Subject: [PATCH 46/65] speedup wip --- .../optimisers/genetic/operators/mutation.py | 2 +- .../genetic/operators/reproduction.py | 150 +++++++++--------- 2 files changed, 78 insertions(+), 74 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/mutation.py b/golem/core/optimisers/genetic/operators/mutation.py index d24de3252..1d708b902 100644 --- a/golem/core/optimisers/genetic/operators/mutation.py +++ b/golem/core/optimisers/genetic/operators/mutation.py @@ -175,4 +175,4 @@ def __call__(self, individual: Individual, mutation_type: MutationType) -> Indiv parent_operator = ParentOperator(type_='mutation', operators=mutation_type, parent_individuals=individual) individual = Individual(new_graph, parent_operator, metadata=self.requirements.static_individual_metadata) - return individual, mutation_type + return individual diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 5d5422156..6b044ff19 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -5,16 +5,17 @@ from itertools import cycle, chain from multiprocessing.managers import DictProxy from multiprocessing import Manager -from typing import Optional +from typing import Optional, Dict from joblib.externals.loky import get_reusable_executor +from joblib.externals.loky.backend.queues import Queue from golem.core.constants import MAX_GRAPH_GEN_ATTEMPTS_PER_IND from golem.core.dag.graph_verifier import GraphVerifier from golem.core.log import default_log from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters from golem.core.optimisers.genetic.operators.crossover import Crossover -from golem.core.optimisers.genetic.operators.mutation import Mutation, MutationType, SinglePredefinedMutation +from golem.core.optimisers.genetic.operators.mutation import Mutation, SinglePredefinedMutation from golem.core.optimisers.genetic.operators.operator import PopulationT, EvaluationOperator from golem.core.optimisers.genetic.operators.selection import Selection from golem.core.optimisers.opt_history_objects.parent_operator import ParentOperator @@ -62,10 +63,16 @@ def __init__(self, def reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: """Reproduces and evaluates population (select, crossover, mutate). """ + t0 = time.perf_counter() selected_individuals = self.selection(population, self.parameters.pop_size) + t1 = time.perf_counter() new_population = self.crossover(selected_individuals) + t2 = time.perf_counter() new_population = self._mutate_over_population(new_population, evaluator) + t3 = time.perf_counter() self._check_final_population(new_population) + t4 = time.perf_counter() + print('\n\n reproduce:', t1-t0, t2-t1, t3-t2, t4-t3, '\n\n') return new_population def _mutate_over_population(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: @@ -74,56 +81,82 @@ def _mutate_over_population(self, population: PopulationT, evaluator: Evaluation required population size. Also controls uniqueness of population. """ with Manager() as manager: + population_uid_map = {ind.uid: ind for ind in population} + left_tries = self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_PER_IND + cycled_population_uid = cycle(population_uid_map) + new_population, inds_for_experience = list(), list() mutation = SinglePredefinedMutation(parameters=self.mutation.parameters, requirements=self.mutation.requirements, graph_gen_params=self.mutation.graph_generation_params, mutations_repo=self.mutation._mutations_repo) pop_graph_descriptive_ids = manager.dict([(ids, True) for ids in self._pop_graph_descriptive_ids]) + task_queue, result_queue = manager.Queue(), manager.Queue() + + def worker(pop_graph_descriptive_ids: DictProxy = pop_graph_descriptive_ids, + mutation: SinglePredefinedMutation = mutation, + evaluator: EvaluationOperator = evaluator, + population_uid_map: Dict[str, Individual] = population_uid_map, + task_queue: Queue = task_queue, + result_queue: Queue = result_queue) -> None: + while True: + individual_uid, mutation_type, tries = task_queue.get() + individual = population_uid_map[individual_uid] + + # mutation + new_ind = mutation(individual, mutation_type=mutation_type) + if not new_ind: + result_queue.put((FailedStageEnum.MUTATION, individual_uid, mutation_type)) + if tries > 0: + task_queue.put((individual_uid, mutation_type, tries - 1)) + continue + + # verification + if not self.verifier(new_ind.graph): + result_queue.put((FailedStageEnum.VERIFICATION, individual_uid, mutation_type)) + if tries > 0: + task_queue.put((individual_uid, mutation_type, tries - 1)) + continue + + # unique check + descriptive_id = new_ind.graph.descriptive_id + if descriptive_id in pop_graph_descriptive_ids: + result_queue.put((FailedStageEnum.UNIQUENESS_CHECK, individual_uid, mutation_type)) + if tries > 0: + task_queue.put((individual_uid, mutation_type, tries - 1)) + continue + pop_graph_descriptive_ids[descriptive_id] = True + + # evaluation + new_inds = evaluator([new_ind]) + if not new_inds: + result_queue.put((FailedStageEnum.EVALUATION, individual_uid, mutation_type)) + if tries > 0: + task_queue.put((individual_uid, mutation_type, tries - 1)) + continue + + result_queue.put((FailedStageEnum.NONE, new_inds[0], mutation_type)) + + + # create pool executor = get_reusable_executor(max_workers=self.mutation.requirements.n_jobs) + for _ in range(self.mutation.requirements.n_jobs - 1): executor.submit(worker) - def try_mutation(individual: Individual, - mutation_type: Optional[MutationType] = None, - tries: int = self.parameters.max_num_of_mutation_attempts - ) -> (FailedStageEnum, Individual, MutationType, int): - mutation_type = mutation_type or self.mutation.agent.choose_action(individual.graph) - return executor.submit(self._mutation_n_evaluation, - individual=individual, - tries=tries, - mutation_type=mutation_type, - pop_graph_descriptive_ids=pop_graph_descriptive_ids, - mutation=mutation, - evaluator=evaluator) - - left_tries = self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_PER_IND - cycled_population = cycle(population) - new_population, futures, inds_for_experience = list(), list(), list() - while left_tries > 0: - # create new tasks if there is not enough load - while len(futures) < self.mutation.requirements.n_jobs + 1: - futures.append(try_mutation(next(cycled_population))) - - # get next finished future - future = next(as_completed(futures)) - futures.remove(future) + while left_tries > 0 and len(new_population) < self.parameters.pop_size: + # if there is not enough jobs, create new + while task_queue.qsize() < 2: + individual_uid = next(cycled_population_uid) + mutation_type = self.mutation.agent.choose_action(population_uid_map[individual_uid].graph) + task_queue.put((individual_uid, mutation_type, self.parameters.max_num_of_mutation_attempts)) + time.sleep(0.01) # process result - failed_stage, individual, mutation_type, retained_tries = future.result() - if failed_stage is FailedStageEnum.NONE: - new_population.append(individual) - if len(new_population) >= self.parameters.pop_size: - break - else: - if failed_stage is FailedStageEnum.VERIFICATION: - inds_for_experience.append((individual, mutation_type)) - if retained_tries > 0: - futures.append(try_mutation(individual, mutation_type, retained_tries)) - - # get finished mutations - for future in futures: - if future._state == 'FINISHED': - applied, ind, *_ = future.result() - if applied: - new_population.append(ind) + if result_queue.qsize() > 0: + failed_stage, individual, mutation_type = result_queue.get() + left_tries -= 1 + if failed_stage is FailedStageEnum.NONE: + new_population.append(individual) + elif failed_stage is FailedStageEnum.VERIFICATION: + inds_for_experience.append((population_uid_map[individual], mutation_type)) # shutdown workers executor.shutdown(wait=False) @@ -154,35 +187,6 @@ def try_mutation(individual: Individual, rebuilded_population.append(individual) return rebuilded_population - def _mutation_n_evaluation(self, - individual: Individual, - tries: int, - mutation_type: MutationType, - pop_graph_descriptive_ids: DictProxy, - mutation: SinglePredefinedMutation, - evaluator: EvaluationOperator): - # mutation - new_ind, mutation_type = mutation(individual, mutation_type=mutation_type) - if not new_ind: - return FailedStageEnum.MUTATION, individual, mutation_type, tries - 1 - - # verification - if not self.verifier(new_ind.graph): - return FailedStageEnum.VERIFICATION, individual, mutation_type, tries - 1 - - # unique check - descriptive_id = new_ind.graph.descriptive_id - if descriptive_id in pop_graph_descriptive_ids: - return FailedStageEnum.UNIQUENESS_CHECK, individual, mutation_type, tries - 1 - pop_graph_descriptive_ids[descriptive_id] = True - - # evaluation - new_inds = evaluator([new_ind]) - if not new_inds: - return FailedStageEnum.EVALUATION, individual, mutation_type, tries - 1 - - return FailedStageEnum.NONE, new_inds[0], mutation_type, tries - 1 - def _check_final_population(self, population: PopulationT) -> None: """ If population do not achieve required length return a warning or raise exception """ target_pop_size = self.parameters.pop_size @@ -195,4 +199,4 @@ def _check_final_population(self, population: PopulationT) -> None: elif len(population) < target_pop_size: self._log.warning(f'Could not achieve required population size: ' f'have {len(population)},' - f' required {target_pop_size}!\n' + helpful_msg) + f' required {target_pop_size}!\n' + helpful_msg) \ No newline at end of file From 596cf52c2d50de7c991018c97517edc19afb781f Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Fri, 3 Nov 2023 17:01:57 +0300 Subject: [PATCH 47/65] wip --- .../genetic/operators/reproduction.py | 123 ++++++++++++------ 1 file changed, 85 insertions(+), 38 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 6b044ff19..e1b524081 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -5,7 +5,7 @@ from itertools import cycle, chain from multiprocessing.managers import DictProxy from multiprocessing import Manager -from typing import Optional, Dict +from typing import Optional, Dict, Union from joblib.externals.loky import get_reusable_executor from joblib.externals.loky.backend.queues import Queue @@ -15,7 +15,7 @@ from golem.core.log import default_log from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters from golem.core.optimisers.genetic.operators.crossover import Crossover -from golem.core.optimisers.genetic.operators.mutation import Mutation, SinglePredefinedMutation +from golem.core.optimisers.genetic.operators.mutation import Mutation, SinglePredefinedMutation, MutationType from golem.core.optimisers.genetic.operators.operator import PopulationT, EvaluationOperator from golem.core.optimisers.genetic.operators.selection import Selection from golem.core.optimisers.opt_history_objects.parent_operator import ParentOperator @@ -67,8 +67,10 @@ def reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> P selected_individuals = self.selection(population, self.parameters.pop_size) t1 = time.perf_counter() new_population = self.crossover(selected_individuals) + print('crossover is over') t2 = time.perf_counter() new_population = self._mutate_over_population(new_population, evaluator) + print('mutation is over') t3 = time.perf_counter() self._check_final_population(new_population) t4 = time.perf_counter() @@ -76,6 +78,42 @@ def reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> P return new_population def _mutate_over_population(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: + if self.mutation.requirements.n_jobs > 1: + new_population = self._mutate_over_population_parallel(population, evaluator) + else: + new_population = self._mutate_over_population_single_thread(population, evaluator) + return new_population + + def _mutate_over_population_single_thread(self, + population: PopulationT, + evaluator: EvaluationOperator) -> PopulationT: + left_tries = self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_PER_IND + new_population, cycled_population = [], cycle(population) + mutation = SinglePredefinedMutation(parameters=self.mutation.parameters, + requirements=self.mutation.requirements, + graph_gen_params=self.mutation.graph_generation_params, + mutations_repo=self.mutation._mutations_repo) + pop_graph_descriptive_ids = {key: True for key in self._pop_graph_descriptive_ids} + + while left_tries > 0 and len(new_population) < self.parameters.pop_size: + individual = next(cycled_population) + mutation_type = self.mutation.agent.choose_action(individual.graph) + + failed_stage, individual, mutation_type, retained_tries = \ + self._mutation_n_evaluation(individual=next(cycled_population), + tries=1, + mutation_type=mutation_type, + pop_graph_descriptive_ids=pop_graph_descriptive_ids, + mutation=mutation, + evaluator=evaluator) + if failed_stage is FailedStageEnum.NONE: + new_population.append(individual) + + # update looked graphs + self._pop_graph_descriptive_ids |= set(pop_graph_descriptive_ids.keys()) + return new_population + + def _mutate_over_population_parallel(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: """Generate new individuals by mutation in parallel. Implements additional checks on population to ensure that population size is greater or equal to required population size. Also controls uniqueness of population. @@ -100,46 +138,26 @@ def worker(pop_graph_descriptive_ids: DictProxy = pop_graph_descriptive_ids, result_queue: Queue = result_queue) -> None: while True: individual_uid, mutation_type, tries = task_queue.get() - individual = population_uid_map[individual_uid] - - # mutation - new_ind = mutation(individual, mutation_type=mutation_type) - if not new_ind: - result_queue.put((FailedStageEnum.MUTATION, individual_uid, mutation_type)) - if tries > 0: - task_queue.put((individual_uid, mutation_type, tries - 1)) - continue - - # verification - if not self.verifier(new_ind.graph): - result_queue.put((FailedStageEnum.VERIFICATION, individual_uid, mutation_type)) - if tries > 0: - task_queue.put((individual_uid, mutation_type, tries - 1)) - continue - - # unique check - descriptive_id = new_ind.graph.descriptive_id - if descriptive_id in pop_graph_descriptive_ids: - result_queue.put((FailedStageEnum.UNIQUENESS_CHECK, individual_uid, mutation_type)) - if tries > 0: - task_queue.put((individual_uid, mutation_type, tries - 1)) - continue - pop_graph_descriptive_ids[descriptive_id] = True - - # evaluation - new_inds = evaluator([new_ind]) - if not new_inds: - result_queue.put((FailedStageEnum.EVALUATION, individual_uid, mutation_type)) - if tries > 0: - task_queue.put((individual_uid, mutation_type, tries - 1)) - continue - - result_queue.put((FailedStageEnum.NONE, new_inds[0], mutation_type)) + + failed_stage, individual, mutation_type, retained_tries = \ + self._mutation_n_evaluation(individual=population_uid_map[individual_uid], + tries=tries, + mutation_type=mutation_type, + pop_graph_descriptive_ids=pop_graph_descriptive_ids, + mutation=mutation, + evaluator=evaluator) + + if failed_stage is FailedStageEnum.NONE: + result_queue.put((failed_stage, individual, mutation_type)) + else: + if failed_stage is FailedStageEnum.VERIFICATION: + result_queue.put((failed_stage, individual_uid, mutation_type)) + task_queue.put((individual_uid, mutation_type, tries - 1)) # create pool executor = get_reusable_executor(max_workers=self.mutation.requirements.n_jobs) - for _ in range(self.mutation.requirements.n_jobs - 1): executor.submit(worker) + for _ in range(max(2, self.mutation.requirements.n_jobs - 1)): executor.submit(worker) while left_tries > 0 and len(new_population) < self.parameters.pop_size: # if there is not enough jobs, create new @@ -187,6 +205,35 @@ def worker(pop_graph_descriptive_ids: DictProxy = pop_graph_descriptive_ids, rebuilded_population.append(individual) return rebuilded_population + def _mutation_n_evaluation(self, + individual: Individual, + tries: int, + mutation_type: MutationType, + pop_graph_descriptive_ids: Union[Dict, DictProxy], + mutation: SinglePredefinedMutation, + evaluator: EvaluationOperator): + # mutation + new_ind = mutation(individual, mutation_type=mutation_type) + if not new_ind: + return FailedStageEnum.MUTATION, individual, mutation_type, tries - 1 + + # verification + if not self.verifier(new_ind.graph): + return FailedStageEnum.VERIFICATION, individual, mutation_type, tries - 1 + + # unique check + descriptive_id = new_ind.graph.descriptive_id + if descriptive_id in pop_graph_descriptive_ids: + return FailedStageEnum.UNIQUENESS_CHECK, individual, mutation_type, tries - 1 + pop_graph_descriptive_ids[descriptive_id] = True + + # evaluation + new_inds = evaluator([new_ind]) + if not new_inds: + return FailedStageEnum.EVALUATION, individual, mutation_type, tries - 1 + + return FailedStageEnum.NONE, new_inds[0], mutation_type, tries - 1 + def _check_final_population(self, population: PopulationT) -> None: """ If population do not achieve required length return a warning or raise exception """ target_pop_size = self.parameters.pop_size From 47d92f12edb3d3dc79756f4d8c527d55532ecabe Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Fri, 3 Nov 2023 17:25:09 +0300 Subject: [PATCH 48/65] wip --- .../optimisers/genetic/operators/reproduction.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index e1b524081..1b7928913 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -63,18 +63,10 @@ def __init__(self, def reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: """Reproduces and evaluates population (select, crossover, mutate). """ - t0 = time.perf_counter() selected_individuals = self.selection(population, self.parameters.pop_size) - t1 = time.perf_counter() new_population = self.crossover(selected_individuals) - print('crossover is over') - t2 = time.perf_counter() new_population = self._mutate_over_population(new_population, evaluator) - print('mutation is over') - t3 = time.perf_counter() self._check_final_population(new_population) - t4 = time.perf_counter() - print('\n\n reproduce:', t1-t0, t2-t1, t3-t2, t4-t3, '\n\n') return new_population def _mutate_over_population(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: @@ -127,7 +119,7 @@ def _mutate_over_population_parallel(self, population: PopulationT, evaluator: E requirements=self.mutation.requirements, graph_gen_params=self.mutation.graph_generation_params, mutations_repo=self.mutation._mutations_repo) - pop_graph_descriptive_ids = manager.dict([(ids, True) for ids in self._pop_graph_descriptive_ids]) + pop_graph_descriptive_ids = manager.dict({ids: True for ids in self._pop_graph_descriptive_ids}) task_queue, result_queue = manager.Queue(), manager.Queue() def worker(pop_graph_descriptive_ids: DictProxy = pop_graph_descriptive_ids, @@ -187,8 +179,6 @@ def worker(pop_graph_descriptive_ids: DictProxy = pop_graph_descriptive_ids, self._pop_graph_descriptive_ids |= set(pop_graph_descriptive_ids.keys()) # rebuild population due to problem with changing id of individuals in parallel individuals building - population_uid_map = {ind.uid: ind - for ind in chain(*[ind.parents + [ind] for ind in population])} rebuilded_population = [] for individual in new_population: if individual.parent_operator: From aab0bcd153ae84d130de98cbbc87ec8e344efacf Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Wed, 8 Nov 2023 13:55:35 +0300 Subject: [PATCH 49/65] Fix misprint in reproducer single thread evaluation --- golem/core/optimisers/genetic/operators/reproduction.py | 1 + 1 file changed, 1 insertion(+) diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 1b7928913..f11205c07 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -98,6 +98,7 @@ def _mutate_over_population_single_thread(self, pop_graph_descriptive_ids=pop_graph_descriptive_ids, mutation=mutation, evaluator=evaluator) + left_tries -= 1 if failed_stage is FailedStageEnum.NONE: new_population.append(individual) From d301ade068f793a6acac3de59aca024145b59a81 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Wed, 8 Nov 2023 14:57:59 +0300 Subject: [PATCH 50/65] fix error with infinity reproducing --- golem/core/optimisers/genetic/operators/reproduction.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index f11205c07..b06adbfc8 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -143,8 +143,7 @@ def worker(pop_graph_descriptive_ids: DictProxy = pop_graph_descriptive_ids, if failed_stage is FailedStageEnum.NONE: result_queue.put((failed_stage, individual, mutation_type)) else: - if failed_stage is FailedStageEnum.VERIFICATION: - result_queue.put((failed_stage, individual_uid, mutation_type)) + result_queue.put((failed_stage, individual_uid, mutation_type)) task_queue.put((individual_uid, mutation_type, tries - 1)) From 0b0b549a2b21bc0ba6945b65b3f2a2d1b018f5e8 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Wed, 8 Nov 2023 16:16:54 +0300 Subject: [PATCH 51/65] fix unstopping workers --- .../genetic/operators/reproduction.py | 43 ++++++++++--------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index b06adbfc8..0221d2a34 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -149,27 +149,28 @@ def worker(pop_graph_descriptive_ids: DictProxy = pop_graph_descriptive_ids, # create pool executor = get_reusable_executor(max_workers=self.mutation.requirements.n_jobs) - for _ in range(max(2, self.mutation.requirements.n_jobs - 1)): executor.submit(worker) - - while left_tries > 0 and len(new_population) < self.parameters.pop_size: - # if there is not enough jobs, create new - while task_queue.qsize() < 2: - individual_uid = next(cycled_population_uid) - mutation_type = self.mutation.agent.choose_action(population_uid_map[individual_uid].graph) - task_queue.put((individual_uid, mutation_type, self.parameters.max_num_of_mutation_attempts)) - time.sleep(0.01) - - # process result - if result_queue.qsize() > 0: - failed_stage, individual, mutation_type = result_queue.get() - left_tries -= 1 - if failed_stage is FailedStageEnum.NONE: - new_population.append(individual) - elif failed_stage is FailedStageEnum.VERIFICATION: - inds_for_experience.append((population_uid_map[individual], mutation_type)) - - # shutdown workers - executor.shutdown(wait=False) + try: + for _ in range(max(2, self.mutation.requirements.n_jobs - 1)): executor.submit(worker) + + while left_tries > 0 and len(new_population) < self.parameters.pop_size: + # if there is not enough jobs, create new + while task_queue.qsize() < 2: + individual_uid = next(cycled_population_uid) + mutation_type = self.mutation.agent.choose_action(population_uid_map[individual_uid].graph) + task_queue.put((individual_uid, mutation_type, self.parameters.max_num_of_mutation_attempts)) + time.sleep(0.01) + + # process result + if result_queue.qsize() > 0: + failed_stage, individual, mutation_type = result_queue.get() + left_tries -= 1 + if failed_stage is FailedStageEnum.NONE: + new_population.append(individual) + elif failed_stage is FailedStageEnum.VERIFICATION: + inds_for_experience.append((population_uid_map[individual], mutation_type)) + finally: + # shutdown workers + executor.shutdown(wait=False) # add experience for agent for individual, mutation_type in inds_for_experience: From 1cea85d2b99c5da5215968b39be84a2cbb7244a3 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Wed, 8 Nov 2023 19:30:24 +0300 Subject: [PATCH 52/65] add parallelization for crossover --- golem/core/optimisers/genetic/gp_optimizer.py | 23 +- golem/core/optimisers/genetic/gp_params.py | 2 +- .../optimisers/genetic/operators/crossover.py | 17 +- .../optimisers/genetic/operators/mutation.py | 17 +- .../genetic/operators/reproduction.py | 412 ++++++++++++------ 5 files changed, 304 insertions(+), 167 deletions(-) diff --git a/golem/core/optimisers/genetic/gp_optimizer.py b/golem/core/optimisers/genetic/gp_optimizer.py index 8e768e8f0..0b8231e74 100644 --- a/golem/core/optimisers/genetic/gp_optimizer.py +++ b/golem/core/optimisers/genetic/gp_optimizer.py @@ -2,10 +2,11 @@ from golem.core.dag.graph import Graph from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters -from golem.core.optimisers.genetic.operators.crossover import Crossover +from golem.core.optimisers.genetic.operators.crossover import Crossover, SinglePredefinedGraphCrossover from golem.core.optimisers.genetic.operators.elitism import Elitism from golem.core.optimisers.genetic.operators.inheritance import Inheritance -from golem.core.optimisers.genetic.operators.mutation import Mutation +from golem.core.optimisers.genetic.operators.mutation import Mutation, SinglePredefinedMutation, \ + SinglePredefinedGraphMutation from golem.core.optimisers.genetic.operators.operator import PopulationT, EvaluationOperator from golem.core.optimisers.genetic.operators.regularization import Regularization from golem.core.optimisers.genetic.operators.reproduction import ReproductionController @@ -35,8 +36,8 @@ def __init__(self, # Define genetic operators self.regularization = Regularization(graph_optimizer_params, graph_generation_params) self.selection = Selection(graph_optimizer_params) - self.crossover = Crossover(graph_optimizer_params, requirements, graph_generation_params) - self.mutation = Mutation(graph_optimizer_params, requirements, graph_generation_params) + self.crossover = SinglePredefinedGraphCrossover(graph_optimizer_params, requirements, graph_generation_params) + self.mutation = SinglePredefinedGraphMutation(graph_optimizer_params, requirements, graph_generation_params) self.inheritance = Inheritance(graph_optimizer_params, self.selection) self.elitism = Elitism(graph_optimizer_params) self.operators = [self.regularization, self.selection, self.crossover, @@ -67,13 +68,13 @@ def _initial_population(self, evaluator: EvaluationOperator): """ Initializes the initial population """ # Adding of initial assumptions to history as zero generation self._update_population(evaluator(self.initial_individuals), 'initial_assumptions') - pop_size = self.graph_optimizer_params.pop_size - - if len(self.initial_individuals) < pop_size: - self.initial_individuals += self.reproducer._mutate_over_population(population=self.initial_individuals, - evaluator=evaluator) - # Adding of extended population to history - self._update_population(self.initial_individuals, 'extended_initial_assumptions') + # pop_size = self.graph_optimizer_params.pop_size + # + # if len(self.initial_individuals) < pop_size: + # self.initial_individuals += self.reproducer._mutate_over_population(population=self.initial_individuals, + # evaluator=evaluator) + # # Adding of extended population to history + # self._update_population(self.initial_individuals, 'extended_initial_assumptions') def _evolve_population(self, evaluator: EvaluationOperator) -> PopulationT: """ Method realizing full evolution cycle """ diff --git a/golem/core/optimisers/genetic/gp_params.py b/golem/core/optimisers/genetic/gp_params.py index 7a8457637..9792863be 100644 --- a/golem/core/optimisers/genetic/gp_params.py +++ b/golem/core/optimisers/genetic/gp_params.py @@ -76,7 +76,7 @@ class GPAlgorithmParameters(AlgorithmParameters): mutation_prob: float = 0.8 variable_mutation_num: bool = True max_num_of_operator_attempts: int = 100 - max_num_of_mutation_attempts: int = 3 + max_num_of_reproducer_attempts: int = 3 mutation_strength: MutationStrengthEnum = MutationStrengthEnum.mean min_pop_size_with_elitism: int = 5 required_valid_ratio: float = 0.9 diff --git a/golem/core/optimisers/genetic/operators/crossover.py b/golem/core/optimisers/genetic/operators/crossover.py index e4f28ea19..399952f96 100644 --- a/golem/core/optimisers/genetic/operators/crossover.py +++ b/golem/core/optimisers/genetic/operators/crossover.py @@ -2,7 +2,7 @@ from itertools import chain from math import ceil from random import choice, random, sample -from typing import Callable, Union, Iterable, Tuple, TYPE_CHECKING +from typing import Callable, Union, Iterable, Tuple, TYPE_CHECKING, Optional from joblib import Parallel, delayed @@ -109,6 +109,21 @@ def _will_crossover_be_applied(self, graph_first, graph_second, crossover_type) crossover_type is CrossoverTypesEnum.none) +class SinglePredefinedGraphCrossover(Crossover): + """ Crossover that tries to create new graph/graphs from only two graphs + in one attempt without any checks + """ + def __call__(self, + graph_1: OptGraph, + graph_2: OptGraph, + crossover_type: Optional[CrossoverTypesEnum] = None) -> Tuple[OptGraph, CrossoverTypesEnum]: + crossover_type = crossover_type or choice(self.parameters.crossover_types) + crossover_func = self._get_crossover_function(crossover_type) + + new_graphs = crossover_func(deepcopy(graph_1), deepcopy(graph_2), max_depth=self.requirements.max_depth) + return tuple(new_graphs) + (crossover_type, ) + + @register_native def subtree_crossover(graph_1: OptGraph, graph_2: OptGraph, max_depth: int, inplace: bool = True) -> Tuple[OptGraph, OptGraph]: diff --git a/golem/core/optimisers/genetic/operators/mutation.py b/golem/core/optimisers/genetic/operators/mutation.py index 1d708b902..2d2a82164 100644 --- a/golem/core/optimisers/genetic/operators/mutation.py +++ b/golem/core/optimisers/genetic/operators/mutation.py @@ -163,16 +163,15 @@ def _get_mutation_func(self, mutation_type: Union[MutationTypesEnum, Callable]) return adapted_mutation_func -class SinglePredefinedMutation(Mutation): - def __call__(self, individual: Individual, mutation_type: MutationType) -> Individual: - new_graph = deepcopy(individual.graph) +class SinglePredefinedGraphMutation(Mutation): + """ Mutation that tries to create new graph (not individual) from the only graph in one attempt + without any checks + """ + def __call__(self, graph: Graph, mutation_type: Optional[MutationType] = None) -> Tuple[Graph, MutationIdType]: + new_graph = deepcopy(graph) + mutation_type = mutation_type or self._operator_agent.choose_action(new_graph) mutation_func = self._get_mutation_func(mutation_type) - new_graph = mutation_func(new_graph, requirements=self.requirements, graph_gen_params=self.graph_generation_params, parameters=self.parameters) - - parent_operator = ParentOperator(type_='mutation', operators=mutation_type, parent_individuals=individual) - individual = Individual(new_graph, parent_operator, - metadata=self.requirements.static_individual_metadata) - return individual + return new_graph diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 0221d2a34..a6a08a547 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -1,11 +1,13 @@ import time from concurrent.futures import as_completed -from copy import deepcopy +from copy import deepcopy, copy +from dataclasses import dataclass from enum import Enum from itertools import cycle, chain from multiprocessing.managers import DictProxy from multiprocessing import Manager -from typing import Optional, Dict, Union +from random import choice, sample +from typing import Optional, Dict, Union, List from joblib.externals.loky import get_reusable_executor from joblib.externals.loky.backend.queues import Queue @@ -13,24 +15,18 @@ from golem.core.constants import MAX_GRAPH_GEN_ATTEMPTS_PER_IND from golem.core.dag.graph_verifier import GraphVerifier from golem.core.log import default_log +from golem.core.optimisers.fitness import Fitness from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters -from golem.core.optimisers.genetic.operators.crossover import Crossover +from golem.core.optimisers.genetic.operators.crossover import Crossover, CrossoverTypesEnum from golem.core.optimisers.genetic.operators.mutation import Mutation, SinglePredefinedMutation, MutationType from golem.core.optimisers.genetic.operators.operator import PopulationT, EvaluationOperator from golem.core.optimisers.genetic.operators.selection import Selection +from golem.core.optimisers.graph import OptGraph from golem.core.optimisers.opt_history_objects.parent_operator import ParentOperator from golem.core.optimisers.populational_optimizer import EvaluationAttemptsError from golem.core.optimisers.opt_history_objects.individual import Individual -class FailedStageEnum(Enum): - NONE = 0 - MUTATION = 1 - VERIFICATION = 2 - UNIQUENESS_CHECK = 3 - EVALUATION = 4 - - class ReproductionController: """ Task of the Reproduction Controller is to reproduce population @@ -65,165 +61,71 @@ def reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> P """ selected_individuals = self.selection(population, self.parameters.pop_size) new_population = self.crossover(selected_individuals) - new_population = self._mutate_over_population(new_population, evaluator) + new_population = self._reproduce(new_population, evaluator) self._check_final_population(new_population) return new_population - def _mutate_over_population(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: - if self.mutation.requirements.n_jobs > 1: - new_population = self._mutate_over_population_parallel(population, evaluator) - else: - new_population = self._mutate_over_population_single_thread(population, evaluator) - return new_population - - def _mutate_over_population_single_thread(self, - population: PopulationT, - evaluator: EvaluationOperator) -> PopulationT: - left_tries = self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_PER_IND - new_population, cycled_population = [], cycle(population) - mutation = SinglePredefinedMutation(parameters=self.mutation.parameters, - requirements=self.mutation.requirements, - graph_gen_params=self.mutation.graph_generation_params, - mutations_repo=self.mutation._mutations_repo) - pop_graph_descriptive_ids = {key: True for key in self._pop_graph_descriptive_ids} - - while left_tries > 0 and len(new_population) < self.parameters.pop_size: - individual = next(cycled_population) - mutation_type = self.mutation.agent.choose_action(individual.graph) - - failed_stage, individual, mutation_type, retained_tries = \ - self._mutation_n_evaluation(individual=next(cycled_population), - tries=1, - mutation_type=mutation_type, - pop_graph_descriptive_ids=pop_graph_descriptive_ids, - mutation=mutation, - evaluator=evaluator) - left_tries -= 1 - if failed_stage is FailedStageEnum.NONE: - new_population.append(individual) - - # update looked graphs - self._pop_graph_descriptive_ids |= set(pop_graph_descriptive_ids.keys()) - return new_population - - def _mutate_over_population_parallel(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: + def _reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: """Generate new individuals by mutation in parallel. Implements additional checks on population to ensure that population size is greater or equal to required population size. Also controls uniqueness of population. """ with Manager() as manager: - population_uid_map = {ind.uid: ind for ind in population} left_tries = self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_PER_IND - cycled_population_uid = cycle(population_uid_map) - new_population, inds_for_experience = list(), list() - mutation = SinglePredefinedMutation(parameters=self.mutation.parameters, - requirements=self.mutation.requirements, - graph_gen_params=self.mutation.graph_generation_params, - mutations_repo=self.mutation._mutations_repo) + new_population = list() pop_graph_descriptive_ids = manager.dict({ids: True for ids in self._pop_graph_descriptive_ids}) - task_queue, result_queue = manager.Queue(), manager.Queue() - - def worker(pop_graph_descriptive_ids: DictProxy = pop_graph_descriptive_ids, - mutation: SinglePredefinedMutation = mutation, - evaluator: EvaluationOperator = evaluator, - population_uid_map: Dict[str, Individual] = population_uid_map, - task_queue: Queue = task_queue, - result_queue: Queue = result_queue) -> None: - while True: - individual_uid, mutation_type, tries = task_queue.get() - - failed_stage, individual, mutation_type, retained_tries = \ - self._mutation_n_evaluation(individual=population_uid_map[individual_uid], - tries=tries, - mutation_type=mutation_type, - pop_graph_descriptive_ids=pop_graph_descriptive_ids, - mutation=mutation, - evaluator=evaluator) - - if failed_stage is FailedStageEnum.NONE: - result_queue.put((failed_stage, individual, mutation_type)) - else: - result_queue.put((failed_stage, individual_uid, mutation_type)) - task_queue.put((individual_uid, mutation_type, tries - 1)) - - - # create pool + task_queue, result_queue, experience_queue = [manager.Queue() for _ in range(3)] + + worker = ReproduceWorker(crossover=self.crossover, mutation=self.mutation, + verifier=self.verifier, evaluator=evaluator, + pop_graph_descriptive_ids=pop_graph_descriptive_ids, + population=population, + task_queue=task_queue, result_queue=result_queue, + experience_queue=experience_queue) + + # TODO there is problem with random seed in parallel workers + # TODO tries in one thread + + # create pool with workers executor = get_reusable_executor(max_workers=self.mutation.requirements.n_jobs) - try: - for _ in range(max(2, self.mutation.requirements.n_jobs - 1)): executor.submit(worker) + for _ in range(max(1, self.mutation.requirements.n_jobs - 1)): + executor.submit(worker) + try: + # create new population while left_tries > 0 and len(new_population) < self.parameters.pop_size: - # if there is not enough jobs, create new + # if there is not enough jobs, create new empty job + # for fully random starting individuals and operation types while task_queue.qsize() < 2: - individual_uid = next(cycled_population_uid) - mutation_type = self.mutation.agent.choose_action(population_uid_map[individual_uid].graph) - task_queue.put((individual_uid, mutation_type, self.parameters.max_num_of_mutation_attempts)) - time.sleep(0.01) + tries = self.parameters.max_num_of_reproducer_attempts + task_queue.put(ReproducerWorkerTask(crossover_tries=tries, mutation_tries=tries)) + time.sleep(0.01) # get workers some time to get tasks from queue # process result if result_queue.qsize() > 0: failed_stage, individual, mutation_type = result_queue.get() left_tries -= 1 - if failed_stage is FailedStageEnum.NONE: - new_population.append(individual) - elif failed_stage is FailedStageEnum.VERIFICATION: - inds_for_experience.append((population_uid_map[individual], mutation_type)) + new_population.append(individual) + + # process unsuccessful creation attempt + if experience_queue.qsize() > 0: + failed_stage, individual_uid, mutation_type = experience_queue.get() + left_tries -= 1 + if failed_stage is ReproducerWorkerStageEnum.MUTATION_VERIFICATION: + # experience for mab + self.mutation.agent_experience.collect_experience(population_uid_map[individual_uid], + mutation_type, + reward=-1.0) finally: # shutdown workers executor.shutdown(wait=False) - # add experience for agent - for individual, mutation_type in inds_for_experience: - self.mutation.agent_experience.collect_experience(individual, mutation_type, reward=-1.0) - # update looked graphs self._pop_graph_descriptive_ids |= set(pop_graph_descriptive_ids.keys()) - # rebuild population due to problem with changing id of individuals in parallel individuals building - rebuilded_population = [] - for individual in new_population: - if individual.parent_operator: - parent_operator = ParentOperator(type_=individual.parent_operator.type_, - operators=individual.parent_operator.operators, - parent_individuals=population_uid_map[ - individual.parent_operator.parent_individuals[0].uid]) - else: - parent_operator = None - individual = Individual(deepcopy(individual.graph), - parent_operator, - fitness=individual.fitness, - metadata=self.mutation.requirements.static_individual_metadata) - rebuilded_population.append(individual) - return rebuilded_population - - def _mutation_n_evaluation(self, - individual: Individual, - tries: int, - mutation_type: MutationType, - pop_graph_descriptive_ids: Union[Dict, DictProxy], - mutation: SinglePredefinedMutation, - evaluator: EvaluationOperator): - # mutation - new_ind = mutation(individual, mutation_type=mutation_type) - if not new_ind: - return FailedStageEnum.MUTATION, individual, mutation_type, tries - 1 - - # verification - if not self.verifier(new_ind.graph): - return FailedStageEnum.VERIFICATION, individual, mutation_type, tries - 1 - - # unique check - descriptive_id = new_ind.graph.descriptive_id - if descriptive_id in pop_graph_descriptive_ids: - return FailedStageEnum.UNIQUENESS_CHECK, individual, mutation_type, tries - 1 - pop_graph_descriptive_ids[descriptive_id] = True - - # evaluation - new_inds = evaluator([new_ind]) - if not new_inds: - return FailedStageEnum.EVALUATION, individual, mutation_type, tries - 1 - - return FailedStageEnum.NONE, new_inds[0], mutation_type, tries - 1 + # rebuild population + new_population = self._rebuild_final_population(population=population, new_population=new_population) + return new_population def _check_final_population(self, population: PopulationT) -> None: """ If population do not achieve required length return a warning or raise exception """ @@ -237,4 +139,224 @@ def _check_final_population(self, population: PopulationT) -> None: elif len(population) < target_pop_size: self._log.warning(f'Could not achieve required population size: ' f'have {len(population)},' - f' required {target_pop_size}!\n' + helpful_msg) \ No newline at end of file + f' required {target_pop_size}!\n' + helpful_msg) + + def _rebuild_final_population(self, population: PopulationT, new_population: PopulationT) -> PopulationT: + """ Recreate new_population in main thread with parents from population """ + population_uid_map = {individual.uid: individual for individual in population} + rebuilded_population = [] + for individual in new_population: + if individual.parent_operator: + parent_uid = individual.parent_operator.parent_individuals[0].uid + parent_operator = ParentOperator(type_=individual.parent_operator.type_, + operators=individual.parent_operator.operators, + parent_individuals=population_uid_map[parent_uid]) + else: + parent_operator = None + individual = Individual(deepcopy(individual.graph), + parent_operator, + fitness=individual.fitness, + metadata=self.mutation.requirements.static_individual_metadata) + rebuilded_population.append(individual) + return rebuilded_population + + +class ReproducerWorkerStageEnum(Enum): + # TODO test that check that nums start from 0 and go to max with 1 steps + FINISH = -100 + CROSSOVER = 0 + CROSSOVER_VERIFICATION = 1 + CROSSOVER_UNIQUENESS_CHECK = 2 + CROSSOVER_EVALUATION = 3 + MUTATION = 4 + MUTATION_VERIFICATION = 5 + MUTATION_UNIQUENESS_CHECK = 6 + MUTATION_EVALUATION = 7 + + +@dataclass +class ReproducerWorkerTask: + stage: ReproducerWorkerStageEnum = ReproducerWorkerStageEnum(0) + fail: bool = False + + # crossover data + graph_1_for_crossover: Optional[OptGraph] = None + graph_2_for_crossover: Optional[OptGraph] = None + crossover_type: Optional[CrossoverTypesEnum] = None + crossover_tries: int = 1 + crossover_fitness = Optional[Fitness] = None + + # mutation data + graph_for_mutation: Optional[OptGraph] = None + mutation_type: Optional[MutationType] = None + mutation_tries: int = 1 + + # result + final_graph: Optional[OptGraph] = None + final_fitness = Optional[Fitness] = None + + @property + def is_crossover(self): + return self.stage in [ReproducerWorkerStageEnum.CROSSOVER, + ReproducerWorkerStageEnum.CROSSOVER_VERIFICATION, + ReproducerWorkerStageEnum.CROSSOVER_UNIQUENESS_CHECK, + ReproducerWorkerStageEnum.CROSSOVER_EVALUATION] + + @property + def is_mutation(self): + return not self.is_crossover + + @property + def tries(self): + return self.crossover_tries if self.is_crossover else self.mutation_tries + + @property.setter + def tries(self, value: int): + if self.is_crossover: + self.crossover_tries = value + else: + self.mutation_tries = value + + def step_in_stage(self, flag: bool): + step = 1 if flag else -1 + self.stage = ReproducerWorkerStageEnum(self.stage.value + step) + + +class ReproduceWorker: + def __init__(self, + crossover: Crossover, + mutation: MutationType, + verifier: GraphVerifier, + evaluator: EvaluationOperator, + pop_graph_descriptive_ids: Union[DictProxy, Dict], + population: PopulationT, + task_queue: Queue, + result_queue: Queue, + experience_queue: Queue + ): + self.crossover = crossover + self.mutation = mutation + self.verifier = verifier + self.evaluator = evaluator + self._pop_graph_descriptive_ids = pop_graph_descriptive_ids + self._population = population + self._task_queue = task_queue + self._result_queue = result_queue + self._experience_queue = experience_queue + + def __call__(self): + tasks = [] + while True: + # work with existing task from tasks or from queue + if not tasks: + tasks.append(self._task_queue.get()) + processed_tasks = self.process_task(tasks.pop()) + + # process result + for processed_task in processed_tasks: + if processed_task.stage is ReproducerWorkerStageEnum.FINISH: + self._result_queue.put(processed_task) + continue + if processed_task.fail: + self._experience_queue.put(processed_task) + if processed_task.tries > 0: + # task is not finished, need new try + tasks.append(processed_task) + + # if there are some tasks, add it to parallel queue + for _ in range(len(tasks)): + self._task_queue.put(tasks.pop()) + + def process_task(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: + task = copy(task) # input task + + # crossover + if task.stage is ReproducerWorkerStageEnum.CROSSOVER: + return self.crossover_stage(task) + + # crossover result verification + if task.stage is ReproducerWorkerStageEnum.CROSSOVER_VERIFICATION: + task.step_in_stage(self.verifier(task.graph_for_mutation)) + return [task] + + # crossover uniqueness check + if task.stage is ReproducerWorkerStageEnum.CROSSOVER_UNIQUENESS_CHECK: + return self.uniqueness_check_stage(task) + + # crossover result evaluation + if task.stage is ReproducerWorkerStageEnum.CROSSOVER_EVALUATION: + return self.evaluation_stage(task) + + # mutation + # TODO add some mutation for each crossover result + if task.stage is ReproducerWorkerStageEnum.MUTATION: + return self.mutation_stage(task) + + # mutation result verification + if task.stage is ReproducerWorkerStageEnum.MUTATION_VERIFICATION: + task.step_in_stage(self.verifier(task.final_graph)) + return [task] + + # mutation uniqueness check + if task.stage is ReproducerWorkerStageEnum.MUTATION_UNIQUENESS_CHECK: + return self.uniqueness_check_stage(task) + + # mutation result evaluation + if task.stage is ReproducerWorkerStageEnum.MUTATION_EVALUATION: + return self.evaluation_stage(task) + + def crossover_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: + tasks = [] # tasks to return + + # if there is no graphs for crossover then get random graphs + if task.graph_1_for_crossover is None or task.graph_1_for_crossover is None: + inds_for_crossover = sample(self._population, k=2) + task.graph_1_for_crossover, task.graph_2_for_crossover = [ind.graph for ind in inds_for_crossover] + + # make crossover + task.crossover_tries -= 1 + *new_graphs, task.crossover_type = self.crossover(task.graph_1_for_crossover, + task.graph_2_for_crossover, + task.crossover_type) + + # if there is no new_graphs than go to new try + if not new_graphs: + tasks.append(task) + else: + # create new task for each new graph after crossover for next stage + task.step_in_stage(True) + for graph in new_graphs: + new_task = copy(task) + new_task.graph_for_mutation = graph + tasks.append(new_task) + return tasks + + def mutation_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: + task.final_graph, task.mutation_type = self.mutation(task.graph_for_mutation, task.mutation_type) + task.mutation_tries -= 1 + task.step_in_stage(True) + return [task] + + def uniqueness_check_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: + graph = task.graph_for_mutation if task.is_crossover else task.final_graph + descriptive_id = graph.descriptive_id + if descriptive_id not in self._pop_graph_descriptive_ids: + self._pop_graph_descriptive_ids[descriptive_id] = True + task.step_in_stage(True) + else: + task.step_in_stage(False) + return [task] + + def evaluation_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: + graph = task.graph_for_mutation if task.is_crossover else task.final_graph + individual = Individual(deepcopy(graph), metadata=self.mutation.requirements.static_individual_metadata) + evaluated_individuals = self.evaluator([individual]) + if evaluated_individuals: + # TODO add null_fitness as flag for previous stage + task.step_in_stage(True) + else: + task.step_in_stage(False) + # TODO return fitness + return [task] + + From 2eb41e5f6165ced5f7b5b425abdcf56c10095ec8 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 9 Nov 2023 09:56:52 +0300 Subject: [PATCH 53/65] wip --- golem/core/optimisers/genetic/gp_params.py | 4 +- .../genetic/operators/reproduction.py | 135 ++++++++++-------- 2 files changed, 81 insertions(+), 58 deletions(-) diff --git a/golem/core/optimisers/genetic/gp_params.py b/golem/core/optimisers/genetic/gp_params.py index 9792863be..22a829117 100644 --- a/golem/core/optimisers/genetic/gp_params.py +++ b/golem/core/optimisers/genetic/gp_params.py @@ -76,7 +76,9 @@ class GPAlgorithmParameters(AlgorithmParameters): mutation_prob: float = 0.8 variable_mutation_num: bool = True max_num_of_operator_attempts: int = 100 - max_num_of_reproducer_attempts: int = 3 + max_num_of_crossover_reproducer_attempts: int = 3 + max_num_of_mutation_reproducer_attempts: int = 3 + mutation_attempts_per_each_crossover_reproducer: int = 3 mutation_strength: MutationStrengthEnum = MutationStrengthEnum.mean min_pop_size_with_elitism: int = 5 required_valid_ratio: float = 0.9 diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index a6a08a547..78f77818d 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -1,12 +1,10 @@ import time -from concurrent.futures import as_completed from copy import deepcopy, copy from dataclasses import dataclass from enum import Enum -from itertools import cycle, chain from multiprocessing.managers import DictProxy from multiprocessing import Manager -from random import choice, sample +from random import sample from typing import Optional, Dict, Union, List from joblib.externals.loky import get_reusable_executor @@ -18,7 +16,7 @@ from golem.core.optimisers.fitness import Fitness from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters from golem.core.optimisers.genetic.operators.crossover import Crossover, CrossoverTypesEnum -from golem.core.optimisers.genetic.operators.mutation import Mutation, SinglePredefinedMutation, MutationType +from golem.core.optimisers.genetic.operators.mutation import Mutation, MutationType from golem.core.optimisers.genetic.operators.operator import PopulationT, EvaluationOperator from golem.core.optimisers.genetic.operators.selection import Selection from golem.core.optimisers.graph import OptGraph @@ -72,19 +70,17 @@ def _reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> """ with Manager() as manager: left_tries = self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_PER_IND - new_population = list() pop_graph_descriptive_ids = manager.dict({ids: True for ids in self._pop_graph_descriptive_ids}) - task_queue, result_queue, experience_queue = [manager.Queue() for _ in range(3)] + task_queue, result_queue, failed_queue = [manager.Queue() for _ in range(3)] worker = ReproduceWorker(crossover=self.crossover, mutation=self.mutation, verifier=self.verifier, evaluator=evaluator, pop_graph_descriptive_ids=pop_graph_descriptive_ids, population=population, task_queue=task_queue, result_queue=result_queue, - experience_queue=experience_queue) + failed_queue=failed_queue) # TODO there is problem with random seed in parallel workers - # TODO tries in one thread # create pool with workers executor = get_reusable_executor(max_workers=self.mutation.requirements.n_jobs) @@ -93,29 +89,26 @@ def _reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> try: # create new population - while left_tries > 0 and len(new_population) < self.parameters.pop_size: + finished_tasks, failed_tasks = list(), list() + while left_tries > 0 and len(finished_tasks) < self.parameters.pop_size: # if there is not enough jobs, create new empty job # for fully random starting individuals and operation types while task_queue.qsize() < 2: - tries = self.parameters.max_num_of_reproducer_attempts - task_queue.put(ReproducerWorkerTask(crossover_tries=tries, mutation_tries=tries)) - time.sleep(0.01) # get workers some time to get tasks from queue + task_queue.put(ReproducerWorkerTask( + crossover_tries=self.parameters.max_num_of_crossover_reproducer_attempts, + mutation_tries=self.parameters.max_num_of_mutation_reproducer_attempts, + mutation_attempts_per_each_crossover=self.parameters.mutation_attempts_per_each_crossover_reproducer)) + time.sleep(0.01) # give workers some time to get tasks from queue # process result if result_queue.qsize() > 0: - failed_stage, individual, mutation_type = result_queue.get() left_tries -= 1 - new_population.append(individual) + finished_tasks.append(result_queue.get()) # process unsuccessful creation attempt - if experience_queue.qsize() > 0: - failed_stage, individual_uid, mutation_type = experience_queue.get() + if failed_queue.qsize() > 0: left_tries -= 1 - if failed_stage is ReproducerWorkerStageEnum.MUTATION_VERIFICATION: - # experience for mab - self.mutation.agent_experience.collect_experience(population_uid_map[individual_uid], - mutation_type, - reward=-1.0) + failed_tasks.append(failed_queue.get()) finally: # shutdown workers executor.shutdown(wait=False) @@ -124,9 +117,22 @@ def _reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> self._pop_graph_descriptive_ids |= set(pop_graph_descriptive_ids.keys()) # rebuild population - new_population = self._rebuild_final_population(population=population, new_population=new_population) + new_population = self._process_tasks(population=population, + finished_tasks=finished_tasks, + failed_tasks=failed_tasks) return new_population + def _process_tasks(self, + population: PopulationT, + finished_tasks: List['ReproducerWorkerTask'], + failed_tasks: List['ReproducerWorkerTask']): + # if failed_stage is ReproducerWorkerStageEnum.MUTATION_VERIFICATION: + # # experience for mab + # self.mutation.agent_experience.collect_experience(population_uid_map[individual_uid], + # mutation_type, + # reward=-1.0) + pass + def _check_final_population(self, population: PopulationT) -> None: """ If population do not achieve required length return a warning or raise exception """ target_pop_size = self.parameters.pop_size @@ -162,8 +168,7 @@ def _rebuild_final_population(self, population: PopulationT, new_population: Pop class ReproducerWorkerStageEnum(Enum): - # TODO test that check that nums start from 0 and go to max with 1 steps - FINISH = -100 + # TODO test that check that nums start from 0 and go to max (FINISH) with 1 steps CROSSOVER = 0 CROSSOVER_VERIFICATION = 1 CROSSOVER_UNIQUENESS_CHECK = 2 @@ -172,19 +177,23 @@ class ReproducerWorkerStageEnum(Enum): MUTATION_VERIFICATION = 5 MUTATION_UNIQUENESS_CHECK = 6 MUTATION_EVALUATION = 7 + FINISH = 8 @dataclass class ReproducerWorkerTask: stage: ReproducerWorkerStageEnum = ReproducerWorkerStageEnum(0) fail: bool = False + mutation_attempts_per_each_crossover: int = 1 # crossover data + graph_1_uid: Optional[str] = None + graph_2_uid: Optional[str] = None graph_1_for_crossover: Optional[OptGraph] = None graph_2_for_crossover: Optional[OptGraph] = None crossover_type: Optional[CrossoverTypesEnum] = None crossover_tries: int = 1 - crossover_fitness = Optional[Fitness] = None + crossover_fitness: Optional[Fitness] = None # mutation data graph_for_mutation: Optional[OptGraph] = None @@ -193,7 +202,7 @@ class ReproducerWorkerTask: # result final_graph: Optional[OptGraph] = None - final_fitness = Optional[Fitness] = None + final_fitness: Optional[Fitness] = None @property def is_crossover(self): @@ -210,16 +219,8 @@ def is_mutation(self): def tries(self): return self.crossover_tries if self.is_crossover else self.mutation_tries - @property.setter - def tries(self, value: int): - if self.is_crossover: - self.crossover_tries = value - else: - self.mutation_tries = value - - def step_in_stage(self, flag: bool): - step = 1 if flag else -1 - self.stage = ReproducerWorkerStageEnum(self.stage.value + step) + def step_in_stage(self, steps: int): + self.stage = ReproducerWorkerStageEnum(self.stage.value + steps) class ReproduceWorker: @@ -232,7 +233,7 @@ def __init__(self, population: PopulationT, task_queue: Queue, result_queue: Queue, - experience_queue: Queue + failed_queue: Queue ): self.crossover = crossover self.mutation = mutation @@ -242,7 +243,7 @@ def __init__(self, self._population = population self._task_queue = task_queue self._result_queue = result_queue - self._experience_queue = experience_queue + self._failed_queue = failed_queue def __call__(self): tasks = [] @@ -258,17 +259,20 @@ def __call__(self): self._result_queue.put(processed_task) continue if processed_task.fail: - self._experience_queue.put(processed_task) + self._failed_queue.put(processed_task) + processed_task.fail = False if processed_task.tries > 0: # task is not finished, need new try tasks.append(processed_task) # if there are some tasks, add it to parallel queue - for _ in range(len(tasks)): + for _ in range(len(tasks) - 1): self._task_queue.put(tasks.pop()) def process_task(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: + """ Get task, make 1 stage and return processed task """ task = copy(task) # input task + task.fail = False # crossover if task.stage is ReproducerWorkerStageEnum.CROSSOVER: @@ -276,34 +280,48 @@ def process_task(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask] # crossover result verification if task.stage is ReproducerWorkerStageEnum.CROSSOVER_VERIFICATION: - task.step_in_stage(self.verifier(task.graph_for_mutation)) + task.fail = not self.verifier(task.graph_for_mutation) + task.step_in_stage(-1 if task.fail else 1) return [task] # crossover uniqueness check if task.stage is ReproducerWorkerStageEnum.CROSSOVER_UNIQUENESS_CHECK: - return self.uniqueness_check_stage(task) + processed_task = self.uniqueness_check_stage(task)[0] + processed_task.step_in_stage(-2 if processed_task.fail else 1) + return [processed_task] # crossover result evaluation if task.stage is ReproducerWorkerStageEnum.CROSSOVER_EVALUATION: - return self.evaluation_stage(task) + processed_task = self.evaluation_stage(task)[0] + if processed_task.fail: + processed_task.step_in_stage(-3) + return [processed_task] + else: + # create some tasks for mutation for crossover result + processed_task.step_in_stage(1) + return [copy(processed_task) for _ in range(task.mutation_attempts_per_each_crossover)] # mutation - # TODO add some mutation for each crossover result if task.stage is ReproducerWorkerStageEnum.MUTATION: return self.mutation_stage(task) # mutation result verification if task.stage is ReproducerWorkerStageEnum.MUTATION_VERIFICATION: - task.step_in_stage(self.verifier(task.final_graph)) + task.fail = not self.verifier(task.final_graph) + task.step_in_stage(-1 if task.fail else 1) return [task] # mutation uniqueness check if task.stage is ReproducerWorkerStageEnum.MUTATION_UNIQUENESS_CHECK: - return self.uniqueness_check_stage(task) + processed_task = self.uniqueness_check_stage(task)[0] + processed_task.step_in_stage(-2 if processed_task.fail else 1) + return [processed_task] # mutation result evaluation if task.stage is ReproducerWorkerStageEnum.MUTATION_EVALUATION: - return self.evaluation_stage(task) + processed_task = self.evaluation_stage(task)[0] + processed_task.step_in_stage(-3 if processed_task.fail else 1) + return [processed_task] def crossover_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: tasks = [] # tasks to return @@ -311,7 +329,8 @@ def crossover_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTa # if there is no graphs for crossover then get random graphs if task.graph_1_for_crossover is None or task.graph_1_for_crossover is None: inds_for_crossover = sample(self._population, k=2) - task.graph_1_for_crossover, task.graph_2_for_crossover = [ind.graph for ind in inds_for_crossover] + task.graph_1_uid, task.graph_1_for_crossover = inds_for_crossover[0].uid, inds_for_crossover[0].graph + task.graph_2_uid, task.graph_2_for_crossover = inds_for_crossover[1].uid, inds_for_crossover[1].graph # make crossover task.crossover_tries -= 1 @@ -319,12 +338,13 @@ def crossover_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTa task.graph_2_for_crossover, task.crossover_type) - # if there is no new_graphs than go to new try if not new_graphs: + # if there is no new_graphs then go to new try + task.fail = True tasks.append(task) else: # create new task for each new graph after crossover for next stage - task.step_in_stage(True) + task.step_in_stage(1) for graph in new_graphs: new_task = copy(task) new_task.graph_for_mutation = graph @@ -334,7 +354,10 @@ def crossover_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTa def mutation_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: task.final_graph, task.mutation_type = self.mutation(task.graph_for_mutation, task.mutation_type) task.mutation_tries -= 1 - task.step_in_stage(True) + if task.final_graph is None: + task.fail = True + else: + task.step_in_stage(1) return [task] def uniqueness_check_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: @@ -342,9 +365,9 @@ def uniqueness_check_stage(self, task: ReproducerWorkerTask) -> List[ReproducerW descriptive_id = graph.descriptive_id if descriptive_id not in self._pop_graph_descriptive_ids: self._pop_graph_descriptive_ids[descriptive_id] = True - task.step_in_stage(True) + task.fail = False else: - task.step_in_stage(False) + task.fail = True return [task] def evaluation_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: @@ -353,10 +376,8 @@ def evaluation_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerT evaluated_individuals = self.evaluator([individual]) if evaluated_individuals: # TODO add null_fitness as flag for previous stage - task.step_in_stage(True) + task.fail = False else: - task.step_in_stage(False) + task.fail = True # TODO return fitness return [task] - - From 7f4194c68c3bed94cd9458f6ed09376a36623fab Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 9 Nov 2023 11:16:06 +0300 Subject: [PATCH 54/65] wip --- golem/core/optimisers/genetic/gp_optimizer.py | 3 +- golem/core/optimisers/genetic/gp_params.py | 6 +- .../optimisers/genetic/operators/mutation.py | 2 +- .../genetic/operators/reproduction.py | 58 ++++++++++++------- 4 files changed, 41 insertions(+), 28 deletions(-) diff --git a/golem/core/optimisers/genetic/gp_optimizer.py b/golem/core/optimisers/genetic/gp_optimizer.py index 0b8231e74..eff76164a 100644 --- a/golem/core/optimisers/genetic/gp_optimizer.py +++ b/golem/core/optimisers/genetic/gp_optimizer.py @@ -5,8 +5,7 @@ from golem.core.optimisers.genetic.operators.crossover import Crossover, SinglePredefinedGraphCrossover from golem.core.optimisers.genetic.operators.elitism import Elitism from golem.core.optimisers.genetic.operators.inheritance import Inheritance -from golem.core.optimisers.genetic.operators.mutation import Mutation, SinglePredefinedMutation, \ - SinglePredefinedGraphMutation +from golem.core.optimisers.genetic.operators.mutation import Mutation, SinglePredefinedGraphMutation from golem.core.optimisers.genetic.operators.operator import PopulationT, EvaluationOperator from golem.core.optimisers.genetic.operators.regularization import Regularization from golem.core.optimisers.genetic.operators.reproduction import ReproductionController diff --git a/golem/core/optimisers/genetic/gp_params.py b/golem/core/optimisers/genetic/gp_params.py index 22a829117..2f02b3d1e 100644 --- a/golem/core/optimisers/genetic/gp_params.py +++ b/golem/core/optimisers/genetic/gp_params.py @@ -76,9 +76,9 @@ class GPAlgorithmParameters(AlgorithmParameters): mutation_prob: float = 0.8 variable_mutation_num: bool = True max_num_of_operator_attempts: int = 100 - max_num_of_crossover_reproducer_attempts: int = 3 - max_num_of_mutation_reproducer_attempts: int = 3 - mutation_attempts_per_each_crossover_reproducer: int = 3 + max_num_of_crossover_reproducer_attempts: int = 2 + max_num_of_mutation_reproducer_attempts: int = 2 + mutation_attempts_per_each_crossover_reproducer: int = 2 mutation_strength: MutationStrengthEnum = MutationStrengthEnum.mean min_pop_size_with_elitism: int = 5 required_valid_ratio: float = 0.9 diff --git a/golem/core/optimisers/genetic/operators/mutation.py b/golem/core/optimisers/genetic/operators/mutation.py index 2d2a82164..d42ffc2fc 100644 --- a/golem/core/optimisers/genetic/operators/mutation.py +++ b/golem/core/optimisers/genetic/operators/mutation.py @@ -174,4 +174,4 @@ def __call__(self, graph: Graph, mutation_type: Optional[MutationType] = None) - new_graph = mutation_func(new_graph, requirements=self.requirements, graph_gen_params=self.graph_generation_params, parameters=self.parameters) - return new_graph + return new_graph, mutation_type diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 78f77818d..f31ff665f 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -4,11 +4,12 @@ from enum import Enum from multiprocessing.managers import DictProxy from multiprocessing import Manager +from queue import Queue from random import sample from typing import Optional, Dict, Union, List +from joblib import Parallel, delayed from joblib.externals.loky import get_reusable_executor -from joblib.externals.loky.backend.queues import Queue from golem.core.constants import MAX_GRAPH_GEN_ATTEMPTS_PER_IND from golem.core.dag.graph_verifier import GraphVerifier @@ -58,8 +59,7 @@ def reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> P """Reproduces and evaluates population (select, crossover, mutate). """ selected_individuals = self.selection(population, self.parameters.pop_size) - new_population = self.crossover(selected_individuals) - new_population = self._reproduce(new_population, evaluator) + new_population = self._reproduce(selected_individuals, evaluator) self._check_final_population(new_population) return new_population @@ -78,40 +78,44 @@ def _reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> pop_graph_descriptive_ids=pop_graph_descriptive_ids, population=population, task_queue=task_queue, result_queue=result_queue, - failed_queue=failed_queue) + failed_queue=failed_queue, + log=self._log) + + empty_task = ReproducerWorkerTask( + crossover_tries=self.parameters.max_num_of_crossover_reproducer_attempts, + mutation_tries=self.parameters.max_num_of_mutation_reproducer_attempts, + mutation_attempts_per_each_crossover=self.parameters.mutation_attempts_per_each_crossover_reproducer) # TODO there is problem with random seed in parallel workers - # create pool with workers - executor = get_reusable_executor(max_workers=self.mutation.requirements.n_jobs) - for _ in range(max(1, self.mutation.requirements.n_jobs - 1)): - executor.submit(worker) + n_jobs = self.mutation.requirements.n_jobs + with Parallel(n_jobs=n_jobs, prefer='processes', return_as='generator') as parallel: + _ = parallel(delayed(worker)() for _ in range(n_jobs)) - try: - # create new population finished_tasks, failed_tasks = list(), list() while left_tries > 0 and len(finished_tasks) < self.parameters.pop_size: + self._log.warning('Cycle') + time.sleep(0.02) + # if there is not enough jobs, create new empty job - # for fully random starting individuals and operation types - while task_queue.qsize() < 2: - task_queue.put(ReproducerWorkerTask( - crossover_tries=self.parameters.max_num_of_crossover_reproducer_attempts, - mutation_tries=self.parameters.max_num_of_mutation_reproducer_attempts, - mutation_attempts_per_each_crossover=self.parameters.mutation_attempts_per_each_crossover_reproducer)) + if task_queue.qsize() < 2: + self._log.warning('Put task to task_queue') + task_queue.put(empty_task) time.sleep(0.01) # give workers some time to get tasks from queue # process result if result_queue.qsize() > 0: + self._log.warning(f'Get finished task, left tries: {left_tries}') left_tries -= 1 finished_tasks.append(result_queue.get()) # process unsuccessful creation attempt if failed_queue.qsize() > 0: + self._log.warning(f'Get failed task, left tries: {left_tries}') left_tries -= 1 failed_tasks.append(failed_queue.get()) - finally: - # shutdown workers - executor.shutdown(wait=False) + + self._log.warning('Kill workers') # update looked graphs self._pop_graph_descriptive_ids |= set(pop_graph_descriptive_ids.keys()) @@ -233,7 +237,8 @@ def __init__(self, population: PopulationT, task_queue: Queue, result_queue: Queue, - failed_queue: Queue + failed_queue: Queue, + log ): self.crossover = crossover self.mutation = mutation @@ -244,6 +249,7 @@ def __init__(self, self._task_queue = task_queue self._result_queue = result_queue self._failed_queue = failed_queue + self._log = log def __call__(self): tasks = [] @@ -255,6 +261,8 @@ def __call__(self): # process result for processed_task in processed_tasks: + # self._log.warning(f"PTask {id(processed_task)}: {processed_task.stage}:{processed_task.fail} " + # f"{processed_task.crossover_tries}:{processed_task.mutation_tries}") if processed_task.stage is ReproducerWorkerStageEnum.FINISH: self._result_queue.put(processed_task) continue @@ -361,7 +369,10 @@ def mutation_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTas return [task] def uniqueness_check_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: - graph = task.graph_for_mutation if task.is_crossover else task.final_graph + if task.is_crossover: + graph = task.graph_for_mutation + else: + graph = task.final_graph descriptive_id = graph.descriptive_id if descriptive_id not in self._pop_graph_descriptive_ids: self._pop_graph_descriptive_ids[descriptive_id] = True @@ -371,7 +382,10 @@ def uniqueness_check_stage(self, task: ReproducerWorkerTask) -> List[ReproducerW return [task] def evaluation_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: - graph = task.graph_for_mutation if task.is_crossover else task.final_graph + if task.is_crossover: + graph = task.graph_for_mutation + else: + graph = task.final_graph individual = Individual(deepcopy(graph), metadata=self.mutation.requirements.static_individual_metadata) evaluated_individuals = self.evaluator([individual]) if evaluated_individuals: From b03a9794429e424aa7db6c002bb6853b42b53b20 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 9 Nov 2023 13:44:57 +0300 Subject: [PATCH 55/65] New way to parallel population reproducing --- golem/core/constants.py | 2 +- golem/core/optimisers/genetic/gp_optimizer.py | 7 +- golem/core/optimisers/genetic/gp_params.py | 4 +- .../optimisers/genetic/operators/crossover.py | 4 +- .../optimisers/genetic/operators/mutation.py | 16 +- .../genetic/operators/reproduction.py | 146 +++++++++--------- 6 files changed, 96 insertions(+), 83 deletions(-) diff --git a/golem/core/constants.py b/golem/core/constants.py index 70cbf0c26..6a1bbff40 100644 --- a/golem/core/constants.py +++ b/golem/core/constants.py @@ -1,6 +1,6 @@ import numpy as np -MAX_GRAPH_GEN_ATTEMPTS_PER_IND = 5 +MAX_GRAPH_GEN_ATTEMPTS_PER_IND = 100 MAX_GRAPH_GEN_ATTEMPTS = 1000 MAX_TUNING_METRIC_VALUE = np.inf MIN_TIME_FOR_TUNING_IN_SEC = 3 diff --git a/golem/core/optimisers/genetic/gp_optimizer.py b/golem/core/optimisers/genetic/gp_optimizer.py index eff76164a..759d7c8b4 100644 --- a/golem/core/optimisers/genetic/gp_optimizer.py +++ b/golem/core/optimisers/genetic/gp_optimizer.py @@ -8,7 +8,7 @@ from golem.core.optimisers.genetic.operators.mutation import Mutation, SinglePredefinedGraphMutation from golem.core.optimisers.genetic.operators.operator import PopulationT, EvaluationOperator from golem.core.optimisers.genetic.operators.regularization import Regularization -from golem.core.optimisers.genetic.operators.reproduction import ReproductionController +from golem.core.optimisers.genetic.operators.reproduction import ReproductionController, ReproducerWorkerStageEnum from golem.core.optimisers.genetic.operators.selection import Selection from golem.core.optimisers.genetic.parameters.graph_depth import AdaptiveGraphDepth from golem.core.optimisers.genetic.parameters.operators_prob import init_adaptive_operators_prob @@ -70,8 +70,9 @@ def _initial_population(self, evaluator: EvaluationOperator): # pop_size = self.graph_optimizer_params.pop_size # # if len(self.initial_individuals) < pop_size: - # self.initial_individuals += self.reproducer._mutate_over_population(population=self.initial_individuals, - # evaluator=evaluator) + # self.initial_individuals += self.reproducer._reproduce(population=self.initial_individuals, + # evaluator=evaluator, + # start_stage=ReproducerWorkerStageEnum.MUTATION) # # Adding of extended population to history # self._update_population(self.initial_individuals, 'extended_initial_assumptions') diff --git a/golem/core/optimisers/genetic/gp_params.py b/golem/core/optimisers/genetic/gp_params.py index 2f02b3d1e..92b8dd887 100644 --- a/golem/core/optimisers/genetic/gp_params.py +++ b/golem/core/optimisers/genetic/gp_params.py @@ -76,9 +76,9 @@ class GPAlgorithmParameters(AlgorithmParameters): mutation_prob: float = 0.8 variable_mutation_num: bool = True max_num_of_operator_attempts: int = 100 - max_num_of_crossover_reproducer_attempts: int = 2 + max_num_of_crossover_reproducer_attempts: int = 10 max_num_of_mutation_reproducer_attempts: int = 2 - mutation_attempts_per_each_crossover_reproducer: int = 2 + mutation_attempts_per_each_crossover_reproducer: int = 10 mutation_strength: MutationStrengthEnum = MutationStrengthEnum.mean min_pop_size_with_elitism: int = 5 required_valid_ratio: float = 0.9 diff --git a/golem/core/optimisers/genetic/operators/crossover.py b/golem/core/optimisers/genetic/operators/crossover.py index 399952f96..fe0b0b4a5 100644 --- a/golem/core/optimisers/genetic/operators/crossover.py +++ b/golem/core/optimisers/genetic/operators/crossover.py @@ -96,12 +96,12 @@ def _crossover_by_type(self, crossover_type: CrossoverTypesEnum) -> CrossoverCal raise ValueError(f'Required crossover type is not found: {crossover_type}') def _get_individuals(self, new_graphs: Tuple[OptGraph, OptGraph], parent_individuals: Tuple[Individual, Individual], - crossover_type: Union[CrossoverTypesEnum, Callable]) -> Tuple[Individual, Individual]: + crossover_type: Union[CrossoverTypesEnum, Callable], **kwargs) -> Tuple[Individual, Individual]: operator = ParentOperator(type_='crossover', operators=str(crossover_type), parent_individuals=parent_individuals) metadata = self.requirements.static_individual_metadata - return tuple(Individual(graph, operator, metadata=metadata) for graph in new_graphs) + return tuple(Individual(graph, operator, metadata=metadata, **kwargs) for graph in new_graphs) def _will_crossover_be_applied(self, graph_first, graph_second, crossover_type) -> bool: return not (graph_first is graph_second or diff --git a/golem/core/optimisers/genetic/operators/mutation.py b/golem/core/optimisers/genetic/operators/mutation.py index d42ffc2fc..e80f34a4b 100644 --- a/golem/core/optimisers/genetic/operators/mutation.py +++ b/golem/core/optimisers/genetic/operators/mutation.py @@ -106,11 +106,9 @@ def _mutation(self, individual: Individual) -> Tuple[Individual, Optional[Mutati application_attempt = True is_correct_graph = self.graph_generation_params.verifier(new_graph) if is_correct_graph: - parent_operator = ParentOperator(type_='mutation', - operators=mutation_applied, - parent_individuals=individual) - individual = Individual(new_graph, parent_operator, - metadata=self.requirements.static_individual_metadata) + individual = self._get_individual(new_graph=new_graph, + mutation_type=mutation_applied, + parent=individual) break else: # Collect invalid actions @@ -162,6 +160,14 @@ def _get_mutation_func(self, mutation_type: Union[MutationTypesEnum, Callable]) adapted_mutation_func = self.graph_generation_params.adapter.adapt_func(mutation_func) return adapted_mutation_func + def _get_individual(self, new_graph: Graph, mutation_type: MutationType, parent: Individual, **kwargs): + parent_operator = ParentOperator(type_='mutation', + operators=mutation_type, + parent_individuals=parent) + individual = Individual(new_graph, parent_operator, + metadata=self.requirements.static_individual_metadata, **kwargs) + return individual + class SinglePredefinedGraphMutation(Mutation): """ Mutation that tries to create new graph (not individual) from the only graph in one attempt diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index f31ff665f..530dcaf61 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -4,12 +4,11 @@ from enum import Enum from multiprocessing.managers import DictProxy from multiprocessing import Manager -from queue import Queue +from queue import Empty, Queue from random import sample from typing import Optional, Dict, Union, List from joblib import Parallel, delayed -from joblib.externals.loky import get_reusable_executor from golem.core.constants import MAX_GRAPH_GEN_ATTEMPTS_PER_IND from golem.core.dag.graph_verifier import GraphVerifier @@ -21,11 +20,28 @@ from golem.core.optimisers.genetic.operators.operator import PopulationT, EvaluationOperator from golem.core.optimisers.genetic.operators.selection import Selection from golem.core.optimisers.graph import OptGraph -from golem.core.optimisers.opt_history_objects.parent_operator import ParentOperator from golem.core.optimisers.populational_optimizer import EvaluationAttemptsError from golem.core.optimisers.opt_history_objects.individual import Individual +class ReproducerWorkerStageEnum(Enum): + # TODO test that check that nums start from 0 and go to max (FINISH) with 1 steps + CROSSOVER = 0 + CROSSOVER_VERIFICATION = 1 + CROSSOVER_UNIQUENESS_CHECK = 2 + CROSSOVER_EVALUATION = 3 + MUTATION = 4 + MUTATION_VERIFICATION = 5 + MUTATION_UNIQUENESS_CHECK = 6 + MUTATION_EVALUATION = 7 + FINISH = 8 + + def __lt__(self, other): + if self.__class__ is other.__class__: + return self.value < other.value + return NotImplemented + + class ReproductionController: """ Task of the Reproduction Controller is to reproduce population @@ -63,63 +79,64 @@ def reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> P self._check_final_population(new_population) return new_population - def _reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: + def _reproduce(self, population: PopulationT, evaluator: EvaluationOperator, + start_stage: ReproducerWorkerStageEnum = ReproducerWorkerStageEnum.CROSSOVER) -> PopulationT: """Generate new individuals by mutation in parallel. Implements additional checks on population to ensure that population size is greater or equal to required population size. Also controls uniqueness of population. """ with Manager() as manager: - left_tries = self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_PER_IND pop_graph_descriptive_ids = manager.dict({ids: True for ids in self._pop_graph_descriptive_ids}) task_queue, result_queue, failed_queue = [manager.Queue() for _ in range(3)] + empty_task = ReproducerWorkerTask( + stage=start_stage, + crossover_tries=self.parameters.max_num_of_crossover_reproducer_attempts, + mutation_tries=self.parameters.max_num_of_mutation_reproducer_attempts, + mutation_attempts_per_each_crossover=self.parameters.mutation_attempts_per_each_crossover_reproducer) + worker = ReproduceWorker(crossover=self.crossover, mutation=self.mutation, verifier=self.verifier, evaluator=evaluator, pop_graph_descriptive_ids=pop_graph_descriptive_ids, population=population, task_queue=task_queue, result_queue=result_queue, - failed_queue=failed_queue, + failed_queue=failed_queue, empty_task=empty_task, log=self._log) - empty_task = ReproducerWorkerTask( - crossover_tries=self.parameters.max_num_of_crossover_reproducer_attempts, - mutation_tries=self.parameters.max_num_of_mutation_reproducer_attempts, - mutation_attempts_per_each_crossover=self.parameters.mutation_attempts_per_each_crossover_reproducer) - # TODO there is problem with random seed in parallel workers + # TODO do not put failed tasks in queue, return it after worker stop + # TODO kill workers, collect tasks and then update _pop_graph_descriptive_ids + # TODO only get results in main thread, push empty task in parallel workers n_jobs = self.mutation.requirements.n_jobs with Parallel(n_jobs=n_jobs, prefer='processes', return_as='generator') as parallel: _ = parallel(delayed(worker)() for _ in range(n_jobs)) finished_tasks, failed_tasks = list(), list() + left_tries = self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_PER_IND + left_tries = int(left_tries / (ReproducerWorkerStageEnum.FINISH.value - start_stage.value) * + ReproducerWorkerStageEnum.FINISH.value) while left_tries > 0 and len(finished_tasks) < self.parameters.pop_size: - self._log.warning('Cycle') - time.sleep(0.02) + time.sleep(0.01) # if there is not enough jobs, create new empty job - if task_queue.qsize() < 2: - self._log.warning('Put task to task_queue') + while task_queue.qsize() < 2: task_queue.put(empty_task) - time.sleep(0.01) # give workers some time to get tasks from queue # process result if result_queue.qsize() > 0: - self._log.warning(f'Get finished task, left tries: {left_tries}') left_tries -= 1 finished_tasks.append(result_queue.get()) # process unsuccessful creation attempt if failed_queue.qsize() > 0: - self._log.warning(f'Get failed task, left tries: {left_tries}') left_tries -= 1 failed_tasks.append(failed_queue.get()) - self._log.warning('Kill workers') - # update looked graphs self._pop_graph_descriptive_ids |= set(pop_graph_descriptive_ids.keys()) + self._log.warning(f'left_tries {left_tries}, fin_tasks {len(finished_tasks)}, fail_tasks {len(failed_tasks)}') # rebuild population new_population = self._process_tasks(population=population, finished_tasks=finished_tasks, @@ -130,12 +147,33 @@ def _process_tasks(self, population: PopulationT, finished_tasks: List['ReproducerWorkerTask'], failed_tasks: List['ReproducerWorkerTask']): - # if failed_stage is ReproducerWorkerStageEnum.MUTATION_VERIFICATION: - # # experience for mab - # self.mutation.agent_experience.collect_experience(population_uid_map[individual_uid], - # mutation_type, - # reward=-1.0) - pass + population_uid_map = {ind.uid: ind for ind in population} + + crossover_individuals, new_population = dict(), [] + for task in finished_tasks + failed_tasks: + if task.stage > ReproducerWorkerStageEnum.MUTATION: + uids = (task.graph_1_uid, task.graph_2_uid) + # create individuals, generated by crossover + if uids not in crossover_individuals: + individuals = self.crossover._get_individuals(new_graphs=[task.graph_for_mutation], + parent_individuals=[population_uid_map[uid] + for uid in uids], + crossover_type=task.crossover_type, + fitness=task.crossover_fitness) + crossover_individuals[uids] = individuals[0] + + # create individuals, generated by mutation + if uids in crossover_individuals: + individual = self.mutation._get_individual(new_graph=task.final_graph, + mutation_type=task.mutation_type, + parent=crossover_individuals[uids], + fitness=task.final_fitness) + if task.stage is ReproducerWorkerStageEnum.FINISH: + new_population.append(individual) + elif task.stage is ReproducerWorkerStageEnum.MUTATION_VERIFICATION: + # experience for mab + self.mutation.agent_experience.collect_experience(individual, task.mutation_type, reward=-1.0) + return new_population def _check_final_population(self, population: PopulationT) -> None: """ If population do not achieve required length return a warning or raise exception """ @@ -151,38 +189,6 @@ def _check_final_population(self, population: PopulationT) -> None: f'have {len(population)},' f' required {target_pop_size}!\n' + helpful_msg) - def _rebuild_final_population(self, population: PopulationT, new_population: PopulationT) -> PopulationT: - """ Recreate new_population in main thread with parents from population """ - population_uid_map = {individual.uid: individual for individual in population} - rebuilded_population = [] - for individual in new_population: - if individual.parent_operator: - parent_uid = individual.parent_operator.parent_individuals[0].uid - parent_operator = ParentOperator(type_=individual.parent_operator.type_, - operators=individual.parent_operator.operators, - parent_individuals=population_uid_map[parent_uid]) - else: - parent_operator = None - individual = Individual(deepcopy(individual.graph), - parent_operator, - fitness=individual.fitness, - metadata=self.mutation.requirements.static_individual_metadata) - rebuilded_population.append(individual) - return rebuilded_population - - -class ReproducerWorkerStageEnum(Enum): - # TODO test that check that nums start from 0 and go to max (FINISH) with 1 steps - CROSSOVER = 0 - CROSSOVER_VERIFICATION = 1 - CROSSOVER_UNIQUENESS_CHECK = 2 - CROSSOVER_EVALUATION = 3 - MUTATION = 4 - MUTATION_VERIFICATION = 5 - MUTATION_UNIQUENESS_CHECK = 6 - MUTATION_EVALUATION = 7 - FINISH = 8 - @dataclass class ReproducerWorkerTask: @@ -210,14 +216,7 @@ class ReproducerWorkerTask: @property def is_crossover(self): - return self.stage in [ReproducerWorkerStageEnum.CROSSOVER, - ReproducerWorkerStageEnum.CROSSOVER_VERIFICATION, - ReproducerWorkerStageEnum.CROSSOVER_UNIQUENESS_CHECK, - ReproducerWorkerStageEnum.CROSSOVER_EVALUATION] - - @property - def is_mutation(self): - return not self.is_crossover + return self.stage < ReproducerWorkerStageEnum.MUTATION @property def tries(self): @@ -238,6 +237,7 @@ def __init__(self, task_queue: Queue, result_queue: Queue, failed_queue: Queue, + empty_task: ReproducerWorkerTask, log ): self.crossover = crossover @@ -249,6 +249,7 @@ def __init__(self, self._task_queue = task_queue self._result_queue = result_queue self._failed_queue = failed_queue + self._empty_task = empty_task self._log = log def __call__(self): @@ -256,7 +257,10 @@ def __call__(self): while True: # work with existing task from tasks or from queue if not tasks: - tasks.append(self._task_queue.get()) + try: + tasks.append(self._task_queue.get(timeout=0.02)) + except Empty: + tasks.append(self._empty_task) processed_tasks = self.process_task(tasks.pop()) # process result @@ -388,10 +392,12 @@ def evaluation_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerT graph = task.final_graph individual = Individual(deepcopy(graph), metadata=self.mutation.requirements.static_individual_metadata) evaluated_individuals = self.evaluator([individual]) - if evaluated_individuals: - # TODO add null_fitness as flag for previous stage + if evaluated_individuals and evaluated_individuals[0].fitness.valid: task.fail = False + if task.is_crossover: + task.crossover_fitness = evaluated_individuals[0].fitness + else: + task.final_fitness = evaluated_individuals[0].fitness else: task.fail = True - # TODO return fitness return [task] From 1d40d90ef3cb6e57261f052cf350fb4f81a2d3bc Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 9 Nov 2023 14:22:28 +0300 Subject: [PATCH 56/65] Fix some problems --- .../genetic/operators/reproduction.py | 108 +++++++++--------- 1 file changed, 52 insertions(+), 56 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 530dcaf61..f2e3c1af6 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -1,11 +1,13 @@ +import sys import time from copy import deepcopy, copy from dataclasses import dataclass from enum import Enum +from functools import partial from multiprocessing.managers import DictProxy from multiprocessing import Manager from queue import Empty, Queue -from random import sample +from random import sample, randint from typing import Optional, Dict, Union, List from joblib import Parallel, delayed @@ -22,6 +24,7 @@ from golem.core.optimisers.graph import OptGraph from golem.core.optimisers.populational_optimizer import EvaluationAttemptsError from golem.core.optimisers.opt_history_objects.individual import Individual +from golem.utilities.random import RandomStateHandler class ReproducerWorkerStageEnum(Enum): @@ -89,59 +92,52 @@ def _reproduce(self, population: PopulationT, evaluator: EvaluationOperator, pop_graph_descriptive_ids = manager.dict({ids: True for ids in self._pop_graph_descriptive_ids}) task_queue, result_queue, failed_queue = [manager.Queue() for _ in range(3)] + # empty task for worker if there is no work empty_task = ReproducerWorkerTask( stage=start_stage, crossover_tries=self.parameters.max_num_of_crossover_reproducer_attempts, mutation_tries=self.parameters.max_num_of_mutation_reproducer_attempts, mutation_attempts_per_each_crossover=self.parameters.mutation_attempts_per_each_crossover_reproducer) - worker = ReproduceWorker(crossover=self.crossover, mutation=self.mutation, + # parameters for worker + worker_parameters = dict(crossover=self.crossover, mutation=self.mutation, verifier=self.verifier, evaluator=evaluator, pop_graph_descriptive_ids=pop_graph_descriptive_ids, population=population, task_queue=task_queue, result_queue=result_queue, - failed_queue=failed_queue, empty_task=empty_task, - log=self._log) - - # TODO there is problem with random seed in parallel workers - # TODO do not put failed tasks in queue, return it after worker stop - # TODO kill workers, collect tasks and then update _pop_graph_descriptive_ids - # TODO only get results in main thread, push empty task in parallel workers + failed_queue=failed_queue, empty_task=empty_task) n_jobs = self.mutation.requirements.n_jobs with Parallel(n_jobs=n_jobs, prefer='processes', return_as='generator') as parallel: - _ = parallel(delayed(worker)() for _ in range(n_jobs)) + workers = [ReproduceWorker(seed=randint(0, sys.maxsize), **worker_parameters) for _ in range(n_jobs)] + _ = parallel(delayed(worker)() for worker in workers) finished_tasks, failed_tasks = list(), list() left_tries = self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_PER_IND left_tries = int(left_tries / (ReproducerWorkerStageEnum.FINISH.value - start_stage.value) * ReproducerWorkerStageEnum.FINISH.value) while left_tries > 0 and len(finished_tasks) < self.parameters.pop_size: - time.sleep(0.01) - - # if there is not enough jobs, create new empty job - while task_queue.qsize() < 2: - task_queue.put(empty_task) + time.sleep(1) + while failed_queue.qsize() > 0: + left_tries -= 1 + failed_tasks.append(failed_queue.get()) - # process result - if result_queue.qsize() > 0: + while result_queue.qsize() > 0: left_tries -= 1 finished_tasks.append(result_queue.get()) - # process unsuccessful creation attempt - if failed_queue.qsize() > 0: - left_tries -= 1 - failed_tasks.append(failed_queue.get()) + # get all finished works + failed_tasks += list(failed_queue.queue) + finished_tasks += list(result_queue.queue) # update looked graphs self._pop_graph_descriptive_ids |= set(pop_graph_descriptive_ids.keys()) - self._log.warning(f'left_tries {left_tries}, fin_tasks {len(finished_tasks)}, fail_tasks {len(failed_tasks)}') - # rebuild population - new_population = self._process_tasks(population=population, - finished_tasks=finished_tasks, - failed_tasks=failed_tasks) - return new_population + # rebuild population + new_population = self._process_tasks(population=population, + finished_tasks=finished_tasks, + failed_tasks=failed_tasks) + return new_population def _process_tasks(self, population: PopulationT, @@ -238,7 +234,7 @@ def __init__(self, result_queue: Queue, failed_queue: Queue, empty_task: ReproducerWorkerTask, - log + seed: int ): self.crossover = crossover self.mutation = mutation @@ -250,36 +246,36 @@ def __init__(self, self._result_queue = result_queue self._failed_queue = failed_queue self._empty_task = empty_task - self._log = log + self._seed = seed def __call__(self): - tasks = [] - while True: - # work with existing task from tasks or from queue - if not tasks: - try: - tasks.append(self._task_queue.get(timeout=0.02)) - except Empty: - tasks.append(self._empty_task) - processed_tasks = self.process_task(tasks.pop()) - - # process result - for processed_task in processed_tasks: - # self._log.warning(f"PTask {id(processed_task)}: {processed_task.stage}:{processed_task.fail} " - # f"{processed_task.crossover_tries}:{processed_task.mutation_tries}") - if processed_task.stage is ReproducerWorkerStageEnum.FINISH: - self._result_queue.put(processed_task) - continue - if processed_task.fail: - self._failed_queue.put(processed_task) - processed_task.fail = False - if processed_task.tries > 0: - # task is not finished, need new try - tasks.append(processed_task) - - # if there are some tasks, add it to parallel queue - for _ in range(len(tasks) - 1): - self._task_queue.put(tasks.pop()) + with RandomStateHandler(self._seed): + tasks = [self._empty_task] + while True: + # is there is no tasks, try to get 1. task from queue 2. empty task + if not tasks: + try: + tasks.append(self._task_queue.get(timeout=0.02)) + except Empty: + tasks.append(self._empty_task) + + # work with task + processed_tasks = self.process_task(tasks.pop()) + + # process result + tasks = [] + for processed_task in processed_tasks: + if processed_task.stage is ReproducerWorkerStageEnum.FINISH: + self._result_queue.put(processed_task) + continue + if processed_task.fail: + self._failed_queue.put(processed_task) + if processed_task.tries > 0: + tasks.append(processed_task) + + # if there are some tasks, add it to parallel queue + for _ in range(len(tasks) - 1): + self._task_queue.put(tasks.pop()) def process_task(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: """ Get task, make 1 stage and return processed task """ From 30d03a6e2eb4e032408f1458d4fac17576601042 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 9 Nov 2023 14:56:32 +0300 Subject: [PATCH 57/65] Fix error with random state handler --- golem/core/constants.py | 2 +- .../optimisers/genetic/operators/reproduction.py | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/golem/core/constants.py b/golem/core/constants.py index 6a1bbff40..97c7dc4c7 100644 --- a/golem/core/constants.py +++ b/golem/core/constants.py @@ -1,6 +1,6 @@ import numpy as np -MAX_GRAPH_GEN_ATTEMPTS_PER_IND = 100 +MAX_GRAPH_GEN_ATTEMPTS_PER_IND = 200 MAX_GRAPH_GEN_ATTEMPTS = 1000 MAX_TUNING_METRIC_VALUE = np.inf MIN_TIME_FOR_TUNING_IN_SEC = 3 diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index f2e3c1af6..94f131c6f 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -105,11 +105,12 @@ def _reproduce(self, population: PopulationT, evaluator: EvaluationOperator, pop_graph_descriptive_ids=pop_graph_descriptive_ids, population=population, task_queue=task_queue, result_queue=result_queue, - failed_queue=failed_queue, empty_task=empty_task) + failed_queue=failed_queue, empty_task=empty_task, + log=self._log) n_jobs = self.mutation.requirements.n_jobs with Parallel(n_jobs=n_jobs, prefer='processes', return_as='generator') as parallel: - workers = [ReproduceWorker(seed=randint(0, sys.maxsize), **worker_parameters) for _ in range(n_jobs)] + workers = [ReproduceWorker(seed=randint(0, int(2**32 - 1)), **worker_parameters) for _ in range(n_jobs)] _ = parallel(delayed(worker)() for worker in workers) finished_tasks, failed_tasks = list(), list() @@ -127,8 +128,10 @@ def _reproduce(self, population: PopulationT, evaluator: EvaluationOperator, finished_tasks.append(result_queue.get()) # get all finished works - failed_tasks += list(failed_queue.queue) - finished_tasks += list(result_queue.queue) + while failed_queue.qsize() > 0: + failed_tasks.append(failed_queue.get()) + while result_queue.qsize() > 0: + finished_tasks.append(result_queue.get()) # update looked graphs self._pop_graph_descriptive_ids |= set(pop_graph_descriptive_ids.keys()) @@ -234,7 +237,8 @@ def __init__(self, result_queue: Queue, failed_queue: Queue, empty_task: ReproducerWorkerTask, - seed: int + seed: int, + log ): self.crossover = crossover self.mutation = mutation @@ -247,6 +251,7 @@ def __init__(self, self._failed_queue = failed_queue self._empty_task = empty_task self._seed = seed + self._log = log def __call__(self): with RandomStateHandler(self._seed): @@ -263,7 +268,6 @@ def __call__(self): processed_tasks = self.process_task(tasks.pop()) # process result - tasks = [] for processed_task in processed_tasks: if processed_task.stage is ReproducerWorkerStageEnum.FINISH: self._result_queue.put(processed_task) From eedbebe1c6afe9db72f4c267f1434d22899973ed Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 9 Nov 2023 17:53:07 +0300 Subject: [PATCH 58/65] wip --- golem/core/optimisers/genetic/operators/crossover.py | 2 ++ golem/core/optimisers/genetic/operators/mutation.py | 2 ++ golem/core/optimisers/genetic/operators/reproduction.py | 8 +++++++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/golem/core/optimisers/genetic/operators/crossover.py b/golem/core/optimisers/genetic/operators/crossover.py index fe0b0b4a5..6f389a211 100644 --- a/golem/core/optimisers/genetic/operators/crossover.py +++ b/golem/core/optimisers/genetic/operators/crossover.py @@ -118,6 +118,8 @@ def __call__(self, graph_2: OptGraph, crossover_type: Optional[CrossoverTypesEnum] = None) -> Tuple[OptGraph, CrossoverTypesEnum]: crossover_type = crossover_type or choice(self.parameters.crossover_types) + if crossover_type is CrossoverTypesEnum.none: + return (None, ) crossover_func = self._get_crossover_function(crossover_type) new_graphs = crossover_func(deepcopy(graph_1), deepcopy(graph_2), max_depth=self.requirements.max_depth) diff --git a/golem/core/optimisers/genetic/operators/mutation.py b/golem/core/optimisers/genetic/operators/mutation.py index e80f34a4b..07ec1c701 100644 --- a/golem/core/optimisers/genetic/operators/mutation.py +++ b/golem/core/optimisers/genetic/operators/mutation.py @@ -176,6 +176,8 @@ class SinglePredefinedGraphMutation(Mutation): def __call__(self, graph: Graph, mutation_type: Optional[MutationType] = None) -> Tuple[Graph, MutationIdType]: new_graph = deepcopy(graph) mutation_type = mutation_type or self._operator_agent.choose_action(new_graph) + if mutation_type is MutationTypesEnum.none: + return None, None mutation_func = self._get_mutation_func(mutation_type) new_graph = mutation_func(new_graph, requirements=self.requirements, graph_gen_params=self.graph_generation_params, diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 94f131c6f..5bf244efb 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -108,7 +108,7 @@ def _reproduce(self, population: PopulationT, evaluator: EvaluationOperator, failed_queue=failed_queue, empty_task=empty_task, log=self._log) - n_jobs = self.mutation.requirements.n_jobs + n_jobs = max(2, self.mutation.requirements.n_jobs) with Parallel(n_jobs=n_jobs, prefer='processes', return_as='generator') as parallel: workers = [ReproduceWorker(seed=randint(0, int(2**32 - 1)), **worker_parameters) for _ in range(n_jobs)] _ = parallel(delayed(worker)() for worker in workers) @@ -254,8 +254,10 @@ def __init__(self, self._log = log def __call__(self): + self._log.warning(f"CALLED") with RandomStateHandler(self._seed): tasks = [self._empty_task] + self._log.warning(f"START CYCLE") while True: # is there is no tasks, try to get 1. task from queue 2. empty task if not tasks: @@ -269,6 +271,7 @@ def __call__(self): # process result for processed_task in processed_tasks: + self._log.warning(f"PROCESS: {processed_task.stage} {processed_task.crossover_tries}:{processed_task.mutation_tries}") if processed_task.stage is ReproducerWorkerStageEnum.FINISH: self._result_queue.put(processed_task) continue @@ -278,11 +281,14 @@ def __call__(self): tasks.append(processed_task) # if there are some tasks, add it to parallel queue + self._log.warning(f"TASKS: {len(tasks)}") for _ in range(len(tasks) - 1): self._task_queue.put(tasks.pop()) + self._log.warning(f"TASKS: {len(tasks)}") def process_task(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: """ Get task, make 1 stage and return processed task """ + self._log.warning(f"START: {task.stage} {task.crossover_tries}:{task.mutation_tries}") task = copy(task) # input task task.fail = False From bf6e283e59ce08cfe393b5c14a646b991393ced0 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Fri, 10 Nov 2023 18:20:06 +0300 Subject: [PATCH 59/65] wip --- golem/core/constants.py | 2 +- golem/core/optimisers/genetic/gp_optimizer.py | 3 +- golem/core/optimisers/genetic/gp_params.py | 4 +- .../optimisers/genetic/operators/crossover.py | 2 +- .../genetic/operators/reproduction.py | 92 ++++++++++--------- .../test_reproduction_controller.py | 9 +- 6 files changed, 61 insertions(+), 51 deletions(-) diff --git a/golem/core/constants.py b/golem/core/constants.py index 97c7dc4c7..523c46f45 100644 --- a/golem/core/constants.py +++ b/golem/core/constants.py @@ -1,6 +1,6 @@ import numpy as np -MAX_GRAPH_GEN_ATTEMPTS_PER_IND = 200 +MAX_GRAPH_GEN_ATTEMPTS_PER_IND = 50 MAX_GRAPH_GEN_ATTEMPTS = 1000 MAX_TUNING_METRIC_VALUE = np.inf MIN_TIME_FOR_TUNING_IN_SEC = 3 diff --git a/golem/core/optimisers/genetic/gp_optimizer.py b/golem/core/optimisers/genetic/gp_optimizer.py index 759d7c8b4..89401cbff 100644 --- a/golem/core/optimisers/genetic/gp_optimizer.py +++ b/golem/core/optimisers/genetic/gp_optimizer.py @@ -71,8 +71,7 @@ def _initial_population(self, evaluator: EvaluationOperator): # # if len(self.initial_individuals) < pop_size: # self.initial_individuals += self.reproducer._reproduce(population=self.initial_individuals, - # evaluator=evaluator, - # start_stage=ReproducerWorkerStageEnum.MUTATION) + # evaluator=evaluator) # # Adding of extended population to history # self._update_population(self.initial_individuals, 'extended_initial_assumptions') diff --git a/golem/core/optimisers/genetic/gp_params.py b/golem/core/optimisers/genetic/gp_params.py index 92b8dd887..8ad3e06d2 100644 --- a/golem/core/optimisers/genetic/gp_params.py +++ b/golem/core/optimisers/genetic/gp_params.py @@ -76,9 +76,9 @@ class GPAlgorithmParameters(AlgorithmParameters): mutation_prob: float = 0.8 variable_mutation_num: bool = True max_num_of_operator_attempts: int = 100 - max_num_of_crossover_reproducer_attempts: int = 10 + max_num_of_crossover_reproducer_attempts: int = 1 max_num_of_mutation_reproducer_attempts: int = 2 - mutation_attempts_per_each_crossover_reproducer: int = 10 + mutation_attempts_per_each_crossover_reproducer: int = 8 mutation_strength: MutationStrengthEnum = MutationStrengthEnum.mean min_pop_size_with_elitism: int = 5 required_valid_ratio: float = 0.9 diff --git a/golem/core/optimisers/genetic/operators/crossover.py b/golem/core/optimisers/genetic/operators/crossover.py index 6f389a211..461d1af52 100644 --- a/golem/core/optimisers/genetic/operators/crossover.py +++ b/golem/core/optimisers/genetic/operators/crossover.py @@ -119,7 +119,7 @@ def __call__(self, crossover_type: Optional[CrossoverTypesEnum] = None) -> Tuple[OptGraph, CrossoverTypesEnum]: crossover_type = crossover_type or choice(self.parameters.crossover_types) if crossover_type is CrossoverTypesEnum.none: - return (None, ) + return (deepcopy(graph_1), deepcopy(graph_2), crossover_type) crossover_func = self._get_crossover_function(crossover_type) new_graphs = crossover_func(deepcopy(graph_1), deepcopy(graph_2), max_depth=self.requirements.max_depth) diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 5bf244efb..8ecb510cf 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -1,9 +1,7 @@ -import sys import time from copy import deepcopy, copy from dataclasses import dataclass from enum import Enum -from functools import partial from multiprocessing.managers import DictProxy from multiprocessing import Manager from queue import Empty, Queue @@ -29,15 +27,8 @@ class ReproducerWorkerStageEnum(Enum): # TODO test that check that nums start from 0 and go to max (FINISH) with 1 steps - CROSSOVER = 0 - CROSSOVER_VERIFICATION = 1 - CROSSOVER_UNIQUENESS_CHECK = 2 - CROSSOVER_EVALUATION = 3 - MUTATION = 4 - MUTATION_VERIFICATION = 5 - MUTATION_UNIQUENESS_CHECK = 6 - MUTATION_EVALUATION = 7 - FINISH = 8 + (CROSSOVER, CROSSOVER_VERIFICATION, CROSSOVER_UNIQUENESS_CHECK, CROSSOVER_EVALUATION, + MUTATION, MUTATION_VERIFICATION, MUTATION_UNIQUENESS_CHECK, MUTATION_EVALUATION, FINISH) = range(9) def __lt__(self, other): if self.__class__ is other.__class__: @@ -82,8 +73,7 @@ def reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> P self._check_final_population(new_population) return new_population - def _reproduce(self, population: PopulationT, evaluator: EvaluationOperator, - start_stage: ReproducerWorkerStageEnum = ReproducerWorkerStageEnum.CROSSOVER) -> PopulationT: + def _reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> PopulationT: """Generate new individuals by mutation in parallel. Implements additional checks on population to ensure that population size is greater or equal to required population size. Also controls uniqueness of population. @@ -94,7 +84,6 @@ def _reproduce(self, population: PopulationT, evaluator: EvaluationOperator, # empty task for worker if there is no work empty_task = ReproducerWorkerTask( - stage=start_stage, crossover_tries=self.parameters.max_num_of_crossover_reproducer_attempts, mutation_tries=self.parameters.max_num_of_mutation_reproducer_attempts, mutation_attempts_per_each_crossover=self.parameters.mutation_attempts_per_each_crossover_reproducer) @@ -108,16 +97,22 @@ def _reproduce(self, population: PopulationT, evaluator: EvaluationOperator, failed_queue=failed_queue, empty_task=empty_task, log=self._log) - n_jobs = max(2, self.mutation.requirements.n_jobs) - with Parallel(n_jobs=n_jobs, prefer='processes', return_as='generator') as parallel: - workers = [ReproduceWorker(seed=randint(0, int(2**32 - 1)), **worker_parameters) for _ in range(n_jobs)] - _ = parallel(delayed(worker)() for worker in workers) + n_jobs = self.mutation.requirements.n_jobs + with Parallel(n_jobs=n_jobs + 1, prefer='processes', return_as='generator') as parallel: + # prepare (n_jobs + 1) workers + workers = [ReproduceWorker(seed=randint(0, int(2**32 - 1)), **worker_parameters) + for _ in range(n_jobs + 1)] + # run n_jobs workers with run_flag = True + # and one worker with run_flag = False + # It guarantees n_jobs workers parallel execution also if n_jobs == 1 + # because joblib for n_jobs == 1 does not start parallel pool + _ = parallel(delayed(worker)(run_flag) for worker, run_flag in zip(workers, [True] * n_jobs + [False])) finished_tasks, failed_tasks = list(), list() - left_tries = self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_PER_IND - left_tries = int(left_tries / (ReproducerWorkerStageEnum.FINISH.value - start_stage.value) * - ReproducerWorkerStageEnum.FINISH.value) + left_tries = self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_PER_IND * n_jobs while left_tries > 0 and len(finished_tasks) < self.parameters.pop_size: + # main thread is fast + # frequent queues blocking with qsize is not good idea time.sleep(1) while failed_queue.qsize() > 0: left_tries -= 1 @@ -169,7 +164,7 @@ def _process_tasks(self, fitness=task.final_fitness) if task.stage is ReproducerWorkerStageEnum.FINISH: new_population.append(individual) - elif task.stage is ReproducerWorkerStageEnum.MUTATION_VERIFICATION: + elif task.failed_stage is ReproducerWorkerStageEnum.MUTATION_VERIFICATION: # experience for mab self.mutation.agent_experience.collect_experience(individual, task.mutation_type, reward=-1.0) return new_population @@ -192,7 +187,8 @@ def _check_final_population(self, population: PopulationT) -> None: @dataclass class ReproducerWorkerTask: stage: ReproducerWorkerStageEnum = ReproducerWorkerStageEnum(0) - fail: bool = False + _fail: bool = False + failed_stage: ReproducerWorkerStageEnum = None mutation_attempts_per_each_crossover: int = 1 # crossover data @@ -213,6 +209,16 @@ class ReproducerWorkerTask: final_graph: Optional[OptGraph] = None final_fitness: Optional[Fitness] = None + @property + def fail(self): + return self._fail + + @fail.setter + def fail(self, value): + if value: + self.failed_stage = self.stage + self._fail = value + @property def is_crossover(self): return self.stage < ReproducerWorkerStageEnum.MUTATION @@ -253,12 +259,12 @@ def __init__(self, self._seed = seed self._log = log - def __call__(self): + def __call__(self, run: bool = True): self._log.warning(f"CALLED") with RandomStateHandler(self._seed): tasks = [self._empty_task] self._log.warning(f"START CYCLE") - while True: + while run: # is there is no tasks, try to get 1. task from queue 2. empty task if not tasks: try: @@ -271,24 +277,24 @@ def __call__(self): # process result for processed_task in processed_tasks: - self._log.warning(f"PROCESS: {processed_task.stage} {processed_task.crossover_tries}:{processed_task.mutation_tries}") if processed_task.stage is ReproducerWorkerStageEnum.FINISH: self._result_queue.put(processed_task) continue if processed_task.fail: + self._log.warning(f"FAIL: {processed_task.failed_stage}") self._failed_queue.put(processed_task) - if processed_task.tries > 0: + if processed_task.tries > 0: + tasks.append(processed_task) + else: tasks.append(processed_task) # if there are some tasks, add it to parallel queue - self._log.warning(f"TASKS: {len(tasks)}") for _ in range(len(tasks) - 1): self._task_queue.put(tasks.pop()) - self._log.warning(f"TASKS: {len(tasks)}") def process_task(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: """ Get task, make 1 stage and return processed task """ - self._log.warning(f"START: {task.stage} {task.crossover_tries}:{task.mutation_tries}") + # self._log.warning(f"START: {task.stage} {task.crossover_tries}:{task.mutation_tries}") task = copy(task) # input task task.fail = False @@ -304,20 +310,24 @@ def process_task(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask] # crossover uniqueness check if task.stage is ReproducerWorkerStageEnum.CROSSOVER_UNIQUENESS_CHECK: - processed_task = self.uniqueness_check_stage(task)[0] - processed_task.step_in_stage(-2 if processed_task.fail else 1) - return [processed_task] + task.step_in_stage(1) + return [task] + # processed_task = self.uniqueness_check_stage(task)[0] + # processed_task.step_in_stage(-2 if processed_task.fail else 1) + # return [processed_task] # crossover result evaluation if task.stage is ReproducerWorkerStageEnum.CROSSOVER_EVALUATION: - processed_task = self.evaluation_stage(task)[0] - if processed_task.fail: - processed_task.step_in_stage(-3) - return [processed_task] - else: - # create some tasks for mutation for crossover result - processed_task.step_in_stage(1) - return [copy(processed_task) for _ in range(task.mutation_attempts_per_each_crossover)] + task.step_in_stage(1) + return [copy(task) for _ in range(task.mutation_attempts_per_each_crossover)] + # processed_task = self.evaluation_stage(task)[0] + # if processed_task.fail: + # processed_task.step_in_stage(-3) + # return [processed_task] + # else: + # # create some tasks for mutation for crossover result + # processed_task.step_in_stage(1) + # return [copy(processed_task) for _ in range(task.mutation_attempts_per_each_crossover)] # mutation if task.stage is ReproducerWorkerStageEnum.MUTATION: diff --git a/test/unit/optimizers/gp_operators/test_reproduction_controller.py b/test/unit/optimizers/gp_operators/test_reproduction_controller.py index 8560235a7..752a404d0 100644 --- a/test/unit/optimizers/gp_operators/test_reproduction_controller.py +++ b/test/unit/optimizers/gp_operators/test_reproduction_controller.py @@ -8,8 +8,9 @@ from golem.core.adapter.nx_adapter import BaseNetworkxAdapter from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters from golem.core.optimisers.genetic.operators.base_mutations import MutationTypesEnum -from golem.core.optimisers.genetic.operators.crossover import Crossover, CrossoverTypesEnum -from golem.core.optimisers.genetic.operators.mutation import Mutation +from golem.core.optimisers.genetic.operators.crossover import Crossover, CrossoverTypesEnum, \ + SinglePredefinedGraphCrossover +from golem.core.optimisers.genetic.operators.mutation import Mutation, SinglePredefinedGraphMutation from golem.core.optimisers.genetic.operators.operator import EvaluationOperator, PopulationT from golem.core.optimisers.genetic.operators.reproduction import ReproductionController from golem.core.optimisers.genetic.operators.selection import Selection @@ -60,8 +61,8 @@ def reproducer() -> ReproductionController: rules_for_constraint=[]) requirements = GraphRequirements() - mutation = Mutation(params, requirements, graph_gen_params) - crossover = Crossover(params, requirements, graph_gen_params) + mutation = SinglePredefinedGraphMutation(params, requirements, graph_gen_params) + crossover = SinglePredefinedGraphCrossover(params, requirements, graph_gen_params) selection = Selection(params, requirements) reproduction = ReproductionController(params, selection, mutation, crossover) From 8987b871dff2d272cc0324f459976e0811416696 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Tue, 14 Nov 2023 17:52:00 +0300 Subject: [PATCH 60/65] add genetic node --- golem/core/constants.py | 2 +- .../core/optimisers/genetic/operators/node.py | 65 +++++++++++++++++++ .../genetic/operators/reproduction.py | 2 +- 3 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 golem/core/optimisers/genetic/operators/node.py diff --git a/golem/core/constants.py b/golem/core/constants.py index 523c46f45..4cc1e297e 100644 --- a/golem/core/constants.py +++ b/golem/core/constants.py @@ -1,6 +1,6 @@ import numpy as np -MAX_GRAPH_GEN_ATTEMPTS_PER_IND = 50 +MAX_GRAPH_GEN_ATTEMPTS_PER_IND = 20 MAX_GRAPH_GEN_ATTEMPTS = 1000 MAX_TUNING_METRIC_VALUE = np.inf MIN_TIME_FOR_TUNING_IN_SEC = 3 diff --git a/golem/core/optimisers/genetic/operators/node.py b/golem/core/optimisers/genetic/operators/node.py new file mode 100644 index 000000000..43c4d3bd4 --- /dev/null +++ b/golem/core/optimisers/genetic/operators/node.py @@ -0,0 +1,65 @@ +from dataclasses import dataclass, replace +from queue import Queue +from typing import Optional, List, Union, Any + +from golem.core.optimisers.genetic.operators.operator import Operator +from golem.core.optimisers.graph import OptGraph + + +@dataclass +class GeneticOperatorTask: + stage: 'GeneticNode' + + graphs: List[OptGraph] + operator_type: Optional[Any] = None + + # parent data + parent_task: Optional['GeneticOperatorTask'] = None + + fail: bool = False + fail_message: str = '' + left_tries: int = 1 + + def copy(self): + return replace(self) + + def create_failed_task(self, exception: Exception): + failed_task = self.copy() + failed_task.fail = True + failed_task.fail_message = exception.__str__() + failed_task.left_tries -= 1 + return failed_task + + def create_successive_task(self, graphs: List[OptGraph], operator_type: Any): + task = self.copy() + task.graphs = graphs + task.operator_type = operator_type + task.parent_task = self + return task + + +@dataclass(frozen=True) +class GeneticNode: + name: str + operator: Operator + processed_queue: Queue + success_outputs: List['GeneticNode'] = None + fail_outputs: Optional[List['GeneticNode']] = None + + def __call__(self, task: GeneticOperatorTask): + if task.left_tries > 0: + try: + *grouped_graphs, operator_type = self.operator(task.graphs, task.operator_type) + tasks = [task.create_successive_task(graphs, operator_type) for graphs in grouped_graphs] + next_nodes = self.success_outputs + except Exception as exception: + tasks = [task.create_failed_task(exception)] + next_nodes = self.fail_outputs + + final_tasks = list() + for _task in tasks: + for _node in next_nodes: + new_task = _task.copy() + new_task.stage = _node + final_tasks.append(new_task) + return final_tasks diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 8ecb510cf..197301af0 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -408,7 +408,7 @@ def evaluation_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerT graph = task.final_graph individual = Individual(deepcopy(graph), metadata=self.mutation.requirements.static_individual_metadata) evaluated_individuals = self.evaluator([individual]) - if evaluated_individuals and evaluated_individuals[0].fitness.valid: + if evaluated_individuals:# and evaluated_individuals[0].fitness.valid: task.fail = False if task.is_crossover: task.crossover_fitness = evaluated_individuals[0].fitness From 6ff8db489fcb24712424992c7f1a52c04e995747 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Wed, 15 Nov 2023 15:31:28 +0300 Subject: [PATCH 61/65] wip --- .../core/optimisers/genetic/operators/node.py | 131 ++++++++++++++++-- 1 file changed, 116 insertions(+), 15 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/node.py b/golem/core/optimisers/genetic/operators/node.py index 43c4d3bd4..68dae9480 100644 --- a/golem/core/optimisers/genetic/operators/node.py +++ b/golem/core/optimisers/genetic/operators/node.py @@ -1,18 +1,25 @@ -from dataclasses import dataclass, replace -from queue import Queue -from typing import Optional, List, Union, Any +from dataclasses import dataclass, replace, field +from enum import Enum +from itertools import chain +from typing import Optional, List, Union, Any, Dict from golem.core.optimisers.genetic.operators.operator import Operator from golem.core.optimisers.graph import OptGraph +class TaskStagesEnum(Enum): + (INIT, SUCCESS, FAIL, FINISH) = range(4) + + @dataclass class GeneticOperatorTask: - stage: 'GeneticNode' - + """ Contain graphs and information what to do with it and what was made """ graphs: List[OptGraph] operator_type: Optional[Any] = None + stage: TaskStagesEnum = TaskStagesEnum.INIT + stage_node: Optional['GeneticNode'] = None + # parent data parent_task: Optional['GeneticOperatorTask'] = None @@ -20,31 +27,46 @@ class GeneticOperatorTask: fail_message: str = '' left_tries: int = 1 + def __copy__(self): + return self.copy() + + def __deepcopy__(self, memodict: Dict = dict()): + raise NotImplementedError('Deepcopy is not allowed for task') + def copy(self): return replace(self) def create_failed_task(self, exception: Exception): failed_task = self.copy() - failed_task.fail = True + failed_task.stage = TaskStagesEnum.FAIL failed_task.fail_message = exception.__str__() failed_task.left_tries -= 1 return failed_task def create_successive_task(self, graphs: List[OptGraph], operator_type: Any): - task = self.copy() - task.graphs = graphs - task.operator_type = operator_type - task.parent_task = self - return task + successive_task = self.copy() + successive_task.stage = TaskStagesEnum.SUCCESS + successive_task.graphs = graphs + successive_task.operator_type = operator_type + successive_task.parent_task = self + return successive_task @dataclass(frozen=True) class GeneticNode: + """ Operator wrapper with data/tools for task routing """ + name: str operator: Operator - processed_queue: Queue - success_outputs: List['GeneticNode'] = None - fail_outputs: Optional[List['GeneticNode']] = None + success_outputs: List[Union['GeneticNode', None]] + fail_outputs: Optional[Union[List['GeneticNode'], None]] = field(default_factory=lambda: [None]) + + def __post_init__(self): + # some checks + _check_list_with_genetic_nodes(self.success_outputs, allow_none=True) + _check_list_with_genetic_nodes(self.fail_outputs, allow_none=True) + + # TODO check interface of operator def __call__(self, task: GeneticOperatorTask): if task.left_tries > 0: @@ -60,6 +82,85 @@ def __call__(self, task: GeneticOperatorTask): for _task in tasks: for _node in next_nodes: new_task = _task.copy() - new_task.stage = _node + new_task.stage_node = _node.name final_tasks.append(new_task) return final_tasks + + def __hash__(self): + # TODO add test for hash + return self.name.__hash__() + + def __copy__(self): + """ because hash is the name """ + raise NotImplementedError('Use ``copy`` function instead') + + def __deepcopy__(self, memodict: Dict = dict()): + """ because hash is the name """ + raise NotImplementedError('Use ``copy`` function instead') + + def copy(self, name: str): + """ Create new node with same data but new name """ + # TODO add tests that all fields are copied + # new_node = replace(self) + return GeneticNode(name=name, operator=self.operator, + success_outputs=self.success_outputs, + fail_outputs=self.fail_outputs) + + +@dataclass(frozen=True) +class GeneticPipeline: + """ Pool of connected nodes with useful checks + Call only a one node in time + """ + + name: str + nodes: List[GeneticNode] + __nodes_map: Optional[Dict[int, GeneticNode]] = None + + def __post_init__(self): + # some checks + _check_list_with_genetic_nodes(self.nodes) + + # check that all connection between nodes connect existing nodes + connection_goals = set(chain(*[chain(*(node.success_outputs + node.fail_outputs)) for node in self.nodes])) + if not (set(self.nodes) > connection_goals): + raise ValueError('Some nodes have connection with nonexisting nodes') + + self.__setattr__('__nodes_map', {node: node for node in self.nodes}) + + if self.__nodes_map is None: + raise ValueError('there is no ``__nodes_map``') + + def __call__(self, task: GeneticOperatorTask): + """ Call one of node and return result """ + if not isinstance(task, GeneticOperatorTask): + raise ValueError(f"``task`` should be ``GeneticOperatorTask``, get {type(task)} instead") + + if task.stage in (TaskStagesEnum.INIT, TaskStagesEnum.FINISH): + raise ValueError('Unappropriate task') + + if task.stage_node not in self.__nodes_map: + raise ValueError(f"Unknown stage node {task.stage}") + + return self.__nodes_map[task.stage_node](task) + + +def _check_list_with_genetic_nodes(list_with_nodes, allow_none=False): + # check that nodes is list with nodes + list_with_nodes_is_appropriate = True + list_with_nodes_is_appropriate &= isinstance(list_with_nodes, list) + list_with_nodes_is_appropriate &= len(list_with_nodes) > 0 + if allow_none: + list_with_nodes_is_appropriate &= all(isinstance(node, GeneticNode) or node is None for node in list_with_nodes) + else: + list_with_nodes_is_appropriate &= all(isinstance(node, GeneticNode) for node in list_with_nodes) + + if not list_with_nodes_is_appropriate: + raise ValueError('``nodes`` parameter should be list with ``GeneticNodes``') + + # check that all nodes have unique name + # hash of node is calculated as hash of it is name, therefore check may be done as: + if len(set(list_with_nodes)) != len(list_with_nodes): + # TODO add test for that line + # TODO add test for that line works as is + raise AttributeError(f"nodes names should be unique") From 1172caa07e0005be9b1db204ebb8a17bbb6c336b Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Wed, 15 Nov 2023 19:15:52 +0300 Subject: [PATCH 62/65] wip --- .../optimisers/genetic/operators/crossover.py | 17 +- .../core/optimisers/genetic/operators/node.py | 127 ++++-- .../genetic/operators/reproduction.py | 417 ++++++++---------- 3 files changed, 276 insertions(+), 285 deletions(-) diff --git a/golem/core/optimisers/genetic/operators/crossover.py b/golem/core/optimisers/genetic/operators/crossover.py index 461d1af52..071c0928f 100644 --- a/golem/core/optimisers/genetic/operators/crossover.py +++ b/golem/core/optimisers/genetic/operators/crossover.py @@ -2,7 +2,7 @@ from itertools import chain from math import ceil from random import choice, random, sample -from typing import Callable, Union, Iterable, Tuple, TYPE_CHECKING, Optional +from typing import Callable, Union, Iterable, Tuple, TYPE_CHECKING, Optional, List from joblib import Parallel, delayed @@ -114,16 +114,21 @@ class SinglePredefinedGraphCrossover(Crossover): in one attempt without any checks """ def __call__(self, - graph_1: OptGraph, - graph_2: OptGraph, + graphs: List[OptGraph], crossover_type: Optional[CrossoverTypesEnum] = None) -> Tuple[OptGraph, CrossoverTypesEnum]: + if len(graphs) < 2: + raise ValueError(f"Crossover needs 2 graphs, get {len(graphs)}") + elif len(graphs) > 2: + graphs = sample(graphs, 2) + graphs = list(map(deepcopy, graphs)) + crossover_type = crossover_type or choice(self.parameters.crossover_types) if crossover_type is CrossoverTypesEnum.none: - return (deepcopy(graph_1), deepcopy(graph_2), crossover_type) + return graphs, crossover_type crossover_func = self._get_crossover_function(crossover_type) - new_graphs = crossover_func(deepcopy(graph_1), deepcopy(graph_2), max_depth=self.requirements.max_depth) - return tuple(new_graphs) + (crossover_type, ) + new_graphs = crossover_func(*graphs, max_depth=self.requirements.max_depth) + return new_graphs, crossover_type @register_native diff --git a/golem/core/optimisers/genetic/operators/node.py b/golem/core/optimisers/genetic/operators/node.py index 68dae9480..8877f55ab 100644 --- a/golem/core/optimisers/genetic/operators/node.py +++ b/golem/core/optimisers/genetic/operators/node.py @@ -7,6 +7,9 @@ from golem.core.optimisers.graph import OptGraph +GeneticNodeAllowedType = Union['GeneticNode', str, None] + + class TaskStagesEnum(Enum): (INIT, SUCCESS, FAIL, FINISH) = range(4) @@ -18,38 +21,45 @@ class GeneticOperatorTask: operator_type: Optional[Any] = None stage: TaskStagesEnum = TaskStagesEnum.INIT - stage_node: Optional['GeneticNode'] = None + stage_node: GeneticNodeAllowedType = None # parent data parent_task: Optional['GeneticOperatorTask'] = None - fail: bool = False fail_message: str = '' left_tries: int = 1 + def __repr__(self): + s = (f"{self.__class__.__name__}('{self.stage.name}', " + f"next: '{self.stage_node}', " + f"graphs: {len(self.graphs) if isinstance(self.graphs, list) else type(self.graphs)}, " + f"operator_type: '{None if not self.operator_type else 'Operator'}', " + f"tries: {self.left_tries})") + return s + def __copy__(self): return self.copy() def __deepcopy__(self, memodict: Dict = dict()): raise NotImplementedError('Deepcopy is not allowed for task') - def copy(self): - return replace(self) + def copy(self, **parameters): + new_task = replace(self) + for parameter, value in parameters.items(): + setattr(new_task, parameter, value) + return new_task - def create_failed_task(self, exception: Exception): - failed_task = self.copy() - failed_task.stage = TaskStagesEnum.FAIL - failed_task.fail_message = exception.__str__() - failed_task.left_tries -= 1 - return failed_task + def create_failed_task(self, exception: Exception, **parameters): + parameters = {**parameters, 'stage': TaskStagesEnum.FAIL, + 'fail_message': exception.__str__(), 'left_tries': self.left_tries - 1} + return self.copy(**parameters) - def create_successive_task(self, graphs: List[OptGraph], operator_type: Any): - successive_task = self.copy() - successive_task.stage = TaskStagesEnum.SUCCESS - successive_task.graphs = graphs - successive_task.operator_type = operator_type - successive_task.parent_task = self - return successive_task + def create_successive_task(self, graphs: List[OptGraph], operator_type: Any, **parameters): + if not isinstance(graphs, list): + raise ValueError(f"graphs should be list, got {type(graphs)} instead") + parameters = {**parameters, 'stage': TaskStagesEnum.SUCCESS, 'graphs': graphs, + 'operator_type': operator_type, 'parent_task': self} + return self.copy(**parameters) @dataclass(frozen=True) @@ -58,31 +68,44 @@ class GeneticNode: name: str operator: Operator - success_outputs: List[Union['GeneticNode', None]] - fail_outputs: Optional[Union[List['GeneticNode'], None]] = field(default_factory=lambda: [None]) + success_outputs: Optional[List[GeneticNodeAllowedType]] = field(default_factory=lambda: [None]) + fail_outputs: Optional[List[GeneticNodeAllowedType]] = field(default_factory=lambda: [None]) + + task_params_if_success: Dict[str, Any] = field(default_factory=dict) + task_params_if_fail: Dict[str, Any] = field(default_factory=dict) + max_graphs_input = False # TODO add support for task splitting + max_graphs_output = True # TODO add support for task splitting def __post_init__(self): # some checks - _check_list_with_genetic_nodes(self.success_outputs, allow_none=True) - _check_list_with_genetic_nodes(self.fail_outputs, allow_none=True) + _check_list_with_genetic_nodes(self.success_outputs) + _check_list_with_genetic_nodes(self.fail_outputs) # TODO check interface of operator def __call__(self, task: GeneticOperatorTask): if task.left_tries > 0: try: - *grouped_graphs, operator_type = self.operator(task.graphs, task.operator_type) - tasks = [task.create_successive_task(graphs, operator_type) for graphs in grouped_graphs] + # TODO all operator should return list of lists of graph + graphs, operator_type = self.operator(task.graphs, task.operator_type) + # tasks = [task.create_successive_task(graphs, operator_type) for graphs in grouped_graphs] + tasks = [task.create_successive_task(graphs, operator_type, **self.task_params_if_success)] next_nodes = self.success_outputs except Exception as exception: - tasks = [task.create_failed_task(exception)] + # TODO save where it fails + tasks = [task.create_failed_task(exception, **self.task_params_if_fail)] next_nodes = self.fail_outputs final_tasks = list() for _task in tasks: for _node in next_nodes: new_task = _task.copy() - new_task.stage_node = _node.name + if _node is None: + if task.stage is TaskStagesEnum.SUCCESS: + new_task.stage = TaskStagesEnum.FINISH + elif task.stage is TaskStagesEnum.FAIL: + new_task.left_tries = -1 + new_task.stage_node = _node final_tasks.append(new_task) return final_tasks @@ -106,8 +129,20 @@ def copy(self, name: str): success_outputs=self.success_outputs, fail_outputs=self.fail_outputs) + # def call_operation(self, task: GeneticOperatorTask): + # graphs_grouped, operator_type = self.operator(task.graphs, task.operator_type) + # graphs_grouped = [([graph] if not isinstance(graph, list) else graph) for graph in graphs_grouped] + # + # new_graphs_grouped = list() + # for graphs in graphs_grouped: + # if len(graphs) > self.max_graphs_output: + # raise NotImplementedError() + # else: + # new_graphs_grouped.append(graphs) + # return graphs, operator_type -@dataclass(frozen=True) + +@dataclass class GeneticPipeline: """ Pool of connected nodes with useful checks Call only a one node in time @@ -115,45 +150,51 @@ class GeneticPipeline: name: str nodes: List[GeneticNode] - __nodes_map: Optional[Dict[int, GeneticNode]] = None + __nodes_map: Optional[Dict[str, GeneticNode]] = None def __post_init__(self): # some checks - _check_list_with_genetic_nodes(self.nodes) + _check_list_with_genetic_nodes(self.nodes, force_genetic_node_type_check=True) # check that all connection between nodes connect existing nodes - connection_goals = set(chain(*[chain(*(node.success_outputs + node.fail_outputs)) for node in self.nodes])) - if not (set(self.nodes) > connection_goals): - raise ValueError('Some nodes have connection with nonexisting nodes') + # TODO fix + # connection_goals = set(chain(*[node.success_outputs + node.fail_outputs for node in self.nodes])) + # connection_goals -= {None} + # if not (set(self.nodes) > connection_goals): + # raise ValueError('Some nodes have connection with nonexisting nodes') - self.__setattr__('__nodes_map', {node: node for node in self.nodes}) - - if self.__nodes_map is None: - raise ValueError('there is no ``__nodes_map``') + self.__nodes_map = {node.name: node for node in self.nodes} def __call__(self, task: GeneticOperatorTask): - """ Call one of node and return result """ + """ Call one node and return result """ if not isinstance(task, GeneticOperatorTask): raise ValueError(f"``task`` should be ``GeneticOperatorTask``, get {type(task)} instead") - if task.stage in (TaskStagesEnum.INIT, TaskStagesEnum.FINISH): - raise ValueError('Unappropriate task') + if task.stage is TaskStagesEnum.FINISH: + raise ValueError('Task is finished') if task.stage_node not in self.__nodes_map: raise ValueError(f"Unknown stage node {task.stage}") return self.__nodes_map[task.stage_node](task) + def __getitem__(self, node_name: str): + if node_name not in self.__nodes_map: + raise KeyError(f"Unknown node {node_name}") + return self.__nodes_map[node_name] + + def __contains__(self, node_name: str): + # TODO test that contains also return true when getitem works + return node_name in self.__nodes_map -def _check_list_with_genetic_nodes(list_with_nodes, allow_none=False): +def _check_list_with_genetic_nodes(list_with_nodes, force_genetic_node_type_check=False): # check that nodes is list with nodes list_with_nodes_is_appropriate = True list_with_nodes_is_appropriate &= isinstance(list_with_nodes, list) list_with_nodes_is_appropriate &= len(list_with_nodes) > 0 - if allow_none: - list_with_nodes_is_appropriate &= all(isinstance(node, GeneticNode) or node is None for node in list_with_nodes) - else: - list_with_nodes_is_appropriate &= all(isinstance(node, GeneticNode) for node in list_with_nodes) + checked_type = GeneticNode if force_genetic_node_type_check else GeneticNodeAllowedType + # TODO fix it + # list_with_nodes_is_appropriate &= all(isinstance(node, checked_type) for node in list_with_nodes) if not list_with_nodes_is_appropriate: raise ValueError('``nodes`` parameter should be list with ``GeneticNodes``') diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 197301af0..2230c670a 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -17,6 +17,8 @@ from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters from golem.core.optimisers.genetic.operators.crossover import Crossover, CrossoverTypesEnum from golem.core.optimisers.genetic.operators.mutation import Mutation, MutationType +from golem.core.optimisers.genetic.operators.node import GeneticPipeline, TaskStagesEnum, GeneticNode, \ + GeneticOperatorTask from golem.core.optimisers.genetic.operators.operator import PopulationT, EvaluationOperator from golem.core.optimisers.genetic.operators.selection import Selection from golem.core.optimisers.graph import OptGraph @@ -25,16 +27,6 @@ from golem.utilities.random import RandomStateHandler -class ReproducerWorkerStageEnum(Enum): - # TODO test that check that nums start from 0 and go to max (FINISH) with 1 steps - (CROSSOVER, CROSSOVER_VERIFICATION, CROSSOVER_UNIQUENESS_CHECK, CROSSOVER_EVALUATION, - MUTATION, MUTATION_VERIFICATION, MUTATION_UNIQUENESS_CHECK, MUTATION_EVALUATION, FINISH) = range(9) - - def __lt__(self, other): - if self.__class__ is other.__class__: - return self.value < other.value - return NotImplemented - class ReproductionController: """ @@ -79,22 +71,35 @@ def _reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> required population size. Also controls uniqueness of population. """ with Manager() as manager: - pop_graph_descriptive_ids = manager.dict({ids: True for ids in self._pop_graph_descriptive_ids}) - task_queue, result_queue, failed_queue = [manager.Queue() for _ in range(3)] - - # empty task for worker if there is no work - empty_task = ReproducerWorkerTask( - crossover_tries=self.parameters.max_num_of_crossover_reproducer_attempts, - mutation_tries=self.parameters.max_num_of_mutation_reproducer_attempts, - mutation_attempts_per_each_crossover=self.parameters.mutation_attempts_per_each_crossover_reproducer) + task_queue, result_queue = [manager.Queue() for _ in range(2)] + + def evaluate(graphs, operator_type, evaluator=evaluator): + individuals = [Individual(deepcopy(graph), metadata=self.mutation.requirements.static_individual_metadata) + for graph in graphs] + evaluated_individuals = self.evaluator(individuals) + if evaluated_individuals: + return evaluated_individuals[0].graph, None + raise ValueError('evaluator error') + + empty_task = GeneticOperatorTask([x.graph for x in population], + stage_node='crossover') + + crossover = GeneticNode(name='crossover', operator=self.crossover, + success_outputs=['mutation_1', 'mutation_2'], + task_params_if_success={'operation_type': None}) + mutation_1 = GeneticNode(name='mutation_1', operator=self.mutation, + success_outputs=['evaluation']) + mutation_2 = GeneticNode(name='mutation_2', operator=self.mutation, + success_outputs=['mutation_1']) + evaluation = GeneticNode(name='evaluation', operator=evaluate) + + pipeline = GeneticPipeline('main', [crossover, mutation_1, mutation_2, evaluation]) # parameters for worker - worker_parameters = dict(crossover=self.crossover, mutation=self.mutation, - verifier=self.verifier, evaluator=evaluator, - pop_graph_descriptive_ids=pop_graph_descriptive_ids, - population=population, - task_queue=task_queue, result_queue=result_queue, - failed_queue=failed_queue, empty_task=empty_task, + worker_parameters = dict(pipeline=pipeline, + empty_task=empty_task, + task_queue=task_queue, + result_queue=result_queue, log=self._log) n_jobs = self.mutation.requirements.n_jobs @@ -114,22 +119,21 @@ def _reproduce(self, population: PopulationT, evaluator: EvaluationOperator) -> # main thread is fast # frequent queues blocking with qsize is not good idea time.sleep(1) - while failed_queue.qsize() > 0: - left_tries -= 1 - failed_tasks.append(failed_queue.get()) - while result_queue.qsize() > 0: left_tries -= 1 - finished_tasks.append(result_queue.get()) + task = result_queue.get() + if task.stage is TaskStagesEnum.FINISH: + finished_tasks.append(task) + else: + failed_tasks.append(task) # get all finished works - while failed_queue.qsize() > 0: - failed_tasks.append(failed_queue.get()) while result_queue.qsize() > 0: - finished_tasks.append(result_queue.get()) - - # update looked graphs - self._pop_graph_descriptive_ids |= set(pop_graph_descriptive_ids.keys()) + task = result_queue.get() + if task.stage is TaskStagesEnum.FINISH: + finished_tasks.append(task) + else: + failed_tasks.append(task) # rebuild population new_population = self._process_tasks(population=population, @@ -143,7 +147,8 @@ def _process_tasks(self, failed_tasks: List['ReproducerWorkerTask']): population_uid_map = {ind.uid: ind for ind in population} - crossover_individuals, new_population = dict(), [] + individuals = list() + new_population = list() for task in finished_tasks + failed_tasks: if task.stage > ReproducerWorkerStageEnum.MUTATION: uids = (task.graph_1_uid, task.graph_2_uid) @@ -184,106 +189,46 @@ def _check_final_population(self, population: PopulationT) -> None: f' required {target_pop_size}!\n' + helpful_msg) -@dataclass -class ReproducerWorkerTask: - stage: ReproducerWorkerStageEnum = ReproducerWorkerStageEnum(0) - _fail: bool = False - failed_stage: ReproducerWorkerStageEnum = None - mutation_attempts_per_each_crossover: int = 1 - - # crossover data - graph_1_uid: Optional[str] = None - graph_2_uid: Optional[str] = None - graph_1_for_crossover: Optional[OptGraph] = None - graph_2_for_crossover: Optional[OptGraph] = None - crossover_type: Optional[CrossoverTypesEnum] = None - crossover_tries: int = 1 - crossover_fitness: Optional[Fitness] = None - - # mutation data - graph_for_mutation: Optional[OptGraph] = None - mutation_type: Optional[MutationType] = None - mutation_tries: int = 1 - - # result - final_graph: Optional[OptGraph] = None - final_fitness: Optional[Fitness] = None - - @property - def fail(self): - return self._fail - - @fail.setter - def fail(self, value): - if value: - self.failed_stage = self.stage - self._fail = value - - @property - def is_crossover(self): - return self.stage < ReproducerWorkerStageEnum.MUTATION - - @property - def tries(self): - return self.crossover_tries if self.is_crossover else self.mutation_tries - - def step_in_stage(self, steps: int): - self.stage = ReproducerWorkerStageEnum(self.stage.value + steps) - - class ReproduceWorker: def __init__(self, - crossover: Crossover, - mutation: MutationType, - verifier: GraphVerifier, - evaluator: EvaluationOperator, - pop_graph_descriptive_ids: Union[DictProxy, Dict], - population: PopulationT, - task_queue: Queue, - result_queue: Queue, - failed_queue: Queue, - empty_task: ReproducerWorkerTask, + pipeline: GeneticPipeline, + empty_task, + task_queue, + result_queue, seed: int, log ): - self.crossover = crossover - self.mutation = mutation - self.verifier = verifier - self.evaluator = evaluator - self._pop_graph_descriptive_ids = pop_graph_descriptive_ids - self._population = population + self.pipeline = pipeline + self._seed = seed + self._log = log self._task_queue = task_queue self._result_queue = result_queue - self._failed_queue = failed_queue self._empty_task = empty_task - self._seed = seed - self._log = log def __call__(self, run: bool = True): self._log.warning(f"CALLED") with RandomStateHandler(self._seed): - tasks = [self._empty_task] - self._log.warning(f"START CYCLE") + tasks = [self._empty_task.copy()] while run: # is there is no tasks, try to get 1. task from queue 2. empty task if not tasks: try: tasks.append(self._task_queue.get(timeout=0.02)) except Empty: - tasks.append(self._empty_task) + tasks.append(self._empty_task.copy()) - # work with task - processed_tasks = self.process_task(tasks.pop()) + # send task to pipeline + processed_tasks = self.pipeline(tasks.pop()) # process result for processed_task in processed_tasks: - if processed_task.stage is ReproducerWorkerStageEnum.FINISH: + if processed_task.stage is TaskStagesEnum.FINISH: self._result_queue.put(processed_task) continue - if processed_task.fail: + if processed_task.stage is TaskStagesEnum.FAIL: self._log.warning(f"FAIL: {processed_task.failed_stage}") - self._failed_queue.put(processed_task) - if processed_task.tries > 0: + self._result_queue.put(processed_task) + if processed_task.left_tries > 0: tasks.append(processed_task) else: tasks.append(processed_task) @@ -292,128 +237,128 @@ def __call__(self, run: bool = True): for _ in range(len(tasks) - 1): self._task_queue.put(tasks.pop()) - def process_task(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: - """ Get task, make 1 stage and return processed task """ - # self._log.warning(f"START: {task.stage} {task.crossover_tries}:{task.mutation_tries}") - task = copy(task) # input task - task.fail = False - - # crossover - if task.stage is ReproducerWorkerStageEnum.CROSSOVER: - return self.crossover_stage(task) - - # crossover result verification - if task.stage is ReproducerWorkerStageEnum.CROSSOVER_VERIFICATION: - task.fail = not self.verifier(task.graph_for_mutation) - task.step_in_stage(-1 if task.fail else 1) - return [task] - - # crossover uniqueness check - if task.stage is ReproducerWorkerStageEnum.CROSSOVER_UNIQUENESS_CHECK: - task.step_in_stage(1) - return [task] - # processed_task = self.uniqueness_check_stage(task)[0] - # processed_task.step_in_stage(-2 if processed_task.fail else 1) - # return [processed_task] - - # crossover result evaluation - if task.stage is ReproducerWorkerStageEnum.CROSSOVER_EVALUATION: - task.step_in_stage(1) - return [copy(task) for _ in range(task.mutation_attempts_per_each_crossover)] - # processed_task = self.evaluation_stage(task)[0] - # if processed_task.fail: - # processed_task.step_in_stage(-3) - # return [processed_task] - # else: - # # create some tasks for mutation for crossover result - # processed_task.step_in_stage(1) - # return [copy(processed_task) for _ in range(task.mutation_attempts_per_each_crossover)] - - # mutation - if task.stage is ReproducerWorkerStageEnum.MUTATION: - return self.mutation_stage(task) - - # mutation result verification - if task.stage is ReproducerWorkerStageEnum.MUTATION_VERIFICATION: - task.fail = not self.verifier(task.final_graph) - task.step_in_stage(-1 if task.fail else 1) - return [task] - - # mutation uniqueness check - if task.stage is ReproducerWorkerStageEnum.MUTATION_UNIQUENESS_CHECK: - processed_task = self.uniqueness_check_stage(task)[0] - processed_task.step_in_stage(-2 if processed_task.fail else 1) - return [processed_task] - - # mutation result evaluation - if task.stage is ReproducerWorkerStageEnum.MUTATION_EVALUATION: - processed_task = self.evaluation_stage(task)[0] - processed_task.step_in_stage(-3 if processed_task.fail else 1) - return [processed_task] - - def crossover_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: - tasks = [] # tasks to return - - # if there is no graphs for crossover then get random graphs - if task.graph_1_for_crossover is None or task.graph_1_for_crossover is None: - inds_for_crossover = sample(self._population, k=2) - task.graph_1_uid, task.graph_1_for_crossover = inds_for_crossover[0].uid, inds_for_crossover[0].graph - task.graph_2_uid, task.graph_2_for_crossover = inds_for_crossover[1].uid, inds_for_crossover[1].graph - - # make crossover - task.crossover_tries -= 1 - *new_graphs, task.crossover_type = self.crossover(task.graph_1_for_crossover, - task.graph_2_for_crossover, - task.crossover_type) - - if not new_graphs: - # if there is no new_graphs then go to new try - task.fail = True - tasks.append(task) - else: - # create new task for each new graph after crossover for next stage - task.step_in_stage(1) - for graph in new_graphs: - new_task = copy(task) - new_task.graph_for_mutation = graph - tasks.append(new_task) - return tasks - - def mutation_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: - task.final_graph, task.mutation_type = self.mutation(task.graph_for_mutation, task.mutation_type) - task.mutation_tries -= 1 - if task.final_graph is None: - task.fail = True - else: - task.step_in_stage(1) - return [task] - - def uniqueness_check_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: - if task.is_crossover: - graph = task.graph_for_mutation - else: - graph = task.final_graph - descriptive_id = graph.descriptive_id - if descriptive_id not in self._pop_graph_descriptive_ids: - self._pop_graph_descriptive_ids[descriptive_id] = True - task.fail = False - else: - task.fail = True - return [task] - - def evaluation_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: - if task.is_crossover: - graph = task.graph_for_mutation - else: - graph = task.final_graph - individual = Individual(deepcopy(graph), metadata=self.mutation.requirements.static_individual_metadata) - evaluated_individuals = self.evaluator([individual]) - if evaluated_individuals:# and evaluated_individuals[0].fitness.valid: - task.fail = False - if task.is_crossover: - task.crossover_fitness = evaluated_individuals[0].fitness - else: - task.final_fitness = evaluated_individuals[0].fitness - else: - task.fail = True - return [task] + # def process_task(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: + # """ Get task, make 1 stage and return processed task """ + # # self._log.warning(f"START: {task.stage} {task.crossover_tries}:{task.mutation_tries}") + # task = copy(task) # input task + # task.fail = False + # + # # crossover + # if task.stage is ReproducerWorkerStageEnum.CROSSOVER: + # return self.crossover_stage(task) + # + # # crossover result verification + # if task.stage is ReproducerWorkerStageEnum.CROSSOVER_VERIFICATION: + # task.fail = not self.verifier(task.graph_for_mutation) + # task.step_in_stage(-1 if task.fail else 1) + # return [task] + # + # # crossover uniqueness check + # if task.stage is ReproducerWorkerStageEnum.CROSSOVER_UNIQUENESS_CHECK: + # task.step_in_stage(1) + # return [task] + # # processed_task = self.uniqueness_check_stage(task)[0] + # # processed_task.step_in_stage(-2 if processed_task.fail else 1) + # # return [processed_task] + # + # # crossover result evaluation + # if task.stage is ReproducerWorkerStageEnum.CROSSOVER_EVALUATION: + # task.step_in_stage(1) + # return [copy(task) for _ in range(task.mutation_attempts_per_each_crossover)] + # # processed_task = self.evaluation_stage(task)[0] + # # if processed_task.fail: + # # processed_task.step_in_stage(-3) + # # return [processed_task] + # # else: + # # # create some tasks for mutation for crossover result + # # processed_task.step_in_stage(1) + # # return [copy(processed_task) for _ in range(task.mutation_attempts_per_each_crossover)] + # + # # mutation + # if task.stage is ReproducerWorkerStageEnum.MUTATION: + # return self.mutation_stage(task) + # + # # mutation result verification + # if task.stage is ReproducerWorkerStageEnum.MUTATION_VERIFICATION: + # task.fail = not self.verifier(task.final_graph) + # task.step_in_stage(-1 if task.fail else 1) + # return [task] + # + # # mutation uniqueness check + # if task.stage is ReproducerWorkerStageEnum.MUTATION_UNIQUENESS_CHECK: + # processed_task = self.uniqueness_check_stage(task)[0] + # processed_task.step_in_stage(-2 if processed_task.fail else 1) + # return [processed_task] + # + # # mutation result evaluation + # if task.stage is ReproducerWorkerStageEnum.MUTATION_EVALUATION: + # processed_task = self.evaluation_stage(task)[0] + # processed_task.step_in_stage(-3 if processed_task.fail else 1) + # return [processed_task] + # + # def crossover_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: + # tasks = [] # tasks to return + # + # # if there is no graphs for crossover then get random graphs + # if task.graph_1_for_crossover is None or task.graph_1_for_crossover is None: + # inds_for_crossover = sample(self._population, k=2) + # task.graph_1_uid, task.graph_1_for_crossover = inds_for_crossover[0].uid, inds_for_crossover[0].graph + # task.graph_2_uid, task.graph_2_for_crossover = inds_for_crossover[1].uid, inds_for_crossover[1].graph + # + # # make crossover + # task.crossover_tries -= 1 + # *new_graphs, task.crossover_type = self.crossover(task.graph_1_for_crossover, + # task.graph_2_for_crossover, + # task.crossover_type) + # + # if not new_graphs: + # # if there is no new_graphs then go to new try + # task.fail = True + # tasks.append(task) + # else: + # # create new task for each new graph after crossover for next stage + # task.step_in_stage(1) + # for graph in new_graphs: + # new_task = copy(task) + # new_task.graph_for_mutation = graph + # tasks.append(new_task) + # return tasks + # + # def mutation_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: + # task.final_graph, task.mutation_type = self.mutation(task.graph_for_mutation, task.mutation_type) + # task.mutation_tries -= 1 + # if task.final_graph is None: + # task.fail = True + # else: + # task.step_in_stage(1) + # return [task] + # + # def uniqueness_check_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: + # if task.is_crossover: + # graph = task.graph_for_mutation + # else: + # graph = task.final_graph + # descriptive_id = graph.descriptive_id + # if descriptive_id not in self._pop_graph_descriptive_ids: + # self._pop_graph_descriptive_ids[descriptive_id] = True + # task.fail = False + # else: + # task.fail = True + # return [task] + # + # def evaluation_stage(self, task: ReproducerWorkerTask) -> List[ReproducerWorkerTask]: + # if task.is_crossover: + # graph = task.graph_for_mutation + # else: + # graph = task.final_graph + # individual = Individual(deepcopy(graph), metadata=self.mutation.requirements.static_individual_metadata) + # evaluated_individuals = self.evaluator([individual]) + # if evaluated_individuals:# and evaluated_individuals[0].fitness.valid: + # task.fail = False + # if task.is_crossover: + # task.crossover_fitness = evaluated_individuals[0].fitness + # else: + # task.final_fitness = evaluated_individuals[0].fitness + # else: + # task.fail = True + # return [task] From e62fa6598cb5c1f9d2158dd5bd0582b7afe8ca10 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Thu, 16 Nov 2023 15:03:03 +0300 Subject: [PATCH 63/65] wip --- .../optimisers/genetic/operators/crossover.py | 21 +- .../optimisers/genetic/operators/mutation.py | 13 +- .../core/optimisers/genetic/operators/node.py | 92 ++++++--- .../genetic/operators/reproduction.py | 69 ++++--- .../gp_operators/test_genetic_pipelines.py | 182 ++++++++++++++++++ 5 files changed, 303 insertions(+), 74 deletions(-) create mode 100644 test/unit/optimizers/gp_operators/test_genetic_pipelines.py diff --git a/golem/core/optimisers/genetic/operators/crossover.py b/golem/core/optimisers/genetic/operators/crossover.py index 071c0928f..471604821 100644 --- a/golem/core/optimisers/genetic/operators/crossover.py +++ b/golem/core/optimisers/genetic/operators/crossover.py @@ -114,21 +114,24 @@ class SinglePredefinedGraphCrossover(Crossover): in one attempt without any checks """ def __call__(self, - graphs: List[OptGraph], - crossover_type: Optional[CrossoverTypesEnum] = None) -> Tuple[OptGraph, CrossoverTypesEnum]: - if len(graphs) < 2: - raise ValueError(f"Crossover needs 2 graphs, get {len(graphs)}") - elif len(graphs) > 2: - graphs = sample(graphs, 2) - graphs = list(map(deepcopy, graphs)) + individuals: List[Individual], + crossover_type: Optional[CrossoverTypesEnum] = None) -> Tuple[List[Individual], CrossoverTypesEnum]: + if len(individuals) < 2: + raise ValueError(f"Crossover needs 2 individuals, get {len(individuals)}") + elif len(individuals) > 2: + individuals = sample(individuals, 2) + graphs = [deepcopy(ind.graph) for ind in individuals] crossover_type = crossover_type or choice(self.parameters.crossover_types) if crossover_type is CrossoverTypesEnum.none: - return graphs, crossover_type + return individuals, crossover_type crossover_func = self._get_crossover_function(crossover_type) new_graphs = crossover_func(*graphs, max_depth=self.requirements.max_depth) - return new_graphs, crossover_type + new_individuals = self._get_individuals(new_graphs=new_graphs, + parent_individuals=individuals, + crossover_type=crossover_type) + return new_individuals, crossover_type @register_native diff --git a/golem/core/optimisers/genetic/operators/mutation.py b/golem/core/optimisers/genetic/operators/mutation.py index 07ec1c701..f54e3bcf0 100644 --- a/golem/core/optimisers/genetic/operators/mutation.py +++ b/golem/core/optimisers/genetic/operators/mutation.py @@ -1,6 +1,6 @@ from copy import deepcopy from random import random -from typing import Callable, Union, Tuple, TYPE_CHECKING, Mapping, Hashable, Optional +from typing import Callable, Union, Tuple, TYPE_CHECKING, Mapping, Hashable, Optional, List import numpy as np @@ -173,8 +173,12 @@ class SinglePredefinedGraphMutation(Mutation): """ Mutation that tries to create new graph (not individual) from the only graph in one attempt without any checks """ - def __call__(self, graph: Graph, mutation_type: Optional[MutationType] = None) -> Tuple[Graph, MutationIdType]: - new_graph = deepcopy(graph) + def __call__(self, individuals: List[Individual], mutation_type: Optional[MutationType] = None) -> Tuple[Graph, MutationIdType]: + if len(individuals) != 1: + raise ValueError('individuals should be len 1') + + individual = individuals[0] + new_graph = deepcopy(individual.graph) mutation_type = mutation_type or self._operator_agent.choose_action(new_graph) if mutation_type is MutationTypesEnum.none: return None, None @@ -182,4 +186,5 @@ def __call__(self, graph: Graph, mutation_type: Optional[MutationType] = None) - new_graph = mutation_func(new_graph, requirements=self.requirements, graph_gen_params=self.graph_generation_params, parameters=self.parameters) - return new_graph, mutation_type + new_individual = self._get_individual(new_graph=new_graph, mutation_type=mutation_type, parent=individual) + return new_individual, mutation_type diff --git a/golem/core/optimisers/genetic/operators/node.py b/golem/core/optimisers/genetic/operators/node.py index 8877f55ab..6e6615fe0 100644 --- a/golem/core/optimisers/genetic/operators/node.py +++ b/golem/core/optimisers/genetic/operators/node.py @@ -1,11 +1,12 @@ from dataclasses import dataclass, replace, field from enum import Enum from itertools import chain +from math import ceil from typing import Optional, List, Union, Any, Dict from golem.core.optimisers.genetic.operators.operator import Operator from golem.core.optimisers.graph import OptGraph - +from golem.core.optimisers.opt_history_objects.individual import Individual GeneticNodeAllowedType = Union['GeneticNode', str, None] @@ -16,49 +17,54 @@ class TaskStagesEnum(Enum): @dataclass class GeneticOperatorTask: - """ Contain graphs and information what to do with it and what was made """ - graphs: List[OptGraph] + """ Contain individuals and information what to do with it and what was made """ + individuals: List[Individual] operator_type: Optional[Any] = None stage: TaskStagesEnum = TaskStagesEnum.INIT - stage_node: GeneticNodeAllowedType = None + next_stage_node: GeneticNodeAllowedType = None + prev_stage_node: GeneticNodeAllowedType = None # parent data parent_task: Optional['GeneticOperatorTask'] = None - fail_message: str = '' + exception: Optional[Exception] = None left_tries: int = 1 def __repr__(self): s = (f"{self.__class__.__name__}('{self.stage.name}', " - f"next: '{self.stage_node}', " - f"graphs: {len(self.graphs) if isinstance(self.graphs, list) else type(self.graphs)}, " - f"operator_type: '{None if not self.operator_type else 'Operator'}', " - f"tries: {self.left_tries})") + f"next: '{self.next_stage_node}', prev: '{self.prev_stage_node}', " + f"individuals: {len(self.individuals) if isinstance(self.individuals, list) else type(self.individuals)}, " + f"operator_type: '{self.operator_type}', " + f"tries: {self.left_tries}, " + f"parent: {int(self.parent_task is not None)})") return s def __copy__(self): + # TODO test return self.copy() def __deepcopy__(self, memodict: Dict = dict()): + # TODO test raise NotImplementedError('Deepcopy is not allowed for task') def copy(self, **parameters): + # TODO test new_task = replace(self) for parameter, value in parameters.items(): setattr(new_task, parameter, value) return new_task def create_failed_task(self, exception: Exception, **parameters): - parameters = {**parameters, 'stage': TaskStagesEnum.FAIL, - 'fail_message': exception.__str__(), 'left_tries': self.left_tries - 1} + parameters = {'stage': TaskStagesEnum.FAIL, 'exception': exception, + 'left_tries': self.left_tries - 1, **parameters} return self.copy(**parameters) - def create_successive_task(self, graphs: List[OptGraph], operator_type: Any, **parameters): - if not isinstance(graphs, list): - raise ValueError(f"graphs should be list, got {type(graphs)} instead") - parameters = {**parameters, 'stage': TaskStagesEnum.SUCCESS, 'graphs': graphs, - 'operator_type': operator_type, 'parent_task': self} + def create_successive_task(self, individuals: List[Individual], **parameters): + if not isinstance(individuals, list): + raise ValueError(f"individuals should be list, got {type(individuals)} instead") + parameters = {'stage': TaskStagesEnum.SUCCESS, 'individuals': individuals, + 'parent_task': self, **parameters} return self.copy(**parameters) @@ -73,8 +79,10 @@ class GeneticNode: task_params_if_success: Dict[str, Any] = field(default_factory=dict) task_params_if_fail: Dict[str, Any] = field(default_factory=dict) - max_graphs_input = False # TODO add support for task splitting - max_graphs_output = True # TODO add support for task splitting + + individuals_input_count: Optional[int] = None + repeat_count: int = 1 + tries_count: int = 1 def __post_init__(self): # some checks @@ -84,30 +92,56 @@ def __post_init__(self): # TODO check interface of operator def __call__(self, task: GeneticOperatorTask): - if task.left_tries > 0: + final_tasks = list() + + if task.stage is not TaskStagesEnum.FAIL: + # if task from previous node then set max tries + task.left_tries = self.tries_count + + # if there are unappropriated individuals count + # then divide task to subtasks with appropriate individuals count + length, max_length = len(task.individuals), self.individuals_input_count + if max_length is not None and length > max_length: + individuals_groups = [task.individuals[i * max_length:min(length, (i + 1) * max_length)] + for i in range(ceil(length / max_length))] + for individuals_group in individuals_groups: + final_tasks.append(task.copy(individuals=individuals_group)) + # get task for current run + task = final_tasks.pop() + + # repeat each task if it is allowed + if self.repeat_count > 1: + final_tasks.append(task) + for _ in range(self.repeat_count - 1): + final_tasks.extend([task.copy() for task in final_tasks]) + # get task for current run + task = final_tasks.pop() + + # run operator + if task.stage is not TaskStagesEnum.FAIL or task.left_tries > 0: try: # TODO all operator should return list of lists of graph - graphs, operator_type = self.operator(task.graphs, task.operator_type) - # tasks = [task.create_successive_task(graphs, operator_type) for graphs in grouped_graphs] - tasks = [task.create_successive_task(graphs, operator_type, **self.task_params_if_success)] + individuals, operator_type = self.operator(task.individuals, task.operator_type) + tasks = [task.create_successive_task(individuals, prev_stage_node=self.name, + operator_type=None, **self.task_params_if_success)] next_nodes = self.success_outputs except Exception as exception: # TODO save where it fails tasks = [task.create_failed_task(exception, **self.task_params_if_fail)] next_nodes = self.fail_outputs - final_tasks = list() for _task in tasks: for _node in next_nodes: new_task = _task.copy() if _node is None: - if task.stage is TaskStagesEnum.SUCCESS: + if new_task.stage is TaskStagesEnum.SUCCESS: new_task.stage = TaskStagesEnum.FINISH - elif task.stage is TaskStagesEnum.FAIL: + elif new_task.stage is TaskStagesEnum.FAIL: + # if there is no next node, then no tries new_task.left_tries = -1 - new_task.stage_node = _node + new_task.next_stage_node = _node final_tasks.append(new_task) - return final_tasks + return final_tasks def __hash__(self): # TODO add test for hash @@ -173,10 +207,10 @@ def __call__(self, task: GeneticOperatorTask): if task.stage is TaskStagesEnum.FINISH: raise ValueError('Task is finished') - if task.stage_node not in self.__nodes_map: + if task.next_stage_node not in self.__nodes_map: raise ValueError(f"Unknown stage node {task.stage}") - return self.__nodes_map[task.stage_node](task) + return self.__nodes_map[task.next_stage_node](task) def __getitem__(self, node_name: str): if node_name not in self.__nodes_map: diff --git a/golem/core/optimisers/genetic/operators/reproduction.py b/golem/core/optimisers/genetic/operators/reproduction.py index 2230c670a..8ebda9067 100644 --- a/golem/core/optimisers/genetic/operators/reproduction.py +++ b/golem/core/optimisers/genetic/operators/reproduction.py @@ -2,6 +2,7 @@ from copy import deepcopy, copy from dataclasses import dataclass from enum import Enum +from itertools import chain from multiprocessing.managers import DictProxy from multiprocessing import Manager from queue import Empty, Queue @@ -22,6 +23,7 @@ from golem.core.optimisers.genetic.operators.operator import PopulationT, EvaluationOperator from golem.core.optimisers.genetic.operators.selection import Selection from golem.core.optimisers.graph import OptGraph +from golem.core.optimisers.opt_history_objects.parent_operator import ParentOperator from golem.core.optimisers.populational_optimizer import EvaluationAttemptsError from golem.core.optimisers.opt_history_objects.individual import Individual from golem.utilities.random import RandomStateHandler @@ -81,16 +83,14 @@ def evaluate(graphs, operator_type, evaluator=evaluator): return evaluated_individuals[0].graph, None raise ValueError('evaluator error') - empty_task = GeneticOperatorTask([x.graph for x in population], - stage_node='crossover') + empty_task = GeneticOperatorTask(population, next_stage_node='crossover') crossover = GeneticNode(name='crossover', operator=self.crossover, - success_outputs=['mutation_1', 'mutation_2'], - task_params_if_success={'operation_type': None}) + success_outputs=['mutation_1', 'mutation_2']) mutation_1 = GeneticNode(name='mutation_1', operator=self.mutation, - success_outputs=['evaluation']) + success_outputs=['evaluation'], individuals_input_count=1) mutation_2 = GeneticNode(name='mutation_2', operator=self.mutation, - success_outputs=['mutation_1']) + success_outputs=['mutation_1'], individuals_input_count=1) evaluation = GeneticNode(name='evaluation', operator=evaluate) pipeline = GeneticPipeline('main', [crossover, mutation_1, mutation_2, evaluation]) @@ -117,9 +117,9 @@ def evaluate(graphs, operator_type, evaluator=evaluator): left_tries = self.parameters.pop_size * MAX_GRAPH_GEN_ATTEMPTS_PER_IND * n_jobs while left_tries > 0 and len(finished_tasks) < self.parameters.pop_size: # main thread is fast - # frequent queues blocking with qsize is not good idea + # frequent queue.qsize() is not good idea time.sleep(1) - while result_queue.qsize() > 0: + for _ in range(result_queue.qsize()): left_tries -= 1 task = result_queue.get() if task.stage is TaskStagesEnum.FINISH: @@ -141,37 +141,42 @@ def evaluate(graphs, operator_type, evaluator=evaluator): failed_tasks=failed_tasks) return new_population + def _rebuild_individual(self, individual: Individual, + known_uid_to_population_map: Dict[str, Individual]): + # TODO add test + if individual.uid in known_uid_to_population_map: + # if individual is known, then no need to rebuild it + new_individual = known_uid_to_population_map[individual.uid] + else: + parent_operator = None + if individual.parent_operator: + operator = individual.parent_operator + parent_individuals = [self._rebuild_individual(ind) for ind in operator.parent_individuals] + parent_operator = ParentOperator(type_=operator.type_, + operators=operator.operators, + parent_individuals=parent_individuals) + + new_individual = Individual(individual.graph, + parent_operator, + fitness=individual.fitness, + # TODO get requirements from self, not from mutation + metadata=self.mutation.requirements.static_individual_metadata) + # add new individual to known individuals + known_uid_to_population_map[individual.uid] = individual + return new_individual + def _process_tasks(self, population: PopulationT, finished_tasks: List['ReproducerWorkerTask'], failed_tasks: List['ReproducerWorkerTask']): population_uid_map = {ind.uid: ind for ind in population} - individuals = list() new_population = list() - for task in finished_tasks + failed_tasks: - if task.stage > ReproducerWorkerStageEnum.MUTATION: - uids = (task.graph_1_uid, task.graph_2_uid) - # create individuals, generated by crossover - if uids not in crossover_individuals: - individuals = self.crossover._get_individuals(new_graphs=[task.graph_for_mutation], - parent_individuals=[population_uid_map[uid] - for uid in uids], - crossover_type=task.crossover_type, - fitness=task.crossover_fitness) - crossover_individuals[uids] = individuals[0] - - # create individuals, generated by mutation - if uids in crossover_individuals: - individual = self.mutation._get_individual(new_graph=task.final_graph, - mutation_type=task.mutation_type, - parent=crossover_individuals[uids], - fitness=task.final_fitness) - if task.stage is ReproducerWorkerStageEnum.FINISH: - new_population.append(individual) - elif task.failed_stage is ReproducerWorkerStageEnum.MUTATION_VERIFICATION: - # experience for mab - self.mutation.agent_experience.collect_experience(individual, task.mutation_type, reward=-1.0) + for task in finished_tasks: + new_inds = [self._rebuild_individual(ind, population_uid_map) for ind in task.individuals] + new_population.extend(new_inds) + # experience for mab + # self.mutation.agent_experience.collect_experience(individual, task.mutation_type, reward=-1.0) return new_population def _check_final_population(self, population: PopulationT) -> None: diff --git a/test/unit/optimizers/gp_operators/test_genetic_pipelines.py b/test/unit/optimizers/gp_operators/test_genetic_pipelines.py new file mode 100644 index 000000000..5827a91ec --- /dev/null +++ b/test/unit/optimizers/gp_operators/test_genetic_pipelines.py @@ -0,0 +1,182 @@ +import random +from math import ceil +from itertools import product +from typing import Optional + +import pytest +from examples.synthetic_graph_evolution.generators import generate_labeled_graph + +from golem.core.adapter.nx_adapter import BaseNetworkxAdapter +from golem.core.optimisers.genetic.operators.node import GeneticOperatorTask, TaskStagesEnum, GeneticNode +from golem.core.optimisers.genetic.operators.operator import EvaluationOperator, Operator, PopulationT +from golem.core.optimisers.opt_history_objects.individual import Individual + + +class UncorrectIndividualsCount(Exception): + pass + + +class Mock: + def __init__(self, success_prob: float = 1.0): + self.success_prob = success_prob + + def __call__(self): + if random.random() > self.success_prob: + raise Exception() + + +class MockOperator(Mock, Operator): + def __init__(self, *args, + individuals_input_count: Optional[int] = None, + individuals_output_count: Optional[int] = None, + **kwargs): + super().__init__(*args, **kwargs) + self.individuals_input_count = individuals_input_count + self.individuals_output_count = individuals_output_count + + def __call__(self, individuals, operation_type = None): + if len(individuals) > self.individuals_input_count or len(individuals) == 0: + raise UncorrectIndividualsCount() + super().__call__() + if self.individuals_output_count is None: + return individuals, operation_type + else: + return individuals[self.individuals_output_count], operation_type + + +class MockEvaluator(Mock, EvaluationOperator): + def __call__(self, pop): + super().__call__() + n_valid = int(ceil(self.success_prob * len(pop))) + evaluated = random.sample(pop, n_valid) + return evaluated + +def get_rand_population(pop_size: int = 10) -> PopulationT: + graph_sizes = list(range(5, 15)) + random_pop = [generate_labeled_graph('tree', size=random.choice(graph_sizes), + directed=True) + for _ in range(pop_size)] + graph_pop = BaseNetworkxAdapter().adapt(random_pop) + individuals = [Individual(graph) for graph in graph_pop] + return individuals + + +def get_random_task(pop_size: int = 10, operator_type: str = 'test_operator_type', **params): + return GeneticOperatorTask(individuals=get_rand_population(pop_size), + operator_type=operator_type, + **params) + + +def test_genetic_task_constructor(): + individuals = get_rand_population() + operator_type = 'test_operator_type' + + task = GeneticOperatorTask(individuals=individuals, + operator_type=operator_type) + + # check task constructor + assert task.individuals == individuals + assert task.operator_type == operator_type + assert task.left_tries == 1 + assert task.exception is None + assert task.stage is TaskStagesEnum.INIT + + +def test_genetic_failed_task(): + task = get_random_task() + + left_tries = task.left_tries + stage = task.stage + + exception = Exception('test') + new_task = task.create_failed_task(exception) + + assert id(new_task) != id(task) + assert task.exception is None + assert task.left_tries == left_tries + assert task.stage == stage + + assert new_task.exception == exception + assert (task.left_tries - new_task.left_tries) == 1 + assert new_task.stage is TaskStagesEnum.FAIL + + for attr in ('individuals', 'operator_type', 'next_stage_node', 'prev_stage_node', 'parent_task'): + assert getattr(new_task, attr) == getattr(task, attr) + + +def test_genetic_successive_task(): + task = get_random_task() + + stage = task.stage + individuals = task.individuals + new_individuals = get_rand_population(5) + new_task = task.create_successive_task(new_individuals) + + assert id(new_task) != id(task) + assert task.stage == stage + assert task.individuals == individuals + + assert task.left_tries == new_task.left_tries + assert new_task.stage is TaskStagesEnum.SUCCESS + assert new_task.individuals == new_individuals + assert new_task.parent_task == task + + for attr in ('next_stage_node', 'prev_stage_node'): + assert getattr(new_task, attr) == getattr(task, attr) + + +@pytest.mark.parametrize(['stage', 'success_outputs', 'left_tries', + 'individuals_input_count', 'individuals_output_count', + 'repeat_count', 'tries_count'], + product([TaskStagesEnum.INIT, TaskStagesEnum.SUCCESS], # stage + [[None], ['1', '2', '3']], # success_outputs + [1, 3], # left_tries + [1, 3, None], # individuals_input_count + [1, 3, None], # individuals_output_count + [1, 3], # repeat_count + [1, 3], # tries_count + )) +def test_genetic_node_with_nonfailed_task(stage, success_outputs, left_tries, individuals_input_count, + individuals_output_count, repeat_count, tries_count): + pop_size = 10 + node_name = 'test' + + task = get_random_task(pop_size=pop_size, stage=stage, left_tries=left_tries) + operator = MockOperator(success_prob=1, individuals_input_count=individuals_input_count) + node = GeneticNode(name=node_name, operator=operator, success_outputs=success_outputs, + individuals_input_count=individuals_input_count, + repeat_count=repeat_count, tries_count=tries_count) + + final_tasks = node(task) + + # check final_tasks count + # individuals that MockOperator returns + _individuals_output_count = individuals_output_count or pop_size + # individuals that MockOperator can get + _individuals_input_count = individuals_input_count or pop_size + # if there are repeats condition or task is divided due to + # unappropriate individuals count (higher than individuals_input_count) + # then incoming tasks are copied and divided + incoming_tasks_count = ceil(pop_size / _individuals_input_count) * repeat_count + # then only one task may be processed + incoming_tasks_count -= 1 + processed_tasks_count = 1 * _individuals_output_count * len(success_outputs) + + assert len(final_tasks) == (incoming_tasks_count + processed_tasks_count) + + # check tasks stage + processed_task_stage = TaskStagesEnum.FINISH if success_outputs == [None] else TaskStagesEnum.SUCCESS + if stage is TaskStagesEnum.INIT: + assert sum(_task.stage is TaskStagesEnum.INIT for _task in final_tasks) == incoming_tasks_count + assert sum(_task.stage is processed_task_stage for _task in final_tasks) == processed_tasks_count + elif processed_task_stage is TaskStagesEnum.SUCCESS: + assert all(_task.stage is TaskStagesEnum.SUCCESS for _task in final_tasks) + else: + assert sum(_task.stage is TaskStagesEnum.SUCCESS for _task in final_tasks) == incoming_tasks_count + assert sum(_task.stage is processed_task_stage for _task in final_tasks) == processed_tasks_count + + # check left_tries + assert all(_task.left_tries == tries_count for _task in final_tasks) + + # check success task + assert sum(_task.prev_stage_node == node_name for _task in final_tasks) == processed_tasks_count \ No newline at end of file From f68a0396243e9cef5c25f902a0586f40995660ca Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Thu, 16 Nov 2023 17:27:24 +0300 Subject: [PATCH 64/65] wip --- golem/core/optimisers/genetic/gp_optimizer.py | 2 +- .../core/optimisers/genetic/operators/node.py | 5 +- .../gp_operators/test_genetic_pipelines.py | 51 ++++++++++++++++--- 3 files changed, 47 insertions(+), 11 deletions(-) diff --git a/golem/core/optimisers/genetic/gp_optimizer.py b/golem/core/optimisers/genetic/gp_optimizer.py index 89401cbff..ffea0b49c 100644 --- a/golem/core/optimisers/genetic/gp_optimizer.py +++ b/golem/core/optimisers/genetic/gp_optimizer.py @@ -8,7 +8,7 @@ from golem.core.optimisers.genetic.operators.mutation import Mutation, SinglePredefinedGraphMutation from golem.core.optimisers.genetic.operators.operator import PopulationT, EvaluationOperator from golem.core.optimisers.genetic.operators.regularization import Regularization -from golem.core.optimisers.genetic.operators.reproduction import ReproductionController, ReproducerWorkerStageEnum +from golem.core.optimisers.genetic.operators.reproduction import ReproductionController from golem.core.optimisers.genetic.operators.selection import Selection from golem.core.optimisers.genetic.parameters.graph_depth import AdaptiveGraphDepth from golem.core.optimisers.genetic.parameters.operators_prob import init_adaptive_operators_prob diff --git a/golem/core/optimisers/genetic/operators/node.py b/golem/core/optimisers/genetic/operators/node.py index 6e6615fe0..52ebea283 100644 --- a/golem/core/optimisers/genetic/operators/node.py +++ b/golem/core/optimisers/genetic/operators/node.py @@ -104,7 +104,7 @@ def __call__(self, task: GeneticOperatorTask): if max_length is not None and length > max_length: individuals_groups = [task.individuals[i * max_length:min(length, (i + 1) * max_length)] for i in range(ceil(length / max_length))] - for individuals_group in individuals_groups: + for individuals_group in reversed(individuals_groups): final_tasks.append(task.copy(individuals=individuals_group)) # get task for current run task = final_tasks.pop() @@ -112,8 +112,7 @@ def __call__(self, task: GeneticOperatorTask): # repeat each task if it is allowed if self.repeat_count > 1: final_tasks.append(task) - for _ in range(self.repeat_count - 1): - final_tasks.extend([task.copy() for task in final_tasks]) + final_tasks = [task.copy() for task in final_tasks * self.repeat_count] # get task for current run task = final_tasks.pop() diff --git a/test/unit/optimizers/gp_operators/test_genetic_pipelines.py b/test/unit/optimizers/gp_operators/test_genetic_pipelines.py index 5827a91ec..b3ab483a5 100644 --- a/test/unit/optimizers/gp_operators/test_genetic_pipelines.py +++ b/test/unit/optimizers/gp_operators/test_genetic_pipelines.py @@ -1,4 +1,5 @@ import random +from collections import Counter from math import ceil from itertools import product from typing import Optional @@ -35,13 +36,14 @@ def __init__(self, *args, self.individuals_output_count = individuals_output_count def __call__(self, individuals, operation_type = None): - if len(individuals) > self.individuals_input_count or len(individuals) == 0: + if ((self.individuals_input_count is not None and len(individuals) > self.individuals_input_count) or + len(individuals) == 0): raise UncorrectIndividualsCount() super().__call__() if self.individuals_output_count is None: return individuals, operation_type else: - return individuals[self.individuals_output_count], operation_type + return individuals[:1] * self.individuals_output_count, operation_type class MockEvaluator(Mock, EvaluationOperator): @@ -142,7 +144,8 @@ def test_genetic_node_with_nonfailed_task(stage, success_outputs, left_tries, in node_name = 'test' task = get_random_task(pop_size=pop_size, stage=stage, left_tries=left_tries) - operator = MockOperator(success_prob=1, individuals_input_count=individuals_input_count) + operator = MockOperator(success_prob=1, individuals_input_count=individuals_input_count, + individuals_output_count=individuals_output_count) node = GeneticNode(name=node_name, operator=operator, success_outputs=success_outputs, individuals_input_count=individuals_input_count, repeat_count=repeat_count, tries_count=tries_count) @@ -160,10 +163,10 @@ def test_genetic_node_with_nonfailed_task(stage, success_outputs, left_tries, in incoming_tasks_count = ceil(pop_size / _individuals_input_count) * repeat_count # then only one task may be processed incoming_tasks_count -= 1 - processed_tasks_count = 1 * _individuals_output_count * len(success_outputs) - + processed_tasks_count = 1 * len(success_outputs) assert len(final_tasks) == (incoming_tasks_count + processed_tasks_count) + # check tasks stage processed_task_stage = TaskStagesEnum.FINISH if success_outputs == [None] else TaskStagesEnum.SUCCESS if stage is TaskStagesEnum.INIT: @@ -178,5 +181,39 @@ def test_genetic_node_with_nonfailed_task(stage, success_outputs, left_tries, in # check left_tries assert all(_task.left_tries == tries_count for _task in final_tasks) - # check success task - assert sum(_task.prev_stage_node == node_name for _task in final_tasks) == processed_tasks_count \ No newline at end of file + # check prev and next nodes + assert sum(_task.prev_stage_node == node_name for _task in final_tasks) == processed_tasks_count + next_nodes = Counter(_task.next_stage_node for _task in final_tasks) + if success_outputs == [None]: + assert next_nodes[None] == len(final_tasks) + else: + assert set(next_nodes[name] for name in success_outputs) == {1} + + # check that processed task has correct individuals count + assert all(len(_task.individuals) == (individuals_output_count or _individuals_input_count) + for _task in final_tasks if _task.prev_stage_node == node.name) + + +@pytest.mark.parametrize(['success_outputs', 'left_tries', + 'individuals_input_count', 'individuals_output_count', + 'repeat_count', 'tries_count'], + product([[None], ['1', '2', '3']], # success_outputs + [1, 3], # left_tries + [1, 3, None], # individuals_input_count + [1, 3, None], # individuals_output_count + [1, 3], # repeat_count + [1, 3], # tries_count + )) +def test_genetic_node_with_nonfailed_task(success_outputs, left_tries, individuals_input_count, + individuals_output_count, repeat_count, tries_count): + pop_size = 10 + node_name = 'test' + + task = get_random_task(pop_size=pop_size, stage=TaskStagesEnum.FAIL, left_tries=left_tries) + operator = MockOperator(success_prob=1, individuals_input_count=individuals_input_count, + individuals_output_count=individuals_output_count) + node = GeneticNode(name=node_name, operator=operator, success_outputs=success_outputs, + individuals_input_count=individuals_input_count, + repeat_count=repeat_count, tries_count=tries_count) + + final_tasks = node(task) From fe385008ca8d9f444729daab02f2ba07bbf5ba3f Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Fri, 24 Nov 2023 17:59:06 +0300 Subject: [PATCH 65/65] wip --- .../optimisers/genetic/gp_optimizer_new.py | 116 +++++++++++++++ golem/core/optimisers/genetic/pool.py | 137 ++++++++++++++++++ 2 files changed, 253 insertions(+) create mode 100644 golem/core/optimisers/genetic/gp_optimizer_new.py create mode 100644 golem/core/optimisers/genetic/pool.py diff --git a/golem/core/optimisers/genetic/gp_optimizer_new.py b/golem/core/optimisers/genetic/gp_optimizer_new.py new file mode 100644 index 000000000..ffea0b49c --- /dev/null +++ b/golem/core/optimisers/genetic/gp_optimizer_new.py @@ -0,0 +1,116 @@ +from typing import Sequence, Union, Any + +from golem.core.dag.graph import Graph +from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters +from golem.core.optimisers.genetic.operators.crossover import Crossover, SinglePredefinedGraphCrossover +from golem.core.optimisers.genetic.operators.elitism import Elitism +from golem.core.optimisers.genetic.operators.inheritance import Inheritance +from golem.core.optimisers.genetic.operators.mutation import Mutation, SinglePredefinedGraphMutation +from golem.core.optimisers.genetic.operators.operator import PopulationT, EvaluationOperator +from golem.core.optimisers.genetic.operators.regularization import Regularization +from golem.core.optimisers.genetic.operators.reproduction import ReproductionController +from golem.core.optimisers.genetic.operators.selection import Selection +from golem.core.optimisers.genetic.parameters.graph_depth import AdaptiveGraphDepth +from golem.core.optimisers.genetic.parameters.operators_prob import init_adaptive_operators_prob +from golem.core.optimisers.genetic.parameters.population_size import init_adaptive_pop_size, PopulationSize +from golem.core.optimisers.objective.objective import Objective +from golem.core.optimisers.opt_history_objects.individual import Individual +from golem.core.optimisers.optimization_parameters import GraphRequirements +from golem.core.optimisers.optimizer import GraphGenerationParams +from golem.core.optimisers.populational_optimizer import PopulationalOptimizer + + +class EvoGraphOptimizer(PopulationalOptimizer): + """ + Multi-objective evolutionary graph optimizer named GPComp + """ + + def __init__(self, + objective: Objective, + initial_graphs: Sequence[Union[Graph, Any]], + requirements: GraphRequirements, + graph_generation_params: GraphGenerationParams, + graph_optimizer_params: GPAlgorithmParameters): + super().__init__(objective, initial_graphs, requirements, graph_generation_params, graph_optimizer_params) + # Define genetic operators + self.regularization = Regularization(graph_optimizer_params, graph_generation_params) + self.selection = Selection(graph_optimizer_params) + self.crossover = SinglePredefinedGraphCrossover(graph_optimizer_params, requirements, graph_generation_params) + self.mutation = SinglePredefinedGraphMutation(graph_optimizer_params, requirements, graph_generation_params) + self.inheritance = Inheritance(graph_optimizer_params, self.selection) + self.elitism = Elitism(graph_optimizer_params) + self.operators = [self.regularization, self.selection, self.crossover, + self.mutation, self.inheritance, self.elitism] + + self.reproducer = ReproductionController(parameters=graph_optimizer_params, + selection=self.selection, + mutation=self.mutation, + crossover=self.crossover, + verifier=self.graph_generation_params.verifier) + + # Define adaptive parameters + self._pop_size: PopulationSize = init_adaptive_pop_size(graph_optimizer_params, self.generations) + self._operators_prob = init_adaptive_operators_prob(graph_optimizer_params) + self._graph_depth = AdaptiveGraphDepth(self.generations, + start_depth=requirements.start_depth, + max_depth=requirements.max_depth, + max_stagnation_gens=graph_optimizer_params.adaptive_depth_max_stagnation, + adaptive=graph_optimizer_params.adaptive_depth) + + # Define initial parameters + self.requirements.max_depth = self._graph_depth.initial + self.graph_optimizer_params.pop_size = self._pop_size.initial + self.initial_individuals = [Individual(graph, metadata=requirements.static_individual_metadata) + for graph in self.initial_graphs] + + def _initial_population(self, evaluator: EvaluationOperator): + """ Initializes the initial population """ + # Adding of initial assumptions to history as zero generation + self._update_population(evaluator(self.initial_individuals), 'initial_assumptions') + # pop_size = self.graph_optimizer_params.pop_size + # + # if len(self.initial_individuals) < pop_size: + # self.initial_individuals += self.reproducer._reproduce(population=self.initial_individuals, + # evaluator=evaluator) + # # Adding of extended population to history + # self._update_population(self.initial_individuals, 'extended_initial_assumptions') + + def _evolve_population(self, evaluator: EvaluationOperator) -> PopulationT: + """ Method realizing full evolution cycle """ + + # Defines adaptive changes to algorithm parameters + # like pop_size and operator probabilities + self._update_requirements() + + # Regularize previous population + individuals_to_select = self.regularization(self.population, evaluator) + # Reproduce from previous pop to get next population + new_population = self.reproducer.reproduce(individuals_to_select, evaluator) + + # Adaptive agent experience collection & learning + # Must be called after reproduction (that collects the new experience) + experience = self.mutation.agent_experience + experience.collect_results(new_population) + self.mutation.agent.partial_fit(experience) + + # Use some part of previous pop in the next pop + new_population = self.inheritance(self.population, new_population) + new_population = self.elitism(self.generations.best_individuals, new_population) + return new_population + + def _update_requirements(self): + if not self.generations.is_any_improved: + self.graph_optimizer_params.mutation_prob, self.graph_optimizer_params.crossover_prob = \ + self._operators_prob.next(self.population) + self.log.info( + f'Next mutation proba: {self.graph_optimizer_params.mutation_prob}; ' + f'Next crossover proba: {self.graph_optimizer_params.crossover_prob}') + self.graph_optimizer_params.pop_size = self._pop_size.next(self.population) + self.requirements.max_depth = self._graph_depth.next() + self.log.info( + f'Next population size: {self.graph_optimizer_params.pop_size}; ' + f'max graph depth: {self.requirements.max_depth}') + + # update requirements in operators + for operator in self.operators: + operator.update_requirements(self.graph_optimizer_params, self.requirements) diff --git a/golem/core/optimisers/genetic/pool.py b/golem/core/optimisers/genetic/pool.py new file mode 100644 index 000000000..fa43d4523 --- /dev/null +++ b/golem/core/optimisers/genetic/pool.py @@ -0,0 +1,137 @@ +from dataclasses import dataclass +from enum import Enum, auto +from typing import Optional, List, Any, Callable + + +class ParametersTypesEnum(Enum): + UNKNOWN = auto() + OPTIMIZER = auto() + POOL = auto() + NODE = auto() + + def __ge__(self, other): + if self.__class__ is other.__class__: + return self.value >= other.value + return NotImplemented + + def __gt__(self, other): + if self.__class__ is other.__class__: + return self.value > other.value + return NotImplemented + + def __next__(self): + return ParametersTypesEnum(self.value + 1) + + +# class Parameters: +# def __init__(self, type_: ParametersTypesEnum, data: Optional[dict] = None): +# data = data or dict() +# +# for k in data: +# if isinstance(data[k], dict): +# data[k] = Parameters(next(type_), data[k]) +# self.type = type_ +# self.__data = data +# +# def __getitem__(self, keys): +# data = self.__data +# for key in keys: +# data = data[key] +# return data +# +# def __setitem__(self, keys, value): +# data = self.__data +# for key in keys[:-1]: +# if key not in data: +# data[key] = Parameters(next(self.type)) +# data = data[key] +# data[keys[-1]] = value +# +# def __repr__(self): +# def pp(parameters, indent=0): +# return '\n' + '\n'.join(f"{' ' * indent}'{key}': {value.type.name + pp(value, indent + 2) if isinstance(value, self.__class__) else value}" +# for key, value in parameters.__data.items()) +# return self.type.name + pp(self) +# +# def __iter__(self): +# return (x for x in self.__data.keys()) +# +# def items(self): +# return (x for x in self.__data.items()) +# +# def filter_by_type(self, type_: ParametersTypesEnum): +# return [pars for name, pars in self.items() +# if isinstance(pars, Parameters) and pars.type is type_] + + +class Parameters: + pass + + +@dataclass +class OptimizerParameters(Parameters): + pool_parameters: List['PoolParameters'] + n_jobs: int = -1 + + +@dataclass +class PoolParameters(Parameters): + name: str + constructor: Callable + n_jobs: int + nodes: List['Node'] + scheme: 'Scheme' + task_constructor: Callable + task_history: List[Any] + + +class Optimizer: + def __init__(self, parameters: OptimizerParameters): + self.parameters = parameters + + def _evolve_population(self): + common_parameters = self.parameters + for pool_params in common_parameters.pool_parameters: + pool = pool_params.constructor(pool_params, common_parameters) + common_parameters.update(pool.run()) + + +class Pool: + """ Pool of nodes """ + + def __init__(self, pool_parameters: PoolParameters, parameters: OptimizerParameters): + self.name = pool_parameters.name + self.nodes_map = {node.name: node for node in pool_parameters.nodes} + self.task = pool_parameters.task + self.scheme = pool_parameters.scheme + + # TODO error if there are some nodes with same name + + def __call__(self, task: Task): + if not task.next in self.nodes_map: + raise ValueError((f"Pool {self.name}. Unknown node {task.next}. " + f"Existing nodes: {', '.join(self.nodes_map)}.")) + processed_task = task.run_on_node(self.nodes_map[task.next]) + return processed_task + + +class Node: + """ Node with operation """ + + def __init__(self, name: str, operation: Callable): + self.name = name + self.operation = operation + + def __call__(self, *args, **kwargs): + return self.operation(*args, **kwargs) + + +class Task: + """ Data with parameters for operation """ + + def __init__(self, data: Any, parameters: Any): + self.data = data + self.parameters = parameters + + def run_on_node(self, node: Node): + result = node(self.data, self.parameters)