diff --git a/app/app_graph.py b/app/altenative_app_graph.py similarity index 94% rename from app/app_graph.py rename to app/altenative_app_graph.py index 8202929e..7d7ba426 100644 --- a/app/app_graph.py +++ b/app/altenative_app_graph.py @@ -6,7 +6,7 @@ import mcts from mcts_run_setup import config_with_standard_graph -from rostok.graph_generators.mcts_helper import (make_mcts_step, prepare_mcts_state_and_helper) +from rostok.graph_generators.mcts_helper import (make_mcts_step, prepare_mcts_state_and_helper, CheckpointMCTS) from rostok.graph_grammar.node import GraphGrammar from rostok.library.obj_grasp.objects import get_object_parametrized_sphere, get_object_parametrized_tilt_ellipsoid from rostok.library.rule_sets.ruleset_old_style_graph import create_rules @@ -32,6 +32,8 @@ # the constant that determines how we reduce the number of iterations in the MCTS search iteration_reduction_rate = hp.ITERATION_REDUCTION_TIME + +checkpointer = CheckpointMCTS(mcts_helper.report, "AppGraphSphere", rewrite=False) start = time.time() finish = False n_steps = 0 @@ -39,7 +41,7 @@ iteration_limit = base_iteration_limit - int(graph_env.counter_action / max_numbers_rules * (base_iteration_limit * iteration_reduction_rate)) searcher = mcts.mcts(iterationLimit=iteration_limit) - finish, graph_env = make_mcts_step(searcher, graph_env, n_steps) + finish, graph_env = make_mcts_step(searcher, graph_env, n_steps, checkpointer) n_steps += 1 print(f"number iteration: {n_steps}, counter actions: {graph_env.counter_action} " + f"reward: {mcts_helper.report.get_best_info()[1]}") @@ -74,7 +76,7 @@ sys.stdout = original_stdout simulation_rewarder = control_optimizer.rewarder -simulation_manager = control_optimizer.simulation_control +simulation_manager = control_optimizer.simulation_scenario # visualisation in the end of the search best_graph, reward, best_control = mcts_helper.report.get_best_info() data = {"initial_value": best_control} diff --git a/app/app.py b/app/app.py index 40797cb7..ac246ecc 100644 --- a/app/app.py +++ b/app/app.py @@ -74,7 +74,7 @@ sys.stdout = original_stdout simulation_rewarder = control_optimizer.rewarder -simulation_manager = control_optimizer.simulation_control +simulation_manager = control_optimizer.simulation_scenario # visualisation in the end of the search best_graph, reward, best_control = mcts_helper.report.get_best_info() data = {"initial_value": best_control} diff --git a/app/app_graph_new.py b/app/app_graph_new.py index a56e1ee4..462b8f53 100644 --- a/app/app_graph_new.py +++ b/app/app_graph_new.py @@ -76,7 +76,7 @@ sys.stdout = original_stdout simulation_rewarder = control_optimizer.rewarder -simulation_manager = control_optimizer.simulation_control +simulation_manager = control_optimizer.simulation_scenario # visualisation in the end of the search best_graph, reward, best_control = mcts_helper.report.get_best_info() data = {"initial_value": best_control} diff --git a/app/app_new.py b/app/app_new.py index cf435c25..e17a12e7 100644 --- a/app/app_new.py +++ b/app/app_new.py @@ -74,7 +74,7 @@ sys.stdout = original_stdout simulation_rewarder = control_optimizer.rewarder -simulation_manager = control_optimizer.simulation_control +simulation_manager = control_optimizer.simulation_scenario # visualisation in the end of the search best_graph, reward, best_control = mcts_helper.report.get_best_info() data = {"initial_value": best_control} diff --git a/app/app_new_multiobject.py b/app/app_new_multiobject.py index 082b15b9..11a5fcaf 100644 --- a/app/app_new_multiobject.py +++ b/app/app_new_multiobject.py @@ -81,7 +81,7 @@ sys.stdout = original_stdout simulation_rewarder = control_optimizer.rewarder -simulation_manager = control_optimizer.simulation_control[0] +simulation_manager = control_optimizer.simulation_scenario[0] # visualisation in the end of the search best_graph, reward, best_control = mcts_helper.report.get_best_info() data = control_optimizer.optim_parameters2data_control(best_control) diff --git a/app/checkpoint/AppGraphSphere_1/MCTS_data.pickle b/app/checkpoint/AppGraphSphere_1/MCTS_data.pickle new file mode 100644 index 00000000..9d1e2f25 Binary files /dev/null and b/app/checkpoint/AppGraphSphere_1/MCTS_data.pickle differ diff --git a/app/checkpoint/AppGraphSphere_1/log-file.txt b/app/checkpoint/AppGraphSphere_1/log-file.txt new file mode 100644 index 00000000..2e5aee26 --- /dev/null +++ b/app/checkpoint/AppGraphSphere_1/log-file.txt @@ -0,0 +1,18 @@ +MCTS Iteration: 0, Iteration time: 1.5279135704040527 +main_result: +rules: Init +control: +reward: 0 + +best_result: +rules: Init +control: +reward: 0 + +max number of non-terminal rules: 20 search parameter: 3 + +Number of unique mechanisms tested in current MCTS run: 3 +Number of states 3 + +---------------------------------- + diff --git a/app/checkpoint/AppGraphSphere_1/optimized_MCTS_state_report.pickle b/app/checkpoint/AppGraphSphere_1/optimized_MCTS_state_report.pickle new file mode 100644 index 00000000..ca363355 Binary files /dev/null and b/app/checkpoint/AppGraphSphere_1/optimized_MCTS_state_report.pickle differ diff --git a/app/checkpoint/AppGraphSphere_1/optimized_graph_report.pickle b/app/checkpoint/AppGraphSphere_1/optimized_graph_report.pickle new file mode 100644 index 00000000..bbc73963 Binary files /dev/null and b/app/checkpoint/AppGraphSphere_1/optimized_graph_report.pickle differ diff --git a/app/checkpoint/AppGraphSphere_10/MCTS_data.pickle b/app/checkpoint/AppGraphSphere_10/MCTS_data.pickle new file mode 100644 index 00000000..26a9bff9 Binary files /dev/null and b/app/checkpoint/AppGraphSphere_10/MCTS_data.pickle differ diff --git a/app/checkpoint/AppGraphSphere_10/log-file.txt b/app/checkpoint/AppGraphSphere_10/log-file.txt new file mode 100644 index 00000000..5222ca9c --- /dev/null +++ b/app/checkpoint/AppGraphSphere_10/log-file.txt @@ -0,0 +1,126 @@ +MCTS Iteration: 0, Iteration time: 2.387392282485962 +main_result: +rules: Init +control: +reward: 0 + +best_result: +rules: Init +control: +reward: 0 + +max number of non-terminal rules: 20 search parameter: 3 + +Number of unique mechanisms tested in current MCTS run: 3 +Number of states 3 + +---------------------------------- + +MCTS Iteration: 2, Iteration time: 2.043215036392212 +main_result: +rules: Init AddFinger_RN Terminal_Negative_Translate2 +control: +reward: 0 + +best_result: +rules: Init AddFinger_RN Terminal_Negative_Translate2 +control: +reward: 0 + +max number of non-terminal rules: 20 search parameter: 3 + +Number of unique mechanisms tested in current MCTS run: 9 +Number of states 9 + +---------------------------------- + +MCTS Iteration: 4, Iteration time: 1.625321865081787 +main_result: +rules: Init AddFinger_RN Terminal_Negative_Translate2 RemoveFinger_N AddFinger_R +control: +reward: 0 + +best_result: +rules: Init AddFinger_RN Terminal_Negative_Translate2 RemoveFinger_N AddFinger_R +control: +reward: 0 + +max number of non-terminal rules: 20 search parameter: 3 + +Number of unique mechanisms tested in current MCTS run: 15 +Number of states 15 + +---------------------------------- + +MCTS Iteration: 6, Iteration time: 1.576897144317627 +main_result: +rules: Init AddFinger_RN Terminal_Negative_Translate2 RemoveFinger_N AddFinger_R RemoveFinger RemoveFinger_RP +control: +reward: 0 + +best_result: +rules: Init AddFinger_RN Terminal_Negative_Translate2 RemoveFinger_N AddFinger_R RemoveFinger RemoveFinger_RP +control: +reward: 0 + +max number of non-terminal rules: 20 search parameter: 3 + +Number of unique mechanisms tested in current MCTS run: 21 +Number of states 21 + +---------------------------------- + +MCTS Iteration: 8, Iteration time: 0.7679116725921631 +main_result: +rules: Init AddFinger_RN Terminal_Negative_Translate2 RemoveFinger_N AddFinger_R RemoveFinger RemoveFinger_RP AddFinger_PT Remove_FG +control: +reward: 0 + +best_result: +rules: Init AddFinger_RN Terminal_Negative_Translate2 RemoveFinger_N AddFinger_R RemoveFinger RemoveFinger_RP AddFinger_PT Remove_FG +control: +reward: 0 + +max number of non-terminal rules: 20 search parameter: 3 + +Number of unique mechanisms tested in current MCTS run: 27 +Number of states 27 + +---------------------------------- + +MCTS Iteration: 10, Iteration time: 0.5185501575469971 +main_result: +rules: Init AddFinger_RN Terminal_Negative_Translate2 RemoveFinger_N AddFinger_R RemoveFinger RemoveFinger_RP AddFinger_PT Remove_FG Remove_FG Terminal_Radial_Translate2 +control: +reward: 0 + +best_result: +rules: Init AddFinger_RN Terminal_Negative_Translate2 RemoveFinger_N AddFinger_R RemoveFinger RemoveFinger_RP AddFinger_PT Remove_FG Remove_FG Terminal_Radial_Translate2 +control: +reward: 0 + +max number of non-terminal rules: 20 search parameter: 3 + +Number of unique mechanisms tested in current MCTS run: 33 +Number of states 33 + +---------------------------------- + +MCTS Iteration: 12, Iteration time: 0.1713728904724121 +main_result: +rules: Init AddFinger_RN Terminal_Negative_Translate2 RemoveFinger_N AddFinger_R RemoveFinger RemoveFinger_RP AddFinger_PT Remove_FG Remove_FG Terminal_Radial_Translate2 Remove_FG Terminal_Radial_Translate2 +control: +reward: 0 + +best_result: +rules: Init AddFinger_RN Terminal_Negative_Translate2 RemoveFinger_N AddFinger_R RemoveFinger RemoveFinger_RP AddFinger_PT Remove_FG Remove_FG Terminal_Radial_Translate2 Remove_FG Terminal_Radial_Translate2 +control: +reward: 0 + +max number of non-terminal rules: 20 search parameter: 3 + +Number of unique mechanisms tested in current MCTS run: 35 +Number of states 39 + +---------------------------------- + diff --git a/app/checkpoint/AppGraphSphere_10/optimized_MCTS_state_report.pickle b/app/checkpoint/AppGraphSphere_10/optimized_MCTS_state_report.pickle new file mode 100644 index 00000000..18123955 Binary files /dev/null and b/app/checkpoint/AppGraphSphere_10/optimized_MCTS_state_report.pickle differ diff --git a/app/checkpoint/AppGraphSphere_10/optimized_graph_report.pickle b/app/checkpoint/AppGraphSphere_10/optimized_graph_report.pickle new file mode 100644 index 00000000..bfa0023b Binary files /dev/null and b/app/checkpoint/AppGraphSphere_10/optimized_graph_report.pickle differ diff --git a/app/checkpoint/AppGraphSphere_2/MCTS_data.pickle b/app/checkpoint/AppGraphSphere_2/MCTS_data.pickle new file mode 100644 index 00000000..9c117c5f Binary files /dev/null and b/app/checkpoint/AppGraphSphere_2/MCTS_data.pickle differ diff --git a/app/checkpoint/AppGraphSphere_2/log-file.txt b/app/checkpoint/AppGraphSphere_2/log-file.txt new file mode 100644 index 00000000..3f2a9fcb --- /dev/null +++ b/app/checkpoint/AppGraphSphere_2/log-file.txt @@ -0,0 +1,18 @@ +MCTS Iteration: 0, Iteration time: 1.9557695388793945 +main_result: +rules: Init +control: +reward: 0 + +best_result: +rules: Init +control: +reward: 0 + +max number of non-terminal rules: 20 search parameter: 3 + +Number of unique mechanisms tested in current MCTS run: 3 +Number of states 3 + +---------------------------------- + diff --git a/app/checkpoint/AppGraphSphere_2/optimized_MCTS_state_report.pickle b/app/checkpoint/AppGraphSphere_2/optimized_MCTS_state_report.pickle new file mode 100644 index 00000000..3b7cfb0c Binary files /dev/null and b/app/checkpoint/AppGraphSphere_2/optimized_MCTS_state_report.pickle differ diff --git a/app/checkpoint/AppGraphSphere_2/optimized_graph_report.pickle b/app/checkpoint/AppGraphSphere_2/optimized_graph_report.pickle new file mode 100644 index 00000000..05839fae Binary files /dev/null and b/app/checkpoint/AppGraphSphere_2/optimized_graph_report.pickle differ diff --git a/app/checkpoint/AppGraphSphere_3/MCTS_data.pickle b/app/checkpoint/AppGraphSphere_3/MCTS_data.pickle new file mode 100644 index 00000000..2d828e25 Binary files /dev/null and b/app/checkpoint/AppGraphSphere_3/MCTS_data.pickle differ diff --git a/app/checkpoint/AppGraphSphere_3/log-file.txt b/app/checkpoint/AppGraphSphere_3/log-file.txt new file mode 100644 index 00000000..16943e18 --- /dev/null +++ b/app/checkpoint/AppGraphSphere_3/log-file.txt @@ -0,0 +1,18 @@ +MCTS Iteration: 0, Iteration time: 2.1372127532958984 +main_result: +rules: Init +control: +reward: 0 + +best_result: +rules: Init +control: +reward: 0 + +max number of non-terminal rules: 20 search parameter: 3 + +Number of unique mechanisms tested in current MCTS run: 3 +Number of states 3 + +---------------------------------- + diff --git a/app/checkpoint/AppGraphSphere_3/optimized_MCTS_state_report.pickle b/app/checkpoint/AppGraphSphere_3/optimized_MCTS_state_report.pickle new file mode 100644 index 00000000..6cc31515 Binary files /dev/null and b/app/checkpoint/AppGraphSphere_3/optimized_MCTS_state_report.pickle differ diff --git a/app/checkpoint/AppGraphSphere_3/optimized_graph_report.pickle b/app/checkpoint/AppGraphSphere_3/optimized_graph_report.pickle new file mode 100644 index 00000000..16495e18 Binary files /dev/null and b/app/checkpoint/AppGraphSphere_3/optimized_graph_report.pickle differ diff --git a/app/checkpoint/AppGraphSphere_4/MCTS_data.pickle b/app/checkpoint/AppGraphSphere_4/MCTS_data.pickle new file mode 100644 index 00000000..bcd49a36 Binary files /dev/null and b/app/checkpoint/AppGraphSphere_4/MCTS_data.pickle differ diff --git a/app/checkpoint/AppGraphSphere_4/log-file.txt b/app/checkpoint/AppGraphSphere_4/log-file.txt new file mode 100644 index 00000000..1cd7376d --- /dev/null +++ b/app/checkpoint/AppGraphSphere_4/log-file.txt @@ -0,0 +1,18 @@ +MCTS Iteration: 0, Iteration time: 1.5798587799072266 +main_result: +rules: Init +control: +reward: 0 + +best_result: +rules: Init +control: +reward: 0 + +max number of non-terminal rules: 20 search parameter: 3 + +Number of unique mechanisms tested in current MCTS run: 3 +Number of states 3 + +---------------------------------- + diff --git a/app/checkpoint/AppGraphSphere_4/optimized_MCTS_state_report.pickle b/app/checkpoint/AppGraphSphere_4/optimized_MCTS_state_report.pickle new file mode 100644 index 00000000..d7aa9338 Binary files /dev/null and b/app/checkpoint/AppGraphSphere_4/optimized_MCTS_state_report.pickle differ diff --git a/app/checkpoint/AppGraphSphere_4/optimized_graph_report.pickle b/app/checkpoint/AppGraphSphere_4/optimized_graph_report.pickle new file mode 100644 index 00000000..0b265a99 Binary files /dev/null and b/app/checkpoint/AppGraphSphere_4/optimized_graph_report.pickle differ diff --git a/app/checkpoint/AppGraphSphere_5/MCTS_data.pickle b/app/checkpoint/AppGraphSphere_5/MCTS_data.pickle new file mode 100644 index 00000000..05032502 Binary files /dev/null and b/app/checkpoint/AppGraphSphere_5/MCTS_data.pickle differ diff --git a/app/checkpoint/AppGraphSphere_5/log-file.txt b/app/checkpoint/AppGraphSphere_5/log-file.txt new file mode 100644 index 00000000..5d3c467f --- /dev/null +++ b/app/checkpoint/AppGraphSphere_5/log-file.txt @@ -0,0 +1,18 @@ +MCTS Iteration: 0, Iteration time: 1.234072208404541 +main_result: +rules: Init +control: +reward: 0 + +best_result: +rules: Init +control: +reward: 0 + +max number of non-terminal rules: 20 search parameter: 3 + +Number of unique mechanisms tested in current MCTS run: 3 +Number of states 3 + +---------------------------------- + diff --git a/app/checkpoint/AppGraphSphere_5/optimized_MCTS_state_report.pickle b/app/checkpoint/AppGraphSphere_5/optimized_MCTS_state_report.pickle new file mode 100644 index 00000000..8ac32700 Binary files /dev/null and b/app/checkpoint/AppGraphSphere_5/optimized_MCTS_state_report.pickle differ diff --git a/app/checkpoint/AppGraphSphere_5/optimized_graph_report.pickle b/app/checkpoint/AppGraphSphere_5/optimized_graph_report.pickle new file mode 100644 index 00000000..3d82c868 Binary files /dev/null and b/app/checkpoint/AppGraphSphere_5/optimized_graph_report.pickle differ diff --git a/app/checkpoint/AppGraphSphere_6/MCTS_data.pickle b/app/checkpoint/AppGraphSphere_6/MCTS_data.pickle new file mode 100644 index 00000000..f5d5f248 Binary files /dev/null and b/app/checkpoint/AppGraphSphere_6/MCTS_data.pickle differ diff --git a/app/checkpoint/AppGraphSphere_6/log-file.txt b/app/checkpoint/AppGraphSphere_6/log-file.txt new file mode 100644 index 00000000..28db96bd --- /dev/null +++ b/app/checkpoint/AppGraphSphere_6/log-file.txt @@ -0,0 +1,18 @@ +MCTS Iteration: 0, Iteration time: 3.1061370372772217 +main_result: +rules: Init +control: +reward: 0 + +best_result: +rules: Init +control: +reward: 0 + +max number of non-terminal rules: 20 search parameter: 3 + +Number of unique mechanisms tested in current MCTS run: 3 +Number of states 3 + +---------------------------------- + diff --git a/app/checkpoint/AppGraphSphere_6/optimized_MCTS_state_report.pickle b/app/checkpoint/AppGraphSphere_6/optimized_MCTS_state_report.pickle new file mode 100644 index 00000000..3ec6e8ee Binary files /dev/null and b/app/checkpoint/AppGraphSphere_6/optimized_MCTS_state_report.pickle differ diff --git a/app/checkpoint/AppGraphSphere_6/optimized_graph_report.pickle b/app/checkpoint/AppGraphSphere_6/optimized_graph_report.pickle new file mode 100644 index 00000000..2b302616 Binary files /dev/null and b/app/checkpoint/AppGraphSphere_6/optimized_graph_report.pickle differ diff --git a/app/checkpoint/AppGraphSphere_7/MCTS_data.pickle b/app/checkpoint/AppGraphSphere_7/MCTS_data.pickle new file mode 100644 index 00000000..cee399f3 Binary files /dev/null and b/app/checkpoint/AppGraphSphere_7/MCTS_data.pickle differ diff --git a/app/checkpoint/AppGraphSphere_7/log-file.txt b/app/checkpoint/AppGraphSphere_7/log-file.txt new file mode 100644 index 00000000..3798acac --- /dev/null +++ b/app/checkpoint/AppGraphSphere_7/log-file.txt @@ -0,0 +1,18 @@ +MCTS Iteration: 0, Iteration time: 1.6669328212738037 +main_result: +rules: Init +control: +reward: 0 + +best_result: +rules: Init +control: +reward: 0 + +max number of non-terminal rules: 20 search parameter: 3 + +Number of unique mechanisms tested in current MCTS run: 3 +Number of states 3 + +---------------------------------- + diff --git a/app/checkpoint/AppGraphSphere_7/optimized_MCTS_state_report.pickle b/app/checkpoint/AppGraphSphere_7/optimized_MCTS_state_report.pickle new file mode 100644 index 00000000..f40fa232 Binary files /dev/null and b/app/checkpoint/AppGraphSphere_7/optimized_MCTS_state_report.pickle differ diff --git a/app/checkpoint/AppGraphSphere_7/optimized_graph_report.pickle b/app/checkpoint/AppGraphSphere_7/optimized_graph_report.pickle new file mode 100644 index 00000000..88b2ee09 Binary files /dev/null and b/app/checkpoint/AppGraphSphere_7/optimized_graph_report.pickle differ diff --git a/app/checkpoint/AppGraphSphere_8/MCTS_data.pickle b/app/checkpoint/AppGraphSphere_8/MCTS_data.pickle new file mode 100644 index 00000000..b072e4de Binary files /dev/null and b/app/checkpoint/AppGraphSphere_8/MCTS_data.pickle differ diff --git a/app/checkpoint/AppGraphSphere_8/log-file.txt b/app/checkpoint/AppGraphSphere_8/log-file.txt new file mode 100644 index 00000000..fb42af11 --- /dev/null +++ b/app/checkpoint/AppGraphSphere_8/log-file.txt @@ -0,0 +1,18 @@ +MCTS Iteration: 0, Iteration time: 1.877546787261963 +main_result: +rules: Init +control: +reward: 0 + +best_result: +rules: Init +control: +reward: 0 + +max number of non-terminal rules: 20 search parameter: 3 + +Number of unique mechanisms tested in current MCTS run: 3 +Number of states 3 + +---------------------------------- + diff --git a/app/checkpoint/AppGraphSphere_8/optimized_MCTS_state_report.pickle b/app/checkpoint/AppGraphSphere_8/optimized_MCTS_state_report.pickle new file mode 100644 index 00000000..cda94c47 Binary files /dev/null and b/app/checkpoint/AppGraphSphere_8/optimized_MCTS_state_report.pickle differ diff --git a/app/checkpoint/AppGraphSphere_8/optimized_graph_report.pickle b/app/checkpoint/AppGraphSphere_8/optimized_graph_report.pickle new file mode 100644 index 00000000..791fed35 Binary files /dev/null and b/app/checkpoint/AppGraphSphere_8/optimized_graph_report.pickle differ diff --git a/app/checkpoint/AppGraphSphere_9/MCTS_data.pickle b/app/checkpoint/AppGraphSphere_9/MCTS_data.pickle new file mode 100644 index 00000000..4e46ef11 Binary files /dev/null and b/app/checkpoint/AppGraphSphere_9/MCTS_data.pickle differ diff --git a/app/checkpoint/AppGraphSphere_9/log-file.txt b/app/checkpoint/AppGraphSphere_9/log-file.txt new file mode 100644 index 00000000..88a04637 --- /dev/null +++ b/app/checkpoint/AppGraphSphere_9/log-file.txt @@ -0,0 +1,18 @@ +MCTS Iteration: 0, Iteration time: 1.2005963325500488 +main_result: +rules: Init +control: +reward: 0 + +best_result: +rules: Init +control: +reward: 0 + +max number of non-terminal rules: 20 search parameter: 3 + +Number of unique mechanisms tested in current MCTS run: 3 +Number of states 3 + +---------------------------------- + diff --git a/app/checkpoint/AppGraphSphere_9/optimized_MCTS_state_report.pickle b/app/checkpoint/AppGraphSphere_9/optimized_MCTS_state_report.pickle new file mode 100644 index 00000000..439957bc Binary files /dev/null and b/app/checkpoint/AppGraphSphere_9/optimized_MCTS_state_report.pickle differ diff --git a/app/checkpoint/AppGraphSphere_9/optimized_graph_report.pickle b/app/checkpoint/AppGraphSphere_9/optimized_graph_report.pickle new file mode 100644 index 00000000..1d0fff34 Binary files /dev/null and b/app/checkpoint/AppGraphSphere_9/optimized_graph_report.pickle differ diff --git a/app/hyperparameters.py b/app/hyperparameters.py index d6f3b16e..d99586e8 100644 --- a/app/hyperparameters.py +++ b/app/hyperparameters.py @@ -1,7 +1,8 @@ MAX_NUMBER_RULES = 10 -BASE_ITERATION_LIMIT = 20 -BASE_ITERATION_LIMIT_GRAPH = 200 +BASE_ITERATION_LIMIT = 30 +BASE_ITERATION_LIMIT_GRAPH = 3 +BASE_ITERATION_LIMIT_TENDON = 3 ITERATION_REDUCTION_TIME = 0.7 diff --git a/app/mcts_run_setup.py b/app/mcts_run_setup.py index 91447661..c453712a 100644 --- a/app/mcts_run_setup.py +++ b/app/mcts_run_setup.py @@ -16,12 +16,29 @@ def config_with_standard(grasp_object_blueprint): # configurate the simulation manager + def object_callback(): + return creator.create_environment_body(grasp_object_blueprint) + simulation_manager = ConstTorqueGrasp(hp.TIME_STEP_SIMULATION, hp.TIME_SIMULATION) - simulation_manager.grasp_object_callback = lambda: creator.create_environment_body( - grasp_object_blueprint) - simulation_manager.add_flag(FlagContactTimeOut(hp.FLAG_TIME_NO_CONTACT)) - simulation_manager.add_flag(FlagFlyingApart(hp.FLAG_FLYING_APART)) - simulation_manager.add_flag(FlagSlipout(hp.FLAG_TIME_SLIPOUT)) + simulation_manager.grasp_object_callback = object_callback + event_contact = EventContact() + simulation_manager.add_event(event_contact) + event_timeout = EventContactTimeOut(hp.FLAG_TIME_NO_CONTACT, event_contact) + simulation_manager.add_event(event_timeout) + event_flying_apart = EventFlyingApart(hp.FLAG_FLYING_APART) + simulation_manager.add_event(event_flying_apart) + event_slipout = EventSlipOut(hp.FLAG_TIME_SLIPOUT) + simulation_manager.add_event(event_slipout) + event_grasp = EventGrasp( + grasp_limit_time=hp.GRASP_TIME, + contact_event=event_contact, + verbosity=0, + ) + simulation_manager.add_event(event_grasp) + event_stop_external_force = EventStopExternalForce(grasp_event=event_grasp, + force_test_time=hp.FORCE_TEST_TIME) + simulation_manager.add_event(event_stop_external_force) + #create criterion manager simulation_rewarder = SimulationReward(1) #create criterions and add them to manager diff --git a/app/report.py b/app/report.py index efa6958c..da029905 100644 --- a/app/report.py +++ b/app/report.py @@ -26,7 +26,7 @@ def plot_graph(graph: GraphGrammar): best_graph, reward, best_control = report.get_best_info() simulation_rewarder = control_optimizer.rewarder -simulation_manager = control_optimizer.simulation_control +simulation_manager = control_optimizer.simulation_scenario plot_graph(best_graph) data = {"initial_value": best_control} diff --git a/app/restore_optimization.py b/app/restore_optimization.py new file mode 100644 index 00000000..44b47eb5 --- /dev/null +++ b/app/restore_optimization.py @@ -0,0 +1,64 @@ +import sys +import time +from pathlib import Path + +import hyperparameters as hp +import mcts +from mcts_run_setup import config_with_standard_graph + +from rostok.graph_generators.mcts_helper import (make_mcts_step, prepare_mcts_state_and_helper, CheckpointMCTS) +from rostok.graph_grammar.node import GraphGrammar +from rostok.library.obj_grasp.objects import get_object_parametrized_sphere, get_object_parametrized_tilt_ellipsoid +from rostok.library.rule_sets.ruleset_old_style_graph import create_rules + + +checkpointer = CheckpointMCTS(mcts_helper.report, "AppGraphSphere", rewrite=False) +start = time.time() +finish = False +n_steps = 0 +while not finish: + iteration_limit = base_iteration_limit - int(graph_env.counter_action / max_numbers_rules * + (base_iteration_limit * iteration_reduction_rate)) + searcher = mcts.mcts(iterationLimit=iteration_limit) + finish, graph_env = make_mcts_step(searcher, graph_env, n_steps, checkpointer) + n_steps += 1 + print(f"number iteration: {n_steps}, counter actions: {graph_env.counter_action} " + + f"reward: {mcts_helper.report.get_best_info()[1]}") +ex = time.time() - start +print(f"time :{ex}") +# saving results of the search +report = mcts_helper.report +path = report.make_time_dependent_path() +report.save() +report.save_visuals() +report.save_lists() +report.save_means() + +# additions to the file +with open(Path(path, "mcts_result.txt"), "a") as file: + original_stdout = sys.stdout + sys.stdout = file + print() + print("Object to grasp:", grasp_object_blueprint.shape) + print("Object initial coordinats:", grasp_object_blueprint.pos) + print("Time optimization:", ex) + print("MAX_NUMBER_RULES:", hp.MAX_NUMBER_RULES) + print("BASE_ITERATION_LIMIT:", hp.BASE_ITERATION_LIMIT) + print("ITERATION_REDUCTION_TIME:", hp.ITERATION_REDUCTION_TIME) + print("CRITERION_WEIGHTS:", + [hp.TIME_CRITERION_WEIGHT, hp.FORCE_CRITERION_WEIGHT, hp.OBJECT_COG_CRITERION_WEIGHT]) + print("CONTROL_OPTIMIZATION_ITERATION:", hp.CONTROL_OPTIMIZATION_ITERATION) + print("TIME_STEP_SIMULATION:", hp.TIME_STEP_SIMULATION) + print("TIME_SIMULATION:", hp.TIME_SIMULATION) + print("FLAG_TIME_NO_CONTACT:", hp.FLAG_TIME_NO_CONTACT) + print("FLAG_TIME_SLIPOUT:", hp.FLAG_TIME_SLIPOUT) + sys.stdout = original_stdout + +simulation_rewarder = control_optimizer.rewarder +simulation_manager = control_optimizer.simulation_scenario +# visualisation in the end of the search +best_graph, reward, best_control = mcts_helper.report.get_best_info() +data = {"initial_value": best_control} +simulation_output = simulation_manager.run_simulation(best_graph, data, True) +res = -simulation_rewarder.calculate_reward(simulation_output) +print("Best reward obtained in the MCTS search:", res) \ No newline at end of file diff --git a/app/top_graphs_visualisation.py b/app/top_graphs_visualisation.py index 0308c9ab..2b236a30 100644 --- a/app/top_graphs_visualisation.py +++ b/app/top_graphs_visualisation.py @@ -19,7 +19,7 @@ def vis_top_n_mechs(report: MCTSSaveable, n: int, object: EnvironmentBodyBluepri graph_report = report.seen_graphs control_optimizer = config_with_standard(grasp_object_blueprint) simulation_rewarder = control_optimizer.rewarder - simulation_manager = control_optimizer.simulation_control + simulation_manager = control_optimizer.simulation_scenario graph_list = graph_report.graph_list sorted_graph_list = sorted(graph_list, key=lambda x: x.reward) @@ -127,6 +127,6 @@ def func_finish(): grasp_object_blueprint = get_object_parametrized_sphere(0.4, 1) grasp_object_blueprint = get_obj_hard_mesh_piramida() report: OptimizedGraphReport = load_saveable( - Path(r"results\Reports_23y_06m_15d_03H_13M\MCTS_data.pickle")) + Path(r"results\Reports_23y_07m_15d_14H_03M\MCTS_data.pickle")) vis_top_n_mechs(report, 3, grasp_object_blueprint) # save_svg_mean_reward( name = 'kek', objecy_name='sphere') \ No newline at end of file diff --git a/rostok/graph_generators/mcts_helper.py b/rostok/graph_generators/mcts_helper.py index 8fee5de0..da926597 100644 --- a/rostok/graph_generators/mcts_helper.py +++ b/rostok/graph_generators/mcts_helper.py @@ -1,18 +1,23 @@ import sys +import os +import pickle +import time from copy import deepcopy from pathlib import Path from statistics import mean - + import matplotlib.pyplot as plt import numpy as np +from rostok.block_builder_chrono.block_builder_chrono_api import \ + ChronoBlockCreatorInterface as creator from rostok.graph_generators.graph_environment import \ GraphVocabularyEnvironment from rostok.graph_grammar.graph_utils import (plot_graph_reward, save_graph_plot_reward) from rostok.graph_grammar.node import GraphGrammar from rostok.graph_grammar.rule_vocabulary import RuleVocabulary from rostok.trajectory_optimizer.control_optimizer import GraphRewardCalculator -from rostok.utils.pickle_save import Saveable +from rostok.utils.pickle_save import Saveable, load_saveable from rostok.utils.states import (MCTSOptimizedState, OptimizedGraph, OptimizedState, RobotState) @@ -329,7 +334,7 @@ def __init__(self, def getReward(self): """Make optimization and calculate reward for the graph of the state. - + It also adds the graph to the seen_graph of the helper.report object """ report = self.helper.report.seen_graphs.check_graph(self.graph) @@ -380,7 +385,7 @@ def prepare_mcts_state_and_helper(graph: GraphGrammar, return mcts_state -def make_mcts_step(searcher, state: MCTSGraphEnvironment, counter): +def make_mcts_step(searcher, state: MCTSGraphEnvironment, counter, checkpointer=None): """Start MCTS search for the state and return the new state corresponding to the action Args: @@ -388,6 +393,7 @@ def make_mcts_step(searcher, state: MCTSGraphEnvironment, counter): state (MCTSGraphEnvironment): starting state for the search counter: counter of the steps """ + start = time.time() state.helper.step_counter = counter action = searcher.search(initialState=state) rule_action = action.get_rule @@ -400,5 +406,160 @@ def make_mcts_step(searcher, state: MCTSGraphEnvironment, counter): main_reward = new_state.getReward() main_control = new_state.movments_trajectory state.helper.set_main_optimized_state(new_state.state, main_reward, main_control) + iteration_time = time.time() - start + if checkpointer: + checkpointer.update_checkpoint_n_logs(iteration_time, new_state) return done, new_state + + +#================================== +# Prototyping +#================================== + + +class CheckpointMCTS(): + """Class include all the information that should be saved as a result of MCTS search. + + Attributes: + seen_graphs (OptimizedGraphReport): graphs obtained in the search + seen_states (OptimizedMCTSStateReport): states obtained in the search + main_state (RobotState): the main state of the MCTS search + + """ + + def __init__(self, + mcts_saveable: MCTSSaveable, + folder_name, + checkpoint_iter=1, + rewrite=True) -> None: + + self.path = "./" + + self.mcts_saveable = mcts_saveable + self.iteration = 0 + self.checkpoint_iter = checkpoint_iter + self.last_iteration_time = 0 + + self.prepare_folders_n_files(folder_name, rewrite) + + def logging(self): + """Saves graphs and info for main and best states.""" + path_to_file = Path(self.path, "log-file.txt") + with open(path_to_file, 'a', encoding='utf-8') as file: + original_stdout = sys.stdout + sys.stdout = file + print(f'MCTS Iteration: {self.iteration}, Iteration time: {self.last_iteration_time}') + print('main_result:') + print('rules:', *self.mcts_saveable.main_simulated_state.state.rule_list) + print('control:', *self.mcts_saveable.main_simulated_state.control) + print('reward:', self.mcts_saveable.main_simulated_state.reward) + print() + print('best_result:') + print('rules:', *self.mcts_saveable.best_simulated_state.state.rule_list) + print('control:', *self.mcts_saveable.best_simulated_state.control) + print('reward:', self.mcts_saveable.best_simulated_state.reward) + print() + print('max number of non-terminal rules:', self.mcts_saveable.non_terminal_rules_limit, + 'search parameter:', self.mcts_saveable.search_parameter) + print() + print("Number of unique mechanisms tested in current MCTS run: ", + len(self.mcts_saveable.seen_graphs.graph_list)) + print("Number of states ", len(self.mcts_saveable.seen_states.state_list)) + print(f"\n----------------------------------\n") + sys.stdout = original_stdout + + def dump_results(self, current_state): + saveables = [self.mcts_saveable.seen_graphs, self.mcts_saveable.seen_states, self.mcts_saveable] + + for instance_saveable in saveables: + with open(Path(self.path, instance_saveable.file_name + '.pickle'), "wb+") as file: + pickle.dump(instance_saveable, file) + + sim_scenario = current_state.optimizer.simulation_scenario + if isinstance(sim_scenario, list): + object_callback = [] + for scene in sim_scenario: + object_callback.append(scene.grasp_object_callback) + scene.grasp_object_callback = None + with open(os.path.join(self.path, "state.pickle"), "wb") as file: + pickle.dump(current_state, file) + for scene, callback_obj in zip(sim_scenario, object_callback): + scene.grasp_object_callback = callback_obj + else: + object_callback = sim_scenario.grasp_object_callback + with open(os.path.join(self.path, "state.pickle"), "wb") as file: + pickle.dump(current_state, file) + sim_scenario.grasp_object_callback = object_callback + + def save(self, current_state): + """Save all information in the object but not object itself.""" + self.logging() + self.dump_results(current_state) + + def update_checkpoint_n_logs(self, iteration_time, current_state): + self.last_iteration_time = iteration_time + if self.iteration % (self.checkpoint_iter + 1) == 0: + self.prepare_folders_n_files(self.path.split("/")[-1], True, True) + self.save(current_state) + + self.iteration += 1 + + def prepare_folders_n_files(self, folder_name, rewrite, loggging=False): + folder_path_to_checkpoint = os.path.join("./", "app/" + "checkpoint/") + + if not os.path.exists(folder_path_to_checkpoint): + print("Create folder for checkpoint") + os.mkdir(folder_path_to_checkpoint) + + folder_path = os.path.join("./", "app/" + "checkpoint/", folder_name) + + if not os.path.exists(folder_path): + print(f"Create checkpoint dictionary - {folder_path}") + os.mkdir(folder_path) + elif rewrite: + print(f"Rewriting data in dictionary - {folder_path}") + path_to_file = Path(folder_path, "log-file.txt") + if os.path.exists(path_to_file) and not loggging: + open(path_to_file, "w").close() + else: + postfix_folder = 1 + folder_path = folder_path + f"_{postfix_folder}" + while os.path.exists(folder_path): + folder_path = folder_path.replace(f"_{postfix_folder-1}", f"_{postfix_folder}") + postfix_folder += 1 + + print(f"Create checkpoint dictionary - {folder_path}") + os.mkdir(folder_path) + + self.path = folder_path + + @classmethod + def restore_optimization(cls, folder_with_checkpoint, checkpoint_iter, grasp_object_blueprint): + if isinstance(grasp_object_blueprint,list): + grasp_object_callback = [(lambda obj=obj: creator.create_environment_body(obj)) for obj in grasp_object_blueprint] + else: + grasp_object_callback = lambda: creator.create_environment_body(grasp_object_blueprint) + + path_to_checkpoint = os.path.join("./","app/","checkpoint/", folder_with_checkpoint) + + if os.path.exists(path_to_checkpoint): + path_mcts_history = os.path.join(path_to_checkpoint, "MCTS_data.pickle") + mcts_saveable = load_saveable(path_mcts_history) + + path_graph_report = os.path.join(path_to_checkpoint, "optimized_graph_report.pickle") + seen_graphs = load_saveable(path_graph_report) + + path_mcts_state = os.path.join(path_to_checkpoint, "optimized_MCTS_state_report.pickle") + seen_mcts_states = load_saveable(path_mcts_state) + + path_last_mcts_state = os.path.join(path_to_checkpoint, "state.pickle") + last_mcts_state = load_saveable(path_last_mcts_state) + + + else: + print("Couldn't find dirictory with previous checkpoint") + + return last_mcts_state, mcts_saveable, seen_graphs, seen_mcts_states diff --git a/rostok/trajectory_optimizer/control_optimizer.py b/rostok/trajectory_optimizer/control_optimizer.py index d3fea623..406e497f 100644 --- a/rostok/trajectory_optimizer/control_optimizer.py +++ b/rostok/trajectory_optimizer/control_optimizer.py @@ -28,25 +28,25 @@ def print_log(self): class CalculatorWithConstTorqueOptimization(GraphRewardCalculator): def __init__(self, - simulation_control, + simulation_scenario, rewarder: SimulationReward, optimization_bounds=(0, 15), optimization_limit=10): """Base class optimizing constant torque for controlling the mechanism. In subclass, it have to override method: bound_parameter, _transform_parameter2data and run_optimization. Args: - simulation_control (Union[list[tuple[ParametrizedSimulation, int]], ParametrizedSimulation]): Define simulation scenario for virtual experiment and weights for each. + simulation_scenario (Union[list[tuple[ParametrizedSimulation, int]], ParametrizedSimulation]): Define simulation scenario for virtual experiment and weights for each. rewarder (SimulationReward): Instance of the class on which the objective function will be calculated optimization_bounds (tuple, optional): Args define the boundaries of the variables to be optimized. Defaults to (0, 15). optimization_limit (int, optional): The maximum number of optimization iterations. Defaults to 10. """ - self.simulation_control = simulation_control + self.simulation_scenario = simulation_scenario self.rewarder: SimulationReward = rewarder self.bounds = optimization_bounds self.limit = optimization_limit def simulate_with_control_parameters(self, data, graph): - return self.simulation_control.run_simulation(graph, data) + return self.simulation_scenario.run_simulation(graph, data) def calculate_reward(self, graph: GraphGrammar): """Constant moment optimization method using scenario simulation and rewarder for calculating objective function. @@ -61,10 +61,10 @@ def calculate_reward(self, graph: GraphGrammar): if not multi_bound: return (0, []) - if isinstance(self.simulation_control, list): + if isinstance(self.simulation_scenario, list): reward = 0 optim_parameters = np.array([]) - for sim_scene in self.simulation_control: + for sim_scene in self.simulation_scenario: result = self.run_optimization(self._reward_with_parameters, multi_bound, args=(graph, sim_scene[0])) @@ -78,7 +78,7 @@ def calculate_reward(self, graph: GraphGrammar): else: result = self.run_optimization(self._reward_with_parameters, multi_bound, - args=(graph, self.simulation_control)) + args=(graph, self.simulation_scenario)) reward = -result.fun optim_parameters = result.x @@ -95,7 +95,7 @@ def optim_parameters2data_control(self, parameters, *args): dict: Dictionary defining the parameters of the control class """ parameters = np.array(parameters) - if isinstance(self.simulation_control, list): + if isinstance(self.simulation_scenario, list): list_args = [args for __ in range(len(parameters))] data_control = list(map(self._transform_parameters2data, parameters, list_args)) else: @@ -169,8 +169,8 @@ def run_optimization(self, callback, multi_bound, args): class CalculatorWithGraphOptimization(GraphRewardCalculator): - def __init__(self, simulation_control, rewarder: SimulationReward, torque_dict): - self.simulation_control = simulation_control + def __init__(self, simulation_scenario, rewarder: SimulationReward, torque_dict): + self.simulation_scenario = simulation_scenario self.rewarder: SimulationReward = rewarder self.torque_dict = torque_dict @@ -189,7 +189,7 @@ def calculate_reward(self, graph: GraphGrammar): return (0, []) control_sequence = self.build_control_from_graph(graph) data = {"initial_value": control_sequence} - simulation_output = self.simulation_control.run_simulation(graph, data) + simulation_output = self.simulation_scenario.run_simulation(graph, data) reward = self.rewarder.calculate_reward(simulation_output) return (reward, control_sequence) @@ -214,13 +214,13 @@ class ConstTorqueOptimizationBranchTemplate(CalculatorWithConstTorqueOptimizatio """ def __init__(self, - simulation_control, + simulation_scenario, rewarder: SimulationReward, optimization_bounds=(0, 15), optimization_limit=10, select_optimisation_value=OptimizationParametr.START, const_parameter=-0.5): - super().__init__(simulation_control, rewarder, optimization_bounds, optimization_limit) + super().__init__(simulation_scenario, rewarder, optimization_bounds, optimization_limit) self.select_optimisation_value = select_optimisation_value self.const_parameter = const_parameter