From 4c744e7b0434d41154c97ba40181eeac693bd866 Mon Sep 17 00:00:00 2001 From: Alind Khare Date: Wed, 21 Feb 2024 14:10:28 -0500 Subject: [PATCH] Update Dynamic Discretization to enable Warm Starts (#87) * Fixed Capacity Constraint Map for dynamic Discretization * Implemented STRL choose exprs reuse from prev solver invocation in dynamic discretization * Added finer Grained discretization around prev solutions * Added input of lower discretization around prev sol * Rectified Bug * Added limitation of total solver time to 1 min * changed time and added more info in errors * Rectified Dynamic Discretization Bugs * Implemented getResourceQuantity() for allocation exprs * Added conf file for dynamic discretization * Format changes to pass builds. * Format changes to main to fix builds. * Changes to pass flake8 compatibility. --------- Co-authored-by: Sukrit Kalra --- ...w_motiv_new_conf_sukrit_runs_strl_1_8.conf | 45 ++++++++ ...w_conf_sukrit_runs_strl_1_8_selective.conf | 47 ++++++++ main.py | 17 +++ profiles/workers/alibaba_cluster.yaml | 2 +- .../include/tetrisched/CapacityConstraint.hpp | 3 +- .../include/tetrisched/Expression.hpp | 11 ++ .../include/tetrisched/OptimizationPasses.hpp | 14 ++- .../include/tetrisched/Scheduler.hpp | 4 +- schedulers/tetrisched/python/Expressions.cpp | 25 +++-- schedulers/tetrisched/python/TetrischedPy.cpp | 48 ++++---- .../tetrisched/src/CapacityConstraint.cpp | 33 +++++- schedulers/tetrisched/src/Expression.cpp | 34 +++++- .../tetrisched/src/OptimizationPasses.cpp | 105 +++++++++++++----- schedulers/tetrisched/src/Scheduler.cpp | 11 +- schedulers/tetrisched_scheduler.py | 34 ++++++ 15 files changed, 359 insertions(+), 74 deletions(-) create mode 100644 configs/new_motiv_new_conf_sukrit_runs_strl_1_8.conf create mode 100644 configs/new_motiv_new_conf_sukrit_runs_strl_1_8_selective.conf diff --git a/configs/new_motiv_new_conf_sukrit_runs_strl_1_8.conf b/configs/new_motiv_new_conf_sukrit_runs_strl_1_8.conf new file mode 100644 index 
00000000..ef9be034 --- /dev/null +++ b/configs/new_motiv_new_conf_sukrit_runs_strl_1_8.conf @@ -0,0 +1,45 @@ +# Output configs. +--log_dir=experiments/new_exps_new_conf/strl_1_8 +--log_file_name=alibaba_trace_replay_tetrisched.log +--csv_file_name=alibaba_trace_replay_tetrisched.csv +--log_level=debug + +# Workload configs. +--execution_mode=replay +--replay_trace=alibaba +--workload_profile_paths=traces/alibaba-cluster-trace-v2018/easy_dag_sukrit_10k.pkl,traces/alibaba-cluster-trace-v2018/medium_dag_sukrit_10k.pkl,traces/alibaba-cluster-trace-v2018/hard_dag_sukrit_10k.pkl +--workload_profile_path_labels=easy,medium,hard +--override_release_policies=poisson,poisson,poisson +--override_num_invocations=0,300,150 +--override_poisson_arrival_rates=0.005,0.0125,0.0125 +--randomize_start_time_max=50 +--min_deadline=5 +--max_deadline=500 +--min_deadline_variances=25,50,10 +--max_deadline_variances=50,100,25 + +# Worker configs. +--worker_profile_path=./profiles/workers/alibaba_cluster.yaml + +# Loader configs. +--alibaba_loader_task_cpu_divisor=10 +--alibaba_loader_min_critical_path_runtimes=200,500,600 +--alibaba_loader_max_critical_path_runtimes=500,1000,1000 + +# Scheduler configs. +#--scheduler=EDF +--scheduler=TetriSched +#--drop_skipped_tasks +--release_taskgraphs +--scheduler_runtime=0 +--enforce_deadlines +--scheduler_time_discretization=1 +--scheduler_enable_optimization_pass +--random_seed=420665456 +--retract_schedules +#--scheduler_log_to_file +--scheduler_dynamic_discretization +--scheduler_max_time_discretization=8 +--scheduler_max_occupancy_threshold=0.999 +--finer_discretization_at_prev_solution +--finer_discretization_window=4 \ No newline at end of file diff --git a/configs/new_motiv_new_conf_sukrit_runs_strl_1_8_selective.conf b/configs/new_motiv_new_conf_sukrit_runs_strl_1_8_selective.conf new file mode 100644 index 00000000..9697d01d --- /dev/null +++ b/configs/new_motiv_new_conf_sukrit_runs_strl_1_8_selective.conf @@ -0,0 +1,47 @@ +# Output configs. 
+--log_dir=experiments/new_exps_new_conf/strl_1_8_selective_new +--log_file_name=alibaba_trace_replay_tetrisched.log +--csv_file_name=alibaba_trace_replay_tetrisched.csv +--log_level=debug + +# Workload configs. +--execution_mode=replay +--replay_trace=alibaba +--workload_profile_paths=traces/alibaba-cluster-trace-v2018/easy_dag_sukrit_10k.pkl,traces/alibaba-cluster-trace-v2018/medium_dag_sukrit_10k.pkl,traces/alibaba-cluster-trace-v2018/hard_dag_sukrit_10k.pkl +--workload_profile_path_labels=easy,medium,hard +--override_release_policies=poisson,poisson,poisson +--override_num_invocations=0,300,150 +--override_poisson_arrival_rates=0.005,0.0125,0.0125 +--randomize_start_time_max=50 +--min_deadline=5 +--max_deadline=500 +--min_deadline_variances=25,50,10 +--max_deadline_variances=50,100,25 + +# Worker configs. +--worker_profile_path=./profiles/workers/alibaba_cluster.yaml + +# Loader configs. +--alibaba_loader_task_cpu_divisor=10 +--alibaba_loader_min_critical_path_runtimes=200,500,600 +--alibaba_loader_max_critical_path_runtimes=500,1000,1000 + +# Scheduler configs. +#--scheduler=EDF +--scheduler=TetriSched +#--drop_skipped_tasks +--release_taskgraphs +--scheduler_runtime=0 +--enforce_deadlines +--scheduler_time_discretization=1 +--scheduler_enable_optimization_pass +--random_seed=420665456 +--retract_schedules +#--scheduler_log_to_file +--scheduler_dynamic_discretization +--scheduler_max_time_discretization=8 +--scheduler_max_occupancy_threshold=0.999 +--finer_discretization_at_prev_solution +--finer_discretization_window=4 +--scheduler_selective_rescheduling +--scheduler_log_to_file diff --git a/main.py b/main.py index fe3e49b9..e38d71cf 100644 --- a/main.py +++ b/main.py @@ -334,6 +334,18 @@ "The discretization is dynamically decided based on the occupancy request for " "each time slice. (default: False)", ) +flags.DEFINE_bool( + "finer_discretization_at_prev_solution", + False, + "If `True`, the scheduler keeps discretization of 1 around previous solution. 
" + "The discretization is dynamically decided based on the occupancy request for " + "each time slice. (default: False)", +) +flags.DEFINE_integer( + "finer_discretization_window", + 5, + "The window around previous solution that keeps discretization of 1.", +) flags.DEFINE_integer( "scheduler_max_time_discretization", 5, @@ -782,6 +794,7 @@ def main(args): _flags=FLAGS, ) elif FLAGS.scheduler == "TetriSched": + finer_discretization = FLAGS.finer_discretization_at_prev_solution scheduler = TetriSchedScheduler( preemptive=FLAGS.preemption, runtime=EventTime(FLAGS.scheduler_runtime, EventTime.Unit.US), @@ -802,6 +815,10 @@ def main(args): ), dynamic_discretization=FLAGS.scheduler_dynamic_discretization, max_occupancy_threshold=FLAGS.scheduler_max_occupancy_threshold, + finer_discretization_at_prev_solution=finer_discretization, + finer_discretization_window=EventTime( + FLAGS.finer_discretization_window, EventTime.Unit.US + ), ) elif FLAGS.scheduler == "GraphenePrime": scheduler = TetriSchedScheduler( diff --git a/profiles/workers/alibaba_cluster.yaml b/profiles/workers/alibaba_cluster.yaml index 38dd94aa..b87b93fd 100644 --- a/profiles/workers/alibaba_cluster.yaml +++ b/profiles/workers/alibaba_cluster.yaml @@ -3,4 +3,4 @@ - name: Worker_1_1 resources: - name: Slot_1 - quantity: 60 + quantity: 70 diff --git a/schedulers/tetrisched/include/tetrisched/CapacityConstraint.hpp b/schedulers/tetrisched/include/tetrisched/CapacityConstraint.hpp index a0f11385..fb48da06 100644 --- a/schedulers/tetrisched/include/tetrisched/CapacityConstraint.hpp +++ b/schedulers/tetrisched/include/tetrisched/CapacityConstraint.hpp @@ -144,7 +144,8 @@ class CapacityConstraintMap { const Time startTime, const Time duration, const IndicatorT usageIndicator, const PartitionUsageT usageVariable, - std::optional