Skip to content

Commit

Permalink
Merge pull request #545 from ddps-lab/azure-collector-fix
Browse files Browse the repository at this point in the history
구독 수 추가때문에 기존 available_locations False 의 결과 수집을 추가합니다. (Because of the added subscription count, this re-enables collecting results for the previously disabled available_locations=False case.)
  • Loading branch information
krtaiyang authored Mar 2, 2025
2 parents 7236bf7 + 7f770b1 commit 926f1df
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,26 +27,23 @@ def lambda_handler(event, context):
Logger.info(f"Lambda triggered: action: {action}, event_time: {datetime.strftime(event_time_utc_datetime, '%Y-%m-%d %H:%M:%S')}, desired_count: {desired_count}")

if action == FIRST_TIME_ACTION:
# sps_res_availability_zones_true_df, sps_res_availability_zones_false_df = load_sps.collect_spot_placement_score_first_time(desired_count=desired_count)
sps_res_availability_zones_true_df = load_sps.collect_spot_placement_score_first_time(desired_count=desired_count)
sps_res_availability_zones_true_df, sps_res_availability_zones_false_df = load_sps.collect_spot_placement_score_first_time(desired_count=desired_count)

elif action == EVERY_10MIN_ACTION:
# UTC 15:00 (KST 00:00)인 경우 실행 건너뛰기
if event_time_utc_datetime.strftime("%H:%M") == UTC_1500_TIME:
Logger.info("Skipping scheduled time (UTC 15:00, KST 00:00)")
return handle_response(200, "Executed successfully. Scheduled time skipped.", action, event_time_utc_datetime)
# sps_res_availability_zones_true_df, sps_res_availability_zones_false_df = load_sps.collect_spot_placement_score(desired_count=desired_count)
sps_res_availability_zones_true_df = load_sps.collect_spot_placement_score(desired_count=desired_count)
sps_res_availability_zones_true_df, sps_res_availability_zones_false_df = load_sps.collect_spot_placement_score(desired_count=desired_count)

else:
raise ValueError(f"Invalid lambda action.")


if sps_res_availability_zones_true_df is None: raise ValueError("sps_res_true_df is None")
# if sps_res_availability_zones_false_df is None: raise ValueError("sps_res_false_df is None")
if sps_res_availability_zones_false_df is None: raise ValueError("sps_res_false_df is None")

# if not handle_res_df(sps_res_availability_zones_true_df, sps_res_availability_zones_false_df, event_time_utc_datetime):
if not handle_res_df(sps_res_availability_zones_true_df, event_time_utc_datetime):
if not handle_res_df(sps_res_availability_zones_true_df, sps_res_availability_zones_false_df, event_time_utc_datetime):
raise RuntimeError("Failed to handle_res_df")

return handle_response(200, "Executed Successfully!", action, event_time_utc_datetime)
Expand All @@ -57,12 +54,11 @@ def lambda_handler(event, context):
send_slack_message(f"AZURE SPS MODULE EXCEPTION!\n{error_msg}\Log_stream_id: {log_stream_id}")
return handle_response(500, "Execute Failed!", action, event_time_utc_datetime, str(e))

# def handle_res_df(sps_res_true_df, sps_res_false_df, time_datetime):
def handle_res_df(sps_res_true_df, time_datetime):
def handle_res_df(sps_res_true_df, sps_res_false_df, time_datetime):
try:
time_str = time_datetime.strftime("%Y-%m-%d %H:%M:%S")
sps_res_true_df['time'] = time_str
# sps_res_false_df['time'] = time_str
sps_res_false_df['time'] = time_str

sps_res_true_df['AvailabilityZone'] = sps_res_true_df['AvailabilityZone'].where(pd.notna(sps_res_true_df['AvailabilityZone']), None)

Expand All @@ -71,10 +67,9 @@ def handle_res_df(sps_res_true_df, time_datetime):
raise ValueError("price_if_df is None")

success_availability_zone_true = process_zone_data(price_saving_if_df, sps_res_true_df, time_datetime, True)
# success_availability_zone_false = process_zone_data(price_saving_if_df, sps_res_false_df, time_datetime, False)
success_availability_zone_false = process_zone_data(price_saving_if_df, sps_res_false_df, time_datetime, False)

# if success_availability_zone_true and success_availability_zone_false:
if success_availability_zone_true:
if success_availability_zone_true and success_availability_zone_false:
Logger.info("Successfully merged the price/if/sps df, process_zone_data!")
return True
else:
Expand Down
40 changes: 19 additions & 21 deletions collector/spot-dataset/azure/lambda/current_collector/load_sps.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,12 +98,12 @@ def collect_spot_placement_score_first_time(desired_count):
df_greedy_clustering_filtered = sps_prepare_parameters.greedy_clustering_to_create_optimized_request_list(
regions_and_instance_types_filtered_df)

# sps_res_availability_zones_false_df = execute_spot_placement_score_task_by_parameter_pool_df(df_greedy_clustering_filtered, False, desired_count)
# print(f'Time_out_retry_count: {SS_Resources.time_out_retry_count}')
# print(f'Bad_request_retry_count: {SS_Resources.bad_request_retry_count}')
# print(f'Too_many_requests_count: {SS_Resources.too_many_requests_count}')
# print(f'Found_invalid_region_retry_count: {SS_Resources.found_invalid_region_retry_count}')
# print(f'Found_invalid_instance_type_retry_count: {SS_Resources.found_invalid_instance_type_retry_count}')
sps_res_availability_zones_false_df = execute_spot_placement_score_task_by_parameter_pool_df(df_greedy_clustering_filtered, False, desired_count)
print(f'Time_out_retry_count: {SS_Resources.time_out_retry_count}')
print(f'Bad_request_retry_count: {SS_Resources.bad_request_retry_count}')
print(f'Too_many_requests_count: {SS_Resources.too_many_requests_count}')
print(f'Found_invalid_region_retry_count: {SS_Resources.found_invalid_region_retry_count}')
print(f'Found_invalid_instance_type_retry_count: {SS_Resources.found_invalid_instance_type_retry_count}')

S3.upload_file(df_greedy_clustering_filtered, f"{AZURE_CONST.DF_TO_USE_TODAY_PKL_FILENAME}", "pkl")

Expand All @@ -112,8 +112,7 @@ def collect_spot_placement_score_first_time(desired_count):
minutes, seconds = divmod(int(elapsed), 60)
print(f"Prepare the request pool. time: {minutes}min {seconds}sec")

# return sps_res_availability_zones_true_df, sps_res_availability_zones_false_df
return sps_res_availability_zones_true_df
return sps_res_availability_zones_true_df, sps_res_availability_zones_false_df


@log_execution_time
Expand All @@ -137,27 +136,26 @@ def collect_spot_placement_score(desired_count):

get_sps_count_true = SS_Resources.succeed_to_get_sps_count
get_next_available_location_count_true = SS_Resources.succeed_to_get_next_available_location_count
# SS_Resources.succeed_to_get_sps_count = SS_Resources.succeed_to_get_next_available_location_count = 0
# sps_res_availability_zones_false_df = execute_spot_placement_score_task_by_parameter_pool_df(df_greedy_clustering_filtered, False, desired_count)
# print(f'Time_out_retry_count: {SS_Resources.time_out_retry_count}')
# print(f'Bad_request_retry_count: {SS_Resources.bad_request_retry_count}')
# print(f'Too_many_requests_count: {SS_Resources.too_many_requests_count}')
# print(f'Found_invalid_region_retry_count: {SS_Resources.found_invalid_region_retry_count}')
# print(f'Found_invalid_instance_type_retry_count: {SS_Resources.found_invalid_instance_type_retry_count}')
SS_Resources.succeed_to_get_sps_count = SS_Resources.succeed_to_get_next_available_location_count = 0
sps_res_availability_zones_false_df = execute_spot_placement_score_task_by_parameter_pool_df(df_greedy_clustering_filtered, False, desired_count)
print(f'Time_out_retry_count: {SS_Resources.time_out_retry_count}')
print(f'Bad_request_retry_count: {SS_Resources.bad_request_retry_count}')
print(f'Too_many_requests_count: {SS_Resources.too_many_requests_count}')
print(f'Found_invalid_region_retry_count: {SS_Resources.found_invalid_region_retry_count}')
print(f'Found_invalid_instance_type_retry_count: {SS_Resources.found_invalid_instance_type_retry_count}')

print(f'\n========================================')
print(f'df_greedy_clustering_filtered lens: {len(df_greedy_clustering_filtered)}')
print(f'Successfully_to_get_sps_count_true: {get_sps_count_true}')
# print(f'Successfully_to_get_sps_count_false: {SS_Resources.succeed_to_get_sps_count}')
# print(f'Successfully_to_get_sps_count_all: {SS_Resources.succeed_to_get_sps_count + get_sps_count_true}')
print(f'Successfully_to_get_sps_count_false: {SS_Resources.succeed_to_get_sps_count}')
print(f'Successfully_to_get_sps_count_all: {SS_Resources.succeed_to_get_sps_count + get_sps_count_true}')

print(f'Successfully_get_next_available_location_count_true: {get_next_available_location_count_true}')
# print(f'Successfully_get_next_available_location_count_false: {SS_Resources.succeed_to_get_next_available_location_count}')
# print(f'Successfully_get_next_available_location_count_all: {SS_Resources.succeed_to_get_next_available_location_count + get_next_available_location_count_true}')
print(f'Successfully_get_next_available_location_count_false: {SS_Resources.succeed_to_get_next_available_location_count}')
print(f'Successfully_get_next_available_location_count_all: {SS_Resources.succeed_to_get_next_available_location_count + get_next_available_location_count_true}')
print(f'========================================')

# return sps_res_availability_zones_true_df, sps_res_availability_zones_false_df
return sps_res_availability_zones_true_df
return sps_res_availability_zones_true_df, sps_res_availability_zones_false_df


def execute_spot_placement_score_task_by_parameter_pool_df(api_calls_df, availability_zones, desired_count):
Expand Down

0 comments on commit 926f1df

Please sign in to comment.