From 0314d9712d172f64076d35e38ed1b2de911d5f22 Mon Sep 17 00:00:00 2001 From: noopur Date: Wed, 13 Nov 2024 05:54:57 +0000 Subject: [PATCH 1/7] Logic to print score and training details Signed-off-by: noopur --- tests/end_to_end/models/participants.py | 6 +- tests/end_to_end/utils/federation_helper.py | 3 +- tests/end_to_end/utils/xml_helper.py | 77 ++++++++++++++++++--- 3 files changed, 73 insertions(+), 13 deletions(-) diff --git a/tests/end_to_end/models/participants.py b/tests/end_to_end/models/participants.py index 0469868ea8..8dfe95b4fc 100644 --- a/tests/end_to_end/models/participants.py +++ b/tests/end_to_end/models/participants.py @@ -292,8 +292,7 @@ def start(self): """ try: log.info(f"Starting {self.name}") - curr_time = datetime.now().strftime("%Y%m%d_%H%M%S") - filename = f"{self.name}_{curr_time}.log" + filename = f"{self.name}.log" res_file = os.path.join(os.getcwd(), self.workspace_path, filename) bg_file = open(res_file, "w", buffering=1) @@ -412,8 +411,7 @@ def start(self): """ try: log.info(f"Starting {self.collaborator_name}") - curr_time = datetime.now().strftime("%Y%m%d_%H%M%S") - filename = f"{self.collaborator_name}_{curr_time}.log" + filename = f"{self.collaborator_name}.log" res_file = os.path.join(os.getcwd(), self.workspace_path, filename) bg_file = open(res_file, "w", buffering=1) diff --git a/tests/end_to_end/utils/federation_helper.py b/tests/end_to_end/utils/federation_helper.py index a4addbc49f..3cb091b7ce 100644 --- a/tests/end_to_end/utils/federation_helper.py +++ b/tests/end_to_end/utils/federation_helper.py @@ -91,7 +91,8 @@ def verify_federation_run_completion(fed_obj, results): for i, participant in enumerate(fed_obj.collaborators + [fed_obj.aggregator]) ] - # Result will contain a list of tuple of replica and operator objects. + # Result will contain a list of boolean values for all the participants. 
+ # True - successful completion, False - failed/incomplete results = [f.result() for f in futures] log.info(f"Results: {results}") diff --git a/tests/end_to_end/utils/xml_helper.py b/tests/end_to_end/utils/xml_helper.py index b3812acc3c..06715c59f1 100644 --- a/tests/end_to_end/utils/xml_helper.py +++ b/tests/end_to_end/utils/xml_helper.py @@ -13,6 +13,47 @@ testsuites = tree.getroot() +def get_aggregator_logs(model_name): + """ + Get the aggregator logs to fetch the metric values and scores + Args: + model_name: the model name for which the aggregator logs are to be fetched + Returns: + tuple: the locally tuned model validation, train, aggregated model validation and score + """ + lt_mv, train, agg_mv, score = None, None, None, "NA" + + workspace_name = "workspace_" + model_name + agg_log_file = os.path.join("results", workspace_name, "aggregator.log") + + if not os.path.exists(agg_log_file): + print(f"Aggregator log file {agg_log_file} not found.") + else: + with open(agg_log_file, 'r') as f: + for raw_line in f: + # Log file contains aggregator.py: which gets concatenated with the actual log line if not stripped + line = raw_line.strip() if "aggregator.py:" not in raw_line else raw_line.split("aggregator.py:")[0].strip() + # Fetch the metric origin and aggregator details + if "metric_origin" in line and "aggregator" in line: + if "locally_tuned_model_validation" in line: + reqd_line = line.strip() if "}" in line else line.strip() + next(f).strip() + lt_mv = eval(reqd_line.split("METRIC")[1].strip('"')) + if "train" in line: + reqd_line = line.strip() if "}" in line else line.strip() + next(f).strip() + train = eval(reqd_line.split("METRIC")[1].strip('"')) + if "aggregated_model_validation" in line: + reqd_line = line.strip() if "}" in line else line.strip() + next(f).strip() + agg_mv = eval(reqd_line.split("METRIC")[1].strip('"')) + + # Fetch the best model details + if "saved the best model" in line: + reqd_line = line.strip() + score_line = reqd_line.split("METRIC")[1].strip('"').strip() + score = score_line.split("score")[1].strip() + + return (lt_mv, train, agg_mv, score) + + def get_test_status(result): """ Get the test status/verdict @@ -64,12 +105,32 @@ def get_testcase_result(): return database_list -result = get_testcase_result() +if __name__ == "__main__": + """ + Main function to get the test case results and aggregator logs + And write the results to GitHub step summary + """ + score = "NA" + result = get_testcase_result() + + if not os.getenv("MODEL_NAME"): + print("MODEL_NAME is not set, cannot find out aggregator logs") + else: + (lt_mv, train, agg_mv, score) = get_aggregator_logs(os.getenv("MODEL_NAME")) + + # Write the results to GitHub step summary + with open(os.getenv('GITHUB_STEP_SUMMARY'), 'a') as fh: + # DO NOT change the print statements + print("| Name | Time (in seconds) | Result | Score (if applicable) |", file=fh) + print("| ------------- | ------------- | ------------- | ------------- |", file=fh) + for item in result: + print(f"| {item['name']} | {item['time']} | {item['result']} | {score} |", file=fh) + print("", file=fh) -# Write the results to GitHub step summary -with open(os.getenv('GITHUB_STEP_SUMMARY'), 'a') as fh: - # DO NOT change the print statements - print("| Name | Time (in seconds) | Result |", file=fh) - print("| ------------- | ------------- | ------------- |", file=fh) - for item in result: - print(f"| {item['name']} | {item['time']} | {item['result']} |", file=fh) + # DO NOT change the print statements + if lt_mv and train and agg_mv: + 
print("| Task | Metric Name | Metric Value | Round |", file=fh) + print("| ------------- | ------------- | ------------- | ------------- |", file=fh) + print(f"| {lt_mv['task_name']} | {lt_mv['metric_name']} | {lt_mv['metric_value']} | {lt_mv['round']} |", file=fh) + print(f"| {train['task_name']} | {train['metric_name']} | {train['metric_value']} | {train['round']} |", file=fh) + print(f"| {agg_mv['task_name']} | {agg_mv['metric_name']} | {agg_mv['metric_value']} | {agg_mv['round']} |", file=fh) From 0ce72f9ff60ef71a38747d0145e3a1c02232fb25 Mon Sep 17 00:00:00 2001 From: noopur Date: Wed, 13 Nov 2024 06:20:46 +0000 Subject: [PATCH 2/7] Add collab and rounds to summary Signed-off-by: noopur --- tests/end_to_end/utils/xml_helper.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/end_to_end/utils/xml_helper.py b/tests/end_to_end/utils/xml_helper.py index 06715c59f1..4b29f00acc 100644 --- a/tests/end_to_end/utils/xml_helper.py +++ b/tests/end_to_end/utils/xml_helper.py @@ -118,19 +118,21 @@ def get_testcase_result(): else: (lt_mv, train, agg_mv, score) = get_aggregator_logs(os.getenv("MODEL_NAME")) + num_cols = os.getenv("NUM_COLLABORATORS") + num_rounds = os.getenv("NUM_ROUNDS") # Write the results to GitHub step summary with open(os.getenv('GITHUB_STEP_SUMMARY'), 'a') as fh: # DO NOT change the print statements - print("| Name | Time (in seconds) | Result | Score (if applicable) |", file=fh) - print("| ------------- | ------------- | ------------- | ------------- |", file=fh) + print("| Name | Time (in seconds) | Result | Score (if applicable) | Collaborators | Rounds to train |", file=fh) + print("| ------------- | ------------- | ------------- | ------------- | ------------- | ------------- |", file=fh) for item in result: - print(f"| {item['name']} | {item['time']} | {item['result']} | {score} |", file=fh) + print(f"| {item['name']} | {item['time']} | {item['result']} | {score} | {num_cols} | {num_rounds} |", file=fh) print("", file=fh) # DO NOT change the print statements if lt_mv and train and agg_mv: print("| Task | Metric Name | Metric Value | Round |", file=fh) print("| ------------- | ------------- | ------------- | ------------- |", file=fh) - print(f"| {lt_mv['task_name']} | {lt_mv['metric_name']} | {lt_mv['metric_value']} | {lt_mv['round']} |", file=fh) - print(f"| {train['task_name']} | {train['metric_name']} | {train['metric_value']} | {train['round']} |", file=fh) - print(f"| {agg_mv['task_name']} | {agg_mv['metric_name']} | {agg_mv['metric_value']} | {agg_mv['round']} |", file=fh) + print(f"| {lt_mv['task_name']} | {lt_mv['metric_name']} | {lt_mv['metric_value']} | {int(lt_mv['round'] + 1)} |", file=fh) + print(f"| {train['task_name']} | {train['metric_name']} | {train['metric_value']} | {int(train['round'] + 1)} |", file=fh) + print(f"| {agg_mv['task_name']} | {agg_mv['metric_name']} | {agg_mv['metric_value']} | {int(agg_mv['round'] + 1)} |", file=fh) From 86888a8335ee183f20f4de074beb19b545f62a98 Mon Sep 17 00:00:00 2001 From: noopur Date: Wed, 13 Nov 2024 08:14:27 +0000 Subject: [PATCH 3/7] Use only agg accuracy Signed-off-by: noopur --- .github/workflows/task_runner_e2e.yml | 2 +- tests/end_to_end/utils/test_summary_helper.py | 124 ++++++++++++++++ tests/end_to_end/utils/xml_helper.py | 138 ------------------ 3 files changed, 125 insertions(+), 139 deletions(-) create mode 100644 tests/end_to_end/utils/test_summary_helper.py delete mode 100644 tests/end_to_end/utils/xml_helper.py diff --git a/.github/workflows/task_runner_e2e.yml 
b/.github/workflows/task_runner_e2e.yml index a1e52df6a3..e6b0b2edb3 100644 --- a/.github/workflows/task_runner_e2e.yml +++ b/.github/workflows/task_runner_e2e.yml @@ -78,7 +78,7 @@ jobs: if: steps.run_task_runner_tests.outcome == 'success' || steps.run_task_runner_tests.outcome == 'failure' run: | export PYTHONPATH="$PYTHONPATH:." - python tests/end_to_end/utils/xml_helper.py + python tests/end_to_end/utils/test_sumamry_helper.py echo "Test summary printed" - name: Tar files # Tar the test results only if the tests were run diff --git a/tests/end_to_end/utils/test_summary_helper.py b/tests/end_to_end/utils/test_summary_helper.py new file mode 100644 index 0000000000..e82ecfe2c2 --- /dev/null +++ b/tests/end_to_end/utils/test_summary_helper.py @@ -0,0 +1,124 @@ +# Copyright 2020-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import xml.etree.ElementTree as ET +from lxml import etree +import os + +# Initialize the XML parser +parser = etree.XMLParser(recover=True, encoding='utf-8') +tree = ET.parse("results/results.xml", parser=parser) + +# Get the root element +testsuites = tree.getroot() + + +def get_aggregated_accuracy(agg_log_file): + """ + Get the aggregated accuracy from aggregator logs + Args: + agg_log_file: the aggregator log file + Returns: + agg_accuracy: the aggregated accuracy + """ + if not os.path.exists(agg_log_file): + print(f"Aggregator log file {agg_log_file} not found. Cannot get aggregated accuracy") + return "Not Found" + + # Example line(s) containing spaces and special characters: + """ + METRIC {'metric_origin': 'aggregator', 'task_name': 'aggregated_model_validation', 'metric_name': 'accuracy', 'metric_value': aggregator.py:933 + 0.15911591053009033, 'round': 0} + """ + try: + with open(agg_log_file, 'r') as f: + for line in f: + if "metric_origin" in line and "aggregator" in line and "aggregated_model_validation" in line: + line = line.split("aggregator.py:")[0].strip() + # If the line does not contain closing bracket "}", then concatenate the next line + reqd_line = line if "}" in line else line + next(f).strip() + agg_accuracy = eval(reqd_line.split("METRIC")[1].strip('"'))["metric_value"] + return agg_accuracy + + except Exception as e: + # Do not fail the test if the accuracy cannot be fetched + print(f"Error while reading aggregator log file: {e}") + return "Not Found" + + +def get_test_status(result): + """ + Get the test status/verdict + Args + result: the result object to check` + Returns + status of the test status + """ + status = "FAILED" + if "failure" in result.tag or "error" in result.tag: + # If the result has a tag "failure", set status as "FAIL" + status = "FAILED" + elif "skipped" in result.tag: + # If the result has a tag "skipped", set status as "SKIPPED" + status = "SKIPPED" + else: + status = "PASSED" + return status + + +def get_testcase_result(): + """ + Get the test case results from the XML file + """ + database_list = [] + status = None + # Iterate over each testsuite in testsuites + for testsuite in testsuites: + # Populate testcase details in a dictionary + for testcase in testsuite: + database_dict = {} + if testcase.attrib.get("name"): + database_dict["name"] = testcase.attrib.get("name") + database_dict["time"] = testcase.attrib.get("time") + + # Successful test won't have any result/subtag + if len(testcase) == 0: + database_dict["result"] = "PASSED" + + # Iterate over each result in testsuite + for result in testcase: + status = get_test_status(result) + database_dict["result"] = status + + # Append the 
dictionary to database_list + database_list.append(database_dict) + status = None + + return database_list + + +if __name__ == "__main__": + """ + Main function to get the test case results and aggregator logs + And write the results to GitHub step summary + """ + result = get_testcase_result() + + num_cols = os.getenv("NUM_COLLABORATORS") + num_rounds = os.getenv("NUM_ROUNDS") + model_name = os.getenv("MODEL_NAME") + + if not model_name: + print("MODEL_NAME is not set, cannot find out aggregator logs") + else: + workspace_name = "workspace_" + model_name + agg_log_file = os.path.join("results", workspace_name, "aggregator.log") + agg_accuracy = get_aggregated_accuracy(agg_log_file) + + # Write the results to GitHub step summary + with open(os.getenv('GITHUB_STEP_SUMMARY'), 'a') as fh: + # DO NOT change the print statements + print("| Name | Time (in seconds) | Result | Collaborators | Rounds to train | Score (if applicable) |", file=fh) + print("| ------------- | ------------- | ------------- | ------------- | ------------- | ------------- |", file=fh) + for item in result: + print(f"| {item['name']} | {item['time']} | {item['result']} | {num_cols} | {num_rounds} | {agg_accuracy} |", file=fh) diff --git a/tests/end_to_end/utils/xml_helper.py b/tests/end_to_end/utils/xml_helper.py deleted file mode 100644 index 4b29f00acc..0000000000 --- a/tests/end_to_end/utils/xml_helper.py +++ /dev/null @@ -1,138 +0,0 @@ -# Copyright 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import xml.etree.ElementTree as ET -from lxml import etree -import os - -# Initialize the XML parser -parser = etree.XMLParser(recover=True, encoding='utf-8') -tree = ET.parse("results/results.xml", parser=parser) - -# Get the root element -testsuites = tree.getroot() - - -def get_aggregator_logs(model_name): - """ - Get the aggregator logs to fetch the metric values and scores - Args: - model_name: the model name for which the aggregator logs are to be fetched - Returns: - tuple: the locally tuned model validation, train, aggregated model validation and score - """ - lt_mv, train, agg_mv, score = None, None, None, "NA" - - workspace_name = "workspace_" + model_name - agg_log_file = os.path.join("results", workspace_name, "aggregator.log") - - if not os.path.exists(agg_log_file): - print(f"Aggregator log file {agg_log_file} not found.") - else: - with open(agg_log_file, 'r') as f: - for raw_line in f: - # Log file contains aggregator.py: which gets concatenated with the actual log line if not stripped - line = raw_line.strip() if "aggregator.py:" not in raw_line else raw_line.split("aggregator.py:")[0].strip() - # Fetch the metric origin and aggregator details - if "metric_origin" in line and "aggregator" in line: - if "locally_tuned_model_validation" in line: - reqd_line = line.strip() if "}" in line else line.strip() + next(f).strip() - lt_mv = eval(reqd_line.split("METRIC")[1].strip('"')) - if "train" in line: - reqd_line = line.strip() if "}" in line else line.strip() + next(f).strip() - train = eval(reqd_line.split("METRIC")[1].strip('"')) - if "aggregated_model_validation" in line: - reqd_line = line.strip() if "}" in line else line.strip() + next(f).strip() - agg_mv = eval(reqd_line.split("METRIC")[1].strip('"')) - - # Fetch the best model details - if "saved the best model" in line: - reqd_line = line.strip() - score_line = reqd_line.split("METRIC")[1].strip('"').strip() - score = score_line.split("score")[1].strip() - - return (lt_mv, train, agg_mv, score) - - -def get_test_status(result): - """ 
- Get the test status/verdict - Args - result: the result object to check` - Returns - status of the test status - """ - status = "FAILED" - if "failure" in result.tag or "error" in result.tag: - # If the result has a tag "failure", set status as "FAIL" - status = "FAILED" - elif "skipped" in result.tag: - # If the result has a tag "skipped", set status as "SKIPPED" - status = "SKIPPED" - else: - status = "PASSED" - return status - - -def get_testcase_result(): - """ - Get the test case results from the XML file - """ - database_list = [] - status = None - # Iterate over each testsuite in testsuites - for testsuite in testsuites: - # Populate testcase details in a dictionary - for testcase in testsuite: - database_dict = {} - if testcase.attrib.get("name"): - database_dict["name"] = testcase.attrib.get("name") - database_dict["time"] = testcase.attrib.get("time") - - # Successful test won't have any result/subtag - if len(testcase) == 0: - database_dict["result"] = "PASSED" - - # Iterate over each result in testsuite - for result in testcase: - status = get_test_status(result) - database_dict["result"] = status - - # Append the dictionary to database_list - database_list.append(database_dict) - status = None - - return database_list - - -if __name__ == "__main__": - """ - Main function to get the test case results and aggregator logs - And write the results to GitHub step summary - """ - score = "NA" - result = get_testcase_result() - - if not os.getenv("MODEL_NAME"): - print("MODEL_NAME is not set, cannot find out aggregator logs") - else: - (lt_mv, train, agg_mv, score) = get_aggregator_logs(os.getenv("MODEL_NAME")) - - num_cols = os.getenv("NUM_COLLABORATORS") - num_rounds = os.getenv("NUM_ROUNDS") - # Write the results to GitHub step summary - with open(os.getenv('GITHUB_STEP_SUMMARY'), 'a') as fh: - # DO NOT change the print statements - print("| Name | Time (in seconds) | Result | Score (if applicable) | Collaborators | Rounds to train |", file=fh) - print("| ------------- | ------------- | ------------- | ------------- | ------------- | ------------- |", file=fh) - for item in result: - print(f"| {item['name']} | {item['time']} | {item['result']} | {score} | {num_cols} | {num_rounds} |", file=fh) - print("", file=fh) - - # DO NOT change the print statements - if lt_mv and train and agg_mv: - print("| Task | Metric Name | Metric Value | Round |", file=fh) - print("| ------------- | ------------- | ------------- | ------------- |", file=fh) - print(f"| {lt_mv['task_name']} | {lt_mv['metric_name']} | {lt_mv['metric_value']} | {int(lt_mv['round'] + 1)} |", file=fh) - print(f"| {train['task_name']} | {train['metric_name']} | {train['metric_value']} | {int(train['round'] + 1)} |", file=fh) - print(f"| {agg_mv['task_name']} | {agg_mv['metric_name']} | {agg_mv['metric_value']} | {int(agg_mv['round'] + 1)} |", file=fh) From 2646607732c9ea333d12242b094d4aac2d58bcbc Mon Sep 17 00:00:00 2001 From: noopur Date: Wed, 13 Nov 2024 08:17:51 +0000 Subject: [PATCH 4/7] Typo fix Signed-off-by: noopur --- .github/workflows/task_runner_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/task_runner_e2e.yml b/.github/workflows/task_runner_e2e.yml index e6b0b2edb3..f1be185a89 100644 --- a/.github/workflows/task_runner_e2e.yml +++ b/.github/workflows/task_runner_e2e.yml @@ -78,7 +78,7 @@ jobs: if: steps.run_task_runner_tests.outcome == 'success' || steps.run_task_runner_tests.outcome == 'failure' run: | export PYTHONPATH="$PYTHONPATH:." 
- python tests/end_to_end/utils/test_sumamry_helper.py + python tests/end_to_end/utils/test_summary_helper.py echo "Test summary printed" - name: Tar files # Tar the test results only if the tests were run From a05cbf13cae2a1c3aef40bd14b830636782a1019 Mon Sep 17 00:00:00 2001 From: noopur Date: Wed, 13 Nov 2024 08:35:23 +0000 Subject: [PATCH 5/7] Renamed summary file Signed-off-by: noopur --- .github/workflows/task_runner_e2e.yml | 2 +- tests/end_to_end/models/participants.py | 1 - .../utils/{test_summary_helper.py => summary_helper.py} | 0 3 files changed, 1 insertion(+), 2 deletions(-) rename tests/end_to_end/utils/{test_summary_helper.py => summary_helper.py} (100%) diff --git a/.github/workflows/task_runner_e2e.yml b/.github/workflows/task_runner_e2e.yml index f1be185a89..f81869939b 100644 --- a/.github/workflows/task_runner_e2e.yml +++ b/.github/workflows/task_runner_e2e.yml @@ -78,7 +78,7 @@ jobs: if: steps.run_task_runner_tests.outcome == 'success' || steps.run_task_runner_tests.outcome == 'failure' run: | export PYTHONPATH="$PYTHONPATH:." - python tests/end_to_end/utils/test_summary_helper.py + python tests/end_to_end/utils/summary_helper.py echo "Test summary printed" - name: Tar files # Tar the test results only if the tests were run diff --git a/tests/end_to_end/models/participants.py b/tests/end_to_end/models/participants.py index 8dfe95b4fc..5dc582a06c 100644 --- a/tests/end_to_end/models/participants.py +++ b/tests/end_to_end/models/participants.py @@ -2,7 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 import os -from datetime import datetime import yaml import logging diff --git a/tests/end_to_end/utils/test_summary_helper.py b/tests/end_to_end/utils/summary_helper.py similarity index 100% rename from tests/end_to_end/utils/test_summary_helper.py rename to tests/end_to_end/utils/summary_helper.py From 60c06226533246519659f64ebb1eeae86090dd2d Mon Sep 17 00:00:00 2001 From: noopur Date: Wed, 13 Nov 2024 08:49:57 +0000 Subject: [PATCH 6/7] Job name as tr to help in ZephyrScale later Signed-off-by: noopur --- .github/workflows/task_runner_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/task_runner_e2e.yml b/.github/workflows/task_runner_e2e.yml index f81869939b..84753daedf 100644 --- a/.github/workflows/task_runner_e2e.yml +++ b/.github/workflows/task_runner_e2e.yml @@ -30,7 +30,7 @@ env: jobs: test_run: - name: test + name: tr # do not change this name, it is used in the ZephyrScale fetch logic. runs-on: ubuntu-22.04 timeout-minutes: 120 # 2 hours strategy: From b9466d4f541288d25a775ddff25c6031fd56ce8e Mon Sep 17 00:00:00 2001 From: noopur Date: Wed, 13 Nov 2024 09:28:46 +0000 Subject: [PATCH 7/7] Remove comment related to ZS Signed-off-by: noopur --- .github/workflows/task_runner_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/task_runner_e2e.yml b/.github/workflows/task_runner_e2e.yml index 84753daedf..7f7f904aa3 100644 --- a/.github/workflows/task_runner_e2e.yml +++ b/.github/workflows/task_runner_e2e.yml @@ -30,7 +30,7 @@ env: jobs: test_run: - name: tr # do not change this name, it is used in the ZephyrScale fetch logic. + name: tr runs-on: ubuntu-22.04 timeout-minutes: 120 # 2 hours strategy:
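
Note (not part of the patches above): a minimal standalone sketch of the METRIC-line parsing idea that summary_helper.get_aggregated_accuracy() in this series implements. The function name read_aggregated_accuracy, the example workspace path, and the exact log layout (taken from the docstring example in PATCH 3/7) are assumptions for illustration only, and ast.literal_eval is used here instead of eval as a safer way to parse the logged dict.

    import ast
    import os
    import re


    def read_aggregated_accuracy(agg_log_file):
        """Return the last aggregated_model_validation metric value found, or None."""
        if not os.path.exists(agg_log_file):
            return None

        accuracy = None
        with open(agg_log_file, "r") as f:
            for line in f:
                if "metric_origin" not in line or "aggregated_model_validation" not in line:
                    continue
                # The METRIC dict may wrap onto the next line; join lines until it closes.
                while "}" not in line:
                    nxt = next(f, "")
                    if not nxt:
                        break
                    line = line.rstrip() + " " + nxt.strip()
                # Drop the rich-logger "<file>.py:<lineno>" column, then pull out the dict.
                cleaned = re.sub(r"\S+\.py:\d+", "", line)
                match = re.search(r"\{.*\}", cleaned)
                if not match:
                    continue
                try:
                    metrics = ast.literal_eval(match.group(0))
                except (ValueError, SyntaxError):
                    continue
                accuracy = metrics.get("metric_value", accuracy)
        return accuracy


    if __name__ == "__main__":
        # Hypothetical path; the real workspace directory depends on MODEL_NAME.
        print(read_aggregated_accuracy("results/workspace_torch_cnn_mnist/aggregator.log"))

Because ast.literal_eval rejects anything that is not a plain Python literal, a malformed or truncated log line is skipped rather than executed, which is the main reason to prefer it over eval in this kind of log scraping.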