Replace xml.etree.ElementTree.parse with its defusedxml equivalent (#1230)

* Replace xml.etree.ElementTree.parse with its defusedxml equivalent
  Signed-off-by: Chaurasiya, Payal <[email protected]>
* convert to json
  Signed-off-by: Chaurasiya, Payal <[email protected]>
* Fix memory logs and create pdf
  Signed-off-by: Chaurasiya, Payal <[email protected]>
---------
Signed-off-by: Chaurasiya, Payal <[email protected]>
securefederatedai · Dec 27, 2024 · 18cda3e · 18cda3e
1 parent c280f10
commit 18cda3e
Show file tree

Hide file tree

Showing 4 changed files with 131 additions and 4 deletions.
diff --git a/test-requirements.txt b/test-requirements.txt
@@ -4,3 +4,6 @@ paramiko
 pytest==8.3.4
 pytest-asyncio==0.25.0
 pytest-mock==3.14.0
+defusedxml==0.7.1
+matplotlib==3.10.0
+fpdf==1.7.2
diff --git a/tests/end_to_end/test_suites/memory_logs_tests.py b/tests/end_to_end/test_suites/memory_logs_tests.py
@@ -9,6 +9,7 @@
 from tests.end_to_end.utils.common_fixtures import fx_federation_tr, fx_federation_tr_dws
 import tests.end_to_end.utils.constants as constants
 from tests.end_to_end.utils import federation_helper as fed_helper, ssh_helper as ssh
+from tests.end_to_end.utils.generate_report import generate_memory_report
 
 log = logging.getLogger(__name__)
 
@@ -78,7 +79,9 @@ def _log_memory_usage(request, fed_obj):
     ), "Aggregator memory usage file is not available"
 
     # Log the aggregator memory usage details
-    memory_usage_dict = json.load(open(aggregator_memory_usage_file))
+    memory_usage_dict = _convert_to_json(aggregator_memory_usage_file)
+    aggregator_path = os.path.join(fed_obj.workspace_path, "aggregator")
+    generate_memory_report(memory_usage_dict, aggregator_path)
 
     # check memory usage entries for each round
     assert (
@@ -98,10 +101,30 @@ def _log_memory_usage(request, fed_obj):
             collaborator_memory_usage_file
         ), f"Memory usage file for collaborator {collaborator.collaborator_name} is not available"
 
-        memory_usage_dict = json.load(open(collaborator_memory_usage_file))
+        memory_usage_dict = _convert_to_json(collaborator_memory_usage_file)
+        collaborator_path = os.path.join(fed_obj.workspace_path, collaborator.name)
+        generate_memory_report(memory_usage_dict, collaborator_path)
 
         assert (
             len(memory_usage_dict) == request.config.num_rounds
         ), f"Memory usage details are not available for all rounds for collaborator {collaborator.collaborator_name}"
 
     log.info("Memory usage details are available for all participants")
+
+
+def _convert_to_json(file):
+    """
+    Reads a file containing JSON objects, one per line, and converts them into a list of parsed JSON objects.
+
+    Blank or whitespace-only lines (for example an empty line at the end of the
+    file) are skipped instead of raising ``json.JSONDecodeError``.
+
+    Args:
+        file (str): The path to the file containing JSON objects, one per line.
+
+    Returns:
+        list: The parsed JSON objects, in the order they appear in the file.
+
+    Raises:
+        json.JSONDecodeError: If a non-blank line is not valid JSON.
+    """
+    with open(file, 'r') as infile:
+        # Iterate the file lazily rather than materialising every line with readlines().
+        return [json.loads(line) for line in infile if line.strip()]
diff --git a/tests/end_to_end/utils/generate_report.py b/tests/end_to_end/utils/generate_report.py
@@ -0,0 +1,101 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+from scipy.stats import linregress
+from fpdf import FPDF
+
+class PDF(FPDF):
+    """FPDF document with helpers for writing titled chapters of plain text."""
+
+    def header(self):
+        """Select Arial bold 14 for the page header; no header text is printed."""
+        self.set_font('Arial', style='B', size=14)
+
+    def chapter_title(self, title):
+        """Start a new page and print ``title`` left-aligned in Arial bold 14."""
+        self.add_page()
+        self.set_font('Arial', style='B', size=14)
+        # ln=1 places the cursor at the beginning of the next line after the title.
+        self.cell(0, 10, title, border=0, ln=1, align='L')
+
+    def chapter_body(self, body):
+        """Print ``body`` as a multi-line text cell in regular Arial 12."""
+        self.set_font('Arial', style='', size=12)
+        self.multi_cell(0, 10, body)
+
+def generate_memory_report(memory_usage_dict, workspace_path):
+    """
+    Generates a memory usage report (chart, statistics file and PDF) from per-round records.
+
+    Parameters:
+    memory_usage_dict: Memory usage records handed directly to ``pd.DataFrame``; the
+        resulting frame must contain the columns ``round_number`` and
+        ``virtual_memory/used``.
+    workspace_path (str): Directory that receives ``mem_usage_plot.png``,
+        ``mem_stats.txt`` and ``MemAnalysis.pdf``. It is not created here.
+
+    Returns:
+    None
+    """
+    frame = pd.DataFrame(memory_usage_dict)
+
+    # Chart: used virtual memory plotted against the round number.
+    plt.figure(figsize=(10, 5))
+    rounds = frame['round_number']
+    used = frame['virtual_memory/used']
+    plt.plot(rounds, used, marker='o')
+    plt.title('Memory Usage per Round')
+    plt.xlabel('round_number')
+    plt.ylabel('Virtual Memory Used (MB)')
+    plt.grid(True)
+    output_path = f"{workspace_path}/mem_usage_plot.png"
+    plt.savefig(output_path)
+    plt.close()
+
+    # Summary statistics, each rounded to two decimals.
+    summary_lines = [
+        f"Minimum Memory Used: {round(used.min(), 2)} MB\n",
+        f"Maximum Memory Used: {round(used.max(), 2)} MB\n",
+        f"Mean Memory Used: {round(used.mean(), 2)} MB\n",
+        f"Variance: {round(used.var(), 2)}\n",
+        f"Standard Deviation: {round(used.std(), 2)}\n",
+    ]
+    # Slope of a linear fit of used memory against the frame's row index;
+    # add_conclusion reads a positive value as growing memory usage.
+    slope = round(linregress(frame.index, used).slope, 2)
+    summary_lines.append(f"Slope: {slope}\n")
+
+    # The statistics go through a text file because add_statistical_overview reads them back.
+    stats_path = f"{workspace_path}/mem_stats.txt"
+    with open(stats_path, 'w') as stats_file:
+        stats_file.writelines(summary_lines)
+
+    # Assemble the PDF, one chapter per helper.
+    report = PDF()
+    add_introduction(report)
+    add_chart_analysis(report, output_path, frame)
+    add_statistical_overview(report, stats_path)
+    add_conclusion(report, slope)
+    pdf_output_path = f"{workspace_path}/MemAnalysis.pdf"
+    report.output(pdf_output_path)
+
+    print("Memory report generation completed. Report saved to:", pdf_output_path)
+
+def add_introduction(pdf):
+    """Add the 'Introduction' chapter with a fixed description of the analysis to ``pdf``."""
+    pdf.chapter_title('Introduction')
+    pdf.chapter_body(
+        "The purpose of this memory analysis is to identify memory usage trends and potential bottlenecks. "
+        "This analysis focuses on the relationship between round information and memory usage."
+    )
+
+def add_chart_analysis(pdf, output_path, data):
+    """
+    Add the 'Chart Analysis' chapter: the chart image plus one line per large memory change.
+
+    A change is reported when the difference in ``virtual_memory/used`` between two
+    consecutive rows of ``data`` exceeds 500 in absolute value.
+
+    Parameters:
+    pdf: Report object providing ``chapter_title``, ``image`` and ``chapter_body``.
+    output_path (str): Path of the chart image to embed.
+    data: Frame with the columns ``virtual_memory/used`` and ``round_number``.
+    """
+    pdf.chapter_title('Chart Analysis')
+    pdf.image(output_path, w=180)
+    deltas = data['virtual_memory/used'].diff().round(2)
+    large_moves = deltas.abs() > 500
+    for row_label, delta in deltas[large_moves].items():
+        round_no = data['round_number'][row_label]
+        pdf.chapter_body(f"Significant memory change: {delta} MB at Round {round_no}")
+
+def add_statistical_overview(pdf, stats_path):
+    """Add the 'Statistical Overview' chapter containing the full text of the file at ``stats_path``."""
+    pdf.chapter_title('Statistical Overview')
+    with open(stats_path, 'r') as stats_file:
+        overview_text = stats_file.read()
+    pdf.chapter_body(overview_text)
+
+def add_conclusion(pdf, slope):
+    """
+    Add the 'Conclusion' chapter, worded according to the sign of ``slope``.
+
+    A strictly positive slope is reported as increasing memory usage; zero or a
+    negative slope is reported as no continuous growth.
+    """
+    pdf.chapter_title('Conclusion')
+    pdf.chapter_body(
+        "The upward slope in the graph indicates a trend of increasing memory usage over rounds."
+        if slope > 0
+        else "There is no continuous memory growth."
+    )
+
+# Example of calling the function directly (pass the parsed per-round records and an output directory):
+# generate_memory_report(memory_usage_records, '/path/to/workspace')
diff --git a/tests/end_to_end/utils/summary_helper.py b/tests/end_to_end/utils/summary_helper.py
@@ -1,7 +1,7 @@
 # Copyright 2020-2023 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-import xml.etree.ElementTree as ET
+from defusedxml.ElementTree import parse as defused_parse
 from lxml import etree
 import os
 from pathlib import Path
@@ -17,7 +17,7 @@
     print(f"Results XML file not found at {result_xml}. Exiting...")
     exit(1)
 
-tree = ET.parse(result_xml, parser=parser)
+tree = defused_parse(result_xml, parser=parser)
 
 # Get the root element
 testsuites = tree.getroot()