Skip to content

Commit

Permalink
Replace xml.etree.ElementTree.parse with its defusedxml equivalent (#1230)
Browse files Browse the repository at this point in the history
* Replace xml.etree.ElementTree.parse with its defusedxml equivalent

Signed-off-by: Chaurasiya, Payal <[email protected]>

* convert to json

Signed-off-by: Chaurasiya, Payal <[email protected]>

* Fix memory logs and create pdf

Signed-off-by: Chaurasiya, Payal <[email protected]>

---------

Signed-off-by: Chaurasiya, Payal <[email protected]>
  • Loading branch information
payalcha authored Dec 27, 2024
1 parent c280f10 commit 18cda3e
Show file tree
Hide file tree
Showing 4 changed files with 131 additions and 4 deletions.
3 changes: 3 additions & 0 deletions test-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@ paramiko
pytest==8.3.4
pytest-asyncio==0.25.0
pytest-mock==3.14.0
defusedxml==0.7.1
matplotlib==3.10.0
fpdf==1.7.2
27 changes: 25 additions & 2 deletions tests/end_to_end/test_suites/memory_logs_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from tests.end_to_end.utils.common_fixtures import fx_federation_tr, fx_federation_tr_dws
import tests.end_to_end.utils.constants as constants
from tests.end_to_end.utils import federation_helper as fed_helper, ssh_helper as ssh
from tests.end_to_end.utils.generate_report import generate_memory_report

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -78,7 +79,9 @@ def _log_memory_usage(request, fed_obj):
), "Aggregator memory usage file is not available"

# Log the aggregator memory usage details
memory_usage_dict = json.load(open(aggregator_memory_usage_file))
memory_usage_dict = _convert_to_json(aggregator_memory_usage_file)
aggregator_path = os.path.join(fed_obj.workspace_path, "aggregator")
generate_memory_report(memory_usage_dict, aggregator_path)

# check memory usage entries for each round
assert (
Expand All @@ -98,10 +101,30 @@ def _log_memory_usage(request, fed_obj):
collaborator_memory_usage_file
), f"Memory usage file for collaborator {collaborator.collaborator_name} is not available"

memory_usage_dict = json.load(open(collaborator_memory_usage_file))
memory_usage_dict = _convert_to_json(collaborator_memory_usage_file)
collaborator_path = os.path.join(fed_obj.workspace_path, collaborator.name)
generate_memory_report(memory_usage_dict, collaborator_path)

assert (
len(memory_usage_dict) == request.config.num_rounds
), f"Memory usage details are not available for all rounds for collaborator {collaborator.collaborator_name}"

log.info("Memory usage details are available for all participants")


def _convert_to_json(file):
"""
Reads a file containing JSON objects, one per line, and converts them into a list of parsed JSON objects.
Args:
file (str): The path to the file containing JSON objects.
Returns:
list: A list of parsed JSON objects.
"""
with open(file, 'r') as infile:
json_objects = infile.readlines()

# Parse each JSON object
parsed_json_objects = [json.loads(obj) for obj in json_objects]
return parsed_json_objects
101 changes: 101 additions & 0 deletions tests/end_to_end/utils/generate_report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import linregress
from fpdf import FPDF

class PDF(FPDF):
    """FPDF subclass with the small layout helpers used by the memory report."""

    def header(self):
        """Page-header hook: only selects bold 14pt Arial, draws no content."""
        self.set_font('Arial', style='B', size=14)

    def chapter_title(self, title):
        """Start a new page and print ``title`` as a bold, left-aligned heading."""
        self.add_page()
        self.set_font('Arial', style='B', size=14)  # Bold font for the title
        # Width 0, height 10, no border; ln=1 puts the following output on the next line.
        self.cell(0, 10, title, border=0, ln=1, align='L')

    def chapter_body(self, body):
        """Print ``body`` as regular 12pt text in a multi-line cell."""
        self.set_font('Arial', style='', size=12)
        self.multi_cell(0, 10, body)

def generate_memory_report(memory_usage_dict, workspace_path):
    """
    Generates a memory usage report (plot, statistics file and PDF).

    Three files are written into ``workspace_path``: ``mem_usage_plot.png``,
    ``mem_stats.txt`` and ``MemAnalysis.pdf``.

    Parameters:
        memory_usage_dict: Memory usage records in any form accepted by
            ``pd.DataFrame``. The resulting frame must contain the columns
            'round_number' and 'virtual_memory/used'.
        workspace_path (str): Directory that receives the generated files.
    Returns:
        None
    """
    # Local import: this module's top-level imports do not include ``os``.
    import os

    # Load data
    data = pd.DataFrame(memory_usage_dict)
    if data.empty:
        # Without records there is nothing to plot or analyse. Return quietly
        # so the caller's own checks can report the missing data, instead of
        # failing here with a KeyError on a missing column.
        print("No memory usage data available. Skipping memory report generation.")
        return

    memory_used = data['virtual_memory/used']

    # Plotting the chart
    output_path = os.path.join(workspace_path, "mem_usage_plot.png")
    figure = plt.figure(figsize=(10, 5))
    try:
        plt.plot(data['round_number'], memory_used, marker='o')
        plt.title('Memory Usage per Round')
        plt.xlabel('round_number')
        plt.ylabel('Virtual Memory Used (MB)')
        plt.grid(True)
        plt.savefig(output_path)
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(figure)

    # Calculate statistics
    min_mem = round(memory_used.min(), 2)
    max_mem = round(memory_used.max(), 2)
    mean_mem = round(memory_used.mean(), 2)
    variance_mem = round(memory_used.var(), 2)
    std_dev_mem = round(memory_used.std(), 2)
    # The regression is taken against the row index, so the slope is the
    # change in memory per record rather than per 'round_number' value.
    slope = round(linregress(data.index, memory_used).slope, 2)

    stats_path = os.path.join(workspace_path, "mem_stats.txt")
    with open(stats_path, 'w') as file:
        file.writelines([
            f"Minimum Memory Used: {min_mem} MB\n",
            f"Maximum Memory Used: {max_mem} MB\n",
            f"Mean Memory Used: {mean_mem} MB\n",
            f"Variance: {variance_mem}\n",
            f"Standard Deviation: {std_dev_mem}\n",
            f"Slope: {slope}\n",
        ])

    # Generate PDF report
    pdf = PDF()
    add_introduction(pdf)
    add_chart_analysis(pdf, output_path, data)
    add_statistical_overview(pdf, stats_path)
    add_conclusion(pdf, slope)
    pdf_output_path = os.path.join(workspace_path, "MemAnalysis.pdf")
    pdf.output(pdf_output_path)

    print("Memory report generation completed. Report saved to:", pdf_output_path)

def add_introduction(pdf):
    """Add the 'Introduction' chapter (title plus fixed introductory text) to ``pdf``."""
    pdf.chapter_title('Introduction')
    sentences = (
        "The purpose of this memory analysis is to identify memory usage trends and potential bottlenecks.",
        "This analysis focuses on the relationship between round information and memory usage.",
    )
    pdf.chapter_body(" ".join(sentences))

def add_chart_analysis(pdf, output_path, data, threshold=500):
    """
    Add the 'Chart Analysis' chapter: the plot image plus notable memory jumps.

    Parameters:
        pdf: Report object providing ``chapter_title``, ``image`` and
            ``chapter_body``.
        output_path (str): Path of the plot image to embed.
        data (pd.DataFrame): Frame with the columns 'round_number' and
            'virtual_memory/used'.
        threshold (float): Minimum absolute change between consecutive rows
            that is reported as significant. Defaults to 500.
    Returns:
        None
    """
    pdf.chapter_title('Chart Analysis')
    pdf.image(output_path, w=180)
    # Row-to-row change in memory; the first row has no predecessor (NaN) and
    # therefore never passes the threshold comparison.
    diffs = data['virtual_memory/used'].diff().round(2)
    significant_changes = diffs[diffs.abs() > threshold]
    for index, value in significant_changes.items():
        # Explicit label-based lookup of the round belonging to this row.
        round_number = data.loc[index, 'round_number']
        pdf.chapter_body(f"Significant memory change: {value} MB at Round {round_number}")

def add_statistical_overview(pdf, stats_path):
    """Add the 'Statistical Overview' chapter using the text stored in ``stats_path``."""
    pdf.chapter_title('Statistical Overview')
    # Read the whole statistics file first, then hand it over as the body.
    with open(stats_path, 'r') as stats_file:
        overview_text = stats_file.read()
    pdf.chapter_body(overview_text)

def add_conclusion(pdf, slope):
    """Add the 'Conclusion' chapter; the wording depends on whether ``slope`` is positive."""
    pdf.chapter_title('Conclusion')
    memory_is_growing = slope > 0
    pdf.chapter_body(
        "The upward slope in the graph indicates a trend of increasing memory usage over rounds."
        if memory_is_growing
        else "There is no continuous memory growth."
    )

# To run this module directly, uncomment the following line and pass the memory usage records
# together with the directory that should receive the generated report files.
# generate_memory_report(memory_usage_records, '/path/to/workspace')
4 changes: 2 additions & 2 deletions tests/end_to_end/utils/summary_helper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright 2020-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import xml.etree.ElementTree as ET
from defusedxml.ElementTree import parse as defused_parse
from lxml import etree
import os
from pathlib import Path
Expand All @@ -17,7 +17,7 @@
print(f"Results XML file not found at {result_xml}. Exiting...")
exit(1)

tree = ET.parse(result_xml, parser=parser)
tree = defused_parse(result_xml, parser=parser)

# Get the root element
testsuites = tree.getroot()
Expand Down

0 comments on commit 18cda3e

Please sign in to comment.