Update verify_performance.py | Refactor TEST01 verify performance, support for pointpainting #2070

Closed · wants to merge 1 commit
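For reviewers: the script is driven with the reference and compliance-test summary logs via the -r/-t flags defined in the diff below. A minimal invocation sketch, assuming the usual LoadGen summary file name mlperf_log_summary.txt and placeholder run directories (neither path is defined by this PR):

import subprocess

# Placeholder paths; point these at the real reference and compliance run outputs.
subprocess.run(
    [
        "python3", "compliance/nvidia/TEST01/verify_performance.py",
        "-r", "reference_run/mlperf_log_summary.txt",
        "-t", "compliance_run/mlperf_log_summary.txt",
    ],
    check=False,  # the script prints TEST PASS / TEST FAIL itself; some failure paths call sys.exit() with a message
)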
171 changes: 50 additions & 121 deletions compliance/nvidia/TEST01/verify_performance.py
@@ -1,153 +1,82 @@
-#! /usr/bin/env python3
+#!/usr/bin/env python3
 # Copyright 2018-2022 The MLPerf Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-import json
+# Full license text: http://www.apache.org/licenses/LICENSE-2.0

 import argparse
 import os
 import sys
 import re

 sys.path.append(os.getcwd())


-def main():
-    # Parse arguments to identify the path to the accuracy logs from
-    # the accuracy and performance runs
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--reference_summary",
-        "-r",
-        help="Specifies the path to the summary log for the performance run.",
-        default="",
-    )
-    parser.add_argument(
-        "--test_summary",
-        "-t",
-        help="Specifies the path to the summary log for this test.",
-        default="",
-    )
-    args = parser.parse_args()
-
-    print("Verifying performance.")
-    ref_file = open(args.reference_summary, "r")
-    test_file = open(args.test_summary, "r")
-    ref_score = 0
-    test_score = 0
-    ref_mode = ""
-    test_mode = ""
-
-    for line in ref_file:
-        if re.match("Scenario", line):
-            ref_mode = line.split(": ", 1)[1].strip()
-            continue
-
-        if ref_mode == "SingleStream":
-            if re.match(".*Early stopping 90th percentile estimate", line):
-                ref_score = line.split(": ", 1)[1].strip()
-                continue
-
-        if ref_mode == "MultiStream":
-            if re.match(".*Early stopping 99th percentile estimate", line):
-                ref_score = line.split(": ", 1)[1].strip()
-                continue
-
-        if ref_mode == "Server":
-            if re.match("Completed samples per second", line):
-                ref_score = line.split(": ", 1)[1].strip()
-                continue
-            if re.match("target_latency (ns)", line):
-                ref_target_latency = line.split(": ", 1)[1].strip()
-                continue
-
-        if ref_mode == "Offline":
-            if re.match("Samples per second", line):
-                ref_score = line.split(": ", 1)[1].strip()
-                continue
-
-        if re.match("Result is", line):
-            valid = line.split(": ", 1)[1].strip()
-            if valid == "INVALID":
-                sys.exit("TEST FAIL: Reference results are invalid")
-
-        if re.match("\\d+ ERROR", line):
-            error = line.split(" ", 1)[0].strip()
-            print("WARNING: " + error + " ERROR reported in reference results")
-
-    for line in test_file:
-        if re.match("Scenario", line):
-            test_mode = line.split(": ", 1)[1].strip()
-            continue
-
-        if test_mode == "SingleStream":
-            if re.match(".*Early stopping 90th percentile estimate", line):
-                test_score = line.split(": ", 1)[1].strip()
-                continue
-
-        if test_mode == "MultiStream":
-            if re.match(".*Early stopping 99th percentile estimate", line):
-                test_score = line.split(": ", 1)[1].strip()
-                continue
-
-        if test_mode == "Server":
-            if re.match("Completed samples per second", line):
-                test_score = line.split(": ", 1)[1].strip()
-                continue
-            if re.match("target_latency (ns)", line):
-                test_target_latency = line.split(": ", 1)[1].strip()
-                if test_target_latency != ref_target_latency:
-                    print("TEST FAIL: Server target latency mismatch")
-                    sys.exit()
-                continue
-
-        if test_mode == "Offline":
-            if re.match("Samples per second", line):
-                test_score = line.split(": ", 1)[1].strip()
-                continue
-
-        if re.match("Result is", line):
-            valid = line.split(": ", 1)[1].strip()
-            if valid == "INVALID":
-                sys.exit("TEST FAIL: Test results are invalid")
-
-        if re.match("\\d+ ERROR", line):
-            error = line.split(" ", 1)[0].strip()
-            print("WARNING: " + error + " ERROR reported in test results")
-
-    if test_mode != ref_mode:
-        sys.exit("Test and reference scenarios do not match!")
-
-    print("reference score = {}".format(ref_score))
-    print("test score = {}".format(test_score))
-
-    threshold = 0.10
-
-    # In single-/multi-stream mode, latencies can be very short for high performance systems
-    # and run-to-run variation due to external disturbances (OS) can be significant.
-    # In this case we relax pass threshold to 20%
-    if (ref_mode == "SingleStream" and float(ref_score) <= 200000) or (
-        ref_mode == "MultiStream" and float(ref_score) <= 1600000
-    ):
-        threshold = 0.20
-
-    if float(test_score) < float(ref_score) * (1 + threshold) and float(
-        test_score
-    ) > float(ref_score) * (1 - threshold):
-        print("TEST PASS")
-    else:
-        print("TEST FAIL: Test score invalid")
+# Mapping scenarios to their respective metrics
+METRIC_MAP = {
+    "SingleStream": ".*Early stopping (90th|99.9th) percentile estimate",
+    "MultiStream": ".*Early stopping 99th percentile estimate",
+    "Server": "Completed samples per second",
+    "Offline": "Samples per second"
+}
+
+
+def parse_summary(file_path):
+    """Parses the summary file and extracts mode, score, and target latency."""
+    mode, score, target_latency = None, None, None
+
+    with open(file_path, "r") as file:
+        for line in file:
+            if re.match("Scenario", line):
+                mode = line.split(": ", 1)[1].strip()
+                continue
+
+            if mode in METRIC_MAP and re.match(METRIC_MAP[mode], line):
+                score = float(line.split(": ", 1)[1].strip())
+                continue
+
+            if mode == "Server" and re.match("target_latency \(ns\)", line):
+                target_latency = float(line.split(": ", 1)[1].strip())
+                continue
+
+            if re.match("Result is", line):
+                if "INVALID" in line:
+                    sys.exit(f"TEST FAIL: {file_path} results are invalid")
+
+            if re.match(r"\d+ ERROR", line):
+                error_count = line.split(" ", 1)[0].strip()
+                print(f"WARNING: {error_count} ERROR reported in {file_path}")
+
+    return mode, score, target_latency
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-r", "--reference_summary", required=True, help="Path to reference performance summary log.")
+    parser.add_argument("-t", "--test_summary", required=True, help="Path to test performance summary log.")
+    args = parser.parse_args()
+
+    print("Verifying performance...")
+
+    ref_mode, ref_score, ref_target_latency = parse_summary(args.reference_summary)
+    test_mode, test_score, test_target_latency = parse_summary(args.test_summary)
+
+    if ref_mode != test_mode:
+        sys.exit("TEST FAIL: Test and reference scenarios do not match!")
+
+    if ref_mode == "Server" and ref_target_latency and test_target_latency and ref_target_latency != test_target_latency:
+        sys.exit("TEST FAIL: Server target latency mismatch")
+
+    print(f"Reference score = {ref_score}")
+    print(f"Test score = {test_score}")
+
+    # Define thresholds
+    threshold = 0.10
+    if (ref_mode == "SingleStream" and ref_score <= 200000) or (ref_mode == "MultiStream" and ref_score <= 1600000):
+        threshold = 0.20  # Increased tolerance for short latencies
+
+    # Performance validation
+    lower_bound, upper_bound = ref_score * (1 - threshold), ref_score * (1 + threshold)
+    if lower_bound <= test_score <= upper_bound:
+        print("TEST PASS")
+    else:
+        print("TEST FAIL: Test score invalid")


 if __name__ == "__main__":
     main()
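As a quick sanity check of the refactored parsing and tolerance logic, here is a small self-contained sketch; the summary lines and scores below are made-up illustrations, not output from a real LoadGen run:

import re

# METRIC_MAP and the split/strip parsing follow the refactored script above.
METRIC_MAP = {
    "SingleStream": ".*Early stopping (90th|99.9th) percentile estimate",
    "MultiStream": ".*Early stopping 99th percentile estimate",
    "Server": "Completed samples per second",
    "Offline": "Samples per second",
}

# Synthetic Offline summary (illustrative values only).
sample_summary = """Scenario : Offline
Samples per second: 1000.0
Result is : VALID
"""

mode, score = None, None
for line in sample_summary.splitlines():
    if re.match("Scenario", line):
        mode = line.split(": ", 1)[1].strip()
    elif mode in METRIC_MAP and re.match(METRIC_MAP[mode], line):
        score = float(line.split(": ", 1)[1].strip())

print(mode, score)  # -> Offline 1000.0

# Tolerance band as computed in main(): +/-10% of the reference score by default.
threshold = 0.10
lower_bound, upper_bound = score * (1 - threshold), score * (1 + threshold)
print(lower_bound <= 1050.0 <= upper_bound)  # True: a test score of 1050.0 would PASS
print(lower_bound <= 1200.0 <= upper_bound)  # False: a test score of 1200.0 would FAIL

The table-driven METRIC_MAP lets one parse_summary() serve both the reference and the test summaries, which is what removes the duplicated per-scenario branches from the old main().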