Skip to content

Commit

Permalink
Enforce and map availability
Browse files Browse the repository at this point in the history
  • Loading branch information
pgmpablo157321 committed Jan 23, 2024
1 parent acd623e commit 9d75268
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 19 deletions.
17 changes: 11 additions & 6 deletions mlperf_logging/package_checker/package_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,10 +181,7 @@ def check_training_result_files(folder, usage, ruleset, quiet, werror,
_print_divider_bar()

_print_divider_bar()
if too_many_errors:
logging.info('PACKAGE CHECKER FOUND ERRORS, LOOK INTO ERROR LOG LINES AND FIX THEM.')
else:
logging.info('PACKAGE CHECKER FOUND NO ERRORS, SUCCESS !')
return not too_many_errors


def check_systems(folder, usage, ruleset):
Expand Down Expand Up @@ -215,12 +212,20 @@ def check_training_package(folder, usage, ruleset, quiet, werror, rcp_bypass, rc
usage: The usage such as training or hpc
ruleset: The ruleset such as 0.6.0, 0.7.0, 1.0.0, etc.
"""
too_many_errors = False
if ruleset in {'1.0.0', '1.1.0', '2.0.0', '2.1.0', '3.0.0', '3.1.0'}:
logging.info(' Checking System Description Files')
if not check_systems(folder, usage, ruleset):
system_description_pass = check_systems(folder, usage, ruleset)
too_many_errors = too_many_errors or not system_description_pass
if not system_description_pass:
logging.error('System description file checker failed')

check_training_result_files(folder, usage, ruleset, quiet, werror, rcp_bypass, rcp_bert_train_samples)
training_pass = check_training_result_files(folder, usage, ruleset, quiet, werror, rcp_bypass, rcp_bert_train_samples)
too_many_errors = too_many_errors or not training_pass
if too_many_errors:
logging.info('PACKAGE CHECKER FOUND ERRORS, LOOK INTO ERROR LOG LINES AND FIX THEM.')
else:
logging.info('PACKAGE CHECKER FOUND NO ERRORS, SUCCESS !')
_print_divider_bar()
print('\n** Detailed log output is also at', log_output)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
availability:
Available on-premise: Available on-premise
available on-premise: Available on-premise
onprem: Available on-premise
available: Available on-premise
Available onprem: Available on-premise
available onprem: Available on-premise
Available cloud: Available cloud
available cloud: Available cloud
cloud: Available cloud
Research, Development, or Internal (RDI): Research, Development, or Internal (RDI)
preview: Research, Development, or Internal (RDI)
research, development, or internal (rdi): Research, Development, or Internal (RDI)
rdi: Research, Development, or Internal (RDI)
research: Research, Development, or Internal (RDI)
development: Research, Development, or Internal (RDI)
internal: Research, Development, or Internal (RDI)
preview: Preview

columns:
training:
Expand Down
44 changes: 32 additions & 12 deletions mlperf_logging/result_summarizer/result_summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,15 @@ def _code_url(system_desc, usage, ruleset):
)


def _map_availability(availability, config):
map_ = config["availability"]
if availability in map_:
return map_[availability]
elif availability.lower() in map_:
return map_[availability.lower()]
raise ValueError(f"Specified availability {availability} is not valid, must be one of: {list(map_.keys())}")


def _get_sort_by_column_names():
return [
'division', 'system', 'accelerator_model_name', 'framework',
Expand Down Expand Up @@ -566,7 +575,7 @@ def _fill_empty_benchmark_scores(
benchmark_scores[benchmark] = None


def summarize_results(folder, usage, ruleset, csv_file=None):
def summarize_results(folder, usage, ruleset, csv_file=None, **kwargs):
"""Summarizes a set of results.
Args:
Expand Down Expand Up @@ -604,7 +613,11 @@ def _check_and_update_system_specs(desc_keys, column_name, query=None):
# Construct prefix portion of the row.
try:
_check_and_update_system_specs('division', 'division')
_check_and_update_system_specs('status', 'availability')
# Map availability if requested
if "availability" in kwargs:
_check_and_update_system_specs('status', 'availability', lambda desc: _map_availability(desc["status"], kwargs["availability"]))
else:
_check_and_update_system_specs('status', 'availability')
_check_and_update_system_specs('submitter', 'submitter')
_check_and_update_system_specs('system_name',
'system',
Expand Down Expand Up @@ -754,11 +767,22 @@ def main():
power_weak_scaling_summaries = []

def _update_summaries(folder):
strong_scaling_summary, weak_scaling_summary, power_summary, power_weak_scaling_summary = summarize_results(
folder,
args.usage,
args.ruleset,
)
if args.usage == "Training":
config_path = os.path.join(os.path.dirname(__file__), "config.yaml")
with open(config_path, "r") as f:
config = yaml.safe_load(f)
strong_scaling_summary, weak_scaling_summary, power_summary, power_weak_scaling_summary = summarize_results(
folder,
args.usage,
args.ruleset,
availability = config["availability"]
)
else:
strong_scaling_summary, weak_scaling_summary, power_summary, power_weak_scaling_summary = summarize_results(
folder,
args.usage,
args.ruleset,
)
strong_scaling_summaries.append(strong_scaling_summary)
if len(weak_scaling_summary) > 0:
weak_scaling_summaries.append(weak_scaling_summary)
Expand Down Expand Up @@ -789,16 +813,12 @@ def _update_summaries(folder):
# Parse results for single organization.
_update_summaries(args.folder)

def _map_availability(availability, config):
map_ = config["availability"]
return map_.get(availability, availability)

def _map_columns_index(column, config):
    """Return the column-index tuple configured for *column*.

    The table is read from config["columns"] for the current args.usage and
    args.ruleset. A column without its own entry gets the "default" entry
    with the column name appended.
    """
    ruleset_columns = config["columns"][args.usage][args.ruleset]
    # Built unconditionally, as in the original expression, so a missing
    # "default" entry fails the same way whether or not the column is listed.
    fallback = ruleset_columns.get("default") + [column]
    return tuple(ruleset_columns.get(column, fallback))

def _summaries_to_xlsx(summaries: pd.DataFrame, path, version):
config_path = os.path.join(os.path.dirname(__file__), "xlsx_config.yaml")
config_path = os.path.join(os.path.dirname(__file__), "config.yaml")
with open(config_path, "r") as f:
config = yaml.safe_load(f)
writer = pd.ExcelWriter(path, engine="xlsxwriter")
Expand Down
25 changes: 25 additions & 0 deletions mlperf_logging/system_desc_checker/system_desc_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,31 @@ def check_system_desc(json_file, usage, ruleset):
_get_or_default(contents, "framework", ""),
]) + ","

# Check availability
if usage == "training":
availability_options = [
"Available on-premise",
"available on-premise",
"onprem",
"available",
"Available onprem",
"available onprem",
"Available cloud",
"available cloud",
"cloud",
"Research, Development, or Internal (RDI)",
"research, development, or internal (rdi)",
"rdi",
"research",
"development",
"internal",
"preview"
]
if ("status" in contents) and (contents["status"] not in availability_options) and (contents["status"].lower() not in availability_options):
valid = False
invalid_reasons = ["Field status contains a non valid value: {}, must be one of {}".format(contents["status"], availability_options)]


ruleset_prefix = "https://github.com/mlperf/{}_results_v{}".format(usage, ruleset)
if "submitter" in contents and "system_name" in contents:
details_link = "{ruleset_prefix}/blob/master/{submitter}/systems/{system_name}.json".format(
Expand Down

0 comments on commit 9d75268

Please sign in to comment.