diff --git a/mlperf_logging/package_checker/package_checker.py b/mlperf_logging/package_checker/package_checker.py
index 614d00ef..43c7d92c 100644
--- a/mlperf_logging/package_checker/package_checker.py
+++ b/mlperf_logging/package_checker/package_checker.py
@@ -181,10 +181,7 @@ def check_training_result_files(folder, usage, ruleset, quiet, werror,
     _print_divider_bar()
     _print_divider_bar()
 
-    if too_many_errors:
-        logging.info('PACKAGE CHECKER FOUND ERRORS, LOOK INTO ERROR LOG LINES AND FIX THEM.')
-    else:
-        logging.info('PACKAGE CHECKER FOUND NO ERRORS, SUCCESS !')
+    return not too_many_errors
 
 
 def check_systems(folder, usage, ruleset):
@@ -215,12 +212,20 @@ def check_training_package(folder, usage, ruleset, quiet, werror, rcp_bypass, rc
         usage: The usage such as training or hpc
         ruleset: The ruleset such as 0.6.0, 0.7.0, 1.0.0, etc.
     """
+    too_many_errors = False
     if ruleset in {'1.0.0', '1.1.0', '2.0.0', '2.1.0', '3.0.0', '3.1.0'}:
         logging.info(' Checking System Description Files')
-        if not check_systems(folder, usage, ruleset):
+        system_description_pass = check_systems(folder, usage, ruleset)
+        too_many_errors = too_many_errors or not system_description_pass
+        if not system_description_pass:
             logging.error('System description file checker failed')
 
-    check_training_result_files(folder, usage, ruleset, quiet, werror, rcp_bypass, rcp_bert_train_samples)
+    training_pass = check_training_result_files(folder, usage, ruleset, quiet, werror, rcp_bypass, rcp_bert_train_samples)
+    too_many_errors = too_many_errors or not training_pass
+    if too_many_errors:
+        logging.info('PACKAGE CHECKER FOUND ERRORS, LOOK INTO ERROR LOG LINES AND FIX THEM.')
+    else:
+        logging.info('PACKAGE CHECKER FOUND NO ERRORS, SUCCESS !')
 
     _print_divider_bar()
     print('\n** Detailed log output is also at', log_output)
diff --git a/mlperf_logging/result_summarizer/xlsx_config.yaml b/mlperf_logging/result_summarizer/config.yaml
similarity index 92%
rename from mlperf_logging/result_summarizer/xlsx_config.yaml
rename to mlperf_logging/result_summarizer/config.yaml
index 0212ef88..af01d5ae 100644
--- a/mlperf_logging/result_summarizer/xlsx_config.yaml
+++ b/mlperf_logging/result_summarizer/config.yaml
@@ -1,14 +1,20 @@
 availability:
   Available on-premise: Available on-premise
+  available on-premise: Available on-premise
   onprem: Available on-premise
   available: Available on-premise
+  Available onprem: Available on-premise
+  available onprem: Available on-premise
   Available cloud: Available cloud
+  available cloud: Available cloud
   cloud: Available cloud
   Research, Development, or Internal (RDI): Research, Development, or Internal (RDI)
-  preview: Research, Development, or Internal (RDI)
+  research, development, or internal (rdi): Research, Development, or Internal (RDI)
+  rdi: Research, Development, or Internal (RDI)
   research: Research, Development, or Internal (RDI)
   development: Research, Development, or Internal (RDI)
   internal: Research, Development, or Internal (RDI)
+  preview: Preview
 
 columns:
   training:
diff --git a/mlperf_logging/result_summarizer/result_summarizer.py b/mlperf_logging/result_summarizer/result_summarizer.py
index 228c1e2a..318f9064 100644
--- a/mlperf_logging/result_summarizer/result_summarizer.py
+++ b/mlperf_logging/result_summarizer/result_summarizer.py
@@ -98,6 +98,15 @@ def _code_url(system_desc, usage, ruleset):
     )
 
 
+def _map_availability(availability, config):
+    map_ = config["availability"]
+    if availability in map_:
+        return map_[availability]
+    elif availability.lower() in map_:
+        return map_[availability.lower()]
+ raise ValueError(f"Specified availability {availability} is not valid, must be one of: {list(map_.keys())}") + + def _get_sort_by_column_names(): return [ 'division', 'system', 'accelerator_model_name', 'framework', @@ -566,7 +575,7 @@ def _fill_empty_benchmark_scores( benchmark_scores[benchmark] = None -def summarize_results(folder, usage, ruleset, csv_file=None): +def summarize_results(folder, usage, ruleset, csv_file=None, **kwargs): """Summarizes a set of results. Args: @@ -604,7 +613,11 @@ def _check_and_update_system_specs(desc_keys, column_name, query=None): # Construct prefix portion of the row. try: _check_and_update_system_specs('division', 'division') - _check_and_update_system_specs('status', 'availability') + # Map availability if requested + if "availability" in kwargs: + _check_and_update_system_specs('status', 'availability', lambda desc: _map_availability(desc["status"], kwargs["availability"])) + else: + _check_and_update_system_specs('status', 'availability') _check_and_update_system_specs('submitter', 'submitter') _check_and_update_system_specs('system_name', 'system', @@ -754,11 +767,22 @@ def main(): power_weak_scaling_summaries = [] def _update_summaries(folder): - strong_scaling_summary, weak_scaling_summary, power_summary, power_weak_scaling_summary = summarize_results( - folder, - args.usage, - args.ruleset, - ) + if args.usage == "Training": + config_path = os.path.join(os.path.dirname(__file__), "config.yaml") + with open(config_path, "r") as f: + config = yaml.safe_load(f) + strong_scaling_summary, weak_scaling_summary, power_summary, power_weak_scaling_summary = summarize_results( + folder, + args.usage, + args.ruleset, + availability = config["availability"] + ) + else: + strong_scaling_summary, weak_scaling_summary, power_summary, power_weak_scaling_summary = summarize_results( + folder, + args.usage, + args.ruleset, + ) strong_scaling_summaries.append(strong_scaling_summary) if len(weak_scaling_summary) > 0: weak_scaling_summaries.append(weak_scaling_summary) @@ -789,16 +813,12 @@ def _update_summaries(folder): # Parse results for single organization. 
     _update_summaries(args.folder)
 
-    def _map_availability(availability, config):
-        map_ = config["availability"]
-        return map_.get(availability, availability)
-
     def _map_columns_index(column, config):
         map_ = config["columns"][args.usage][args.ruleset]
         return tuple(map_.get(column, map_.get("default") + [column]))
 
     def _summaries_to_xlsx(summaries: pd.DataFrame, path, version):
-        config_path = os.path.join(os.path.dirname(__file__), "xlsx_config.yaml")
+        config_path = os.path.join(os.path.dirname(__file__), "config.yaml")
         with open(config_path, "r") as f:
             config = yaml.safe_load(f)
         writer = pd.ExcelWriter(path, engine="xlsxwriter")
diff --git a/mlperf_logging/system_desc_checker/system_desc_checker.py b/mlperf_logging/system_desc_checker/system_desc_checker.py
index e674cd3c..8bdd260f 100644
--- a/mlperf_logging/system_desc_checker/system_desc_checker.py
+++ b/mlperf_logging/system_desc_checker/system_desc_checker.py
@@ -94,6 +94,31 @@ def check_system_desc(json_file, usage, ruleset):
         _get_or_default(contents, "framework", ""),
     ]) + ","
 
+    # Check availability
+    if usage == "training":
+        availability_options = [
+            "Available on-premise",
+            "available on-premise",
+            "onprem",
+            "available",
+            "Available onprem",
+            "available onprem",
+            "Available cloud",
+            "available cloud",
+            "cloud",
+            "Research, Development, or Internal (RDI)",
+            "research, development, or internal (rdi)",
+            "rdi",
+            "research",
+            "development",
+            "internal",
+            "preview"
+        ]
+        if ("status" in contents) and (contents["status"] not in availability_options) and (contents["status"].lower() not in availability_options):
+            valid = False
+            invalid_reasons.append("Field status contains an invalid value: {}, must be one of {}".format(contents["status"], availability_options))
+
+
     ruleset_prefix = "https://github.com/mlperf/{}_results_v{}".format(usage, ruleset)
     if "submitter" in contents and "system_name" in contents:
         details_link = "{ruleset_prefix}/blob/master/{submitter}/systems/{system_name}.json".format(
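
For reference, below is a minimal standalone sketch (not part of the patch) of how the new availability mapping is intended to behave: the dictionary mirrors the availability section of the renamed config.yaml, and the lookup follows the same order as the new _map_availability helper (exact key first, then a lower-cased fallback, otherwise a ValueError). The names AVAILABILITY_MAP and map_availability are illustrative only, not identifiers introduced by the patch.

# Illustrative sketch only; mirrors the "availability" section of config.yaml
# and the lookup behaviour of result_summarizer._map_availability.

AVAILABILITY_MAP = {
    "Available on-premise": "Available on-premise",
    "available on-premise": "Available on-premise",
    "onprem": "Available on-premise",
    "available": "Available on-premise",
    "Available onprem": "Available on-premise",
    "available onprem": "Available on-premise",
    "Available cloud": "Available cloud",
    "available cloud": "Available cloud",
    "cloud": "Available cloud",
    "Research, Development, or Internal (RDI)": "Research, Development, or Internal (RDI)",
    "research, development, or internal (rdi)": "Research, Development, or Internal (RDI)",
    "rdi": "Research, Development, or Internal (RDI)",
    "research": "Research, Development, or Internal (RDI)",
    "development": "Research, Development, or Internal (RDI)",
    "internal": "Research, Development, or Internal (RDI)",
    "preview": "Preview",
}

def map_availability(status, availability_map=AVAILABILITY_MAP):
    # Accept the canonical spelling or a lower-cased variant; reject anything else.
    if status in availability_map:
        return availability_map[status]
    if status.lower() in availability_map:
        return availability_map[status.lower()]
    raise ValueError(
        f"Specified availability {status} is not valid, "
        f"must be one of: {list(availability_map.keys())}"
    )

if __name__ == "__main__":
    print(map_availability("onprem"))    # -> Available on-premise
    print(map_availability("CLOUD"))     # lower-cased match -> Available cloud
    print(map_availability("preview"))   # -> Preview (no longer mapped to RDI)
    try:
        map_availability("beta")
    except ValueError as err:
        print(err)                       # rejected: not a recognised status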