Enforce and map availability

mlcommons · Jan 23, 2024 · 9d75268 · 9d75268
1 parent acd623e
commit 9d75268
Show file tree

Hide file tree

Showing 4 changed files with 75 additions and 19 deletions.
diff --git a/mlperf_logging/package_checker/package_checker.py b/mlperf_logging/package_checker/package_checker.py
@@ -181,10 +181,7 @@ def check_training_result_files(folder, usage, ruleset, quiet, werror,
             _print_divider_bar()
 
     _print_divider_bar()
-    if too_many_errors:
-        logging.info('PACKAGE CHECKER FOUND ERRORS, LOOK INTO ERROR LOG LINES AND FIX THEM.')
-    else:
-        logging.info('PACKAGE CHECKER FOUND NO ERRORS, SUCCESS !')
+    return not too_many_errors
 
 
 def check_systems(folder, usage, ruleset):
@@ -215,12 +212,20 @@ def check_training_package(folder, usage, ruleset, quiet, werror, rcp_bypass, rc
         usage: The usage such as training or hpc
         ruleset: The ruleset such as 0.6.0, 0.7.0, 1.0.0, etc.
     """
+    too_many_errors = False
     if ruleset in {'1.0.0', '1.1.0', '2.0.0', '2.1.0', '3.0.0', '3.1.0'}:
         logging.info(' Checking System Description Files')
-        if not check_systems(folder, usage, ruleset):
+        system_description_pass = check_systems(folder, usage, ruleset)
+        too_many_errors = too_many_errors or not system_description_pass
+        if not system_description_pass:
             logging.error('System description file checker failed')
 
-    check_training_result_files(folder, usage, ruleset, quiet, werror, rcp_bypass, rcp_bert_train_samples)
+    training_pass = check_training_result_files(folder, usage, ruleset, quiet, werror, rcp_bypass, rcp_bert_train_samples)
+    too_many_errors = too_many_errors or not training_pass
+    if too_many_errors:
+        logging.info('PACKAGE CHECKER FOUND ERRORS, LOOK INTO ERROR LOG LINES AND FIX THEM.')
+    else:
+        logging.info('PACKAGE CHECKER FOUND NO ERRORS, SUCCESS !')
     _print_divider_bar()
     print('\n** Detailed log output is also at', log_output)
 

diff --git a/mlperf_logging/result_summarizer/xlsx_config.yaml b/mlperf_logging/result_summarizer/config.yaml
@@ -1,14 +1,20 @@
 availability:
   Available on-premise: Available on-premise
+  available on-premise: Available on-premise
   onprem: Available on-premise
   available: Available on-premise
+  Available onprem: Available on-premise
+  available onprem: Available on-premise
   Available cloud: Available cloud
+  available cloud: Available cloud
   cloud: Available cloud
   Research, Development, or Internal (RDI): Research, Development, or Internal (RDI)
-  preview: Research, Development, or Internal (RDI)
+  research, development, or internal (rdi): Research, Development, or Internal (RDI)
+  rdi: Research, Development, or Internal (RDI)
   research: Research, Development, or Internal (RDI)
   development: Research, Development, or Internal (RDI)
   internal: Research, Development, or Internal (RDI)
+  preview: Preview
 
 columns:
   training:

diff --git a/mlperf_logging/result_summarizer/result_summarizer.py b/mlperf_logging/result_summarizer/result_summarizer.py
@@ -98,6 +98,15 @@ def _code_url(system_desc, usage, ruleset):
     )
 
 
+def _map_availability(availability, config):
+        """Translate a submitted availability string to its configured label.
+
+        Args:
+            availability: The availability value to translate. It is used as a
+                mapping key and must support ``.lower()``.
+            config: Mapping with an "availability" entry that maps accepted
+                spellings to the label to report.
+
+        Returns:
+            The value stored under `availability` in config["availability"],
+            or under its lowercased form when the exact spelling is absent.
+
+        Raises:
+            ValueError: If neither the exact nor the lowercased spelling is a
+                key of config["availability"].
+        """
+        # NOTE(review): the call in summarize_results passes
+        # kwargs["availability"], which main() sets to config["availability"]
+        # (the inner mapping). Indexing ["availability"] again here would then
+        # fail -- confirm which level callers are expected to pass.
+        map_ = config["availability"]
+        # An exact match takes precedence over the case-insensitive fallback.
+        if availability in map_:
+            return map_[availability]
+        elif availability.lower() in map_:
+            return map_[availability.lower()]
+        raise ValueError(f"Specified availability {availability} is not valid, must be one of: {list(map_.keys())}")
+
+
 def _get_sort_by_column_names():
     return [
         'division', 'system', 'accelerator_model_name', 'framework',
@@ -566,7 +575,7 @@ def _fill_empty_benchmark_scores(
                 benchmark_scores[benchmark] = None
 
 
-def summarize_results(folder, usage, ruleset, csv_file=None):
+def summarize_results(folder, usage, ruleset, csv_file=None, **kwargs):
     """Summarizes a set of results.
 
     Args:
@@ -604,7 +613,11 @@ def _check_and_update_system_specs(desc_keys, column_name, query=None):
         # Construct prefix portion of the row.
         try:
             _check_and_update_system_specs('division', 'division')
-            _check_and_update_system_specs('status', 'availability')
+            # Map availability if requested
+            if "availability" in kwargs:
+                _check_and_update_system_specs('status', 'availability', lambda desc: _map_availability(desc["status"], kwargs["availability"]))
+            else:
+                _check_and_update_system_specs('status', 'availability')
             _check_and_update_system_specs('submitter', 'submitter')
             _check_and_update_system_specs('system_name',
                                            'system',
@@ -754,11 +767,22 @@ def main():
     power_weak_scaling_summaries = []
 
     def _update_summaries(folder):
-        strong_scaling_summary, weak_scaling_summary, power_summary, power_weak_scaling_summary = summarize_results(
-            folder,
-            args.usage,
-            args.ruleset,
-        )
+        if args.usage == "Training":
+            config_path = os.path.join(os.path.dirname(__file__), "config.yaml")
+            with open(config_path, "r") as f:
+                config = yaml.safe_load(f)
+            strong_scaling_summary, weak_scaling_summary, power_summary, power_weak_scaling_summary = summarize_results(
+                folder,
+                args.usage,
+                args.ruleset,
+                availability = config["availability"]
+            )
+        else:
+            strong_scaling_summary, weak_scaling_summary, power_summary, power_weak_scaling_summary = summarize_results(
+                folder,
+                args.usage,
+                args.ruleset,
+            )
         strong_scaling_summaries.append(strong_scaling_summary)
         if len(weak_scaling_summary) > 0:
             weak_scaling_summaries.append(weak_scaling_summary)
@@ -789,16 +813,12 @@ def _update_summaries(folder):
         # Parse results for single organization.
         _update_summaries(args.folder)
 
-    def _map_availability(availability, config):
-        map_ = config["availability"]
-        return map_.get(availability, availability)
-
     def _map_columns_index(column, config):
         """Return the configured index tuple for `column`.

         Looks up config["columns"][args.usage][args.ruleset], where `args`
         comes from the enclosing scope.

         Args:
             column: Column name to look up.
             config: Mapping with a "columns" entry keyed by usage, then ruleset.

         Returns:
             A tuple built from the entry for `column`, or from the "default"
             entry with `column` appended when `column` has no entry.
         """
         map_ = config["columns"][args.usage][args.ruleset]
         # The fallback argument is evaluated before .get() runs, so the
         # "default" entry must exist and be concatenable with a list even
         # when `column` has its own entry.
         return tuple(map_.get(column, map_.get("default") + [column]))
 
     def _summaries_to_xlsx(summaries: pd.DataFrame, path, version):
-        config_path = os.path.join(os.path.dirname(__file__), "xlsx_config.yaml")
+        config_path = os.path.join(os.path.dirname(__file__), "config.yaml")
         with open(config_path, "r") as f:
             config = yaml.safe_load(f)
         writer = pd.ExcelWriter(path, engine="xlsxwriter")

diff --git a/mlperf_logging/system_desc_checker/system_desc_checker.py b/mlperf_logging/system_desc_checker/system_desc_checker.py
@@ -94,6 +94,31 @@ def check_system_desc(json_file, usage, ruleset):
         _get_or_default(contents, "framework", ""),
     ]) + ","
 
+    # Check availability
+    if usage == "training":
+        availability_options = [
+            "Available on-premise", 
+            "available on-premise",
+            "onprem",
+            "available",
+            "Available onprem",
+            "available onprem",
+            "Available cloud",
+            "available cloud",
+            "cloud",
+            "Research, Development, or Internal (RDI)",
+            "research, development, or internal (rdi)",
+            "rdi",
+            "research",
+            "development",
+            "internal",
+            "preview"
+        ]
+        if ("status" in contents) and (contents["status"] not in availability_options) and (contents["status"].lower() not in availability_options):
+            valid = False
+            invalid_reasons = ["Field status contains a non valid value: {}, must be one of {}".format(contents["status"], availability_options)]
+
+
     ruleset_prefix = "https://github.com/mlperf/{}_results_v{}".format(usage, ruleset)
     if "submitter" in contents and "system_name" in contents:
         details_link = "{ruleset_prefix}/blob/master/{submitter}/systems/{system_name}.json".format(