diff --git a/mlperf_logging/compliance_checker/mlp_parser/__init__.py b/mlperf_logging/compliance_checker/mlp_parser/__init__.py
index 94fb516..76564ff 100644
--- a/mlperf_logging/compliance_checker/mlp_parser/__init__.py
+++ b/mlperf_logging/compliance_checker/mlp_parser/__init__.py
@@ -6,6 +6,7 @@
 from .ruleset_210 import parse_file as parse_file_210
 from .ruleset_300 import parse_file as parse_file_300
 from .ruleset_310 import parse_file as parse_file_310
+from .ruleset_400 import parse_file as parse_file_400
 
 
 def parse_file(filename, ruleset='0.6.0'):
@@ -25,5 +26,7 @@ def parse_file(filename, ruleset='0.6.0'):
         return parse_file_300(filename)
     elif ruleset == '3.1.0':
         return parse_file_310(filename)
+    elif ruleset == '4.0.0':
+        return parse_file_400(filename)
     else:
         raise Exception(f'Ruleset "{ruleset}" is not supported')
diff --git a/mlperf_logging/compliance_checker/mlp_parser/ruleset_400.py b/mlperf_logging/compliance_checker/mlp_parser/ruleset_400.py
new file mode 100644
index 0000000..e30b08d
--- /dev/null
+++ b/mlperf_logging/compliance_checker/mlp_parser/ruleset_400.py
@@ -0,0 +1,105 @@
+'''
+Parses a text MLPerf log into a structured format.
+'''
+
+from __future__ import print_function
+
+import collections
+import json
+import re
+import sys
+from dataclasses import dataclass
+
+from io import open
+
+@dataclass
+class LogLine:
+    """Encapsulates a single parsed MLPerf log line."""
+    full_string: str
+    timestamp: float
+    key: str
+    value: dict
+    lineno: int
+
+TOKEN = ':::MLLOG '
+
+
+def parse_line(line):
+    if not line.startswith(TOKEN):
+        return None
+
+    return json.loads(line[len(TOKEN):])
+
+
+def string_to_logline(lineno, string):
+    ''' Returns a LogLine or raises a ValueError '''
+    m = parse_line(string)
+
+    if m is None:
+        raise ValueError('does not start with :::MLLOG token')
+
+    args = []
+    args.append(string)  # full string
+
+    ts = float(m['time_ms'])  # may raise error, e.g. "1.2.3"
+    # TODO check for weird values
+    args.append(ts)
+
+    args.append(m['key'])  # key
+
+    j = { 'value': m['value'], 'metadata': m['metadata'] }
+    args.append(j)
+
+    args.append(lineno)
+    return LogLine(*args)
+
+
+def parse_file(filename):
+    ''' Reads a file by name and returns list of loglines and list of errors'''
+    with open(filename, encoding='latin-1') as f:
+        return parse_generator(f)
+
+
+def strip_and_dedup(gen):
+    lines = []
+    for l in gen:
+        if TOKEN not in l:
+            continue
+        lines.append(re.sub(".*"+TOKEN, TOKEN, l))
+    return lines
+
+
+
+def parse_generator(gen):
+    ''' Reads a generator of lines and returns (loglines, errors)
+    Each error is a tuple (str_line, error_msg) describing a parsing problem.
+    '''
+    loglines = []
+    failed = []
+    for lineno, line in enumerate(strip_and_dedup(gen)):
+        line = line.strip()
+        try:
+            ll = string_to_logline(lineno, line)
+            loglines.append(ll)
+        except ValueError as e:
+            failed.append((line, str(e)))
+    return loglines, failed
+
+
+if __name__ == '__main__':
+    if len(sys.argv) != 2:
+        print('usage: mlp_parser.py FILENAME')
+        print('       tests parsing on the file.')
+        sys.exit(1)
+
+    filename = sys.argv[1]
+    lines, errors = parse_file(filename)
+
+    print('Parsed {} log lines with {} errors.'.format(len(lines), len(errors)))
+
+    if len(errors) > 0:
+        print('Lines which failed to parse:')
+        for line, error in errors:
+            print('  Following line failed: {}'.format(error))
+            print(line)
+
diff --git a/mlperf_logging/compliance_checker/training_4.0.0/closed_common.yaml b/mlperf_logging/compliance_checker/training_4.0.0/closed_common.yaml
new file mode 100644
index 0000000..395d1e9
--- /dev/null
+++ b/mlperf_logging/compliance_checker/training_4.0.0/closed_common.yaml
@@ -0,0 +1,11 @@
+
+- KEY:
+    NAME:  submission_benchmark
+    REQ:   EXACTLY_ONE
+    CHECK: " v['value'] in ['resnet', 'ssd', 'stable_diffusion', 'maskrcnn', 'gpt3', 'dlrm_dcnv2', 'bert', 'rnnt', 'unet3d'] "
+    POST:  " enqueue_config('training_4.0.0/closed_{}.yaml'.format(v['value'])) "
+
+- KEY:
+    NAME:  gradient_accumulation_steps
+    REQ:   EXACTLY_ONE
+    CHECK: " v['value'] > 0 "
diff --git a/mlperf_logging/compliance_checker/training_4.0.0/closed_stable_diffusion.yaml b/mlperf_logging/compliance_checker/training_4.0.0/closed_stable_diffusion.yaml
new file mode 100644
index 0000000..3cdc3e6
--- /dev/null
+++ b/mlperf_logging/compliance_checker/training_4.0.0/closed_stable_diffusion.yaml
@@ -0,0 +1,74 @@
+# Stable diffusion uses two metrics, FID and CLIP.
+# These metrics can be calculated offline, using different scripts
+# and logged separately. Therefore, we create a virtual key
+# called aggregated_eval_accuracy, which aggregates
+# both metrics into a single log line.
+
+- BEGIN:
+    CODE: |
+        from dataclasses import replace
+        agg_eval_lines = {}
+        for line in loglines:
+            if line.key == "eval_accuracy" and 'metric' in line.value['metadata']:
+                samples_count = line.value['metadata']['samples_count']
+                if samples_count not in agg_eval_lines:
+                    new_line = replace(line) # Make a copy
+                    new_line.key = "aggregated_eval_accuracy"
+                    new_line.full_string = "" # Not needed
+                    new_line.lineno = -1 # Not needed
+                    new_line.value = {'value': {'samples_count': samples_count}, 'metadata':{}}
+                    agg_eval_lines[samples_count] = new_line
+
+                agg_eval_lines[samples_count].timestamp = max(line.timestamp, agg_eval_lines[samples_count].timestamp)
+                agg_eval_lines[samples_count].value['value'][line.value['metadata']['metric']] = line.value['value']
+        loglines.extend(agg_eval_lines.values())
+
+
+- KEY:
+    NAME:  global_batch_size
+    REQ:   AT_LEAST_ONE
+    CHECK: " v['value'] >= 0 "
+
+- KEY:
+    NAME:  opt_name
+    REQ:   EXACTLY_ONE
+    CHECK: " v['value'] == 'adamw' "
+
+- KEY:
+    NAME:  opt_adamw_beta_1
+    REQ:   EXACTLY_ONE
+    CHECK: " v['value'] == 0.9 "
+
+- KEY:
+    NAME:  opt_adamw_beta_2
+    REQ:   EXACTLY_ONE
+    CHECK: " v['value'] == 0.999 "
+
+- KEY:
+    NAME:  opt_adamw_epsilon
+    REQ:   EXACTLY_ONE
+    CHECK: " v['value'] == 1e-08 "
+
+- KEY:
+    NAME:  opt_adamw_weight_decay
+    REQ:   EXACTLY_ONE
+    CHECK: " v['value'] == 0.01 "
+
+- KEY:
+    NAME:  opt_base_learning_rate
+    REQ:   EXACTLY_ONE
+    CHECK: " v['value'] >= 0.0 "
+
+- KEY:
+    NAME:  opt_learning_rate_warmup_steps
+    REQ:   EXACTLY_ONE
+    CHECK: " v['value'] >= 0 "
+
+- KEY:
+    NAME:  aggregated_eval_accuracy
+    REQ:   AT_LEAST(2)
+    CHECK:
+        - "'FID' in v['value']"
+        - "'CLIP' in v['value']"
+        - "'samples_count' in v['value']"
+    ATLEAST_ONE_CHECK: "(0.0 <= v['value']['FID'] <= 90.0) and (0.15 <= v['value']['CLIP'] <= 1.0)"
diff --git a/mlperf_logging/compliance_checker/training_4.0.0/common.yaml b/mlperf_logging/compliance_checker/training_4.0.0/common.yaml
new file mode 100644
index 0000000..4949d29
--- /dev/null
+++ b/mlperf_logging/compliance_checker/training_4.0.0/common.yaml
@@ -0,0 +1,151 @@
+# This file lists all the KEYs to be checked. Every line that matches the mlperf logging regex (:::MLL...) will be checked against these rules.
+# In the order of appearance in the log, the checker will execute for each line the code specified under CHECK for the KEY in that line.
+# The code will be launched using local state 'v', which is the content of the value field in the log line, and global state 's'.
+# Global state 's' exists to allow cross-line checks, like start/stop pairs etc. To initialize 's', use a BEGIN record whose CODE will
+# be executed before any checks.
+# In addition, occurrences of each key are counted and, at the end, if a requirement regarding the number of occurrences is defined, it will
+# be confirmed. This could be implemented using global state, but since this is a common thing to do it is natively supported.
+#
+# KEY record:
+#   NAME
+#   REQ   - optional - {EXACTLY_ONE, AT_LEAST_ONE}
+#   PRE   - optional - code to be executed before CHECK
+#   CHECK - optional - expression to be evaluated to verify correctness
+#   POST  - optional - code to be executed after CHECK
+
+- BEGIN:
+    CODE: >
+        s.update({
+            'init_started': False,
+            'init_stopped' : False,
+            'run_started' : False,
+            'run_stopped' : False,
+            'in_epoch' : False,
+            'last_epoch' : 0,
+            'in_block' : False,
+            'block_first_epoch' : -1,
+            'first_init_start': 9e99,
+            'compile_time_mins': 0,
+        })
+
+- KEY:
+    NAME:  submission_org
+    REQ:   EXACTLY_ONE
+    CHECK: " v['value'] != '' "
+
+- KEY:
+    NAME:  submission_platform
+    REQ:   EXACTLY_ONE
+    CHECK: " v['value'] != '' "
+
+- KEY:
+    NAME:  submission_division
+    REQ:   EXACTLY_ONE
+    CHECK: " v['value'] in ['closed', 'open'] "
+    POST:  " enqueue_config('training_4.0.0/{}_common.yaml'.format(v['value'])); s['compile_time_mins'] = 240 if v['value'] == 'open' else 30 "
+
+- KEY:
+    NAME:  submission_status
+    REQ:   EXACTLY_ONE
+    CHECK: " v['value'] in ['cloud', 'onprem', 'preview', 'research'] "
+
+# at least one record should be found, but any found records must pass the test
+- KEY:
+    NAME:  cache_clear
+    REQ:   AT_LEAST_ONE
+    CHECK:
+        - "'value' in v"
+
+# frequency not checked
+- KEY:
+    NAME:  init_start
+    REQ:   AT_LEAST_ONE
+    CHECK:
+        - "not s['init_stopped']"
+        - "not s['run_started']"
+    POST:  " s['init_started'] = True; s['first_init_start']=min(s['first_init_start'], ll.timestamp) "
+
+# confirm that no more than the allowed compile time passed since the very first init_start
+- KEY:
+    NAME:  init_stop
+    REQ:   EXACTLY_ONE
+    CHECK:
+        - "s['init_started']"
+        - "not s['run_started']"
+        - "ll.timestamp - s['first_init_start'] < (s['compile_time_mins']*60*1e3)"
+    POST:  " s['init_stopped'] = True"
+
+- KEY:
+    NAME:  run_start
+    REQ:   EXACTLY_ONE
+    CHECK: " ( s['init_stopped'] == True )"
+    POST:  " s['run_started'] = True "
+
+# status can also be aborted, but we do not allow it here for now
+# if eval is inside an epoch and we decide to terminate, epoch_stop may be missing; that is ok
+- KEY:
+    NAME:  run_stop
+    REQ:   EXACTLY_ONE
+    CHECK:
+        - "s['run_started']"
+        - "'status' in v['metadata']"
+    POST:  " s['run_stopped'] = True "
+
+# FIXME: check that the epoch_count value matches
+- KEY:
+    NAME:  block_start
+    REQ:   AT_LEAST_ONE_OR(epoch_start)
+    CHECK:
+        - "s['run_started']"
+        - "('epoch_count' in v['metadata']) | ('samples_count' in v['metadata'])"
+        - "'first_epoch_num' in v['metadata'] if 'epoch_count' in v['metadata'] else True"
+        - "v['metadata']['epoch_count'] > 0 if 'epoch_count' in v['metadata'] else True"
+        - "v['metadata']['samples_count'] >= 0 if 'samples_count' in v['metadata'] else True"
+
+- KEY:
+    NAME:  block_stop
+    REQ:   AT_LEAST_ONE_OR(epoch_stop)
+    CHECK:
+        - "('first_epoch_num' in v['metadata']) | ('samples_count' in v['metadata'])"
+
+- KEY:
+    NAME:  epoch_start
+    REQ:   AT_LEAST_ONE_OR(block_start)
+    CHECK:
+        - "'epoch_num' in v['metadata']"
+
+- KEY:
+    NAME:  epoch_stop
+    REQ:   AT_LEAST_ONE_OR(block_stop)
+    CHECK:
+        - "'epoch_num' in v['metadata']"
+
+# making sure the previous eval did print its accuracy result
+- KEY:
+    NAME:  eval_start
+    REQ:   AT_LEAST_ONE_OR(block_start)
+    CHECK:
+        - "('epoch_num' in v['metadata']) | ('samples_count' in v['metadata'])"
+
+- KEY:
+    NAME:  eval_stop
+    REQ:   AT_LEAST_ONE_OR(block_stop)
+    CHECK:
+        - "('epoch_num' in v['metadata']) | ('samples_count' in v['metadata'])"
+
+- KEY:
+    NAME:  eval_accuracy
+    REQ:   AT_LEAST_ONE
+    CHECK:
+        - "('epoch_num' in v['metadata']) | ('samples_count' in v['metadata'])"
+
+- KEY:
+    NAME:  train_samples
+    REQ:   EXACTLY_ONE
+    CHECK: " v['value'] != '' "
+
+- KEY:
+    NAME:  eval_samples
+    REQ:   EXACTLY_ONE
+    CHECK: " v['value'] != '' "
+
diff --git a/mlperf_logging/compliance_checker/training_4.0.0/open_common.yaml b/mlperf_logging/compliance_checker/training_4.0.0/open_common.yaml
new file mode 100644
index 0000000..5ed0b9c
--- /dev/null
+++ b/mlperf_logging/compliance_checker/training_4.0.0/open_common.yaml
@@ -0,0 +1,6 @@
+
+- KEY:
+    NAME:  submission_benchmark
+    REQ:   EXACTLY_ONE
+    CHECK: " v['value'] in ['resnet', 'ssd', 'stable_diffusion', 'maskrcnn', 'gpt3', 'dlrm_dcnv2', 'bert', 'rnnt', 'unet3d'] "
+    POST:  " enqueue_config('training_4.0.0/open_{}.yaml'.format(v['value'])) "
diff --git a/mlperf_logging/compliance_checker/training_4.0.0/open_stable_diffusion.yaml b/mlperf_logging/compliance_checker/training_4.0.0/open_stable_diffusion.yaml
new file mode 100644
index 0000000..fe25e31
--- /dev/null
+++ b/mlperf_logging/compliance_checker/training_4.0.0/open_stable_diffusion.yaml
@@ -0,0 +1,33 @@
+# Stable diffusion uses two metrics, FID and CLIP.
+# These metrics can be calculated offline, using different scripts
+# and logged separately. Therefore, we create a virtual key
+# called aggregated_eval_accuracy, which aggregates
+# both metrics into a single log line.
+
+- BEGIN:
+    CODE: |
+        from dataclasses import replace
+        agg_eval_lines = {}
+        for line in loglines:
+            if line.key == "eval_accuracy" and 'metric' in line.value['metadata']:
+                samples_count = line.value['metadata']['samples_count']
+                if samples_count not in agg_eval_lines:
+                    new_line = replace(line) # Make a copy
+                    new_line.key = "aggregated_eval_accuracy"
+                    new_line.full_string = "" # Not needed
+                    new_line.lineno = -1 # Not needed
+                    new_line.value = {'value': {'samples_count': samples_count}, 'metadata':{}}
+                    agg_eval_lines[samples_count] = new_line
+
+                agg_eval_lines[samples_count].timestamp = max(line.timestamp, agg_eval_lines[samples_count].timestamp)
+                agg_eval_lines[samples_count].value['value'][line.value['metadata']['metric']] = line.value['value']
+        loglines.extend(agg_eval_lines.values())
+
+- KEY:
+    NAME:  aggregated_eval_accuracy
+    REQ:   AT_LEAST(2)
+    CHECK:
+        - "'FID' in v['value']"
+        - "'CLIP' in v['value']"
+        - "'samples_count' in v['value']"
+    ATLEAST_ONE_CHECK: "v['value']['FID'] >= 0.0 and v['value']['CLIP'] <= 1.0"
diff --git a/mlperf_logging/mllog/constants.py b/mlperf_logging/mllog/constants.py
index 7e4c169..fd6f6ad 100644
--- a/mlperf_logging/mllog/constants.py
+++ b/mlperf_logging/mllog/constants.py
@@ -178,6 +178,7 @@
 FIRST_EPOCH_NUM = "first_epoch_num"
 STATUS = "status"
 STEP_NUM = "step_num"
+SAMPLES_COUNT = "samples_count"
 
 # Power constants
 POWER_MEASUREMENT_START = "power_measurement_start"
diff --git a/mlperf_logging/mllog/examples/power/compute_metric_example.py b/mlperf_logging/mllog/examples/power/compute_metric_example.py
index 59f5e4b..add3627 100644
--- a/mlperf_logging/mllog/examples/power/compute_metric_example.py
+++ b/mlperf_logging/mllog/examples/power/compute_metric_example.py
@@ -6,7 +6,7 @@ def get_args():
     parser = argparse.ArgumentParser()
     parser.add_argument("--input-log", type=str, default=None)
     parser.add_argument("--hardware-type", type=str, choices=["node", "sw"], default="node")
-    parser.add_argument("--ruleset", type=str, choices=["0.6.0", "0.7.0", "1.0.0", "1.1.0", "2.0.0", "2.1.0", "3.0.0", "3.1.0"], default="3.1.0")
+    parser.add_argument("--ruleset", type=str, choices=["0.6.0", "0.7.0", "1.0.0", "1.1.0", "2.0.0", "2.1.0", "3.0.0", "3.1.0", "4.0.0"], default="4.0.0")
     args = parser.parse_args()
     return args
 
diff --git a/mlperf_logging/package_checker/README.md b/mlperf_logging/package_checker/README.md
index aa03b41..6c7422c 100644
--- a/mlperf_logging/package_checker/README.md
+++ b/mlperf_logging/package_checker/README.md
@@ -10,7 +10,7 @@ To check an organization's submission package for compliance:
 python3 -m mlperf_logging.package_checker FOLDER USAGE RULESET
 ```
 
-Currently, USAGE in ["training"] and RULESET in ["0.6.0", "0.7.0", "1.0.0", "1.1.0", "2.0.0", "2.1.0", "3.0.0", "3.1.0"] are supported.
+Currently, USAGE in ["training"] and RULESET in ["0.6.0", "0.7.0", "1.0.0", "1.1.0", "2.0.0", "2.1.0", "3.0.0", "3.1.0", "4.0.0"] are supported.
 
 The package checker checks:
 1. The number of result files for each benchmark matches the required count. If
diff --git a/mlperf_logging/package_checker/package_checker.py b/mlperf_logging/package_checker/package_checker.py
index fd8f033..262ace7 100644
--- a/mlperf_logging/package_checker/package_checker.py
+++ b/mlperf_logging/package_checker/package_checker.py
@@ -175,13 +175,13 @@ def check_training_result_files(folder, usage, ruleset, quiet, werror,
             logging.error(" %d files do not comply, directory cannot be accepted", errors_found)
 
         # Check if each run use unique seeds.
-        if ruleset in {'1.0.0', '1.1.0', '2.0.0', '2.1.0', '3.0.0', '3.1.0'} and division == 'closed':
+        if ruleset in {'1.0.0', '1.1.0', '2.0.0', '2.1.0', '3.0.0', '3.1.0', '4.0.0'} and division == 'closed':
             if not seed_checker.check_seeds(result_files, source_files):
                 too_many_errors = True
                 logging.error('Seed checker failed')
 
         # Run RCP checker for >= 1.0.0
-        if ruleset in {'1.0.0', '1.1.0', '2.0.0', '2.1.0', '3.0.0', '3.1.0'} and division == 'closed' and benchmark != 'minigo':
+        if ruleset in {'1.0.0', '1.1.0', '2.0.0', '2.1.0', '3.0.0', '3.1.0', '4.0.0'} and division == 'closed' and benchmark != 'minigo':
             # Now go again through result files to do RCP checks
             rcp_bypass = (global_rcp_bypass or system_rcp_bypass or result_rcp_bypass)
             rcp_pass, rcp_msg, _ = rcp_checker.check_directory(
@@ -235,7 +235,7 @@ def check_training_package(folder, usage, ruleset, quiet, werror, rcp_bypass, rc
         ruleset: The ruleset such as 0.6.0, 0.7.0, 1.0.0, etc.
     """
     too_many_errors = False
-    if ruleset in {'1.0.0', '1.1.0', '2.0.0', '2.1.0', '3.0.0', '3.1.0'}:
+    if ruleset in {'1.0.0', '1.1.0', '2.0.0', '2.1.0', '3.0.0', '3.1.0', '4.0.0'}:
         logging.info(' Checking System Description Files')
         system_description_pass = check_systems(folder, usage, ruleset)
         too_many_errors = too_many_errors or not system_description_pass
diff --git a/mlperf_logging/rcp_checker/README.md b/mlperf_logging/rcp_checker/README.md
index 7396f64..17af28e 100644
--- a/mlperf_logging/rcp_checker/README.md
+++ b/mlperf_logging/rcp_checker/README.md
@@ -8,10 +8,10 @@
 Run Reference Convergence Point checks for a submission directory.
 This consists of testing whether a submission does not converge statistically faster than the reference.
 
-For training, RCPs are loaded from directory mlperf_logging/rcp_checker/training_3.1.0/*.json
+For training, RCPs are loaded from directory mlperf_logging/rcp_checker/training_4.0.0/*.json
 
 The RCP checker supports only the 1.0.0 version onwards.
-The current training version is 3.1.0.
+The current training version is 4.0.0.
 
 ## Usage
 
diff --git a/mlperf_logging/rcp_checker/rcp_checker.py b/mlperf_logging/rcp_checker/rcp_checker.py
index c63cd57..981925e 100644
--- a/mlperf_logging/rcp_checker/rcp_checker.py
+++ b/mlperf_logging/rcp_checker/rcp_checker.py
@@ -72,7 +72,7 @@
 
             if benchmark == "stable_diffusion" and ("eval_error" in str or "eval_accuracy" in str):
                 eval_accuracy_str = str
-                eval_step = json.loads(eval_accuracy_str)["metadata"]["step_num"]
+                eval_step = json.loads(eval_accuracy_str)["metadata"]["samples_count"]
                 eval_metric = json.loads(eval_accuracy_str)["metadata"]["metric"]
                 eval_score = json.loads(eval_accuracy_str)["value"]
                 stable_diffusion_eval_results[eval_step][eval_metric] = eval_score
@@ -83,7 +83,7 @@
             elif use_train_samples and "train_samples" in str:
                 eval_accuracy_str = str
                 conv_epoch = json.loads(eval_accuracy_str)["value"]
-
+
             if "run_stop" in str and json.loads(str)["key"] == "run_stop":
                 conv_result = json.loads(str)["metadata"]["status"]
                 if conv_result == "success":
@@ -150,8 +150,8 @@ def get_submission_epochs(result_files, bert_train_samples):
 class RCP_Checker:
 
     def __init__(self, usage, ruleset, benchmark, verbose, rcp_file=None):
-        if ruleset not in {'1.0.0', "1.1.0", "2.0.0", "2.1.0", "3.0.0", "3.1.0"}:
-            raise Exception('RCP Checker only supported in 1.0.0, 1.1.0, 2.0.0, 2.1.0, 3.0.0 and 3.1.0')
+        if ruleset not in {'1.0.0', "1.1.0", "2.0.0", "2.1.0", "3.0.0", "3.1.0", "4.0.0"}:
+            raise Exception('RCP Checker only supported in 1.0.0, 1.1.0, 2.0.0, 2.1.0, 3.0.0, 3.1.0 and 4.0.0')
         self.usage = usage
         self.ruleset = ruleset
         self.benchmark = benchmark
@@ -518,7 +518,7 @@
     parser.add_argument('--rcp_usage', type=str, default='training',
                         choices=['training', 'hpc'],
                         help='what WG does the benchmark come from to check the log against')
-    parser.add_argument('--rcp_version', type=str, default='3.1.0',
+    parser.add_argument('--rcp_version', type=str, default='4.0.0',
                         help='what version of rules to check the log against')
     parser.add_argument('--verbose', action='store_true')
     parser.add_argument('--bert_train_samples', action='store_true',
diff --git a/mlperf_logging/rcp_checker/training_4.0.0/rcps_stable_diffusion.json b/mlperf_logging/rcp_checker/training_4.0.0/rcps_stable_diffusion.json
new file mode 100644
index 0000000..7bd17f2
--- /dev/null
+++ b/mlperf_logging/rcp_checker/training_4.0.0/rcps_stable_diffusion.json
@@ -0,0 +1,66 @@
+{
+
+    "sd_ref_512":
+    {
+        "Benchmark": "stable_diffusion",
+        "Creator": "NVIDIA",
+        "When": "Reference RCPs before v3.1",
+        "Platform": "32xDGX-A100",
+        "BS": 512,
+        "Hyperparams": {
+            "opt_adamw_beta_1": 0.9,
+            "opt_adamw_beta_2": 0.999,
+            "opt_adamw_epsilon": 1e-08,
+            "opt_adamw_weight_decay": 0.01,
+            "opt_base_learning_rate": 1.25e-7,
+            "opt_learning_rate_warmup_steps": 1000
+        },
+        "Epochs to converge": [
+            2560000, 2560000, 2560000, 2560000, 2560000,
+            2560000, 2560000, 2560000, 2560000, 2560000,
+            2560000, 2560000, 2560000, 3072000]
+    },
+
+    "sd_ref_1024":
+    {
+        "Benchmark": "stable_diffusion",
+        "Creator": "NVIDIA",
+        "When": "Reference RCPs before v3.1",
+        "Platform": "32xDGX-A100",
+        "BS": 1024,
+        "Hyperparams": {
+            "opt_adamw_beta_1": 0.9,
+            "opt_adamw_beta_2": 0.999,
+            "opt_adamw_epsilon": 1e-08,
+            "opt_adamw_weight_decay": 0.01,
+            "opt_base_learning_rate": 1.25e-7,
+            "opt_learning_rate_warmup_steps": 1000
+        },
+        "Epochs to converge": [
+            2560000, 2560000, 2560000, 2560000, 2560000,
+            3072000, 3072000, 3072000, 3072000, 3072000,
+            3072000, 3072000, 2560000]
+    },
+
+    "sd_ref_2048":
+    {
+        "Benchmark": "stable_diffusion",
+        "Creator": "NVIDIA",
+        "When": "Reference RCPs before v3.1",
+        "Platform": "32xDGX-A100",
+        "BS": 2048,
+        "Hyperparams": {
+            "opt_adamw_beta_1": 0.9,
+            "opt_adamw_beta_2": 0.999,
+            "opt_adamw_epsilon": 1e-08,
+            "opt_adamw_weight_decay": 0.01,
+            "opt_base_learning_rate": 1.25e-7,
+            "opt_learning_rate_warmup_steps": 1000
+        },
+        "Epochs to converge": [
+            3584000, 3584000, 3584000, 3584000, 4096000,
+            4096000, 4096000, 4096000, 4096000, 4096000,
+            4096000, 4608000, 4608000]
+    }
+
+}
diff --git a/mlperf_logging/repo_checker/README.md b/mlperf_logging/repo_checker/README.md
index a3a703b..c4f0fe5 100644
--- a/mlperf_logging/repo_checker/README.md
+++ b/mlperf_logging/repo_checker/README.md
@@ -12,12 +12,12 @@ review process.
 python3 -m mlperf_logging.repo_checker FOLDER USAGE RULESET
 ```
 
-Currently, USAGE in ["training", "hpc"] and RULESETS 2.0.0, 2.1.0, 3.0.0 and 3.1.0 are supported.
+Currently, USAGE in ["training", "hpc"] and RULESETS 2.0.0, 2.1.0, 3.0.0, 3.1.0 and 4.0.0 are supported.
 
 The repo checker checks:
 1. Whether the repo contains filenames that github does not like, e.g. files with spaces,
    files that start with '.' or '/.'
-2. Files that violate the github file limit (50MB) 
+2. Files that violate the github file limit (50MB)
 
 ## Tested software versions
 Tested and confirmed working using the following software versions:
diff --git a/mlperf_logging/repo_checker/repo_checker.py b/mlperf_logging/repo_checker/repo_checker.py
index 2f9b1cf..f7494a3 100644
--- a/mlperf_logging/repo_checker/repo_checker.py
+++ b/mlperf_logging/repo_checker/repo_checker.py
@@ -127,8 +127,8 @@
     parser.add_argument(
         'ruleset',
         type=str,
-        choices=['2.0.0', '2.1.0', '3.0.0', '3.1.0'],
-        help='the ruleset. 2.0.0, 2.1.0, 3.0.0 and 3.1.0 are currently supported.'
+        choices=['2.0.0', '2.1.0', '3.0.0', '3.1.0', '4.0.0'],
+        help='the ruleset. 2.0.0, 2.1.0, 3.0.0, 3.1.0 and 4.0.0 are currently supported.'
     )
     parser.add_argument(
         '--log_output',
diff --git a/mlperf_logging/result_summarizer/config.yaml b/mlperf_logging/result_summarizer/config.yaml
index af01d5a..d1a77a1 100644
--- a/mlperf_logging/result_summarizer/config.yaml
+++ b/mlperf_logging/result_summarizer/config.yaml
@@ -61,6 +61,18 @@ columns:
     unet3d: ["Benchmark results (minutes)", "Image segmentation (medical)", "KiTS19", "3D U-Net"]
     stable_diffusion: ["Benchmark results (minutes)", "Text to image", "Laion 400m and Coco-2017", "StableDiffusion"]
     default: [" ", " ", " "]
+  "4.0.0":
+    bert: ["Benchmark results (minutes)", "NLP", "Wikipedia", "BERT"]
+    gpt3: ["Benchmark results (minutes)", "LLM", "C4", "GPT3"]
+    dlrm_dcnv2: ["Benchmark results (minutes)", "Recommendation", "1TB Multihot Clickthrough", "DLRM DCNv2"]
+    maskrcnn: ["Benchmark results (minutes)", "Object detection, heavy-weight", "COCO", "Mask R-CNN"]
+    minigo: ["Benchmark results (minutes)", "Reinforcement Learning", "Go", "Minigo"]
+    resnet: ["Benchmark results (minutes)", "Image classification", "ImageNet", "ResNet"]
+    ssd: ["Benchmark results (minutes)", "Object detection, light-weight", "OpenImages", "RetinaNet"]
+    rnnt: ["Benchmark results (minutes)", "Speech recognition", "LibriSpeech", "RNN-T"]
+    unet3d: ["Benchmark results (minutes)", "Image segmentation (medical)", "KiTS19", "3D U-Net"]
+    stable_diffusion: ["Benchmark results (minutes)", "Text to image", "Laion 400m and Coco-2017", "StableDiffusion"]
+    default: [" ", " ", " "]
 
 hpc:
   "2.0.0":
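The BEGIN blocks in closed_stable_diffusion.yaml and open_stable_diffusion.yaml above synthesize the virtual aggregated_eval_accuracy key from separately logged FID and CLIP eval_accuracy lines. The following standalone Python sketch replays that logic outside the checker, assuming the LogLine fields defined in mlp_parser/ruleset_400.py; the two sample records (samples_count 512000, FID 42.0, CLIP 0.21) are purely hypothetical illustration data, not reference values.

# Sketch only: mirrors the aggregation code embedded in the stable_diffusion YAML rules.
from dataclasses import dataclass, replace

@dataclass
class LogLine:
    full_string: str
    timestamp: float
    key: str
    value: dict
    lineno: int

# Hypothetical parsed eval_accuracy lines, one per metric for the same samples_count.
loglines = [
    LogLine("", 1000.0, "eval_accuracy",
            {"value": 42.0, "metadata": {"metric": "FID", "samples_count": 512000}}, 10),
    LogLine("", 1001.0, "eval_accuracy",
            {"value": 0.21, "metadata": {"metric": "CLIP", "samples_count": 512000}}, 11),
]

agg_eval_lines = {}
for line in loglines:
    if line.key == "eval_accuracy" and "metric" in line.value["metadata"]:
        samples_count = line.value["metadata"]["samples_count"]
        if samples_count not in agg_eval_lines:
            new_line = replace(line)                 # copy the logline
            new_line.key = "aggregated_eval_accuracy"
            new_line.full_string = ""                # not needed for checks
            new_line.lineno = -1                     # not needed for checks
            new_line.value = {"value": {"samples_count": samples_count}, "metadata": {}}
            agg_eval_lines[samples_count] = new_line
        agg = agg_eval_lines[samples_count]
        agg.timestamp = max(line.timestamp, agg.timestamp)
        # Fold this metric (FID or CLIP) into the aggregated virtual line.
        agg.value["value"][line.value["metadata"]["metric"]] = line.value["value"]

loglines.extend(agg_eval_lines.values())
print(agg_eval_lines[512000].value)
# -> {'value': {'samples_count': 512000, 'FID': 42.0, 'CLIP': 0.21}, 'metadata': {}}

The aggregated line is what the AT_LEAST(2) rule and its ATLEAST_ONE_CHECK then inspect, so FID and CLIP only need to appear somewhere in the log with a matching samples_count.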
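For a quick smoke test of the new ruleset plumbing, the dispatch added in mlp_parser/__init__.py can be driven directly. This is a sketch under assumptions: the package is installed, and 'result_0.txt' is only an illustrative filename standing in for a real result log.

# Hypothetical usage of the 4.0.0 parser dispatch; the filename is illustrative.
from mlperf_logging.compliance_checker.mlp_parser import parse_file

loglines, errors = parse_file('result_0.txt', ruleset='4.0.0')
print('Parsed {} log lines with {} errors.'.format(len(loglines), len(errors)))
for bad_line, message in errors:
    print('  Failed to parse: {} ({})'.format(bad_line, message))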