nltk #15

Merged: 5 commits, Sep 27, 2024

90 changes: 45 additions & 45 deletions docs/submission/index.md
@@ -60,63 +60,63 @@ Once all the results across all the models are ready you can use the following c
=== "Closed Edge"
### Closed Edge Submission
```bash
cm run script --tags=generate,inference,submission \
--clean \
--preprocess_submission=yes \
--run-checker \
--submitter=MLCommons \
--tar=yes \
--env.CM_TAR_OUTFILE=submission.tar.gz \
--division=closed \
--category=edge \
--env.CM_DETERMINE_MEMORY_CONFIGURATION=yes \
--quiet
```

=== "Closed Datacenter"
### Closed Datacenter Submission
```bash
cm run script --tags=generate,inference,submission \
--clean \
--preprocess_submission=yes \
--run-checker \
--submitter=MLCommons \
--tar=yes \
--env.CM_TAR_OUTFILE=submission.tar.gz \
--division=closed \
--category=datacenter \
--env.CM_DETERMINE_MEMORY_CONFIGURATION=yes \
--quiet
```
=== "Open Edge"
### Open Edge Submission
```bash
cm run script --tags=generate,inference,submission \
--clean \
--preprocess_submission=yes \
--run-checker \
--submitter=MLCommons \
--tar=yes \
--env.CM_TAR_OUTFILE=submission.tar.gz \
--division=open \
--category=edge \
--env.CM_DETERMINE_MEMORY_CONFIGURATION=yes \
--quiet
```
=== "Open Datacenter"
### Open Datacenter Submission
```bash
cm run script --tags=generate,inference,submission \
--clean \
--preprocess_submission=yes \
--run-checker \
--submitter=MLCommons \
--tar=yes \
--env.CM_TAR_OUTFILE=submission.tar.gz \
--division=open \
--category=datacenter \
--env.CM_DETERMINE_MEMORY_CONFIGURATION=yes \
--quiet
```

* Use `--hw_name="My system name"` to give a meaningful system name. Examples can be seen [here](https://github.com/mlcommons/inference_results_v3.0/tree/main/open/cTuning/systems)
@@ -134,7 +134,7 @@ If you are collecting results across multiple systems you can generate different
Run the following command after **replacing `--repo_url` with your GitHub repository URL**.

```bash
cm run script --tags=push,github,mlperf,inference,submission \
--repo_url=https://github.com/GATEOverflow/mlperf_inference_submissions_v4.1 \
--commit_message="Results on <HW name> added by <Name>" \
--quiet
1 change: 1 addition & 0 deletions language/llama2-70b/evaluate-accuracy.py
@@ -48,6 +48,7 @@ def main():
checkpoint_path = args.checkpoint_path
metric = evaluate.load("rouge")
nltk.download('punkt')
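# Newer NLTK releases ship the Punkt sentence-tokenizer data as 'punkt_tab';
# downloading both keeps the script working across NLTK versions.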
nltk.download('punkt_tab')

tokenizer = AutoTokenizer.from_pretrained(
checkpoint_path,
5 changes: 3 additions & 2 deletions main.py
@@ -149,7 +149,7 @@ def mlperf_inference_implementation_readme(spaces, model, implementation, *, imp
# tips for running the Neural Magic server
content += f"\n{cur_space3}!!! tip\n\n"
content += f"{cur_space3} - Host and Port number of the server can be configured through `--host` and `--port` options. Otherwise, server will run on the default host `localhost` and port `8000`.\n\n"

setup_run_cmd = mlperf_inference_run_command(spaces+17, model, implementation, framework.lower(), category.lower(), "Offline", device.lower(), "test", test_query_count, True, scenarios, code_version, extra_variation_tags, extra_input_string, extra_docker_input_string)

if execution_env == "Native": # Native implementation steps through virtual environment
@@ -186,6 +186,7 @@ def mlperf_inference_implementation_readme(spaces, model, implementation, *, imp
content += f"{cur_space3} You can reuse the same environment as described for {model.split('.')[0]}.\n"
content += f"{cur_space3}###### Performance Estimation for Offline Scenario\n"


content += mlperf_inference_run_command(spaces+17, model, implementation, framework.lower(), category.lower(), "Offline", device.lower(), "test", test_query_count, True, scenarios, code_version).replace("--docker ","")
content += f"{cur_space3}The above command should do a test run of Offline scenario and record the estimated offline_target_qps.\n\n"

@@ -413,7 +414,7 @@ def mlperf_inference_run_command(spaces, model, implementation, framework, categ
if scenario == "Server" or (scenario == "All Scenarios" and "Server" in scenarios):
scenario_option += f"\\\n{pre_space} --server_target_qps=<SERVER_TARGET_QPS>"

run_cmd_extra = get_run_cmd_extra(f_pre_space, model, implementation, device, scenario, scenarios)
run_cmd_extra = get_run_cmd_extra(f_pre_space, model, implementation, device, scenario, scenarios, True, extra_input_string)

if docker:
docker_cmd_suffix = f" \\\n{pre_space} --docker --quiet"
25 changes: 25 additions & 0 deletions recommendation/dlrm_v2/pytorch/README.md
@@ -69,6 +69,31 @@ cd $HOME/mlcommons/inference/loadgen
CFLAGS="-std=c++14" python setup.py develop --user
```

### Download the preprocessed dataset

Download the preprocessed dataset using Rclone.

To run Rclone on Windows, you can download the executable [here](https://rclone.org/install/#windows).
To install Rclone on Linux/macOS/BSD systems, run:
```
sudo -v ; curl https://rclone.org/install.sh | sudo bash
```
Once Rclone is installed, run the following command to authenticate with the bucket:
```
rclone config create mlc-inference s3 provider=Cloudflare access_key_id=f65ba5eef400db161ea49967de89f47b secret_access_key=fbea333914c292b854f14d3fe232bad6c5407bf0ab1bebf78833c2b359bdfd2b endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com
```
Prepare your dataset destination:
```
cd $HOME/mlcommons/inference/recommendation/dlrm_v2/pytorch/
mkdir ./dataset && cd ./dataset
mv <downloaded_file(s)> ./
export DATA_DIR=./dataset
```
Download the dataset:
```
rclone copy mlc-inference:mlcommons-inference-wg-public/dlrm_preprocessed ./dataset -P
```
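As an optional sanity check (an illustrative extra step, not part of the official instructions), you can list the bucket contents and the local copy to confirm the transfer completed:
```
rclone ls mlc-inference:mlcommons-inference-wg-public/dlrm_preprocessed
ls -lh ./dataset
```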


### Downloading model weights

74 changes: 44 additions & 30 deletions tools/submission/submission_checker.py
@@ -636,6 +636,7 @@
"power_supply_details",
"disk_drives",
"disk_controllers",
"system_power_only",
]

SYSTEM_DESC_MEANINGFUL_RESPONSE_REQUIRED_FIELDS_POWER = []
@@ -931,6 +932,18 @@ def check_extra_files(path, target_files):
def split_path(m):
return m.replace("\\", "/").split("/")

def get_boolean(s):
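# Coerce the loosely-typed values found in the system description JSON
# (None, bool, "true"/"false" strings, ints) into a proper bool.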
if s is None:
return False
elif isinstance(s, bool):
return s
elif isinstance(s, str):
return (s.lower() == "true")
elif isinstance(s, int):
return bool(s)
else:
raise TypeError(f"Variable should be bool, string or int, got {type(s)} instead")


def find_error_in_detail_log(config, fname):
is_valid = True
@@ -1639,36 +1652,37 @@ def log_result(
unit = special_unit_dict.get(model_name, unit_dict)[scenario_fixed]
power_unit = power_unit_dict[scenario_fixed]

csv.write(
fmt.format(
submitter,
available,
division,
'"' + system_type + '"',
'"' + system_name + '"',
system_desc,
model_name,
mlperf_model,
scenario_fixed,
r,
acc,
system_json.get("number_of_nodes"),
'"' + system_json.get("host_processor_model_name") + '"',
system_json.get("host_processors_per_node"),
system_json.get("host_processor_core_count"),
'"' + system_json.get("accelerator_model_name") + '"',
'"' + str(system_json.get("accelerators_per_node")) + '"',
name.replace("\\", "/"),
'"' + system_json.get("framework", "") + '"',
'"' + system_json.get("operating_system", "") + '"',
'"' + notes + '"',
compliance,
errors,
config.version,
inferred,
power_metric > 0,
unit,
)
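# Skip the standard results row for submissions marked as system_power_only;
# power results are handled separately in the `if power_metric > 0:` branch below.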
if (power_metric <= 0) or (not get_boolean(system_json.get("system_power_only"))):
csv.write(
fmt.format(
submitter,
available,
division,
'"' + system_type + '"',
'"' + system_name + '"',
system_desc,
model_name,
mlperf_model,
scenario_fixed,
r,
acc,
system_json.get("number_of_nodes"),
'"' + system_json.get("host_processor_model_name") + '"',
system_json.get("host_processors_per_node"),
system_json.get("host_processor_core_count"),
'"' + system_json.get("accelerator_model_name") + '"',
'"' + str(system_json.get("accelerators_per_node")) + '"',
name.replace("\\", "/"),
'"' + system_json.get("framework", "") + '"',
'"' + system_json.get("operating_system", "") + '"',
'"' + notes + '"',
compliance,
errors,
config.version,
inferred,
power_metric > 0,
unit,
)
)

if power_metric > 0: