Merge branch 'mlcommons:master' into master

anandhu-eng · Jun 25, 2024 · c6105aa · c6105aa
2 parents 5df4337 + 2a46c7a
commit c6105aa
Show file tree

Hide file tree

Showing 17 changed files with 273 additions and 68 deletions.
diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml
@@ -3,12 +3,18 @@ name: Build loadgen wheels and release them into PYPI
 on:
   release:
     types: [published]
+  push:
+    branches:
+      - master
+    paths:
+      - loadgen/setup.py
 
 jobs:
   build_wheels:
     name: Build wheels on ${{ matrix.os }}
     runs-on: ${{ matrix.os }}
     strategy:
+      fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-latest, macOS-latest]
 
@@ -18,7 +24,7 @@ jobs:
       - uses: actions/setup-python@v3
 
       - name: Install requirements
-        run: python -m pip install cibuildwheel==2.16.2 twine==4.0.2
+        run: python -m pip install cibuildwheel twine
 
       - name: Build wheels
         run: python -m cibuildwheel loadgen/ --output-dir wheels

diff --git a/.github/workflows/test-bert.yml b/.github/workflows/test-bert.yml
@@ -30,9 +30,7 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        python3 -m pip install cmind
-        cm pull repo mlcommons@ck
-        cm run script --quiet --tags=get,sys-utils-cm
+        python3 -m pip install cm4mlops
     - name: Test BERT and end to end submission generation
       run: |
-        cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=bert-99 --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --adr.compiler.tags=gcc  --adr.inference-src.version=custom --adr.inference-src.env.CM_GIT_CHECKOUT=${{ github.event.pull_request.head.ref }} --adr.inference-src.env.CM_GIT_URL=${{ github.event.pull_request.head.repo.html_url }} --target_qps=1
+        cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=bert-99 --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --adr.compiler.tags=gcc  --adr.inference-src.version=custom --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.${{ github.event.pull_request.head.ref }}
diff --git a/.github/workflows/test-loadgen.yml b/.github/workflows/test-loadgen.yml
@@ -28,8 +28,7 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        python3 -m pip install cmind
-        cm pull repo mlcommons@ck
+        python3 -m pip install cm4mlops
     - name: Test Loadgen
       run: |
-        cm run script --tags=get,mlperf,inference,loadgen --quiet --version=custom --adr.inference-src.env.CM_GIT_CHECKOUT=${{ github.event.pull_request.head.ref }} --adr.inference-src.env.CM_GIT_URL=${{ github.event.pull_request.head.repo.html_url }}
+        cm run script --tags=get,mlperf,inference,loadgen --quiet --version=custom --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.${{ github.event.pull_request.head.ref }}
diff --git a/.github/workflows/test-resnet50.yml b/.github/workflows/test-resnet50.yml
@@ -30,9 +30,7 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        python3 -m pip install cmind
-        cm pull repo mlcommons@ck
-        cm run script --quiet --tags=get,sys-utils-cm
+        python3 -m pip install cm4mlops
     - name: Test Resnet50 and end to end submission generation
       run: |
-        cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=resnet50 --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --adr.inference-src.env.CM_GIT_CHECKOUT=${{ github.event.pull_request.head.ref }} --adr.inference-src.version=custom --adr.inference-src.env.CM_GIT_URL=${{ github.event.pull_request.head.repo.html_url }}
+        cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=resnet50 --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --adr.inference-src.tags=_branch.${{ github.event.pull_request.head.ref }},_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src.version=custom
diff --git a/.github/workflows/test-retinanet.yml b/.github/workflows/test-retinanet.yml
@@ -30,9 +30,7 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        python3 -m pip install cmind
-        cm pull repo mlcommons@ck
-        cm run script --quiet --tags=get,sys-utils-cm
+        python3 -m pip install cm4mlops
     - name: Test Retinanet and end to end submission generation
       run: |
-        cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=retinanet --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=10 --adr.compiler.tags=gcc  --adr.inference-src.version=custom --adr.inference-src.env.CM_GIT_CHECKOUT=${{ github.event.pull_request.head.ref }} --adr.inference-src.env.CM_GIT_URL=${{ github.event.pull_request.head.repo.html_url }}
+        cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=retinanet --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=10 --adr.compiler.tags=gcc  --adr.inference-src.version=custom --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.${{ github.event.pull_request.head.ref }}
diff --git a/.github/workflows/test-submission-checker.yml b/.github/workflows/test-submission-checker.yml
@@ -33,4 +33,4 @@ jobs:
         git clone https://github.com/mlcommons/inference_results_v4.0 --depth 1
     - name: Test MLPerf inference submission checker
       run: |
-        cm run script --tags=run,mlperf,inference,submission,checker --adr.inference-src.tags=_branch.${{ github.event.pull_request.head.ref }},_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src.version=custom --input=`pwd`/inference_results_v4.0 --quiet 
+        cm run script --tags=run,mlperf,inference,submission,checker --adr.inference-src.tags=_branch.${{ github.event.pull_request.head.ref }},_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src.version=custom --input=`pwd`/inference_results_v4.0 --version=r4.0 --quiet 
diff --git a/.github/workflows/test-tvm.yml b/.github/workflows/test-tvm.yml
@@ -30,9 +30,7 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        python3 -m pip install cmind
-        cm pull repo mlcommons@ck
-        cm run script --quiet --tags=get,sys-utils-cm
+        python3 -m pip install cm4mlops
     - name: Test Resnet50 TVM backend
       run: |
-        cm run script --tags=run,mlperf,inference,generate-run-cmds --quiet --submitter="MLCommons" --hw_name=default --model=resnet50 --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --adr.compiler.tags=gcc  --adr.inference-src.version=custom --adr.inference-src.env.CM_GIT_CHECKOUT=${{ github.event.pull_request.head.ref }} --adr.inference-src.env.CM_GIT_URL=${{ github.event.pull_request.head.repo.html_url }} --target_qps=1
+        cm run script --tags=run,mlperf,inference,generate-run-cmds --quiet --submitter="MLCommons" --hw_name=default --model=resnet50 --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --adr.compiler.tags=gcc  --adr.inference-src.version=custom --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.${{ github.event.pull_request.head.ref }}
diff --git a/compliance/nvidia/README.md b/compliance/nvidia/README.md
@@ -37,5 +37,6 @@ The `run_verification.py` found in each test directory will copy the test files
 | 3d-unet | [TEST01](./TEST01/), [TEST05](./TEST05/) |
 | rnnt | [TEST01](./TEST01/), [TEST05](./TEST05/) |
 | gpt-j | - |
-| stable-diffusion-xl | - |
-| Llama2-70b | [TEST06]() |
+| stable-diffusion-xl | [TEST01](./TEST01/), [TEST04](./TEST04/), [TEST05](./TEST05/) |
+| Llama2-70b | [TEST06](./TEST06/) |
+| mixtral-8x7b | [TEST06](./TEST06/) |
diff --git a/language/mixtral-8x7b/README.md b/language/mixtral-8x7b/README.md
@@ -247,15 +247,15 @@ python -u evaluate-accuracy.py --checkpoint-path mistralai/Mixtral-8x7B-instruct
 Reference scores:
 Open Orca:
 ```json
-{'rouge1': 45.4911, 'rouge2': 23.2829, 'rougeL': 30.3615, 'rougeLsum': 42.4333}
+{'rouge1': 45.4911, 'rouge2': 23.2829, 'rougeL': 30.3615}
 ```
 GSM8K:
 ```json
-{'gsm8k_accuracy': 73.78}
+{'gsm8k': 73.78}
 ```
 MBXP:
 ```json
-{'mbxp_accuracy': 60.16}
+{'mbxp': 60.16}
 ```
 For official submissions, 99% of each reference score is enforced. Additionally, the generated `tokens_per_sample` must be within 90%-110% of the reference value:
 ```json

diff --git a/language/mixtral-8x7b/evaluate-accuracy.py b/language/mixtral-8x7b/evaluate-accuracy.py
@@ -193,20 +193,18 @@ def main():
             continue
         correct += (ref == tgt)
 
-    gsm8k_accuracy = 100.0 * correct / gsm8k_total
+    result['gsm8k'] = 100.0 * correct / gsm8k_total
 
     # MBXP metric
     from evaluate_mbxp import evaluate_mbxp
-    mbxp_accuracy = evaluate_mbxp(results_MBXP, args.n_workers)
+    result['mbxp'] = evaluate_mbxp(results_MBXP, args.n_workers)
 
     result = {
         **result,
         'gen_len': np.sum(prediction_lens),
         'gen_num': gen_num,
         'gen_tok_len': gen_tok_len,
         'tokens_per_sample': round(gen_tok_len / gen_num, 1),
-        'gsm8k_accuracy': gsm8k_accuracy,
-        'mbxp_accuracy': mbxp_accuracy
     }
 
     print("\nResults\n")

diff --git a/loadgen/pyproject.toml b/loadgen/pyproject.toml
@@ -4,4 +4,4 @@ build-backend = "setuptools.build_meta:__legacy__"
 
 [tool.cibuildwheel]
 environment = "CFLAGS='-std=c++14'"
-build = "cp3{7,8,9,10,11}-*"
+build = "cp3{7,8,9,10,11,12}-*"
diff --git a/loadgen/setup.py b/loadgen/setup.py
@@ -40,6 +40,9 @@
     "query_sample_library.h",
     "system_under_test.h",
     "test_settings.h",
+    "issue_query_controller.h",
+    "early_stopping.h",
+    "query_dispatch_library.h",
 ]
 
 lib_headers = [
@@ -49,6 +52,8 @@
     "utils.h",
     "version.h",
     "results.h",
+    "bindings/c_api.h",
+    "version_generator.py"
 ]
 
 lib_sources = [
@@ -63,6 +68,7 @@
 ]
 
 lib_bindings = [
+    "bindings/c_api.cc",
     "bindings/python_api.cc",
 ]
 
@@ -76,13 +82,13 @@
 
 mlperf_loadgen_module = Pybind11Extension(
         "mlperf_loadgen",
-        define_macros=[("MAJOR_VERSION", "4"), ("MINOR_VERSION", "0")],
+        define_macros=[("MAJOR_VERSION", "4"), ("MINOR_VERSION", "1")],
         include_dirs=[".", get_include()],
         sources=mlperf_loadgen_sources,
         depends=mlperf_loadgen_headers)
 
-setup(name="mlperf_loadgen",
-      version="4.0",
+setup(name="mlcommons_loadgen",
+      version="4.1",
       description="MLPerf Inference LoadGen python bindings",
       url="https://mlcommons.org/",
       cmdclass={"build_ext": build_ext},

diff --git a/loadgen/version_generator.py b/loadgen/version_generator.py
@@ -69,7 +69,6 @@ def generate_loadgen_version_definitions_sha1(ofile, loadgen_root):
     sha1s = ""
     loadgen_files = (
         ["/bindings/" + s for s in os.listdir(loadgen_root + "/bindings")] +
-        ["/demos/" + s for s in os.listdir(loadgen_root + "/demos")] +
         ["/" + s for s in os.listdir(loadgen_root)])
     for fn in sorted(loadgen_files):
         full_fn = loadgen_root + fn

diff --git a/mlperf.conf b/mlperf.conf
@@ -19,13 +19,13 @@ stable-diffusion-xl.*.performance_sample_count_override = 5000
 3d-unet.*.performance_sample_count_override = 0
 
 # Set seeds. The seeds will be distributed two weeks before the submission.
-*.*.qsl_rng_seed = 13281865557512327830
-*.*.sample_index_rng_seed = 198141574272810017
-*.*.schedule_rng_seed = 7575108116881280410
+*.*.qsl_rng_seed = 3066443479025735752
+*.*.sample_index_rng_seed = 10688027786191513374
+*.*.schedule_rng_seed = 14962580496156340209
 # Set seeds for TEST_05. The seeds will be distributed two weeks before the submission.
-*.*.test05_qsl_rng_seed = 2376919268182438552
-*.*.test05_sample_index_rng_seed = 11176391829184272374
-*.*.test05_schedule_rng_seed = 3911940905271271337
+*.*.test05_qsl_rng_seed = 16799458546791641818
+*.*.test05_sample_index_rng_seed = 5453809927556429288
+*.*.test05_schedule_rng_seed = 5435552105434836064
 
 
 *.SingleStream.target_latency_percentile = 90

diff --git a/text_to_image/tools/sample_ids.py b/text_to_image/tools/sample_ids.py
@@ -16,7 +16,7 @@ def get_args():
         "--n", type=int, default=10, help="Dataset download location"
     )
     parser.add_argument(
-        "--seed", "-s", type=int, default=926019364, help="Dataset download location"
+        "--seed", "-s", type=int, default=633994880, help="Dataset download location"
     )
     args = parser.parse_args()
     return args

diff --git a/text_to_image/tools/sample_ids.txt b/text_to_image/tools/sample_ids.txt
@@ -1,10 +1,10 @@
-4459
-4015
-2705
-1682
-4048
-4683
-3757
-1578
-3319
-95
+4655
+2569
+1303
+109
+4509
+3009
+2179
+1826
+2094
+3340
Original file line number	Diff line number	Diff line change
		@@ -1,10 +1,10 @@
		4459
		4015
		2705
		1682
		4048
		4683
		3757
		1578
		3319
		95
		4655
		2569
		1303
		109
		4509
		3009
		2179
		1826
		2094
		3340