diff --git a/docs/benchmarks/language/llama2-70b.md b/docs/benchmarks/language/llama2-70b.md
index c8df1e2fa..87b637b42 100644
--- a/docs/benchmarks/language/llama2-70b.md
+++ b/docs/benchmarks/language/llama2-70b.md
@@ -28,7 +28,7 @@ hide:
 ## Neural Magic MLPerf Implementation
 
 LLAMA2-70b-99
-{{ mlperf_inference_implementation_readme (4, "llama2-70b-99", "NeuralMagic") }}
+{{ mlperf_inference_implementation_readme (4, "llama2-70b-99", "neuralmagic") }}
 
 LLAMA2-70b-99.9
-{{ mlperf_inference_implementation_readme (4, "llama2-70b-99.9", "NeuralMagic") }}
\ No newline at end of file
+{{ mlperf_inference_implementation_readme (4, "llama2-70b-99.9", "neuralmagic") }}
\ No newline at end of file
diff --git a/docs/benchmarks/recommendation/dlrm-v2.md b/docs/benchmarks/recommendation/dlrm-v2.md
index 0657176c7..d1b41deb1 100644
--- a/docs/benchmarks/recommendation/dlrm-v2.md
+++ b/docs/benchmarks/recommendation/dlrm-v2.md
@@ -11,25 +11,25 @@ hide:
 ## MLPerf Reference Implementation in Python
 
 DLRM-v2-99
-{{ mlperf_inference_implementation_readme (4, "dlrm_v2-99", "reference") }}
+{{ mlperf_inference_implementation_readme (4, "dlrm-v2-99", "reference") }}
 
 DLRM-v2-99.9
-{{ mlperf_inference_implementation_readme (4, "dlrm_v2-99.9", "reference") }}
+{{ mlperf_inference_implementation_readme (4, "dlrm-v2-99.9", "reference") }}
 
 === "Nvidia"
 ## Nvidia MLPerf Implementation
 
 DLRM-v2-99
-{{ mlperf_inference_implementation_readme (4, "dlrm_v2-99", "nvidia") }}
+{{ mlperf_inference_implementation_readme (4, "dlrm-v2-99", "nvidia") }}
 
 DLRM-v2-99.9
-{{ mlperf_inference_implementation_readme (4, "dlrm_v2-99.9", "nvidia") }}
+{{ mlperf_inference_implementation_readme (4, "dlrm-v2-99.9", "nvidia") }}
 
 === "Intel"
 ## Intel MLPerf Implementation
 
 DLRM-v2-99
-{{ mlperf_inference_implementation_readme (4, "dlrm_v2-99", "intel") }}
+{{ mlperf_inference_implementation_readme (4, "dlrm-v2-99", "intel") }}
 
 DLRM-v2-99.9
-{{ mlperf_inference_implementation_readme (4, "dlrm_v2-99.9", "intel") }}
\ No newline at end of file
+{{ mlperf_inference_implementation_readme (4, "dlrm-v2-99.9", "intel") }}
\ No newline at end of file
diff --git a/main.py b/main.py
index 92d5e9ccf..dc71f195a 100644
--- a/main.py
+++ b/main.py
@@ -34,12 +34,12 @@ def mlperf_inference_implementation_readme(spaces, model, implementation):
         devices = [ "CUDA" ]
         frameworks = [ "TensorRT" ]
 
-    elif implementation == "NeuralMagic":
+    elif implementation == "neuralmagic":
         devices = [ "CUDA" ]
-        frameworks = [ "vLLM" ]
+        frameworks = [ "pytorch" ]
 
     elif implementation == "intel":
-        if model not in [ "bert-99", "bert-99.9", "gptj-99", "gptj-99.9", "resnet50", "retinanet", "3d-unet-99", "3d-unet-99.9", "dlrm_v2-99", "dlrm_v2-99.9" ]:
+        if model not in [ "bert-99", "bert-99.9", "gptj-99", "gptj-99.9", "resnet50", "retinanet", "3d-unet-99", "3d-unet-99.9", "dlrm-v2-99", "dlrm-v2-99.9" ]:
             return pre_space+" WIP"
         if model in [ "bert-99", "bert-99.9", "retinanet", "3d-unet-99", "3d-unet-99.9" ]:
             code_version="r4.0"
@@ -269,11 +269,14 @@ def mlperf_inference_run_command(spaces, model, implementation, framework, categ
         docker_cmd_suffix += f" \\\n{pre_space} --test_query_count={test_query_count}"
 
     if "llama2-70b" in model:
-        if implementation != "NeuralMagic":
+        if implementation != "neuralmagic":
             docker_cmd_suffix += f" \\\n{pre_space} --tp_size="
             docker_cmd_suffix += f" \\\n{pre_space} --nvidia_llama2_dataset_file_path="
         else:
             docker_cmd_suffix += f" \\\n{pre_space} --api_server="
+
+    if "dlrm-v2" in model and implementation == "nvidia":
+        docker_cmd_suffix += f" \\\n{pre_space} --criteo_day23_raw_data_path="
 
     docker_setup_cmd = f"""\n
{f_pre_space}```bash
@@ -295,11 +298,14 @@ def mlperf_inference_run_command(spaces, model, implementation, framework, categ
         cmd_suffix += f" \\\n {pre_space} --test_query_count={test_query_count}"
 
     if "llama2-70b" in model:
-        if implementation != "NeuralMagic":
+        if implementation != "neuralmagic":
             cmd_suffix += f" \\\n{pre_space} --tp_size="
             cmd_suffix += f" \\\n{pre_space} --nvidia_llama2_dataset_file_path="
         else:
             cmd_suffix += f" \\\n{pre_space} --api_server="
+
+    if "dlrm-v2" in model and implementation == "nvidia":
+        cmd_suffix += f" \\\n{pre_space} --criteo_day23_raw_data_path="
 
     run_cmd = f"""\n
{f_pre_space}```bash
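
Reviewer note: below is a minimal standalone sketch of the run-command suffix logic this patch touches, for quick sanity-checking of the new branch. `build_cmd_suffix` is a hypothetical name and the snippet is a simplified reproduction, not the repo's actual `mlperf_inference_run_command`; the flag names come straight from the hunks above.

```python
# Hypothetical standalone sketch of the suffix logic changed above.
# build_cmd_suffix is an illustrative name, not the repo's function.
def build_cmd_suffix(model: str, implementation: str, spaces: int = 4) -> str:
    pre_space = " " * spaces
    suffix = ""
    if "llama2-70b" in model:
        if implementation != "neuralmagic":
            # Non-NeuralMagic (e.g. Nvidia) runs take a tensor-parallel size
            # and a dataset file path.
            suffix += f" \\\n{pre_space} --tp_size="
            suffix += f" \\\n{pre_space} --nvidia_llama2_dataset_file_path="
        else:
            # NeuralMagic runs point at an inference API server instead.
            suffix += f" \\\n{pre_space} --api_server="
    if "dlrm-v2" in model and implementation == "nvidia":
        # New in this patch: Nvidia DLRM-v2 runs also need the raw
        # Criteo day-23 data path.
        suffix += f" \\\n{pre_space} --criteo_day23_raw_data_path="
    return suffix

# e.g. build_cmd_suffix("dlrm-v2-99", "nvidia") now yields
# ' \\\n     --criteo_day23_raw_data_path='
```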