Skip to content

Commit

Permalink
Fix reference implementation to pass TEST06 (mlcommons#1814)
Browse files Browse the repository at this point in the history
* Update SUT.py

* Update run_verification.py

* Fix first token dtype for mixtral.
  • Loading branch information
arjunsuresh authored Jul 26, 2024
1 parent 379ee53 commit 8a94e52
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 3 deletions.
2 changes: 1 addition & 1 deletion compliance/nvidia/TEST06/run_verification.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def get_args():
help="Specifies the path to the output directory where compliance logs will be uploaded from, i.e. inference_results_v0.7/closed/NVIDIA/compliance/T4x8/resnet/Offline.",
required=True)
parser.add_argument("--eos_token_id", '-e', default=2, help="EOS token id of the tokenizer")
- parser.add_argument("--dtype", "-d", default="int64", choices=["int64", "int32", "int16", "float32"])
+ parser.add_argument("--dtype", "-d", default="int32", choices=["int64", "int32", "int16", "float32"])
parser.add_argument("--scenario", "-s", required=True, choices=["Offline", "Server", "SingleStream", "MultiStream"])
args = parser.parse_args()
return args
Expand Down
2 changes: 1 addition & 1 deletion language/llama2-70b/SUT.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ def process_first_tokens(self):

first_tokens, response_id = first_token_item

- response_data = array.array("B", np.array(first_tokens, np.float32).tobytes())
+ response_data = array.array("B", np.array(first_tokens, np.int32).tobytes())
bi = response_data.buffer_info()
response = [lg.QuerySampleResponse(response_id, bi[0], bi[1])]
lg.FirstTokenComplete(response)
Expand Down
2 changes: 1 addition & 1 deletion language/mixtral-8x7b/SUT.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ def process_first_tokens(self):
first_tokens, response_id = first_token_item

response_data = array.array("B", np.array(
- first_tokens, np.float32).tobytes())
+ first_tokens, np.int32).tobytes())
bi = response_data.buffer_info()
response = [lg.QuerySampleResponse(response_id, bi[0], bi[1])]
lg.FirstTokenComplete(response)
Expand Down

0 comments on commit 8a94e52

Please sign in to comment.