Update LLM metrics in submission checker (#1720)
pgmpablo157321 authored Jun 4, 2024
1 parent 1c3f8e7 commit 0e25492
Showing 2 changed files with 37 additions and 11 deletions.
3 changes: 3 additions & 0 deletions loadgen/results.cc
@@ -783,6 +783,9 @@ void PerformanceSummary::LogDetail(AsyncDetail& detail) {
MLPERF_LOG(detail, "result_time_to_output_token_min", time_per_output_token_min);
MLPERF_LOG(detail, "result_time_to_output_token_max", time_per_output_token_max);
MLPERF_LOG(detail, "result_time_to_output_token_mean", time_per_output_token_mean);
double tps_as_completed =
token_count / pr.final_query_all_samples_done_time;
MLPERF_LOG(detail, "result_completed_tokens_per_second", tps_as_completed);
} else {
double tokens_per_second = token_count / pr.max_latency;
MLPERF_LOG(detail, "result_tokens_per_second", tokens_per_second);
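Note: the two throughput definitions in the hunk above differ only in the denominator. The new result_completed_tokens_per_second divides the token count by the time at which the final query's samples all completed, while result_tokens_per_second (the else branch, used for Offline) divides by the maximum latency. A minimal arithmetic sketch, with invented numbers and assuming both times are reported in seconds:

token_count = 2_400_000                    # total output tokens generated in the run (made up)
final_query_all_samples_done_time = 600.0  # seconds until the last query's samples completed (made up)
max_latency = 620.0                        # seconds, Offline-style accounting (made up)

completed_tokens_per_second = token_count / final_query_all_samples_done_time  # 4000.0
tokens_per_second = token_count / max_latency                                  # ~3871.0

print(f"result_completed_tokens_per_second = {completed_tokens_per_second:.1f}")
print(f"result_tokens_per_second           = {tokens_per_second:.1f}")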
45 changes: 34 additions & 11 deletions tools/submission/submission_checker.py
@@ -290,13 +290,33 @@
 }
 
 RESULT_FIELD_BENCHMARK_OVERWRITE = {
-    "llama2-70b-99": {
-        "Offline": "result_tokens_per_second",
-        "Server": "result_completed_samples_per_sec",
+    "v4.0": {
+        "llama2-70b-99": {
+            "Offline": "result_tokens_per_second",
+            "Server": "result_completed_samples_per_sec",
+        },
+        "llama2-70b-99.9": {
+            "Offline": "result_tokens_per_second",
+            "Server": "result_completed_samples_per_sec",
+        },
     },
-    "llama2-70b-99.9": {
+    "v4.1": {
+        "llama2-70b-99": {
         "Offline": "result_tokens_per_second",
-        "Server": "result_completed_samples_per_sec",
+        "Server": "result_completed_tokens_per_second",
     },
+        "llama2-70b-99.9": {
+            "Offline": "result_tokens_per_second",
+            "Server": "result_completed_tokens_per_second",
+        },
+        "gptj-99": {
+            "Offline": "result_inferred_tokens_per_second",
+            "Server": "result_inferred_completed_tokens_per_second",
+        },
+        "gptj-99.9": {
+            "Offline": "result_inferred_tokens_per_second",
+            "Server": "result_inferred_completed_tokens_per_second",
+        }
+    }
 }
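Note: the override table is now keyed by submission version before model and scenario. A minimal sketch (not part of the commit) of how the lookup chain used further down resolves a metric name; the helper name pick_result_field and the RESULT_FIELD_NEW_EXAMPLE entries are illustrative placeholders, not the checker's actual defaults:

RESULT_FIELD_NEW_EXAMPLE = {
    "v4.1": {
        "Offline": "result_samples_per_second",
        "Server": "result_completed_samples_per_sec",
    },
}

OVERWRITE_EXAMPLE = {
    "v4.1": {
        "llama2-70b-99": {
            "Offline": "result_tokens_per_second",
            "Server": "result_completed_tokens_per_second",
        },
    },
}

def pick_result_field(version, model, scenario):
    # Start from the per-scenario default, then apply a benchmark-specific
    # override when one exists for this version/model/scenario combination.
    field = RESULT_FIELD_NEW_EXAMPLE[version][scenario]
    if (version in OVERWRITE_EXAMPLE
            and model in OVERWRITE_EXAMPLE[version]
            and scenario in OVERWRITE_EXAMPLE[version][model]):
        field = OVERWRITE_EXAMPLE[version][model][scenario]
    return field

print(pick_result_field("v4.1", "llama2-70b-99", "Server"))  # result_completed_tokens_per_second
print(pick_result_field("v4.1", "resnet", "Offline"))        # no override: per-scenario default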

@@ -800,6 +820,7 @@ def get_performance_metric(
     config, model, path, scenario_fixed, division, system_json, has_power=False
 ):
     #Assumes new logging format
+    version = config.version
 
     fname = os.path.join(path, "mlperf_log_detail.txt")
     mlperf_log = MLPerfLog(fname)
@@ -810,9 +831,9 @@
     is_valid = True
     scenario = mlperf_log["effective_scenario"]
 
-    res = float(mlperf_log[RESULT_FIELD_NEW[config.version][scenario]])
-    if model in RESULT_FIELD_BENCHMARK_OVERWRITE and scenario in RESULT_FIELD_BENCHMARK_OVERWRITE[model]:
-        res = float(mlperf_log[RESULT_FIELD_BENCHMARK_OVERWRITE[model][scenario]])
+    res = float(mlperf_log[RESULT_FIELD_NEW[version][scenario]])
+    if version in RESULT_FIELD_BENCHMARK_OVERWRITE and model in RESULT_FIELD_BENCHMARK_OVERWRITE[version] and scenario in RESULT_FIELD_BENCHMARK_OVERWRITE[version][model]:
+        res = float(mlperf_log[RESULT_FIELD_BENCHMARK_OVERWRITE[version][model][scenario]])
 
     inferred = False
     if scenario_fixed != scenario:
@@ -826,6 +847,7 @@ def check_performance_dir(
     is_valid = False
     rt = {}
 
+    version = config.version
     # look for: Result is: VALID
     fname = os.path.join(path, "mlperf_log_detail.txt")
     mlperf_log = MLPerfLog(fname)
@@ -840,9 +862,10 @@
     schedule_rng_seed = mlperf_log["effective_schedule_rng_seed"]
     scenario = mlperf_log["effective_scenario"]
 
-    res = float(mlperf_log[RESULT_FIELD_NEW[config.version][scenario]])
-    if model in RESULT_FIELD_BENCHMARK_OVERWRITE and scenario in RESULT_FIELD_BENCHMARK_OVERWRITE[model]:
-        res = float(mlperf_log[RESULT_FIELD_BENCHMARK_OVERWRITE[model][scenario]])
+    res = float(mlperf_log[RESULT_FIELD_NEW[version][scenario]])
+    if version in RESULT_FIELD_BENCHMARK_OVERWRITE and model in RESULT_FIELD_BENCHMARK_OVERWRITE[version] and scenario in RESULT_FIELD_BENCHMARK_OVERWRITE[version][model]:
+        res = float(mlperf_log[RESULT_FIELD_BENCHMARK_OVERWRITE[version][model][scenario]])
+
 
     if model in ["llama2-70b-99", "llama2-70b-99.9"]:
         llama_constraint, is_valid = extra_check_llama2(mlperf_log, scenario_fixed)
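Note: a short usage sketch (not part of the commit) of how the version-aware override plays out when the checker reads a hypothetical v4.1 llama2-70b-99 Server log. A plain dict stands in for MLPerfLog, and the values are invented:

OVERRIDE_EXAMPLE = {
    "v4.1": {"llama2-70b-99": {"Server": "result_completed_tokens_per_second"}},
}

parsed_log = {  # stand-in for MLPerfLog(fname); numbers are made up
    "effective_scenario": "Server",
    "result_completed_samples_per_sec": 12.3,       # Server metric used through v4.0
    "result_completed_tokens_per_second": 3456.7,   # Server metric used from v4.1 on
}

version, model = "v4.1", "llama2-70b-99"
scenario = parsed_log["effective_scenario"]
field = OVERRIDE_EXAMPLE[version][model][scenario]
res = float(parsed_log[field])
print(field, "=", res)  # result_completed_tokens_per_second = 3456.7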