Skip to content

Commit

Permalink
#0: Fix average tok/s/u to account for compile iteration
Browse files (browse the repository at this point in the history)
  • Loading branch information
mtairum committed Jan 27, 2025
1 parent 4ce4ff9 commit f821686
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions models/demos/llama3/demo/simple_text_demo.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -501,8 +501,10 @@ def test_llama_demo_text(
avg_decode_iteration_time = total_inference_decode_time / (iteration - 1)

prefill_tok_s = prefill_lens[0] / total_inference_prefill_time / batch_size
- decode_tok_s_user = num_tokens_generated_decode[0] / total_inference_decode_time
- decode_tok_s = num_tokens_generated_decode[0] / total_inference_decode_time * batch_size
+ decode_tok_s_user = (num_tokens_generated_decode[0] - 1) / total_inference_decode_time # Remove the compile time
+ decode_tok_s = (
+ (num_tokens_generated_decode[0] - 1) / total_inference_decode_time * batch_size
+ ) # Remove the compile time

measurements = {
# Required measurements
Expand Down

0 comments on commit f821686

Please sign in to comment.