Commit 94a26cd
**bigger_is_better**

Environment for all results in this comment: NVIDIA A10G x 4, vllm 0.1.0, Python 3.10.12 (GCC 9.4.0), torch 2.1.2+cu121.

Benchmark: vllm serving (model mistralai/Mistral-7B-Instruct-v0.2, max-model-len 4096, sparsity None); benchmark_serving args: nr-qps-pair_ = "5,inf", dataset = sharegpt.

| Metric | Value | Unit |
|---|---|---|
| request_throughput | 1.0852489035865591 | prompts/s |
| input_throughput | 202.0733458478173 | tokens/s |
| output_throughput | 191.87200615410367 | tokens/s |

Benchmark: vllm engine throughput, with dataset (model mistralai/Mistral-7B-Instruct-v0.2, max_model_len 4096); benchmark_throughput args: use-all-available-gpus_, output-len = 128, num-prompts = 100, dataset = sharegpt, max-model-len = 4096.

| Metric | Value | Unit |
|---|---|---|
| request_throughput | 7.8124766626673825 | prompts/s |
| token_throughput | 3542.4112931532713 | tokens/s |

*This comment was automatically generated by workflow using github-action-benchmark.*
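Each row above is emitted as a record whose description field carries the full benchmark configuration, alongside the environment metadata. A minimal sketch of reading such records back into one-line summaries, assuming they are stored in github-action-benchmark's custom-benchmark JSON layout (a list of objects with `name`, `value`, and `unit` keys; the extra environment keys mirror the metadata shown above, and only one record is reproduced here):

```python
import json

# One record in the assumed custom-benchmark layout, taken from the
# first row of the serving table above.
raw = """
[
  {"name": "request_throughput",
   "value": 1.0852489035865591,
   "unit": "prompts/s",
   "gpu_description": "NVIDIA A10G x 4",
   "vllm_version": "0.1.0"}
]
"""

for rec in json.loads(raw):
    # One summary line per metric: name, value, unit, environment.
    print(f"{rec['name']}: {rec['value']:.4f} {rec['unit']} "
          f"on {rec['gpu_description']} (vllm {rec['vllm_version']})")
```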
**smaller_is_better**

Benchmark: vllm serving (model mistralai/Mistral-7B-Instruct-v0.2, max-model-len 4096, sparsity None); benchmark_serving args: nr-qps-pair_ = "5,inf", dataset = sharegpt. Environment: NVIDIA A10G x 4, vllm 0.1.0, Python 3.10.12 (GCC 9.4.0), torch 2.1.2+cu121.

| Metric | Value | Unit |
|---|---|---|
| median_request_latency | 3240.434912000069 | ms |
| mean_ttft_ms | 237.94871639997837 | ms |
| median_ttft_ms | 237.83499699993627 | ms |
| mean_tpot_ms | 13.714923565798504 | ms |
| median_tpot_ms | 14.25400808860743 | ms |

*This comment was automatically generated by workflow using github-action-benchmark.*
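TTFT (time to first token) and TPOT (time per output token) decompose a streaming request's latency: a response of n tokens costs roughly TTFT for the first token plus TPOT for each of the remaining n - 1. A quick sanity check against the medians above; the 128-token response length is an illustrative assumption, not taken from this run (the measured median request latency of ~3240 ms implies ShareGPT responses averaging roughly 200 output tokens):

```python
# Median figures from the table above.
median_ttft_ms = 237.83499699993627  # time to first token
median_tpot_ms = 14.25400808860743   # time per output token after the first

# Hypothetical 128-token response (illustrative assumption).
output_tokens = 128
est_latency_ms = median_ttft_ms + median_tpot_ms * (output_tokens - 1)
print(f"~{est_latency_ms:.0f} ms end-to-end for {output_tokens} output tokens")
# -> ~2048 ms
```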
**bigger_is_better**

Benchmark: VLLM Engine throughput, synthetic (model NousResearch/Llama-2-7b-chat-hf, max_model_len 4096); benchmark_throughput args: use-all-available-gpus_, input-len = 256, output-len = 128, num-prompts = 1000. Environment: NVIDIA A10G x 1, vllm 0.1.0, Python 3.10.12 (GCC 9.4.0), torch 2.1.2+cu121.

This comment compares two runs; github-action-benchmark comparison comments list the current value, the previous value, and their ratio:

| Metric | Current | Previous | Ratio | Unit |
|---|---|---|---|---|
| request_throughput | 3.9845809581855027 | 3.98509085065037 | 1.00 | prompts/s |
| token_throughput | 1530.0790879432332 | 1530.2748866497423 | 1.00 | tokens/s |

*This comment was automatically generated by workflow using github-action-benchmark.*
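With fixed synthetic lengths, every request processes exactly input-len + output-len = 384 tokens, so token throughput is just request throughput scaled by 384; the figures above satisfy that identity to full precision. A minimal check, also computing the relative change between the two runs (which rounds to the 1.00 shown):

```python
current_rps  = 3.9845809581855027  # prompts/s, this commit
previous_rps = 3.98509085065037    # prompts/s, previous run

input_len, output_len = 256, 128   # synthetic prompt shape from the config

# Fixed token count per request makes the two throughput metrics redundant:
token_throughput = current_rps * (input_len + output_len)
print(f"{token_throughput:.4f} tokens/s")  # 1530.0791, as in the table

# Relative change between runs, about -0.01%: within noise, hence Ratio = 1.00.
change = (current_rps - previous_rps) / previous_rps
print(f"{change:+.4%}")
```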