From 18b9328c4214aed1ed9f5bbccea50b99a68f6d38 Mon Sep 17 00:00:00 2001 From: caetano melone Date: Thu, 7 Mar 2024 00:56:45 -0800 Subject: [PATCH 1/5] add tests for prediction API --- gantry/tests/defs/prediction.py | 64 ++++++++++++++++ gantry/tests/sql/insert_samples.sql | 6 ++ gantry/tests/test_prediction.py | 111 ++++++++++++++++++++++++++++ 3 files changed, 181 insertions(+) create mode 100644 gantry/tests/defs/prediction.py create mode 100644 gantry/tests/sql/insert_samples.sql create mode 100644 gantry/tests/test_prediction.py diff --git a/gantry/tests/defs/prediction.py b/gantry/tests/defs/prediction.py new file mode 100644 index 0000000..9da2ed4 --- /dev/null +++ b/gantry/tests/defs/prediction.py @@ -0,0 +1,64 @@ +# flake8: noqa +# fmt: off + +NORMAL_BUILD = { + "hash": "testing", + "package": { + "name": "py-torch", + "version": "2.2.1", + "variants": "~caffe2+cuda+cudnn~debug+distributed+fbgemm+gloo+kineto~magma~metal+mkldnn+mpi~nccl+nnpack+numa+numpy+onnx_ml+openmp+qnnpack~rocm+tensorpipe~test+valgrind+xnnpack build_system=python_pip cuda_arch=80", + }, + "compiler": { + "name": "gcc", + "version": "11.4.0", + }, +} + +# everything in NORMAL_BUILD["package"]["variants"] except removing build_system=python_pip +# in order to test the expensive variants filter +EXPENSIVE_VARIANT_BUILD = { + "hash": "testing", + "package": { + "name": "py-torch", + "version": "2.2.1", + "variants": "~caffe2+cuda+cudnn~debug+distributed+fbgemm+gloo+kineto~magma~metal+mkldnn+mpi~nccl+nnpack+numa+numpy+onnx_ml+openmp+qnnpack~rocm+tensorpipe~test+valgrind+xnnpack cuda_arch=80", + }, + "compiler": { + "name": "gcc", + "version": "11.4.0", + }, +} + +# no variants should match this, so we expect the default prediction +BAD_VARIANT_BUILD = { + "hash": "testing", + "package": { + "name": "py-torch", + "version": "2.2.1", + "variants": "+no~expensive~variants+match", + }, + "compiler": { + "name": "gcc", + "version": "11.4.0", + }, +} + +# calculated by running the baseline prediction algorithm on the sample data in gantry/tests/sql/insert_prediction.sql +NORMAL_PREDICTION = { + "hash": "testing", + "variables": { + "KUBERNETES_CPU_REQUEST": "12", + "KUBERNETES_MEMORY_REQUEST": "9576M", + }, +} + + +# this is what will get returned when there are no samples in the database +# that match what the client wants +DEFAULT_PREDICTION = { + "hash": "testing", + "variables": { + "KUBERNETES_CPU_REQUEST": "1", + "KUBERNETES_MEMORY_REQUEST": "2000M", + }, +} diff --git a/gantry/tests/sql/insert_samples.sql b/gantry/tests/sql/insert_samples.sql new file mode 100644 index 0000000..d017ebe --- /dev/null +++ b/gantry/tests/sql/insert_samples.sql @@ -0,0 +1,6 @@ +INSERT INTO nodes VALUES(6789,'ec2c47a0-7e9b-cfa3-9ad4-ac227ade598d','ip-192-168-202-150.ec2.internal',32.0,131072000000.0,'amd64','linux','m5.8xlarge'); +INSERT INTO jobs VALUES(6781,'runner-2j2ndhxu-project-2-concurrent-0-nbogpypi1',6789,1708919572.983000041,1708924744.811000108,101502092,'success','develop','py-torch','2.2.1','{"caffe2": false, "cuda": true, "cudnn": true, "debug": false, "distributed": true, "fbgemm": true, "gloo": true, "kineto": true, "magma": false, "metal": false, "mkldnn": true, "mpi": true, "nccl": false, "nnpack": true, "numa": true, "numpy": true, "onnx_ml": true, "openmp": true, "qnnpack": true, "rocm": false, "tensorpipe": true, "test": false, "valgrind": true, "xnnpack": true, "build_system": "python_pip", "cuda_arch": "80"}','gcc','11.4.0','linux-ubuntu20.04-x86_64_v3','e4s',12,12.0,NULL,9.77948152336477605,11.98751586519425772,12.00060520666194109,0.3736576704015182604,3.811106184376615414,48000000000.0,64000000000.0,9652098890.24199867,7399608320.0,41186873344.0,85508096.0,8707419891.779100419); +INSERT INTO jobs VALUES(6782,'runner-2j2ndhxu-project-2-concurrent-0-nbogpypi2',6789,1708919572.983000041,1708924744.811000108,101502093,'success','develop','py-torch','2.2.1','{"caffe2": false, "cuda": true, "cudnn": true, "debug": false, "distributed": true, "fbgemm": true, "gloo": true, "kineto": true, "magma": false, "metal": false, "mkldnn": true, "mpi": true, "nccl": false, "nnpack": true, "numa": true, "numpy": true, "onnx_ml": true, "openmp": true, "qnnpack": true, "rocm": false, "tensorpipe": true, "test": false, "valgrind": true, "xnnpack": true, "build_system": "python_pip", "cuda_arch": "80"}','gcc','11.4.0','linux-ubuntu20.04-x86_64_v3','e4s',12,12.0,NULL,10.77948152336477605,11.98751586519425772,12.00060520666194109,0.3736576704015182604,3.811106184376615414,48000000000.0,64000000000.0,9958098890.24199867,7399608320.0,41186873344.0,85508096.0,8707419891.779100419); +INSERT INTO jobs VALUES(6783,'runner-2j2ndhxu-project-2-concurrent-0-nbogpypi3',6789,1708919572.983000041,1708924744.811000108,101502094,'success','develop','py-torch','2.2.1','{"caffe2": false, "cuda": true, "cudnn": true, "debug": false, "distributed": true, "fbgemm": true, "gloo": true, "kineto": true, "magma": false, "metal": false, "mkldnn": true, "mpi": true, "nccl": false, "nnpack": true, "numa": true, "numpy": true, "onnx_ml": true, "openmp": true, "qnnpack": true, "rocm": false, "tensorpipe": true, "test": false, "valgrind": true, "xnnpack": true, "build_system": "python_pip", "cuda_arch": "80"}','gcc','11.4.0','linux-ubuntu20.04-x86_64_v3','e4s',12,12.0,NULL,11.77948152336477605,11.98751586519425772,12.00060520666194109,0.3736576704015182604,3.811106184376615414,48000000000.0,64000000000.0,9158098890.24199867,7399608320.0,41186873344.0,85508096.0,8707419891.779100419); +INSERT INTO jobs VALUES(6784,'runner-2j2ndhxu-project-2-concurrent-0-nbogpypi4',6789,1708919572.983000041,1708924744.811000108,101502095,'success','develop','py-torch','2.2.1','{"caffe2": false, "cuda": true, "cudnn": true, "debug": false, "distributed": true, "fbgemm": true, "gloo": true, "kineto": true, "magma": false, "metal": false, "mkldnn": true, "mpi": true, "nccl": false, "nnpack": true, "numa": true, "numpy": true, "onnx_ml": true, "openmp": true, "qnnpack": true, "rocm": false, "tensorpipe": true, "test": false, "valgrind": true, "xnnpack": true, "build_system": "python_pip", "cuda_arch": "80"}','gcc','11.4.0','linux-ubuntu20.04-x86_64_v3','e4s',12,12.0,NULL,12.77948152336477605,11.98751586519425772,12.00060520666194109,0.3736576704015182604,3.811106184376615414,48000000000.0,64000000000.0,9758098890.24199867,7399608320.0,41186873344.0,85508096.0,8707419891.779100419); +INSERT INTO jobs VALUES(6785,'runner-2j2ndhxu-project-2-concurrent-0-nbogpypi5',6789,1708919572.983000041,1708924744.811000108,101502096,'success','develop','py-torch','2.2.1','{"caffe2": false, "cuda": true, "cudnn": true, "debug": false, "distributed": true, "fbgemm": true, "gloo": true, "kineto": true, "magma": false, "metal": false, "mkldnn": true, "mpi": true, "nccl": false, "nnpack": true, "numa": true, "numpy": true, "onnx_ml": true, "openmp": true, "qnnpack": true, "rocm": false, "tensorpipe": true, "test": false, "valgrind": true, "xnnpack": true, "build_system": "python_pip", "cuda_arch": "80"}','gcc','11.4.0','linux-ubuntu20.04-x86_64_v3','e4s',12,12.0,NULL,13.77948152336477605,11.98751586519425772,12.00060520666194109,0.3736576704015182604,3.811106184376615414,48000000000.0,64000000000.0,9358098890.24199867,7399608320.0,41186873344.0,85508096.0,8707419891.779100419); diff --git a/gantry/tests/test_prediction.py b/gantry/tests/test_prediction.py new file mode 100644 index 0000000..ac56eb8 --- /dev/null +++ b/gantry/tests/test_prediction.py @@ -0,0 +1,111 @@ +import pytest + +from gantry.routes.prediction import prediction +from gantry.tests.defs import prediction as defs +from gantry.util.prediction import validate_payload + + +@pytest.fixture +async def db_conn_inserted(db_conn): + """Returns a connection to a database with 5 samples inserted""" + + with open("gantry/tests/sql/insert_samples.sql") as f: + await db_conn.executescript(f.read()) + + return db_conn + + +async def test_exact_match(db_conn_inserted): + """All fields are an exact match for 5 samples in the database.""" + + assert ( + await prediction.predict_single(db_conn_inserted, defs.NORMAL_BUILD) + == defs.NORMAL_PREDICTION + ) + + +async def test_expensive_variants(db_conn_inserted): + """ + Tests whether the algorithm filters by expensive variants. + The input has been modified to prevent an exact match with + any of the samples. + """ + + assert ( + await prediction.predict_single(db_conn_inserted, defs.EXPENSIVE_VARIANT_BUILD) + == defs.NORMAL_PREDICTION + ) + + +async def test_no_variant_match(db_conn_inserted): + """ + All fields match except for variants, expect default predictions with no sample. + """ + + assert ( + await prediction.predict_single(db_conn_inserted, defs.BAD_VARIANT_BUILD) + == defs.DEFAULT_PREDICTION + ) + + +async def test_partial_match(db_conn_inserted): + """ + Some of the fields match, so the prediction should be based on matching + with other fields. In reality, we're using the same dataset but just + testing that the prediction will be the same with a different compiler name. + """ + + # same as NORMAL_BUILD, but with a different compiler name to test partial matching + diff_compiler_build = defs.NORMAL_BUILD.copy() + diff_compiler_build["compiler"]["name"] = "gcc-different" + + assert ( + await prediction.predict_single(db_conn_inserted, diff_compiler_build) + == defs.NORMAL_PREDICTION + ) + + +async def test_empty_sample(db_conn): + """No samples in the database, so we expect default predictions.""" + + assert ( + await prediction.predict_single(db_conn, defs.NORMAL_BUILD) + == defs.DEFAULT_PREDICTION + ) + + +# Test validate_payload + + +def test_valid_payload(): + """Tests that a valid payload returns True""" + assert validate_payload(defs.NORMAL_BUILD) is True + + +def test_invalid_payloads(): + """Test a series of invalid payloads""" + + # non dict + assert validate_payload("hi") is False + + build = defs.NORMAL_BUILD.copy() + # missing package + del build["package"] + assert validate_payload(build) is False + + build = defs.NORMAL_BUILD.copy() + # missing compiler + del build["compiler"] + assert validate_payload(build) is False + + # name and version are strings in the package and compiler + for key in ["name", "version"]: + for field in ["package", "compiler"]: + build = defs.NORMAL_BUILD.copy() + build[field][key] = 123 + assert validate_payload(build) is False + + # invalid variants + build = defs.NORMAL_BUILD.copy() + build["package"]["variants"] = "+++++" + assert validate_payload(build) is False From 2479cfdae46aa19dde8c9d6c224daf42306b1415 Mon Sep 17 00:00:00 2001 From: caetano melone Date: Thu, 7 Mar 2024 20:53:20 -0800 Subject: [PATCH 2/5] update tests to use spec strings instead of dict payloads --- gantry/tests/defs/prediction.py | 50 +++++++------------------------- gantry/tests/test_prediction.py | 51 ++++++++++++++++----------------- 2 files changed, 36 insertions(+), 65 deletions(-) diff --git a/gantry/tests/defs/prediction.py b/gantry/tests/defs/prediction.py index 9da2ed4..90408a1 100644 --- a/gantry/tests/defs/prediction.py +++ b/gantry/tests/defs/prediction.py @@ -1,62 +1,34 @@ # flake8: noqa # fmt: off -NORMAL_BUILD = { - "hash": "testing", - "package": { - "name": "py-torch", - "version": "2.2.1", - "variants": "~caffe2+cuda+cudnn~debug+distributed+fbgemm+gloo+kineto~magma~metal+mkldnn+mpi~nccl+nnpack+numa+numpy+onnx_ml+openmp+qnnpack~rocm+tensorpipe~test+valgrind+xnnpack build_system=python_pip cuda_arch=80", - }, - "compiler": { - "name": "gcc", - "version": "11.4.0", - }, -} +from gantry.util.spec import parse_alloc_spec + +NORMAL_BUILD = parse_alloc_spec( + "py-torch@2.2.1 ~caffe2+cuda+cudnn~debug+distributed+fbgemm+gloo+kineto~magma~metal+mkldnn+mpi~nccl+nnpack+numa+numpy+onnx_ml+openmp+qnnpack~rocm+tensorpipe~test+valgrind+xnnpack build_system=python_pip cuda_arch=80%gcc@11.4.0" +) # everything in NORMAL_BUILD["package"]["variants"] except removing build_system=python_pip # in order to test the expensive variants filter -EXPENSIVE_VARIANT_BUILD = { - "hash": "testing", - "package": { - "name": "py-torch", - "version": "2.2.1", - "variants": "~caffe2+cuda+cudnn~debug+distributed+fbgemm+gloo+kineto~magma~metal+mkldnn+mpi~nccl+nnpack+numa+numpy+onnx_ml+openmp+qnnpack~rocm+tensorpipe~test+valgrind+xnnpack cuda_arch=80", - }, - "compiler": { - "name": "gcc", - "version": "11.4.0", - }, -} +EXPENSIVE_VARIANT_BUILD = parse_alloc_spec( + "py-torch@2.2.1 ~caffe2+cuda+cudnn~debug+distributed+fbgemm+gloo+kineto~magma~metal+mkldnn+mpi~nccl+nnpack+numa+numpy+onnx_ml+openmp+qnnpack~rocm+tensorpipe~test+valgrind+xnnpack cuda_arch=80%gcc@11.4.0" +) # no variants should match this, so we expect the default prediction -BAD_VARIANT_BUILD = { - "hash": "testing", - "package": { - "name": "py-torch", - "version": "2.2.1", - "variants": "+no~expensive~variants+match", - }, - "compiler": { - "name": "gcc", - "version": "11.4.0", - }, -} +BAD_VARIANT_BUILD = parse_alloc_spec( + "py-torch@2.2.1 +no~expensive~variants+match%gcc@11.4.0" +) # calculated by running the baseline prediction algorithm on the sample data in gantry/tests/sql/insert_prediction.sql NORMAL_PREDICTION = { - "hash": "testing", "variables": { "KUBERNETES_CPU_REQUEST": "12", "KUBERNETES_MEMORY_REQUEST": "9576M", }, } - # this is what will get returned when there are no samples in the database # that match what the client wants DEFAULT_PREDICTION = { - "hash": "testing", "variables": { "KUBERNETES_CPU_REQUEST": "1", "KUBERNETES_MEMORY_REQUEST": "2000M", diff --git a/gantry/tests/test_prediction.py b/gantry/tests/test_prediction.py index ac56eb8..8f93055 100644 --- a/gantry/tests/test_prediction.py +++ b/gantry/tests/test_prediction.py @@ -2,7 +2,7 @@ from gantry.routes.prediction import prediction from gantry.tests.defs import prediction as defs -from gantry.util.prediction import validate_payload +from gantry.util.spec import parse_alloc_spec @pytest.fixture @@ -57,7 +57,7 @@ async def test_partial_match(db_conn_inserted): # same as NORMAL_BUILD, but with a different compiler name to test partial matching diff_compiler_build = defs.NORMAL_BUILD.copy() - diff_compiler_build["compiler"]["name"] = "gcc-different" + diff_compiler_build["compiler_name"] = "gcc-different" assert ( await prediction.predict_single(db_conn_inserted, diff_compiler_build) @@ -75,37 +75,36 @@ async def test_empty_sample(db_conn): # Test validate_payload +def test_valid_spec(): + """Tests that a valid spec is parsed correctly.""" + assert parse_alloc_spec("emacs@29.2 +json+native+treesitter%gcc@12.3.0") == { + "pkg_name": "emacs", + "pkg_version": "29.2", + "pkg_variants": '{"json": true, "native": true, "treesitter": true}', + "pkg_variants_dict": {"json": True, "native": True, "treesitter": True}, + "compiler_name": "gcc", + "compiler_version": "12.3.0", + } -def test_valid_payload(): - """Tests that a valid payload returns True""" - assert validate_payload(defs.NORMAL_BUILD) is True +def test_invalid_specs(): + """Test a series of invalid specs""" + # not a spec + assert parse_alloc_spec("hi") == {} -def test_invalid_payloads(): - """Test a series of invalid payloads""" - - # non dict - assert validate_payload("hi") is False - - build = defs.NORMAL_BUILD.copy() # missing package - del build["package"] - assert validate_payload(build) is False + assert parse_alloc_spec("@29.2 +json+native+treesitter%gcc@12.3.0") == {} - build = defs.NORMAL_BUILD.copy() # missing compiler - del build["compiler"] - assert validate_payload(build) is False + assert parse_alloc_spec("emacs@29.2 +json+native+treesitter") == {} + + # variants not spaced correctly + assert parse_alloc_spec("emacs@29.2+json+native+treesitter%gcc@12.3.0") == {} - # name and version are strings in the package and compiler - for key in ["name", "version"]: - for field in ["package", "compiler"]: - build = defs.NORMAL_BUILD.copy() - build[field][key] = 123 - assert validate_payload(build) is False + # missing versions + assert parse_alloc_spec("emacs@29.2 +json+native+treesitter%gcc@") == {} + assert parse_alloc_spec("emacs@ +json+native+treesitter%gcc@12.3.0") == {} # invalid variants - build = defs.NORMAL_BUILD.copy() - build["package"]["variants"] = "+++++" - assert validate_payload(build) is False + assert parse_alloc_spec("emacs@29.2 this_is_not_a_thing%gcc@12.3.0") == {} From 7c71a0fd9656b61ecece335c39b0144a4641466a Mon Sep 17 00:00:00 2001 From: caetano melone Date: Tue, 30 Apr 2024 21:37:49 -0700 Subject: [PATCH 3/5] predict -> predict_single --- gantry/tests/test_prediction.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/gantry/tests/test_prediction.py b/gantry/tests/test_prediction.py index 8f93055..29bc261 100644 --- a/gantry/tests/test_prediction.py +++ b/gantry/tests/test_prediction.py @@ -19,7 +19,7 @@ async def test_exact_match(db_conn_inserted): """All fields are an exact match for 5 samples in the database.""" assert ( - await prediction.predict_single(db_conn_inserted, defs.NORMAL_BUILD) + await prediction.predict(db_conn_inserted, defs.NORMAL_BUILD) == defs.NORMAL_PREDICTION ) @@ -32,7 +32,7 @@ async def test_expensive_variants(db_conn_inserted): """ assert ( - await prediction.predict_single(db_conn_inserted, defs.EXPENSIVE_VARIANT_BUILD) + await prediction.predict(db_conn_inserted, defs.EXPENSIVE_VARIANT_BUILD) == defs.NORMAL_PREDICTION ) @@ -43,7 +43,7 @@ async def test_no_variant_match(db_conn_inserted): """ assert ( - await prediction.predict_single(db_conn_inserted, defs.BAD_VARIANT_BUILD) + await prediction.predict(db_conn_inserted, defs.BAD_VARIANT_BUILD) == defs.DEFAULT_PREDICTION ) @@ -60,7 +60,7 @@ async def test_partial_match(db_conn_inserted): diff_compiler_build["compiler_name"] = "gcc-different" assert ( - await prediction.predict_single(db_conn_inserted, diff_compiler_build) + await prediction.predict(db_conn_inserted, diff_compiler_build) == defs.NORMAL_PREDICTION ) @@ -69,8 +69,7 @@ async def test_empty_sample(db_conn): """No samples in the database, so we expect default predictions.""" assert ( - await prediction.predict_single(db_conn, defs.NORMAL_BUILD) - == defs.DEFAULT_PREDICTION + await prediction.predict(db_conn, defs.NORMAL_BUILD) == defs.DEFAULT_PREDICTION ) From b583c9adffe54c7ef0892828abd447e64cb1ee71 Mon Sep 17 00:00:00 2001 From: Caetano Melone Date: Wed, 8 May 2024 11:14:55 -0700 Subject: [PATCH 4/5] split up `parse_alloc_spec` version checks Co-authored-by: Alec Scott --- gantry/tests/test_prediction.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/gantry/tests/test_prediction.py b/gantry/tests/test_prediction.py index 29bc261..4c8110d 100644 --- a/gantry/tests/test_prediction.py +++ b/gantry/tests/test_prediction.py @@ -101,9 +101,13 @@ def test_invalid_specs(): # variants not spaced correctly assert parse_alloc_spec("emacs@29.2+json+native+treesitter%gcc@12.3.0") == {} - # missing versions + # missing compiler version assert parse_alloc_spec("emacs@29.2 +json+native+treesitter%gcc@") == {} + assert parse_alloc_spec("emacs@29.2 +json+native+treesitter%gcc") == {} + + # missing package version assert parse_alloc_spec("emacs@ +json+native+treesitter%gcc@12.3.0") == {} + assert parse_alloc_spec("emacs+json+native+treesitter%gcc@12.3.0") == {} # invalid variants assert parse_alloc_spec("emacs@29.2 this_is_not_a_thing%gcc@12.3.0") == {} From cdcff5c3e81136386204c451cd50c09ff351de27 Mon Sep 17 00:00:00 2001 From: caetano melone Date: Wed, 8 May 2024 11:17:19 -0700 Subject: [PATCH 5/5] style --- gantry/tests/test_prediction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gantry/tests/test_prediction.py b/gantry/tests/test_prediction.py index 4c8110d..59b1ca4 100644 --- a/gantry/tests/test_prediction.py +++ b/gantry/tests/test_prediction.py @@ -104,7 +104,7 @@ def test_invalid_specs(): # missing compiler version assert parse_alloc_spec("emacs@29.2 +json+native+treesitter%gcc@") == {} assert parse_alloc_spec("emacs@29.2 +json+native+treesitter%gcc") == {} - + # missing package version assert parse_alloc_spec("emacs@ +json+native+treesitter%gcc@12.3.0") == {} assert parse_alloc_spec("emacs+json+native+treesitter%gcc@12.3.0") == {}