From 56f9d3d6a4627fa46b93706ecc042f4933a5c8af Mon Sep 17 00:00:00 2001 From: David Schneller Date: Tue, 1 Oct 2024 23:28:10 +0200 Subject: [PATCH] Test cleanup --- pspamm/codegen/precision.py | 6 +++++ tests/sve_testsuite_generator.py | 7 +----- tests/unit_tests_arm.py | 42 ++++++++++++++++---------------- tests/unit_tests_hsw.py | 38 ++++++++++++++--------------- tests/unit_tests_knl.py | 38 ++++++++++++++--------------- 5 files changed, 66 insertions(+), 65 deletions(-) diff --git a/pspamm/codegen/precision.py b/pspamm/codegen/precision.py index e88349e..df5153e 100644 --- a/pspamm/codegen/precision.py +++ b/pspamm/codegen/precision.py @@ -19,4 +19,10 @@ def size(self): self.BFLOAT16: 2 }[self] raise NotImplementedError() + + def __repr__(self): + return self.getCType(self) + + def __str__(self): + return self.getCType(self) diff --git a/tests/sve_testsuite_generator.py b/tests/sve_testsuite_generator.py index a3830ee..48fe477 100644 --- a/tests/sve_testsuite_generator.py +++ b/tests/sve_testsuite_generator.py @@ -6,6 +6,7 @@ import sys import os import testsuite_generator as test_generator +from pspamm.codegen.precision import * BASEDIR = 'build' @@ -30,8 +31,6 @@ def make(kernels, arch): f.write(test_generator.head_of_testsuite) - include_single_prec = False - for kern in kernels: arguments = ['pspamm-generator', str(kern.m), str(kern.n), str(kern.k), str(kern.lda), str(kern.ldb), str(kern.ldc), str(kern.alpha), str(kern.beta)] @@ -41,8 +40,6 @@ def make(kernels, arch): prec = 's' if kern.precision == Precision.SINGLE else 'd' arguments += ['--precision', prec] - if prec == 's': - include_single_prec = True block_sizes = list(set(kern.block_sizes)) @@ -100,8 +97,6 @@ def make(kernels, arch): bm = bs[0] bn = bs[1] - prec = 's' if kern.precision == Precision.SINGLE else 'd' - if arch.startswith("arm_sve"): veclen = int(arch[7:]) assert veclen % 128 == 0 and veclen <= 2048 diff --git a/tests/unit_tests_arm.py b/tests/unit_tests_arm.py index 9fd5e54..50ba6c4 100755 --- a/tests/unit_tests_arm.py +++ b/tests/unit_tests_arm.py @@ -11,27 +11,27 @@ kernels = [] for precision in (Precision.SINGLE, Precision.DOUBLE): - kernels.append(generator.DenseKernel("test4", precision, 4, 4, 4, 4, 4, 4, 2.0, 2.0, [(4, 4)], 0.0000001)) - - kernels.append(generator.SparseKernel("test1", precision, 8, 56, 56, 8, 0, 8, 1.0, 0.0, [(8, 4), (8,1)] + [x.getBlocksize(8, 56, 1) for x in blocksize_algs], generator.generateMTX(56, 56, 30), 0.0000001)) - kernels.append(generator.DenseKernel("test2", precision, 8, 40, 40, 8, 40, 8, 3.0, 2.0, [(8, 5), (8,2)] + [x.getBlocksize(8, 40, 1) for x in blocksize_algs], 0.0000001)) - kernels.append(generator.DenseKernel("test3", precision, 8, 56, 56, 8, 56, 8, 0.0, 0.0, [(8, 3), (8, 5)] + [x.getBlocksize(8, 56, 1) for x in blocksize_algs], 0.0000001)) - - kernels.append(generator.SparseKernel("arm_only_test1", precision, 2, 3, 4, 2, 0, 2, 1.1233, 0.0, [(2, 1), (2,3)] + [x.getBlocksize(2, 3, 1) for x in blocksize_algs], generator.generateMTX(4, 3, 5), 0.0000001)) - kernels.append(generator.SparseKernel("arm_only_test2", precision, 2, 3, 4, 20, 0, 14, 1.0, 1.0, [(2, 2), (2,3)] + [x.getBlocksize(2, 3, 1) for x in blocksize_algs], generator.generateMTX(4, 3, 5), 0.0000001)) - kernels.append(generator.SparseKernel("arm_only_test3", precision, 32, 80, 50, 32, 0, 32, 1.0, 3.0, [(8, 5)] + [x.getBlocksize(32, 80, 1) for x in blocksize_algs], generator.generateMTX(50, 80, 294), 0.0000001)) - kernels.append(generator.SparseKernel("arm_only_test4", precision, 32, 32, 32, 34, 0, 32, 1.0, 0.0, [(4, 4), (4,3)] + [x.getBlocksize(32, 32, 1) for x in blocksize_algs], generator.generateMTX(32, 32, 24), 0.0000001)) - kernels.append(generator.SparseKernel("arm_only_test5", precision, 2, 1, 1, 2, 0, 8, 1.0, -1.0, [(2, 1)] + [x.getBlocksize(2, 1, 1) for x in blocksize_algs], generator.generateMTX(1, 1, 1), 0.0000001)) - kernels.append(generator.SparseKernel("arm_only_test6", precision, 2, 2, 2, 2, 0, 2, 2.0, 234234.123, [(2, 1)] + [x.getBlocksize(2, 2, 1) for x in blocksize_algs], generator.generateMTX(2, 2, 1), 0.0000001)) - kernels.append(generator.SparseKernel("arm_only_test7", precision, 16, 5, 7, 16, 0, 16, 0.0, -1.123, [(8, 1), (8,2)] + [x.getBlocksize(16, 5, 1) for x in blocksize_algs], generator.generateMTX(7, 5, 35), 0.0000001)) - - kernels.append(generator.DenseKernel("arm_only_test8", precision, 2, 3, 4, 2, 4, 2, 1.0, 0.0, [(2, 1), (2,3)] + [x.getBlocksize(2, 3, 1) for x in blocksize_algs], 0.0000001)) - kernels.append(generator.DenseKernel("arm_only_test9", precision, 2, 3, 4, 20, 12, 14, 2.0, 1.123, [(2, 2), (2,3)] + [x.getBlocksize(2, 3, 1) for x in blocksize_algs], 0.0000001)) - kernels.append(generator.DenseKernel("arm_only_test10", precision, 32, 80, 50, 32, 50, 32, 0.0, 0.2, [(8, 5)] + [x.getBlocksize(32, 80, 1) for x in blocksize_algs], 0.0000001)) - kernels.append(generator.DenseKernel("arm_only_test11", precision, 32, 32, 32, 33, 68, 32, 1231.0, 14443.0, [(4, 4), (4,3)] + [x.getBlocksize(32, 32, 1) for x in blocksize_algs], 0.0000001)) - kernels.append(generator.DenseKernel("arm_only_test12", precision, 2, 1, 1, 2, 1, 8, 1.0, 3.0, [(2, 1)] + [x.getBlocksize(2, 1, 1) for x in blocksize_algs], 0.0000001)) - kernels.append(generator.DenseKernel("arm_only_test13", precision, 2, 3, 3, 2, 3, 2, 1.0, 0.0, [(2, 1)] + [x.getBlocksize(2, 3, 1) for x in blocksize_algs], 0.0000001)) - kernels.append(generator.DenseKernel("arm_only_test14", precision, 16, 5, 7, 16, 7, 16, 1.0, 1.0, [(8, 1), (8,2)] + [x.getBlocksize(16, 5, 1) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.DenseKernel(f"test4_{precision}", precision, 4, 4, 4, 4, 4, 4, 2.0, 2.0, [(4, 4)], 0.0000001)) + + kernels.append(generator.SparseKernel(f"test1_{precision}", precision, 8, 56, 56, 8, 0, 8, 1.0, 0.0, [(8, 4), (8,1)] + [x.getBlocksize(8, 56, 1) for x in blocksize_algs], generator.generateMTX(56, 56, 30), 0.0000001)) + kernels.append(generator.DenseKernel(f"test2_{precision}", precision, 8, 40, 40, 8, 40, 8, 3.0, 2.0, [(8, 5), (8,2)] + [x.getBlocksize(8, 40, 1) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.DenseKernel(f"test3_{precision}", precision, 8, 56, 56, 8, 56, 8, 0.0, 0.0, [(8, 3), (8, 5)] + [x.getBlocksize(8, 56, 1) for x in blocksize_algs], 0.0000001)) + + kernels.append(generator.SparseKernel(f"arm_only_test1_{precision}", precision, 2, 3, 4, 2, 0, 2, 1.1233, 0.0, [(2, 1), (2,3)] + [x.getBlocksize(2, 3, 1) for x in blocksize_algs], generator.generateMTX(4, 3, 5), 0.0000001)) + kernels.append(generator.SparseKernel(f"arm_only_test2_{precision}", precision, 2, 3, 4, 20, 0, 14, 1.0, 1.0, [(2, 2), (2,3)] + [x.getBlocksize(2, 3, 1) for x in blocksize_algs], generator.generateMTX(4, 3, 5), 0.0000001)) + kernels.append(generator.SparseKernel(f"arm_only_test3_{precision}", precision, 32, 80, 50, 32, 0, 32, 1.0, 3.0, [(8, 5)] + [x.getBlocksize(32, 80, 1) for x in blocksize_algs], generator.generateMTX(50, 80, 294), 0.0000001)) + kernels.append(generator.SparseKernel(f"arm_only_test4_{precision}", precision, 32, 32, 32, 34, 0, 32, 1.0, 0.0, [(4, 4), (4,3)] + [x.getBlocksize(32, 32, 1) for x in blocksize_algs], generator.generateMTX(32, 32, 24), 0.0000001)) + kernels.append(generator.SparseKernel(f"arm_only_test5_{precision}", precision, 2, 1, 1, 2, 0, 8, 1.0, -1.0, [(2, 1)] + [x.getBlocksize(2, 1, 1) for x in blocksize_algs], generator.generateMTX(1, 1, 1), 0.0000001)) + kernels.append(generator.SparseKernel(f"arm_only_test6_{precision}", precision, 2, 2, 2, 2, 0, 2, 2.0, 234234.123, [(2, 1)] + [x.getBlocksize(2, 2, 1) for x in blocksize_algs], generator.generateMTX(2, 2, 1), 0.0000001)) + kernels.append(generator.SparseKernel(f"arm_only_test7_{precision}", precision, 16, 5, 7, 16, 0, 16, 0.0, -1.123, [(8, 1), (8,2)] + [x.getBlocksize(16, 5, 1) for x in blocksize_algs], generator.generateMTX(7, 5, 35), 0.0000001)) + + kernels.append(generator.DenseKernel(f"arm_only_test8_{precision}", precision, 2, 3, 4, 2, 4, 2, 1.0, 0.0, [(2, 1), (2,3)] + [x.getBlocksize(2, 3, 1) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.DenseKernel(f"arm_only_test9_{precision}", precision, 2, 3, 4, 20, 12, 14, 2.0, 1.123, [(2, 2), (2,3)] + [x.getBlocksize(2, 3, 1) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.DenseKernel(f"arm_only_test10_{precision}", precision, 32, 80, 50, 32, 50, 32, 0.0, 0.2, [(8, 5)] + [x.getBlocksize(32, 80, 1) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.DenseKernel(f"arm_only_test11_{precision}", precision, 32, 32, 32, 33, 68, 32, 1231.0, 14443.0, [(4, 4), (4,3)] + [x.getBlocksize(32, 32, 1) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.DenseKernel(f"arm_only_test12_{precision}", precision, 2, 1, 1, 2, 1, 8, 1.0, 3.0, [(2, 1)] + [x.getBlocksize(2, 1, 1) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.DenseKernel(f"arm_only_test13_{precision}", precision, 2, 3, 3, 2, 3, 2, 1.0, 0.0, [(2, 1)] + [x.getBlocksize(2, 3, 1) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.DenseKernel(f"arm_only_test14_{precision}", precision, 16, 5, 7, 16, 7, 16, 1.0, 1.0, [(8, 1), (8,2)] + [x.getBlocksize(16, 5, 1) for x in blocksize_algs], 0.0000001)) generator.make(kernels, "arm") diff --git a/tests/unit_tests_hsw.py b/tests/unit_tests_hsw.py index 8e16ea8..47316e5 100755 --- a/tests/unit_tests_hsw.py +++ b/tests/unit_tests_hsw.py @@ -10,25 +10,25 @@ kernels = [] for precision in (Precision.SINGLE, Precision.DOUBLE): - kernels.append(generator.SparseKernel("test1", precision, 8, 56, 56, 8, 0, 8, 2.0, 0.0, [(8, 4), (8,1)] + [x.getBlocksize(8, 56, 2) for x in blocksize_algs], generator.generateMTX(56, 56, 30), 0.0000001)) - kernels.append(generator.DenseKernel("test2", precision, 8, 40, 40, 8, 40, 8, 2.5, 1.0, [(8,2)] + [x.getBlocksize(8, 40, 2) for x in blocksize_algs], 0.0000001)) - kernels.append(generator.DenseKernel("test3", precision, 8, 56, 56, 8, 56, 8, 1.0, 5.0, [(8, 3)] + [x.getBlocksize(8, 56, 2) for x in blocksize_algs], 0.0000001)) - kernels.append(generator.SparseKernel("hsw_only_test1", precision, 8, 2, 1, 8, 0, 8, 1.0, 0.0, [(8,1)] + [x.getBlocksize(8, 2, 2) for x in blocksize_algs], generator.generateMTX(1, 2, 1), 0.0000001)) - kernels.append(generator.SparseKernel("hsw_only_test2", precision, 24, 40, 40, 32, 0, 24, 1000, 1.0, [(8, 2)] + [x.getBlocksize(24, 40, 2) for x in blocksize_algs], generator.generateMTX(40, 40, 20), 0.0000001)) - - kernels.append(generator.SparseKernel("hsw_only_test3", precision, 8, 2, 1, 8, 0, 16, -2.0, 0.0, [(8, 1)] + [x.getBlocksize(8, 2, 2) for x in blocksize_algs], generator.generateMTX(1, 2, 2), 0.0000001)) - kernels.append(generator.SparseKernel("hsw_only_test4", precision, 24, 20, 10, 40, 0, 24, 35.222, 0.0, [] + [x.getBlocksize(8, 20, 2) for x in blocksize_algs], generator.generateMTX(10, 20, 1), 0.0000001)) - kernels.append(generator.SparseKernel("hsw_only_test5", precision, 64, 5, 10, 64, 0, 64, 2.3, 0.0, [] + [x.getBlocksize(64, 5, 2) for x in blocksize_algs], generator.generateMTX(10, 5, 1), 0.0000001)) - kernels.append(generator.SparseKernel("hsw_only_test6", precision, 8, 1, 1, 16, 0, 56, 1.0, 0.0, [(8, 1)] + [x.getBlocksize(8, 1, 2) for x in blocksize_algs], generator.generateMTX(1, 1, 1), 0.0000001)) - kernels.append(generator.SparseKernel("hsw_only_test7", precision, 8, 24, 40, 8, 0, 8, 1.0, 333333.2222222, [(8,1)] + [x.getBlocksize(8, 24, 2) for x in blocksize_algs], generator.generateMTX(40, 24, 1), 0.0000001)) - - kernels.append(generator.DenseKernel("hsw_only_test8", precision, 8, 2, 1, 8, 1, 8, 2.5, 0.0, [(8,1)] + [x.getBlocksize(8, 2, 2) for x in blocksize_algs], 0.0000001)) - kernels.append(generator.DenseKernel("hsw_only_test9", precision, 32, 40, 40, 32, 60, 32, 2.0, -4.33, [(8,2)] + [x.getBlocksize(32, 40, 2) for x in blocksize_algs], 0.0000001)) - kernels.append(generator.DenseKernel("hsw_only_test10", precision, 56, 28, 56, 56, 56, 56, 0.1, 3.0, [x.getBlocksize(56, 28, 2) for x in blocksize_algs], 0.0000001)) - kernels.append(generator.DenseKernel("hsw_only_test11", precision, 8, 20, 8, 40, 10, 8, 234234.123123, 0.0, [(8,3)] + [x.getBlocksize(8, 20, 2) for x in blocksize_algs], 0.0000001)) - kernels.append(generator.DenseKernel("hsw_only_test12", precision, 64, 5, 10, 64, 12, 64, 1.0, 1.0, [] + [x.getBlocksize(64, 5, 2) for x in blocksize_algs], 0.0000001)) - kernels.append(generator.DenseKernel("hsw_only_test13", precision, 8, 1, 1, 16, 1, 56, 0.0, 123.0, [(8, 1)] + [x.getBlocksize(8, 1, 2) for x in blocksize_algs], 0.0000001)) - kernels.append(generator.DenseKernel("hsw_only_test14", precision, 8, 24, 40, 8, 41, 8, 2.0, 1.0, [] + [x.getBlocksize(8, 24, 2) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.SparseKernel(f"test1_{precision}", precision, 8, 56, 56, 8, 0, 8, 2.0, 0.0, [(8, 4), (8,1)] + [x.getBlocksize(8, 56, 2) for x in blocksize_algs], generator.generateMTX(56, 56, 30), 0.0000001)) + kernels.append(generator.DenseKernel(f"test2_{precision}", precision, 8, 40, 40, 8, 40, 8, 2.5, 1.0, [(8,2)] + [x.getBlocksize(8, 40, 2) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.DenseKernel(f"test3_{precision}", precision, 8, 56, 56, 8, 56, 8, 1.0, 5.0, [(8, 3)] + [x.getBlocksize(8, 56, 2) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.SparseKernel(f"hsw_only_test1_{precision}", precision, 8, 2, 1, 8, 0, 8, 1.0, 0.0, [(8,1)] + [x.getBlocksize(8, 2, 2) for x in blocksize_algs], generator.generateMTX(1, 2, 1), 0.0000001)) + kernels.append(generator.SparseKernel(f"hsw_only_test2_{precision}", precision, 24, 40, 40, 32, 0, 24, 1000, 1.0, [(8, 2)] + [x.getBlocksize(24, 40, 2) for x in blocksize_algs], generator.generateMTX(40, 40, 20), 0.0000001)) + + kernels.append(generator.SparseKernel(f"hsw_only_test3_{precision}", precision, 8, 2, 1, 8, 0, 16, -2.0, 0.0, [(8, 1)] + [x.getBlocksize(8, 2, 2) for x in blocksize_algs], generator.generateMTX(1, 2, 2), 0.0000001)) + kernels.append(generator.SparseKernel(f"hsw_only_test4_{precision}", precision, 24, 20, 10, 40, 0, 24, 35.222, 0.0, [] + [x.getBlocksize(8, 20, 2) for x in blocksize_algs], generator.generateMTX(10, 20, 1), 0.0000001)) + kernels.append(generator.SparseKernel(f"hsw_only_test5_{precision}", precision, 64, 5, 10, 64, 0, 64, 2.3, 0.0, [] + [x.getBlocksize(64, 5, 2) for x in blocksize_algs], generator.generateMTX(10, 5, 1), 0.0000001)) + kernels.append(generator.SparseKernel(f"hsw_only_test6_{precision}", precision, 8, 1, 1, 16, 0, 56, 1.0, 0.0, [(8, 1)] + [x.getBlocksize(8, 1, 2) for x in blocksize_algs], generator.generateMTX(1, 1, 1), 0.0000001)) + kernels.append(generator.SparseKernel(f"hsw_only_test7_{precision}", precision, 8, 24, 40, 8, 0, 8, 1.0, 333333.2222222, [(8,1)] + [x.getBlocksize(8, 24, 2) for x in blocksize_algs], generator.generateMTX(40, 24, 1), 0.0000001)) + + kernels.append(generator.DenseKernel(f"hsw_only_test8_{precision}", precision, 8, 2, 1, 8, 1, 8, 2.5, 0.0, [(8,1)] + [x.getBlocksize(8, 2, 2) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.DenseKernel(f"hsw_only_test9_{precision}", precision, 32, 40, 40, 32, 60, 32, 2.0, -4.33, [(8,2)] + [x.getBlocksize(32, 40, 2) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.DenseKernel(f"hsw_only_test10_{precision}", precision, 56, 28, 56, 56, 56, 56, 0.1, 3.0, [x.getBlocksize(56, 28, 2) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.DenseKernel(f"hsw_only_test11_{precision}", precision, 8, 20, 8, 40, 10, 8, 234234.123123, 0.0, [(8,3)] + [x.getBlocksize(8, 20, 2) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.DenseKernel(f"hsw_only_test12_{precision}", precision, 64, 5, 10, 64, 12, 64, 1.0, 1.0, [] + [x.getBlocksize(64, 5, 2) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.DenseKernel(f"hsw_only_test13_{precision}", precision, 8, 1, 1, 16, 1, 56, 0.0, 123.0, [(8, 1)] + [x.getBlocksize(8, 1, 2) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.DenseKernel(f"hsw_only_test14_{precision}", precision, 8, 24, 40, 8, 41, 8, 2.0, 1.0, [] + [x.getBlocksize(8, 24, 2) for x in blocksize_algs], 0.0000001)) generator.make(kernels, "hsw") diff --git a/tests/unit_tests_knl.py b/tests/unit_tests_knl.py index 6c0d2a4..1795301 100755 --- a/tests/unit_tests_knl.py +++ b/tests/unit_tests_knl.py @@ -12,25 +12,25 @@ kernels = [] for precision in (Precision.SINGLE, Precision.DOUBLE): - kernels.append(generator.SparseKernel("test1", precision, 8, 56, 56, 8, 0, 8, 2.0, 0.0, [(8, 4), (8,1)] + [x.getBlocksize(8, 56, 2) for x in blocksize_algs], generator.generateMTX(56, 56, 30), 0.0000001)) - kernels.append(generator.DenseKernel("test2", precision, 8, 40, 40, 8, 40, 8, 2.5, 1.0, [(8, 5), (8,2)] + [x.getBlocksize(8, 40, 2) for x in blocksize_algs], 0.0000001)) - kernels.append(generator.DenseKernel("test3", precision, 8, 56, 56, 8, 56, 8, 1.0, 5.0, [(8, 3), (8, 5)] + [x.getBlocksize(8, 56, 2) for x in blocksize_algs], 0.0000001)) - kernels.append(generator.SparseKernel("knl_only_test1", precision, 8, 2, 1, 8, 0, 8, 1.0, 0.0, [(8,1)] + [x.getBlocksize(8, 2, 2) for x in blocksize_algs], generator.generateMTX(1, 2, 1), 0.0000001)) - kernels.append(generator.SparseKernel("knl_only_test2", precision, 24, 40, 40, 32, 0, 24, 1000, 1.0, [(8, 2), (16,7)] + [x.getBlocksize(24, 40, 2) for x in blocksize_algs], generator.generateMTX(40, 40, 20), 0.0000001)) - - kernels.append(generator.SparseKernel("knl_only_test3", precision, 8, 2, 1, 8, 0, 16, -2.0, 0.0, [(8, 1)] + [x.getBlocksize(8, 2, 2) for x in blocksize_algs], generator.generateMTX(1, 2, 2), 0.0000001)) - kernels.append(generator.SparseKernel("knl_only_test4", precision, 24, 20, 10, 40, 0, 24, 35.222, 0.0, [(8, 20), (24,3)] + [x.getBlocksize(8, 20, 2) for x in blocksize_algs], generator.generateMTX(10, 20, 1), 0.0000001)) - kernels.append(generator.SparseKernel("knl_only_test5", precision, 64, 5, 10, 64, 0, 64, 2.3, 0.0, [(32, 2), (8,14)] + [x.getBlocksize(64, 5, 2) for x in blocksize_algs], generator.generateMTX(10, 5, 1), 0.0000001)) - kernels.append(generator.SparseKernel("knl_only_test6", precision, 8, 1, 1, 16, 0, 56, 1.0, 0.0, [(8, 1)] + [x.getBlocksize(8, 1, 2) for x in blocksize_algs], generator.generateMTX(1, 1, 1), 0.0000001)) - kernels.append(generator.SparseKernel("knl_only_test7", precision, 8, 24, 40, 8, 0, 8, 1.0, 333333.2222222, [(8, 24), (8,1)] + [x.getBlocksize(8, 24, 2) for x in blocksize_algs], generator.generateMTX(40, 24, 1), 0.0000001)) - - kernels.append(generator.DenseKernel("knl_only_test8", precision, 8, 2, 1, 8, 1, 8, 2.5, 0.0, [(8,1)] + [x.getBlocksize(8, 2, 2) for x in blocksize_algs], 0.0000001)) - kernels.append(generator.DenseKernel("knl_only_test9", precision, 32, 40, 40, 32, 60, 32, 2.0, -4.33, [(8,2), (16,7)] + [x.getBlocksize(32, 40, 2) for x in blocksize_algs], 0.0000001)) - kernels.append(generator.DenseKernel("knl_only_test10", precision, 56, 28, 56, 56, 56, 56, 0.1, 3.0, [(8, 28)], 0.0000001)) - kernels.append(generator.DenseKernel("knl_only_test11", precision, 8, 20, 8, 40, 10, 8, 234234.123123, 0.0, [(8, 20), (8,3)] + [x.getBlocksize(8, 20, 2) for x in blocksize_algs], 0.0000001)) - kernels.append(generator.DenseKernel("knl_only_test12", precision, 64, 5, 10, 64, 12, 64, 1.0, 1.0, [(32, 2), (8,14)] + [x.getBlocksize(64, 5, 2) for x in blocksize_algs], 0.0000001)) - kernels.append(generator.DenseKernel("knl_only_test13", precision, 8, 1, 1, 16, 1, 56, 0.0, 123.0, [(8, 1)] + [x.getBlocksize(8, 1, 2) for x in blocksize_algs], 0.0000001)) - kernels.append(generator.DenseKernel("knl_only_test14", precision, 8, 24, 40, 8, 41, 8, 2.0, 1.0, [(8, 24)] + [x.getBlocksize(8, 24, 2) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.SparseKernel(f"test1_{precision}", precision, 8, 56, 56, 8, 0, 8, 2.0, 0.0, [(8, 4), (8,1)] + [x.getBlocksize(8, 56, 2) for x in blocksize_algs], generator.generateMTX(56, 56, 30), 0.0000001)) + kernels.append(generator.DenseKernel(f"test2_{precision}", precision, 8, 40, 40, 8, 40, 8, 2.5, 1.0, [(8, 5), (8,2)] + [x.getBlocksize(8, 40, 2) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.DenseKernel(f"test3_{precision}", precision, 8, 56, 56, 8, 56, 8, 1.0, 5.0, [(8, 3), (8, 5)] + [x.getBlocksize(8, 56, 2) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.SparseKernel(f"knl_only_test1_{precision}", precision, 8, 2, 1, 8, 0, 8, 1.0, 0.0, [(8,1)] + [x.getBlocksize(8, 2, 2) for x in blocksize_algs], generator.generateMTX(1, 2, 1), 0.0000001)) + kernels.append(generator.SparseKernel(f"knl_only_test2_{precision}", precision, 24, 40, 40, 32, 0, 24, 1000, 1.0, [(8, 2), (16,7)] + [x.getBlocksize(24, 40, 2) for x in blocksize_algs], generator.generateMTX(40, 40, 20), 0.0000001)) + + kernels.append(generator.SparseKernel(f"knl_only_test3_{precision}", precision, 8, 2, 1, 8, 0, 16, -2.0, 0.0, [(8, 1)] + [x.getBlocksize(8, 2, 2) for x in blocksize_algs], generator.generateMTX(1, 2, 2), 0.0000001)) + kernels.append(generator.SparseKernel(f"knl_only_test4_{precision}", precision, 24, 20, 10, 40, 0, 24, 35.222, 0.0, [(8, 20), (24,3)] + [x.getBlocksize(8, 20, 2) for x in blocksize_algs], generator.generateMTX(10, 20, 1), 0.0000001)) + kernels.append(generator.SparseKernel(f"knl_only_test5_{precision}", precision, 64, 5, 10, 64, 0, 64, 2.3, 0.0, [(32, 2), (8,14)] + [x.getBlocksize(64, 5, 2) for x in blocksize_algs], generator.generateMTX(10, 5, 1), 0.0000001)) + kernels.append(generator.SparseKernel(f"knl_only_test6_{precision}", precision, 8, 1, 1, 16, 0, 56, 1.0, 0.0, [(8, 1)] + [x.getBlocksize(8, 1, 2) for x in blocksize_algs], generator.generateMTX(1, 1, 1), 0.0000001)) + kernels.append(generator.SparseKernel(f"knl_only_test7_{precision}", precision, 8, 24, 40, 8, 0, 8, 1.0, 333333.2222222, [(8, 24), (8,1)] + [x.getBlocksize(8, 24, 2) for x in blocksize_algs], generator.generateMTX(40, 24, 1), 0.0000001)) + + kernels.append(generator.DenseKernel(f"knl_only_test8_{precision}", precision, 8, 2, 1, 8, 1, 8, 2.5, 0.0, [(8,1)] + [x.getBlocksize(8, 2, 2) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.DenseKernel(f"knl_only_test9_{precision}", precision, 32, 40, 40, 32, 60, 32, 2.0, -4.33, [(8,2), (16,7)] + [x.getBlocksize(32, 40, 2) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.DenseKernel(f"knl_only_test10_{precision}", precision, 56, 28, 56, 56, 56, 56, 0.1, 3.0, [(8, 28)], 0.0000001)) + kernels.append(generator.DenseKernel(f"knl_only_test11_{precision}", precision, 8, 20, 8, 40, 10, 8, 234234.123123, 0.0, [(8, 20), (8,3)] + [x.getBlocksize(8, 20, 2) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.DenseKernel(f"knl_only_test12_{precision}", precision, 64, 5, 10, 64, 12, 64, 1.0, 1.0, [(32, 2), (8,14)] + [x.getBlocksize(64, 5, 2) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.DenseKernel(f"knl_only_test13_{precision}", precision, 8, 1, 1, 16, 1, 56, 0.0, 123.0, [(8, 1)] + [x.getBlocksize(8, 1, 2) for x in blocksize_algs], 0.0000001)) + kernels.append(generator.DenseKernel(f"knl_only_test14_{precision}", precision, 8, 24, 40, 8, 41, 8, 2.0, 1.0, [(8, 24)] + [x.getBlocksize(8, 24, 2) for x in blocksize_algs], 0.0000001)) generator.make(kernels, "knl")