diff --git a/test/test_loop_fusion.py b/test/test_loop_fusion.py new file mode 100644 index 000000000..0e683ee63 --- /dev/null +++ b/test/test_loop_fusion.py @@ -0,0 +1,247 @@ +__copyright__ = "Copyright (C) 2021 Kaushik Kulkarni" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import sys +import numpy as np +import loopy as lp +import pyopencl as cl +import pyopencl.clmath # noqa +import pyopencl.clrandom # noqa + +import logging +logger = logging.getLogger(__name__) + +try: + import faulthandler +except ImportError: + pass +else: + faulthandler.enable() + +from pyopencl.tools import pytest_generate_tests_for_pyopencl \ + as pytest_generate_tests + +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa + +__all__ = [ + "pytest_generate_tests", + "cl" # "cl.create_some_context" + ] + + +def test_loop_fusion_vanilla(ctx_factory): + ctx = ctx_factory() + + knl = lp.make_kernel( + "{[i0, i1, j0, j1]: 0 <= i0, i1, j0, j1 < 10}", + """ + a[i0] = 1 + b[i1, j0] = 2 {id=write_b} + c[j1] = 3 {id=write_c} + """) + ref_knl = knl + + fused_chunks = lp.get_kennedy_unweighted_fusion_candidates(knl["loopy_kernel"], + frozenset(["j0", "j1"])) + + knl = knl.with_kernel(lp.rename_inames_in_batch(knl["loopy_kernel"], + fused_chunks)) + assert len(ref_knl["loopy_kernel"].all_inames()) == 4 + assert len(knl["loopy_kernel"].all_inames()) == 3 + assert len(knl["loopy_kernel"].id_to_insn["write_b"].within_inames + & knl["loopy_kernel"].id_to_insn["write_c"].within_inames) == 1 + + lp.auto_test_vs_ref(ref_knl, ctx, knl) + + +def test_loop_fusion_outer_iname_preventing_fusion(ctx_factory): + ctx = ctx_factory() + + knl = lp.make_kernel( + "{[i0, j0, j1]: 0 <= i0, j0, j1 < 10}", + """ + a[i0] = 1 + b[i0, j0] = 2 {id=write_b} + c[j1] = 3 {id=write_c} + """) + ref_knl = knl + + fused_chunks = lp.get_kennedy_unweighted_fusion_candidates(knl["loopy_kernel"], + frozenset(["j0", "j1"])) + + knl = knl.with_kernel(lp.rename_inames_in_batch(knl["loopy_kernel"], + fused_chunks)) + + assert len(knl["loopy_kernel"].all_inames()) == 3 + assert len(knl["loopy_kernel"].all_inames()) == 3 + assert len(knl["loopy_kernel"].id_to_insn["write_b"].within_inames + & knl["loopy_kernel"].id_to_insn["write_c"].within_inames) == 0 + + lp.auto_test_vs_ref(ref_knl, ctx, knl) + + +def test_loop_fusion_with_loop_independent_deps(ctx_factory): + ctx = ctx_factory() + + knl = lp.make_kernel( + "{[j0, j1]: 0 <= j0, j1 < 10}", + """ + a[j0] = 1 + b[j1] = 2 * a[j1] + """, seq_dependencies=True) + + ref_knl = knl + + fused_chunks = lp.get_kennedy_unweighted_fusion_candidates(knl["loopy_kernel"], + frozenset(["j0", "j1"])) + + knl = knl.with_kernel(lp.rename_inames_in_batch(knl["loopy_kernel"], + fused_chunks)) + + assert len(ref_knl["loopy_kernel"].all_inames()) == 2 + assert len(knl["loopy_kernel"].all_inames()) == 1 + + lp.auto_test_vs_ref(ref_knl, ctx, knl) + + +def test_loop_fusion_constrained_by_outer_loop_deps(ctx_factory): + ctx = ctx_factory() + + knl = lp.make_kernel( + "{[j0, j1]: 0 <= j0, j1 < 10}", + """ + a[j0] = 1 {id=write_a} + b = 2 {id=write_b} + c[j1] = 2 * a[j1] {id=write_c} + """, seq_dependencies=True) + + ref_knl = knl + + fused_chunks = lp.get_kennedy_unweighted_fusion_candidates(knl["loopy_kernel"], + frozenset(["j0", "j1"])) + + knl = knl.with_kernel(lp.rename_inames_in_batch(knl["loopy_kernel"], + fused_chunks)) + + assert len(ref_knl["loopy_kernel"].all_inames()) == 2 + assert len(knl["loopy_kernel"].all_inames()) == 2 + assert len(knl["loopy_kernel"].id_to_insn["write_a"].within_inames + & knl["loopy_kernel"].id_to_insn["write_c"].within_inames) == 0 + + lp.auto_test_vs_ref(ref_knl, ctx, knl) + + +def test_loop_fusion_with_loop_carried_deps1(ctx_factory): + + ctx = ctx_factory() + knl = lp.make_kernel( + "{[i0, i1]: 1<=i0, i1<10}", + """ + x[i0] = i0 {id=first_write} + x[i1-1] = i1 ** 2 {id=second_write} + """, + seq_dependencies=True) + + ref_knl = knl + + fused_chunks = lp.get_kennedy_unweighted_fusion_candidates(knl["loopy_kernel"], + frozenset(["i0", + "i1"])) + + knl = knl.with_kernel(lp.rename_inames_in_batch(knl["loopy_kernel"], + fused_chunks)) + + assert len(ref_knl["loopy_kernel"].all_inames()) == 2 + assert len(knl["loopy_kernel"].all_inames()) == 1 + assert len(knl["loopy_kernel"].id_to_insn["first_write"].within_inames + & knl["loopy_kernel"].id_to_insn["second_write"].within_inames) == 1 + + lp.auto_test_vs_ref(ref_knl, ctx, knl) + + +def test_loop_fusion_with_loop_carried_deps2(ctx_factory): + ctx = ctx_factory() + knl = lp.make_kernel( + "{[i0, i1]: 1<=i0, i1<10}", + """ + x[i0-1] = i0 {id=first_write} + x[i1] = i1 ** 2 {id=second_write} + """, + seq_dependencies=True) + + ref_knl = knl + + fused_chunks = lp.get_kennedy_unweighted_fusion_candidates(knl["loopy_kernel"], + frozenset(["i0", + "i1"])) + + knl = knl.with_kernel(lp.rename_inames_in_batch(knl["loopy_kernel"], + fused_chunks)) + + assert len(ref_knl["loopy_kernel"].all_inames()) == 2 + assert len(knl["loopy_kernel"].all_inames()) == 2 + assert len(knl["loopy_kernel"].id_to_insn["first_write"].within_inames + & knl["loopy_kernel"].id_to_insn["second_write"].within_inames) == 0 + + lp.auto_test_vs_ref(ref_knl, ctx, knl) + + +def test_loop_fusion_with_indirection(ctx_factory): + ctx = ctx_factory() + map_ = np.random.permutation(10) + cq = cl.CommandQueue(ctx) + + knl = lp.make_kernel( + "{[i0, i1]: 0<=i0, i1<10}", + """ + x[i0] = i0 {id=first_write} + x[map[i1]] = i1 ** 2 {id=second_write} + """, + seq_dependencies=True) + + ref_knl = knl + + fused_chunks = lp.get_kennedy_unweighted_fusion_candidates(knl["loopy_kernel"], + frozenset(["i0", + "i1"])) + + knl = knl.with_kernel(lp.rename_inames_in_batch(knl["loopy_kernel"], + fused_chunks)) + + assert len(ref_knl["loopy_kernel"].all_inames()) == 2 + assert len(knl["loopy_kernel"].all_inames()) == 2 + assert len(knl["loopy_kernel"].id_to_insn["first_write"].within_inames + & knl["loopy_kernel"].id_to_insn["second_write"].within_inames) == 0 + + _, (out1,) = ref_knl(cq, map=map_) + _, (out2,) = knl(cq, map=map_) + np.testing.assert_allclose(out1, out2) + + +if __name__ == "__main__": + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + +# vim: fdm=marker