diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 234988280..85e0336b0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -50,10 +50,11 @@ jobs: - uses: actions/checkout@v4 - name: "Main Script" run: | + EXTRA_INSTALL="mypy pytest types-colorama types-Pygments" curl -L -O https://tiker.net/ci-support-v0 . ./ci-support-v0 + build_py_project_in_conda_env - python -m pip install mypy ./run-mypy.sh pytest: @@ -194,9 +195,9 @@ jobs: cd /home/firedrake/firedrake/src/firedrake # patch so exception messages get shown - curl -L https://gist.githubusercontent.com/inducer/17d7134ace215f0df1f3627eac4195c7/raw/63edfaf2ec8bf06987896569a4f24264df490e9e/firedrake-debug-patch.diff | patch -p1 + curl -L https://gist.githubusercontent.com/inducer/17d7134ace215f0df1f3627eac4195c7/raw/ec5470a7d8587b6e1f336f3ef1d0ece5e26f236a/firedrake-debug-patch.diff | patch -p1 - pytest --tb=native -rsxw --durations=10 -m 'not parallel' tests/multigrid/ + pytest --tb=native -rsxw --durations=10 tests/firedrake/regression -k "poisson_strong or stokes_mini or dg_advection" validate_cff: name: Validate CITATION.cff diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 2c314752e..9cd13dd54 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -158,11 +158,10 @@ Ruff: Mypy: script: | - EXTRA_INSTALL="pybind11 numpy" + EXTRA_INSTALL="mypy pybind11 numpy types-colorama types-Pygments" curl -L -O https://tiker.net/ci-support-v0 . ./ci-support-v0 build_py_project_in_venv - python -m pip install mypy ./run-mypy.sh tags: - python3 diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index a87cfef7d..000000000 --- a/MANIFEST.in +++ /dev/null @@ -1,23 +0,0 @@ -include test/*.py -include test/*.f90 -recursive-include examples *.py *.cl *.floopy *.sh *.ipynb *.cpp *.loopy -recursive-include contrib *.vim *.py - -include build-helpers/*.sh -include build-helpers/*.spec - -include doc/*.rst -include doc/Makefile -include doc/*.py -include doc/images/*.png -include doc/_static/*.css -include doc/_templates/*.html -include doc/images/*.svg -include doc/images/*.png - -include configure.py -include Makefile.in -include README.rst -include LICENSE -include CITATION.cff -include requirements*.txt diff --git a/contrib/c-integer-semantics.py b/contrib/c-integer-semantics.py index 8556430d0..8b30415c2 100644 --- a/contrib/c-integer-semantics.py +++ b/contrib/c-integer-semantics.py @@ -95,8 +95,7 @@ def main(): func.argtypes = [ctypes.c_longlong, ctypes.c_longlong] func.restype = ctypes.c_longlong - cdiv = int_exp.cdiv # noqa - cmod = int_exp.cmod # noqa + cmod = int_exp.cmod int_floor_div = int_exp.loopy_floor_div_int64 int_floor_div_pos_b = int_exp.loopy_floor_div_pos_b_int64 int_mod_pos_b = int_exp.loopy_mod_pos_b_int64 diff --git a/contrib/mem-pattern-explorer/pattern_vis.py b/contrib/mem-pattern-explorer/pattern_vis.py index bbde23174..f285dbb88 100644 --- a/contrib/mem-pattern-explorer/pattern_vis.py +++ b/contrib/mem-pattern-explorer/pattern_vis.py @@ -27,7 +27,7 @@ def __init__(self, gsize, lsize, subgroup_size=32, decay_constant=0.75): self.arrays = [] - def l(self, index): # noqa: E741,E743 + def l(self, index): # noqa: E743 subscript = [np.newaxis] * self.ind_length subscript[len(self.gsize) + index] = slice(None) @@ -147,7 +147,7 @@ def get_plot_data(self): div_ceil(nelements, self.elements_per_row), self.elements_per_row,) shaped_array = np.zeros( - base_shape + (self.nattributes,), + (*base_shape, self.nattributes), dtype=np.float32) shaped_array.reshape(-1, self.nattributes)[:nelements] = self.array @@ -160,7 +160,7 @@ def get_plot_data(self): else: subgroup.fill(1) - rgb_array = np.zeros(base_shape + (3,)) + rgb_array = np.zeros((*base_shape, 3)) if 1: if len(self.ctx.gsize) > 1: # g.0 -> red diff --git a/doc/conf.py b/doc/conf.py index c4a13c445..b23ce311b 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -2,7 +2,7 @@ from urllib.request import urlopen -_conf_url = "https://raw.githubusercontent.com/inducer/sphinxconfig/main/sphinxconfig.py" # noqa +_conf_url = "https://raw.githubusercontent.com/inducer/sphinxconfig/main/sphinxconfig.py" with urlopen(_conf_url) as _inf: exec(compile(_inf.read(), _conf_url, "exec"), globals()) @@ -35,20 +35,6 @@ "pyrsistent": ("https://pyrsistent.readthedocs.io/en/latest/", None), } -# Some modules need to import things just so that sphinx can resolve symbols in -# type annotations. Often, we do not want these imports (e.g. of PyOpenCL) when -# in normal use (because they would introduce unintended side effects or hard -# dependencies). This flag exists so that these imports only occur during doc -# build. Since sphinx appears to resolve type hints lexically (as it should), -# this needs to be cross-module (since, e.g. an inherited arraycontext -# docstring can be read by sphinx when building meshmode, a dependent package), -# this needs a setting of the same name across all packages involved, that's -# why this name is as global-sounding as it is. -import sys - - -sys._BUILDING_SPHINX_DOCS = True - nitpicky = True nitpick_ignore_regex = [ @@ -62,13 +48,13 @@ ["py:class", r"immutables\.(.+)"], # Reference not found from ""? I'm not even sure where to look. - ["py:class", r"Expression"], + ["py:class", r"ExpressionNode"], + + # Type aliases + ["py:class", r"InameStr"], + ["py:class", r"ConcreteCallablesTable"], + ["py:class", r"LoopNestTree"], + ["py:class", r"LoopTree"], + ["py:class", r"ToLoopyTypeConvertible"], + ["py:class", r"ToStackMatchConvertible"], ] - -autodoc_type_aliases = { - "ToLoopyTypeConvertible": "ToLoopyTypeConvertible", - "ExpressionT": "ExpressionT", - "InameStr": "InameStr", - "ShapeType": "ShapeType", - "StridesType": "StridesType", -} diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst index 2962c23b8..227356b11 100644 --- a/doc/ref_kernel.rst +++ b/doc/ref_kernel.rst @@ -25,7 +25,7 @@ consist of arithmetic operations and calls to functions. If the outermost operation of the RHS expression is a function call, the RHS value may be a tuple, and multiple (still scalar) arrays appear as LHS values. (This is the only sense in which tuple types are supported.) -Each statement is parametrized by zero or more loop variables ("inames"). +Each statement is parameterized by zero or more loop variables ("inames"). A statement is executed once for each integer point defined by the domain forest for the iname tuple given for that statement (:attr:`loopy.InstructionBase.within_inames`). Each execution of a @@ -656,8 +656,6 @@ Helper values .. {{{ -.. autoclass:: auto - .. autoclass:: UniqueName .. autoclass:: Optional @@ -693,11 +691,7 @@ The Kernel Object Do not create :class:`LoopKernel` objects directly. Instead, refer to :ref:`creating-kernels`. -.. autoclass:: LoopKernel - -.. autoclass:: KernelState - :members: - :undoc-members: +.. automodule:: loopy.kernel Implementation Details: The Base Array -------------------------------------- diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 1dd43e7e2..8b531d23b 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -1681,7 +1681,7 @@ Each line of output will look roughly like:: data type accessed. - lid_strides: A :class:`dict` of **{** :class:`int` **:** - :class:`pymbolic.primitives.Expression` or :class:`int` **}** that specifies + :data:`~pymbolic.typing.Expression` or :class:`int` **}** that specifies local strides for each local id in the memory access index. Local ids not found will not be present in ``lid_strides.keys()``. Uniform access (i.e. work-items within a sub-group access the same item) is indicated by setting @@ -1689,7 +1689,7 @@ Each line of output will look roughly like:: which case the 0 key will not be present in lid_strides. - gid_strides: A :class:`dict` of **{** :class:`int` **:** - :class:`pymbolic.primitives.Expression` or :class:`int` **}** that specifies + :data:`~pymbolic.typing.Expression` or :class:`int` **}** that specifies global strides for each global id in the memory access index. Global ids not found will not be present in ``gid_strides.keys()``. diff --git a/examples/python/ispc-stream-harness.py b/examples/python/ispc-stream-harness.py index bf6e29e47..ce61b16be 100644 --- a/examples/python/ispc-stream-harness.py +++ b/examples/python/ispc-stream-harness.py @@ -26,7 +26,7 @@ def transform(knl, vars, stream_dtype): knl = lp.add_and_infer_dtypes(knl, dict.fromkeys(vars, stream_dtype)) - knl = lp.set_argument_order(knl, vars + ["n"]) + knl = lp.set_argument_order(knl, [*vars, "n"]) return knl diff --git a/loopy/__init__.py b/loopy/__init__.py index ef9868e6f..734528219 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ @@ -478,7 +481,7 @@ def register_preamble_generators(kernel: LoopKernel, preamble_generators): "and would thus disrupt loopy's caches" % pgen) - new_pgens = (pgen,) + new_pgens + new_pgens = (pgen, *new_pgens) return kernel.copy(preamble_generators=new_pgens) @@ -496,7 +499,7 @@ def register_symbol_manglers(kernel, manglers): "and would disrupt loopy's caches" % m) - new_manglers = (m,) + new_manglers + new_manglers = (m, *new_manglers) return kernel.copy(symbol_manglers=new_manglers) diff --git a/loopy/__main__.py b/loopy/__main__.py index 630b93830..d8b61adc1 100644 --- a/loopy/__main__.py +++ b/loopy/__main__.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import loopy.cli diff --git a/loopy/auto_test.py b/loopy/auto_test.py index 6ee762556..04a2b9239 100644 --- a/loopy/auto_test.py +++ b/loopy/auto_test.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ @@ -21,7 +24,7 @@ """ from dataclasses import dataclass -from typing import TYPE_CHECKING, Optional, Tuple +from typing import TYPE_CHECKING from warnings import warn import numpy as np @@ -77,26 +80,26 @@ def fill_rand(ary): @dataclass class TestArgInfo: name: str - ref_array: "cla.Array" - ref_storage_array: "cla.Array" + ref_array: cla.Array + ref_storage_array: cla.Array - ref_pre_run_array: "cla.Array" - ref_pre_run_storage_array: "cla.Array" + ref_pre_run_array: cla.Array + ref_pre_run_storage_array: cla.Array - ref_shape: Tuple[int, ...] - ref_strides: Tuple[int, ...] + ref_shape: tuple[int, ...] + ref_strides: tuple[int, ...] ref_alloc_size: int - ref_numpy_strides: Tuple[int, ...] + ref_numpy_strides: tuple[int, ...] needs_checking: bool # The attributes below are being modified in make_args, hence this dataclass # cannot be frozen. - test_storage_array: Optional["cla.Array"] = None - test_array: Optional["cla.Array"] = None - test_shape: Optional[Tuple[int, ...]] = None - test_strides: Optional[Tuple[int, ...]] = None - test_numpy_strides: Optional[Tuple[int, ...]] = None - test_alloc_size: Optional[Tuple[int, ...]] = None + test_storage_array: cla.Array | None = None + test_array: cla.Array | None = None + test_shape: tuple[int, ...] | None = None + test_strides: tuple[int, ...] | None = None + test_numpy_strides: tuple[int, ...] | None = None + test_alloc_size: tuple[int, ...] | None = None # {{{ "reference" arguments @@ -410,12 +413,12 @@ def auto_test_vs_ref( if ref_entrypoint is None: if len(ref_prog.entrypoints) != 1: raise LoopyError("Unable to guess entrypoint for ref_prog.") - ref_entrypoint = list(ref_prog.entrypoints)[0] + ref_entrypoint = next(iter(ref_prog.entrypoints)) if test_entrypoint is None: if len(test_prog.entrypoints) != 1: raise LoopyError("Unable to guess entrypoint for ref_prog.") - test_entrypoint = list(test_prog.entrypoints)[0] + test_entrypoint = next(iter(test_prog.entrypoints)) ref_prog = lp.preprocess_kernel(ref_prog) test_prog = lp.preprocess_kernel(test_prog) diff --git a/loopy/check.py b/loopy/check.py index ee24d6e4b..1a63c90bc 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ @@ -22,9 +25,8 @@ import logging from collections import defaultdict -from collections.abc import Mapping, Sequence from functools import reduce -from typing import List, Optional, Tuple, Union +from typing import TYPE_CHECKING import numpy as np @@ -39,7 +41,6 @@ WriteRaceConditionWarning, warn_with_kernel, ) -from loopy.kernel import LoopKernel from loopy.kernel.array import ( ArrayBase, FixedStrideArrayDimTag, @@ -68,7 +69,15 @@ check_each_kernel, ) from loopy.type_inference import TypeReader -from loopy.typing import ExpressionT, not_none +from loopy.typing import not_none + + +if TYPE_CHECKING: + from collections.abc import Mapping, Sequence + + from pymbolic.typing import Expression + + from loopy.kernel import LoopKernel logger = logging.getLogger(__name__) @@ -206,22 +215,22 @@ def check_separated_array_consistency(kernel: LoopKernel) -> None: for attr_name in ["address_space", "is_input", "is_output"]: if getattr(arg, attr_name) != getattr(sub_arg, attr_name): raise LoopyError( - "Attribute '{attr_name}' of " + f"Attribute '{attr_name}' of " f"'{arg.name}' and associated sep array " f"'{sub_arg.name}' is not consistent.") @check_each_kernel def check_offsets_and_dim_tags(kernel: LoopKernel) -> None: - from pymbolic.primitives import Expression, Variable + from pymbolic.primitives import ExpressionNode, Variable from loopy.symbolic import DependencyMapper arg_name_vars = {Variable(name) for name in kernel.arg_dict} - dep_mapper = DependencyMapper() + dep_mapper: DependencyMapper[[]] = DependencyMapper() def ensure_depends_only_on_arguments( - what: str, expr: Union[str, ExpressionT]) -> None: + what: str, expr: str | Expression) -> None: if isinstance(expr, str): expr = Variable(expr) @@ -241,14 +250,14 @@ def ensure_depends_only_on_arguments( continue if arg.offset is auto: pass - elif isinstance(arg.offset, (int, np.integer, Expression, str)): + elif isinstance(arg.offset, (int, np.integer, ExpressionNode, str)): ensure_depends_only_on_arguments(what, arg.offset) else: raise LoopyError(f"invalid value of offset for '{arg.name}'") if arg.dim_tags is None: - new_dim_tags: Optional[Tuple[ArrayDimImplementationTag, ...]] = \ + new_dim_tags: tuple[ArrayDimImplementationTag, ...] | None = \ arg.dim_tags else: new_dim_tags = () @@ -259,13 +268,13 @@ def ensure_depends_only_on_arguments( if dim_tag.stride is auto: pass elif isinstance( - dim_tag.stride, (int, np.integer, Expression)): + dim_tag.stride, (int, np.integer, ExpressionNode)): ensure_depends_only_on_arguments(what, dim_tag.stride) else: raise LoopyError(f"invalid value of {what}") assert new_dim_tags is not None - new_dim_tags = new_dim_tags + (dim_tag,) + new_dim_tags = (*new_dim_tags, dim_tag) arg = arg.copy(dim_tags=new_dim_tags) @@ -281,7 +290,7 @@ def ensure_depends_only_on_arguments( pass if tv.offset is auto: pass - elif isinstance(tv.offset, (int, np.integer, Expression, str)): + elif isinstance(tv.offset, (int, np.integer, ExpressionNode, str)): ensure_depends_only_on_arguments(what, tv.offset) else: raise LoopyError(f"invalid value of offset for '{tv.name}'") @@ -294,7 +303,7 @@ def ensure_depends_only_on_arguments( if dim_tag.stride is auto: raise LoopyError(f"The {what}" f" is 'auto', " "which is not allowed.") - elif isinstance(dim_tag.stride, (int, np.integer, Expression)): + elif isinstance(dim_tag.stride, (int, np.integer, ExpressionNode)): ensure_depends_only_on_arguments(what, dim_tag.stride) else: raise LoopyError(f"invalid value of {what}") @@ -1323,7 +1332,7 @@ def check_for_nested_base_storage(kernel: LoopKernel) -> None: # must run after preprocessing has created variables for base_storage from loopy.kernel.data import ArrayArg - arrays: List[ArrayBase] = [ + arrays: list[ArrayBase] = [ arg for arg in kernel.args if isinstance(arg, ArrayArg) ] arrays = arrays + list(kernel.temporary_variables.values()) diff --git a/loopy/cli.py b/loopy/cli.py index 69c35fcea..4841f8e9f 100644 --- a/loopy/cli.py +++ b/loopy/cli.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import sys import numpy as np diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 2e39d89bd..3c3b42f34 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ @@ -21,59 +24,46 @@ """ import logging -import sys from dataclasses import dataclass, replace from typing import ( TYPE_CHECKING, Any, - FrozenSet, Mapping, - Optional, Sequence, - Set, - Tuple, - Union, ) -from immutables import Map - -from loopy.codegen.result import CodeGenerationResult -from loopy.library.reduction import ReductionOpFunction -from loopy.translation_unit import CallablesTable, TranslationUnit +import immutables logger = logging.getLogger(__name__) from functools import reduce +import islpy # to help out Sphinx import islpy as isl -from pytools import ProcessLogger, UniqueNameGenerator +import pytools # to help out Sphinx +from pytools import ProcessLogger from pytools.persistent_dict import WriteOncePersistentDict from loopy.diagnostic import LoopyError, warn -from loopy.kernel import LoopKernel from loopy.kernel.function_interface import CallableKernel from loopy.symbolic import CombineMapper -from loopy.target import TargetBase from loopy.tools import LoopyKeyBuilder, caches -from loopy.types import LoopyType -from loopy.typing import ExpressionT from loopy.version import DATA_MODEL_VERSION if TYPE_CHECKING: - from loopy.codegen.result import GeneratedProgram + from loopy.codegen.result import CodeGenerationResult, GeneratedProgram from loopy.codegen.tools import CodegenOperationCacheManager - - -if getattr(sys, "_BUILDING_SPHINX_DOCS", False): - from loopy.codegen.result import GeneratedProgram # noqa: F811 - from loopy.codegen.tools import CodegenOperationCacheManager # noqa: F811 + from loopy.kernel import LoopKernel + from loopy.library.reduction import ReductionOpFunction + from loopy.target import TargetBase + from loopy.translation_unit import CallablesTable, TranslationUnit + from loopy.types import LoopyType + from loopy.typing import Expression __doc__ = """ -.. currentmodule:: loopy.codegen - .. autoclass:: PreambleInfo .. autoclass:: VectorizationInfo @@ -90,9 +80,9 @@ References ^^^^^^^^^^ -.. class:: Expression +.. class:: ExpressionNode - See :class:`pymbolic.Expression`. + See :class:`pymbolic.primitives.ExpressionNode`. """ @@ -112,8 +102,6 @@ class VectorizationInfo: iname: str length: int - # FIXME why is this here? - space: isl.Space @dataclass(frozen=True) @@ -134,108 +122,76 @@ class SeenFunction: """ name: str c_name: str - arg_dtypes: Tuple[LoopyType, ...] - result_dtypes: Tuple[LoopyType, ...] + arg_dtypes: tuple[LoopyType, ...] + result_dtypes: tuple[LoopyType, ...] @dataclass(frozen=True) class CodeGenerationState: """ - .. attribute:: kernel - .. attribute:: target - .. attribute:: implemented_domain - - The entire implemented domain (as an :class:`islpy.Set`) - i.e. all constraints that have been enforced so far. - - .. attribute:: implemented_predicates - - A :class:`frozenset` of predicates for which checks have been - implemented. - - .. attribute:: seen_dtypes - - set of dtypes that were encountered - - .. attribute:: seen_functions - - set of :class:`SeenFunction` instances + .. autoattribute:: kernel + .. autoattribute:: target + .. autoattribute:: implemented_domain + .. autoattribute:: implemented_predicates + .. autoattribute:: seen_dtypes + .. autoattribute:: seen_functions .. attribute:: seen_atomic_dtypes - .. attribute:: var_subst_map - - .. attribute:: allow_complex - - .. attribute:: vectorization_info - - *None* (to mean vectorization has not yet been applied), or an instance of - :class:`VectorizationInfo`. - - .. attribute:: is_generating_device_code + .. autoattribute:: var_subst_map - .. attribute:: gen_program_name - - None (indicating that host code is being generated) - or the name of the device program currently being - generated. - - .. attribute:: schedule_index_end - - .. attribute:: callables_table - - A mapping from callable names to instances of - :class:`loopy.kernel.function_interface.InKernelCallable`. - - .. attribute:: is_entrypoint + .. autoattribute:: allow_complex + .. autoattribute:: vectorization_info + .. autoattribute:: is_generating_device_code + .. autoattribute:: gen_program_name + .. autoattribute:: schedule_index_end + .. autoattribute:: callables_table + .. autoattribute:: is_entrypoint + .. autoattribute:: codegen_cache_manager + """ - A :class:`bool` to indicate if the code is being generated for an - entrypoint kernel + kernel: LoopKernel - .. attribute:: codegen_cache_manager + # LoopKernel should not have a target, should use this instead + target: TargetBase - An instance of :class:`loopy.codegen.tools.CodegenOperationCacheManager`. + implemented_domain: islpy.Set + """ + The entire implemented domain (as an :class:`islpy.Set`) + i.e. all constraints that have been enforced so far. """ - kernel: LoopKernel - target: TargetBase - implemented_domain: isl.Set - implemented_predicates: FrozenSet[Union[str, ExpressionT]] + implemented_predicates: frozenset[str | Expression] # /!\ mutable - seen_dtypes: Set[LoopyType] - seen_functions: Set[SeenFunction] - seen_atomic_dtypes: Set[LoopyType] + seen_dtypes: set[LoopyType] + seen_functions: set[SeenFunction] + seen_atomic_dtypes: set[LoopyType] - var_subst_map: Map[str, ExpressionT] + var_subst_map: immutables.Map[str, Expression] allow_complex: bool callables_table: CallablesTable is_entrypoint: bool - var_name_generator: UniqueNameGenerator + var_name_generator: pytools.UniqueNameGenerator is_generating_device_code: bool - gen_program_name: str - schedule_index_end: int - codegen_cachemanager: "CodegenOperationCacheManager" - vectorization_info: Optional[VectorizationInfo] = None - def __post_init__(self): - # FIXME: If this doesn't bomb during testing, we can get rid of target. - assert self.target == self.kernel.target + gen_program_name: str - assert self.vectorization_info is None or isinstance( - self.vectorization_info, VectorizationInfo) + schedule_index_end: int + codegen_cache_manager: CodegenOperationCacheManager + vectorization_info: VectorizationInfo | None = None # {{{ copy helpers - def copy(self, **kwargs: Any) -> "CodeGenerationState": + def copy(self, **kwargs: Any) -> CodeGenerationState: return replace(self, **kwargs) def copy_and_assign( - self, name: str, value: ExpressionT) -> "CodeGenerationState": + self, name: str, value: Expression) -> CodeGenerationState: """Make a copy of self with variable *name* fixed to *value*.""" return self.copy(var_subst_map=self.var_subst_map.set(name, value)) - def copy_and_assign_many(self, assignments) -> "CodeGenerationState": + def copy_and_assign_many(self, assignments) -> CodeGenerationState: """Make a copy of self with *assignments* included.""" return self.copy(var_subst_map=self.var_subst_map.update(assignments)) @@ -305,7 +261,8 @@ def unvectorize(self, func): novec_self = self.copy(vectorization_info=None) for i in range(vinf.length): - idx_aff = isl.Aff.zero_on_domain(vinf.space.params()) + i + idx_aff = isl.Aff.zero_on_domain( + isl.Space.params_alloc(self.kernel.isl_context, 0)) + i new_codegen_state = novec_self.fix(vinf.iname, idx_aff) generated = func(new_codegen_state) @@ -367,10 +324,16 @@ def map_constant(self, expr): @dataclass(frozen=True) class PreambleInfo: + """ + .. autoattribute:: kernel + .. autoattribute:: seen_dtypes + .. autoattribute:: seen_functions + .. autoattribute:: seen_atomic_dtypes + """ kernel: LoopKernel - seen_dtypes: Set[LoopyType] - seen_functions: Set[SeenFunction] - seen_atomic_dtypes: Set[LoopyType] + seen_dtypes: set[LoopyType] + seen_functions: set[SeenFunction] + seen_atomic_dtypes: set[LoopyType] # FIXME: This makes all the above redundant. It probably shouldn't be here. codegen_state: CodeGenerationState @@ -418,7 +381,7 @@ def generate_code_for_a_single_kernel(kernel, callables_table, target, seen_dtypes=seen_dtypes, seen_functions=seen_functions, seen_atomic_dtypes=seen_atomic_dtypes, - var_subst_map=Map(), + var_subst_map=immutables.Map(), allow_complex=allow_complex, var_name_generator=kernel.get_var_name_generator(), is_generating_device_code=False, @@ -429,7 +392,7 @@ def generate_code_for_a_single_kernel(kernel, callables_table, target, schedule_index_end=len(kernel.linearization), callables_table=callables_table, is_entrypoint=is_entrypoint, - codegen_cachemanager=CodegenOperationCacheManager.from_kernel(kernel), + codegen_cache_manager=CodegenOperationCacheManager.from_kernel(kernel), ) from loopy.codegen.result import generate_host_or_device_program @@ -519,7 +482,7 @@ def diverge_callee_entrypoints(program): new_callables[name] = clbl - return program.copy(callables_table=Map(new_callables)) + return program.copy(callables_table=immutables.Map(new_callables)) @dataclass(frozen=True) @@ -543,10 +506,10 @@ class TranslationUnitCodeGenerationResult: .. automethod:: all_code """ - host_programs: Mapping[str, "GeneratedProgram"] - device_programs: Sequence["GeneratedProgram"] - host_preambles: Sequence[Tuple[int, str]] = () - device_preambles: Sequence[Tuple[int, str]] = () + host_programs: Mapping[str, GeneratedProgram] + device_programs: Sequence[GeneratedProgram] + host_preambles: Sequence[tuple[int, str]] = () + device_preambles: Sequence[tuple[int, str]] = () def host_code(self): from loopy.codegen.result import process_preambles @@ -666,7 +629,7 @@ def generate_code_v2(t_unit: TranslationUnit) -> CodeGenerationResult: # adding the callee fdecls to the device_programs device_programs = ([device_programs[0].copy( ast=t_unit.target.get_device_ast_builder().ast_module.Collection( - callee_fdecls+[device_programs[0].ast]))] + + [*callee_fdecls, device_programs[0].ast]))] + device_programs[1:]) def not_reduction_op(name: str | ReductionOpFunction) -> str: diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index a066d3425..0f3bdba41 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ @@ -21,13 +24,15 @@ """ -from typing import FrozenSet +from typing import TYPE_CHECKING import islpy as isl from islpy import dim_type -from loopy.codegen.tools import CodegenOperationCacheManager -from loopy.kernel import LoopKernel + +if TYPE_CHECKING: + from loopy.codegen.tools import CodegenOperationCacheManager + from loopy.kernel import LoopKernel # {{{ approximate, convex bounds check generator @@ -62,7 +67,7 @@ def get_approximate_convex_bounds_checks(domain, check_inames, def get_usable_inames_for_conditional( kernel: LoopKernel, sched_index: int, - op_cache_manager: CodegenOperationCacheManager) -> FrozenSet[str]: + op_cache_manager: CodegenOperationCacheManager) -> frozenset[str]: active_inames = op_cache_manager.active_inames[sched_index] crosses_barrier = op_cache_manager.has_barrier_within[sched_index] diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index bee09229f..fd38c97e7 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -1,4 +1,5 @@ """Loop nest build top-level control/hoisting.""" +from __future__ import annotations __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" @@ -273,7 +274,7 @@ class ScheduleIndexInfo(ImmutableRecord): schedule_indices=[i], admissible_cond_inames=( get_usable_inames_for_conditional(kernel, i, - codegen_state.codegen_cachemanager)), + codegen_state.codegen_cache_manager)), required_predicates=get_required_predicates(kernel, i), used_inames_within=find_used_inames_within(kernel, i) ) @@ -470,7 +471,7 @@ def gen_code(inner_codegen_state): prev_gen_code = gen_code - def gen_code(inner_codegen_state): # noqa pylint:disable=function-redefined + def gen_code(inner_codegen_state): # pylint: disable=function-redefined condition_exprs = [ constraint_to_cond_expr(cns) for cns in bounds_checks] + list(pred_checks) diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index 1bc26733e..3b0195507 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -1,4 +1,5 @@ """Code generation for Instruction objects.""" +from __future__ import annotations __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" @@ -191,7 +192,7 @@ def generate_assignment_instruction_code(codegen_state, insn): from pymbolic.mapper.stringifier import PREC_NONE lhs_code = codegen_state.expression_to_code_mapper(insn.assignee, PREC_NONE) - from cgen import Statement as S # noqa + from cgen import Statement as S gs, ls = kernel.get_grid_size_upper_bounds(codegen_state.callables_table) diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index c64c2ea67..44bfa07cc 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ @@ -210,7 +213,7 @@ def generate_vectorize_loop(codegen_state, sched_index): vectorization_info=VectorizationInfo( iname=iname, length=length, - space=length_aff.space)) + )) return build_loop_nest(new_codegen_state, sched_index+1) @@ -360,7 +363,7 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index, hints): # Note: this does not include loop_iname itself! usable_inames = get_usable_inames_for_conditional(kernel, sched_index, - codegen_state.codegen_cachemanager) + codegen_state.codegen_cache_manager) domain = kernel.get_inames_domain(loop_iname) diff --git a/loopy/codegen/result.py b/loopy/codegen/result.py index 7fcb4294a..02b5ce27a 100644 --- a/loopy/codegen/result.py +++ b/loopy/codegen/result.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2016 Andreas Kloeckner" __license__ = """ @@ -24,23 +27,18 @@ from typing import ( TYPE_CHECKING, Any, - Dict, - List, Mapping, - Optional, Sequence, - Tuple, - Union, ) -import islpy as isl - if TYPE_CHECKING: + import islpy + from loopy.codegen import CodeGenerationState -def process_preambles(preambles: Sequence[Tuple[int, str]]) -> Sequence[str]: +def process_preambles(preambles: Sequence[tuple[int, str]]) -> Sequence[str]: seen_preamble_tags = set() dedup_preambles = [] @@ -60,8 +58,6 @@ def process_preambles(preambles: Sequence[Tuple[int, str]]) -> Sequence[str]: __doc__ = """ .. currentmodule:: loopy.codegen.result -.. autoclass:: GeneratedProgram - .. autoclass:: CodeGenerationResult .. autofunction:: merge_codegen_results @@ -94,9 +90,9 @@ class GeneratedProgram: name: str is_device_program: bool ast: Any - body_ast: Optional[Any] = None + body_ast: Any | None = None - def copy(self, **kwargs: Any) -> "GeneratedProgram": + def copy(self, **kwargs: Any) -> GeneratedProgram: return replace(self, **kwargs) @@ -121,13 +117,13 @@ class CodeGenerationResult: .. automethod:: device_code .. automethod:: all_code """ - host_program: Optional[GeneratedProgram] + host_program: GeneratedProgram | None device_programs: Sequence[GeneratedProgram] - implemented_domains: Mapping[str, isl.Set] - host_preambles: Sequence[Tuple[str, str]] = () - device_preambles: Sequence[Tuple[str, str]] = () + implemented_domains: Mapping[str, islpy.Set] + host_preambles: Sequence[tuple[str, str]] = () + device_preambles: Sequence[tuple[str, str]] = () - def copy(self, **kwargs: Any) -> "CodeGenerationResult": + def copy(self, **kwargs: Any) -> CodeGenerationResult: return replace(self, **kwargs) @staticmethod @@ -185,7 +181,7 @@ def all_code(self): + str(self.host_program.ast)) def current_program( - self, codegen_state: "CodeGenerationState") -> GeneratedProgram: + self, codegen_state: CodeGenerationState) -> GeneratedProgram: if codegen_state.is_generating_device_code: if self.device_programs: result = self.device_programs[-1] @@ -210,9 +206,7 @@ def with_new_program(self, codegen_state, program): assert program.is_device_program return self.copy( device_programs=( - list(self.device_programs[:-1]) - + - [program])) + [*list(self.device_programs[:-1]), program])) else: assert program.name == codegen_state.gen_program_name assert not program.is_device_program @@ -233,8 +227,8 @@ def with_new_ast(self, codegen_state, new_ast): # {{{ support code for AST merging def merge_codegen_results( - codegen_state: "CodeGenerationState", - elements: Sequence[Union[CodeGenerationResult, Any]], collapse=True + codegen_state: CodeGenerationState, + elements: Sequence[CodeGenerationResult | Any], collapse=True ) -> CodeGenerationResult: elements = [el for el in elements if el is not None] @@ -251,9 +245,9 @@ def merge_codegen_results( ast_els = [] new_device_programs = [] - new_device_preambles: List[Tuple[str, str]] = [] + new_device_preambles: list[tuple[str, str]] = [] dev_program_names = set() - implemented_domains: Dict[str, isl.Set] = {} + implemented_domains: dict[str, islpy.Set] = {} codegen_result = None block_cls = codegen_state.ast_builder.ast_block_class diff --git a/loopy/codegen/tools.py b/loopy/codegen/tools.py index cb6285b08..be3d6ade5 100644 --- a/loopy/codegen/tools.py +++ b/loopy/codegen/tools.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2020 Kaushik Kulkarni" __license__ = """ @@ -22,13 +25,11 @@ from dataclasses import dataclass from functools import cached_property -from typing import Dict, FrozenSet, List +from typing import TYPE_CHECKING from pytools import memoize_method from loopy.kernel import LoopKernel -from loopy.kernel.data import Iname -from loopy.kernel.instruction import InstructionBase from loopy.schedule import ( Barrier, BeginBlockItem, @@ -41,9 +42,12 @@ ) -__doc__ = """ -.. currentmodule:: loopy.codegen.tools +if TYPE_CHECKING: + import loopy.kernel.data + from loopy.kernel.instruction import InstructionBase + +__doc__ = """ .. autoclass:: KernelProxyForCodegenOperationCacheManager .. autoclass:: CodegenOperationCacheManager @@ -56,9 +60,9 @@ class KernelProxyForCodegenOperationCacheManager: Proxy to :class:`loopy.LoopKernel` to be used by :class:`CodegenOperationCacheManager`. """ - instructions: List[InstructionBase] - linearization: List[ScheduleItem] - inames: Dict[str, Iname] + instructions: list[InstructionBase] + linearization: list[ScheduleItem] + inames: dict[str, loopy.kernel.data.Iname] @cached_property def id_to_insn(self): @@ -208,7 +212,7 @@ def get_insn_ids_for_block_at(self, sched_index): @memoize_method def get_concurrent_inames_in_a_callkernel( - self, callkernel_index: int) -> FrozenSet[str]: + self, callkernel_index: int) -> frozenset[str]: """ Returns a :class:`frozenset` of concurrent inames in a callkernel diff --git a/loopy/diagnostic.py b/loopy/diagnostic.py index 39e2fa591..be281158b 100644 --- a/loopy/diagnostic.py +++ b/loopy/diagnostic.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ diff --git a/loopy/expression.py b/loopy/expression.py index 2581ec022..e3eb65dc5 100644 --- a/loopy/expression.py +++ b/loopy/expression.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012-15 Andreas Kloeckner" __license__ = """ diff --git a/loopy/frontend/fortran/__init__.py b/loopy/frontend/fortran/__init__.py index 5e6ff24d0..7c184f73c 100644 --- a/loopy/frontend/fortran/__init__.py +++ b/loopy/frontend/fortran/__init__.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2013 Andreas Kloeckner" __license__ = """ @@ -225,7 +228,7 @@ def parse_transformed_fortran(source, free_form=True, strict=True, prev_sys_path = sys.path try: if infile_dirname: - sys.path = prev_sys_path + [infile_dirname] + sys.path = [*prev_sys_path, infile_dirname] if pre_transform_code is not None: proc_dict["_MODULE_SOURCE_CODE"] = pre_transform_code diff --git a/loopy/frontend/fortran/diagnostic.py b/loopy/frontend/fortran/diagnostic.py index b2ea02c05..5d3df2a21 100644 --- a/loopy/frontend/fortran/diagnostic.py +++ b/loopy/frontend/fortran/diagnostic.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2009 Andreas Kloeckner" __license__ = """ diff --git a/loopy/frontend/fortran/expression.py b/loopy/frontend/fortran/expression.py index bb8394515..54bca20bb 100644 --- a/loopy/frontend/fortran/expression.py +++ b/loopy/frontend/fortran/expression.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2013 Andreas Kloeckner" __license__ = """ @@ -22,6 +25,7 @@ import re from sys import intern +from typing import TYPE_CHECKING, ClassVar import numpy as np @@ -31,6 +35,12 @@ from loopy.frontend.fortran.diagnostic import TranslationError +if TYPE_CHECKING: + from collections.abc import Mapping + + from loopy.symbolic import LexTable + + _less_than = intern("less_than") _greater_than = intern("greater_than") _less_equal = intern("less_equal") @@ -65,7 +75,7 @@ def tuple_to_complex_literal(expr): # {{{ expression parser class FortranExpressionParser(ExpressionParserBase): - lex_table = [ + lex_table: ClassVar[LexTable] = [ (_less_than, pytools.lex.RE(r"\.lt\.", re.I)), (_greater_than, pytools.lex.RE(r"\.gt\.", re.I)), (_less_equal, pytools.lex.RE(r"\.le\.", re.I)), @@ -142,7 +152,7 @@ def parse_terminal(self, pstate): return ExpressionParserBase.parse_terminal( self, pstate) - COMP_MAP = { + COMP_MAP: ClassVar[Mapping[str, str]] = { _less_than: "<", _less_equal: "<=", _greater_than: ">", diff --git a/loopy/frontend/fortran/translator.py b/loopy/frontend/fortran/translator.py index fc9eace87..5000abf84 100644 --- a/loopy/frontend/fortran/translator.py +++ b/loopy/frontend/fortran/translator.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2013 Andreas Kloeckner" __license__ = """ @@ -22,6 +25,7 @@ import re from sys import intern +from typing import ClassVar from warnings import warn import numpy as np @@ -53,7 +57,7 @@ def __init__(self, scope): super().__init__() def get_cache_key(self, expr): - return super().get_cache_key(expr) + (self.scope,) + return (*super().get_cache_key(expr), self.scope) def map_subscript(self, expr): from pymbolic.primitives import Variable @@ -441,7 +445,7 @@ def map_Implicit(self, node): def map_Equivalence(self, node): raise NotImplementedError("equivalence") - TYPE_MAP = { + TYPE_MAP: ClassVar[dict[tuple[str, str], type[np.generic]]] = { ("real", ""): np.float32, ("real", "4"): np.float32, ("real", "8"): np.float64, @@ -455,9 +459,9 @@ def map_Equivalence(self, node): ("integer", "8"): np.int64, } if hasattr(np, "float128"): - TYPE_MAP[("real", "16")] = np.float128 # pylint:disable=no-member + TYPE_MAP["real", "16"] = np.float128 # pylint:disable=no-member if hasattr(np, "complex256"): - TYPE_MAP[("complex", "32")] = np.complex256 # pylint:disable=no-member + TYPE_MAP["complex", "32"] = np.complex256 # pylint:disable=no-member def dtype_from_stmt(self, stmt): length, kind = stmt.selector @@ -471,7 +475,7 @@ def dtype_from_stmt(self, stmt): else: raise RuntimeError("both length and kind specified") - return np.dtype(self.TYPE_MAP[(type(stmt).__name__.lower(), length)]) + return np.dtype(self.TYPE_MAP[type(stmt).__name__.lower(), length]) def map_type_decl(self, node): scope = self.scope_stack[-1] diff --git a/loopy/frontend/fortran/tree.py b/loopy/frontend/fortran/tree.py index b2af66f08..f1613b22f 100644 --- a/loopy/frontend/fortran/tree.py +++ b/loopy/frontend/fortran/tree.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2009 Andreas Kloeckner" __license__ = """ @@ -62,7 +65,7 @@ def rec(self, expr, *args, **kwargs): r"^(?P[_0-9a-zA-Z]+)\s*" r"(\((?P[-+*/0-9:a-zA-Z, \t]+)\))?" r"(\s*=\s*(?P.+))?" - "$") + r"$") def parse_dimension_specs(self, node, dim_decls): def parse_bounds(bounds_str): diff --git a/loopy/ipython_ext.py b/loopy/ipython_ext.py index ba734d8ba..92592bdba 100644 --- a/loopy/ipython_ext.py +++ b/loopy/ipython_ext.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from IPython.core.magic import Magics, cell_magic, magics_class import loopy as lp diff --git a/loopy/isl_helpers.py b/loopy/isl_helpers.py index 28aa3be30..04d0bcd98 100644 --- a/loopy/isl_helpers.py +++ b/loopy/isl_helpers.py @@ -1,4 +1,6 @@ """isl helpers""" +from __future__ import annotations + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" @@ -93,12 +95,12 @@ def make_slab(space, iname, start, stop, iname_multiplier=1): space = zero.get_domain_space() - from pymbolic.primitives import Expression + from pymbolic.primitives import ExpressionNode from loopy.symbolic import aff_from_expr - if isinstance(start, Expression): + if isinstance(start, ExpressionNode): start = aff_from_expr(space, start) - if isinstance(stop, Expression): + if isinstance(stop, ExpressionNode): stop = aff_from_expr(space, stop) if isinstance(start, int): diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 967640260..d612b5db3 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -1,4 +1,14 @@ -"""Kernel object.""" +""" +.. currentmodule:: loopy + +.. autoclass:: LoopKernel + +.. autoclass:: KernelState + :members: + :undoc-members: +""" +from __future__ import annotations + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" @@ -21,7 +31,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ - from collections import defaultdict from dataclasses import dataclass, field, fields, replace from enum import IntEnum @@ -31,25 +40,19 @@ TYPE_CHECKING, Any, Callable, - Dict, - FrozenSet, + ClassVar, Iterator, - List, Mapping, - Optional, Sequence, - Set, - Tuple, - Union, ) from warnings import warn import numpy as np from immutables import Map +import islpy # to help out Sphinx import islpy as isl from islpy import dim_type -from pymbolic import ArithmeticExpressionT from pytools import ( UniqueNameGenerator, generate_unique_names, @@ -58,10 +61,11 @@ ) from pytools.tag import Tag, Taggable +import loopy.codegen +import loopy.kernel.data # to help out Sphinx from loopy.diagnostic import CannotBranchDomainTree, LoopyError, StaticValueFindingError from loopy.kernel.data import ( ArrayArg, - Iname, KernelArgument, SubstitutionRule, TemporaryVariable, @@ -69,23 +73,24 @@ _ArraySeparationInfo, filter_iname_tags_by_type, ) -from loopy.kernel.instruction import InstructionBase -from loopy.options import Options -from loopy.schedule import ScheduleItem -from loopy.target import TargetBase from loopy.tools import update_persistent_hash from loopy.types import LoopyType, NumpyType -from loopy.typing import ExpressionT, InameStr if TYPE_CHECKING: - from loopy.codegen import PreambleInfo + from pymbolic import ArithmeticExpression + from loopy.kernel.function_interface import InKernelCallable + from loopy.kernel.instruction import InstructionBase + from loopy.options import Options + from loopy.schedule import ScheduleItem + from loopy.target import TargetBase + from loopy.typing import Expression, InameStr # {{{ loop kernel object -class KernelState(IntEnum): # noqa +class KernelState(IntEnum): INITIAL = 0 CALLS_RESOLVED = 1 PREPROCESSED = 2 @@ -99,12 +104,9 @@ def _get_inames_from_domains(domains): @dataclass(frozen=True) class _BoundsRecord: - lower_bound_pw_aff: isl.PwAff - upper_bound_pw_aff: isl.PwAff - size: isl.PwAff - - -PreambleGenerator = Callable[["PreambleInfo"], Iterator[Tuple[int, str]]] + lower_bound_pw_aff: islpy.PwAff + upper_bound_pw_aff: islpy.PwAff + size: islpy.PwAff @dataclass(frozen=True) @@ -144,7 +146,7 @@ class LoopKernel(Taggable): .. automethod:: tagged .. automethod:: without_tags """ - domains: Sequence[isl.BasicSet] + domains: Sequence[islpy.BasicSet] """Represents the :ref:`domain-tree`.""" instructions: Sequence[InstructionBase] @@ -153,13 +155,13 @@ class LoopKernel(Taggable): """ args: Sequence[KernelArgument] - assumptions: isl.BasicSet + assumptions: islpy.BasicSet """ Must be a :class:`islpy.BasicSet` parameter domain. """ temporary_variables: Mapping[str, TemporaryVariable] - inames: Mapping[InameStr, Iname] + inames: Mapping[InameStr, loopy.kernel.data.Iname] """ An entry is guaranteed to be present for each iname. """ @@ -167,16 +169,20 @@ class LoopKernel(Taggable): substitutions: Mapping[str, SubstitutionRule] options: Options target: TargetBase - tags: FrozenSet[Tag] + tags: frozenset[Tag] state: KernelState = KernelState.INITIAL name: str = "loopy_kernel" - preambles: Sequence[Tuple[int, str]] = () - preamble_generators: Sequence[PreambleGenerator] = () + preambles: Sequence[tuple[int, str]] = () + preamble_generators: Sequence[ + Callable[ + [loopy.codegen.PreambleInfo], + Iterator[tuple[int, str]]] + ] = () symbol_manglers: Sequence[ - Callable[["LoopKernel", str], Optional[Tuple[LoopyType, str]]]] = () - linearization: Optional[Sequence[ScheduleItem]] = None - iname_slab_increments: Mapping[InameStr, Tuple[int, int]] = field( + Callable[[LoopKernel, str], tuple[LoopyType, str] | None]] = () + linearization: Sequence[ScheduleItem] | None = None + iname_slab_increments: Mapping[InameStr, tuple[int, int]] = field( default_factory=Map) """ A mapping from inames to (lower_incr, @@ -184,7 +190,7 @@ class LoopKernel(Taggable): 'bulk' slabs with fewer conditionals. """ - loop_priority: FrozenSet[Tuple[InameStr, ...]] = field( + loop_priority: frozenset[tuple[InameStr, ...]] = field( default_factory=frozenset) """ A frozenset of priority constraints to the kernel. Each such constraint @@ -193,22 +199,20 @@ class LoopKernel(Taggable): with non-parallel implementation tags. """ - applied_iname_rewrites: Tuple[Dict[InameStr, ExpressionT], ...] = () + applied_iname_rewrites: tuple[dict[InameStr, Expression], ...] = () """ A list of past substitution dictionaries that were applied to the kernel. These are stored so that they may be repeated on expressions the user specifies later. """ - index_dtype: NumpyType = NumpyType(np.dtype(np.int32)) - silenced_warnings: FrozenSet[str] = frozenset() + index_dtype: NumpyType = NumpyType(np.dtype(np.int32)) # noqa: RUF009 + silenced_warnings: frozenset[str] = frozenset() # FIXME Yuck, this should go. - overridden_get_grid_sizes_for_insn_ids: Optional[ - Callable[ - [FrozenSet[str], - Dict[str, "InKernelCallable"], - bool], - Tuple[Tuple[int, ...], Tuple[int, ...]]]] = None + overridden_get_grid_sizes_for_insn_ids: \ + Callable[[frozenset[str], dict[str, InKernelCallable], bool], + tuple[tuple[int, ...], tuple[int, ...]] + ] | None = None def __post_init__(self): assert isinstance(self.assumptions, isl.BasicSet) @@ -281,7 +285,7 @@ def get_group_name_generator(self): return UniqueNameGenerator(set(self.all_group_names())) def get_var_descriptor( - self, name: str) -> Union[TemporaryVariable, KernelArgument]: + self, name: str) -> TemporaryVariable | KernelArgument: try: return self.arg_dict[name] except KeyError: @@ -317,7 +321,7 @@ def id_to_insn(self): # {{{ domain wrangling @memoize_method - def parents_per_domain(self) -> Sequence[Optional[int]]: + def parents_per_domain(self) -> Sequence[int | None]: """Return a list corresponding to self.domains (by index) containing domain indices which are nested around this domain. @@ -331,8 +335,8 @@ def parents_per_domain(self) -> Sequence[Optional[int]]: # determines the granularity of inames to be popped/decactivated # if we ascend a level. - iname_set_stack: List[Set[str]] = [] - result: List[Optional[int]] = [] + iname_set_stack: list[set[str]] = [] + result: list[int | None] = [] from loopy.kernel.tools import is_domain_dependent_on_inames @@ -459,7 +463,7 @@ def combine_domains(self, domains: Sequence[int]) -> isl.BasicSet: return result - def get_inames_domain(self, inames: FrozenSet[str]) -> isl.BasicSet: + def get_inames_domain(self, inames: frozenset[str]) -> isl.BasicSet: if not inames: return self.combine_domains(()) @@ -560,7 +564,7 @@ def all_inames(self): return frozenset(self.inames.keys()) @memoize_method - def all_params(self) -> FrozenSet[str]: + def all_params(self) -> frozenset[str]: all_inames = self.all_inames() result = set() @@ -758,7 +762,7 @@ def get_unwritten_value_args(self): # {{{ argument wrangling @cached_property - def arg_dict(self) -> Dict[str, KernelArgument]: + def arg_dict(self) -> dict[str, KernelArgument]: return {arg.name: arg for arg in self.args} @cached_property @@ -1035,9 +1039,9 @@ def get_grid_size_upper_bounds(self, callables_table, ignore_auto=False, def get_grid_size_upper_bounds_as_exprs( self, callables_table, ignore_auto=False, return_dict=False - ) -> Tuple[ - Tuple[ArithmeticExpressionT, ...], - Tuple[ArithmeticExpressionT, ...]]: + ) -> tuple[ + tuple[ArithmeticExpression, ...], + tuple[ArithmeticExpression, ...]]: """Return a tuple (global_size, local_size) containing a grid that could accommodate execution of *all* instructions in the kernel. @@ -1310,7 +1314,7 @@ def __setstate__(self, state): # {{{ persistent hash key generation / comparison - hash_fields = [ + hash_fields: ClassVar[Sequence[str]] = [ "domains", "instructions", "args", @@ -1361,18 +1365,19 @@ def __hash__(self): # }}} - def get_copy_kwargs(self, **kwargs: Any) -> Dict[str, Any]: + def get_copy_kwargs(self, **kwargs: Any) -> dict[str, Any]: if "domains" in kwargs: inames = kwargs.get("inames", self.inames) domains = kwargs["domains"] - kwargs["inames"] = {name: inames.get(name, Iname(name, frozenset())) + kwargs["inames"] = {name: inames.get(name, + loopy.kernel.data.Iname(name, frozenset())) for name in _get_inames_from_domains(domains)} assert all(dom.get_ctx() == isl.DEFAULT_CONTEXT for dom in domains) return kwargs - def copy(self, **kwargs: Any) -> "LoopKernel": + def copy(self, **kwargs: Any) -> LoopKernel: result = replace(self, **self.get_copy_kwargs(**kwargs)) object.__setattr__(result, "_cache_manager", self.cache_manager) @@ -1391,11 +1396,11 @@ def copy(self, **kwargs: Any) -> "LoopKernel": return result - def _with_new_tags(self, tags) -> "LoopKernel": + def _with_new_tags(self, tags) -> LoopKernel: return replace(self, tags=tags) @memoize_method - def _separation_info(self) -> Dict[str, _ArraySeparationInfo]: + def _separation_info(self) -> dict[str, _ArraySeparationInfo]: return { arg.name: arg._separation_info for arg in self.args diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py index 8cabbec23..9895685fb 100644 --- a/loopy/kernel/array.py +++ b/loopy/kernel/array.py @@ -24,28 +24,22 @@ """ import re -import sys from dataclasses import dataclass from typing import ( TYPE_CHECKING, + Any, Callable, ClassVar, - FrozenSet, - List, - Optional, Sequence, Tuple, - Type, TypeVar, - Union, cast, ) from warnings import warn import numpy as np # noqa -from typing_extensions import TypeAlias +from typing_extensions import Self, TypeAlias -from pymbolic import ArithmeticExpressionT from pymbolic.primitives import is_arithmetic_expression from pytools import ImmutableRecord from pytools.tag import Tag, Taggable @@ -53,19 +47,17 @@ from loopy.diagnostic import LoopyError from loopy.symbolic import flatten from loopy.types import LoopyType -from loopy.typing import ExpressionT, ShapeType, auto, is_integer +from loopy.typing import Expression, ShapeType, auto, is_integer if TYPE_CHECKING: + from pymbolic import ArithmeticExpression + from loopy.codegen import VectorizationInfo from loopy.kernel import LoopKernel - from loopy.kernel.data import ArrayArg, TemporaryVariable, auto + from loopy.kernel.data import ArrayArg, TemporaryVariable from loopy.target import TargetBase -if getattr(sys, "_BUILDING_SPHINX_DOCS", False): - from loopy.target import TargetBase # noqa: F811 - - T = TypeVar("T") @@ -93,10 +85,6 @@ See :class:`loopy.typing.ShapeType` -.. class:: ExpressionT - - See :class:`loopy.typing.ExpressionT` - .. class:: Tag See :class:`pytools.tag.Tag` @@ -150,7 +138,7 @@ class FixedStrideArrayDimTag(_StrideArrayDimTagBase): May be one of the following: - - A :class:`pymbolic.primitives.Expression`, including an + - A :data:`~pymbolic.typing.Expression`, including an integer, indicating the stride in units of the underlying array's :attr:`ArrayBase.dtype`. @@ -609,8 +597,8 @@ def convert_computed_to_fixed_dim_tags(name, num_user_axes, num_target_axes, # {{{ array base class (for arguments and temporary arrays) -ToShapeLikeConvertible: TypeAlias = (Tuple[ExpressionT | str, ...] - | ExpressionT | type[auto] | str | tuple[str, ...]) +ToShapeLikeConvertible: TypeAlias = (Tuple[Expression | str, ...] + | Expression | type[auto] | str | tuple[str, ...]) def _parse_shape_or_strides( @@ -634,12 +622,12 @@ def _parse_shape_or_strides( raise ValueError("shape can't be a list") if isinstance(x_parsed, tuple): - x_tup: tuple[ExpressionT | str, ...] = x_parsed + x_tup: tuple[Expression | str, ...] = x_parsed else: assert x_parsed is not auto - x_tup = (cast(ExpressionT, x_parsed),) + x_tup = (cast("Expression", x_parsed),) - def parse_arith(x: ExpressionT | str) -> ArithmeticExpressionT: + def parse_arith(x: Expression | str) -> ArithmeticExpression: if isinstance(x, str): res = parse(x) else: @@ -677,7 +665,7 @@ class ArrayBase(ImmutableRecord, Taggable): """ name: str - dtype: Optional[LoopyType] + dtype: LoopyType | None """The :class:`loopy.types.LoopyType` of the array. If this is *None*, :mod:`loopy` will try to continue without knowing the type of this array, where the idea is that precise knowledge of the type will become @@ -689,7 +677,7 @@ class ArrayBase(ImmutableRecord, Taggable): cannot be performed without knowledge of the exact *dtype*. """ - shape: Union[ShapeType, Type["auto"], None] + shape: ShapeType | type[auto] | None """ May be one of the following: @@ -710,11 +698,11 @@ class ArrayBase(ImmutableRecord, Taggable): may be *None*. """ - dim_tags: Optional[Sequence[ArrayDimImplementationTag]] + dim_tags: Sequence[ArrayDimImplementationTag] | None """See :ref:`data-dim-tags`. """ - offset: Union[ExpressionT, str, None] + offset: Expression | str | None """Offset from the beginning of the buffer to the point from which the strides are counted, in units of the :attr:`dtype`. May be one of @@ -726,7 +714,7 @@ class ArrayBase(ImmutableRecord, Taggable): is added automatically, immediately following this argument. """ - dim_names: Optional[Tuple[str, ...]] + dim_names: tuple[str, ...] | None """A tuple of strings providing names for the array axes, or *None*. If given, must have the same number of entries as :attr:`dim_tags` and :attr:`dim_tags`. These do not live in any particular namespace @@ -736,7 +724,7 @@ class ArrayBase(ImmutableRecord, Taggable): axis numbers. """ - alignment: Optional[int] + alignment: int | None """Memory alignment of the array in bytes. For temporary arrays, this ensures they are allocated with this alignment. For arguments, this entails a promise that the incoming array obeys this alignment @@ -751,7 +739,7 @@ class ArrayBase(ImmutableRecord, Taggable): .. versionadded:: 2018.1 """ - tags: FrozenSet[Tag] + tags: frozenset[Tag] """A (possibly empty) frozenset of instances of :class:`pytools.tag.Tag` intended for consumption by an application. @@ -762,7 +750,7 @@ class ArrayBase(ImmutableRecord, Taggable): # Note that order may also wind up in attributes, if the # number of dimensions has not yet been determined. - allowed_extra_kwargs: ClassVar[Tuple[str, ...]] = () + allowed_extra_kwargs: ClassVar[tuple[str, ...]] = () def __init__(self, name, dtype=None, shape=None, dim_tags=None, offset=0, dim_names=None, strides=None, order=None, for_atomic=False, @@ -1080,16 +1068,18 @@ def num_user_axes(self, require_answer=True): else: return None - def map_exprs(self, mapper): + def map_exprs(self, mapper: Callable[[Expression], Expression]) -> Self: """Return a copy of self with all expressions replaced with what *mapper* transformed them into. """ changed = False - kwargs = {} + kwargs: dict[str, Any] = {} import loopy as lp if self.shape is not None and self.shape is not lp.auto: - def none_pass_mapper(s): + assert isinstance(self.shape, tuple) + + def none_pass_mapper(s: Expression | None) -> Expression | None: if s is None: return s else: @@ -1151,16 +1141,16 @@ def vector_size(self, target: TargetBase) -> int: # }}} def drop_vec_dims( - dim_tags: Tuple[ArrayDimImplementationTag, ...], - t: Tuple[T, ...]) -> Tuple[T, ...]: + dim_tags: tuple[ArrayDimImplementationTag, ...], + t: tuple[T, ...]) -> tuple[T, ...]: assert len(dim_tags) == len(t) return tuple(t_i for dim_tag, t_i in zip(dim_tags, t) if not isinstance(dim_tag, VectorArrayDimTag)) -def get_strides(array: ArrayBase) -> Tuple[ExpressionT, ...]: +def get_strides(array: ArrayBase) -> tuple[Expression, ...]: from pymbolic import var - result: List[ExpressionT] = [] + result: list[Expression] = [] if array.dim_tags is None: return () @@ -1187,11 +1177,11 @@ def get_strides(array: ArrayBase) -> Tuple[ExpressionT, ...]: @dataclass(frozen=True) class AccessInfo(ImmutableRecord): array_name: str - vector_index: Optional[int] - subscripts: Tuple[ExpressionT, ...] + vector_index: int | None + subscripts: tuple[Expression, ...] -def _apply_offset(sub: ExpressionT, ary: ArrayBase) -> ExpressionT: +def _apply_offset(sub: Expression, ary: ArrayBase) -> Expression: """ Helper for :func:`get_access_info`. Augments *ary*'s subscript index expression (*sub*) with its offset info. @@ -1221,16 +1211,16 @@ def _apply_offset(sub: ExpressionT, ary: ArrayBase) -> ExpressionT: else: # assume it's an expression # FIXME: mypy can't figure out that ExpressionT + ExpressionT works - return ary.offset + sub # type: ignore[call-overload, arg-type, operator] # noqa: E501 + return ary.offset + sub # type: ignore[call-overload, arg-type, operator] else: return sub -def get_access_info(kernel: "LoopKernel", - ary: Union["ArrayArg", "TemporaryVariable"], - index: Union[ExpressionT, Tuple[ExpressionT, ...]], - eval_expr: Callable[[ExpressionT], int], - vectorization_info: "VectorizationInfo") -> AccessInfo: +def get_access_info(kernel: LoopKernel, + ary: ArrayArg | TemporaryVariable, + index: Expression | tuple[Expression, ...], + eval_expr: Callable[[Expression], int], + vectorization_info: VectorizationInfo) -> AccessInfo: """ :arg ary: an object of type :class:`ArrayBase` :arg index: a tuple of indices representing a subscript into ary @@ -1283,7 +1273,7 @@ def eval_expr_assert_integer_constant(i, expr) -> int: num_target_axes = ary.num_target_axes() vector_index = None - subscripts: List[ExpressionT] = [0] * num_target_axes + subscripts: list[Expression] = [0] * num_target_axes vector_size = ary.vector_size(kernel.target) @@ -1302,7 +1292,7 @@ def eval_expr_assert_integer_constant(i, expr) -> int: index = tuple(remaining_index) # only arguments (not temporaries) may be sep-tagged - ary = cast(ArrayArg, + ary = cast("ArrayArg", kernel.arg_dict[ary._separation_info.subarray_names[tuple(sep_index)]]) # }}} diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 4f1803f24..e7228468c 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -1,4 +1,5 @@ """UI for kernel creation.""" +from __future__ import annotations __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" @@ -155,13 +156,13 @@ def expand_defines(insn, defines, single_valued=True): "in this context (when expanding '%s')" % define_name) replacements = [ - rep+((replace_pattern % define_name, subval),) + (*rep, (replace_pattern % define_name, subval)) for rep in replacements for subval in value ] else: replacements = [ - rep+((replace_pattern % define_name, value),) + (*rep, (replace_pattern % define_name, value)) for rep in replacements] for rep in replacements: @@ -285,14 +286,12 @@ def parse_nosync_option(opt_value): arrow_idx = value.find("->") if arrow_idx >= 0: result["inames_to_dup"] = ( - result.get("inames_to_dup", []) - + - [(value[:arrow_idx], value[arrow_idx+2:])]) + [*result.get("inames_to_dup", []), + (value[:arrow_idx], value[arrow_idx + 2:]) + ]) else: result["inames_to_dup"] = ( - result.get("inames_to_dup", []) - + - [(value, None)]) + [*result.get("inames_to_dup", []), (value, None)]) elif opt_key == "dep" and opt_value is not None: if opt_value.startswith("*"): @@ -681,7 +680,7 @@ def _count_open_paren_symbols(s): for c in s: val = _PAREN_PAIRS.get(c) if val is not None: - increment, cls = val + increment, _cls = val result += increment return result @@ -2403,7 +2402,7 @@ def make_function(domains, instructions, kernel_data=None, **kwargs): kernel_args.append(dat) continue - if isinstance(dat, ArrayBase) and isinstance(dat.shape, tuple): # noqa pylint:disable=no-member + if isinstance(dat, ArrayBase) and isinstance(dat.shape, tuple): # pylint: disable=no-member new_shape = [] for shape_axis in dat.shape: # pylint:disable=no-member if shape_axis is not None: diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 668e6a07d..913c946ec 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -30,22 +30,18 @@ from enum import Enum, IntEnum from sys import intern from typing import ( + TYPE_CHECKING, Any, ClassVar, - FrozenSet, - Optional, Sequence, Tuple, - Type, Union, cast, ) import numpy # FIXME: imported as numpy to allow sphinx to resolve things import numpy as np -from immutables import Map -from pymbolic import ArithmeticExpressionT, Variable from pytools import ImmutableRecord from pytools.tag import Tag, Taggable, UniqueTag as UniqueTagBase @@ -64,8 +60,15 @@ VarAtomicity, make_assignment, ) -from loopy.types import LoopyType, ToLoopyTypeConvertible -from loopy.typing import ExpressionT, ShapeType, auto +from loopy.typing import Expression, ShapeType, auto + + +if TYPE_CHECKING: + from immutables import Map + + from pymbolic import ArithmeticExpression, Variable + + from loopy.types import LoopyType, ToLoopyTypeConvertible __doc__ = """ @@ -103,17 +106,17 @@ # {{{ utilities -def _names_from_expr(expr: Union[None, ExpressionT, str]) -> FrozenSet[str]: +def _names_from_expr(expr: Expression | str | None) -> frozenset[str]: from numbers import Number from loopy.symbolic import DependencyMapper - dep_mapper = DependencyMapper() + dep_mapper: DependencyMapper[[]] = DependencyMapper() - from pymbolic.primitives import Expression + from pymbolic.primitives import ExpressionNode if isinstance(expr, str): return frozenset({expr}) - elif isinstance(expr, Expression): - return frozenset(cast(Variable, v).name for v in dep_mapper(expr)) + elif isinstance(expr, ExpressionNode): + return frozenset(cast("Variable", v).name for v in dep_mapper(expr)) elif expr is None: return frozenset() elif isinstance(expr, Number): @@ -123,7 +126,7 @@ def _names_from_expr(expr: Union[None, ExpressionT, str]) -> FrozenSet[str]: def _names_from_dim_tags( - dim_tags: Optional[Sequence[ArrayDimImplementationTag]]) -> FrozenSet[str]: + dim_tags: Sequence[ArrayDimImplementationTag] | None) -> frozenset[str]: from loopy.kernel.array import FixedStrideArrayDimTag if dim_tags is not None: return frozenset({ @@ -171,7 +174,7 @@ def strify_tag_type(): class InameImplementationTag(ImmutableRecord, UniqueTagBase): - __slots__: ClassVar[Tuple[str, ...]] = () + __slots__: ClassVar[tuple[str, ...]] = () def __hash__(self): return hash(self.key) @@ -303,10 +306,10 @@ def __str__(self): return "ord" -ToInameTagConvertible = Union[str, None, Tag] +ToInameTagConvertible = Union[str, Tag, None] -def parse_tag(tag: ToInameTagConvertible) -> Optional[Tag]: +def parse_tag(tag: ToInameTagConvertible) -> Tag | None: if tag is None: return tag @@ -365,7 +368,7 @@ class AddressSpace(IntEnum): GLOBAL = 2 @classmethod - def stringify(cls, val: Union["AddressSpace", Type[auto]]) -> str: + def stringify(cls, val: AddressSpace | type[auto]) -> str: if val == cls.PRIVATE: return "private" elif val == cls.LOCAL: @@ -397,7 +400,7 @@ class KernelArgument(ImmutableRecord): .. automethod:: supporting_names """ name: str - dtype: Optional[LoopyType] + dtype: LoopyType | None is_output: bool is_input: bool @@ -422,7 +425,7 @@ def __init__(self, **kwargs): ImmutableRecord.__init__(self, **kwargs) - def supporting_names(self) -> FrozenSet[str]: + def supporting_names(self) -> frozenset[str]: """'Supporting' names are those that are likely to be required to be present for any use of the argument. """ @@ -437,12 +440,12 @@ class _ArraySeparationInfo: this records the names of the actually present sub-arrays that should be used to realize this array. """ - sep_axis_indices_set: FrozenSet[int] - subarray_names: Map[Tuple[int, ...], str] + sep_axis_indices_set: frozenset[int] + subarray_names: Map[tuple[int, ...], str] class ArrayArg(ArrayBase, KernelArgument): - __doc__ = cast(str, ArrayBase.__doc__) + ( + __doc__ = cast("str", ArrayBase.__doc__) + ( """ .. attribute:: address_space @@ -465,7 +468,7 @@ class ArrayArg(ArrayBase, KernelArgument): address_space: AddressSpace # _separation_info is not user-facing and hence not documented. - _separation_info: Optional[_ArraySeparationInfo] + _separation_info: _ArraySeparationInfo | None allowed_extra_kwargs = ( "address_space", @@ -517,7 +520,7 @@ def update_persistent_hash(self, key_hash, key_builder): key_builder.rec(key_hash, self.is_input) key_builder.rec(key_hash, self._separation_info) - def supporting_names(self) -> FrozenSet[str]: + def supporting_names(self) -> frozenset[str]: # Do not consider separation info here: The subarrays don't support, they # replace this array. return ( @@ -580,7 +583,7 @@ def get_arg_decl(self, ast_builder, name_suffix, shape, dtype, is_written): return ast_builder.get_image_arg_decl(self.name + name_suffix, shape, self.num_target_axes(), dtype, is_written) - def supporting_names(self) -> FrozenSet[str]: + def supporting_names(self) -> frozenset[str]: return ( _names_from_expr(self.offset) | _names_from_dim_tags(self.dim_tags) @@ -644,7 +647,7 @@ def get_arg_decl(self, ast_builder): # {{{ temporary variable class TemporaryVariable(ArrayBase): - __doc__ = cast(str, ArrayBase.__doc__) + """ + __doc__ = cast("str", ArrayBase.__doc__) + """ .. autoattribute:: storage_shape .. autoattribute:: base_indices .. autoattribute:: address_space @@ -654,17 +657,17 @@ class TemporaryVariable(ArrayBase): .. autoattribute:: _base_storage_access_may_be_aliasing """ - storage_shape: Optional[ShapeType] - base_indices: Optional[Tuple[ExpressionT, ...]] - address_space: Union[AddressSpace, Type[auto]] - base_storage: Optional[str] + storage_shape: ShapeType | None + base_indices: tuple[Expression, ...] | None + address_space: AddressSpace | type[auto] + base_storage: str | None """The name of a storage array that is to be used to actually hold the data in this temporary, or *None*. If not *None* or the name of an existing variable, a variable of this name and appropriate size will be created. """ - initializer: Optional[numpy.ndarray] + initializer: numpy.ndarray | None """*None* or a :class:`numpy.ndarray` of data to be used to initialize the array. """ @@ -699,19 +702,19 @@ def __init__( self, name: str, dtype: ToLoopyTypeConvertible = None, - shape: Union[ShapeType, Type["auto"], None] = auto, - address_space: Union[AddressSpace, Type[auto], None] = None, - dim_tags: Optional[Sequence[ArrayDimImplementationTag]] = None, - offset: Union[ExpressionT, str, None] = 0, - dim_names: Optional[Tuple[str, ...]] = None, - strides: Optional[Tuple[ExpressionT, ...]] = None, + shape: ShapeType | type[auto] | None = auto, + address_space: AddressSpace | type[auto] | None = None, + dim_tags: Sequence[ArrayDimImplementationTag] | None = None, + offset: Expression | str | None = 0, + dim_names: tuple[str, ...] | None = None, + strides: tuple[Expression, ...] | None = None, order: str | None = None, - base_indices: Optional[Tuple[ExpressionT, ...]] = None, + base_indices: tuple[Expression, ...] | None = None, storage_shape: ShapeType | None = None, - base_storage: Optional[str] = None, - initializer: Optional[np.ndarray] = None, + base_storage: str | None = None, + initializer: np.ndarray | None = None, read_only: bool = False, _base_storage_access_may_be_aliasing: bool = False, @@ -813,7 +816,7 @@ def copy(self, **kwargs: Any) -> TemporaryVariable: return super().copy(**kwargs) @property - def nbytes(self) -> ExpressionT: + def nbytes(self) -> Expression: if self.storage_shape is not None: shape = self.storage_shape else: @@ -821,7 +824,7 @@ def nbytes(self) -> ExpressionT: raise ValueError("shape is None") if self.shape is auto: raise ValueError("shape is auto") - shape = cast(Tuple[ArithmeticExpressionT], self.shape) + shape = cast("Tuple[ArithmeticExpression]", self.shape) if self.dtype is None: raise ValueError("data type is indeterminate") @@ -878,7 +881,7 @@ def update_persistent_hash(self, key_hash, key_builder): key_builder.rec(key_hash, self.read_only) key_builder.rec(key_hash, self._base_storage_access_may_be_aliasing) - def supporting_names(self) -> FrozenSet[str]: + def supporting_names(self) -> frozenset[str]: return ( _names_from_expr(self.offset) | _names_from_dim_tags(self.dim_tags) @@ -902,7 +905,7 @@ class SubstitutionRule: name: str arguments: Sequence[str] - expression: ExpressionT + expression: Expression def copy(self, **kwargs: Any) -> SubstitutionRule: return replace(self, **kwargs) @@ -970,9 +973,9 @@ class Iname(Taggable): An instance of :class:`frozenset` of :class:`pytools.tag.Tag`. """ name: str - tags: FrozenSet[Tag] + tags: frozenset[Tag] - def copy(self, **kwargs: Any) -> "Iname": + def copy(self, **kwargs: Any) -> Iname: return replace(self, **kwargs) def _with_new_tags(self, tags): diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index 22abeb8ae..146d40f4f 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -22,38 +22,41 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ - from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Callable, ClassVar, FrozenSet, Tuple, TypeVar +from dataclasses import dataclass, replace +from typing import TYPE_CHECKING, Any, Callable, TypeVar +from warnings import warn -from pytools import ImmutableRecord +from immutabledict import immutabledict +from typing_extensions import Self from loopy.diagnostic import LoopyError -from loopy.kernel import LoopKernel -from loopy.kernel.array import ArrayBase -from loopy.kernel.data import ArrayArg, ValueArg +from loopy.kernel.array import ArrayBase, ArrayDimImplementationTag +from loopy.kernel.data import AddressSpace, ArrayArg, ValueArg from loopy.symbolic import DependencyMapper, WalkMapper -from loopy.tools import update_persistent_hash if TYPE_CHECKING: + from collections.abc import Mapping, Sequence + from typing_extensions import Self + from loopy.kernel import LoopKernel from loopy.translation_unit import CallablesTable, FunctionIdT + from loopy.types import LoopyType + from loopy.typing import ShapeType __doc__ = """ .. currentmodule:: loopy.kernel.function_interface +.. autoclass:: ArgDescriptor .. autoclass:: ValueArgDescriptor - .. autoclass:: ArrayArgDescriptor .. currentmodule:: loopy .. autoclass:: InKernelCallable - .. autoclass:: CallableKernel - .. autoclass:: ScalarCallable """ @@ -63,7 +66,7 @@ ArgDescriptorT = TypeVar("ArgDescriptorT", bound="ArgDescriptor") -class ArgDescriptor(ABC, ImmutableRecord): +class ArgDescriptor(ABC): @abstractmethod def map_expr( self, @@ -75,19 +78,25 @@ def map_expr( def depends_on(self) -> frozenset[str]: ... + @abstractmethod + def copy(self, **kwargs: Any) -> Self: + ... + +@dataclass(frozen=True) class ValueArgDescriptor(ArgDescriptor): - hash_fields = () def map_expr(self, subst_mapper): - return self.copy() + return self def depends_on(self): return frozenset() - update_persistent_hash = update_persistent_hash + def copy(self, **kwargs: Any) -> Self: + return replace(self, **kwargs) +@dataclass(frozen=True) class ArrayArgDescriptor(ArgDescriptor): """ Records information about an array argument to an in-kernel callable. To be @@ -95,46 +104,39 @@ class ArrayArgDescriptor(ArgDescriptor): :meth:`~loopy.InKernelCallable.with_descrs`, used for matching shape and address space of caller and callee kernels. - .. attribute:: shape - - Shape of the array. - - .. attribute:: address_space - - An attribute of :class:`loopy.AddressSpace`. - - .. attribute:: dim_tags - - A tuple of instances of - :class:`loopy.kernel.array.ArrayDimImplementationTag` + .. autoattribute:: shape + .. autoattribute:: address_space + .. autoattribute:: dim_tags .. automethod:: map_expr .. automethod:: depends_on """ - fields = {"shape", "address_space", "dim_tags"} - - def __init__(self, shape, address_space, dim_tags): + shape: ShapeType | None + address_space: AddressSpace + dim_tags: Sequence[ArrayDimImplementationTag] | None + """See :ref:`data-dim-tags`. + """ - # {{{ sanity checks + if __debug__: + def __post_init__(self): + # {{{ sanity checks - from loopy.kernel.array import ArrayDimImplementationTag - from loopy.kernel.data import auto + from loopy.kernel.array import ArrayDimImplementationTag + from loopy.kernel.data import auto - assert isinstance(shape, tuple) or shape in [None, auto] - assert isinstance(dim_tags, tuple) or dim_tags is None + assert isinstance(self.shape, tuple) or self.shape in [None, auto] + assert isinstance(self.dim_tags, tuple) or self.dim_tags is None - if dim_tags: - # FIXME at least vector dim tags should be supported - assert all(isinstance(dim_tag, ArrayDimImplementationTag) for dim_tag in - dim_tags) + if self.dim_tags: + # FIXME at least vector dim tags should be supported + assert all(isinstance(dim_tag, ArrayDimImplementationTag) for dim_tag in + self.dim_tags) - # }}} + # }}} - super().__init__( - shape=shape, - address_space=address_space, - dim_tags=dim_tags) + def copy(self, **kwargs: Any) -> Self: + return replace(self, **kwargs) def map_expr(self, f): """ @@ -173,11 +175,6 @@ def depends_on(self): return frozenset(var.name for var in result) - def update_persistent_hash(self, key_hash, key_builder): - key_builder.rec(key_hash, self.shape) - key_builder.rec(key_hash, self.address_space) - key_builder.rec(key_hash, self.dim_tags) - class ExpressionIsScalarChecker(WalkMapper): def __init__(self, kernel): @@ -308,25 +305,14 @@ def get_kw_pos_association(kernel): # {{{ template class -class InKernelCallable(ImmutableRecord): +@dataclass(frozen=True, init=False) +class InKernelCallable(ABC): """ An abstract interface to define a callable encountered in a kernel. - .. attribute:: name - - The name of the callable which can be encountered within expressions in - a kernel. - - .. attribute:: arg_id_to_dtype - - A mapping which indicates the arguments types and result types of the - callable. - - .. attribute:: arg_id_to_descr - - A mapping which gives indicates the argument shape and ``dim_tags`` it - would be responsible for generating code. - + .. autoattribute:: name + .. autoattribute:: arg_id_to_dtype + .. autoattribute:: arg_id_to_descr .. automethod:: __init__ .. automethod:: with_types @@ -352,17 +338,39 @@ class InKernelCallable(ImmutableRecord): return value with (0-based) index *i*. """ + arg_id_to_dtype: Mapping[int | str, LoopyType] | None + arg_id_to_descr: Mapping[int | str, ArgDescriptor] | None + + def __init__(self, + arg_id_to_dtype: Mapping[int | str, LoopyType] | None = None, + arg_id_to_descr: Mapping[int | str, ArgDescriptor] | None = None, + ) -> None: + try: + hash(arg_id_to_dtype) + except TypeError: + arg_id_to_dtype = immutabledict(arg_id_to_dtype) + warn("arg_id_to_dtype passed to InKernelCallable was not hashable. " + "This usage is deprecated and will stop working in 2026.", + DeprecationWarning, stacklevel=3) + + try: + hash(arg_id_to_descr) + except TypeError: + arg_id_to_descr = immutabledict(arg_id_to_descr) + warn("arg_id_to_descr passed to InKernelCallable was not hashable. " + "This usage is deprecated and will stop working in 2026.", + DeprecationWarning, stacklevel=3) + + object.__setattr__(self, "arg_id_to_dtype", arg_id_to_dtype) + object.__setattr__(self, "arg_id_to_descr", arg_id_to_descr) + + if TYPE_CHECKING: + @property + def name(self) -> str: + raise NotImplementedError() - hash_fields: ClassVar[Tuple[str, ...]] = ( - "name", "arg_id_to_dtype", "arg_id_to_descr") - - def __init__(self, name, arg_id_to_dtype=None, arg_id_to_descr=None): - - super().__init__(name=name, - arg_id_to_dtype=arg_id_to_dtype, - arg_id_to_descr=arg_id_to_descr) - - update_persistent_hash = update_persistent_hash + def copy(self, **kwargs: Any) -> Self: + return replace(self, **kwargs) def with_types(self, arg_id_to_dtype, clbl_inf_ctx): """ @@ -391,6 +399,7 @@ def with_types(self, arg_id_to_dtype, clbl_inf_ctx): raise NotImplementedError() + @abstractmethod def with_descrs(self, arg_id_to_descr, clbl_inf_ctx): """ :arg arg_id_to_descr: a mapping from argument identifiers (integers for @@ -418,12 +427,11 @@ def with_descrs(self, arg_id_to_descr, clbl_inf_ctx): other callables within it, then *clbl_inf_ctx* is returned as is. """ - raise NotImplementedError() - - def is_ready_for_codegen(self): + def is_ready_for_codegen(self) -> bool: return (self.arg_id_to_dtype is not None and self.arg_id_to_descr is not None) + @abstractmethod def get_hw_axes_sizes(self, arg_id_to_arg, space, callables_table): """ Returns ``gsizes, lsizes``, where *gsizes* and *lsizes* are mappings @@ -435,26 +443,28 @@ def get_hw_axes_sizes(self, arg_id_to_arg, space, callables_table): arguments at a call-site. :arg space: An instance of :class:`islpy.Space`. """ - raise NotImplementedError + ... + @abstractmethod def get_used_hw_axes(self, callables_table): """ Returns a tuple ``group_axes_used, local_axes_used``, where ``(group|local)_axes_used`` are :class:`frozenset` of hardware axes indices used by the callable. """ - raise NotImplementedError + @abstractmethod def generate_preambles(self, target): """ Yields the target specific preamble. """ raise NotImplementedError() + @abstractmethod def emit_call(self, expression_to_code_mapper, expression, target): + ... - raise NotImplementedError() - + @abstractmethod def emit_call_insn(self, insn, target, expression_to_code_mapper): """ Returns a tuple of ``(call, assignee_is_returned)`` which is the target @@ -469,23 +479,19 @@ def emit_call_insn(self, insn, target, expression_to_code_mapper): in the target as the statement ``f(c, d, &a, &b)``. """ - raise NotImplementedError() - - def __hash__(self): - return hash(self.hash_fields) - + @abstractmethod def with_added_arg(self, arg_dtype, arg_descr): """ Registers a new argument to the callable and returns the name of the argument in the callable's namespace. """ - raise NotImplementedError() + @abstractmethod def get_called_callables( self, callables_table: CallablesTable, recursive: bool = True - ) -> FrozenSet[FunctionIdT]: + ) -> frozenset[FunctionIdT]: """ Returns a :class:`frozenset` of callable ids called by *self* that are resolved via *callables_table*. @@ -496,27 +502,27 @@ def get_called_callables( callables, else only returns the callables directly called by *self*. """ - raise NotImplementedError + @abstractmethod def with_name(self, name): """ Returns a copy of *self* so that it could be referred by *name* in a :attr:`loopy.TranslationUnit.callables_table`'s namespace. """ - raise NotImplementedError + @abstractmethod def is_type_specialized(self): """ Returns *True* iff *self*'s type signature is known, else returns *False*. """ - raise NotImplementedError # }}} # {{{ scalar callable +@dataclass(frozen=True, init=False) class ScalarCallable(InKernelCallable): """ An abstract interface to a scalar callable encountered in a kernel. @@ -537,15 +543,20 @@ class ScalarCallable(InKernelCallable): The :meth:`ScalarCallable.with_types` is intended to assist with type specialization of the function and sub-classes must define it. """ - fields = {"name", "arg_id_to_dtype", "arg_id_to_descr", "name_in_target"} - hash_fields = InKernelCallable.hash_fields + ("name_in_target",) - - def __init__(self, name, arg_id_to_dtype=None, - arg_id_to_descr=None, name_in_target=None): - super().__init__(name=name, - arg_id_to_dtype=arg_id_to_dtype, - arg_id_to_descr=arg_id_to_descr) - self.name_in_target = name_in_target + name: str + name_in_target: str | None + + def __init__(self, + name: str, + arg_id_to_dtype: Mapping[int | str, LoopyType] | None = None, + arg_id_to_descr: Mapping[int | str, ArgDescriptor] | None = None, + name_in_target: str | None = None) -> None: + super().__init__( + arg_id_to_dtype=arg_id_to_dtype, + arg_id_to_descr=arg_id_to_descr, + ) + object.__setattr__(self, "name", name) + object.__setattr__(self, "name_in_target", name_in_target) def with_types(self, arg_id_to_dtype, callables_table): raise LoopyError("No type inference information present for " @@ -689,6 +700,7 @@ def is_type_specialized(self): # {{{ callable kernel +@dataclass(frozen=True, init=False) class CallableKernel(InKernelCallable): """ Records information about a callee kernel. Also provides interface through @@ -702,35 +714,27 @@ class CallableKernel(InKernelCallable): :meth:`CallableKernel.with_descrs` should be called in order to match the arguments' shapes/strides across the caller and the callee kernel. - .. attribute:: subkernel - - :class:`~loopy.LoopKernel` which is being called. - + .. autoattribute:: subkernel .. automethod:: with_descrs .. automethod:: with_types """ - fields = {"subkernel", "arg_id_to_dtype", "arg_id_to_descr"} - hash_fields = ("subkernel", "arg_id_to_dtype", "arg_id_to_descr") + subkernel: LoopKernel + + def __init__(self, + subkernel: LoopKernel, + arg_id_to_dtype: Mapping[int | str, LoopyType] | None = None, + arg_id_to_descr: Mapping[int | str, ArgDescriptor] | None = None, + ) -> None: - def __init__(self, subkernel, arg_id_to_dtype=None, - arg_id_to_descr=None): - assert isinstance(subkernel, LoopKernel) - super().__init__(name=subkernel.name, + super().__init__( arg_id_to_dtype=arg_id_to_dtype, arg_id_to_descr=arg_id_to_descr) - self.subkernel = subkernel - - def copy(self, subkernel=None, arg_id_to_dtype=None, - arg_id_to_descr=None): - if subkernel is None: - subkernel = self.subkernel - if arg_id_to_descr is None: - arg_id_to_descr = self.arg_id_to_descr - if arg_id_to_dtype is None: - arg_id_to_dtype = self.arg_id_to_dtype + object.__setattr__(self, "subkernel", subkernel) - return CallableKernel(subkernel, arg_id_to_dtype, arg_id_to_descr) + @property + def name(self) -> str: + return self.subkernel.name def with_types(self, arg_id_to_dtype, callables_table): kw_to_pos, pos_to_kw = get_kw_pos_association(self.subkernel) @@ -769,7 +773,7 @@ def with_types(self, arg_id_to_dtype, callables_table): # Return the kernel call with specialized subkernel and the corresponding # new arg_id_to_dtype return self.copy(subkernel=specialized_kernel, - arg_id_to_dtype=new_arg_id_to_dtype), callables_table + arg_id_to_dtype=immutabledict(new_arg_id_to_dtype)), callables_table def with_descrs(self, arg_id_to_descr, clbl_inf_ctx): @@ -844,7 +848,7 @@ def with_descrs(self, arg_id_to_descr, clbl_inf_ctx): # }}} return (self.copy(subkernel=subkernel, - arg_id_to_descr=arg_id_to_descr), + arg_id_to_descr=immutabledict(arg_id_to_descr)), clbl_inf_ctx) def with_added_arg(self, arg_dtype, arg_descr): @@ -852,19 +856,20 @@ def with_added_arg(self, arg_dtype, arg_descr): if isinstance(arg_descr, ValueArgDescriptor): subknl = self.subkernel.copy( - args=self.subkernel.args+[ + args=[ + *self.subkernel.args, ValueArg(var_name, arg_dtype, self.subkernel.target)]) - kw_to_pos, pos_to_kw = get_kw_pos_association(subknl) + kw_to_pos, _pos_to_kw = get_kw_pos_association(subknl) if self.arg_id_to_dtype is None: arg_id_to_dtype = {} else: - arg_id_to_dtype = self.arg_id_to_dtype.copy() + arg_id_to_dtype = dict(self.arg_id_to_dtype) if self.arg_id_to_descr is None: arg_id_to_descr = {} else: - arg_id_to_descr = self.arg_id_to_descr.copy() + arg_id_to_descr = dict(self.arg_id_to_descr) arg_id_to_dtype[var_name] = arg_dtype arg_id_to_descr[var_name] = arg_descr @@ -883,7 +888,7 @@ def with_added_arg(self, arg_dtype, arg_descr): def with_packing_for_args(self): from loopy.kernel.data import AddressSpace - kw_to_pos, pos_to_kw = get_kw_pos_association(self.subkernel) + _kw_to_pos, pos_to_kw = get_kw_pos_association(self.subkernel) arg_id_to_descr = {} @@ -931,6 +936,10 @@ def generate_preambles(self, target): return yield + def emit_call(self, expression_to_code_mapper, expression, target): + raise LoopyError("Kernel '{self.name}' cannot be called " + "from within an expression, use a call statement") + def emit_call_insn(self, insn, target, expression_to_code_mapper): from loopy.target.c import CFamilyTarget if not isinstance(target, CFamilyTarget): @@ -947,7 +956,7 @@ def emit_call_insn(self, insn, target, expression_to_code_mapper): parameters = list(parameters) par_dtypes = [self.arg_id_to_dtype[i] for i, _ in enumerate(parameters)] - kw_to_pos, pos_to_kw = get_kw_pos_association(self.subkernel) + _kw_to_pos, _pos_to_kw = get_kw_pos_association(self.subkernel) # insert the assignees at the required positions assignee_write_count = -1 diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 51d4856da..f882c09f3 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2016 Andreas Kloeckner" __license__ = """ @@ -27,7 +30,13 @@ from dataclasses import dataclass from functools import cached_property from sys import intern -from typing import Any, FrozenSet, Mapping, Optional, Sequence, Tuple, Type, Union +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, + Mapping, + Sequence, +) from warnings import warn import islpy as isl @@ -36,8 +45,11 @@ from loopy.diagnostic import LoopyError from loopy.tools import Optional as LoopyOptional -from loopy.types import LoopyType -from loopy.typing import ExpressionT, InameStr + + +if TYPE_CHECKING: + from loopy.types import LoopyType + from loopy.typing import Expression, InameStr # {{{ instruction tags @@ -115,8 +127,8 @@ class HappensAfter: statement-level dependencies of prior versions of :mod:`loopy`. """ - variable_name: Optional[str] - instances_rel: Optional[isl.Map] + variable_name: str | None + instances_rel: isl.Map | None # }}} @@ -244,20 +256,20 @@ class InstructionBase(ImmutableRecord, Taggable): Inherits from :class:`pytools.tag.Taggable`. """ - id: Optional[str] + id: str | None happens_after: Mapping[str, HappensAfter] depends_on_is_final: bool - groups: FrozenSet[str] - conflicts_with_groups: FrozenSet[str] - no_sync_with: FrozenSet[Tuple[str, str]] - predicates: FrozenSet[ExpressionT] - within_inames: FrozenSet[InameStr] + groups: frozenset[str] + conflicts_with_groups: frozenset[str] + no_sync_with: frozenset[tuple[str, str]] + predicates: frozenset[Expression] + within_inames: frozenset[InameStr] within_inames_is_final: bool priority: int # within_inames_is_final is deprecated and will be removed in version 2017.x. - fields = set("id depends_on_is_final " + fields: ClassVar[set[str]] = set("id depends_on_is_final " "groups conflicts_with_groups " "no_sync_with " "predicates " @@ -265,20 +277,20 @@ class InstructionBase(ImmutableRecord, Taggable): "priority".split()) def __init__(self, - id: Optional[str], - happens_after: Union[ - Mapping[str, HappensAfter], FrozenSet[str], str, None], - depends_on_is_final: Optional[bool], - groups: Optional[FrozenSet[str]], - conflicts_with_groups: Optional[FrozenSet[str]], - no_sync_with: Optional[FrozenSet[Tuple[str, str]]], - within_inames_is_final: Optional[bool], - within_inames: Optional[FrozenSet[str]], - priority: Optional[int], - predicates: Optional[FrozenSet[str]], - tags: Optional[FrozenSet[Tag]], + id: str | None, + happens_after: ( + Mapping[str, HappensAfter] | frozenset[str] | str | None), + depends_on_is_final: bool | None, + groups: frozenset[str] | None, + conflicts_with_groups: frozenset[str] | None, + no_sync_with: frozenset[tuple[str, str]] | None, + within_inames_is_final: bool | None, + within_inames: frozenset[str] | None, + priority: int | None, + predicates: frozenset[str] | None, + tags: frozenset[Tag] | None, *, - depends_on: Union[FrozenSet[str], str, None] = None, + depends_on: frozenset[str] | str | None = None, ) -> None: from immutabledict import immutabledict @@ -434,7 +446,7 @@ def read_dependency_names(self): return result - def reduction_inames(self) -> FrozenSet[str]: + def reduction_inames(self) -> frozenset[str]: raise NotImplementedError def sub_array_ref_inames(self): @@ -572,7 +584,7 @@ def __setstate__(self, val): self.within_inames = ( intern_frozenset_of_ids(self.within_inames)) - def _with_new_tags(self, tags: FrozenSet[Tag]): + def _with_new_tags(self, tags: frozenset[Tag]): return self.copy(tags=tags) # }}} @@ -634,7 +646,7 @@ def _get_assignee_subscript_deps(expr): # {{{ atomic ops -class MemoryOrdering: # noqa +class MemoryOrdering: """Ordering of atomic operations, defined as in C11 and OpenCL. .. attribute:: RELAXED @@ -662,7 +674,7 @@ def to_string(v): raise ValueError("Unknown value of MemoryOrdering") -class MemoryScope: # noqa +class MemoryScope: """Scope of atomicity, defined as in OpenCL. .. attribute:: auto @@ -901,35 +913,35 @@ class Assignment(MultiAssignmentBase): .. automethod:: __init__ """ - assignee: ExpressionT - expression: ExpressionT + assignee: Expression + expression: Expression temp_var_type: LoopyOptional - atomicity: Tuple[VarAtomicity, ...] + atomicity: tuple[VarAtomicity, ...] fields = MultiAssignmentBase.fields | \ set("assignee temp_var_type atomicity".split()) def __init__(self, - assignee: Union[str, ExpressionT], - expression: Union[str, ExpressionT], - id: Optional[str] = None, - happens_after: Union[ - Mapping[str, HappensAfter], FrozenSet[str], str, None] = None, - depends_on_is_final: Optional[bool] = None, - groups: Optional[FrozenSet[str]] = None, - conflicts_with_groups: Optional[FrozenSet[str]] = None, - no_sync_with: Optional[FrozenSet[Tuple[str, str]]] = None, - within_inames_is_final: Optional[bool] = None, - within_inames: Optional[FrozenSet[str]] = None, - priority: Optional[int] = None, - predicates: Optional[FrozenSet[str]] = None, - tags: Optional[FrozenSet[Tag]] = None, - temp_var_type: Union[ - Type[_not_provided], None, LoopyOptional, - LoopyType] = _not_provided, - atomicity: Tuple[VarAtomicity, ...] = (), + assignee: str | Expression, + expression: str | Expression, + id: str | None = None, + happens_after: + Mapping[str, HappensAfter] | frozenset[str] | str | None = None, + depends_on_is_final: bool | None = None, + groups: frozenset[str] | None = None, + conflicts_with_groups: frozenset[str] | None = None, + no_sync_with: frozenset[tuple[str, str]] | None = None, + within_inames_is_final: bool | None = None, + within_inames: frozenset[str] | None = None, + priority: int | None = None, + predicates: frozenset[str] | None = None, + tags: frozenset[Tag] | None = None, + temp_var_type: + type[_not_provided] | LoopyOptional | LoopyType | None + = _not_provided, + atomicity: tuple[VarAtomicity, ...] = (), *, - depends_on: Union[FrozenSet[str], str, None] = None, + depends_on: frozenset[str] | str | None = None, ) -> None: if temp_var_type is _not_provided: @@ -1271,8 +1283,8 @@ def modify_assignee_for_array_call(assignee): "SubArrayRef as its inputs") -def make_assignment(assignees: tuple[ExpressionT, ...], - expression: ExpressionT, +def make_assignment(assignees: tuple[Expression, ...], + expression: Expression, temp_var_types: ( Sequence[LoopyType | None] | None) = None, **kwargs: Any) -> Assignment | CallInstruction: @@ -1372,7 +1384,7 @@ class CInstruction(InstructionBase): .. attribute:: assignees A sequence (typically a :class:`tuple`) of variable references (with or - without subscript) as :class:`pymbolic.primitives.Expression` instances + without subscript) as :data:`pymbolic.typing.Expression` instances that :attr:`code` writes to. This is optional and only used for figuring out dependencies. """ diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 9a14aedd5..c48da4be9 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -1,4 +1,5 @@ """Operations on the kernel object.""" +from __future__ import annotations __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" @@ -27,14 +28,13 @@ import sys from functools import reduce from sys import intern -from typing import AbstractSet, Dict, FrozenSet, List, Mapping, Sequence, Set +from typing import TYPE_CHECKING, AbstractSet, Mapping, Sequence import numpy as np import islpy as isl from islpy import dim_type from pytools import memoize_on_first_arg, natsorted -from pytools.tag import Tag from loopy.diagnostic import LoopyError, warn_with_kernel from loopy.kernel import LoopKernel @@ -46,7 +46,12 @@ ) from loopy.symbolic import CombineMapper from loopy.translation_unit import TranslationUnit, TUnitOrKernelT, for_each_kernel -from loopy.types import ToLoopyTypeConvertible + + +if TYPE_CHECKING: + from pytools.tag import Tag + + from loopy.types import ToLoopyTypeConvertible logger = logging.getLogger(__name__) @@ -100,7 +105,7 @@ def add_dtypes( def _add_dtypes_overdetermined(kernel, dtype_dict): - dtype_dict_remainder, new_args, new_temp_vars = _add_dtypes(kernel, dtype_dict) + _dtype_dict_remainder, new_args, new_temp_vars = _add_dtypes(kernel, dtype_dict) # do not throw error for unused args return kernel.copy(args=new_args, temporary_variables=new_temp_vars) @@ -1823,7 +1828,7 @@ def get_subkernels(kernel) -> Sequence[str]: @memoize_on_first_arg -def get_subkernel_to_insn_id_map(kernel: LoopKernel) -> Mapping[str, FrozenSet[str]]: +def get_subkernel_to_insn_id_map(kernel: LoopKernel) -> Mapping[str, frozenset[str]]: """Return a :class:`dict` mapping subkernel names to a :class:`frozenset` consisting of the instruction ids scheduled within the subkernel. The kernel must be scheduled. @@ -1837,7 +1842,7 @@ def get_subkernel_to_insn_id_map(kernel: LoopKernel) -> Mapping[str, FrozenSet[s from loopy.schedule import CallKernel, ReturnFromKernel, sched_item_to_insn_id subkernel = None - result: Dict[str, Set[str]] = {} + result: dict[str, set[str]] = {} for lin_item in kernel.linearization: if isinstance(lin_item, CallKernel): @@ -1855,7 +1860,7 @@ def get_subkernel_to_insn_id_map(kernel: LoopKernel) -> Mapping[str, FrozenSet[s @memoize_on_first_arg -def get_subkernel_extra_inames(kernel: LoopKernel) -> Mapping[str, FrozenSet[str]]: +def get_subkernel_extra_inames(kernel: LoopKernel) -> Mapping[str, frozenset[str]]: from loopy.kernel import KernelState if kernel.state != KernelState.LINEARIZED: raise LoopyError("Kernel must be scheduled") @@ -1863,7 +1868,7 @@ def get_subkernel_extra_inames(kernel: LoopKernel) -> Mapping[str, FrozenSet[str assert kernel.linearization is not None result = {} - inames: List[str] = [] + inames: list[str] = [] from loopy.schedule import CallKernel, EnterLoop, LeaveLoop diff --git a/loopy/library/function.py b/loopy/library/function.py index 7d274e492..8b61ad41a 100644 --- a/loopy/library/function.py +++ b/loopy/library/function.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ @@ -20,14 +23,19 @@ THE SOFTWARE. """ +from typing import TYPE_CHECKING + import numpy as np from loopy.diagnostic import LoopyError from loopy.kernel.function_interface import ScalarCallable -from loopy.translation_unit import CallablesTable from loopy.types import NumpyType +if TYPE_CHECKING: + from loopy.translation_unit import CallablesTable + + class MakeTupleCallable(ScalarCallable): def with_types(self, arg_id_to_dtype, callables_table): new_arg_id_to_dtype = arg_id_to_dtype.copy() diff --git a/loopy/library/random123.py b/loopy/library/random123.py index 0afb0abb9..f65fa7600 100644 --- a/loopy/library/random123.py +++ b/loopy/library/random123.py @@ -1,4 +1,5 @@ """Library integration with Random123.""" +from __future__ import annotations __copyright__ = "Copyright (C) 2016 Andreas Kloeckner" @@ -24,20 +25,35 @@ """ +from dataclasses import dataclass, replace +from typing import TYPE_CHECKING + import numpy as np from mako.template import Template -from pytools import ImmutableRecord +from pymbolic.typing import not_none from loopy.kernel.function_interface import ScalarCallable +if TYPE_CHECKING: + from loopy.target import TargetBase + + # {{{ rng metadata -class RNGInfo(ImmutableRecord): +@dataclass(frozen=True) +class RNGInfo: + name: str + pyopencl_header: str + generic_header: str + key_width: int + width: int | None = None + bits: int | None = None + @property - def full_name(self): - return "%s%dx%d" % (self.name, self.width, self.bits) + def full_name(self) -> str: + return "%s%dx%d" % (self.name, not_none(self.width), not_none(self.bits)) _philox_base_info = RNGInfo( @@ -53,15 +69,15 @@ def full_name(self): key_width=4) RNG_VARIANTS = [ - _philox_base_info.copy(width=2, bits=32), - _philox_base_info.copy(width=2, bits=64), - _philox_base_info.copy(width=4, bits=32), - _philox_base_info.copy(width=4, bits=64), - - _threefry_base_info.copy(width=2, bits=32), - _threefry_base_info.copy(width=2, bits=64), - _threefry_base_info.copy(width=4, bits=32), - _threefry_base_info.copy(width=4, bits=64), + replace(_philox_base_info, width=2, bits=32), + replace(_philox_base_info, width=2, bits=64), + replace(_philox_base_info, width=4, bits=32), + replace(_philox_base_info, width=4, bits=64), + + replace(_threefry_base_info, width=2, bits=32), + replace(_threefry_base_info, width=2, bits=64), + replace(_threefry_base_info, width=4, bits=32), + replace(_threefry_base_info, width=4, bits=64), ] FUNC_NAMES_TO_RNG = { @@ -165,12 +181,12 @@ def full_name(self): # }}} +@dataclass(frozen=True, init=False) class Random123Callable(ScalarCallable): """ Records information about for the random123 functions. """ - fields = ScalarCallable.fields | {"target"} - hash_fields = ScalarCallable.hash_fields + ("target",) + target: TargetBase def __init__(self, name, arg_id_to_dtype=None, arg_id_to_descr=None, name_in_target=None, target=None): @@ -179,7 +195,7 @@ def __init__(self, name, arg_id_to_dtype=None, arg_id_to_descr=arg_id_to_descr, name_in_target=name_in_target) - self.target = target + object.__setattr__(self, "target", target) def with_types(self, arg_id_to_dtype, callables_table): diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index 2d357d3b4..6ddc3fb86 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ @@ -21,11 +24,12 @@ """ +from typing import TYPE_CHECKING + import numpy as np from pymbolic import var from pymbolic.primitives import expr_dataclass -from pytools.persistent_dict import Hash, KeyBuilder from loopy.diagnostic import LoopyError from loopy.kernel.function_interface import ScalarCallable @@ -34,6 +38,10 @@ from loopy.types import NumpyType +if TYPE_CHECKING: + from pytools.persistent_dict import Hash, KeyBuilder + + __doc__ = """ .. currentmodule:: loopy.library.reduction @@ -329,7 +337,7 @@ def neutral_element(self, scalar_dtype, segment_flag_dtype, from loopy.library.function import MakeTupleCallable from loopy.translation_unit import add_callable_to_table - scalar_neutral_element, calables_table = ( + scalar_neutral_element, _calables_table = ( self.inner_reduction.neutral_element( scalar_dtype, callables_table, target)) @@ -347,8 +355,7 @@ def neutral_element(self, scalar_dtype, segment_flag_dtype, segment_flag_dtype.numpy_dtype.type(0)), callables_table def result_dtypes(self, scalar_dtype, segment_flag_dtype): - return (self.inner_reduction.result_dtypes(scalar_dtype) - + (segment_flag_dtype,)) + return ((*self.inner_reduction.result_dtypes(scalar_dtype), segment_flag_dtype)) def __str__(self): return "segmented(%s)" % self.which @@ -538,7 +545,7 @@ def register_reduction_parser(parser): _REDUCTION_OP_PARSERS.append(parser) -def parse_reduction_op(name): +def parse_reduction_op(name: str) -> ReductionOperation | None: import re red_op_match = re.match(r"^([a-z]+)_([a-z0-9_]+)$", name) @@ -571,12 +578,12 @@ class ReductionCallable(ScalarCallable): def with_types(self, arg_id_to_dtype, callables_table): scalar_dtype = arg_id_to_dtype[0] index_dtype = arg_id_to_dtype[1] - result_dtypes = self.name.reduction_op.result_dtypes(scalar_dtype, + result_dtypes = self.name.reduction_op.result_dtypes(scalar_dtype, # pylint: disable=no-member index_dtype) new_arg_id_to_dtype = arg_id_to_dtype.copy() new_arg_id_to_dtype[-1] = result_dtypes[0] new_arg_id_to_dtype[-2] = result_dtypes[1] - name_in_target = self.name.reduction_op.prefix(scalar_dtype, + name_in_target = self.name.reduction_op.prefix(scalar_dtype, # pylint: disable=no-member index_dtype) + "_op" return self.copy(arg_id_to_dtype=new_arg_id_to_dtype, @@ -594,7 +601,7 @@ def with_descrs(self, arg_id_to_descr, callables_table): class ArgExtOpCallable(ReductionCallable): def generate_preambles(self, target): - op = self.name.reduction_op + op = self.name.reduction_op # pylint: disable=no-member scalar_dtype = self.arg_id_to_dtype[-1] index_dtype = self.arg_id_to_dtype[-2] @@ -630,7 +637,7 @@ def generate_preambles(self, target): class SegmentOpCallable(ReductionCallable): def generate_preambles(self, target): - op = self.name.reduction_op + op = self.name.reduction_op # pylint: disable=no-member scalar_dtype = self.arg_id_to_dtype[-1] segment_flag_dtype = self.arg_id_to_dtype[-2] prefix = op.prefix(scalar_dtype, segment_flag_dtype) diff --git a/loopy/loop.py b/loopy/loop.py index 001cd80a8..9903474c9 100644 --- a/loopy/loop.py +++ b/loopy/loop.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ diff --git a/loopy/match.py b/loopy/match.py index ae52e6c65..ef18799df 100644 --- a/loopy/match.py +++ b/loopy/match.py @@ -1,5 +1,33 @@ -"""Matching functionality for instruction ids and substitution -rule invocations stacks.""" +""" +.. autoclass:: Matchable +.. autoclass:: StackMatchComponent +.. autoclass:: StackMatch + +.. autofunction:: parse_match + +.. autofunction:: parse_stack_match + +.. autodata:: ToStackMatchConvertible + +Match expressions +^^^^^^^^^^^^^^^^^ + +.. autoclass:: MatchExpressionBase +.. autoclass:: All +.. autoclass:: And +.. autoclass:: Or +.. autoclass:: Not +.. autoclass:: Id +.. autoclass:: ObjTagged +.. autoclass:: Tagged +.. autoclass:: Writes +.. autoclass:: Reads +.. autoclass:: InKernel +.. autoclass:: Iname + +""" + +from __future__ import annotations __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" @@ -28,43 +56,22 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from sys import intern -from typing import FrozenSet, List, Protocol, Sequence, Tuple, Union +from typing import TYPE_CHECKING, Protocol, Sequence, Union + +from typing_extensions import TypeAlias -from loopy.kernel import LoopKernel from loopy.kernel.instruction import InstructionBase NoneType = type(None) -import pytools.tag from pytools.lex import RE -__doc__ = """ -.. autoclass:: Matchable -.. autoclass:: StackMatchComponent -.. autoclass:: StackMatch - -.. autofunction:: parse_match - -.. autofunction:: parse_stack_match - -Match expressions -^^^^^^^^^^^^^^^^^ +if TYPE_CHECKING: + import pytools.tag -.. autoclass:: MatchExpressionBase -.. autoclass:: All -.. autoclass:: And -.. autoclass:: Or -.. autoclass:: Not -.. autoclass:: Id -.. autoclass:: ObjTagged -.. autoclass:: Tagged -.. autoclass:: Writes -.. autoclass:: Reads -.. autoclass:: InKernel -.. autoclass:: Iname -""" + from loopy.kernel import LoopKernel def re_from_glob(s: str) -> re.Pattern: @@ -133,7 +140,7 @@ class Matchable(Protocol): .. attribute:: tags """ @property - def tags(self) -> FrozenSet[pytools.tag.Tag]: + def tags(self) -> frozenset[pytools.tag.Tag]: ... @@ -494,7 +501,7 @@ def __call__(self, kernel: LoopKernel, stack: Sequence[Matchable]) -> bool: @dataclass(eq=True, frozen=True) class RuleInvocationMatchable: id: str - tags: FrozenSet[pytools.tag.Tag] + tags: frozenset[pytools.tag.Tag] def write_dependency_names(self): raise TypeError("writes: query may not be applied to rule invocations") @@ -516,11 +523,11 @@ class StackMatch: def __call__( self, kernel: LoopKernel, insn: InstructionBase, - rule_stack: Sequence[Tuple[str, FrozenSet[pytools.tag.Tag]]]) -> bool: + rule_stack: Sequence[tuple[str, frozenset[pytools.tag.Tag]]]) -> bool: """ :arg rule_stack: a tuple of (name, tags) rule invocation, outermost first """ - stack_of_matchables: List[Matchable] = [insn] + stack_of_matchables: list[Matchable] = [insn] for id, tags in rule_stack: stack_of_matchables.append(RuleInvocationMatchable(id, tags)) @@ -531,10 +538,10 @@ def __call__( # {{{ stack match parsing -ToStackMatchCovertible = Union[StackMatch, str, None] +ToStackMatchConvertible: TypeAlias = Union[StackMatch, str, None] -def parse_stack_match(smatch: ToStackMatchCovertible) -> StackMatch: +def parse_stack_match(smatch: ToStackMatchConvertible) -> StackMatch: """Syntax example:: ... > outer > ... > next > innermost $ diff --git a/loopy/options.py b/loopy/options.py index 293670774..d9547c846 100644 --- a/loopy/options.py +++ b/loopy/options.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2013 Andreas Kloeckner" __license__ = """ @@ -23,12 +26,16 @@ import os import re -from typing import Any +from typing import TYPE_CHECKING, Any, ClassVar from warnings import warn from pytools import ImmutableRecord +if TYPE_CHECKING: + from collections.abc import Mapping + + ALLOW_TERMINAL_COLORS = True @@ -198,7 +205,7 @@ class Options(ImmutableRecord): RAW, WAR and WAW races. """ - _legacy_options_map = { + _legacy_options_map: ClassVar[Mapping[str, tuple[str, None] | None]] = { "cl_build_options": ("build_options", None), "write_cl": ("write_code", None), "highlight_cl": None, @@ -220,7 +227,7 @@ def __init__( kwargs = _apply_legacy_map(self._legacy_options_map, kwargs) try: - import colorama # noqa + import colorama # noqa: F401 except ImportError: allow_terminal_colors_def = False else: @@ -332,7 +339,7 @@ def _style(self): return _ColoramaStub() -KEY_VAL_RE = re.compile("^([a-zA-Z0-9]+)=(.*)$") +KEY_VAL_RE = re.compile(r"^([a-zA-Z0-9]+)=(.*)$") def make_options(options_arg): diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 3293e9a1e..aee4044be 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ @@ -21,7 +24,7 @@ """ import logging -from typing import FrozenSet, Iterable, List, Optional, Tuple, TypeVar, cast +from typing import TYPE_CHECKING, Iterable, List, TypeVar, cast logger = logging.getLogger(__name__) @@ -39,8 +42,6 @@ WriteRaceConditionWarning, warn_with_kernel, ) -from loopy.kernel import LoopKernel -from loopy.kernel.array import ArrayDimImplementationTag from loopy.kernel.data import ( ArrayArg, KernelArgument, @@ -68,7 +69,12 @@ # for the benefit of loopy.statistics, for now from loopy.type_inference import infer_unknown_types -from loopy.typing import ExpressionT + + +if TYPE_CHECKING: + from loopy.kernel import LoopKernel + from loopy.kernel.array import ArrayDimImplementationTag + from loopy.typing import Expression # {{{ check for writes to predicates @@ -135,8 +141,8 @@ def map_reduction(expr, rec): def _remove_at_indices( - indices: FrozenSet[int], values: Optional[Iterable[T]] - ) -> Optional[Tuple[T, ...]]: + indices: frozenset[int], values: Iterable[T] | None + ) -> tuple[T, ...] | None: """ Assumes *indices* is sorted. """ @@ -174,14 +180,14 @@ def make_arrays_for_sep_arrays(kernel: LoopKernel) -> LoopKernel: sep_axis_indices_set = frozenset(sep_axis_indices) assert isinstance(arg.shape, tuple) - new_shape: Optional[Tuple[ExpressionT, ...]] = \ + new_shape: tuple[Expression, ...] | None = \ _remove_at_indices(sep_axis_indices_set, arg.shape) - new_dim_tags: Optional[Tuple[ArrayDimImplementationTag, ...]] = \ + new_dim_tags: tuple[ArrayDimImplementationTag, ...] | None = \ _remove_at_indices(sep_axis_indices_set, arg.dim_tags) - new_dim_names: Optional[Tuple[Optional[str], ...]] = \ + new_dim_names: tuple[str | None, ...] | None = \ _remove_at_indices(sep_axis_indices_set, arg.dim_names) - sep_shape: List[ExpressionT] = [arg.shape[i] for i in sep_axis_indices] + sep_shape: list[Expression] = [arg.shape[i] for i in sep_axis_indices] for i, sep_shape_i in enumerate(sep_shape): if not isinstance(sep_shape_i, (int, np.integer)): raise LoopyError( @@ -193,7 +199,7 @@ def make_arrays_for_sep_arrays(kernel: LoopKernel) -> LoopKernel: sep_axis_indices_set=sep_axis_indices_set, subarray_names=Map({ ind: vng(f"{arg.name}_s{'_'.join(str(i) for i in ind)}") - for ind in np.ndindex(*cast(List[int], sep_shape))})) + for ind in np.ndindex(*cast("List[int]", sep_shape))})) new_args.append(arg.copy(_separation_info=sep_info)) @@ -220,11 +226,11 @@ def make_arrays_for_sep_arrays(kernel: LoopKernel) -> LoopKernel: # {{{ make temporary variables for offsets and strides def make_args_for_offsets_and_strides(kernel: LoopKernel) -> LoopKernel: - additional_args: List[KernelArgument] = [] + additional_args: list[KernelArgument] = [] vng = kernel.get_var_name_generator() - from pymbolic.primitives import Expression, Variable + from pymbolic.primitives import ExpressionNode, Variable from loopy.kernel.array import FixedStrideArrayDimTag @@ -241,13 +247,13 @@ def make_args_for_offsets_and_strides(kernel: LoopKernel) -> LoopKernel: additional_args.append(ValueArg( offset_name, kernel.index_dtype)) arg = arg.copy(offset=offset_name) - elif isinstance(arg.offset, (int, np.integer, Expression, str)): + elif isinstance(arg.offset, (int, np.integer, ExpressionNode, str)): pass else: raise LoopyError(f"invalid value of {what}") if arg.dim_tags is None: - new_dim_tags: Optional[Tuple[ArrayDimImplementationTag, ...]] \ + new_dim_tags: tuple[ArrayDimImplementationTag, ...] | None \ = arg.dim_tags else: new_dim_tags = () @@ -261,12 +267,12 @@ def make_args_for_offsets_and_strides(kernel: LoopKernel) -> LoopKernel: additional_args.append(ValueArg( stride_name, kernel.index_dtype)) elif isinstance( - dim_tag.stride, (int, np.integer, Expression)): + dim_tag.stride, (int, np.integer, ExpressionNode)): pass else: raise LoopyError(f"invalid value of {what}") - new_dim_tags = new_dim_tags + (dim_tag,) + new_dim_tags = (*new_dim_tags, dim_tag) arg = arg.copy(dim_tags=new_dim_tags) @@ -286,7 +292,7 @@ def make_args_for_offsets_and_strides(kernel: LoopKernel) -> LoopKernel: def zero_offsets_and_strides(kernel: LoopKernel) -> LoopKernel: made_changes = False - from pymbolic.primitives import Expression + from pymbolic.primitives import ExpressionNode # {{{ process arguments @@ -298,7 +304,7 @@ def zero_offsets_and_strides(kernel: LoopKernel) -> LoopKernel: if arg.offset is auto: made_changes = True arg = arg.copy(offset=0) - elif isinstance(arg.offset, (int, np.integer, Expression, str)): + elif isinstance(arg.offset, (int, np.integer, ExpressionNode, str)): from pymbolic.primitives import is_zero if not is_zero(arg.offset): raise LoopyError( @@ -499,7 +505,7 @@ def check_atomic_loads(kernel): for x in missed: if {x} & atomicity_candidates: insn = insn.copy( - atomicity=insn.atomicity + (AtomicLoad(x),)) + atomicity=(*insn.atomicity, AtomicLoad(x))) new_insns.append(insn) @@ -697,7 +703,7 @@ def _tuple_or_none(s): raise NotImplementedError() new_callable, clbl_inf_ctx = t_unit.callables_table[e].with_descrs( arg_id_to_descr, clbl_inf_ctx) - clbl_inf_ctx, new_name = clbl_inf_ctx.with_callable(e, new_callable, + clbl_inf_ctx, _new_name = clbl_inf_ctx.with_callable(e, new_callable, is_entrypoint=True) return clbl_inf_ctx.finish_program(t_unit) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 2460f5ed7..55e0a197b 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -27,7 +27,6 @@ import logging import sys -from collections.abc import Hashable, Iterator, Mapping, Sequence, Set from dataclasses import dataclass, replace from typing import ( TYPE_CHECKING, @@ -42,19 +41,22 @@ from pytools.persistent_dict import WriteOncePersistentDict from loopy.diagnostic import LoopyError, ScheduleDebugInputError, warn_with_kernel -from loopy.kernel.instruction import InstructionBase from loopy.tools import LoopyKeyBuilder, caches from loopy.typing import InameStr from loopy.version import DATA_MODEL_VERSION if TYPE_CHECKING: + from collections.abc import Hashable, Iterator, Mapping, Sequence, Set + from loopy.kernel import LoopKernel + from loopy.kernel.function_interface import InKernelCallable + from loopy.kernel.instruction import InstructionBase from loopy.schedule.tools import ( InameStrSet, LoopTree, ) - from loopy.translation_unit import CallablesTable, TranslationUnit + from loopy.translation_unit import CallablesTable, FunctionIdT, TranslationUnit logger = logging.getLogger(__name__) @@ -1020,7 +1022,7 @@ def _generate_loop_schedules_v2(kernel: LoopKernel) -> Sequence[ScheduleItem]: def iname_key(iname: str) -> str: all_ancestors = sorted(loop_tree.ancestors(iname), key=lambda x: loop_tree.depth(x)) - return ",".join(all_ancestors+[iname]) + return ",".join([*all_ancestors, iname]) def key(x: ScheduleItem) -> tuple[str, ...]: if isinstance(x, RunInstruction): @@ -1097,7 +1099,7 @@ def _generate_loop_schedules_internal( assert sched_state.within_subkernel is False yield from _generate_loop_schedules_internal( sched_state.copy( - schedule=sched_state.schedule + (next_preschedule_item,), + schedule=(*sched_state.schedule, next_preschedule_item), preschedule=sched_state.preschedule[1:], within_subkernel=True, may_schedule_global_barriers=False, @@ -1110,7 +1112,7 @@ def _generate_loop_schedules_internal( if sched_state.active_inames == sched_state.enclosing_subkernel_inames: yield from _generate_loop_schedules_internal( sched_state.copy( - schedule=sched_state.schedule + (next_preschedule_item,), + schedule=(*sched_state.schedule, next_preschedule_item), preschedule=sched_state.preschedule[1:], within_subkernel=False, may_schedule_global_barriers=True), @@ -1129,7 +1131,7 @@ def _generate_loop_schedules_internal( and next_preschedule_item.originating_insn_id is None): yield from _generate_loop_schedules_internal( sched_state.copy( - schedule=sched_state.schedule + (next_preschedule_item,), + schedule=(*sched_state.schedule, next_preschedule_item), preschedule=sched_state.preschedule[1:]), debug=debug) @@ -1289,7 +1291,7 @@ def insn_sort_key(insn_id): unscheduled_insn_ids=sched_state.unscheduled_insn_ids - iid_set, insn_ids_to_try=new_insn_ids_to_try, schedule=( - sched_state.schedule + (RunInstruction(insn_id=insn.id),)), + (*sched_state.schedule, RunInstruction(insn_id=insn.id))), preschedule=( sched_state.preschedule if insn_id not in sched_state.prescheduled_insn_ids @@ -1403,8 +1405,8 @@ def insn_sort_key(insn_id): for sub_sched in _generate_loop_schedules_internal( sched_state.copy( schedule=( - sched_state.schedule - + (LeaveLoop(iname=last_entered_loop),)), + (*sched_state.schedule, + LeaveLoop(iname=last_entered_loop))), active_inames=sched_state.active_inames[:-1], insn_ids_to_try=insn_ids_to_try, preschedule=( @@ -1613,10 +1615,9 @@ def insn_sort_key(insn_id): for sub_sched in _generate_loop_schedules_internal( sched_state.copy( schedule=( - sched_state.schedule - + (EnterLoop(iname=iname),)), + (*sched_state.schedule, EnterLoop(iname=iname))), active_inames=( - sched_state.active_inames + (iname,)), + (*sched_state.active_inames, iname)), entered_inames=( sched_state.entered_inames | frozenset((iname,))), @@ -2446,7 +2447,7 @@ def get_one_linearized_kernel( callables_table) if CACHING_ENABLED and not from_cache: - schedule_cache.store_if_not_present(sched_cache_key, result) # pylint: disable=possibly-used-before-assignment # noqa: E501 + schedule_cache.store_if_not_present(sched_cache_key, result) # pylint: disable=possibly-used-before-assignment return result @@ -2466,7 +2467,7 @@ def linearize(t_unit: TranslationUnit) -> TranslationUnit: pre_schedule_checks(t_unit) - new_callables = {} + new_callables: dict[FunctionIdT, InKernelCallable] = {} for name, clbl in t_unit.callables_table.items(): if isinstance(clbl, CallableKernel): diff --git a/loopy/schedule/device_mapping.py b/loopy/schedule/device_mapping.py index a0345049d..226757dea 100644 --- a/loopy/schedule/device_mapping.py +++ b/loopy/schedule/device_mapping.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2016 Matt Wala" __license__ = """ @@ -45,9 +48,9 @@ def map_schedule_onto_host_or_device(kernel): if not kernel.target.split_kernel_at_global_barriers(): new_schedule = ( - [CallKernel(kernel_name=device_prog_name_gen())] + - list(kernel.linearization) + - [ReturnFromKernel(kernel_name=kernel.name)]) + [CallKernel(kernel_name=device_prog_name_gen()), + *kernel.linearization, + ReturnFromKernel(kernel_name=kernel.name)]) kernel = kernel.copy(linearization=new_schedule) else: kernel = map_schedule_onto_host_or_device_impl( @@ -92,19 +95,13 @@ def inner_mapper(start_idx, end_idx, new_schedule): schedule_required_splitting = True if current_chunk: new_schedule.extend( - [dummy_call.copy()] + - current_chunk + - [dummy_return.copy()]) + [dummy_call.copy(), *current_chunk, dummy_return.copy()]) new_schedule.extend( - [start_item] + - inner_schedule + - [end_item]) + [start_item, *inner_schedule, end_item]) current_chunk = [] else: current_chunk.extend( - [start_item] + - inner_schedule + - [end_item]) + [start_item, *inner_schedule, end_item]) elif isinstance(sched_item, Barrier): if sched_item.synchronization_kind == "global": @@ -112,9 +109,7 @@ def inner_mapper(start_idx, end_idx, new_schedule): schedule_required_splitting = True if current_chunk: new_schedule.extend( - [dummy_call.copy()] + - current_chunk + - [dummy_return.copy()]) + [dummy_call.copy(), *current_chunk, dummy_return.copy()]) new_schedule.append(sched_item) current_chunk = [] else: @@ -127,9 +122,7 @@ def inner_mapper(start_idx, end_idx, new_schedule): if current_chunk and schedule_required_splitting: # Wrap remainder of schedule into a kernel call. new_schedule.extend( - [dummy_call.copy()] + - current_chunk + - [dummy_return.copy()]) + [dummy_call.copy(), *current_chunk, dummy_return.copy()]) else: new_schedule.extend(current_chunk) @@ -142,9 +135,7 @@ def inner_mapper(start_idx, end_idx, new_schedule): if not split_kernel: # Wrap everything into a kernel call. new_schedule = ( - [dummy_call.copy()] + - new_schedule + - [dummy_return.copy()]) + [dummy_call.copy(), *new_schedule, dummy_return.copy()]) # Assign names to CallKernel / ReturnFromKernel instructions diff --git a/loopy/schedule/tools.py b/loopy/schedule/tools.py index 3858462b1..b659ee7b7 100644 --- a/loopy/schedule/tools.py +++ b/loopy/schedule/tools.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = """ Copyright (C) 2016 Matt Wala Copyright (C) 2020 University of Illinois Board of Trustees @@ -22,13 +25,19 @@ .. autoclass:: AccessMapDescriptor .. autoclass:: WriteRaceChecker -.. autoclass:: InameStrSet .. autoclass:: LoopNestTree .. autoclass:: LoopTree .. autofunction:: separate_loop_nest .. autofunction:: get_partial_loop_nest_tree .. autofunction:: get_loop_tree + +References +^^^^^^^^^^ + +.. class:: InameStrSet + + See :class:`loopy.typing.InameStrSet` """ __license__ = """ @@ -52,10 +61,9 @@ """ import enum -from collections.abc import Callable, Collection, Mapping from dataclasses import dataclass from functools import cached_property, reduce -from typing import AbstractSet, Dict, FrozenSet, List, Sequence, Set, Tuple +from typing import TYPE_CHECKING, AbstractSet, Sequence from immutables import Map from typing_extensions import TypeAlias @@ -64,11 +72,16 @@ from pytools import memoize_method, memoize_on_first_arg from loopy.diagnostic import LoopyError -from loopy.kernel import LoopKernel from loopy.kernel.data import AddressSpace, ArrayArg, TemporaryVariable -from loopy.schedule import ScheduleItem from loopy.schedule.tree import Tree -from loopy.typing import InameStr, not_none +from loopy.typing import InameStr, InameStrSet, not_none + + +if TYPE_CHECKING: + from collections.abc import Callable, Collection, Mapping + + from loopy.kernel import LoopKernel + from loopy.schedule import ScheduleItem # {{{ block boundary finder @@ -97,7 +110,7 @@ def get_block_boundaries(schedule: Sequence[ScheduleItem]) -> Mapping[int, int]: # {{{ subkernel tools def temporaries_read_in_subkernel( - kernel: LoopKernel, subkernel_name: str) -> FrozenSet[str]: + kernel: LoopKernel, subkernel_name: str) -> frozenset[str]: from loopy.kernel.tools import get_subkernel_to_insn_id_map insn_ids = get_subkernel_to_insn_id_map(kernel)[subkernel_name] inames = frozenset().union(*(kernel.insn_inames(insn_id) @@ -115,7 +128,7 @@ def temporaries_read_in_subkernel( def temporaries_written_in_subkernel( - kernel: LoopKernel, subkernel_name: str) -> FrozenSet[str]: + kernel: LoopKernel, subkernel_name: str) -> frozenset[str]: from loopy.kernel.tools import get_subkernel_to_insn_id_map insn_ids = get_subkernel_to_insn_id_map(kernel)[subkernel_name] return frozenset(tv @@ -125,7 +138,7 @@ def temporaries_written_in_subkernel( def args_read_in_subkernel( - kernel: LoopKernel, subkernel_name: str) -> FrozenSet[str]: + kernel: LoopKernel, subkernel_name: str) -> frozenset[str]: from loopy.kernel.tools import get_subkernel_to_insn_id_map insn_ids = get_subkernel_to_insn_id_map(kernel)[subkernel_name] inames = frozenset().union(*(kernel.insn_inames(insn_id) @@ -142,7 +155,7 @@ def args_read_in_subkernel( def args_written_in_subkernel( - kernel: LoopKernel, subkernel_name: str) -> FrozenSet[str]: + kernel: LoopKernel, subkernel_name: str) -> frozenset[str]: from loopy.kernel.tools import get_subkernel_to_insn_id_map insn_ids = get_subkernel_to_insn_id_map(kernel)[subkernel_name] return frozenset(arg @@ -152,8 +165,8 @@ def args_written_in_subkernel( def supporting_temporary_names( - kernel: LoopKernel, tv_names: FrozenSet[str]) -> FrozenSet[str]: - result: Set[str] = set() + kernel: LoopKernel, tv_names: frozenset[str]) -> frozenset[str]: + result: set[str] = set() for name in tv_names: tv = kernel.temporary_variables[name] @@ -176,7 +189,7 @@ class KernelArgInfo: """ passed_arg_names: Sequence[str] - written_names: FrozenSet[str] + written_names: frozenset[str] @property def passed_names(self) -> Sequence[str]: @@ -208,7 +221,7 @@ def _should_temp_var_be_passed(tv: TemporaryVariable) -> bool: class _SupportingNameTracker: def __init__(self, kernel: LoopKernel): self.kernel = kernel - self.name_to_main_name: Dict[str, str] = {} + self.name_to_main_name: dict[str, str] = {} def add_supporting_names_for(self, name): var_descr = self.kernel.get_var_descriptor(name) @@ -218,8 +231,8 @@ def add_supporting_names_for(self, name): | {name}) def get_additional_args_and_tvs( - self, already_passed: Set[str] - ) -> Tuple[List[str], List[str]]: + self, already_passed: set[str] + ) -> tuple[list[str], list[str]]: additional_args = [] additional_temporaries = [] @@ -237,11 +250,11 @@ def get_additional_args_and_tvs( def _process_args_for_arg_info( - kernel: LoopKernel, args_read: Set[str], args_written: Set[str], + kernel: LoopKernel, args_read: set[str], args_written: set[str], supp_name_tracker: _SupportingNameTracker, used_only: bool, - ) -> List[str]: + ) -> list[str]: - args_expected: Set[str] = set() + args_expected: set[str] = set() passed_arg_names = [] for arg in kernel.args: @@ -319,7 +332,7 @@ def get_subkernel_arg_info( supp_name_tracker=supp_name_tracker, used_only=True) - passed_temporaries: List[str] = [] + passed_temporaries: list[str] = [] for tv_name in sorted(tvs_read | tvs_written): supp_name_tracker.add_supporting_names_for(tv_name) tv = kernel.temporary_variables[tv_name] @@ -671,7 +684,6 @@ def do_accesses_result_in_races(self, insn1, insn1_dir, insn2, insn2_dir, # }}} -InameStrSet: TypeAlias = FrozenSet[InameStr] LoopNestTree: TypeAlias = Tree[InameStrSet] LoopTree: TypeAlias = Tree[InameStr] @@ -783,8 +795,8 @@ def _add_inner_loops(tree, outer_loop_nest, inner_loop_nest): def _order_loop_nests( loop_nest_tree: LoopNestTree, - strict_priorities: FrozenSet[Tuple[InameStr, ...]], - relaxed_priorities: FrozenSet[Tuple[InameStr, ...]], + strict_priorities: frozenset[tuple[InameStr, ...]], + relaxed_priorities: frozenset[tuple[InameStr, ...]], iname_to_tree_node_id: Mapping[InameStr, InameStrSet], ) -> LoopTree: """ @@ -825,7 +837,7 @@ def _order_loop_nests( # toposort for each loop nest. def _update_nesting_constraints( - priorities: FrozenSet[Tuple[InameStr, ...]], + priorities: frozenset[tuple[InameStr, ...]], cannot_satisfy_callback: Callable[[str], None] ) -> None: """ @@ -862,10 +874,12 @@ def _update_nesting_constraints( .ancestors(inner_iname_nest)) ancestors_of_outer_iname = (loop_nest_tree .ancestors(outer_iname_nest)) - if outer_iname in ancestors_of_inner_iname: + if any(outer_iname in ancestor + for ancestor in ancestors_of_inner_iname): # nesting constraint already satisfied => do nothing pass - elif inner_iname in ancestors_of_outer_iname: + elif any(inner_iname in ancestor + for ancestor in ancestors_of_outer_iname): cannot_satisfy_callback("Cannot satisfy constraint that" f" iname '{inner_iname}' must be" f" nested within '{outer_iname}''.") @@ -968,7 +982,7 @@ def get_partial_loop_nest_tree(kernel: LoopKernel) -> LoopNestTree: tree = Tree.from_root(root) # mapping from iname to the innermost loop nest they are part of in *tree*. - iname_to_tree_node_id: Dict[InameStr, InameStrSet] = {} + iname_to_tree_node_id: dict[InameStr, InameStrSet] = {} # if there were any loop with no inames, those have been already account # for as the root. @@ -1067,7 +1081,7 @@ def get_loop_tree(kernel: LoopKernel) -> LoopTree: iname_to_tree_node_id = ( _get_iname_to_tree_node_id_from_partial_loop_nest_tree(tree)) - strict_loop_priorities: FrozenSet[Tuple[InameStr, ...]] = frozenset() + strict_loop_priorities: frozenset[tuple[InameStr, ...]] = frozenset() # {{{ impose constraints by the domain tree diff --git a/loopy/schedule/tree.py b/loopy/schedule/tree.py index e98724f83..3861aa75c 100644 --- a/loopy/schedule/tree.py +++ b/loopy/schedule/tree.py @@ -34,9 +34,10 @@ THE SOFTWARE. """ +import operator from collections.abc import Hashable, Iterator, Sequence from dataclasses import dataclass -from functools import cached_property +from functools import cached_property, reduce from typing import Generic, TypeVar from immutables import Map @@ -49,7 +50,9 @@ NodeT = TypeVar("NodeT", bound=Hashable) -@dataclass(frozen=True) +# Not frozen when optimizations are enabled because it is slower. +# Tree objects are immutable, and offer no way to mutate the tree. +@dataclass(frozen=__debug__) # type: ignore[literal-required] class Tree(Generic[NodeT]): """ An immutable tree containing nodes of type :class:`NodeT`. @@ -94,31 +97,23 @@ def ancestors(self, node: NodeT) -> tuple[NodeT, ...]: """ Returns a :class:`tuple` of nodes that are ancestors of *node*. """ - assert node in self - - if self.is_root(node): + parent = self.parent(node) + if parent is None: # => root return () - parent = self._child_to_parent[node] - assert parent is not None - - return (parent,) + self.ancestors(parent) + return (parent, *self.ancestors(parent)) def parent(self, node: NodeT) -> NodeT | None: """ Returns the parent of *node*. """ - assert node in self - return self._child_to_parent[node] def children(self, node: NodeT) -> tuple[NodeT, ...]: """ Returns the children of *node*. """ - assert node in self - return self._parent_to_children[node] @memoize_method @@ -126,25 +121,18 @@ def depth(self, node: NodeT) -> int: """ Returns the depth of *node*, with the root having depth 0. """ - assert node in self - - if self.is_root(node): - # => None - return 0 - parent_of_node = self.parent(node) - assert parent_of_node is not None + if parent_of_node is None: + return 0 return 1 + self.depth(parent_of_node) def is_root(self, node: NodeT) -> bool: - assert node in self - + """Return *True* if *node* is the root of the tree.""" return self.parent(node) is None def is_leaf(self, node: NodeT) -> bool: - assert node in self - + """Return *True* if *node* has no children.""" return len(self.children(node)) == 0 def __contains__(self, node: NodeT) -> bool: @@ -161,9 +149,11 @@ def add_node(self, node: NodeT, parent: NodeT) -> Tree[NodeT]: siblings = self._parent_to_children[parent] - return Tree((self._parent_to_children - .set(parent, siblings + (node,)) - .set(node, ())), + parent_to_children_mut = self._parent_to_children.mutate() + parent_to_children_mut[parent] = (*siblings, node) + parent_to_children_mut[node] = () + + return Tree(parent_to_children_mut.finish(), self._child_to_parent.set(node, parent)) def replace_node(self, node: NodeT, new_node: NodeT) -> Tree[NodeT]: @@ -231,15 +221,14 @@ def move_node(self, node: NodeT, new_parent: NodeT | None) -> Tree[NodeT]: assert parent is not None # parent=root handled as a special case siblings = self.children(parent) parents_new_children = tuple(frozenset(siblings) - frozenset([node])) - new_parents_children = self.children(new_parent) + (node,) + new_parents_children = (*self.children(new_parent), node) - new_child_to_parent = self._child_to_parent.set(node, new_parent) - new_parent_to_children = (self._parent_to_children - .set(parent, parents_new_children) - .set(new_parent, new_parents_children)) + parent_to_children_mut = self._parent_to_children.mutate() + parent_to_children_mut[parent] = parents_new_children + parent_to_children_mut[new_parent] = new_parents_children - return Tree(new_parent_to_children, - new_child_to_parent) + return Tree(parent_to_children_mut.finish(), + self._child_to_parent.set(node, new_parent)) def __str__(self) -> str: """ @@ -276,7 +265,7 @@ def post_process_last_child(children: Sequence[str]) -> list[str]: for c in children_result[:-1]] + [post_process_last_child(c) for c in children_result[-1:]]) - return [str(node)] + sum(children_result, start=[]) + return [str(node), *reduce(operator.iadd, children_result, [])] return "\n".join(rec(self.root)) diff --git a/loopy/statistics.py b/loopy/statistics.py index 5284dda2a..c781d5780 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -31,20 +31,9 @@ from dataclasses import dataclass, replace from enum import Enum, auto as enum_auto from functools import cached_property, partial -from typing import ( - Any, - Callable, - Generic, - Iterable, - Mapping, - Sequence, - Type, - TypeVar, - Union, - cast, -) from immutabledict import immutabledict +from typing import TYPE_CHECKING, ClassVar import islpy as isl import pymbolic.primitives as p @@ -73,6 +62,10 @@ from loopy.typing import Expression, ExpressionT, auto +if TYPE_CHECKING: + from collections.abc import Sequence + + __doc__ = """ .. currentmodule:: loopy @@ -443,7 +436,7 @@ def group_by(self, *args) -> ToCountMap[CountT]: # make sure all item keys have same type if self.count_map: - key_type = type(list(self.keys())[0]) + key_type = type(next(iter(self.keys()))) if not all(isinstance(x, key_type) for x in self.keys()): raise ValueError("ToCountMap: group_by() function may only " "be used on ToCountMaps with uniform keys") @@ -651,9 +644,10 @@ class CountGranularity(Enum): """ - WORKITEM = 0 - SUBGROUP = 1 - WORKGROUP = 2 + WORKITEM = "workitem" + SUBGROUP = "subgroup" + WORKGROUP = "workgroup" + ALL: ClassVar[Sequence[str]] = [WORKITEM, SUBGROUP, WORKGROUP] # }}} @@ -716,7 +710,6 @@ class Op: .. attribute:: tags A :class:`frozenset` of tags to the operation. - """ dtype: LoopyType | None = None op_type: OpType | None = None @@ -776,7 +769,7 @@ class MemAccess: .. attribute:: lid_strides A :class:`dict` of **{** :class:`int` **:** - :class:`pymbolic.primitives.Expression` or :class:`int` **}** that + :data:`~pymbolic.typing.Expression` or :class:`int` **}** that specifies local strides for each local id in the memory access index. Local ids not found will not be present in ``lid_strides.keys()``. Uniform access (i.e. work-items within a sub-group access the same @@ -787,7 +780,7 @@ class MemAccess: .. attribute:: gid_strides A :class:`dict` of **{** :class:`int` **:** - :class:`pymbolic.primitives.Expression` or :class:`int` **}** that + :data:`~pymbolic.typing.Expression` or :class:`int` **}** that specifies global strides for each global id in the memory access index. global ids not found will not be present in ``gid_strides.keys()``. @@ -1045,7 +1038,6 @@ def map_common_subexpression( raise RuntimeError("%s encountered %s--not supposed to happen" % (type(self).__name__, type(expr).__name__)) - map_substitution = map_common_subexpression map_derivative = map_common_subexpression map_slice = map_common_subexpression @@ -1227,11 +1219,6 @@ def map_common_subexpression(self, expr, tags): "common_subexpression, " "map_common_subexpression not implemented.") - def map_substitution(self, expr, tags): - raise NotImplementedError("ExpressionOpCounter encountered " - "substitution, " - "map_substitution not implemented.") - def map_derivative(self, expr, tags): raise NotImplementedError("ExpressionOpCounter encountered " "derivative, " @@ -1944,7 +1931,7 @@ def get_op_map( if len(t_unit.entrypoints) > 1: raise LoopyError("Must provide entrypoint") - entrypoint = list(t_unit.entrypoints)[0] + entrypoint = next(iter(program.entrypoints)) assert entrypoint in t_unit.entrypoints @@ -2175,7 +2162,7 @@ def get_mem_access_map( if len(t_unit.entrypoints) > 1: raise LoopyError("Must provide entrypoint") - entrypoint = list(t_unit.entrypoints)[0] + entrypoint = next(iter(program.entrypoints)) assert entrypoint in t_unit.entrypoints @@ -2308,7 +2295,7 @@ def get_synchronization_map( if len(t_unit.entrypoints) > 1: raise LoopyError("Must provide entrypoint") - entrypoint = list(t_unit.entrypoints)[0] + entrypoint = next(iter(program.entrypoints)) assert entrypoint in t_unit.entrypoints from loopy.preprocess import infer_unknown_types, preprocess_program @@ -2373,7 +2360,7 @@ def gather_access_footprints( if len(t_unit.entrypoints) > 1: raise LoopyError("Must provide entrypoint") - entrypoint = list(t_unit.entrypoints)[0] + entrypoint = next(iter(program.entrypoints)) assert entrypoint in t_unit.entrypoints diff --git a/loopy/symbolic.py b/loopy/symbolic.py index c595e8392..ff42749f8 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -24,30 +24,34 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ - - import re +from dataclasses import dataclass, replace from functools import cached_property, reduce from sys import intern from typing import ( TYPE_CHECKING, AbstractSet, Any, + ClassVar, + Concatenate, + Generic, Mapping, Sequence, + TypeAlias, cast, ) from warnings import warn import immutables import numpy as np +from typing_extensions import Self import islpy as isl import pymbolic.primitives # FIXME: also import by full name to allow sphinx to resolve import pymbolic.primitives as p import pytools.lex from islpy import dim_type -from pymbolic import ArithmeticExpressionT, Variable +from pymbolic import ArithmeticExpression, Variable from pymbolic.mapper import ( CachedCombineMapper as CombineMapperBase, CachedIdentityMapper as IdentityMapperBase, @@ -56,13 +60,17 @@ IdentityMapper as UncachedIdentityMapperBase, Mapper, P, + ResultT, WalkMapper as UncachedWalkMapperBase, ) from pymbolic.mapper.coefficient import CoefficientCollector as CoefficientCollectorBase from pymbolic.mapper.constant_folder import ( ConstantFoldingMapper as ConstantFoldingMapperBase, ) -from pymbolic.mapper.dependency import CachedDependencyMapper as DependencyMapperBase +from pymbolic.mapper.dependency import ( + CachedDependencyMapper as DependencyMapperBase, + DependenciesT, +) from pymbolic.mapper.evaluator import CachedEvaluationMapper as EvaluationMapperBase from pymbolic.mapper.flattener import FlattenMapper as FlattenMapperBase from pymbolic.mapper.stringifier import StringifyMapper as StringifyMapperBase @@ -71,8 +79,7 @@ ) from pymbolic.mapper.unifier import UnidirectionalUnifier as UnidirectionalUnifierBase from pymbolic.parser import Parser as ParserBase -from pymbolic.typing import ArithmeticOrExpressionT -from pytools import ImmutableRecord, memoize, memoize_method, memoize_on_first_arg +from pytools import memoize, memoize_method, memoize_on_first_arg from pytools.tag import Tag, Taggable, ToTagSetConvertible from loopy.diagnostic import ( @@ -80,12 +87,19 @@ LoopyError, UnableToDetermineAccessRangeError, ) -from loopy.types import LoopyType, NumpyType, ToLoopyTypeConvertible -from loopy.typing import ExpressionT, auto +from loopy.typing import Expression, not_none if TYPE_CHECKING: + from collections.abc import Callable, Collection, Iterable + + from pymbolic.typing import ArithmeticOrExpressionT + + from loopy.kernel import LoopKernel + from loopy.kernel.data import KernelArgument, SubstitutionRule, TemporaryVariable + from loopy.kernel.instruction import InstructionBase from loopy.library.reduction import ReductionOperation, ReductionOpFunction + from loopy.types import LoopyType, NumpyType, ToLoopyTypeConvertible __doc__ = """ @@ -107,12 +121,18 @@ .. autoclass:: LinearSubscript .. currentmodule:: loopy.symbolic +.. autoclass:: SubArrayRef + .. autoclass:: RuleArgument +.. autoclass:: ResolvedFunction + +Rule-aware Mappers +^^^^^^^^^^^^^^^^^^ + +.. autoclass:: SubstitutionRuleMappingContext .. autoclass:: ExpansionState .. autoclass:: RuleAwareIdentityMapper -.. autoclass:: ResolvedFunction -.. autoclass:: SubArrayRef Expression Manipulation Helpers @@ -126,6 +146,14 @@ .. class:: Variable See :class:`pymbolic.Variable`. + +.. class:: Expression + + See :data:`pymbolic.typing.Expression`. + +.. class:: _Expression + + See :class:`pymbolic.primitives.ExpressionNode`. """ @@ -136,23 +164,28 @@ def map_tagged_expression(self, expr: TaggedExpression, *args, **kwargs): new_expr = self.rec(expr.expr, *args, **kwargs) return TaggedExpression(expr.tags, new_expr) - def map_literal(self, expr: Literal, *args, **kwargs): + def map_literal(self, + expr: Literal, *args: P.args, **kwargs: P.kwargs) -> Expression: return expr - def map_array_literal(self, expr: ArrayLiteral, *args, **kwargs): + def map_array_literal( + self, + expr: ArrayLiteral, *args: P.args, **kwargs: P.kwargs + ) -> Expression: return type(expr)(tuple(self.rec(ch, *args, **kwargs) for ch in expr.children)) - def map_group_hw_index(self, expr, *args, **kwargs): + def map_group_hw_index(self, expr, *args: P.args, **kwargs: P.kwargs) -> Expression: return expr - def map_local_hw_index(self, expr, *args, **kwargs): + def map_local_hw_index(self, expr, *args: P.args, **kwargs: P.kwargs) -> Expression: return expr - def map_loopy_function_identifier(self, expr, *args, **kwargs): + def map_loopy_function_identifier(self, expr, *args: P.args, **kwargs: P.kwargs): return expr - def map_reduction(self, expr, *args, **kwargs): + def map_reduction(self, + expr: Reduction, *args: P.args, **kwargs: P.kwargs) -> Expression: mapped_inames = [self.rec(p.Variable(iname), *args, **kwargs) for iname in expr.inames] @@ -174,11 +207,15 @@ def map_reduction(self, expr, *args, **kwargs): new_expr, allow_simultaneous=expr.allow_simultaneous) - def map_tagged_variable(self, expr, *args, **kwargs): + def map_tagged_variable(self, expr: TaggedVariable, + *args: P.args, **kwargs: P.kwargs): # leaf, doesn't change return expr - def map_type_annotation(self, expr, *args, **kwargs): + def map_type_annotation( + self, expr: TypeAnnotation, + *args: P.args, **kwargs: P.kwargs + ) -> Expression: new_child = self.rec(expr.child, *args, **kwargs) if new_child is expr.child: @@ -186,18 +223,20 @@ def map_type_annotation(self, expr, *args, **kwargs): return type(expr)(expr.type, new_child) - def map_sub_array_ref(self, expr, *args, **kwargs): + def map_sub_array_ref(self, expr, *args: P.args, **kwargs: P.kwargs): new_inames = self.rec(expr.swept_inames, *args, **kwargs) new_subscript = self.rec(expr.subscript, *args, **kwargs) + assert isinstance(new_inames, tuple) + assert isinstance(new_subscript, p.Subscript) if (all(new_iname is old_iname for new_iname, old_iname in zip(new_inames, expr.swept_inames)) and new_subscript is expr.subscript): return expr - return SubArrayRef(new_inames, new_subscript) + return SubArrayRef(cast("tuple[Variable, ...]", new_inames), new_subscript) - def map_resolved_function(self, expr, *args, **kwargs): + def map_resolved_function(self, expr, *args: P.args, **kwargs: P.kwargs): # leaf, doesn't change return expr @@ -212,25 +251,25 @@ def map_resolved_function(self, expr, *args, **kwargs): class FlattenMapper(FlattenMapperBase, IdentityMapperMixin): # FIXME: Lies! This needs to be made precise. - def is_expr_integer_valued(self, expr: ExpressionT) -> bool: + def is_expr_integer_valued(self, expr: Expression) -> bool: return True def flatten(expr: ArithmeticOrExpressionT) -> ArithmeticOrExpressionT: - return cast(ArithmeticOrExpressionT, FlattenMapper()(expr)) + return cast("ArithmeticOrExpressionT", FlattenMapper()(expr)) -class IdentityMapper(IdentityMapperBase, IdentityMapperMixin): +class IdentityMapper(IdentityMapperBase, IdentityMapperMixin, Generic[P]): pass -class UncachedIdentityMapper(UncachedIdentityMapperBase, - IdentityMapperMixin): +class UncachedIdentityMapper(UncachedIdentityMapperBase[P], + IdentityMapperMixin[P]): pass class PartialEvaluationMapper( - EvaluationMapperBase, IdentityMapperMixin[P]): + EvaluationMapperBase, IdentityMapperMixin[[]]): def map_variable(self, expr): return expr @@ -238,70 +277,81 @@ def map_common_subexpression_uncached(self, expr): return type(expr)(self.rec(expr.child), expr.prefix, expr.scope) -class WalkMapperMixin: +class WalkMapperMixin(WalkMapperBase[P]): def map_tagged_expression(self, expr, *args, **kwargs): if not self.visit(expr, *args, **kwargs): return self.rec(expr.expr, *args, **kwargs) - - def map_literal(self, expr, *args, **kwargs): + + def map_literal(self, expr, *args: P.args, **kwargs: P.kwargs) -> None: self.visit(expr, *args, **kwargs) - def map_array_literal(self, expr, *args, **kwargs): + def map_array_literal(self, expr, *args: P.args, **kwargs: P.kwargs) -> None: if not self.visit(expr, *args, **kwargs): return for ch in expr.children: self.rec(ch, *args, **kwargs) - def map_group_hw_index(self, expr, *args, **kwargs): + def map_group_hw_index(self, expr, *args: P.args, **kwargs: P.kwargs) -> None: self.visit(expr, *args, **kwargs) - def map_local_hw_index(self, expr, *args, **kwargs): + def map_local_hw_index(self, expr, *args: P.args, **kwargs: P.kwargs) -> None: self.visit(expr, *args, **kwargs) - def map_reduction(self, expr, *args, **kwargs): + def map_reduction(self, expr, *args: P.args, **kwargs: P.kwargs) -> None: if not self.visit(expr, *args, **kwargs): return self.rec(expr.expr, *args, **kwargs) - def map_type_cast(self, expr, *args, **kwargs): + def map_type_cast(self, expr, *args: P.args, **kwargs: P.kwargs) -> None: if not self.visit(expr, *args, **kwargs): return self.rec(expr.child, *args, **kwargs) map_tagged_variable = WalkMapperBase.map_variable - def map_loopy_function_identifier(self, expr, *args, **kwargs): + def map_loopy_function_identifier( + self, expr, *args: P.args, **kwargs: P.kwargs + ) -> None: self.visit(expr, *args, **kwargs) - map_linear_subscript = WalkMapperBase.map_subscript + def map_linear_subscript(self, + expr: LinearSubscript, + *args: P.args, **kwargs: P.kwargs) -> None: + if not self.visit(expr, *args, **kwargs): + return + + self.rec(expr.aggregate, *args, **kwargs) + self.rec(expr.index, *args, **kwargs) + + self.post_visit(expr, *args, **kwargs) map_rule_argument = map_group_hw_index - def map_sub_array_ref(self, expr, *args): - if not self.visit(expr): + def map_sub_array_ref(self, expr, *args: P.args, **kwargs: P.kwargs): + if not self.visit(expr, *args, **kwargs): return - self.rec(expr.swept_inames, *args) - self.rec(expr.subscript, *args) + self.rec(expr.swept_inames, *args, **kwargs) + self.rec(expr.subscript, *args, **kwargs) - def map_resolved_function(self, expr, *args): - if not self.visit(expr): + def map_resolved_function(self, expr, *args, **kwargs): + if not self.visit(expr, *args, **kwargs): return - self.rec(expr.function, *args) + self.rec(expr.function, *args, **kwargs) map_fortran_division = WalkMapperBase.map_quotient -class WalkMapper(WalkMapperBase, WalkMapperMixin): +class WalkMapper(WalkMapperMixin[P], WalkMapperBase[P]): pass -class UncachedWalkMapper(UncachedWalkMapperBase, WalkMapperMixin): +class UncachedWalkMapper(WalkMapperMixin[P], UncachedWalkMapperBase[P]): pass @@ -310,26 +360,35 @@ class CallbackMapper(IdentityMapperMixin, CallbackMapperBase): map_resolved_function = CallbackMapperBase.map_constant -class CombineMapper(CombineMapperBase): +class CombineMapper(CombineMapperBase[ResultT, P]): def map_tagged_expression(self, expr, *args, **kwargs): return self.rec(expr.expr, *args, **kwargs) - - def map_reduction(self, expr, *args, **kwargs): + + def map_reduction(self, expr, *args: P.args, **kwargs: P.kwargs): return self.rec(expr.expr, *args, **kwargs) - def map_type_cast(self, expr, *args, **kwargs): + def map_type_cast(self, expr, *args: P.args, **kwargs: P.kwargs): return self.rec(expr.child, *args, **kwargs) - def map_sub_array_ref(self, expr, *args, **kwargs): + def map_sub_array_ref(self, expr, *args: P.args, **kwargs: P.kwargs): return self.combine(( self.rec(expr.subscript, *args, **kwargs), self.combine(tuple( self.rec(idx, *args, **kwargs) for idx in expr.swept_inames)))) - map_linear_subscript = CombineMapperBase.map_subscript + def map_linear_subscript(self, + expr: LinearSubscript, *args: P.args, **kwargs: P.kwargs + ) -> ResultT: + return self.combine( + [self.rec(expr.aggregate, *args, **kwargs), + self.rec(expr.index, *args, **kwargs)]) - map_fortran_division = CombineMapperBase.map_quotient + def map_fortran_division(self, + expr: FortranDivision, *args: P.args, **kwargs: P.kwargs) -> ResultT: + return self.combine(( + self.rec(expr.numerator, *args, **kwargs), + self.rec(expr.denominator, *args, **kwargs))) class SubstitutionMapper( @@ -348,11 +407,13 @@ def map_tagged_expression(self, expr, *args): from pymbolic.mapper.stringifier import PREC_NONE return f"TaggedExpression({expr.tags}, {self.rec(expr.expr, PREC_NONE)}" - def map_literal(self, expr, *args): + def map_literal(self, expr: Literal, enclosing_prec: int) -> str: return expr.s - def map_array_literal(self, expr, *args): - return "{%s}" % ", ".join(self.rec(ch) for ch in expr.children) + def map_array_literal(self, expr, enclosing_prec): + from pymbolic.mapper.stringifier import PREC_NONE + + return "{%s}" % ", ".join(self.rec(ch, PREC_NONE) for ch in expr.children) def map_group_hw_index(self, expr, enclosing_prec): return "grp.%d" % expr.index @@ -435,80 +496,97 @@ def map_tagged_variable(self, expr, other, urecs): return unify_many(urecs, new_uni_record) -class DependencyMapper(DependencyMapperBase): - def map_group_hw_index(self, expr, *args, **kwargs): +class DependencyMapper(DependencyMapperBase[P]): + def map_group_hw_index( + self, + expr: GroupHardwareAxisIndex, *args: P.args, **kwargs: P.kwargs + ) -> DependenciesT: return set() - def map_local_hw_index(self, expr, *args, **kwargs): + def map_local_hw_index( + self, + expr: LocalHardwareAxisIndex, *args: P.args, **kwargs: P.kwargs + ) -> DependenciesT: return set() - def map_call(self, expr, *args, **kwargs): + def map_call( + self, + expr: p.Call, *args: P.args, **kwargs: P.kwargs + ) -> DependenciesT: # Loopy does not have first-class functions. Do not descend # into 'function' attribute of Call. return self.rec(expr.parameters, *args, **kwargs) - def map_reduction(self, expr, *args, **kwargs): + def map_reduction( + self, + expr: Reduction, *args: P.args, **kwargs: P.kwargs + ) -> DependenciesT: deps = self.rec(expr.expr, *args, **kwargs) return deps - {Variable(iname) for iname in expr.inames} - def map_tagged_variable(self, expr, *args, **kwargs): + def map_tagged_variable( + self, + expr: TaggedVariable, *args: P.args, **kwargs: P.kwargs + ) -> DependenciesT: return {expr} - def map_loopy_function_identifier(self, expr, *args, **kwargs): + def map_loopy_function_identifier(self, expr, *args: P.args, **kwargs: P.kwargs): return set() def map_tagged_expression(self, expr, *args, **kwargs): deps = self.rec(expr.expr, *args, **kwargs) return deps - def map_sub_array_ref(self, expr, *args, **kwargs): + def map_sub_array_ref(self, expr, *args: P.args, **kwargs: P.kwargs): deps = self.rec(expr.subscript, *args, **kwargs) return deps - set(expr.swept_inames) map_linear_subscript = DependencyMapperBase.map_subscript - def map_type_cast(self, expr, *args, **kwargs): + def map_type_cast(self, expr, *args: P.args, **kwargs: P.kwargs): return self.rec(expr.child, *args, **kwargs) - def map_resolved_function(self, expr): - return self.rec(expr.function) + def map_resolved_function(self, expr, *args: P.args, **kwargs: P.kwargs): + return self.rec(expr.function, *args, **kwargs) - def map_literal(self, expr): + def map_literal(self, expr, *args: P.args, **kwargs: P.kwargs): return set() - def map_call_with_kwargs(self, expr): + def map_call_with_kwargs(self, expr, *args: P.args, **kwargs: P.kwargs): # See https://github.com/inducer/loopy/pull/323 raise NotImplementedError map_fortran_division = DependencyMapperBase.map_quotient -class SubstitutionRuleExpander(IdentityMapper): - def __init__(self, rules): +class SubstitutionRuleExpander(IdentityMapper[[]]): + def __init__(self, rules: Mapping[str, SubstitutionRule]) -> None: self.rules = rules super().__init__() - def __call__(self, expr, *args, **kwargs): + def __call__(self, expr: Expression) -> Expression: if not self.rules: return expr - return super().__call__(expr, *args, **kwargs) + return super().__call__(expr) - def map_variable(self, expr): + def map_variable(self, expr: Variable) -> Expression: if expr.name in self.rules: - return self.map_substitution(expr.name, self.rules[expr.name], ()) + return self.map_subst_rule(expr.name, self.rules[expr.name], ()) else: return super().map_variable(expr) - def map_call(self, expr): + def map_call(self, expr: p.Call) -> Expression: + assert isinstance(expr.function, Variable | ResolvedFunction) if expr.function.name in self.rules: - return self.map_substitution( + assert isinstance(expr.function.name, str) + return self.map_subst_rule( expr.function.name, self.rules[expr.function.name], expr.parameters) else: return super().map_call(expr) - def map_substitution(self, name, rule, arguments): + def map_subst_rule(self, name, rule, arguments): if len(rule.arguments) != len(arguments): from loopy.diagnostic import LoopyError raise LoopyError("number of arguments to '%s' does not match " @@ -528,7 +606,7 @@ def map_substitution(self, name, rule, arguments): # {{{ loopy-specific primitives -class LoopyExpressionBase(p.Expression): +class LoopyExpressionBase(p.ExpressionNode): def stringifier(self): from loopy.diagnostic import LoopyError raise LoopyError("pymbolic < 2019.1 is in use. Please upgrade.") @@ -562,7 +640,7 @@ class ArrayLiteral(LoopyExpressionBase): similar mappers). Not for use in Loopy source representation. """ - children: tuple[ExpressionT, ...] + children: tuple[Expression, ...] @p.expr_dataclass() @@ -625,7 +703,7 @@ class TypeAnnotation(LoopyExpressionBase): """ type: LoopyType - child: ExpressionT + child: Expression @p.expr_dataclass(init=False) @@ -641,10 +719,10 @@ class TypeCast(LoopyExpressionBase): # numpy pickling bug madness. (see loopy.types) _type_name: str - child: ExpressionT + child: Expression """The expression to be cast.""" - def __init__(self, type: ToLoopyTypeConvertible, child: ExpressionT): + def __init__(self, type: ToLoopyTypeConvertible, child: Expression): super().__init__() from loopy.types import NumpyType, to_loopy_type @@ -735,8 +813,6 @@ class Reduction(LoopyExpressionBase): .. autoattribute:: allow_simultaneous """ - init_arg_names = ("operation", "inames", "expr", "allow_simultaneous") - operation: ReductionOperation inames: Sequence[str] @@ -744,11 +820,11 @@ class Reduction(LoopyExpressionBase): carried out. """ - expr: ExpressionT + expr: Expression """An expression which may have tuple type. If the expression has tuple type, it must be one of the following: - * a :class:`tuple` of :class:`pymbolic.primitives.Expression`, or + * a :class:`tuple` of :data:`pymbolic.typing.Expression`, or * a :class:`loopy.symbolic.Reduction`, or * a function call or substitution rule invocation. """ @@ -762,7 +838,7 @@ def __init__(self, operation: ReductionOperation | str, inames: (tuple[str | pymbolic.primitives.Variable, ...] | pymbolic.primitives.Variable | str), - expr: ExpressionT, + expr: Expression, allow_simultaneous: bool = False ) -> None: if isinstance(inames, str): @@ -784,14 +860,17 @@ def strip_var(iname: Any) -> str: if isinstance(operation, str): from loopy.library.reduction import parse_reduction_op - operation = parse_reduction_op(operation) + op = parse_reduction_op(operation) + else: + op = operation + del operation from loopy.library.reduction import ReductionOperation - assert isinstance(operation, ReductionOperation) + assert isinstance(op, ReductionOperation) from loopy.diagnostic import LoopyError - if operation.arg_count > 1: + if op.arg_count > 1: from pymbolic.primitives import Call if not isinstance(expr, (tuple, Reduction, Call)): @@ -805,7 +884,7 @@ def strip_var(iname: Any) -> str: elif isinstance(expr, Reduction) and expr.is_tuple_typed: raise LoopyError("got a tuple typed argument to a scalar reduction") - object.__setattr__(self, "operation", operation) + object.__setattr__(self, "operation", op) object.__setattr__(self, "inames", inames) object.__setattr__(self, "expr", expr) object.__setattr__(self, "allow_simultaneous", allow_simultaneous) @@ -824,8 +903,8 @@ class LinearSubscript(LoopyExpressionBase): """Represents a linear index into a multi-dimensional array, completely ignoring any multi-dimensional layout. """ - aggregate: ExpressionT - index: ExpressionT + aggregate: Expression + index: Expression @p.expr_dataclass() @@ -887,19 +966,19 @@ def map_variable(self, expr): return expr -class VariableInAnExpression(CombineMapper): - def __init__(self, variables_to_search): +class VariableInAnExpression(CombineMapper[bool, []]): + def __init__(self, variables_to_search: Collection[Variable]) -> None: assert all(isinstance(variable, Variable) for variable in variables_to_search) self.variables_to_search = variables_to_search - def combine(self, values): + def combine(self, values) -> bool: return any(values) - def map_variable(self, expr): + def map_variable(self, expr) -> bool: return expr in self.variables_to_search - def map_constant(self, expr): + def map_constant(self, expr) -> bool: return False @@ -976,6 +1055,7 @@ def __post_init__(self): assert isinstance(self.subscript, p.Subscript) +@p.expr_dataclass() class FortranDivision(p.QuotientBase, LoopyExpressionBase): """This exists for the benefit of the Fortran frontend, which specializes to floating point division for floating point inputs and round-to-zero @@ -987,46 +1067,51 @@ class FortranDivision(p.QuotientBase, LoopyExpressionBase): This is not a documented expression node type. It may disappear at any moment. """ - mapper_method = "map_fortran_division" # }}} -class DependencyMapperWithReductionInames(DependencyMapper): - def __init__(self, *args, **kwargs): +class DependencyMapperWithReductionInames(DependencyMapper[P]): + def __init__(self, *args, **kwargs) -> None: super().__init__(*args, **kwargs) - self.reduction_inames = set() + self.reduction_inames: set[str] = set() - def map_reduction(self, expr, *args, **kwargs): + def map_reduction( + self, + expr: Reduction, *args: P.args, **kwargs: P.kwargs + ) -> DependenciesT: self.reduction_inames.update(expr.inames) return super().map_reduction(expr, *args, **kwargs) @memoize -def _get_dependencies_and_reduction_inames(expr): - dep_mapper = DependencyMapperWithReductionInames(composite_leaves=False) - deps = frozenset(dep.name for dep in dep_mapper(expr)) +def _get_dependencies_and_reduction_inames( + expr: Expression + ) -> tuple[AbstractSet[str], AbstractSet[str]]: + dep_mapper: DependencyMapperWithReductionInames[[]] = \ + DependencyMapperWithReductionInames(composite_leaves=False) + deps = frozenset(cast("Variable", dep).name for dep in dep_mapper(expr)) reduction_inames = dep_mapper.reduction_inames return deps, reduction_inames -def get_dependencies(expr: ExpressionT | type[auto]) -> AbstractSet[str]: +def get_dependencies(expr: Expression) -> AbstractSet[str]: return _get_dependencies_and_reduction_inames(expr)[0] -def get_reduction_inames(expr: ExpressionT) -> AbstractSet[str]: +def get_reduction_inames(expr: Expression) -> AbstractSet[str]: return _get_dependencies_and_reduction_inames(expr)[1] -class SubArrayRefSweptInamesCollector(CombineMapper): - def combine(self, values): +class SubArrayRefSweptInamesCollector(CombineMapper[AbstractSet[str], []]): + def combine(self, values: Iterable[AbstractSet[str]]) -> AbstractSet[str]: import operator return reduce(operator.or_, values, frozenset()) - def map_sub_array_ref(self, expr): + def map_sub_array_ref(self, expr) -> AbstractSet[str]: return frozenset({iname.name for iname in expr.swept_inames}) - def map_constant(self, expr): + def map_constant(self, expr) -> AbstractSet[str]: return frozenset() map_variable = map_constant @@ -1055,7 +1140,8 @@ def parse_tagged_name(expr): raise RuntimeError("subst rule name not understood: %s" % expr) -class ExpansionState(ImmutableRecord): +@dataclass(frozen=True) +class ExpansionState: """ .. attribute:: kernel .. attribute:: instruction @@ -1069,19 +1155,18 @@ class ExpansionState(ImmutableRecord): a dict representing current argument values """ - def __init__(self, kernel, instruction, stack, arg_context): - if not isinstance(arg_context, immutables.Map): - warn(f"Got a {type(arg_context)} for arg_context," - " expected `immutables.Map`. This is deprecated" - " and will result in an error from 2023.", - DeprecationWarning, stacklevel=2) - arg_context = immutables.Map(arg_context) - super().__init__(kernel=kernel, - instruction=instruction, - stack=stack, - arg_context=arg_context) - - def __hash__(self): + kernel: LoopKernel + instruction: InstructionBase + stack: tuple[tuple[str, Tag], ...] + arg_context: immutables.Map[str, Expression] + + def __post_init__(self) -> None: + hash(self.arg_context) + + def copy(self, **kwargs: Any) -> Self: + return replace(self, **kwargs) + + def __hash__(self) -> int: # do not try to be precise about hash of loopy kernel # or the instruction as computing the hash of pymbolic # expressions could have exponential complexity @@ -1089,8 +1174,8 @@ def __hash__(self): self.stack, self.arg_context)) @property - def insn_id(self): - return self.instruction.id + def insn_id(self) -> str: + return not_none(self.instruction.id) def apply_arg_context(self, expr): from pymbolic.mapper.substitutor import make_subst_func @@ -1098,34 +1183,34 @@ def apply_arg_context(self, expr): make_subst_func(self.arg_context))(expr) -class SubstitutionRuleRenamer(IdentityMapper): +class SubstitutionRuleRenamer(IdentityMapper[[]]): def __init__(self, renames): self.renames = renames super().__init__() - def map_call(self, expr): + def map_call(self, expr: p.Call) -> Expression: if not isinstance(expr.function, p.Variable): - return IdentityMapper.map_call(self, expr) + return super().map_call(expr) name, tags = parse_tagged_name(expr.function) new_name = self.renames.get(name) if new_name is None: - return IdentityMapper.map_call(self, expr) + return super().map_call(expr) if tags: - sym = TaggedVariable(new_name, tags) + sym: p.Variable = TaggedVariable(new_name, tags) else: sym = p.Variable(new_name) return type(expr)(sym, tuple(self.rec(child) for child in expr.parameters)) - def map_variable(self, expr): + def map_variable(self, expr: Variable) -> Expression: name, tags = parse_tagged_name(expr) new_name = self.renames.get(name) if new_name is None: - return IdentityMapper.map_variable(self, expr) + return super().map_variable(expr) if tags: return TaggedVariable(new_name, tags) @@ -1256,7 +1341,7 @@ def finish_kernel(self, kernel): instructions=new_insns) -class RuleAwareIdentityMapper(IdentityMapper): +class RuleAwareIdentityMapper(IdentityMapper[Concatenate[ExpansionState, P]]): """Note: the third argument dragged around by this mapper is the current :class:`ExpansionState`. @@ -1264,30 +1349,33 @@ class RuleAwareIdentityMapper(IdentityMapper): are in :attr:`ExpansionState.arg_context`. """ - def __init__(self, rule_mapping_context): + def __init__(self, rule_mapping_context: SubstitutionRuleMappingContext) -> None: self.rule_mapping_context = rule_mapping_context super().__init__() - def map_variable(self, expr, expn_state, *args, **kwargs): + def map_variable( + self, expr: Variable, expn_state: ExpansionState, + *args: P.args, **kwargs: P.kwargs + ) -> Expression: name, tags = parse_tagged_name(expr) if name not in self.rule_mapping_context.old_subst_rules: - return IdentityMapper.map_variable(self, expr, expn_state, *args, - **kwargs) + return super().map_variable(expr, expn_state, *args, **kwargs) else: - return self.map_substitution(name, tags, (), expn_state, *args, - **kwargs) + return self.map_subst_rule(name, tags, (), expn_state, *args, **kwargs) - def map_call(self, expr, expn_state, *args, **kwargs): + def map_call( + self, expr: p.Call, expn_state: ExpansionState, + *args: P.args, **kwargs: P.kwargs + ) -> Expression: if not isinstance(expr.function, p.Variable): - return IdentityMapper.map_call(self, expr, expn_state, - *args, **kwargs) + return super().map_call(expr, expn_state, *args, **kwargs) name, tags = parse_tagged_name(expr.function) if name not in self.rule_mapping_context.old_subst_rules: return super().map_call(expr, expn_state, *args, **kwargs) else: - return self.map_substitution(name, tags, + return self.map_subst_rule(name, tags, self.rec(expr.parameters, expn_state, *args, @@ -1299,9 +1387,9 @@ def map_call(self, expr, expn_state, *args, **kwargs): def make_new_arg_context( rule_name: str, arg_names: Sequence[str], - arguments: Sequence[ExpressionT], - arg_context: Mapping[str, ExpressionT] - ) -> Mapping[str, ExpressionT]: + arguments: Sequence[Expression], + arg_context: Mapping[str, Expression] + ) -> Mapping[str, Expression]: if len(arg_names) != len(arguments): raise RuntimeError("Rule '%s' invoked with %d arguments (needs %d)" % (rule_name, len(arguments), len(arg_names), )) @@ -1312,14 +1400,17 @@ def make_new_arg_context( formal_arg_name: arg_subst_map(arg_value) for formal_arg_name, arg_value in zip(arg_names, arguments)}) - def map_substitution(self, name, tags, arguments, expn_state, - *args, **kwargs): + def map_subst_rule( + self, name: str, tags, arguments, expn_state: ExpansionState, + *args: P.args, **kwargs: P.kwargs + ) -> Expression: rule = self.rule_mapping_context.old_subst_rules[name] rec_arguments = self.rec(arguments, expn_state, *args, **kwargs) + assert isinstance(rec_arguments, tuple) new_expn_state = expn_state.copy( - stack=expn_state.stack + ((name, tags),), + stack=(*expn_state.stack, (name, tags)), arg_context=self.make_new_arg_context( name, rule.arguments, rec_arguments, expn_state.arg_context)) @@ -1329,7 +1420,7 @@ def map_substitution(self, name, tags, arguments, expn_state, name, rule.arguments, result) if tags: - sym = TaggedVariable(new_name, tags) + sym: p.Variable = TaggedVariable(new_name, tags) else: sym = p.Variable(new_name) @@ -1356,8 +1447,8 @@ def __call__(self, expr, kernel, insn): def map_instruction(self, kernel, insn): return insn - def map_kernel(self, kernel, within=lambda *args: True, - map_args=True, map_tvs=True): + def map_kernel(self, kernel: LoopKernel, within=lambda *args: True, + map_args: bool = True, map_tvs: bool = True) -> LoopKernel: new_insns = [ # While subst rules are not allowed in assignees, the mapper # may perform tasks entirely unrelated to subst rules, so @@ -1377,7 +1468,7 @@ def map_kernel(self, kernel, within=lambda *args: True, # {{{ args if map_args: - new_args = [ + new_args: Sequence[KernelArgument] = [ arg.map_exprs(non_insn_self) if isinstance(arg, ArrayBase) else arg for arg in kernel.args] else: @@ -1388,11 +1479,11 @@ def map_kernel(self, kernel, within=lambda *args: True, # {{{ tvs if map_tvs: - new_tvs = { + new_tvs: Mapping[str, TemporaryVariable] = { tv_name: tv.map_exprs(non_insn_self) for tv_name, tv in kernel.temporary_variables.items()} else: - new_tvs = kernel.temporary_variables.copy() + new_tvs = kernel.temporary_variables # }}} @@ -1403,7 +1494,7 @@ def map_kernel(self, kernel, within=lambda *args: True, temporary_variables=new_tvs) -class RuleAwareSubstitutionMapper(RuleAwareIdentityMapper): +class RuleAwareSubstitutionMapper(RuleAwareIdentityMapper[[]]): """ Mapper to substitute expressions and record any divergence of substitution rule expressions of :class:`loopy.LoopKernel`. @@ -1440,7 +1531,7 @@ def within(self, kernel, instruction, stack): else: return self._within(kernel, instruction, stack) - def map_variable(self, expr, expn_state): + def map_variable(self, expr: Variable, expn_state: ExpansionState) -> Expression: if (expr.name in expn_state.arg_context or not self.within( expn_state.kernel, expn_state.instruction, expn_state.stack)): @@ -1456,15 +1547,17 @@ def map_variable(self, expr, expn_state): expr, expn_state) -class RuleAwareSubstitutionRuleExpander(RuleAwareIdentityMapper): +class RuleAwareSubstitutionRuleExpander(RuleAwareIdentityMapper[[]]): def __init__(self, rule_mapping_context, rules, within): super().__init__(rule_mapping_context) self.rules = rules self.within = within - def map_substitution(self, name, tags, arguments, expn_state): - new_stack = expn_state.stack + ((name, tags),) + def map_subst_rule( + self, name: str, tags, arguments, expn_state: ExpansionState + ) -> Expression: + new_stack = (*expn_state.stack, (name, tags)) if self.within(expn_state.kernel, expn_state.instruction, new_stack): # expand @@ -1486,31 +1579,38 @@ def map_substitution(self, name, tags, arguments, expn_state): else: # do not expand - return super().map_substitution( - name, tags, arguments, expn_state) + return super().map_subst_rule(name, tags, arguments, expn_state) # }}} # {{{ functions to primitives, parsing -class VarToTaggedVarMapper(IdentityMapper): - def map_variable(self, expr): +class VarToTaggedVarMapper(IdentityMapper[[]]): + def map_variable(self, expr: Variable) -> Variable: dollar_idx = expr.name.find("$") if dollar_idx == -1: return expr else: - return TaggedVariable(expr.name[:dollar_idx], - expr.name[dollar_idx+1:]) + from loopy.kernel.instruction import LegacyStringInstructionTag + return TaggedVariable( + expr.name[:dollar_idx], + frozenset({ + LegacyStringInstructionTag(expr.name[dollar_idx+1:]) + }) + ) -class FunctionToPrimitiveMapper(UncachedIdentityMapper): +class FunctionToPrimitiveMapper(UncachedIdentityMapper[[]]): """Looks for invocations of a function called 'cse' or 'reduce' and turns those into the actual pymbolic primitives used for that. """ - def _parse_reduction(self, operation, inames, red_exprs, - allow_simultaneous=False): + def _parse_reduction(self, + operation: ReductionOperation, + inames: Expression, + red_exprs: tuple[Expression, ...], + allow_simultaneous: bool = False) -> Reduction: if isinstance(inames, p.Variable): inames = (inames,) @@ -1518,7 +1618,7 @@ def _parse_reduction(self, operation, inames, red_exprs, raise TypeError("iname argument to reduce() must be a symbol " "or a list/tuple of symbols") - processed_inames = [] + processed_inames: list[str] = [] for iname in inames: if not isinstance(iname, p.Variable): raise TypeError("iname argument to reduce() must be a symbol " @@ -1527,16 +1627,18 @@ def _parse_reduction(self, operation, inames, red_exprs, processed_inames.append(iname.name) if len(red_exprs) == 1: - red_exprs = red_exprs[0] + expr_or_exprs: Expression | tuple[Expression, ...] = red_exprs[0] + else: + expr_or_exprs = red_exprs - return Reduction(operation, tuple(processed_inames), red_exprs, + return Reduction(operation, tuple(processed_inames), expr_or_exprs, allow_simultaneous=allow_simultaneous) - def map_call(self, expr): + def map_call(self, expr: p.Call) -> Expression: from loopy.library.reduction import parse_reduction_op if not isinstance(expr.function, p.Variable): - return IdentityMapper.map_call(self, expr) + return super().map_call(expr) name = expr.function.name if name == "cse": @@ -1557,11 +1659,11 @@ def map_call(self, expr): elif name in ["reduce", "simul_reduce"]: if len(expr.parameters) >= 3: - operation, inames = expr.parameters[:2] + op_expr, inames = expr.parameters[:2] red_exprs = expr.parameters[2:] - operation = parse_reduction_op(str(operation)) - return self._parse_reduction(operation, inames, + operation = parse_reduction_op(str(op_expr)) + return self._parse_reduction(not_none(operation), inames, tuple(self.rec(red_expr) for red_expr in red_exprs), allow_simultaneous=(name == "simul_reduce")) else: @@ -1602,18 +1704,22 @@ def map_call(self, expr): return self._parse_reduction(operation, inames, red_exprs) else: - return IdentityMapper.map_call(self, expr) + return super().map_call(expr) # {{{ customization to pymbolic parser _open_dbl_bracket = intern("open_dbl_bracket") -TRAILING_FLOAT_TAG_RE = re.compile("^(.*?)([a-zA-Z]*)$") +TRAILING_FLOAT_TAG_RE = re.compile(r"^(.*?)([a-zA-Z]*)$") + + +LexTable: TypeAlias = Sequence[ + tuple[str, pytools.lex.RE | tuple[str | pytools.lex.RE, ...]]] class LoopyParser(ParserBase): - lex_table = [ + lex_table: ClassVar[LexTable] = [ (_open_dbl_bracket, pytools.lex.RE(r"\[\[")), *ParserBase.lex_table ] @@ -1724,8 +1830,8 @@ def map_subscript(self, expr): # {{{ variable index expression collector -class ArrayAccessFinder(CombineMapper): - def __init__(self, tgt_vector_name=None): +class ArrayAccessFinder(CombineMapper[AbstractSet[p.Subscript], []]): + def __init__(self, tgt_vector_name: str | None = None) -> None: self.tgt_vector_name = tgt_vector_name super().__init__() @@ -1733,27 +1839,27 @@ def combine(self, values): from pytools import flatten return set(flatten(values)) - def map_constant(self, expr): + def map_constant(self, expr: object) -> AbstractSet[p.Subscript]: return set() - def map_algebraic_leaf(self, expr): + def map_algebraic_leaf(self, expr) -> AbstractSet[p.Subscript]: return set() - def map_subscript(self, expr): + def map_subscript(self, expr) -> AbstractSet[p.Subscript]: assert isinstance(expr.aggregate, p.Variable) if self.tgt_vector_name is None \ or expr.aggregate.name == self.tgt_vector_name: return {expr} | self.rec(expr.index) else: - return CombineMapper.map_subscript(self, expr) + return super().map_subscript(expr) # }}} # {{{ (pw)aff to expr conversion -def aff_to_expr(aff: isl.Aff) -> ArithmeticExpressionT: +def aff_to_expr(aff: isl.Aff) -> ArithmeticExpression: from pymbolic import var denom = aff.get_denominator_val().to_python() @@ -1774,7 +1880,7 @@ def aff_to_expr(aff: isl.Aff) -> ArithmeticExpressionT: return flatten(result // denom) -def pw_aff_to_expr(pw_aff: isl.PwAff, int_ok: bool = False) -> ExpressionT: +def pw_aff_to_expr(pw_aff: isl.PwAff, int_ok: bool = False) -> Expression: if isinstance(pw_aff, int): if not int_ok: warn("expected PwAff, got int", stacklevel=2) @@ -1816,7 +1922,7 @@ def pw_aff_to_pw_aff_implemented_by_expr(pw_aff: isl.PwAff) -> isl.PwAff: # {{{ (pw)aff_from_expr -class PwAffEvaluationMapper(EvaluationMapperBase, IdentityMapperMixin): +class PwAffEvaluationMapper(EvaluationMapperBase[isl.PwAff], IdentityMapperMixin[[]]): def __init__(self, space, vars_to_zero): self.zero = isl.Aff.zero_on_domain(isl.LocalSpace.from_space(space)) @@ -1869,7 +1975,7 @@ def map_remainder(self, expr): raise TypeError("modulo non-constant in '%s' not supported " "for as-pwaff evaluation" % expr) - (s, denom_aff), = denom.get_pieces() + (_s, denom_aff), = denom.get_pieces() denom = denom_aff.get_constant_val() return num.mod_val(denom) @@ -1888,7 +1994,7 @@ def map_call(self, expr): "for as-pwaff evaluation") -def aff_from_expr(space: isl.Space, expr: ExpressionT, vars_to_zero=None) -> isl.Aff: +def aff_from_expr(space: isl.Space, expr: Expression, vars_to_zero=None) -> isl.Aff: if vars_to_zero is None: vars_to_zero = frozenset() @@ -1896,7 +2002,7 @@ def aff_from_expr(space: isl.Space, expr: ExpressionT, vars_to_zero=None) -> isl pieces = pwaff.get_pieces() if len(pieces) == 1: - (s, aff), = pieces + (_s, aff), = pieces return aff else: from loopy.diagnostic import ExpressionNotAffineError @@ -1952,7 +2058,7 @@ def guarded_pwaff_from_expr(space, expr, vars_to_zero=None): # {{{ (pw_)?qpoly_from_expr -class PwQPolyEvaluationMapper(EvaluationMapperBase): +class PwQPolyEvaluationMapper(EvaluationMapperBase[isl.PwQPolynomial]): def __init__(self, space, vars_to_zero): zero_qpoly = isl.QPolynomial.zero_on_domain(space) @@ -2000,7 +2106,7 @@ def qpolynomial_from_expr(space, expr): pieces = pw_qpoly.get_pieces() if len(pieces) == 1: - (s, qpoly), = pieces + (_s, qpoly), = pieces return qpoly else: raise RuntimeError("expression '%s' could not be converted to a " @@ -2029,7 +2135,7 @@ def simplify_using_aff(kernel, expr): """ Simplifies *expr* on *kernel*'s domain. - :arg expr: An instance of :class:`pymbolic.primitives.Expression`. + :arg expr: An instance of :data:`pymbolic.typing.Expression`. """ deps = get_dependencies(expr) @@ -2153,7 +2259,7 @@ def constraint_to_cond_expr(cns): # {{{ isl_set_from_expr -class ConditionExpressionToBooleanOpsExpression(IdentityMapper): +class ConditionExpressionToBooleanOpsExpression(IdentityMapper[[]]): """ Mapper to convert expressions into composition of boolean operation nodes according to C-semantics. @@ -2183,7 +2289,7 @@ def map_reduction(self, expr): "to affine") -class AffineConditionToISLSetMapper(IdentityMapper): +class AffineConditionToISLSetMapper(IdentityMapper[[]]): """ Mapper to convert a condition :class:`~pymbolic.primitives.Expression` to a :class:`~islpy.Set`. @@ -2304,15 +2410,20 @@ def set_to_cond_expr(isl_set): # {{{ Reduction callback mapper -class ReductionCallbackMapper(UncachedIdentityMapper): - def __init__(self, callback): +class ReductionCallbackMapper(UncachedIdentityMapper[P]): + def __init__( + self, + callback: Callable[[ + Reduction, + Callable[Concatenate[Expression, P], Expression] + ], Expression]) -> None: self.callback = callback super().__init__() - def map_reduction(self, expr, **kwargs): + def map_reduction(self, expr, *args: P.args, **kwargs: P.kwargs) -> Expression: result = self.callback(expr, self.rec, **kwargs) if result is None: - return IdentityMapper.map_reduction(self, expr, **kwargs) + return super().map_reduction(expr, *args, **kwargs) return result # }}} @@ -2320,8 +2431,8 @@ def map_reduction(self, expr, **kwargs): # {{{ index dependency finding -class IndexVariableFinder(CombineMapper): - def __init__(self, include_reduction_inames): +class IndexVariableFinder(CombineMapper[AbstractSet[Expression], []]): + def __init__(self, include_reduction_inames: bool) -> None: self.include_reduction_inames = include_reduction_inames def combine(self, values): @@ -2362,11 +2473,11 @@ def map_reduction(self, expr): # {{{ wildcard -> unique variable mapper class WildcardToUniqueVariableMapper(IdentityMapper): - def __init__(self, unique_var_name_factory): + def __init__(self, unique_var_name_factory: Callable[[], str]) -> None: self.unique_var_name_factory = unique_var_name_factory super().__init__() - def map_wildcard(self, expr): + def map_wildcard(self, expr: p.Wildcard) -> Variable: from pymbolic import var return var(self.unique_var_name_factory()) @@ -2375,7 +2486,7 @@ def map_wildcard(self, expr): # {{{ prime ("'") adder -class PrimeAdder(IdentityMapper): +class PrimeAdder(IdentityMapper[[]]): def __init__(self, which_vars): self.which_vars = which_vars @@ -2520,7 +2631,7 @@ def get_access_range(domain, subscript, assumptions=None, shape=None, # {{{ access range mapper -class BatchedAccessMapMapper(WalkMapper): +class BatchedAccessMapMapper(WalkMapper[[AbstractSet[str]]]): def __init__(self, kernel, var_names, overestimate=False): self.kernel = kernel @@ -2531,7 +2642,7 @@ def __init__(self, kernel, var_names, overestimate=False): self._var_names = set(var_names) super().__init__() - def get_access_range(self, var_name): + def get_access_range(self, var_name: str) -> isl.Set: loops_to_amaps = self.access_maps[var_name] if not loops_to_amaps: return None @@ -2540,9 +2651,9 @@ def get_access_range(self, var_name): from functools import reduce return reduce(operator.or_, (val.range() for val in loops_to_amaps.values())) - def map_subscript(self, expr, inames): + def map_subscript(self, expr: p.Subscript, inames: AbstractSet[str]) -> None: domain = self.kernel.get_inames_domain(inames) - WalkMapper.map_subscript(self, expr, inames) + super().map_subscript(expr, inames) assert isinstance(expr.aggregate, p.Variable) @@ -2585,19 +2696,21 @@ def map_subscript(self, expr, inames): else: self.access_maps[arg_name][inames] |= access_map - def map_linear_subscript(self, expr, inames): + def map_linear_subscript( + self, + expr: LinearSubscript, inames: AbstractSet[str] + ) -> None: self.rec(expr.index, inames) + assert isinstance(expr.aggregate, Variable) if expr.aggregate.name in self._var_names: self.bad_subscripts[expr.aggregate.name].append(expr) def map_reduction(self, expr, inames): return WalkMapper.map_reduction(self, expr, inames | set(expr.inames)) - def map_type_cast(self, expr, inames): - return self.rec(expr.child, inames) - - def map_sub_array_ref(self, expr, inames): + def map_sub_array_ref(self, expr: SubArrayRef, inames: AbstractSet[str]) -> None: + assert isinstance(expr.subscript.aggregate, Variable) arg_name = expr.subscript.aggregate.name if arg_name not in self._var_names: return @@ -2739,14 +2852,16 @@ def do_access_ranges_overlap_conservative( # {{{ is_expression_equal -def is_expression_equal(a, b): +def is_expression_equal(a: Expression, b: Expression) -> bool: if a == b: return True - if isinstance(a, p.Expression) or isinstance(b, p.Expression): + if isinstance(a, p.ExpressionNode) or isinstance(b, p.ExpressionNode): if a is None or b is None: return False + assert p.is_arithmetic_expression(a) + assert p.is_arithmetic_expression(b) maybe_zero = a - b from pymbolic import distribute @@ -2757,7 +2872,10 @@ def is_expression_equal(a, b): return False -def is_tuple_of_expressions_equal(a, b): +def is_tuple_of_expressions_equal( + a: Expression | None, + b: Expression | None, + ) -> bool: if a is None or b is None: if a is None and b is None: return True diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index 48ab04f89..00f1891bd 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -10,15 +10,6 @@ .. autoclass:: OpenCLTarget .. autoclass:: PyOpenCLTarget .. autoclass:: ISPCTarget - -References to Canonical Names ------------------------------ - -.. currentmodule:: loopy.target - -.. class:: TargetBase - - See :class:`loopy.TargetBase`. """ from __future__ import annotations @@ -52,9 +43,7 @@ Any, ClassVar, Generic, - Optional, Sequence, - Tuple, TypeVar, ) @@ -64,7 +53,7 @@ from loopy.codegen.result import CodeGenerationResult from loopy.target.execution import ExecutorBase from loopy.translation_unit import FunctionIdT, TranslationUnit - from loopy.typing import ExpressionT + from loopy.typing import Expression ASTType = TypeVar("ASTType") @@ -79,8 +68,8 @@ class TargetBase: # {{{ hashing/equality - hash_fields: ClassVar[Tuple[str, ...]] = () - comparison_fields: ClassVar[Tuple[str, ...]] = () + hash_fields: ClassVar[tuple[str, ...]] = () + comparison_fields: ClassVar[tuple[str, ...]] = () def __hash__(self): # NOTE: _hash_value may vanish during pickling @@ -226,7 +215,7 @@ def get_function_definition( def get_function_declaration( self, codegen_state: CodeGenerationState, codegen_result: CodeGenerationResult, schedule_index: int - ) -> Tuple[Sequence[Tuple[str, str]], Optional[ASTType]]: + ) -> tuple[Sequence[tuple[str, str]], ASTType | None]: """Returns preambles and the AST for the function declaration.""" raise NotImplementedError @@ -240,8 +229,8 @@ def get_temporary_decls(self, codegen_state: CodeGenerationState, def get_kernel_call(self, codegen_state: CodeGenerationState, subkernel_name: str, - gsize: Tuple[ExpressionT, ...], - lsize: Tuple[ExpressionT, ...]) -> Optional[ASTType]: + gsize: tuple[Expression, ...], + lsize: tuple[Expression, ...]) -> ASTType | None: raise NotImplementedError() @property @@ -331,7 +320,7 @@ def get_function_definition(self, codegen_state, codegen_result, def get_function_declaration( self, codegen_state, codegen_result, schedule_index, - ) -> Tuple[Sequence[Tuple[str, str]], None]: + ) -> tuple[Sequence[tuple[str, str]], None]: return [], None def get_temporary_decls(self, codegen_state, schedule_index): diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 9f227bd37..a4990b5c6 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -1,4 +1,5 @@ """Plain C target and base for other C-family languages.""" +from __future__ import annotations __copyright__ = "Copyright (C) 2015 Andreas Kloeckner" @@ -24,9 +25,9 @@ """ import re -from typing import Any, Optional, Sequence, Tuple, cast +from typing import TYPE_CHECKING, Any, Sequence, cast -import numpy as np # noqa +import numpy as np import pymbolic.primitives as p from cgen import ( @@ -43,10 +44,7 @@ from pymbolic.mapper.stringifier import PREC_NONE from pytools import memoize_method -from loopy.codegen import CodeGenerationState -from loopy.codegen.result import CodeGenerationResult from loopy.diagnostic import LoopyError, LoopyTypeError -from loopy.kernel import LoopKernel from loopy.kernel.array import ArrayBase, FixedStrideArrayDimTag from loopy.kernel.data import ( AddressSpace, @@ -57,14 +55,20 @@ ValueArg, ) from loopy.kernel.function_interface import ScalarCallable -from loopy.schedule import CallKernel from loopy.symbolic import IdentityMapper from loopy.target import ASTBuilderBase, DummyHostASTBuilder, TargetBase -from loopy.target.execution import ExecutorBase from loopy.tools import remove_common_indentation -from loopy.translation_unit import FunctionIdT, TranslationUnit from loopy.types import LoopyType, NumpyType, to_loopy_type -from loopy.typing import ExpressionT, auto +from loopy.typing import Expression, auto + + +if TYPE_CHECKING: + from loopy.codegen import CodeGenerationState + from loopy.codegen.result import CodeGenerationResult + from loopy.kernel import LoopKernel + from loopy.schedule import CallKernel + from loopy.target.execution import ExecutorBase + from loopy.translation_unit import FunctionIdT, TranslationUnit __doc__ = """ @@ -259,7 +263,7 @@ def _preamble_generator(preamble_info, func_qualifier="inline"): inline {res_ctype} {func.c_name}({base_ctype} x, {exp_ctype} n) {{ if (n == 0) return 1; - {re.sub("^", 14*" ", signed_exponent_preamble, flags=re.M)} + {re.sub(r"^", 14*" ", signed_exponent_preamble, flags=re.M)} {res_ctype} y = 1; @@ -414,8 +418,8 @@ class CFamilyTarget(TargetBase): usable as a common base for C99, C++, OpenCL, CUDA, and the like. """ - hash_fields = TargetBase.hash_fields + ("fortran_abi",) - comparison_fields = TargetBase.comparison_fields + ("fortran_abi",) + hash_fields = (*TargetBase.hash_fields, "fortran_abi") + comparison_fields = (*TargetBase.comparison_fields, "fortran_abi") def __init__(self, fortran_abi=False): self.fortran_abi = fortran_abi @@ -772,16 +776,13 @@ class CFamilyASTBuilder(ASTBuilderBase[Generable]): def symbol_manglers(self): return ( - super().symbol_manglers() + [ - c_symbol_mangler - ]) + [*super().symbol_manglers(), c_symbol_mangler]) def preamble_generators(self): return ( - super().preamble_generators() + [ - lambda preamble_info: _preamble_generator(preamble_info, - self.preamble_function_qualifier), - ]) + [*super().preamble_generators(), + lambda preamble_info: _preamble_generator( + preamble_info, self.preamble_function_qualifier)]) @property def known_callables(self): @@ -794,9 +795,12 @@ def known_callables(self): # {{{ code generation def get_function_definition( - self, codegen_state: CodeGenerationState, + self, + codegen_state: CodeGenerationState, codegen_result: CodeGenerationResult, - schedule_index: int, function_decl: Generable, function_body: Generable + schedule_index: int, + function_decl: Generable, + function_body: Generable ) -> Generable: kernel = codegen_state.kernel assert kernel.linearization is not None @@ -828,37 +832,44 @@ def get_function_definition( self.get_temporary_var_declarator(codegen_state, tv)) if tv.initializer is not None: - decl = Initializer(decl, generate_array_literal( + init_decl = Initializer(decl, generate_array_literal( codegen_state, tv, tv.initializer)) + else: + init_decl = decl + + result.append(init_decl) - result.append(decl) + assert isinstance(function_decl, FunctionDeclarationWrapper) + if not isinstance(function_body, Block): + function_body = Block([function_body]) fbody = FunctionBody(function_decl, function_body) + if not result: return fbody else: - return Collection(result+[Line(), fbody]) + return Collection([*result, Line(), fbody]) def get_function_declaration( self, codegen_state: CodeGenerationState, codegen_result: CodeGenerationResult, schedule_index: int - ) -> Tuple[Sequence[Tuple[str, str]], Generable]: + ) -> tuple[Sequence[tuple[str, str]], Generable]: kernel = codegen_state.kernel assert codegen_state.kernel.linearization is not None subkernel_name = cast( - CallKernel, + "CallKernel", codegen_state.kernel.linearization[schedule_index] ).kernel_name from cgen import FunctionDeclaration, Value - name = codegen_result.current_program(codegen_state).name + name_str = codegen_result.current_program(codegen_state).name if self.target.fortran_abi: - name += "_" + name_str += "_" if codegen_state.is_entrypoint: - name = Value("void", name) + name: Declarator = Value("void", name_str) # subkernel launches occur only as part of entrypoint kernels for now from loopy.schedule.tools import get_subkernel_arg_info @@ -866,7 +877,7 @@ def get_function_declaration( passed_names = skai.passed_names written_names = skai.written_names else: - name = Value("static void", name) + name = Value("static void", name_str) passed_names = [arg.name for arg in kernel.args] written_names = kernel.get_written_variables() @@ -880,8 +891,8 @@ def get_function_declaration( def get_kernel_call(self, codegen_state: CodeGenerationState, subkernel_name: str, - gsize: Tuple[ExpressionT, ...], - lsize: Tuple[ExpressionT, ...]) -> Optional[Generable]: + gsize: tuple[Expression, ...], + lsize: tuple[Expression, ...]) -> Generable | None: return None def emit_temp_var_decl_for_tv_with_base_storage(self, @@ -895,11 +906,11 @@ def emit_temp_var_decl_for_tv_with_base_storage(self, assert isinstance(tv.address_space, AddressSpace) ecm = codegen_state.expression_to_code_mapper - cast_decl = POD(self, tv.dtype, "") - temp_var_decl = POD(self, tv.dtype, tv.name) + cast_decl: Declarator = POD(self, tv.dtype, "") + temp_var_decl: Declarator = POD(self, tv.dtype, tv.name) if tv._base_storage_access_may_be_aliasing: - ptrtype = _ConstPointer + ptrtype: type[Pointer] = _ConstPointer else: # The 'restrict' part of this is a complete lie--of course # all these temporaries are aliased. But we're promising to @@ -948,8 +959,6 @@ def get_temporary_decls(self, codegen_state, schedule_index): sub_knl_temps | supporting_temporary_names(kernel, sub_knl_temps)) - ecm = self.get_expression_to_code_mapper(codegen_state) - for tv_name in sorted(sub_knl_temps): tv = kernel.temporary_variables[tv_name] if not tv.base_storage: @@ -1021,7 +1030,7 @@ def wrap_global_constant(self, decl: Declarator) -> Declarator: def get_value_arg_declaraotor( self, name: str, dtype: LoopyType, is_written: bool) -> Declarator: - result = POD(self, dtype, name) + result: Declarator = POD(self, dtype, name) if not is_written: from cgen import Const @@ -1051,7 +1060,7 @@ def get_array_base_declarator(self, ary: ArrayBase) -> Declarator: def get_array_arg_declarator( self, arg: ArrayArg, is_written: bool) -> Declarator: from cgen import RestrictPointer - arg_decl = RestrictPointer( + arg_decl: Declarator = RestrictPointer( self.wrap_decl_for_address_space( self.get_array_base_declarator(arg), arg.address_space)) @@ -1073,10 +1082,10 @@ def get_temporary_arg_decl( from cgen import RestrictPointer assert temp_var.address_space is not auto - arg_decl = RestrictPointer( + arg_decl: Declarator = RestrictPointer( self.wrap_decl_for_address_space( self.get_array_base_declarator(temp_var), - cast(AddressSpace, temp_var.address_space))) + cast("AddressSpace", temp_var.address_space))) if not is_written: arg_decl = Const(arg_decl) @@ -1281,7 +1290,7 @@ def emit_sequential_loop(self, codegen_state, iname, iname_dtype, inner) if hints: - return Collection(list(hints) + [loop]) + return Collection([*list(hints), loop]) else: return loop @@ -1339,8 +1348,7 @@ def map_expression(self, expr): def map_function_decl_wrapper(self, node): self.decls.append(node.subdecl) - return super()\ - .map_function_decl_wrapper(node) + return super().map_function_decl_wrapper(node) def generate_header(kernel, codegen_result=None): @@ -1397,9 +1405,7 @@ def get_dtype_registry(self): class CASTBuilder(CFamilyASTBuilder): def preamble_generators(self): return ( - super().preamble_generators() + [ - c99_preamble_generator, - ]) + [*super().preamble_generators(), c99_preamble_generator]) # }}} diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py index 9cde501a7..270c3d0dc 100644 --- a/loopy/target/c/c_execution.py +++ b/loopy/target/c/c_execution.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2017 Nick Curtis" __license__ = """ @@ -25,30 +28,34 @@ import os import tempfile from dataclasses import dataclass -from typing import Any, Callable, Optional, Sequence, Tuple, Union +from typing import TYPE_CHECKING, Any, Callable, ClassVar, Sequence import numpy as np from codepy.jit import compile_from_string from codepy.toolchain import GCCToolchain, ToolchainGuessError, guess_toolchain -from immutables import Map from pytools import memoize_method from pytools.codegen import CodeGenerator, Indentation from pytools.prefork import ExecError -from loopy.codegen.result import GeneratedProgram -from loopy.kernel import LoopKernel from loopy.kernel.array import ArrayBase -from loopy.kernel.data import ArrayArg -from loopy.schedule.tools import KernelArgInfo from loopy.target.execution import ( ExecutionWrapperGeneratorBase, ExecutorBase, get_highlighted_code, ) -from loopy.translation_unit import TranslationUnit from loopy.types import LoopyType -from loopy.typing import ExpressionT + + +if TYPE_CHECKING: + from immutables import Map + + from loopy.codegen.result import GeneratedProgram + from loopy.kernel import LoopKernel + from loopy.kernel.data import ArrayArg + from loopy.schedule.tools import KernelArgInfo + from loopy.translation_unit import TranslationUnit + from loopy.typing import Expression logger = logging.getLogger(__name__) @@ -105,7 +112,7 @@ def handle_non_numpy_arg(self, gen, arg): def handle_alloc( self, gen: CodeGenerator, arg: ArrayArg, - strify: Callable[[Union[ExpressionT, Tuple[ExpressionT]]], str], + strify: Callable[[Expression | tuple[Expression]], str], skip_arg_checks: bool) -> None: """ Handle allocation of non-specified arguments for C-execution @@ -324,7 +331,7 @@ def build(self, name, code, debug=False, wait_on_error=None, c_fname = self._tempname("code." + self.source_suffix) # build object - _, mod_name, ext_file, recompiled = \ + _, _mod_name, ext_file, recompiled = \ compile_from_string( self.toolchain.copy( cflags=self.toolchain.cflags+list(extra_build_options)), @@ -365,15 +372,15 @@ def __init__(self, toolchain=None, # {{{ placeholder till ctypes fixes: https://github.com/python/cpython/issues/61103 class Complex64(ctypes.Structure): - _fields_ = [("real", ctypes.c_float), ("imag", ctypes.c_float)] + _fields_: ClassVar = [("real", ctypes.c_float), ("imag", ctypes.c_float)] class Complex128(ctypes.Structure): - _fields_ = [("real", ctypes.c_double), ("imag", ctypes.c_double)] + _fields_: ClassVar = [("real", ctypes.c_double), ("imag", ctypes.c_double)] class Complex256(ctypes.Structure): - _fields_ = [("real", ctypes.c_longdouble), ("imag", ctypes.c_longdouble)] + _fields_: ClassVar = [("real", ctypes.c_longdouble), ("imag", ctypes.c_longdouble)] _NUMPY_COMPLEX_TYPE_TO_CTYPE = { @@ -425,7 +432,7 @@ class CompiledCKernel: def __init__(self, kernel: LoopKernel, devprog: GeneratedProgram, passed_names: Sequence[str], dev_code: str, - comp: Optional["CCompiler"] = None): + comp: CCompiler | None = None): # get code and build self.code = dev_code self.comp = comp if comp is not None else CCompiler() @@ -473,7 +480,7 @@ class CExecutor(ExecutorBase): .. automethod:: __call__ """ - def __init__(self, program, entrypoint, compiler: Optional["CCompiler"] = None): + def __init__(self, program, entrypoint, compiler: CCompiler | None = None): """ :arg kernel: may be a loopy.LoopKernel, a generator returning kernels (a warning will be issued if more than one is returned). If the @@ -493,7 +500,7 @@ def get_wrapper_generator(self): @memoize_method def translation_unit_info(self, - arg_to_dtype: Optional[Map[str, LoopyType]] = None) -> _KernelInfo: + arg_to_dtype: Map[str, LoopyType] | None = None) -> _KernelInfo: t_unit = self.get_typed_and_scheduled_translation_unit(arg_to_dtype) from loopy.codegen import generate_code_v2 diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index 0c15faa58..b1723e9d6 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ @@ -21,7 +24,7 @@ """ -from typing import Optional +from typing import TYPE_CHECKING import numpy as np @@ -44,11 +47,14 @@ from loopy.diagnostic import LoopyError from loopy.expression import dtype_to_type_context -from loopy.symbolic import TypeCast from loopy.target.c import CExpression from loopy.type_inference import TypeReader from loopy.types import LoopyType -from loopy.typing import ExpressionT, is_integer +from loopy.typing import Expression, is_integer + + +if TYPE_CHECKING: + from loopy.symbolic import TypeCast __doc__ = """ @@ -92,7 +98,7 @@ def with_assignments(self, names_to_vars): type_inf_mapper = self.type_inf_mapper.with_assignments(names_to_vars) return type(self)(self.codegen_state, self.fortran_abi, type_inf_mapper) - def infer_type(self, expr: ExpressionT) -> LoopyType: + def infer_type(self, expr: Expression) -> LoopyType: result = self.type_inf_mapper(expr) assert isinstance(result, LoopyType) @@ -123,7 +129,7 @@ def wrap_in_typecast(self, actual_type: LoopyType, needed_type: LoopyType, s): return s - def rec(self, expr, type_context=None, needed_type: Optional[LoopyType] = None): # type: ignore[override] + def rec(self, expr, type_context=None, needed_type: LoopyType | None = None): # type: ignore[override] result = super().rec(expr, type_context) if needed_type is None: @@ -476,7 +482,7 @@ def map_constant(self, expr, type_context): elif np.isfinite(expr): if type_context == "f": - return Literal(repr(float((expr)))+"f") + return Literal(repr(float(expr))+"f") elif type_context == "d": return Literal(repr(float(expr))) elif type_context in ["i", "b"]: @@ -641,7 +647,7 @@ def map_constant(self, expr, prec): # FIXME: Add type suffixes? return repr(int(expr)) elif isinstance(expr, np.float32): - return f"{repr(float(expr))}f" + return f"{float(expr)!r}f" elif isinstance(expr, np.float64): return repr(float(expr)) else: diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index afeb5cee2..50d2ac7fe 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -1,4 +1,5 @@ """CUDA target independent of PyCUDA.""" +from __future__ import annotations __copyright__ = "Copyright (C) 2015 Andreas Kloeckner" @@ -23,16 +24,14 @@ THE SOFTWARE. """ -from typing import Sequence, Tuple +from typing import TYPE_CHECKING, Sequence import numpy as np -from cgen import Const, Declarator, Generable +from cgen import Const, Declarator, Generable, Pointer from pymbolic import var from pytools import memoize_method -from loopy.codegen import CodeGenerationState -from loopy.codegen.result import CodeGenerationResult from loopy.diagnostic import LoopyError, LoopyTypeError from loopy.kernel.array import ArrayBase, FixedStrideArrayDimTag, VectorArrayDimTag from loopy.kernel.data import ( @@ -48,6 +47,11 @@ from loopy.types import NumpyType +if TYPE_CHECKING: + from loopy.codegen import CodeGenerationState + from loopy.codegen.result import CodeGenerationResult + + # {{{ vector types class vec: # noqa @@ -186,7 +190,7 @@ def cuda_with_types(self, arg_id_to_dtype, callables_table): input_dtype = arg_id_to_dtype[0] - scalar_dtype, offset, field_name = input_dtype.fields["x"] + scalar_dtype, _offset, _field_name = input_dtype.fields["x"] return_dtype = scalar_dtype return self.copy(arg_id_to_dtype={0: input_dtype, 1: input_dtype, -1: return_dtype}) @@ -332,7 +336,7 @@ def known_callables(self): def get_function_declaration( self, codegen_state: CodeGenerationState, codegen_result: CodeGenerationResult, schedule_index: int - ) -> Tuple[Sequence[Tuple[str, str]], Generable]: + ) -> tuple[Sequence[tuple[str, str]], Generable]: preambles, fdecl = super().get_function_declaration( codegen_state, codegen_result, schedule_index) @@ -369,8 +373,7 @@ def get_function_declaration( def preamble_generators(self): return ( - super().preamble_generators() + [ - cuda_preamble_generator]) + [*super().preamble_generators(), cuda_preamble_generator]) # }}} @@ -449,7 +452,7 @@ def get_array_base_declarator(self, ary: ArrayBase) -> Declarator: def get_array_arg_declarator( self, arg: ArrayArg, is_written: bool) -> Declarator: from cgen.cuda import CudaRestrictPointer - arg_decl = CudaRestrictPointer( + arg_decl: Declarator = CudaRestrictPointer( self.get_array_base_declarator(arg)) if not is_written: @@ -478,11 +481,11 @@ def emit_temp_var_decl_for_tv_with_base_storage(self, assert tv.base_storage is not None ecm = codegen_state.expression_to_code_mapper - cast_decl = POD(self, tv.dtype, "") - temp_var_decl = POD(self, tv.dtype, tv.name) + cast_decl: Declarator = POD(self, tv.dtype, "") + temp_var_decl: Declarator = POD(self, tv.dtype, tv.name) if tv._base_storage_access_may_be_aliasing: - ptrtype = _ConstPointer + ptrtype: type[Pointer] = _ConstPointer else: # The 'restrict' part of this is a complete lie--of course # all these temporaries are aliased. But we're promising to diff --git a/loopy/target/execution.py b/loopy/target/execution.py index 2443a1420..cb737f95a 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012-17 Andreas Kloeckner, Nick Curtis" __license__ = """ @@ -25,17 +28,11 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from typing import ( + TYPE_CHECKING, Any, Callable, - Dict, - FrozenSet, - List, Mapping, - Optional, Sequence, - Set, - Tuple, - Union, cast, ) @@ -54,14 +51,17 @@ from loopy.kernel import KernelState, LoopKernel from loopy.kernel.data import ArrayArg, _ArraySeparationInfo, auto -from loopy.schedule.tools import KernelArgInfo from loopy.tools import LoopyKeyBuilder, caches -from loopy.translation_unit import TranslationUnit from loopy.types import LoopyType, NumpyType -from loopy.typing import ExpressionT, integer_expr_or_err +from loopy.typing import Expression, integer_expr_or_err from loopy.version import DATA_MODEL_VERSION +if TYPE_CHECKING: + from loopy.schedule.tools import KernelArgInfo + from loopy.translation_unit import TranslationUnit + + # {{{ object array argument packing class SeparateArrayPackingController: @@ -74,10 +74,10 @@ class SeparateArrayPackingController: It also repacks outgoing arrays of this type back into an object array. """ - def __init__(self, packing_info: Dict[str, _ArraySeparationInfo]) -> None: + def __init__(self, packing_info: dict[str, _ArraySeparationInfo]) -> None: # These must work to index tuples if 1D. def untuple_length_1_indices( - ind: Tuple[int, ...]) -> Union[int, Tuple[int, ...]]: + ind: tuple[int, ...]) -> int | tuple[int, ...]: if len(ind) == 1: return ind[0] else: @@ -91,7 +91,7 @@ def untuple_length_1_indices( for name, sep_info in packing_info.items() } - def __call__(self, kernel_kwargs: Dict[str, Any]) -> Dict[str, Any]: + def __call__(self, kernel_kwargs: dict[str, Any]) -> dict[str, Any]: kernel_kwargs = kernel_kwargs.copy() for name, ind_to_subary_name in self.packing_info.items(): @@ -109,7 +109,7 @@ def __call__(self, kernel_kwargs: Dict[str, Any]) -> Dict[str, Any]: # {{{ ExecutionWrapperGeneratorBase -def _str_to_expr(name_or_expr: Union[str, ExpressionT]) -> ExpressionT: +def _str_to_expr(name_or_expr: str | Expression) -> Expression: if isinstance(name_or_expr, str): return var(name_or_expr) else: @@ -118,14 +118,14 @@ def _str_to_expr(name_or_expr: Union[str, ExpressionT]) -> ExpressionT: @dataclass(frozen=True) class _ArgFindingEquation: - lhs: ExpressionT - rhs: ExpressionT + lhs: Expression + rhs: Expression # Arg finding code is sorted by priority, all equations (across all unknowns) # of lowest priority first. order: int - based_on_names: FrozenSet[str] + based_on_names: frozenset[str] class ExecutionWrapperGeneratorBase(ABC): @@ -172,11 +172,11 @@ def generate_integer_arg_finding_from_array_data( from loopy.kernel.array import get_strides from loopy.kernel.data import ArrayArg from loopy.symbolic import DependencyMapper, StringifyMapper - dep_map = DependencyMapper() + dep_map: DependencyMapper[[]] = DependencyMapper() # {{{ find equations - equations: List[_ArgFindingEquation] = [] + equations: list[_ArgFindingEquation] = [] for arg_name in kai.passed_arg_names: arg = kernel.arg_dict[arg_name] @@ -252,7 +252,7 @@ def generate_integer_arg_finding_from_array_data( # {{{ regroup equations by unknown order_to_unknown_to_equations: \ - Dict[int, Dict[str, List[_ArgFindingEquation]]] = {} + dict[int, dict[str, list[_ArgFindingEquation]]] = {} for eqn in equations: deps = dep_map(eqn.rhs) @@ -261,8 +261,8 @@ def generate_integer_arg_finding_from_array_data( unknown_var, = deps order_to_unknown_to_equations \ .setdefault(eqn.order, {}) \ - .setdefault(cast(Variable, unknown_var).name, []) \ - .append((eqn)) + .setdefault(cast("Variable", unknown_var).name, []) \ + .append(eqn) else: # Zero deps: nothing to determine, forget about it. # 2+ deps: not implemented @@ -287,7 +287,7 @@ def generate_integer_arg_finding_from_array_data( key=lambda eqn: eqn.order) subgen = CodeGenerator() - seen_based_on_names: Set[FrozenSet[str]] = set() + seen_based_on_names: set[frozenset[str]] = set() if_or_elif = "if" @@ -389,7 +389,7 @@ def handle_non_numpy_arg(self, gen: CodeGenerator, arg): def handle_alloc( self, gen: CodeGenerator, arg: ArrayArg, - strify: Callable[[Union[ExpressionT, Tuple[ExpressionT]]], str], + strify: Callable[[Expression | tuple[Expression]], str], skip_arg_checks: bool) -> None: """ Handle allocation of non-specified arguments for C-execution @@ -534,7 +534,7 @@ def strify_allowing_none(shape_axis): else: return strify(shape_axis) - def strify_tuple(t: Optional[Tuple[ExpressionT, ...]]) -> str: + def strify_tuple(t: tuple[Expression, ...] | None) -> str: if t is None: return "None" if len(t) == 0: @@ -735,7 +735,7 @@ def __call__(self, program, entrypoint, codegen_result): typed_and_scheduled_cache: WriteOncePersistentDict[ - Tuple[str, TranslationUnit, Optional[Mapping[str, LoopyType]]], + tuple[str, TranslationUnit, Mapping[str, LoopyType] | None], TranslationUnit ] = WriteOncePersistentDict( "loopy-typed-and-scheduled-cache-v1-"+DATA_MODEL_VERSION, @@ -747,7 +747,7 @@ def __call__(self, program, entrypoint, codegen_result): invoker_cache: WriteOncePersistentDict[ - Tuple[str, TranslationUnit, str], + tuple[str, TranslationUnit, str], str ] = WriteOncePersistentDict( "loopy-invoker-cache-v10-"+DATA_MODEL_VERSION, @@ -767,7 +767,7 @@ class ExecutorBase: .. automethod:: __call__ """ - packing_controller: Optional[SeparateArrayPackingController] + packing_controller: SeparateArrayPackingController | None def __init__(self, t_unit: TranslationUnit, entrypoint: str): self.t_unit = t_unit @@ -817,7 +817,7 @@ def check_for_required_array_arguments(self, input_args): "your argument.") def get_typed_and_scheduled_translation_unit_uncached( - self, arg_to_dtype: Optional[Map[str, LoopyType]] + self, arg_to_dtype: Map[str, LoopyType] | None ) -> TranslationUnit: t_unit = self.t_unit @@ -854,7 +854,7 @@ def get_typed_and_scheduled_translation_unit_uncached( return t_unit def get_typed_and_scheduled_translation_unit( - self, arg_to_dtype: Optional[Map[str, LoopyType]] + self, arg_to_dtype: Map[str, LoopyType] | None ) -> TranslationUnit: from loopy import CACHING_ENABLED @@ -876,7 +876,7 @@ def get_typed_and_scheduled_translation_unit( return t_unit - def arg_to_dtype(self, kwargs) -> Optional[Map[str, LoopyType]]: + def arg_to_dtype(self, kwargs) -> Map[str, LoopyType] | None: if not self.has_runtime_typed_args: return None @@ -904,7 +904,7 @@ def get_highlighted_code(self, entrypoint, arg_to_dtype=None, code=None): def get_code( self, entrypoint: str, - arg_to_dtype: Optional[Map[str, LoopyType]] = None) -> str: + arg_to_dtype: Map[str, LoopyType] | None = None) -> str: kernel = self.get_typed_and_scheduled_translation_unit(arg_to_dtype) from loopy.codegen import generate_code_v2 diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 1cd7a5bd2..e493ee3e9 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -1,4 +1,5 @@ """Target for Intel ISPC.""" +from __future__ import annotations __copyright__ = "Copyright (C) 2015 Andreas Kloeckner" @@ -24,9 +25,9 @@ """ -from typing import Sequence, Tuple, cast +from typing import TYPE_CHECKING, Sequence, cast -import numpy as np # noqa +import numpy as np import pymbolic.primitives as p from cgen import Collection, Const, Declarator, Generable @@ -34,16 +35,19 @@ from pymbolic.mapper.stringifier import PREC_NONE from pytools import memoize_method -from loopy.codegen import CodeGenerationState -from loopy.codegen.result import CodeGenerationResult from loopy.diagnostic import LoopyError from loopy.kernel.data import AddressSpace, ArrayArg, TemporaryVariable -from loopy.schedule import CallKernel from loopy.symbolic import Literal from loopy.target.c import CFamilyASTBuilder, CFamilyTarget from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper -from loopy.types import LoopyType -from loopy.typing import ExpressionT + + +if TYPE_CHECKING: + from loopy.codegen import CodeGenerationState + from loopy.codegen.result import CodeGenerationResult + from loopy.schedule import CallKernel + from loopy.types import LoopyType + from loopy.typing import Expression # {{{ expression mapper @@ -114,7 +118,7 @@ def map_subscript(self, expr, type_context): and ary.address_space == AddressSpace.PRIVATE): # generate access code for access to private-index temporaries - gsize, lsize = self.kernel.get_grid_size_upper_bounds_as_exprs() + _gsize, lsize = self.kernel.get_grid_size_upper_bounds_as_exprs() if lsize: lsize, = lsize from pymbolic import evaluate @@ -174,7 +178,7 @@ class ISPCTarget(CFamilyTarget): device_program_name_suffix = "_inner" def pre_codegen_entrypoint_check(self, kernel, callables_table): - gsize, lsize = kernel.get_grid_size_upper_bounds_as_exprs( + _gsize, lsize = kernel.get_grid_size_upper_bounds_as_exprs( callables_table) if len(lsize) > 1: for ls_i in lsize[1:]: @@ -208,13 +212,13 @@ class ISPCASTBuilder(CFamilyASTBuilder): def get_function_declaration( self, codegen_state: CodeGenerationState, codegen_result: CodeGenerationResult, schedule_index: int - ) -> Tuple[Sequence[Tuple[str, str]], Generable]: + ) -> tuple[Sequence[tuple[str, str]], Generable]: name = codegen_result.current_program(codegen_state).name kernel = codegen_state.kernel assert codegen_state.kernel.linearization is not None subkernel_name = cast( - CallKernel, + "CallKernel", codegen_state.kernel.linearization[schedule_index] ).kernel_name @@ -237,7 +241,7 @@ def get_function_declaration( for arg_name in passed_names] if codegen_state.is_generating_device_code: - result = ISPCTask( + result: Declarator = ISPCTask( FunctionDeclaration( Value("void", name), arg_decls)) @@ -252,8 +256,8 @@ def get_function_declaration( def get_kernel_call(self, codegen_state: CodeGenerationState, subkernel_name: str, - gsize: Tuple[ExpressionT, ...], - lsize: Tuple[ExpressionT, ...]) -> Generable: + gsize: tuple[Expression, ...], + lsize: tuple[Expression, ...]) -> Generable: kernel = codegen_state.kernel ecm = self.get_expression_to_code_mapper(codegen_state) @@ -323,7 +327,7 @@ def get_array_arg_declarator( self, arg: ArrayArg, is_written: bool) -> Declarator: # FIXME restrict? from cgen.ispc import ISPCUniform, ISPCUniformPointer - decl = ISPCUniform( + decl: Declarator = ISPCUniform( ISPCUniformPointer(self.get_array_base_declarator(arg))) if not is_written: @@ -499,7 +503,7 @@ def emit_sequential_loop(self, codegen_state, iname, iname_dtype, inner) if hints: - return Collection(list(hints) + [loop]) + return Collection([*list(hints), loop]) else: return loop diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 14383e54f..d14dd9e30 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -1,4 +1,5 @@ """OpenCL target independent of PyOpenCL.""" +from __future__ import annotations __copyright__ = "Copyright (C) 2015 Andreas Kloeckner" @@ -23,16 +24,13 @@ THE SOFTWARE. """ -from typing import Sequence, Tuple +from typing import TYPE_CHECKING, Literal, Sequence import numpy as np -from cgen import Declarator, Generable from pymbolic import var from pytools import memoize_method -from loopy.codegen import CodeGenerationState -from loopy.codegen.result import CodeGenerationResult from loopy.diagnostic import LoopyError, LoopyTypeError from loopy.kernel.array import ArrayBase, FixedStrideArrayDimTag, VectorArrayDimTag from loopy.kernel.data import AddressSpace, ConstantArg, ImageArg @@ -42,6 +40,13 @@ from loopy.types import NumpyType +if TYPE_CHECKING: + from cgen import Declarator, Generable + + from loopy.codegen import CodeGenerationState + from loopy.codegen.result import CodeGenerationResult + + # {{{ dtype registry wrappers @@ -321,7 +326,7 @@ def with_types(self, arg_id_to_dtype, callables_table): callables_table) dtype = arg_id_to_dtype[0] - scalar_dtype, offset, field_name = dtype.numpy_dtype.fields["s0"] + scalar_dtype, _offset, _field_name = dtype.numpy_dtype.fields["s0"] return ( self.copy(name_in_target=name, arg_id_to_dtype={-1: NumpyType(scalar_dtype), 0: dtype, 1: dtype}), @@ -618,15 +623,12 @@ def known_callables(self): def symbol_manglers(self): return ( - super().symbol_manglers() + [ - opencl_symbol_mangler - ]) + [*super().symbol_manglers(), opencl_symbol_mangler]) def preamble_generators(self): return ( - super().preamble_generators() + [ - opencl_preamble_generator]) + [*super().preamble_generators(), opencl_preamble_generator]) # }}} @@ -635,7 +637,7 @@ def preamble_generators(self): def get_function_declaration( self, codegen_state: CodeGenerationState, codegen_result: CodeGenerationResult, schedule_index: int - ) -> Tuple[Sequence[Tuple[str, str]], Generable]: + ) -> tuple[Sequence[tuple[str, str]], Generable]: preambles, fdecl = super().get_function_declaration( codegen_state, codegen_result, schedule_index) @@ -764,12 +766,9 @@ def get_constant_arg_declarator(self, arg: ConstantArg) -> Declarator: def get_image_arg_declarator( self, arg: ImageArg, is_written: bool) -> Declarator: - if is_written: - mode = "w" - else: - mode = "r" - from cgen.opencl import CLImage + + mode: Literal["r", "w"] = "w" if is_written else "r" return CLImage(arg.num_target_axes(), mode, arg.name) # }}} diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index e4da6cd8b..9add453d7 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -26,17 +26,17 @@ """ import logging -from typing import TYPE_CHECKING, Any, List, Optional, Sequence, Tuple, Union, cast +from typing import TYPE_CHECKING, Any, Sequence, cast from warnings import warn import numpy as np -import genpy import pymbolic.primitives as p from cgen import ( Block, Collection, Const, + Declarator, FunctionBody, Generable, Initializer, @@ -45,10 +45,7 @@ ) from cgen.opencl import CLGlobal -from loopy.codegen import CodeGenerationState -from loopy.codegen.result import CodeGenerationResult from loopy.diagnostic import LoopyError, LoopyTypeError -from loopy.kernel import LoopKernel from loopy.kernel.data import ( ArrayArg, ConstantArg, @@ -63,18 +60,23 @@ OpenCLCASTBuilder, OpenCLTarget, ) -from loopy.target.pyopencl_execution import PyOpenCLExecutor from loopy.target.python import PythonASTBuilderBase -from loopy.translation_unit import FunctionIdT, TranslationUnit from loopy.types import NumpyType -from loopy.typing import ExpressionT logger = logging.getLogger(__name__) if TYPE_CHECKING: + import genpy import pyopencl as cl + from loopy.codegen import CodeGenerationState + from loopy.codegen.result import CodeGenerationResult + from loopy.kernel import LoopKernel + from loopy.target.pyopencl_execution import PyOpenCLExecutor + from loopy.translation_unit import FunctionIdT, TranslationUnit + from loopy.typing import Expression + # {{{ pyopencl function scopers @@ -506,23 +508,23 @@ class PyOpenCLTarget(OpenCLTarget): """ # FIXME make prefixes conform to naming rules - # (see Reference: Loopy’s Model of a Kernel) + # (see Reference: Loopy's Model of a Kernel) host_program_name_prefix = "_lpy_host_" host_program_name_suffix = "" # FIXME Not yet complete - limit_arg_size_nbytes: Optional[int] + limit_arg_size_nbytes: int | None pointer_size_nbytes: int def __init__( self, device=None, *, pyopencl_module_name: str = "_lpy_cl", atomics_flavor=None, use_int8_for_bool: bool = True, - limit_arg_size_nbytes: Optional[int] = None, - pointer_size_nbytes: Optional[int] = None + limit_arg_size_nbytes: int | None = None, + pointer_size_nbytes: int | None = None ) -> None: # This ensures the dtype registry is populated. - import pyopencl.tools # noqa + import pyopencl.tools super().__init__( atomics_flavor=atomics_flavor, @@ -553,10 +555,8 @@ def device(self): return None # NB: Not including 'device', as that is handled specially here. - hash_fields = OpenCLTarget.hash_fields + ( - "pyopencl_module_name",) - comparison_fields = OpenCLTarget.comparison_fields + ( - "pyopencl_module_name",) + hash_fields = (*OpenCLTarget.hash_fields, "pyopencl_module_name") + comparison_fields = (*OpenCLTarget.comparison_fields, "pyopencl_module_name") def get_host_ast_builder(self): return PyOpenCLPythonASTBuilder(self) @@ -621,7 +621,7 @@ def get_kernel_executor_cache_key(self, queue, **kwargs): # type-ignore because we're making things from *args: Any more concrete, # and mypy doesn't like it. def get_kernel_executor(self, t_unit: TranslationUnit, # type: ignore[override] - queue_or_context: Union[cl.CommandQueue, cl.Context], + queue_or_context: cl.CommandQueue | cl.Context, *args: Any, entrypoint: FunctionIdT, **kwargs: Any ) -> PyOpenCLExecutor: from pyopencl import CommandQueue @@ -648,7 +648,7 @@ def generate_value_arg_setup( import loopy as lp from loopy.kernel.array import ArrayBase - result: List[genpy.Generable] = [] + result: list[genpy.Generable] = [] gen = result.append buf_indices_and_args = [] @@ -733,10 +733,10 @@ def generate_array_arg_setup( from loopy.kernel.array import ArrayBase - result: List[genpy.Generable] = [] + result: list[genpy.Generable] = [] gen = result.append - cl_indices_and_args: List[Union[int, str]] = [] + cl_indices_and_args: list[int | str] = [] for arg_idx, passed_name in enumerate(passed_names): if passed_name in kernel.all_inames(): continue @@ -774,9 +774,8 @@ def get_function_definition( kai = get_kernel_arg_info(codegen_state.kernel) args = ( - ["_lpy_cl_kernels", "queue"] - + list(kai.passed_arg_names) - + ["wait_for=None", "allocator=None"]) + ["_lpy_cl_kernels", "queue", *kai.passed_arg_names, + "wait_for=None", "allocator=None"]) from genpy import For, Function, Line, Return, Statement as S, Suite return Function( @@ -803,7 +802,7 @@ def get_function_definition( def get_function_declaration( self, codegen_state: CodeGenerationState, codegen_result: CodeGenerationResult, schedule_index: int - ) -> Tuple[Sequence[Tuple[str, str]], Optional[genpy.Generable]]: + ) -> tuple[Sequence[tuple[str, str]], genpy.Generable | None]: # no such thing in Python return [], None @@ -855,7 +854,7 @@ def get_temporary_decls(self, codegen_state, schedule_index): def get_kernel_call( self, codegen_state: CodeGenerationState, subkernel_name: str, - gsize: Tuple[ExpressionT, ...], lsize: Tuple[ExpressionT, ...] + gsize: tuple[Expression, ...], lsize: tuple[Expression, ...] ) -> genpy.Suite: from genpy import Assert, Assign, Comment, Line, Suite @@ -920,7 +919,7 @@ def get_kernel_call( "_lpy_cl.mem_flags.READ_ONLY " "| _lpy_cl.mem_flags.COPY_HOST_PTR, " "hostbuf=" - f"_lpy_pack({repr(''.join(struct_pack_types))}, " + f"_lpy_pack({''.join(struct_pack_types)!r}, " f"{', '.join(struct_pack_args)}))"), Line(f"_lpy_knl.set_arg({cl_arg_count}, _lpy_overflow_args_buf)") ]) @@ -980,8 +979,8 @@ def get_kernel_call( def split_args_for_overflow( kernel: LoopKernel, passed_names: Sequence[str], - *, limit_arg_size_nbytes: Optional[int], pointer_size_nbytes: int - ) -> Tuple[Sequence[str], Sequence[str]]: + *, limit_arg_size_nbytes: int | None, pointer_size_nbytes: int + ) -> tuple[Sequence[str], Sequence[str]]: if limit_arg_size_nbytes is None: return passed_names, [] @@ -1027,15 +1026,18 @@ class PyOpenCLCASTBuilder(OpenCLCASTBuilder): # {{{ function decl/def, with arg overflow handling def get_function_definition( - self, codegen_state: CodeGenerationState, + self, + codegen_state: CodeGenerationState, codegen_result: CodeGenerationResult, - schedule_index: int, function_decl: Generable, function_body: Generable, - ) -> Tuple[Sequence[Tuple[str, str]], Generable]: + schedule_index: int, + function_decl: Generable, + function_body: Generable, + ) -> Generable: assert isinstance(function_body, Block) kernel = codegen_state.kernel assert kernel.linearization is not None - subkernel_name = cast(CallKernel, + subkernel_name = cast("CallKernel", kernel.linearization[schedule_index]).kernel_name result = [] @@ -1063,10 +1065,12 @@ def get_function_definition( if tv.initializer is not None: from loopy.target.c import generate_array_literal - decl = Initializer(decl, generate_array_literal( + init_decl = Initializer(decl, generate_array_literal( codegen_state, tv, tv.initializer)) + else: + init_decl = decl - result.append(decl) + result.append(init_decl) # {{{ unpack overflow args @@ -1092,34 +1096,40 @@ def get_function_definition( # }}} + from loopy.target.c import FunctionDeclarationWrapper + + assert isinstance(function_decl, FunctionDeclarationWrapper) + if not isinstance(function_body, Block): + function_body = Block([function_body]) + fbody = FunctionBody(function_decl, function_body) if not result: return fbody else: - return Collection(result+[Line(), fbody]) + return Collection([*result, Line(), fbody]) def get_function_declaration( self, codegen_state: CodeGenerationState, codegen_result: CodeGenerationResult, schedule_index: int - ) -> Tuple[Sequence[Tuple[str, str]], Generable]: + ) -> tuple[Sequence[tuple[str, str]], Generable]: kernel = codegen_state.kernel assert codegen_state.kernel.linearization is not None subkernel_name = cast( - CallKernel, + "CallKernel", codegen_state.kernel.linearization[schedule_index] ).kernel_name from cgen import FunctionDeclaration, Struct, Value - name = codegen_result.current_program(codegen_state).name + name_str = codegen_result.current_program(codegen_state).name if self.target.fortran_abi: - name += "_" + name_str += "_" from loopy.target.c import FunctionDeclarationWrapper if codegen_state.is_entrypoint: - name = Value("void", name) + name = Value("void", name_str) # subkernel launches occur only as part of entrypoint kernels for now from loopy.schedule.tools import get_subkernel_arg_info @@ -1149,7 +1159,7 @@ def get_function_declaration( (f"declare-{arg_overflow_struct_name}", str(arg_overflow_struct)) ] if struct_overflow_arg_names else [] - arg_struct_args = [CLGlobal(Const(Pointer(Value( + arg_struct_args: list[Declarator] = [CLGlobal(Const(Pointer(Value( f"struct {arg_overflow_struct_name}", "_lpy_overflow_args"))))] else: @@ -1168,7 +1178,7 @@ def get_function_declaration( + arg_struct_args ))) else: - name = Value("static void", name) + name = Value("static void", name_str) passed_names = [arg.name for arg in kernel.args] written_names = kernel.get_written_variables() @@ -1195,9 +1205,7 @@ def known_callables(self): return callables def preamble_generators(self): - return ([ - pyopencl_preamble_generator, - ] + super().preamble_generators()) + return ([pyopencl_preamble_generator, *super().preamble_generators()]) # }}} diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py index be859ab70..c9191e1d1 100644 --- a/loopy/target/pyopencl_execution.py +++ b/loopy/target/pyopencl_execution.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ @@ -23,29 +26,31 @@ import logging from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence +from typing import TYPE_CHECKING, Any, Callable, Sequence import numpy as np -from immutables import Map from pytools import memoize_method from pytools.codegen import CodeGenerator, Indentation -from loopy.codegen.result import CodeGenerationResult -from loopy.kernel import LoopKernel from loopy.kernel.data import ArrayArg -from loopy.schedule.tools import KernelArgInfo from loopy.target.execution import ExecutionWrapperGeneratorBase, ExecutorBase -from loopy.types import LoopyType -from loopy.typing import ExpressionT, integer_expr_or_err +from loopy.typing import Expression, integer_expr_or_err logger = logging.getLogger(__name__) if TYPE_CHECKING: + from immutables import Map + import pyopencl as cl + from loopy.codegen.result import CodeGenerationResult + from loopy.kernel import LoopKernel + from loopy.schedule.tools import KernelArgInfo + from loopy.types import LoopyType + # {{{ invoker generation @@ -109,7 +114,7 @@ def handle_non_numpy_arg(self, gen: CodeGenerator, arg: ArrayArg) -> None: def handle_alloc( self, gen: CodeGenerator, arg: ArrayArg, - strify: Callable[[ExpressionT], str], + strify: Callable[[Expression], str], skip_arg_checks: bool) -> None: """ Handle allocation of non-specified arguments for pyopencl execution @@ -201,9 +206,8 @@ def generate_invocation(self, gen: CodeGenerator, kernel: LoopKernel, gen("") - arg_list = (["_lpy_cl_kernels", "queue"] - + list(args) - + ["wait_for=wait_for", "allocator=allocator"]) + arg_list = (["_lpy_cl_kernels", "queue", *args, + "wait_for=wait_for", "allocator=allocator"]) gen(f"_lpy_evt = {host_program_name}({', '.join(arg_list)})") if kernel.options.cl_exec_manage_array_events: @@ -274,7 +278,7 @@ def get_arg_pass(self, arg): @dataclass(frozen=True) class _KernelInfo: - cl_kernels: "_Kernels" + cl_kernels: _Kernels invoker: Callable[..., Any] @@ -292,7 +296,7 @@ class PyOpenCLExecutor(ExecutorBase): .. automethod:: __call__ """ - def __init__(self, context: "cl.Context", t_unit, entrypoint): + def __init__(self, context: cl.Context, t_unit, entrypoint): super().__init__(t_unit, entrypoint) self.context = context @@ -307,7 +311,7 @@ def get_wrapper_generator(self): @memoize_method def translation_unit_info( self, - arg_to_dtype: Optional[Map[str, LoopyType]] = None) -> _KernelInfo: + arg_to_dtype: Map[str, LoopyType] | None = None) -> _KernelInfo: t_unit = self.get_typed_and_scheduled_translation_unit(arg_to_dtype) # FIXME: now just need to add the types to the arguments diff --git a/loopy/target/python.py b/loopy/target/python.py index 3a8747f38..1b2560402 100644 --- a/loopy/target/python.py +++ b/loopy/target/python.py @@ -1,4 +1,5 @@ """Python host AST builder for integration with PyOpenCL.""" +from __future__ import annotations __copyright__ = "Copyright (C) 2016 Andreas Kloeckner" @@ -23,7 +24,7 @@ THE SOFTWARE. """ -from typing import Optional, Sequence, Tuple +from typing import TYPE_CHECKING, Sequence import numpy as np @@ -31,14 +32,17 @@ from pymbolic.mapper import Mapper from pymbolic.mapper.stringifier import StringifyMapper -from loopy.codegen import CodeGenerationState -from loopy.codegen.result import CodeGenerationResult -from loopy.diagnostic import LoopyError # noqa +from loopy.diagnostic import LoopyError from loopy.kernel.data import ValueArg from loopy.target import ASTBuilderBase from loopy.type_inference import TypeReader +if TYPE_CHECKING: + from loopy.codegen import CodeGenerationState + from loopy.codegen.result import CodeGenerationResult + + # {{{ expression to code class ExpressionToPythonMapper(StringifyMapper): @@ -161,9 +165,7 @@ def known_callables(self): def preamble_generators(self): return ( - super().preamble_generators() + [ - _base_python_preamble_generator - ]) + [*super().preamble_generators(), _base_python_preamble_generator]) # {{{ code generation guts @@ -175,7 +177,7 @@ def ast_module(self): def get_function_declaration( self, codegen_state: CodeGenerationState, codegen_result: CodeGenerationResult, schedule_index: int - ) -> Tuple[Sequence[Tuple[str, str]], Optional[Generable]]: + ) -> tuple[Sequence[tuple[str, str]], Generable | None]: return [], None def get_function_definition(self, codegen_state, codegen_result, diff --git a/loopy/tools.py b/loopy/tools.py index bb4904bf2..2f18cfb91 100644 --- a/loopy/tools.py +++ b/loopy/tools.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ @@ -24,7 +27,6 @@ import logging from functools import cached_property from sys import intern -from typing import List import numpy as np from immutables import Map @@ -136,8 +138,8 @@ def hash_key(self): kb = LoopyKeyBuilder() # Build the key. For faster hashing, avoid hashing field names. key = ( - (self.class_.__name__.encode("utf-8"),) + - tuple(self.field_dict[k] for k in sorted(self.field_dict.keys()))) + (self.class_.__name__.encode("utf-8"), + *(self.field_dict[k] for k in sorted(self.field_dict.keys())))) return kb(key) @@ -242,25 +244,14 @@ def build_ispc_shared_lib( from subprocess import check_call - ispc_cmd = ([ispc_bin, - "--pic", - "-o", "ispc.o"] - + ispc_options - + list(ispc_source_names)) + ispc_cmd = ([ispc_bin, "--pic", "-o", "ispc.o", *ispc_options, *ispc_source_names]) if not quiet: print(" ".join(ispc_cmd)) check_call(ispc_cmd, cwd=cwd) - cxx_cmd = ([ - cxx_bin, - "-shared", "-Wl,--export-dynamic", - "-fPIC", - "-oshared.so", - "ispc.o", - ] - + cxx_options - + list(cxx_source_names)) + cxx_cmd = ([cxx_bin, "-shared", "-Wl,--export-dynamic", "-fPIC", "-oshared.so", + "ispc.o", *cxx_options, *cxx_source_names]) check_call(cxx_cmd, cwd=cwd) @@ -279,7 +270,7 @@ def address_from_numpy(obj): if ary_intf is None: raise RuntimeError("no array interface") - buf_base, is_read_only = ary_intf["data"] + buf_base, _is_read_only = ary_intf["data"] return buf_base + ary_intf.get("offset", 0) @@ -316,10 +307,10 @@ def empty_aligned(shape, dtype, order="C", n=64): # We now need to know how to offset base_ary # so it is correctly aligned - _array_aligned_offset = (n-address_from_numpy(base_ary)) % n + array_aligned_offset = (n-address_from_numpy(base_ary)) % n array = np.frombuffer( - base_ary[_array_aligned_offset:_array_aligned_offset-n].data, + base_ary[array_aligned_offset:array_aligned_offset-n].data, dtype=dtype).reshape(shape, order=order) return array @@ -535,7 +526,7 @@ class Optional: The value, if present. """ - __slots__ = ("has_value", "_value") + __slots__ = ("_value", "has_value") def __init__(self, value=_no_value): self.has_value = value is not _no_value @@ -828,7 +819,7 @@ def t_unit_to_python(t_unit, var_name="t_unit", "from pymbolic.primitives import *", "import immutables", ]) - body_str = "\n".join(knl_python_code_srcs + ["\n", merge_stmt]) + body_str = "\n".join([*knl_python_code_srcs, "\n", merge_stmt]) python_code = "\n".join([preamble_str, "\n", body_str]) assert _is_generated_t_unit_the_same(python_code, var_name, t_unit) @@ -843,7 +834,7 @@ def t_unit_to_python(t_unit, var_name="t_unit", # {{{ cache management -caches: List[WriteOncePersistentDict] = [] +caches: list[WriteOncePersistentDict] = [] def clear_in_mem_caches() -> None: diff --git a/loopy/transform/__init__.py b/loopy/transform/__init__.py index 625781167..9a205fe23 100644 --- a/loopy/transform/__init__.py +++ b/loopy/transform/__init__.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ diff --git a/loopy/transform/add_barrier.py b/loopy/transform/add_barrier.py index 7ab5e376e..16ebaa5c0 100644 --- a/loopy/transform/add_barrier.py +++ b/loopy/transform/add_barrier.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2017 Kaushik Kulkarni" __license__ = """ @@ -89,7 +92,7 @@ def add_barrier(kernel, insn_before="", insn_after="", id_based_on=None, synchronization_kind=synchronization_kind, mem_kind=mem_kind) - new_kernel = kernel.copy(instructions=kernel.instructions + [barrier_to_add]) + new_kernel = kernel.copy(instructions=[*kernel.instructions, barrier_to_add]) if insn_after is not None: new_kernel = add_dependency(new_kernel, insn_match=insn_after, diff --git a/loopy/transform/arithmetic.py b/loopy/transform/arithmetic.py index b527c087b..8ad7d658b 100644 --- a/loopy/transform/arithmetic.py +++ b/loopy/transform/arithmetic.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ diff --git a/loopy/transform/array_buffer_map.py b/loopy/transform/array_buffer_map.py index 81b5c933f..fb54dedd4 100644 --- a/loopy/transform/array_buffer_map.py +++ b/loopy/transform/array_buffer_map.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012-2015 Andreas Kloeckner" __license__ = """ @@ -23,18 +26,21 @@ from abc import ABC, abstractmethod from dataclasses import dataclass, replace -from typing import Any, Callable, Optional, Sequence, Tuple +from typing import TYPE_CHECKING, Any, Callable, Sequence from typing_extensions import Self import islpy as isl from islpy import dim_type -from pymbolic import ArithmeticExpressionT, var +from pymbolic import ArithmeticExpression, var from pymbolic.mapper.substitutor import make_subst_func from pytools import memoize_method from loopy.symbolic import SubstitutionMapper, get_dependencies -from loopy.typing import ExpressionT + + +if TYPE_CHECKING: + from loopy.typing import Expression @dataclass(frozen=True) @@ -47,7 +53,7 @@ class AccessDescriptor: """ identifier: Any = None - storage_axis_exprs: Optional[Sequence[ArithmeticExpressionT]] = None + storage_axis_exprs: Sequence[ArithmeticExpression] | None = None def copy(self, **kwargs) -> Self: return replace(self, **kwargs) @@ -72,10 +78,10 @@ def to_parameters_or_project_out(param_inames, set_inames, set): # {{{ construct storage->sweep map def build_per_access_storage_to_domain_map( - storage_axis_exprs: Sequence[ExpressionT], + storage_axis_exprs: Sequence[Expression], domain: isl.BasicSet, storage_axis_names: Sequence[str], - prime_sweep_inames: Callable[[ExpressionT], ExpressionT] + prime_sweep_inames: Callable[[Expression], Expression] ) -> isl.BasicMap: map_space = domain.space @@ -203,10 +209,10 @@ def compute_bounds(kernel, domain, stor2sweep, # {{{ array-to-buffer map class ArrayToBufferMapBase(ABC): - non1_storage_axis_names: Tuple[str, ...] - storage_base_indices: Tuple[ArithmeticExpressionT, ...] - non1_storage_shape: Tuple[ArithmeticExpressionT, ...] - non1_storage_axis_flags: Tuple[ArithmeticExpressionT, ...] + non1_storage_axis_names: tuple[str, ...] + storage_base_indices: tuple[ArithmeticExpression, ...] + non1_storage_shape: tuple[ArithmeticExpression, ...] + non1_storage_axis_flags: tuple[ArithmeticExpression, ...] @abstractmethod def is_access_descriptor_in_footprint(self, accdesc: AccessDescriptor) -> bool: diff --git a/loopy/transform/batch.py b/loopy/transform/batch.py index 04c5ea385..3a755746e 100644 --- a/loopy/transform/batch.py +++ b/loopy/transform/batch.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ @@ -83,7 +86,7 @@ def map_subscript(self, expr, expn_state): if not isinstance(idx, tuple): idx = (idx,) - return type(expr)(expr.aggregate, (self.batch_iname_expr,) + idx) + return type(expr)(expr.aggregate, (self.batch_iname_expr, *idx)) def map_variable(self, expr, expn_state): if not self.needs_batch_subscript(expr.name): @@ -98,7 +101,7 @@ def _add_unique_dim_name(name, dim_names): from pytools import UniqueNameGenerator ng = UniqueNameGenerator(set(dim_names)) - return (ng(name),) + tuple(dim_names) + return (ng(name), *tuple(dim_names)) @for_each_kernel @@ -143,7 +146,7 @@ def to_batched(kernel, nbatches, batch_varying_args, batch_iname_prefix="ibatch" nbatches_expr = nbatches batch_domain = isl.BasicSet(batch_dom_str) - new_domains = [batch_domain] + kernel.domains + new_domains = [batch_domain, *kernel.domains] for arg in kernel.args: if arg.name in batch_varying_args: @@ -152,7 +155,7 @@ def to_batched(kernel, nbatches, batch_varying_args, batch_iname_prefix="ibatch" dim_tags="c") else: arg = arg.copy( - shape=(nbatches_expr,) + arg.shape, + shape=(nbatches_expr, *arg.shape), dim_tags=("c",) * (len(arg.shape) + 1), dim_names=_add_unique_dim_name("ibatch", arg.dim_names)) @@ -168,7 +171,7 @@ def to_batched(kernel, nbatches, batch_varying_args, batch_iname_prefix="ibatch" for temp in kernel.temporary_variables.values(): if temp_needs_batching_if_not_sequential(temp, batch_varying_args): new_temps[temp.name] = temp.copy( - shape=(nbatches_expr,) + temp.shape, + shape=(nbatches_expr, *temp.shape), dim_tags=("c",) * (len(temp.shape) + 1), dim_names=_add_unique_dim_name("ibatch", temp.dim_names)) else: diff --git a/loopy/transform/buffer.py b/loopy/transform/buffer.py index c8339f550..f113e453d 100644 --- a/loopy/transform/buffer.py +++ b/loopy/transform/buffer.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012-2015 Andreas Kloeckner" __license__ = """ @@ -125,7 +128,7 @@ def map_array_access(self, index, expn_state): # Can't possibly be nested, but recurse anyway to # make sure substitution rules referenced below here # do not get thrown away. - self.rec(result, expn_state.copy(arg_context={})) + self.rec(result, expn_state.copy(arg_context=Map())) return result diff --git a/loopy/transform/callable.py b/loopy/transform/callable.py index 0210eaee2..8669a4abb 100644 --- a/loopy/transform/callable.py +++ b/loopy/transform/callable.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2018 Kaushik Kulkarni" __license__ = """ @@ -20,7 +23,8 @@ THE SOFTWARE. """ -from collections.abc import Sequence + +from typing import TYPE_CHECKING from immutables import Map @@ -49,6 +53,10 @@ from loopy.translation_unit import FunctionIdT, TranslationUnit, for_each_kernel +if TYPE_CHECKING: + from collections.abc import Sequence + + __doc__ = """ .. currentmodule:: loopy @@ -314,7 +322,7 @@ def _inline_call_instruction(caller_knl, callee_knl, call_insn): parameters = call_insn.expression.parameters # reads from loopy.kernel.function_interface import get_kw_pos_association - kw_to_pos, pos_to_kw = get_kw_pos_association(callee_knl) + _kw_to_pos, pos_to_kw = get_kw_pos_association(callee_knl) for i, par in enumerate(parameters): arg_map[pos_to_kw[i]] = par diff --git a/loopy/transform/concatenate.py b/loopy/transform/concatenate.py index cd095c462..5ecd234cd 100644 --- a/loopy/transform/concatenate.py +++ b/loopy/transform/concatenate.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2022 Isuru Fernando" __license__ = """ @@ -25,7 +28,7 @@ .. autofunction:: concatenate_arrays """ -from typing import List, Optional, Sequence +from typing import Sequence import numpy as np @@ -42,7 +45,7 @@ def concatenate_arrays( kernel: LoopKernel, array_names: Sequence[str], - new_name: Optional[str] = None, + new_name: str | None = None, axis_nr: int = 0) -> LoopKernel: """Merges arrays (temporaries or arguments) into one array along the axis given by *axis_nr*. @@ -125,7 +128,7 @@ def modify_array_access(expr): new_tvs[new_name] = new_ary return kernel.copy(temporary_variables=new_tvs) elif isinstance(new_ary, ArrayArg): - new_args: List[KernelArgument] = [] + new_args: list[KernelArgument] = [] inserted = False for arg in kernel.args: if arg.name in array_names: diff --git a/loopy/transform/data.py b/loopy/transform/data.py index c63604f8c..80a0c4a12 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ @@ -21,7 +24,7 @@ """ from dataclasses import dataclass, replace -from typing import Dict, Optional, Tuple, cast +from typing import TYPE_CHECKING, cast from warnings import warn import numpy as np @@ -36,7 +39,10 @@ from loopy.kernel.function_interface import CallableKernel, ScalarCallable from loopy.translation_unit import TranslationUnit, for_each_kernel from loopy.types import LoopyType -from loopy.typing import ExpressionT + + +if TYPE_CHECKING: + from loopy.typing import Expression # {{{ convenience: add_prefetch @@ -124,7 +130,7 @@ def _process_footprint_subscripts(kernel, rule_name, sweep_inames, kernel = _add_kernel_axis(kernel, axis_name, 0, arg.shape[axis_nr], frozenset(sweep_inames) | fsub_dependencies) - sweep_inames = sweep_inames + [axis_name] + sweep_inames = [*sweep_inames, axis_name] inames_to_be_removed.append(axis_name) new_fsub.append(Variable(axis_name)) @@ -229,10 +235,10 @@ def add_prefetch_for_single_kernel(kernel, callables_table, var_name, from pymbolic import var uni_template = parsed_var_name if len(parameters) > 1: - uni_template = uni_template.index( - tuple(var(par_name) for par_name in parameters)) + uni_template = uni_template[ + tuple(var(par_name) for par_name in parameters)] elif len(parameters) == 1: - uni_template = uni_template.index(var(parameters[0])) + uni_template = uni_template[var(parameters[0])] # }}} @@ -984,11 +990,11 @@ def add_padding_to_avoid_bank_conflicts(kernel, device): @dataclass(frozen=True) class _BaseStorageInfo: name: str - next_offset: ExpressionT - approx_nbytes: Optional[int] = None + next_offset: Expression + approx_nbytes: int | None = None -def _sym_max(a: ExpressionT, b: ExpressionT) -> ExpressionT: +def _sym_max(a: Expression, b: Expression) -> Expression: from numbers import Number if isinstance(a, Number) and isinstance(b, Number): return max(a, b) @@ -999,9 +1005,9 @@ def _sym_max(a: ExpressionT, b: ExpressionT) -> ExpressionT: @for_each_kernel def allocate_temporaries_for_base_storage(kernel: LoopKernel, - only_address_space: Optional[int] = None, + only_address_space: int | None = None, aliased=True, - max_nbytes: Optional[int] = None, + max_nbytes: int | None = None, ) -> LoopKernel: from pytools import product @@ -1010,8 +1016,8 @@ def allocate_temporaries_for_base_storage(kernel: LoopKernel, vng = kernel.get_var_name_generator() - name_aspace_dtype_to_bsi: Dict[ - Tuple[str, AddressSpace, LoopyType], _BaseStorageInfo] = {} + name_aspace_dtype_to_bsi: dict[ + tuple[str, AddressSpace, LoopyType], _BaseStorageInfo] = {} for tv in sorted( kernel.temporary_variables.values(), @@ -1052,7 +1058,7 @@ def allocate_temporaries_for_base_storage(kernel: LoopKernel, approx_array_nbytes = 0 bs_key = (tv.base_storage, - cast(AddressSpace, tv.address_space), tv.dtype) + cast("AddressSpace", tv.address_space), tv.dtype) bsi = name_aspace_dtype_to_bsi.get(bs_key) if bsi is None or ( diff --git a/loopy/transform/diff.py b/loopy/transform/diff.py index 6c2688d90..c29a1895a 100644 --- a/loopy/transform/diff.py +++ b/loopy/transform/diff.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2015 Andreas Kloeckner" __license__ = """ diff --git a/loopy/transform/fusion.py b/loopy/transform/fusion.py index 8e047c036..b16d837f6 100644 --- a/loopy/transform/fusion.py +++ b/loopy/transform/fusion.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 795154099..c68c8be53 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ @@ -22,7 +25,7 @@ from collections.abc import Collection, Iterable, Mapping, Sequence -from typing import Any, FrozenSet, Optional +from typing import TYPE_CHECKING, Any from typing_extensions import TypeAlias @@ -33,8 +36,6 @@ from loopy.diagnostic import LoopyError from loopy.kernel import LoopKernel from loopy.kernel.function_interface import CallableKernel -from loopy.kernel.instruction import InstructionBase -from loopy.match import ToStackMatchCovertible from loopy.symbolic import ( RuleAwareIdentityMapper, RuleAwareSubstitutionMapper, @@ -43,6 +44,11 @@ from loopy.translation_unit import TranslationUnit, for_each_kernel +if TYPE_CHECKING: + from loopy.kernel.instruction import InstructionBase + from loopy.match import ToStackMatchConvertible + + __doc__ = """ .. currentmodule:: loopy @@ -296,16 +302,16 @@ def _split_iname_backend(kernel, iname_to_split, new_prio = () for prio_iname in prio: if prio_iname == iname_to_split: - new_prio = new_prio + (outer_iname, inner_iname) + new_prio = (*new_prio, outer_iname, inner_iname) else: - new_prio = new_prio + (prio_iname,) + new_prio = (*new_prio, prio_iname) new_priorities.append(new_prio) kernel = kernel.copy( domains=new_domains, iname_slab_increments=iname_slab_increments, instructions=new_insns, - applied_iname_rewrites=kernel.applied_iname_rewrites+(subst_map,), + applied_iname_rewrites=(*kernel.applied_iname_rewrites, subst_map), loop_priority=frozenset(new_priorities)) rule_mapping_context = SubstitutionRuleMappingContext( @@ -630,7 +636,7 @@ def subst_within_inames(fid): .copy( instructions=new_insns, domains=domch.get_domains_with(new_domain), - applied_iname_rewrites=kernel.applied_iname_rewrites + (subst_dict,) + applied_iname_rewrites=(*kernel.applied_iname_rewrites, subst_dict) )) from loopy.match import parse_stack_match @@ -1051,7 +1057,7 @@ def get_iname_duplication_options(kernel): if isinstance(kernel, TranslationUnit): if len([clbl for clbl in kernel.callables_table.values() if isinstance(clbl, CallableKernel)]) == 1: - kernel = kernel[list(kernel.entrypoints)[0]] + kernel = kernel[next(iter(kernel.entrypoints))] assert isinstance(kernel, LoopKernel) @@ -1096,7 +1102,7 @@ def has_schedulable_iname_nesting(kernel): if isinstance(kernel, TranslationUnit): if len([clbl for clbl in kernel.callables_table.values() if isinstance(clbl, CallableKernel)]) == 1: - kernel = kernel[list(kernel.entrypoints)[0]] + kernel = kernel[next(iter(kernel.entrypoints))] return not bool(next(get_iname_duplication_options(kernel), False)) # }}} @@ -1398,7 +1404,7 @@ def parse_equation(eqn): rule_mapping_context.finish_kernel( old_to_new.map_kernel(kernel)) .copy( - applied_iname_rewrites=kernel.applied_iname_rewrites + (subst_dict,) + applied_iname_rewrites=(*kernel.applied_iname_rewrites, subst_dict) )) # }}} @@ -1744,7 +1750,7 @@ def add_inames_to_insn(kernel, inames, insn_match): # {{{ remove_inames_from_insn @for_each_kernel -def remove_inames_from_insn(kernel: LoopKernel, inames: FrozenSet[str], +def remove_inames_from_insn(kernel: LoopKernel, inames: frozenset[str], insn_match) -> LoopKernel: """ :arg inames: a frozenset of inames that will be added to the @@ -1832,7 +1838,7 @@ def remove_predicates_from_insn(kernel, predicates, insn_match): class _MapDomainMapper(RuleAwareIdentityMapper): def __init__(self, rule_mapping_context, new_inames, substitutions): - super(_MapDomainMapper, self).__init__(rule_mapping_context) + super().__init__(rule_mapping_context) self.old_inames = frozenset(substitutions) self.new_inames = new_inames @@ -1852,7 +1858,7 @@ def map_reduction(self, expr, expn_state): if arg_ctx_overlap: if arg_ctx_overlap == red_overlap: # All variables are shadowed by context, that's OK. - return super(_MapDomainMapper, self).map_reduction( + return super().map_reduction( expr, expn_state) else: raise LoopyError("Reduction '%s' has" @@ -1871,14 +1877,14 @@ def map_reduction(self, expr, expn_state): self.rec(expr.expr, expn_state), expr.allow_simultaneous) else: - return super(_MapDomainMapper, self).map_reduction(expr, expn_state) + return super().map_reduction(expr, expn_state) def map_variable(self, expr, expn_state): if (expr.name in self.old_inames and expr.name not in expn_state.arg_context): return self.substitutions[expr.name] else: - return super(_MapDomainMapper, self).map_variable(expr, expn_state) + return super().map_variable(expr, expn_state) # }}} @@ -2082,7 +2088,7 @@ def map_domain(kernel, transform_map): substitutions[iname] = subst_from_map var_substitutions[var(iname)] = subst_from_map - applied_iname_rewrites = applied_iname_rewrites + (var_substitutions,) + applied_iname_rewrites = (*applied_iname_rewrites, var_substitutions) del var_substitutions # }}} @@ -2375,8 +2381,8 @@ def rename_inames( old_inames: Collection[str], new_iname: str, existing_ok: bool = False, - within: ToStackMatchCovertible = None, - raise_on_domain_mismatch: Optional[bool] = None + within: ToStackMatchConvertible = None, + raise_on_domain_mismatch: bool | None = None ) -> LoopKernel: r""" :arg old_inames: A collection of inames that must be renamed to **new_iname**. @@ -2519,9 +2525,9 @@ def rename_iname( old_iname: str, new_iname: str, existing_ok: bool = False, - within: ToStackMatchCovertible = None, + within: ToStackMatchConvertible = None, preserve_tags: bool = True, - raise_on_domain_mismatch: Optional[bool] = None + raise_on_domain_mismatch: bool | None = None ) -> LoopKernel: r""" Single iname version of :func:`loopy.rename_inames`. diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index 494bbf0bc..ec876ea03 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ @@ -20,16 +23,19 @@ THE SOFTWARE. """ -from typing import List, Mapping, Sequence, Tuple +from typing import TYPE_CHECKING, Mapping, Sequence from loopy.diagnostic import LoopyError from loopy.kernel import LoopKernel from loopy.kernel.function_interface import CallableKernel, ScalarCallable -from loopy.kernel.instruction import InstructionBase from loopy.symbolic import RuleAwareIdentityMapper from loopy.translation_unit import TranslationUnit, for_each_kernel +if TYPE_CHECKING: + from loopy.kernel.instruction import InstructionBase + + # {{{ find_instructions def find_instructions_in_single_kernel(kernel, insn_match): @@ -263,8 +269,8 @@ def replace_instruction_ids_in_insn( ) -> InstructionBase: changed = False new_depends_on = list(insn.depends_on) - extra_depends_on: List[str] = [] - new_no_sync_with: List[Tuple[str, str]] = [] + extra_depends_on: list[str] = [] + new_no_sync_with: list[tuple[str, str]] = [] if insn.id in replacements: assert isinstance(insn.id, str) diff --git a/loopy/transform/pack_and_unpack_args.py b/loopy/transform/pack_and_unpack_args.py index 2a82952c2..9dc5f9a9b 100644 --- a/loopy/transform/pack_and_unpack_args.py +++ b/loopy/transform/pack_and_unpack_args.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2018 Tianjiao Sun, Kaushik Kulkarni" __license__ = """ @@ -222,9 +225,9 @@ def pack_and_unpack_args_for_call_for_single_kernel(kernel, new_indices = tuple(simplify_via_aff(i) for i in new_indices) pack_lhs_assignee = pack_subst_mapper( - var(pack_name).index(new_indices)) + var(pack_name)[new_indices]) unpack_rhs = unpack_subst_mapper( - var(pack_name).index(new_indices)) + var(pack_name)[new_indices]) # }}} @@ -266,13 +269,13 @@ def pack_and_unpack_args_for_call_for_single_kernel(kernel, in_knl_callable.arg_id_to_descr[arg_id].shape): iname_set = iname_set & make_slab(space, iname.name, 0, axis_length) - new_domains = new_domains + [iname_set] + new_domains = [*new_domains, iname_set] # }}} new_id_to_parameters[arg_id] = SubArrayRef( tuple(updated_swept_inames), - (var(pack_name).index(tuple(updated_swept_inames)))) + (var(pack_name)[tuple(updated_swept_inames)])) else: new_id_to_parameters[arg_id] = p @@ -290,8 +293,8 @@ def pack_and_unpack_args_for_call_for_single_kernel(kernel, new_ilp_inames), expression=new_call_insn.expression.function(*new_params), assignees=new_assignees) - old_insn_to_new_insns[insn.id] = (packing_insns + [new_call_insn] + - unpacking_insns) + old_insn_to_new_insns[insn.id] = ([ + *packing_insns, new_call_insn, *unpacking_insns]) if old_insn_to_new_insns: new_instructions = [] diff --git a/loopy/transform/padding.py b/loopy/transform/padding.py index 76deccc44..9afc28f9f 100644 --- a/loopy/transform/padding.py +++ b/loopy/transform/padding.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ diff --git a/loopy/transform/parameter.py b/loopy/transform/parameter.py index fb1bc0b71..73732a16b 100644 --- a/loopy/transform/parameter.py +++ b/loopy/transform/parameter.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index b0fbb5468..3988b1f5d 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ @@ -22,19 +25,17 @@ from dataclasses import dataclass -from typing import FrozenSet, List, Optional, Sequence, Type, Union, cast +from typing import TYPE_CHECKING, Sequence, cast import numpy as np from immutables import Map import islpy as isl -from pymbolic import ArithmeticExpressionT, var +from pymbolic import ArithmeticExpression, var from pymbolic.mapper.substitutor import make_subst_func from pytools import memoize_on_first_arg -from pytools.tag import Tag from loopy.diagnostic import LoopyError -from loopy.kernel import LoopKernel from loopy.kernel.data import AddressSpace from loopy.kernel.function_interface import CallableKernel, ScalarCallable from loopy.kernel.instruction import InstructionBase, MultiAssignmentBase @@ -42,7 +43,6 @@ find_most_recent_global_barrier, kernel_has_global_barriers, ) -from loopy.match import ToStackMatchCovertible from loopy.symbolic import ( CombineMapper, RuleAwareIdentityMapper, @@ -60,7 +60,7 @@ from loopy.translation_unit import CallablesTable, TranslationUnit from loopy.types import LoopyType, ToLoopyTypeConvertible, to_loopy_type from loopy.typing import ( - ExpressionT, + Expression, auto, integer_expr_or_err, integer_or_err, @@ -68,6 +68,13 @@ ) +if TYPE_CHECKING: + from pytools.tag import Tag + + from loopy.kernel import LoopKernel + from loopy.match import ToStackMatchConvertible + + # {{{ contains_subst_rule_invocation class FunctionNameCollector(CombineMapper): @@ -101,9 +108,9 @@ def _get_called_names(insn): assert isinstance(insn, MultiAssignmentBase) from functools import reduce - from pymbolic.primitives import Expression + from pymbolic.primitives import ExpressionNode return ((_get_calls_in_expr(insn.expression) - if isinstance(insn.expression, Expression) + if isinstance(insn.expression, ExpressionNode) else frozenset()) # indices of assignees might call the subst rules | reduce(frozenset.union, @@ -113,7 +120,7 @@ def _get_called_names(insn): | reduce(frozenset.union, (_get_calls_in_expr(pred) for pred in insn.predicates - if isinstance(pred, Expression)), + if isinstance(pred, ExpressionNode)), frozenset()) ) @@ -133,14 +140,14 @@ def contains_a_subst_rule_invocation(kernel, insn): @dataclass(frozen=True) class RuleAccessDescriptor(AccessDescriptor): - args: Optional[Sequence[ArithmeticExpressionT]] = None + args: Sequence[ArithmeticExpression] | None = None def access_descriptor_id(args, expansion_stack): return (args, expansion_stack) -def storage_axis_exprs(storage_axis_sources, args) -> Sequence[ExpressionT]: +def storage_axis_exprs(storage_axis_sources, args) -> Sequence[Expression]: result = [] for saxis_source in storage_axis_sources: @@ -170,7 +177,7 @@ def __init__(self, rule_mapping_context, kernel, subst_name, subst_tag, within) self.access_descriptors: list[RuleAccessDescriptor] = [] - def map_substitution(self, name, tag, arguments, expn_state): + def map_subst_rule(self, name, tag, arguments, expn_state): process_me = name == self.subst_name if self.subst_tag is not None and self.subst_tag != tag: @@ -182,7 +189,7 @@ def map_substitution(self, name, tag, arguments, expn_state): expn_state.stack) if not process_me: - return super().map_substitution( + return super().map_subst_rule( name, tag, arguments, expn_state) rule = self.rule_mapping_context.old_subst_rules[name] @@ -207,7 +214,7 @@ def map_substitution(self, name, tag, arguments, expn_state): ", ".join(arg_deps - self.kernel.all_inames()), ), stacklevel=1) - return super().map_substitution( + return super().map_subst_rule( name, tag, arguments, expn_state) args = [arg_context[arg_name] for arg_name in rule.arguments] @@ -252,7 +259,7 @@ def __init__(self, rule_mapping_context, subst_name, subst_tag, within, self.compute_read_variables = compute_read_variables self.compute_insn_depends_on = set() - def map_substitution(self, name, tag, arguments, expn_state): + def map_subst_rule(self, name, tag, arguments, expn_state): if not ( name == self.subst_name and self.within( @@ -260,7 +267,7 @@ def map_substitution(self, name, tag, arguments, expn_state): expn_state.instruction, expn_state.stack) and (self.subst_tag is None or self.subst_tag == tag)): - return super().map_substitution( + return super().map_subst_rule( name, tag, arguments, expn_state) # {{{ check if in footprint @@ -275,7 +282,7 @@ def map_substitution(self, name, tag, arguments, expn_state): self.storage_axis_sources, args)) if not self.array_base_map.is_access_descriptor_in_footprint(accdesc): - return super().map_substitution( + return super().map_subst_rule( name, tag, arguments, expn_state) # }}} @@ -380,20 +387,20 @@ def precompute_for_single_kernel( callables_table: CallablesTable, subst_use, sweep_inames=None, - within: ToStackMatchCovertible = None, + within: ToStackMatchConvertible = None, *, storage_axes=None, - temporary_name: Optional[str] = None, - precompute_inames: Optional[Sequence[str]] = None, - precompute_outer_inames: Optional[FrozenSet[str]] = None, + temporary_name: str | None = None, + precompute_inames: Sequence[str] | None = None, + precompute_outer_inames: frozenset[str] | None = None, storage_axis_to_tag=None, - default_tag: Union[None, Tag, str] = None, + default_tag: Tag | str | None = None, - dtype: Optional[ToLoopyTypeConvertible] = None, + dtype: ToLoopyTypeConvertible | None = None, fetch_bounding_box: bool = False, - temporary_address_space: Union[AddressSpace, None, Type[auto]] = None, - compute_insn_id: Optional[str] = None, + temporary_address_space: AddressSpace | type[auto] | None = None, + compute_insn_id: str | None = None, _enable_mirgecom_workaround: bool = False, ) -> LoopKernel: """Precompute the expression described in the substitution rule determined by @@ -514,7 +521,7 @@ def precompute_for_single_kernel( footprint_generators = None - subst_name: Optional[str] = None + subst_name: str | None = None subst_tag = None from pymbolic.primitives import Call, Variable @@ -577,9 +584,9 @@ def precompute_for_single_kernel( for fpg in footprint_generators: if isinstance(fpg, Variable): - args: tuple[ArithmeticExpressionT, ...] = () + args: tuple[ArithmeticExpression, ...] = () elif isinstance(fpg, Call): - args = cast(tuple[ArithmeticExpressionT, ...], fpg.parameters) + args = cast("tuple[ArithmeticExpression, ...]", fpg.parameters) else: raise ValueError("footprint generator must " "be substitution rule invocation") @@ -674,8 +681,8 @@ def precompute_for_single_kernel( prior_storage_axis_name_dict = {} - storage_axis_names: List[str] = [] - storage_axis_sources: List[Union[str, int]] = [] # number for arg#, or iname + storage_axis_names: list[str] = [] + storage_axis_sources: list[str | int] = [] # number for arg#, or iname # {{{ check for pre-existing precompute_inames @@ -772,8 +779,7 @@ def precompute_for_single_kernel( if abm.non1_storage_axis_flags[i]: non1_storage_axis_names.append(saxis) else: - if saxis in new_iname_to_tag: - del new_iname_to_tag[saxis] + new_iname_to_tag.pop(saxis, None) if saxis in preexisting_precompute_inames: raise LoopyError("precompute axis %d (1-based) was " @@ -922,8 +928,8 @@ def add_assumptions(d): # should. if _enable_mirgecom_workaround: - from pymbolic.primitives import Expression - if is_length_1 and not isinstance(base_index, Expression): + from pymbolic.primitives import ExpressionNode + if is_length_1 and not isinstance(base_index, ExpressionNode): # I.e. base_index is an integer. from pytools import is_single_valued if is_single_valued( @@ -963,7 +969,7 @@ def add_assumptions(d): # within_inames determined below ) compute_dep_id = compute_insn_id - added_compute_insns: List[InstructionBase] = [compute_insn] + added_compute_insns: list[InstructionBase] = [compute_insn] if temporary_address_space == AddressSpace.GLOBAL: barrier_insn_id = kernel.make_unique_instruction_id( @@ -1028,7 +1034,7 @@ def add_assumptions(d): and insn.within_inames & prior_storage_axis_names): insn = (insn .with_transformed_expressions( - lambda expr: expr_subst_map(expr, kernel, insn)) # noqa: B023,E501 + lambda expr: expr_subst_map(expr, kernel, insn)) # noqa: B023 .copy(within_inames=frozenset( new_iname for iname in insn.within_inames diff --git a/loopy/transform/privatize.py b/loopy/transform/privatize.py index ca31368d2..6e7eb1fb6 100644 --- a/loopy/transform/privatize.py +++ b/loopy/transform/privatize.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2015 Andreas Kloeckner" __license__ = """ diff --git a/loopy/transform/realize_reduction.py b/loopy/transform/realize_reduction.py index 7d1f3c870..5f504e722 100644 --- a/loopy/transform/realize_reduction.py +++ b/loopy/transform/realize_reduction.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = """ Copyright (C) 2012 Andreas Kloeckner Copyright (C) 2022 University of Illinois Board of Trustees @@ -26,7 +29,7 @@ import logging from dataclasses import dataclass, replace -from typing import Callable, Dict, FrozenSet, List, Optional, Sequence, Set, Tuple +from typing import TYPE_CHECKING, Callable, Sequence logger = logging.getLogger(__name__) @@ -34,12 +37,9 @@ from immutables import Map import islpy as isl -from pymbolic.primitives import Expression from pytools import memoize_on_first_arg -from pytools.tag import Tag from loopy.diagnostic import LoopyError, ReductionIsNotTriangularError, warn_with_kernel -from loopy.kernel import LoopKernel from loopy.kernel.data import AddressSpace, TemporaryVariable, make_assignment from loopy.kernel.function_interface import CallableKernel from loopy.kernel.instruction import Assignment, InstructionBase, MultiAssignmentBase @@ -48,6 +48,13 @@ from loopy.translation_unit import ConcreteCallablesTable, TranslationUnit +if TYPE_CHECKING: + from pymbolic.primitives import ExpressionNode + from pytools.tag import Tag + + from loopy.kernel import LoopKernel + + # {{{ reduction realization context @dataclass @@ -59,14 +66,14 @@ class _ChangeFlag: class _ReductionRealizationContext: # {{{ read-only - mapper: "RealizeReductionCallbackMapper" + mapper: RealizeReductionCallbackMapper force_scan: bool automagic_scans_ok: bool unknown_types_ok: bool # FIXME: This feels like a broken-by-design concept. - force_outer_iname_for_scan: Optional[str] + force_outer_iname_for_scan: str | None # We use the original kernel for a number of lookups whose value # we do not change and which might be already cached on it. @@ -82,17 +89,17 @@ class _ReductionRealizationContext: insn_id_gen: Callable[[str], str] var_name_gen: Callable[[str], str] - additional_temporary_variables: Dict[str, TemporaryVariable] - additional_insns: List[InstructionBase] - domains: List[isl.BasicSet] - additional_iname_tags: Dict[str, Sequence[Tag]] + additional_temporary_variables: dict[str, TemporaryVariable] + additional_insns: list[InstructionBase] + domains: list[isl.BasicSet] + additional_iname_tags: dict[str, Sequence[Tag]] # list only to facilitate mutation - boxed_callables_table: List[ConcreteCallablesTable] + boxed_callables_table: list[ConcreteCallablesTable] # FIXME: This is a broken-by-design concept. Local-parallel scans emit a # reduction internally. This serves to avoid force_scan acting on that # reduction. - inames_added_for_scan: Set[str] + inames_added_for_scan: set[str] # }}} @@ -100,10 +107,10 @@ class _ReductionRealizationContext: # These are attributes from 'surrounding' instruction, for generated # instructions to potentially inherit. - surrounding_within_inames: FrozenSet[str] - surrounding_depends_on: FrozenSet[str] - surrounding_no_sync_with: FrozenSet[Tuple[str, str]] - surrounding_predicates: FrozenSet[Expression] + surrounding_within_inames: frozenset[str] + surrounding_depends_on: frozenset[str] + surrounding_no_sync_with: frozenset[tuple[str, str]] + surrounding_predicates: frozenset[ExpressionNode] # }}} @@ -113,10 +120,10 @@ class _ReductionRealizationContext: # These are requested additions to attributes of the surrounding instruction. # FIXME add_within_inames seems broken by design. - surrounding_insn_add_within_inames: Set[str] + surrounding_insn_add_within_inames: set[str] - surrounding_insn_add_depends_on: Set[str] - surrounding_insn_add_no_sync_with: Set[Tuple[str, str]] + surrounding_insn_add_depends_on: set[str] + surrounding_insn_add_no_sync_with: set[tuple[str, str]] # }}} @@ -171,9 +178,9 @@ def get_insn_kwargs(self): @dataclass(frozen=True) class _InameClassification: - sequential: Tuple[str, ...] - local_parallel: Tuple[str, ...] - nonlocal_parallel: Tuple[str, ...] + sequential: tuple[str, ...] + local_parallel: tuple[str, ...] + nonlocal_parallel: tuple[str, ...] def _classify_reduction_inames(red_realize_ctx, inames): @@ -1124,7 +1131,7 @@ def map_reduction_local(red_realize_ctx, expr, nresults, arg_dtypes, red_realize_ctx=red_realize_ctx, name_based_on="acc_"+red_iname, nvars=nresults, - shape=outer_local_iname_sizes + (size,), + shape=(*outer_local_iname_sizes, size), dtypes=reduction_dtypes, address_space=AddressSpace.LOCAL) @@ -1151,7 +1158,7 @@ def map_reduction_local(red_realize_ctx, expr, nresults, arg_dtypes, init_insn = make_assignment( id=init_id, assignees=tuple( - acc_var[outer_local_iname_vars + (var(base_exec_iname),)] + acc_var[(*outer_local_iname_vars, var(base_exec_iname))] for acc_var in acc_vars), expression=neutral, within_inames=( @@ -1234,7 +1241,7 @@ def map_reduction_local(red_realize_ctx, expr, nresults, arg_dtypes, transfer_insn = make_assignment( id=transfer_id, assignees=tuple( - acc_var[outer_local_iname_vars + (var(red_iname),)] + acc_var[(*outer_local_iname_vars, var(red_iname))] for acc_var in acc_vars), expression=expression, **transfer_red_realize_ctx.get_insn_kwargs()) @@ -1269,12 +1276,11 @@ def map_reduction_local(red_realize_ctx, expr, nresults, arg_dtypes, arg_dtypes, _strip_if_scalar(acc_vars, tuple( acc_var[ - outer_local_iname_vars + (var(stage_exec_iname),)] + (*outer_local_iname_vars, var(stage_exec_iname))] for acc_var in acc_vars)), _strip_if_scalar(acc_vars, tuple( acc_var[ - outer_local_iname_vars + ( - var(stage_exec_iname) + new_size,)] + (*outer_local_iname_vars, var(stage_exec_iname) + new_size)] for acc_var in acc_vars)), red_realize_ctx.boxed_callables_table[0], orig_kernel.target) @@ -1282,7 +1288,7 @@ def map_reduction_local(red_realize_ctx, expr, nresults, arg_dtypes, stage_insn = make_assignment( id=stage_id, assignees=tuple( - acc_var[outer_local_iname_vars + (var(stage_exec_iname),)] + acc_var[(*outer_local_iname_vars, var(stage_exec_iname))] for acc_var in acc_vars), expression=expression, within_inames=( @@ -1307,9 +1313,9 @@ def map_reduction_local(red_realize_ctx, expr, nresults, arg_dtypes, if nresults == 1: assert len(acc_vars) == 1 - return acc_vars[0][outer_local_iname_vars + (0,)] + return acc_vars[0][(*outer_local_iname_vars, 0)] else: - return [acc_var[outer_local_iname_vars + (0,)] for acc_var in + return [acc_var[(*outer_local_iname_vars, 0)] for acc_var in acc_vars] # }}} @@ -1419,7 +1425,7 @@ def map_scan_seq(red_realize_ctx, expr, nresults, arg_dtypes, assignees=acc_vars, within_inames=( red_realize_ctx.surrounding_within_inames - - frozenset((scan_param.sweep_iname,) + expr.inames)), + - frozenset((scan_param.sweep_iname, *expr.inames))), within_inames_is_final=True, depends_on=init_insn_depends_on, expression=expression, @@ -1558,7 +1564,7 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, red_realize_ctx=red_realize_ctx, name_based_on="acc_"+scan_param.scan_iname, nvars=nresults, - shape=outer_local_iname_sizes + (scan_size,), + shape=(*outer_local_iname_sizes, scan_size), dtypes=reduction_dtypes, address_space=AddressSpace.LOCAL) @@ -1579,7 +1585,7 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, init_insn = make_assignment( id=init_id, assignees=tuple( - acc_var[outer_local_iname_vars + (var(base_exec_iname),)] + acc_var[(*outer_local_iname_vars, var(base_exec_iname))] for acc_var in acc_vars), expression=neutral, within_inames=base_iname_deps | frozenset([base_exec_iname]), @@ -1640,8 +1646,10 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, f"{red_realize_ctx.id_prefix}_{scan_param.scan_iname}_transfer") transfer_insn = make_assignment( id=transfer_id, - assignees=(acc_var[outer_local_iname_vars - + (var(scan_param.sweep_iname) - sweep_lower_bound_expr,)],), + assignees=(acc_var[( + *outer_local_iname_vars, + var(scan_param.sweep_iname) - sweep_lower_bound_expr) + ],), expression=pre_scan_result_i, within_inames=( red_realize_ctx.surrounding_within_inames @@ -1684,8 +1692,8 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, assignees=(read_var,), expression=( acc_var[ - outer_local_iname_vars - + (var(stage_exec_iname) - cur_size,)]), + (*outer_local_iname_vars, + var(stage_exec_iname) - cur_size)]), within_inames=( base_iname_deps | frozenset([stage_exec_iname])), within_inames_is_final=True, @@ -1713,7 +1721,7 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, _strip_if_scalar(acc_vars, read_vars), _strip_if_scalar(acc_vars, tuple( acc_var[ - outer_local_iname_vars + (var(stage_exec_iname),)] + (*outer_local_iname_vars, var(stage_exec_iname))] for acc_var in acc_vars)), red_realize_ctx.boxed_callables_table[0], orig_kernel.target) @@ -1721,7 +1729,7 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, write_stage_insn = make_assignment( id=write_stage_id, assignees=tuple( - acc_var[outer_local_iname_vars + (var(stage_exec_iname),)] + acc_var[(*outer_local_iname_vars, var(stage_exec_iname))] for acc_var in acc_vars), expression=expression, within_inames=( @@ -1744,9 +1752,9 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, if nresults == 1: assert len(acc_vars) == 1 - return acc_vars[0][outer_local_iname_vars + (output_idx,)] + return acc_vars[0][(*outer_local_iname_vars, output_idx)] else: - return [acc_var[outer_local_iname_vars + (output_idx,)] + return [acc_var[(*outer_local_iname_vars, output_idx)] for acc_var in acc_vars] # }}} diff --git a/loopy/transform/save.py b/loopy/transform/save.py index bd25dec36..e1dbfd99d 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2016 Matt Wala" __license__ = """ @@ -77,7 +80,7 @@ def get_successor_relation(self): for idx, (item, next_item) in enumerate(zip( reversed(self.schedule), - reversed(self.schedule + [None]))): + reversed([*self.schedule, None]))): sched_idx = len(self.schedule) - idx - 1 # Look at next_item @@ -760,7 +763,7 @@ def save_and_reload_temporaries(program, entrypoint=None): if entrypoint is None: if len(program.entrypoints) != 1: raise LoopyError("Missing argument 'entrypoint'.") - entrypoint = list(program.entrypoints)[0] + entrypoint = next(iter(program.entrypoints)) knl = program[entrypoint] diff --git a/loopy/transform/subst.py b/loopy/transform/subst.py index 422d22568..3ca981aa0 100644 --- a/loopy/transform/subst.py +++ b/loopy/transform/subst.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ @@ -36,7 +39,7 @@ class ExprDescriptor(ImmutableRecord): - __slots__ = ["insn", "expr", "unif_var_dict"] + __slots__ = ["expr", "insn", "unif_var_dict"] # {{{ extract_subst diff --git a/loopy/translation_unit.py b/loopy/translation_unit.py index ed68bb36e..6826876b0 100644 --- a/loopy/translation_unit.py +++ b/loopy/translation_unit.py @@ -31,9 +31,7 @@ TYPE_CHECKING, Any, Callable, - FrozenSet, Mapping, - Optional, TypeVar, Union, ) @@ -45,7 +43,6 @@ from pymbolic.primitives import Call, Variable from loopy.diagnostic import DirectCallUncachedWarning, LoopyError -from loopy.kernel import LoopKernel from loopy.kernel.function_interface import ( CallableKernel, InKernelCallable, @@ -57,10 +54,11 @@ RuleAwareIdentityMapper, SubstitutionRuleMappingContext, ) -from loopy.target import TargetBase if TYPE_CHECKING: + from loopy.kernel import LoopKernel + from loopy.target import TargetBase from loopy.target.execution import ExecutorBase @@ -237,7 +235,7 @@ class TranslationUnit: callables_table: ConcreteCallablesTable target: TargetBase - entrypoints: FrozenSet[str] + entrypoints: frozenset[str] def __post_init__(self): @@ -336,6 +334,7 @@ def default_entrypoint(self) -> LoopKernel: ep_name, = self.entrypoints entrypoint = self[ep_name] + from loopy import LoopKernel if not isinstance(entrypoint, LoopKernel): raise ValueError("default entrypoint is not a kernel") @@ -346,7 +345,7 @@ def default_entrypoint(self) -> LoopKernel: " determined.") def executor(self, - *args, entrypoint: Optional[str] = None, **kwargs) -> ExecutorBase: + *args, entrypoint: str | None = None, **kwargs) -> ExecutorBase: """Return an object that hosts caches of compiled code for execution (i.e. a subclass of :class:`ExecutorBase`, specific to an execution environment (e.g. an OpenCL context) and a given entrypoint. @@ -584,9 +583,9 @@ class CallablesInferenceContext: """ callables: Mapping[str, InKernelCallable] clbl_name_gen: Callable[[str], str] - renames: Mapping[str, FrozenSet[str]] = field( + renames: Mapping[str, frozenset[str]] = field( default_factory=lambda: collections.defaultdict(frozenset)) - new_entrypoints: FrozenSet[str] = frozenset() + new_entrypoints: frozenset[str] = frozenset() def copy(self, **kwargs: Any) -> CallablesInferenceContext: return replace(self, **kwargs) @@ -749,7 +748,7 @@ def __getitem__(self, name): # }}} -TUnitOrKernelT = TypeVar("TUnitOrKernelT", LoopKernel, TranslationUnit) +TUnitOrKernelT = TypeVar("TUnitOrKernelT", "LoopKernel", TranslationUnit) # {{{ helper functions @@ -778,6 +777,7 @@ def _collective_check( *args: P.args, **kwargs: P.kwargs ) -> None: + from loopy import LoopKernel if isinstance(t_unit_or_kernel, TranslationUnit): for clbl in t_unit_or_kernel.callables_table.values(): if isinstance(clbl, CallableKernel): @@ -807,6 +807,7 @@ def _collective_transform( *args: P.args, **kwargs: P.kwargs ) -> TUnitOrKernelT: + from loopy import LoopKernel if isinstance(t_unit_or_kernel, TranslationUnit): t_unit = t_unit_or_kernel new_callables = {} @@ -886,7 +887,7 @@ def resolve_callables(t_unit: TranslationUnit) -> TranslationUnit: # get loopy specific callables known_callables.update(get_loopy_callables()) - callables_table = {} + callables_table: dict[FunctionIdT, InKernelCallable] = {} # callables: name of the calls seen in the program callables = {name for name, clbl in t_unit.callables_table.items() diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 57548ab6f..b3c6ffddd 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012-16 Andreas Kloeckner" __license__ = """ @@ -100,7 +103,7 @@ def map_call(self, expr, expn_state): else: return super().map_call(expr, expn_state) else: - return self.map_substitution(name, tag, expr.parameters, expn_state) + return self.map_subst_rule(name, tag, expr.parameters, expn_state) def map_call_with_kwargs(self, expr): # See https://github.com/inducer/loopy/pull/323 @@ -1060,7 +1063,7 @@ def infer_unknown_types( t_unit[e].args if arg.dtype not in (None, auto)} new_callable, clbl_inf_ctx = t_unit.callables_table[e].with_types( arg_id_to_dtype, clbl_inf_ctx) - clbl_inf_ctx, new_name = clbl_inf_ctx.with_callable(e, new_callable, + clbl_inf_ctx, _new_name = clbl_inf_ctx.with_callable(e, new_callable, is_entrypoint=True) if expect_completion: from loopy.types import LoopyType diff --git a/loopy/types.py b/loopy/types.py index 223b59cc5..fd7db51fa 100644 --- a/loopy/types.py +++ b/loopy/types.py @@ -203,13 +203,13 @@ def __eq__(self, other: object) -> bool: # }}} -ToLoopyTypeConvertible: TypeAlias = Union[Type[auto], None, np.dtype, LoopyType] +ToLoopyTypeConvertible: TypeAlias = Union[Type[auto], np.dtype, LoopyType, None] def to_loopy_type(dtype: ToLoopyTypeConvertible, allow_auto: bool = False, allow_none: bool = False, for_atomic: bool = False - ) -> Union[Type[auto], None, LoopyType]: + ) -> type[auto] | LoopyType | None: if dtype is None: if allow_none: return None @@ -262,7 +262,7 @@ def to_loopy_type(dtype: ToLoopyTypeConvertible, } -def to_unsigned_dtype(dtype: "np.dtype[Any]") -> "np.dtype[Any]": +def to_unsigned_dtype(dtype: np.dtype[Any]) -> np.dtype[Any]: if dtype.kind == "u": return dtype if dtype.kind != "i": diff --git a/loopy/typing.py b/loopy/typing.py index 7cc7209b9..5316c356b 100644 --- a/loopy/typing.py +++ b/loopy/typing.py @@ -1,6 +1,11 @@ """ -.. autoclass:: ExpressionT +.. autoclass:: Expression .. autoclass:: ShapeType +.. autodata:: InameStr +.. autodata:: InameStrSet + +.. currentmodule:: loopy + .. autoclass:: auto """ @@ -31,21 +36,22 @@ """ -from typing import Optional, Tuple, TypeVar +from typing import Tuple, TypeVar import numpy as np from typing_extensions import TypeAlias, TypeIs -from pymbolic.primitives import Expression -from pymbolic.typing import ArithmeticExpressionT, ExpressionT, IntegerT +from pymbolic.primitives import ExpressionNode +from pymbolic.typing import ArithmeticExpression, Expression, Integer # The Fortran parser may insert dimensions of 'None', but I'd like to phase # that out, so we're not encoding that in the type. -ShapeType: TypeAlias = Tuple[ArithmeticExpressionT, ...] +ShapeType: TypeAlias = Tuple[ArithmeticExpression, ...] StridesType: TypeAlias = ShapeType InameStr: TypeAlias = str +InameStrSet: TypeAlias = frozenset[InameStr] class auto: # noqa @@ -58,7 +64,7 @@ class auto: # noqa T = TypeVar("T") -def not_none(obj: Optional[T]) -> T: +def not_none(obj: T | None) -> T: assert obj is not None return obj @@ -67,15 +73,15 @@ def is_integer(obj: object) -> TypeIs[int | np.integer]: return isinstance(obj, (int, np.integer)) -def integer_or_err(expr: ExpressionT) -> IntegerT: +def integer_or_err(expr: Expression) -> Integer: if isinstance(expr, (int, np.integer)): return expr else: raise ValueError(f"expected integer, got {type(expr)}") -def integer_expr_or_err(expr: ExpressionT) -> IntegerT | Expression: - if isinstance(expr, (int, np.integer, Expression)): +def integer_expr_or_err(expr: Expression) -> Integer | ExpressionNode: + if isinstance(expr, (int, np.integer, ExpressionNode)): return expr else: raise ValueError(f"expected integer or expression, got {type(expr)}") diff --git a/loopy/version.py b/loopy/version.py index 609e6c179..8e350caf8 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ @@ -26,7 +29,7 @@ VERSION_TEXT = metadata.version("loopy") -_match = re.match("^([0-9.]+)([a-z0-9]*?)$", VERSION_TEXT) +_match = re.match(r"^([0-9.]+)([a-z0-9]*?)$", VERSION_TEXT) assert _match is not None VERSION_STATUS = _match.group(2) VERSION = tuple(int(nr) for nr in _match.group(1).split(".")) diff --git a/proto-tests/test_fem_assembly.py b/proto-tests/test_fem_assembly.py index 9103c42cc..0a28d5ccb 100644 --- a/proto-tests/test_fem_assembly.py +++ b/proto-tests/test_fem_assembly.py @@ -25,10 +25,10 @@ def test_laplacian_stiffness(ctx_factory): knl = lp.make_kernel(ctx.devices[0], "[Nc] -> {[K,i,j,q, dx_axis, ax_b]: 0<=K=63", -] +requires = ["hatchling"] +build-backend = "hatchling.build" [project] name = "loopy" @@ -32,7 +30,7 @@ classifiers = [ ] dependencies = [ "pytools>=2024.1.5", - "pymbolic>=2024.1", + "pymbolic>=2024.2.2", "genpy>=2016.1.2", # https://github.com/inducer/loopy/pull/419 @@ -62,22 +60,22 @@ fortran = [ "ply>=3.6", ] +[tool.hatch.metadata] +allow-direct-references = true + +[tool.hatch.build.targets.sdist] +exclude = [ + "/.git*", + "/doc/_build", + "/run-*.sh", +] + [project.scripts] [project.urls] Documentation = "https://documen.tician.de/loopy" Homepage = "https://github.com/inducer/loopy" -[tool.setuptools.packages.find] -include = [ - "loopy*", -] - -[tool.setuptools.package-data] -loopy = [ - "py.typed", -] - [tool.setuptools.package-dir] # https://github.com/Infleqtion/client-superstaq/pull/715 "" = "." @@ -99,11 +97,10 @@ extend-select = [ "Q", # flake8-quotes "W", # pycodestyle - # TODO - # "UP", # pyupgrade - # "RUF", # ruff - - "RUF022", # __all__ isn't sorted + "UP", # pyupgrade + "RUF", # ruff + "FA", + "TC", ] extend-ignore = [ "C90", # McCabe complexity @@ -115,8 +112,8 @@ extend-ignore = [ # FIXME "NPY002", # numpy rng - "C408", # unnecssary dict() -> literal - "F841", # local variable unused + "UP031", # .format instead of %s + "UP032", # .format instead of %s ] [tool.ruff.lint.per-file-ignores] @@ -124,6 +121,12 @@ extend-ignore = [ "loopy/target/c/compyte/ndarray/**/*.py" = ["Q", "B", "E", "F", "N", "C4"] "loopy/frontend/fortran/translator.py" = ["N802", "B028"] "proto-tests/*.py" = ["B"] +"contrib/**/*.py" = ["I002"] +"doc/conf.py" = ["I002"] +"*.ipynb" = ["I002"] +"examples/**/*.py" = ["I002"] +"proto-tests/**/*.py" = ["I002"] +"test/**/*.py" = ["I002"] [tool.ruff.lint.flake8-quotes] docstring-quotes = "double" @@ -144,6 +147,7 @@ known-local-folder = [ "loopy", ] lines-after-imports = 2 +required-imports = ["from __future__ import annotations"] [tool.mypy] python_version = "3.10" @@ -153,24 +157,23 @@ warn_unused_ignores = true # check_untyped_defs = true exclude = [ - "loopy/target/c/compyte/ndarray/.*", "loopy/target/c/compyte/array.py", ] [[tool.mypy.overrides]] module = [ + "loopy.symbolic", +] +# check_untyped_defs = true + +[[tool.mypy.overrides]] +module = [ + "IPython.*", + "fparser.*", "islpy.*", - "pymbolic.*", - "cgen.*", - "genpy.*", - "pyopencl.*", - "colorama.*", - "codepy.*", "mako.*", - "fparser.*", "ply.*", - "pygments.*", - "IPython.*", + "pyopencl.*", ] ignore_missing_imports = true @@ -190,6 +193,8 @@ dout = "dout" ue = "ue" # used in an ordering context, "ab" / "ba" ba = "ba" +# Fortran Loopy +floopy = "floopy" "dependees" = "dependees" diff --git a/test/gnuma_loopy_transforms.py b/test/gnuma_loopy_transforms.py index 9c4400406..1b8842a66 100644 --- a/test/gnuma_loopy_transforms.py +++ b/test/gnuma_loopy_transforms.py @@ -39,6 +39,6 @@ def set_D_storage_format(kernel): # noqa: N802 def set_up_volume_loop(kernel, Nq): # noqa kernel = lp.fix_parameters(kernel, Nq=Nq) kernel = lp.prioritize_loops(kernel, "e,k,j,i") - kernel = lp.tag_inames(kernel, dict(e="g.0", j="l.1", i="l.0")) + kernel = lp.tag_inames(kernel, {"e": "g.0", "j": "l.1", "i": "l.0"}) kernel = lp.assume(kernel, "elements >= 1") return kernel diff --git a/test/test_apps.py b/test/test_apps.py index c4cffaee1..8c32aa9ac 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -27,7 +27,7 @@ import pytest import pyopencl as cl -import pyopencl.clmath # noqa +import pyopencl.clmath import pyopencl.clrandom # noqa import loopy as lp @@ -102,7 +102,7 @@ def variant_1(knl): def variant_2(knl): knl = lp.split_iname(knl, "im_x", 16, outer_tag="g.0", inner_tag="l.0") knl = lp.split_iname(knl, "im_y", 16, outer_tag="g.1", inner_tag="l.1") - knl = lp.tag_inames(knl, dict(ifeat="g.2")) + knl = lp.tag_inames(knl, {"ifeat": "g.2"}) knl = lp.add_prefetch(knl, "f[ifeat,:,:,:]", fetch_outer_inames="im_x_outer, im_y_outer, ifeat", default_tag="l.auto") @@ -117,10 +117,10 @@ def variant_2(knl): variant_2 ]: lp.auto_test_vs_ref(ref_knl, ctx, variant(knl), - parameters=dict( - im_w=128, im_h=128, f_w=f_w, - nfeats=3, nimgs=3 - )) + parameters={ + "im_w": 128, "im_h": 128, "f_w": f_w, + "nfeats": 3, "nimgs": 3 + }) def test_convolution_with_nonzero_base(ctx_factory): @@ -175,10 +175,10 @@ def variant_1(knl): variant_1, ]: lp.auto_test_vs_ref(ref_knl, ctx, variant(knl), - parameters=dict( - im_w=128, im_h=128, f_w=f_w, - nfeats=12, nimgs=17 - )) + parameters={ + "im_w": 128, "im_h": 128, "f_w": f_w, + "nfeats": 12, "nimgs": 17 + }) # }}} @@ -227,12 +227,12 @@ def test_rob_stroud_bernstein(): knl = lp.split_iname(knl, "el", 16, inner_tag="l.0") knl = lp.split_iname(knl, "el_outer", 2, outer_tag="g.0", inner_tag="ilp", slabs=(0, 1)) - knl = lp.tag_inames(knl, dict(i2="l.1", alpha1="unr", alpha2="unr")) - knl = lp.add_dtypes(knl, dict( - qpts=np.float32, - coeffs=np.float32, - tmp=np.float32, - )) + knl = lp.tag_inames(knl, {"i2": "l.1", "alpha1": "unr", "alpha2": "unr"}) + knl = lp.add_dtypes(knl, { + "qpts": np.float32, + "coeffs": np.float32, + "tmp": np.float32, + }) print(lp.generate_code_v2(knl)) @@ -306,18 +306,18 @@ def test_rob_stroud_bernstein_full(): knl = lp.split_iname(knl, "el", 16, inner_tag="l.0") knl = lp.split_iname(knl, "el_outer", 2, outer_tag="g.0", inner_tag="ilp", slabs=(0, 1)) - knl = lp.tag_inames(knl, dict(i2="l.1", alpha1="unr", alpha2="unr")) + knl = lp.tag_inames(knl, {"i2": "l.1", "alpha1": "unr", "alpha2": "unr"}) from pickle import dumps, loads knl = loads(dumps(knl)) knl = lp.add_dtypes(knl, - dict( - qpts=np.float32, - tmp=np.float32, - coeffs=np.float32, - result=np.float32, - )) + { + "qpts": np.float32, + "tmp": np.float32, + "coeffs": np.float32, + "result": np.float32, + }) print(lp.generate_code_v2(knl)) @@ -393,7 +393,7 @@ def test_stencil_with_overfetch(ctx_factory): # https://github.com/pocl/pocl/issues/205 pytest.skip("takes very long to compile on pocl") - knl = lp.add_and_infer_dtypes(knl, dict(a=np.float32)) + knl = lp.add_and_infer_dtypes(knl, {"a": np.float32}) ref_knl = knl @@ -411,7 +411,7 @@ def variant_overfetch(knl): n = 200 lp.auto_test_vs_ref(ref_knl, ctx, variant(knl), print_ref_code=False, - op_count=[n*n], parameters=dict(n=n), op_label=["cells"]) + op_count=[n*n], parameters={"n": n}, op_label=["cells"]) def test_sum_factorization(): @@ -531,8 +531,8 @@ def test_fd_demo(): # u = cl.clrandom.rand(queue, (n+2, n+2), dtype=np.float32) knl = lp.set_options(knl, write_code=True) - knl = lp.add_and_infer_dtypes(knl, dict(u=np.float32)) - code, inf = lp.generate_code(knl) + knl = lp.add_and_infer_dtypes(knl, {"u": np.float32}) + code, _inf = lp.generate_code(knl) print(code) assert "double" not in code @@ -555,7 +555,7 @@ def test_fd_1d(ctx_factory): lp.auto_test_vs_ref( ref_knl, ctx, knl, - parameters=dict(n=2048)) + parameters={"n": 2048}) def test_poisson_fem(ctx_factory): @@ -600,12 +600,12 @@ def variant_2(knl): return knl def add_types(knl): - return lp.add_and_infer_dtypes(knl, dict( - w=np.float32, - J=np.float32, - DPsi=np.float32, - DFinv=np.float32, - )) + return lp.add_and_infer_dtypes(knl, { + "w": np.float32, + "J": np.float32, + "DPsi": np.float32, + "DFinv": np.float32, + }) for variant in [ # variant_1, @@ -615,7 +615,7 @@ def add_types(knl): lp.auto_test_vs_ref( add_types(ref_knl), ctx, add_types(knl), - parameters=dict(n=5, nels=15, nbf=5, sdim=2, nqp=7)) + parameters={"n": 5, "nels": 15, "nbf": 5, "sdim": 2, "nqp": 7}) def test_domain_tree_nesting(): diff --git a/test/test_c_execution.py b/test/test_c_execution.py index 6208b9aed..9943d41df 100644 --- a/test/test_c_execution.py +++ b/test/test_c_execution.py @@ -365,7 +365,7 @@ def test_one_length_loop(): def test_scalar_global_args(): n = np.random.default_rng().integers(30, 100) - evt, (out,) = lp.make_kernel( + _evt, (out,) = lp.make_kernel( "{[i]: 0<=i temp[i, 0] = 17 temp[i, 1] = 15 """) - knl = lp.tag_inames(knl, dict(i="l.0")) + knl = lp.tag_inames(knl, {"i": "l.0"}) print(lp.generate_code_v2(knl).device_code()) @@ -788,12 +788,12 @@ def test_make_copy_kernel(ctx_factory): cknl1 = lp.fix_parameters(cknl1, n2=3) cknl1 = lp.set_options(cknl1, write_code=True) - evt, a2 = cknl1(queue, input=a1) + _evt, a2 = cknl1(queue, input=a1) cknl2 = lp.make_copy_kernel("c,c,c", intermediate_format) cknl2 = lp.fix_parameters(cknl2, n2=3) - evt, a3 = cknl2(queue, input=a2) + _evt, a3 = cknl2(queue, input=a2) assert (a1 == a3).all() @@ -810,7 +810,7 @@ def test_make_copy_kernel_with_offsets(ctx_factory): cknl1 = lp.fix_parameters(cknl1, n0=3) cknl1 = lp.set_options(cknl1, write_code=True) - evt, (a2_dev,) = cknl1(queue, input=a1_dev) + _evt, (a2_dev,) = cknl1(queue, input=a1_dev) assert (a1 == a2_dev.get()).all() @@ -830,14 +830,14 @@ def test_auto_test_can_detect_problems(ctx_factory): a[i,i] = 25 """) - ref_knl = lp.add_and_infer_dtypes(ref_knl, dict(a=np.float32)) - knl = lp.add_and_infer_dtypes(knl, dict(a=np.float32)) + ref_knl = lp.add_and_infer_dtypes(ref_knl, {"a": np.float32}) + knl = lp.add_and_infer_dtypes(knl, {"a": np.float32}) from loopy.diagnostic import AutomaticTestFailure with pytest.raises(AutomaticTestFailure): lp.auto_test_vs_ref( ref_knl, ctx, knl, - parameters=dict(n=123)) + parameters={"n": 123}) def test_auto_test_zero_warmup_rounds(ctx_factory): @@ -849,11 +849,11 @@ def test_auto_test_zero_warmup_rounds(ctx_factory): a[i,j] = 25 """) - ref_knl = lp.add_and_infer_dtypes(ref_knl, dict(a=np.float32)) + ref_knl = lp.add_and_infer_dtypes(ref_knl, {"a": np.float32}) lp.auto_test_vs_ref( ref_knl, ctx, ref_knl, - parameters=dict(n=12), + parameters={"n": 12}, warmup_rounds=0) @@ -894,7 +894,7 @@ def test_atomic(ctx_factory, dtype): ref_knl = knl knl = lp.split_iname(knl, "i", 512) knl = lp.split_iname(knl, "i_inner", 128, outer_tag="unr", inner_tag="g.0") - lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=10000)) + lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters={"n": 10000}) @pytest.mark.parametrize("dtype", [np.int32, np.int64, np.float32, np.float64]) @@ -1001,7 +1001,7 @@ def test_literal_local_barrier(ctx_factory): ref_knl = knl - lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=5)) + lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters={"n": 5}) def test_local_barrier_mem_kind(): @@ -1056,7 +1056,7 @@ def test_kernel_splitting(ctx_factory): print(cgr.device_code()) print(cgr.host_code()) - lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=5)) + lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters={"n": 5}) def test_kernel_splitting_with_loop(ctx_factory): @@ -1090,7 +1090,7 @@ def test_kernel_splitting_with_loop(ctx_factory): print(cgr.device_code()) print(cgr.host_code()) - lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=5)) + lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters={"n": 5}) def save_and_reload_temporaries_test(queue, prog, out_expect, debug=False): @@ -1126,7 +1126,7 @@ def test_save_of_private_scalar(ctx_factory, hw_loop, debug=False): """, seq_dependencies=True) if hw_loop: - prog = lp.tag_inames(prog, dict(i="g.0")) + prog = lp.tag_inames(prog, {"i": "g.0"}) save_and_reload_temporaries_test(queue, prog, np.arange(8), debug) @@ -1167,7 +1167,7 @@ def test_save_of_private_array_in_hw_loop(ctx_factory, debug=False): end """, seq_dependencies=True) - knl = lp.tag_inames(knl, dict(i="g.0")) + knl = lp.tag_inames(knl, {"i": "g.0"}) knl = lp.set_temporary_address_space(knl, "t", "private") save_and_reload_temporaries_test( @@ -1217,7 +1217,7 @@ def test_save_of_private_multidim_array_in_hw_loop(ctx_factory, debug=False): """, seq_dependencies=True) knl = lp.set_temporary_address_space(knl, "t", "private") - knl = lp.tag_inames(knl, dict(i="g.0")) + knl = lp.tag_inames(knl, {"i": "g.0"}) result = np.array([np.vstack(8 * (np.arange(8),)) for i in range(8)]) save_and_reload_temporaries_test(queue, knl, result, debug) @@ -1250,7 +1250,7 @@ def test_save_of_multiple_private_temporaries(ctx_factory, hw_loop, debug=False) knl = lp.set_temporary_address_space(knl, "t_arr", "private") if hw_loop: - knl = lp.tag_inames(knl, dict(i="g.0")) + knl = lp.tag_inames(knl, {"i": "g.0"}) result = np.array([1, 10, 10, 10, 10, 10, 10, 10, 10, 9]) @@ -1273,7 +1273,7 @@ def test_save_of_local_array(ctx_factory, debug=False): """, seq_dependencies=True) knl = lp.set_temporary_address_space(knl, "t", "local") - knl = lp.tag_inames(knl, dict(i="g.0", j="l.0")) + knl = lp.tag_inames(knl, {"i": "g.0", "j": "l.0"}) save_and_reload_temporaries_test(queue, knl, np.arange(8), debug) @@ -1295,7 +1295,7 @@ def test_save_of_local_array_with_explicit_local_barrier(ctx_factory, debug=Fals """, seq_dependencies=True) knl = lp.set_temporary_address_space(knl, "t", "local") - knl = lp.tag_inames(knl, dict(i="g.0", j="l.0")) + knl = lp.tag_inames(knl, {"i": "g.0", "j": "l.0"}) save_and_reload_temporaries_test(queue, knl, np.arange(8), debug) @@ -1316,7 +1316,7 @@ def test_save_local_multidim_array(ctx_factory, debug=False): """, seq_dependencies=True) knl = lp.set_temporary_address_space(knl, "t_local", "local") - knl = lp.tag_inames(knl, dict(j="l.0", i="g.0")) + knl = lp.tag_inames(knl, {"j": "l.0", "i": "g.0"}) save_and_reload_temporaries_test(queue, knl, 1, debug) @@ -1336,7 +1336,7 @@ def test_save_with_base_storage(ctx_factory, debug=False): "...", seq_dependencies=True) - knl = lp.tag_inames(knl, dict(i="l.0")) + knl = lp.tag_inames(knl, {"i": "l.0"}) knl = lp.set_temporary_address_space(knl, "a", "local") knl = lp.set_temporary_address_space(knl, "b", "local") @@ -1359,7 +1359,7 @@ def test_save_ambiguous_storage_requirements(): """, seq_dependencies=True) - knl = lp.tag_inames(knl, dict(i="g.0", j="l.0")) + knl = lp.tag_inames(knl, {"i": "g.0", "j": "l.0"}) knl = lp.duplicate_inames(knl, "j", within="writes:out", tags={"j": "l.0"}) knl = lp.set_temporary_address_space(knl, "a", "local") @@ -1382,7 +1382,7 @@ def test_save_across_inames_with_same_tag(ctx_factory, debug=False): "...", seq_dependencies=True) - knl = lp.tag_inames(knl, dict(i="l.0")) + knl = lp.tag_inames(knl, {"i": "l.0"}) knl = lp.duplicate_inames(knl, "i", within="reads:a", tags={"i": "l.0"}) save_and_reload_temporaries_test(queue, knl, np.arange(10), debug) @@ -1453,7 +1453,7 @@ def test_global_temporary(ctx_factory): print(cgr.device_code()) # print(cgr.host_code()) - lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=5)) + lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters={"n": 5}) def test_assign_to_linear_subscript(ctx_factory): @@ -1508,7 +1508,7 @@ def test_finite_difference_expr_subst(ctx_factory): ]) fused_knl = lp.set_options(fused_knl, write_code=True) - evt, _ = fused_knl(queue, u=u, h=np.float32(1e-1)) + _evt, _ = fused_knl(queue, u=u, h=np.float32(1e-1)) fused_knl = lp.assignment_to_subst(fused_knl, "f") @@ -1517,7 +1517,7 @@ def test_finite_difference_expr_subst(ctx_factory): # This is the real test here: The automatically generated # shape expressions are '2+n' and the ones above are 'n+2'. # Is loopy smart enough to understand that these are equal? - evt, _ = fused_knl(queue, u=u, h=np.float32(1e-1)) + _evt, _ = fused_knl(queue, u=u, h=np.float32(1e-1)) fused0_knl = lp.affine_map_inames(fused_knl, "i", "inew", "inew+1=i") @@ -1530,7 +1530,7 @@ def test_finite_difference_expr_subst(ctx_factory): precomp_knl = lp.tag_inames(precomp_knl, {"j_0_outer": "unr"}) precomp_knl = lp.set_options(precomp_knl, return_dict=True) - evt, _ = precomp_knl(queue, u=u, h=h) + _evt, _ = precomp_knl(queue, u=u, h=h) # {{{ call without returned values @@ -1549,7 +1549,7 @@ def test_call_with_no_returned_value(ctx_factory): from library_for_test import NoRetFunction knl = lp.register_callable(knl, "f", NoRetFunction("f")) - evt, _ = knl(queue) + _evt, _ = knl(queue) # }}} @@ -1644,7 +1644,7 @@ def test_sequential_dependencies(ctx_factory): print(prog["loopy_kernel"].stringify(with_dependencies=True)) - lp.auto_test_vs_ref(prog, ctx, prog, parameters=dict(n=5)) + lp.auto_test_vs_ref(prog, ctx, prog, parameters={"n": 5}) def test_nop(ctx_factory): @@ -1666,7 +1666,7 @@ def test_nop(ctx_factory): knl = lp.fix_parameters(knl, n=15) knl = lp.add_and_infer_dtypes(knl, {"z": np.float64}) - lp.auto_test_vs_ref(knl, ctx, knl, parameters=dict(ntrips=5)) + lp.auto_test_vs_ref(knl, ctx, knl, parameters={"ntrips": 5}) def test_global_barrier(ctx_factory): @@ -1708,7 +1708,7 @@ def test_global_barrier(ctx_factory): print(knl) - lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(ntrips=5, n=10)) + lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters={"ntrips": 5, "n": 10}) def test_missing_global_barrier(): @@ -1842,7 +1842,7 @@ def test_temp_initializer(ctx_factory, src_order, tmp_order): knl = lp.set_options(knl, write_code=True) knl = lp.fix_parameters(knl, n=a.shape[0]) - evt, (a2,) = knl(queue, out_host=True) + _evt, (a2,) = knl(queue, out_host=True) assert np.array_equal(a, a2) @@ -1939,7 +1939,7 @@ def test_if_else(ctx_factory): """ ) - evt, (out,) = knl(queue, out_host=True) + _evt, (out,) = knl(queue, out_host=True) out_ref = np.empty(50) out_ref[::3] = 15 @@ -1967,7 +1967,7 @@ def test_if_else(ctx_factory): """ ) - evt, (out,) = knl(queue, out_host=True) + _evt, (out,) = knl(queue, out_host=True) out_ref = np.zeros(50) out_ref[1::2] = 4 @@ -2000,7 +2000,7 @@ def test_if_else(ctx_factory): """ ) - evt, (out,) = knl(queue, out_host=True) + _evt, (out,) = knl(queue, out_host=True) out_ref = np.zeros((50, 50)) out_ref[:25, 0::2] = 1 @@ -2036,7 +2036,7 @@ def test_tight_loop_bounds(ctx_factory): knl = lp.set_options(knl, write_code=True) - evt, (out,) = knl(queue, out_host=True) + _evt, (out,) = knl(queue, out_host=True) assert (out == np.arange(10)).all() @@ -2247,7 +2247,7 @@ def test_barrier_insertion_near_top_of_loop(): """, seq_dependencies=True) - prog = lp.tag_inames(prog, dict(i="l.0")) + prog = lp.tag_inames(prog, {"i": "l.0"}) prog = lp.set_temporary_address_space(prog, "a", "local") prog = lp.set_temporary_address_space(prog, "b", "local") prog = lp.preprocess_kernel(prog) @@ -2273,7 +2273,7 @@ def test_barrier_insertion_near_bottom_of_loop(): end """, seq_dependencies=True) - prog = lp.tag_inames(prog, dict(i="l.0")) + prog = lp.tag_inames(prog, {"i": "l.0"}) prog = lp.set_temporary_address_space(prog, "a", "local") prog = lp.set_temporary_address_space(prog, "b", "local") prog = lp.preprocess_kernel(prog) @@ -2449,7 +2449,7 @@ def test_inames_conditional_generation(ctx_factory): "...", seq_dependencies=True) - knl = lp.tag_inames(knl, dict(i="g.0")) + knl = lp.tag_inames(knl, {"i": "g.0"}) with cl.CommandQueue(ctx) as queue: knl(queue) @@ -2465,7 +2465,7 @@ def test_fixed_parameters(ctx_factory): <>tmp[i] = i {id=init} tmp[0] = 0 {dep=init} """, - fixed_parameters=dict(n=1)) + fixed_parameters={"n": 1}) knl(queue) @@ -2485,7 +2485,7 @@ def test_execution_backend_can_cache_dtypes(ctx_factory): queue = cl.CommandQueue(ctx) knl = lp.make_kernel("{[i]: 0 <= i < 10}", "<>tmp[i] = i") - knl = lp.add_dtypes(knl, dict(tmp=int)) + knl = lp.add_dtypes(knl, {"tmp": int}) knl(queue) @@ -2542,7 +2542,7 @@ def test_relaxed_stride_checks(ctx_factory): mat = np.zeros((1, 10), order="F") b = np.zeros(10) - evt, (a,) = knl(queue, A=mat, b=b) + _evt, (a,) = knl(queue, A=mat, b=b) assert a == 0 @@ -2720,7 +2720,7 @@ def test_dump_binary(ctx_factory): ref_knl = knl lp.auto_test_vs_ref( - ref_knl, ctx, knl, parameters=dict(n=5), + ref_knl, ctx, knl, parameters={"n": 5}, dump_binary=True) @@ -2964,7 +2964,7 @@ def test_split_iname_within(ctx_factory): x[i, j] = 3 {id=a} y[i, j] = 2 * y[i, j] {id=b} """, - options=dict(write_code=True)) + options={"write_code": True}) ref_knl = knl @@ -2975,7 +2975,7 @@ def test_split_iname_within(ctx_factory): outer_tag="g.0", inner_tag="l.0", within="id:b") - lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=5)) + lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters={"n": 5}) @pytest.mark.parametrize("base_type,exp_type", [ @@ -3020,7 +3020,7 @@ def _make_random_np_array(shape, dtype): knl = lp.add_dtypes(knl, {"base": base_type, "power": exp_type}) - evt, (result,) = knl(queue, base=base, power=power) + _evt, (result,) = knl(queue, base=base, power=power) assert result.dtype == expected_result.dtype @@ -3070,7 +3070,7 @@ def test_scalar_temporary(ctx_factory): lp.TemporaryVariable("tmp", address_space=lp.AddressSpace.GLOBAL, shape=lp.auto), ...]) - evt, (out, ) = knl(queue, x=x_in) + _evt, (out, ) = knl(queue, x=x_in) np.testing.assert_allclose(4*x_in, out.get()) @@ -3255,7 +3255,7 @@ def test_zero_stride_array(ctx_factory): y[i, j] = 1 """, [lp.GlobalArg("y", shape=(10, 0))]) - evt, (out,) = knl(cq) + _evt, (out,) = knl(cq) assert out.shape == (10, 0) @@ -3271,13 +3271,13 @@ def test_sep_array_ordering(ctx_factory): """ x[k, i] = k """, - [lp.GlobalArg("x", shape=("noutputs", "m"), dim_tags="sep,C")] + [...], - fixed_parameters=dict(noutputs=n), + [lp.GlobalArg("x", shape=("noutputs", "m"), dim_tags="sep,C"), ...], + fixed_parameters={"noutputs": n}, ) knl = lp.tag_inames(knl, "k:unr") x = [cl.array.empty(cq, (0,), dtype=np.float64) for i in range(n)] - evt, out = knl(cq, x=x) + _evt, out = knl(cq, x=x) for i in range(n): assert out[i] is x[i], f"failed on input x{i}: {id(out[i])} {id(x[i])}" @@ -3532,7 +3532,7 @@ def test_type_inference_of_clbls_in_substitutions(ctx_factory): y[i] = subst_0(i) """) - evt, (out,) = knl(cq) + _evt, (out,) = knl(cq) np.testing.assert_allclose(out.get(), np.abs(10.0*(np.arange(10)-5))) @@ -3677,8 +3677,8 @@ def test_no_unnecessary_lbarrier(ctx_factory): """, assumptions="n>=0") - t_unit = lp.add_dtypes(t_unit, dict(ai=np.float32)) - t_unit = lp.tag_inames(t_unit, dict(i_inner="l.0", i_outer="g.0")) + t_unit = lp.add_dtypes(t_unit, {"ai": np.float32}) + t_unit = lp.tag_inames(t_unit, {"i_inner": "l.0", "i_outer": "g.0"}) t_unit = lp.set_temporary_address_space(t_unit, "s_a", "local") t_unit = lp.prioritize_loops(t_unit, "i_outer,i_inner") @@ -3704,6 +3704,21 @@ def test_long_kernel(): lp.get_one_linearized_kernel(t_unit.default_entrypoint, t_unit.callables_table) +@pytest.mark.filterwarnings("error:.*:loopy.LoopyWarning") +def test_loop_imperfect_nest_priorities_in_v2_scheduler(): + # Reported by Connor Ward. See . + knl = lp.make_kernel( + "{ [i,j,k]: 0 <= i,j,k < 5}", + """ + x[i, j] = i + j + y[i, k] = i + k + """, + loop_priority=frozenset({("i", "j"), ("i", "k")}), + ) + + lp.generate_code_v2(knl) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) diff --git a/test/test_nbody.py b/test/test_nbody.py index e258d801e..02dcb1743 100644 --- a/test/test_nbody.py +++ b/test/test_nbody.py @@ -81,7 +81,7 @@ def variant_gpu(knl): knl = lp.add_prefetch(knl, "x[j,k]", ["j_inner", "k"], ["x_fetch_j", "x_fetch_k"], fetch_outer_inames="i_outer, j_outer", default_tag=None) - knl = lp.tag_inames(knl, dict(x_fetch_k="unr", x_fetch_j="l.0")) + knl = lp.tag_inames(knl, {"x_fetch_k": "unr", "x_fetch_j": "l.0"}) knl = lp.add_prefetch(knl, "x[i,k]", ["k"], default_tag=None) knl = lp.prioritize_loops(knl, ["j_outer", "j_inner"]) return knl diff --git a/test/test_numa_diff.py b/test/test_numa_diff.py index 55ff270a2..92020b73a 100644 --- a/test/test_numa_diff.py +++ b/test/test_numa_diff.py @@ -84,7 +84,7 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa hsv = lp.fix_parameters(hsv, Nq=Nq) hsv = lp.prioritize_loops(hsv, "e,k,j,i") - hsv = lp.tag_inames(hsv, dict(e="g.0", j="l.1", i="l.0")) + hsv = lp.tag_inames(hsv, {"e": "g.0", "j": "l.1", "i": "l.0"}) hsv = lp.assume(hsv, "elements >= 1") hsv = fix_euler_parameters(hsv, p_p0=1, p_Gamma=1.4, p_R=1) @@ -168,7 +168,7 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa hsv = lp.rename_iname(hsv, "n", n_iname, within="id:"+reader.id, existing_ok=True) - hsv = lp.tag_inames(hsv, dict(ii="l.0", jj="l.1")) + hsv = lp.tag_inames(hsv, {"ii": "l.0", "jj": "l.1"}) for iname in flux_ilp_inames: hsv = lp.tag_inames(hsv, {iname: "ilp"}) @@ -193,9 +193,9 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa if opt_level == 4: tap_hsv = hsv - tap_hsv = lp.tag_inames(tap_hsv, dict( - Q_dim_field_inner="unr", - Q_dim_field_outer="unr")) + tap_hsv = lp.tag_inames(tap_hsv, { + "Q_dim_field_inner": "unr", + "Q_dim_field_outer": "unr"}) hsv = lp.buffer_array(hsv, "rhsQ", ilp_inames, fetch_bounding_box=True, default_tag="for", @@ -203,11 +203,11 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa if opt_level == 5: tap_hsv = hsv - tap_hsv = lp.tag_inames(tap_hsv, dict( - rhsQ_init_field_inner="unr", rhsQ_store_field_inner="unr", - rhsQ_init_field_outer="unr", rhsQ_store_field_outer="unr", - Q_dim_field_inner="unr", - Q_dim_field_outer="unr")) + tap_hsv = lp.tag_inames(tap_hsv, { + "rhsQ_init_field_inner": "unr", "rhsQ_store_field_inner": "unr", + "rhsQ_init_field_outer": "unr", "rhsQ_store_field_outer": "unr", + "Q_dim_field_inner": "unr", + "Q_dim_field_outer": "unr"}) # buffer axes need to be vectorized in order for this to work hsv = lp.tag_array_axes(hsv, "rhsQ_buf", "c?,vec,c") @@ -219,17 +219,17 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa if opt_level == 6: tap_hsv = hsv - tap_hsv = lp.tag_inames(tap_hsv, dict( - rhsQ_init_field_inner="unr", rhsQ_store_field_inner="unr", - rhsQ_init_field_outer="unr", rhsQ_store_field_outer="unr", - Q_dim_field_inner="unr", - Q_dim_field_outer="unr")) - - hsv = lp.tag_inames(hsv, dict( - rhsQ_init_field_inner="vec", rhsQ_store_field_inner="vec", - rhsQ_init_field_outer="unr", rhsQ_store_field_outer="unr", - Q_dim_field_inner="vec", - Q_dim_field_outer="unr")) + tap_hsv = lp.tag_inames(tap_hsv, { + "rhsQ_init_field_inner": "unr", "rhsQ_store_field_inner": "unr", + "rhsQ_init_field_outer": "unr", "rhsQ_store_field_outer": "unr", + "Q_dim_field_inner": "unr", + "Q_dim_field_outer": "unr"}) + + hsv = lp.tag_inames(hsv, { + "rhsQ_init_field_inner": "vec", "rhsQ_store_field_inner": "vec", + "rhsQ_init_field_outer": "unr", "rhsQ_store_field_outer": "unr", + "Q_dim_field_inner": "vec", + "Q_dim_field_outer": "unr"}) if opt_level == 7: tap_hsv = hsv @@ -266,7 +266,7 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa # add a simple transformation for it # hsv = hsv.copy(name="horizontalStrongVolumeKernel") - results = lp.auto_test_vs_ref(ref_hsv, ctx, hsv, parameters=dict(elements=300), + results = lp.auto_test_vs_ref(ref_hsv, ctx, hsv, parameters={"elements": 300}, quiet=True) elapsed = results["elapsed_wall"] diff --git a/test/test_reduction.py b/test/test_reduction.py index 0ca1a2650..125d247d9 100644 --- a/test/test_reduction.py +++ b/test/test_reduction.py @@ -27,8 +27,8 @@ import pytest import pyopencl as cl -import pyopencl.clmath # noqa -import pyopencl.clrandom # noqa +import pyopencl.clmath +import pyopencl.clrandom import pyopencl.version import loopy as lp @@ -86,7 +86,7 @@ def test_empty_reduction(ctx_factory): print(knl) knl = lp.set_options(knl, write_code=True) - evt, (a,) = knl(queue) + _evt, (a,) = knl(queue) assert (a.get() == 0).all() @@ -113,7 +113,7 @@ def test_nested_dependent_reduction(ctx_factory): n = 330 ell = np.arange(n, dtype=np.int32) - evt, (a,) = knl(queue, ell=ell, n=n, out_host=True) + _evt, (a,) = knl(queue, ell=ell, n=n, out_host=True) tgt_result = (2*ell-1)*2*ell/2 assert (a == tgt_result).all() @@ -314,7 +314,7 @@ def test_argmax(ctx_factory): knl = lp.set_options(knl, write_code=True, allow_terminal_colors=True) a = np.random.randn(10000).astype(dtype) - evt, (max_idx, max_val) = knl(queue, a=a, out_host=True) + _evt, (max_idx, max_val) = knl(queue, a=a, out_host=True) assert max_val == np.max(np.abs(a)) assert max_idx == np.where(np.abs(a) == max_val)[-1] @@ -333,7 +333,7 @@ def test_simul_reduce(ctx_factory): ], assumptions="n>=1") - evt, (a, b) = knl(queue, n=n) + _evt, (a, b) = knl(queue, n=n) ref = sum(i*j for i in range(n) for j in range(n)) assert a.get() == ref @@ -358,7 +358,7 @@ def test_reduction_library(ctx_factory, op_name, np_op): assumptions="n>=1") a = np.random.randn(20, 10) - evt, (res,) = knl(queue, a=a) + _evt, (res,) = knl(queue, a=a) assert np.allclose(res, np_op(a, axis=1)) @@ -395,7 +395,7 @@ def test_double_sum_made_unique(ctx_factory): knl = lp.make_reduction_inames_unique(knl) print(knl) - evt, (a, b) = knl(queue, n=n) + _evt, (a, b) = knl(queue, n=n) ref = sum(i*j for i in range(n) for j in range(n)) assert a.get() == ref @@ -408,14 +408,14 @@ def test_parallel_multi_output_reduction(ctx_factory): """ max_val, max_indices = argmax(i, abs(a[i]), i) """) - knl = lp.tag_inames(knl, dict(i="l.0")) - knl = lp.add_dtypes(knl, dict(a=np.float64)) + knl = lp.tag_inames(knl, {"i": "l.0"}) + knl = lp.add_dtypes(knl, {"a": np.float64}) ctx = ctx_factory() with cl.CommandQueue(ctx) as queue: a = np.random.rand(128) - out, (max_index, max_val) = knl(queue, a=a) + _out, (max_index, max_val) = knl(queue, a=a) assert max_val == np.max(a) assert max_index == np.argmax(np.abs(a)) @@ -497,7 +497,7 @@ def test_reduction_in_conditional(ctx_factory): knl = lp.preprocess_program(knl) - evt, (out,) = knl(cq) + _evt, (out,) = knl(cq) assert (out == 45).all() diff --git a/test/test_scan.py b/test/test_scan.py index 986a30daa..47c2e04b4 100644 --- a/test/test_scan.py +++ b/test/test_scan.py @@ -30,8 +30,8 @@ import pytest import pyopencl as cl -import pyopencl.clmath # noqa -import pyopencl.clrandom # noqa +import pyopencl.clmath +import pyopencl.clrandom import loopy as lp @@ -77,7 +77,7 @@ def test_sequential_scan(ctx_factory, n, stride): knl = lp.fix_parameters(knl, n=n) knl = lp.realize_reduction(knl, force_scan=True) - evt, (a,) = knl(queue) + _evt, (a,) = knl(queue) assert (a.get() == np.cumsum(np.arange(stride*n)**2)[::stride]).all() @@ -108,7 +108,7 @@ def test_scan_with_different_lower_bound_from_sweep( knl = lp.fix_parameters(knl, sweep_lbound=sweep_lbound, scan_lbound=scan_lbound) knl = lp.realize_reduction(knl, force_scan=True) - evt, (out,) = knl(queue, n=n) + _evt, (out,) = knl(queue, n=n) assert (out.get() == np.cumsum(np.arange(scan_lbound, 2*n+scan_lbound)**2)[::2]).all() @@ -138,7 +138,7 @@ def test_force_outer_iname_for_scan(): "[n] -> {[i,j,k]: 0<=k {[i]: 0<=i temp = 2*b[i] a[i] = temp """) - knl = lp.add_and_infer_dtypes(knl, dict(b=np.float32)) + knl = lp.add_and_infer_dtypes(knl, {"b": np.float32}) knl = lp.set_array_axis_names(knl, "a,b", "i") knl = lp.split_array_dim(knl, [("a", 0), ("b", 0)], 4, - split_kwargs=dict(slabs=(0, 1))) + split_kwargs={"slabs": (0, 1)}) knl = lp.tag_array_axes(knl, "a,b", "c,vec") ref_knl = knl @@ -301,11 +301,11 @@ def test_vectorize(ctx_factory): knl = lp.tag_inames(knl, {"i_inner": "vec"}) knl = lp.preprocess_kernel(knl) - code, inf = lp.generate_code(knl) + _code, _inf = lp.generate_code(knl) lp.auto_test_vs_ref( ref_knl, ctx, knl, - parameters=dict(n=30)) + parameters={"n": 30}) def test_extract_subst(ctx_factory): @@ -363,10 +363,10 @@ def test_tag_data_axes(ctx_factory): lp.tag_array_axes(knl, "out", "N1,N0,c") knl = lp.tag_array_axes(knl, "out", "N1,N0,N2") - knl = lp.tag_inames(knl, dict(j="g.0", i="g.1")) + knl = lp.tag_inames(knl, {"j": "g.0", "i": "g.1"}) lp.auto_test_vs_ref(ref_knl, ctx, knl, - parameters=dict(n=20)) + parameters={"n": 20}) def test_set_arg_order(): @@ -393,7 +393,7 @@ class BarTag(UniqueTag): knl = t_unit.default_entrypoint - tags = knl.iname_tags("i") + knl.iname_tags("i") assert not knl.iname_tags_of_type("i", FooTag) assert not knl.iname_tags_of_type("i", BarTag) @@ -428,11 +428,11 @@ def test_precompute_confusing_subst_arguments(ctx_factory): b[i,j] = D(j) """, name="precomputer") - prog = lp.add_and_infer_dtypes(prog, dict(a=np.float32)) + prog = lp.add_and_infer_dtypes(prog, {"a": np.float32}) ref_prog = prog - prog = lp.tag_inames(prog, dict(j="g.1")) + prog = lp.tag_inames(prog, {"j": "g.1"}) prog = lp.split_iname(prog, "i", 128, outer_tag="g.0", inner_tag="l.0") from loopy.symbolic import get_dependencies @@ -444,7 +444,7 @@ def test_precompute_confusing_subst_arguments(ctx_factory): lp.auto_test_vs_ref( ref_prog, ctx, prog, - parameters=dict(n=12345)) + parameters={"n": 12345}) def test_precompute_nested_subst(ctx_factory): @@ -458,7 +458,7 @@ def test_precompute_nested_subst(ctx_factory): b[i] = D """, name="precomputer") - prog = lp.add_and_infer_dtypes(prog, dict(a=np.float32)) + prog = lp.add_and_infer_dtypes(prog, {"a": np.float32}) ref_prog = prog @@ -482,7 +482,7 @@ def test_precompute_nested_subst(ctx_factory): lp.auto_test_vs_ref( ref_prog, ctx, prog, - parameters=dict(n=12345)) + parameters={"n": 12345}) def test_precompute_with_preexisting_inames(ctx_factory): @@ -517,7 +517,7 @@ def test_precompute_with_preexisting_inames(ctx_factory): lp.auto_test_vs_ref( ref_knl, ctx, knl, - parameters=dict(E=200)) + parameters={"E": 200}) def test_precompute_with_preexisting_inames_fail(): @@ -798,13 +798,7 @@ def test_map_domain_transform_map_validity_and_errors(ctx_factory): # Prioritize loops desired_prio = "x, t_outer, t_inner, z, y_new" - # Use constrain_loop_nesting if it's available - cln_attr = getattr(lp, "constrain_loop_nesting", None) - if cln_attr is not None: - knl_map_dom = lp.constrain_loop_nesting( # noqa pylint:disable=no-member - knl_map_dom, desired_prio) - else: - knl_map_dom = lp.prioritize_loops(knl_map_dom, desired_prio) + knl_map_dom = lp.prioritize_loops(knl_map_dom, desired_prio) # Get a linearization proc_knl_map_dom = lp.preprocess_kernel(knl_map_dom) @@ -818,11 +812,7 @@ def test_map_domain_transform_map_validity_and_errors(ctx_factory): knl_split_iname = ref_knl knl_split_iname = lp.split_iname(knl_split_iname, "t", 16) knl_split_iname = lp.rename_iname(knl_split_iname, "y", "y_new") - try: - # Use constrain_loop_nesting if it's available - knl_split_iname = lp.constrain_loop_nesting(knl_split_iname, desired_prio) - except AttributeError: - knl_split_iname = lp.prioritize_loops(knl_split_iname, desired_prio) + knl_split_iname = lp.prioritize_loops(knl_split_iname, desired_prio) proc_knl_split_iname = lp.preprocess_kernel(knl_split_iname) lin_knl_split_iname = lp.get_one_linearized_kernel( proc_knl_split_iname["loopy_kernel"], proc_knl_split_iname.callables_table) @@ -1164,7 +1154,7 @@ def test_rename_argument_with_auto_stride(ctx_factory): assert code_str.find("double const *__restrict__ x_new,") != -1 assert code_str.find("double const *__restrict__ x,") == -1 - evt, (out, ) = knl(queue, x_new=np.random.rand(10)) + _evt, (_out, ) = knl(queue, x_new=np.random.rand(10)) def test_rename_argument_with_assumptions():