Skip to content

Commit

Permalink
Merge pull request #24 from firedrakeproject/connorjward/check-upstream
Browse files Browse the repository at this point in the history
Merge upstream
  • Loading branch information
connorjward authored Jun 20, 2024
2 parents 967461b + 636ee6c commit 8600e53
Show file tree
Hide file tree
Showing 19 changed files with 176 additions and 107 deletions.
12 changes: 5 additions & 7 deletions .github/workflows/autopush.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,19 @@ on:
push:
branches:
- main
- kernel_callables_v3-edit2

jobs:
autopush:
name: Automatic push to gitlab.tiker.net
if: startsWith(github.repository, 'inducer/')
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- run: |
mkdir ~/.ssh && echo -e "Host gitlab.tiker.net\n\tStrictHostKeyChecking no\n" >> ~/.ssh/config
eval $(ssh-agent) && echo "$GITLAB_AUTOPUSH_KEY" | ssh-add -
git fetch --unshallow
TGT_BRANCH="${GITHUB_REF#refs/heads/}"
echo "pushing to $TGT_BRANCH..."
git push "git@gitlab.tiker.net:inducer/$(basename $GITHUB_REPOSITORY).git" "$TGT_BRANCH"
curl -L -O https://tiker.net/ci-support-v0
. ./ci-support-v0
mirror_github_to_gitlab
env:
GITLAB_AUTOPUSH_KEY: ${{ secrets.GITLAB_AUTOPUSH_KEY }}
Expand Down
4 changes: 2 additions & 2 deletions doc/ref_call.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ call site. For example, a call to ``sin(x)`` in :mod:`loopy` is type-generic to
begin with, but it is later specialized to either ``sinf``, ``sin`` or ``sinl``
depending on the type of its argument ``x``. A callable's behavior during type
or shape specialization is encoded via
:meth:`~loopy.kernel.function_interface.InKernelCallable.with_types` and
:meth:`~loopy.kernel.function_interface.InKernelCallable.with_descrs`.
:meth:`~loopy.InKernelCallable.with_types` and
:meth:`~loopy.InKernelCallable.with_descrs`.


Registering callables
Expand Down
11 changes: 11 additions & 0 deletions doc/ref_other.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,17 @@ Obtaining Kernel Performance Statistics
Controlling caching
-------------------

.. envvar:: LOOPY_NO_CACHE
.. envvar:: CG_NO_CACHE

By default, loopy will cache (on disk) the result of various stages
of code generation to speed up future code generation of the same kernel.
By setting the environment variables :envvar:`LOOPY_NO_CACHE` or
:envvar:`CG_NO_CACHE` to any
string that :func:`pytools.strtobool` evaluates as ``True``, this caching
is suppressed.


.. autofunction:: set_caching_enabled

.. autoclass:: CacheMode
Expand Down
9 changes: 2 additions & 7 deletions doc/ref_translation_unit.rst
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
.. currentmodule:: loopy

TranslationUnit
===============

.. autoclass:: TranslationUnit

Reference
---------
Translation Units
=================

.. automodule:: loopy.translation_unit
17 changes: 11 additions & 6 deletions loopy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
SubstitutionRule,
CallMangleInfo)
from loopy.kernel.function_interface import (
CallableKernel, ScalarCallable)
InKernelCallable, CallableKernel, ScalarCallable)
from loopy.translation_unit import (
TranslationUnit, make_program)

Expand Down Expand Up @@ -186,7 +186,7 @@
"CallInstruction", "CInstruction", "NoOpInstruction",
"BarrierInstruction",

"ScalarCallable", "CallableKernel",
"InKernelCallable", "ScalarCallable", "CallableKernel",

"TranslationUnit", "make_program",

Expand Down Expand Up @@ -390,7 +390,7 @@ def register_preamble_generators(kernel: LoopKernel, preamble_generators):
if pgen not in new_pgens:
if not unpickles_equally(pgen):
raise LoopyError("preamble generator '%s' does not "
"compare equally after being upickled "
"compare equally after being unpickled "
"and would thus disrupt loopy's caches"
% pgen)

Expand All @@ -408,7 +408,7 @@ def register_symbol_manglers(kernel, manglers):
if m not in new_manglers:
if not unpickles_equally(m):
raise LoopyError("mangler '%s' does not "
"compare equally after being upickled "
"compare equally after being unpickled "
"and would disrupt loopy's caches"
% m)

Expand All @@ -422,10 +422,15 @@ def register_symbol_manglers(kernel, manglers):
# {{{ cache control

import os
from pytools import strtobool

# Caching is enabled by default, but can be disabled by setting
# the environment variables LOOPY_NO_CACHE or CG_NO_CACHE to a
# 'true' value.
CACHING_ENABLED = (
"LOOPY_NO_CACHE" not in os.environ
not strtobool(os.environ.get("LOOPY_NO_CACHE", "false"))
and
"CG_NO_CACHE" not in os.environ)
not strtobool(os.environ.get("CG_NO_CACHE", "false")))


def set_caching_enabled(flag):
Expand Down
68 changes: 34 additions & 34 deletions loopy/codegen/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@
Optional, Tuple, TYPE_CHECKING)
from dataclasses import dataclass, replace
import logging

from loopy.codegen.result import CodeGenerationResult
from loopy.translation_unit import CallablesTable, TranslationUnit
logger = logging.getLogger(__name__)

import islpy as isl
Expand All @@ -40,7 +43,6 @@
from loopy.typing import ExpressionT
from loopy.kernel import LoopKernel
from loopy.target import TargetBase
from loopy.kernel.function_interface import InKernelCallable


from loopy.symbolic import CombineMapper
Expand Down Expand Up @@ -192,7 +194,7 @@ class CodeGenerationState:

var_subst_map: Map[str, ExpressionT]
allow_complex: bool
callables_table: Mapping[str, InKernelCallable]
callables_table: CallablesTable
is_entrypoint: bool
var_name_generator: UniqueNameGenerator
is_generating_device_code: bool
Expand Down Expand Up @@ -310,9 +312,13 @@ def ast_builder(self):
# }}}


code_gen_cache = WriteOncePersistentDict(
code_gen_cache: WriteOncePersistentDict[
TranslationUnit,
CodeGenerationResult
] = WriteOncePersistentDict(
"loopy-code-gen-cache-v3-"+DATA_MODEL_VERSION,
key_builder=LoopyKeyBuilder())
key_builder=LoopyKeyBuilder(),
safe_sync=False)


caches.append(code_gen_cache)
Expand Down Expand Up @@ -561,13 +567,7 @@ def all_code(self):
self.host_programs.values()))


def generate_code_v2(program):
"""
Returns an instance of :class:`CodeGenerationResult`.
:param program: An instance of :class:`loopy.TranslationUnit`.
"""

def generate_code_v2(t_unit: TranslationUnit) -> CodeGenerationResult:
from loopy.kernel import LoopKernel
from loopy.translation_unit import make_program

Expand All @@ -576,46 +576,46 @@ def generate_code_v2(program):
from loopy import CACHING_ENABLED

if CACHING_ENABLED:
input_program = program
input_t_unit = t_unit
try:
result = code_gen_cache[input_program]
logger.debug(f"TranslationUnit with entrypoints {program.entrypoints}:"
result = code_gen_cache[input_t_unit]
logger.debug(f"TranslationUnit with entrypoints {t_unit.entrypoints}:"
" code generation cache hit")
return result
except KeyError:
logger.debug(f"TranslationUnit with entrypoints {program.entrypoints}:"
logger.debug(f"TranslationUnit with entrypoints {t_unit.entrypoints}:"
" code generation cache miss")

# }}}

if isinstance(program, LoopKernel):
program = make_program(program)
if isinstance(t_unit, LoopKernel):
t_unit = make_program(t_unit)

from loopy.kernel import KernelState
if program.state < KernelState.PREPROCESSED:
if t_unit.state < KernelState.PREPROCESSED:
# Note that we cannot preprocess each kernel separately, since the
# preprocessing of each one now depends on the others.
# So if any one of the callable kernels is not preprocessed,
# then we have to do the preprocessing of every other kernel.
from loopy.preprocess import preprocess_program
program = preprocess_program(program)
t_unit = preprocess_program(t_unit)

from loopy.type_inference import infer_unknown_types
program = infer_unknown_types(program, expect_completion=True)
t_unit = infer_unknown_types(t_unit, expect_completion=True)

if program.state < KernelState.LINEARIZED:
if t_unit.state < KernelState.LINEARIZED:
from loopy.schedule import linearize
program = linearize(program)
t_unit = linearize(t_unit)

# Why diverge? Generated code for a non-entrypoint kernel and an entrypoint
# kernel isn't the same for a general loopy target. For example in OpenCL, a
# kernel callable from host and the one supposed to be callable from device
# have different function signatures. To generate correct code, each
# callable should be exclusively an entrypoint or a non-entrypoint kernel.
program = diverge_callee_entrypoints(program)
t_unit = diverge_callee_entrypoints(t_unit)

from loopy.check import pre_codegen_checks
pre_codegen_checks(program)
pre_codegen_checks(t_unit)

host_programs = {}
device_programs = []
Expand All @@ -624,13 +624,13 @@ def generate_code_v2(program):

# {{{ collect host/device programs

for func_id in sorted(key for key, val in program.callables_table.items()
for func_id in sorted(key for key, val in t_unit.callables_table.items()
if isinstance(val, CallableKernel)):
cgr = generate_code_for_a_single_kernel(program[func_id],
program.callables_table,
program.target,
func_id in program.entrypoints)
if func_id in program.entrypoints:
cgr = generate_code_for_a_single_kernel(t_unit[func_id],
t_unit.callables_table,
t_unit.target,
func_id in t_unit.entrypoints)
if func_id in t_unit.entrypoints:
host_programs[func_id] = cgr.host_program
else:
assert len(cgr.device_programs) == 1
Expand All @@ -643,14 +643,14 @@ def generate_code_v2(program):

# {{{ collect preambles

for clbl in program.callables_table.values():
device_preambles.extend(list(clbl.generate_preambles(program.target)))
for clbl in t_unit.callables_table.values():
device_preambles.extend(list(clbl.generate_preambles(t_unit.target)))

# }}}

# adding the callee fdecls to the device_programs
device_programs = ([device_programs[0].copy(
ast=program.target.get_device_ast_builder().ast_module.Collection(
ast=t_unit.target.get_device_ast_builder().ast_module.Collection(
callee_fdecls+[device_programs[0].ast]))] +
device_programs[1:])
cgr = TranslationUnitCodeGenerationResult(
Expand All @@ -659,7 +659,7 @@ def generate_code_v2(program):
device_preambles=device_preambles)

if CACHING_ENABLED:
code_gen_cache.store_if_not_present(input_program, cgr)
code_gen_cache.store_if_not_present(input_t_unit, cgr)

return cgr

Expand Down
2 changes: 2 additions & 0 deletions loopy/codegen/instruction.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ def generate_assignment_instruction_code(codegen_state, insn):

del lhs_is_vector
del rhs_is_vector
else:
is_vector = False

# }}}

Expand Down
24 changes: 18 additions & 6 deletions loopy/kernel/function_interface.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

__copyright__ = "Copyright (C) 2018 Andreas Kloeckner, Kaushik Kulkarni"

__license__ = """
Expand All @@ -20,7 +22,7 @@
THE SOFTWARE.
"""

from typing import ClassVar, Tuple
from typing import ClassVar, FrozenSet, Tuple, TYPE_CHECKING

from pytools import ImmutableRecord
from loopy.diagnostic import LoopyError
Expand All @@ -31,13 +33,18 @@
from loopy.kernel.data import ValueArg, ArrayArg
from loopy.symbolic import DependencyMapper, WalkMapper

if TYPE_CHECKING:
from loopy.translation_unit import CallablesTable, FunctionIdT

__doc__ = """
.. currentmodule:: loopy.kernel.function_interface
.. autoclass:: ValueArgDescriptor
.. autoclass:: ArrayArgDescriptor
.. currentmodule:: loopy
.. autoclass:: InKernelCallable
.. autoclass:: CallableKernel
Expand All @@ -64,7 +71,7 @@ class ArrayArgDescriptor(ImmutableRecord):
"""
Records information about an array argument to an in-kernel callable. To be
passed to and returned from
:meth:`InKernelCallable.with_descrs`, used for
:meth:`~loopy.InKernelCallable.with_descrs`, used for
matching shape and address space of caller and callee kernels.
.. attribute:: shape
Expand Down Expand Up @@ -367,9 +374,10 @@ def with_types(self, arg_id_to_dtype, clbl_inf_ctx):
def with_descrs(self, arg_id_to_descr, clbl_inf_ctx):
"""
:arg arg_id_to_descr: a mapping from argument identifiers (integers for
positional arguments) to instances of :class:`ArrayArgDescriptor`
or :class:`ValueArgDescriptor`. Unspecified/unknown descriptors are
not represented in *arg_id_to_type*.
positional arguments) to instances of
:class:`~loopy.kernel.function_interface.ArrayArgDescriptor`
or :class:`~loopy.kernel.function_interface.ValueArgDescriptor`.
Unspecified/unknown descriptors are not represented in *arg_id_to_descr*.
Return values are denoted by negative integers, with the first
returned value identified as *-1*.
Expand Down Expand Up @@ -453,7 +461,11 @@ def with_added_arg(self, arg_dtype, arg_descr):
"""
raise NotImplementedError()

def get_called_callables(self, callables_table, recursive=True):
def get_called_callables(
self,
callables_table: CallablesTable,
recursive: bool = True
) -> FrozenSet[FunctionIdT]:
"""
Returns a :class:`frozenset` of callable ids called by *self* that are
resolved via *callables_table*.
Expand Down
7 changes: 3 additions & 4 deletions loopy/library/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

from loopy.kernel.function_interface import ScalarCallable
from loopy.diagnostic import LoopyError
from loopy.translation_unit import CallablesTable
from loopy.types import NumpyType
import numpy as np

Expand Down Expand Up @@ -105,7 +106,7 @@ def emit_call_insn(self, insn, target, expression_to_code_mapper):
target), True


def get_loopy_callables():
def get_loopy_callables() -> CallablesTable:
"""
Returns a mapping from function ids to corresponding
:class:`loopy.kernel.function_interface.InKernelCallable` for functions
Expand All @@ -116,13 +117,11 @@ def get_loopy_callables():
- callables that have a predefined meaning in :mod:`loopy` like
``make_tuple``, ``indexof``, ``indexof_vec``.
"""
known_callables = {
return {
"make_tuple": MakeTupleCallable(name="make_tuple"),
"indexof": IndexOfCallable(name="indexof"),
"indexof_vec": IndexOfCallable(name="indexof_vec"),
}

return known_callables


# vim: foldmethod=marker
Loading

0 comments on commit 8600e53

Please sign in to comment.