Skip to content

Commit

Permalink
re-use most of the previous implementation for the fallback implement…
Browse files Browse the repository at this point in the history
…ation
  • Loading branch information
kaushikcfd committed May 11, 2021
1 parent 1e29450 commit 0203d09
Show file tree
Hide file tree
Showing 2 changed files with 172 additions and 155 deletions.
322 changes: 168 additions & 154 deletions loopy/schedule/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

from pytools import ImmutableRecord
import sys
import islpy as isl
from loopy.diagnostic import warn_with_kernel, LoopyError # noqa

from pytools import MinRecursionLimit, ProcessLogger
Expand Down Expand Up @@ -194,11 +195,12 @@ def find_loop_nest_with_map(kernel):
"""
result = {}

from loopy.kernel.data import ConcurrentTag
from loopy.kernel.data import ConcurrentTag, IlpBaseTag

all_nonpar_inames = {
iname for iname in kernel.all_inames()
if not kernel.iname_tags_of_type(iname, ConcurrentTag)}
if not kernel.iname_tags_of_type(iname,
(ConcurrentTag, IlpBaseTag))}

iname_to_insns = kernel.iname_to_insns()

Expand All @@ -214,24 +216,40 @@ def find_loop_nest_around_map(kernel):
"""Returns a dictionary mapping inames to other inames that are
always nested around them.
"""
from collections import defaultdict
from loopy.schedule.tools import get_loop_nest_tree
result = {}

tree = get_loop_nest_tree(kernel)
all_inames = kernel.all_inames()

loop_nest_around_map = defaultdict(frozenset)
iname_to_insns = kernel.iname_to_insns()

for node in tree.all_nodes_itr():
if node.identifier == tree.root:
continue
iname = node.identifier
depth = tree.depth(iname)
all_ancestors = frozenset(tree.ancestor(iname, d).identifier
for d in range(1, depth))
# examine pairs of all inames--O(n**2), I know.
from loopy.kernel.data import IlpBaseTag
for inner_iname in all_inames:
result[inner_iname] = set()
for outer_iname in all_inames:
if inner_iname == outer_iname:
continue

loop_nest_around_map[iname] = all_ancestors
if kernel.iname_tags_of_type(outer_iname, IlpBaseTag):
# ILP tags are special because they are parallel tags
# and therefore 'in principle' nest around everything.
# But they're realized by the scheduler as a loop
# at the innermost level, so we'll cut them some
# slack here.
continue

if iname_to_insns[inner_iname] < iname_to_insns[outer_iname]:
result[inner_iname].add(outer_iname)

for dom_idx, dom in enumerate(kernel.domains):
for outer_iname in dom.get_var_names(isl.dim_type.param):
if outer_iname not in all_inames:
continue

for inner_iname in dom.get_var_names(isl.dim_type.set):
result[inner_iname].add(outer_iname)

return loop_nest_around_map
return result


def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map):
Expand Down Expand Up @@ -802,17 +820,24 @@ def _get_dep_equivalent_nests(tree, within1, within2):
return iname1, iname2


class V2SchedulerNotImplementedException(RuntimeError):
    """Signal that the v2 scheduler cannot handle the given kernel.

    Raised from the v2 scheduling path when it meets a kernel feature it
    does not support (for example instruction priorities or a
    prescheduled kernel). The message names the unsupported feature; the
    caller of the v2 scheduler catches this exception, warns, and falls
    back to the slower scheduler implementation.
    """


def generate_loop_schedules_v2(kernel):
from loopy.schedule.tools import get_loop_nest_tree
from functools import reduce
from pytools.graph import compute_topological_order
from loopy.kernel.data import ConcurrentTag, IlpBaseTag, VectorizeTag

if any(insn.priority != 0 for insn in kernel.instructions):
raise NotImplementedError
raise V2SchedulerNotImplementedException("v2 scheduler cannot schedule"
" kernels with instruction priorities set.")

if kernel.schedule is not None:
raise NotImplementedError
# cannot handle preschedule yet
raise V2SchedulerNotImplementedException("v2 scheduler cannot schedule"
" prescheduled kernels.")

concurrent_inames = {iname for iname in kernel.all_inames()
if kernel.iname_tags_of_type(iname, ConcurrentTag)}
Expand Down Expand Up @@ -2074,154 +2099,143 @@ def generate_loop_schedules_inner(kernel, debug_args={}):
from loopy.check import pre_schedule_checks
pre_schedule_checks(kernel)

can_v2_scheduler_handle = (
# v2-scheduler cannot handle insn groups
all(len(insn.conflicts_with_groups) == 0
for insn in kernel.instructions)
# v2-scheduler cannot handle prescheduled kernel
and (not kernel.schedule)
# v2-scheduler cannot handle instruction priorities
and all(insn.priority == 0
for insn in kernel.instructions)
)

if can_v2_scheduler_handle:
try:
gen_sched = generate_loop_schedules_v2(kernel)
yield postprocess_schedule(kernel, gen_sched)
else:
schedule_count = 0

debug = ScheduleDebugger(**debug_args)

preschedule = (kernel.schedule

if kernel.state == KernelState.LINEARIZED

else ())

prescheduled_inames = {
insn.iname
for insn in preschedule
if isinstance(insn, EnterLoop)}

prescheduled_insn_ids = {
insn_id
for item in preschedule
for insn_id in sched_item_to_insn_id(item)}

from loopy.kernel.data import (IlpBaseTag, ConcurrentTag, VectorizeTag,
filter_iname_tags_by_type)
ilp_inames = {
name
for name, iname in kernel.inames.items()
if filter_iname_tags_by_type(iname.tags, IlpBaseTag)}
vec_inames = {
name
for name, iname in kernel.inames.items()
if filter_iname_tags_by_type(iname.tags, VectorizeTag)}
parallel_inames = {
name
for name, iname in kernel.inames.items()
if filter_iname_tags_by_type(iname.tags, ConcurrentTag)}

loop_nest_with_map = find_loop_nest_with_map(kernel)
loop_nest_around_map = find_loop_nest_around_map(kernel)
sched_state = SchedulerState(
kernel=kernel,
loop_nest_around_map=loop_nest_around_map,
loop_insn_dep_map=find_loop_insn_dep_map(
kernel,
loop_nest_with_map=loop_nest_with_map,
loop_nest_around_map=loop_nest_around_map),
breakable_inames=ilp_inames,
ilp_inames=ilp_inames,
vec_inames=vec_inames,

prescheduled_inames=prescheduled_inames,
prescheduled_insn_ids=prescheduled_insn_ids,

# time-varying part
active_inames=(),
entered_inames=frozenset(),
enclosing_subkernel_inames=(),

schedule=(),

unscheduled_insn_ids={insn.id for insn in kernel.instructions},
scheduled_insn_ids=frozenset(),
within_subkernel=kernel.state != KernelState.LINEARIZED,
may_schedule_global_barriers=True,

preschedule=preschedule,
insn_ids_to_try=None,

# ilp and vec are not parallel for the purposes of the scheduler
parallel_inames=parallel_inames - ilp_inames - vec_inames,

group_insn_counts=group_insn_counts(kernel),
active_group_counts={},

insns_in_topologically_sorted_order=(
get_insns_in_topologically_sorted_order(kernel)),
)

schedule_gen_kwargs = {}

def print_longest_dead_end():
if debug.interactive:
print("Loopy will now show you the scheduler state at the point")
print("where the longest (dead-end) schedule was generated, in the")
print("the hope that some of this makes sense and helps you find")
print("the issue.")
print()
print("To disable this interactive behavior, pass")
print(" debug_args=dict(interactive=False)")
print("to generate_loop_schedules().")
print(75*"-")
input("Enter:")
print()
print()

debug.debug_length = len(debug.longest_rejected_schedule)
while True:
try:
for _ in generate_loop_schedules_internal(
sched_state, debug=debug, **schedule_gen_kwargs):
pass

except ScheduleDebugInput as e:
debug.debug_length = int(str(e))
continue
return
except V2SchedulerNotImplementedException as e:
from warnings import warn
warn(f"Falling back to a slow scheduler implementation due to: {e}")

break
schedule_count = 0

try:
for gen_sched in generate_loop_schedules_internal(
sched_state, debug=debug, **schedule_gen_kwargs):
debug.stop()
debug = ScheduleDebugger(**debug_args)

preschedule = kernel.schedule if kernel.state == KernelState.LINEARIZED else ()

new_kernel = postprocess_schedule(kernel, gen_sched)
yield new_kernel
prescheduled_inames = {
insn.iname
for insn in preschedule
if isinstance(insn, EnterLoop)}

debug.start()
prescheduled_insn_ids = {
insn_id
for item in preschedule
for insn_id in sched_item_to_insn_id(item)}

from loopy.kernel.data import (IlpBaseTag, ConcurrentTag, VectorizeTag,
filter_iname_tags_by_type)
ilp_inames = {
name
for name, iname in kernel.inames.items()
if filter_iname_tags_by_type(iname.tags, IlpBaseTag)}
vec_inames = {
name
for name, iname in kernel.inames.items()
if filter_iname_tags_by_type(iname.tags, VectorizeTag)}
parallel_inames = {
name
for name, iname in kernel.inames.items()
if filter_iname_tags_by_type(iname.tags, ConcurrentTag)}

loop_nest_with_map = find_loop_nest_with_map(kernel)
loop_nest_around_map = find_loop_nest_around_map(kernel)
sched_state = SchedulerState(
kernel=kernel,
loop_nest_around_map=loop_nest_around_map,
loop_insn_dep_map=find_loop_insn_dep_map(
kernel,
loop_nest_with_map=loop_nest_with_map,
loop_nest_around_map=loop_nest_around_map),
breakable_inames=ilp_inames,
ilp_inames=ilp_inames,
vec_inames=vec_inames,

prescheduled_inames=prescheduled_inames,
prescheduled_insn_ids=prescheduled_insn_ids,

# time-varying part
active_inames=(),
entered_inames=frozenset(),
enclosing_subkernel_inames=(),

schedule=(),

unscheduled_insn_ids={insn.id for insn in kernel.instructions},
scheduled_insn_ids=frozenset(),
within_subkernel=kernel.state != KernelState.LINEARIZED,
may_schedule_global_barriers=True,

preschedule=preschedule,
insn_ids_to_try=None,

schedule_count += 1
# ilp and vec are not parallel for the purposes of the scheduler
parallel_inames=parallel_inames - ilp_inames - vec_inames,

except KeyboardInterrupt:
group_insn_counts=group_insn_counts(kernel),
active_group_counts={},

insns_in_topologically_sorted_order=(
get_insns_in_topologically_sorted_order(kernel)),
)

schedule_gen_kwargs = {}

def print_longest_dead_end():
if debug.interactive:
print("Loopy will now show you the scheduler state at the point")
print("where the longest (dead-end) schedule was generated, in the")
print("the hope that some of this makes sense and helps you find")
print("the issue.")
print()
print("To disable this interactive behavior, pass")
print(" debug_args=dict(interactive=False)")
print("to generate_loop_schedules().")
print(75*"-")
print("Interrupted during scheduling")
print(75*"-")
print_longest_dead_end()
raise
input("Enter:")
print()
print()

debug.done_scheduling()
if not schedule_count:
print(75*"-")
print("ERROR: Sorry--loopy did not find a schedule for your kernel.")
print(75*"-")
print_longest_dead_end()
raise RuntimeError("no valid schedules found")
debug.debug_length = len(debug.longest_rejected_schedule)
while True:
try:
for _ in generate_loop_schedules_internal(
sched_state, debug=debug, **schedule_gen_kwargs):
pass

except ScheduleDebugInput as e:
debug.debug_length = int(str(e))
continue

break

try:
for gen_sched in generate_loop_schedules_internal(
sched_state, debug=debug, **schedule_gen_kwargs):
debug.stop()

new_kernel = postprocess_schedule(kernel, gen_sched)
yield new_kernel

debug.start()

schedule_count += 1

except KeyboardInterrupt:
print()
print(75*"-")
print("Interrupted during scheduling")
print(75*"-")
print_longest_dead_end()
raise

debug.done_scheduling()
if not schedule_count:
print(75*"-")
print("ERROR: Sorry--loopy did not find a schedule for your kernel.")
print(75*"-")
print_longest_dead_end()
raise RuntimeError("no valid schedules found")

logger.info("%s: schedule done" % kernel.name)

Expand Down
5 changes: 4 additions & 1 deletion loopy/schedule/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,10 @@ def _update_flow_requirements(priorities, cannot_satisfy_callback):
# inner iname and outer iname are indirect family members
# => must be realized via dependencies in the linearization
# phase
raise NotImplementedError
from loopy.schedule import V2SchedulerNotImplementedException
raise V2SchedulerNotImplementedException("cannot"
" schedule kernels with priority dependencies"
" between sibling loop nests")

def _raise_loopy_err(x):
raise LoopyError(x)
Expand Down

0 comments on commit 0203d09

Please sign in to comment.