Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modify tests to use aircc.py (cont'd) #912

Merged
merged 22 commits into from
Feb 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
e6f674b
Modify test 11 to use aircc.py
erwei-xilinx Feb 24, 2025
b46d3e1
Modify test 10 to use aircc.py
erwei-xilinx Feb 24, 2025
32c111a
Modify test 10 to use aircc.py
erwei-xilinx Feb 24, 2025
a3b0b2b
Modify test 09 to use aircc
erwei-xilinx Feb 24, 2025
efe318f
Fixup typo
erwei-xilinx Feb 24, 2025
80eaeca
Modify test 06 to use aircc.py
erwei-xilinx Feb 24, 2025
4379bd8
Add channel_multiplexing option in aircc to specify how channel time-…
erwei-xilinx Feb 24, 2025
7467761
Fixup test 11 XRTBackend options
erwei-xilinx Feb 24, 2025
02ed29d
Modify test 08 to use aircc
erwei-xilinx Feb 24, 2025
ed8ce33
Fixup parsing list of strings
erwei-xilinx Feb 24, 2025
aeb082e
Fixup init arg val
erwei-xilinx Feb 24, 2025
7ef5e98
Modify test 07 to use aircc
erwei-xilinx Feb 24, 2025
8107a7e
Fixup xclbin filename
erwei-xilinx Feb 24, 2025
f2e9e80
Modify test 04 to use aircc
erwei-xilinx Feb 24, 2025
7d91fa0
When lowering linalg to call, also link the parent air.herd to the fu…
erwei-xilinx Feb 24, 2025
f4fe32b
Modify the --lower-linalg-to-func option in aircc to take in the obj …
erwei-xilinx Feb 24, 2025
122c14c
Update tests to pass in func call link through aircc
erwei-xilinx Feb 24, 2025
7180f11
Update tests to pass in func call link through aircc
erwei-xilinx Feb 25, 2025
6e53dc0
Omit pingpong for test 07
erwei-xilinx Feb 25, 2025
71cbeaf
Enable trace configuration through aircc
erwei-xilinx Feb 25, 2025
1729a49
Modify test 14 to use aircc
erwei-xilinx Feb 25, 2025
541cc2d
Remove legacy test file
erwei-xilinx Feb 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions mlir/lib/Conversion/AIRToAIEPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3679,6 +3679,12 @@ class AIRLinalgOpToLibraryCallRewrite
op, fnNameAttr.getValue(), TypeRange(),
ValueRange(ArrayRef<Value>(libFnOperands)));

if (auto herd = op->getParentOfType<air::HerdOp>())
rewriter.modifyOpInPlace(herd, [&]() {
herd->setAttr("link_with",
StringAttr::get(rewriter.getContext(), linkWith));
});

return success();
}

Expand Down
36 changes: 31 additions & 5 deletions python/air/backend/xrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,16 @@ class XRTBackend(AirBackend):

def __init__(
self,
verbose=False,
omit_while_true_loop=False,
omit_pingpong=False,
lower_linalg_to_func=False,
air_loop_fusion=False,
verbose: bool = False,
omit_while_true_loop: bool = False,
omit_pingpong: bool = False,
lower_linalg_to_func: str = None,
air_loop_fusion: bool = False,
runtime_loop_tiling_sizes: list[int] = [4, 4],
omit_auto_broadcast: bool = False,
channel_multiplexing: list[str] = [],
trace_offset: int = 0,
trace_size: int = 0,
):
"""Constructor for XRTBackend

Expand All @@ -61,6 +65,10 @@ def __init__(
lower_linalg_to_func: configure aircc to lower linalg.generic to function calls, using this string as the name of the object file to link with; if None, they are lowered to loops instead.
air_loop_fusion: configure aircc to add air-loop-fusion experimental pass.
runtime_loop_tiling_sizes: configure aircc to add extra runtime loop tiling using the experimental affine-loop-opt pass.
omit_auto_broadcast: configure aircc to omit the detection and lowering of broadcast data movements.
channel_multiplexing: configure aircc to perform air channel multiplexing on specified memory spaces.
trace_offset: configure aircc to stream out profiling traces at outputs, starting from the specified offset.
trace_size: configure aircc to stream out profiling traces at outputs, with specified trace data size.
"""
super().__init__()
self.verbose = verbose
Expand All @@ -69,6 +77,10 @@ def __init__(
self.lower_linalg_to_func = lower_linalg_to_func
self.air_loop_fusion = air_loop_fusion
self.runtime_loop_tiling_sizes = runtime_loop_tiling_sizes
self.omit_auto_broadcast = omit_auto_broadcast
self.channel_multiplexing = channel_multiplexing
self.trace_offset = trace_offset
self.trace_size = trace_size
self.currently_loaded = False

def __del__(self):
Expand Down Expand Up @@ -131,10 +143,24 @@ def compile(

if self.lower_linalg_to_func:
aircc_options += ["--lower-linalg-to-func"]
aircc_options += [self.lower_linalg_to_func]

if self.air_loop_fusion:
aircc_options += ["--air-loop-fusion"]

if self.omit_auto_broadcast:
aircc_options += ["--omit-auto-broadcast"]

if len(self.channel_multiplexing) != 0:
aircc_options += ["--air-channel-multiplexing"]
aircc_options += self.channel_multiplexing

if self.trace_size != 0:
aircc_options += ["-trace-size"]
aircc_options += [str(self.trace_size)]
aircc_options += ["-trace-offset"]
aircc_options += [str(self.trace_offset)]

aircc.run(air_module, aircc_options)

return XRTCompileArtifact(xclbin, kernel, insts)
Expand Down
12 changes: 12 additions & 0 deletions python/air/backend/xrt_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,21 @@ def __init__(
lower_linalg_to_func: bool = False,
air_loop_fusion: bool = False,
runtime_loop_tiling_sizes: list[int] = [4, 4],
omit_auto_broadcast: bool = False,
channel_multiplexing: list[str] = [],
trace_offset: int = 0,
trace_size: int = 0,
):
self.verbose = verbose
self.omit_while_true_loop = omit_while_true_loop
self.omit_pingpong = omit_pingpong
self.lower_linalg_to_func = lower_linalg_to_func
self.air_loop_fusion = air_loop_fusion
self.runtime_loop_tiling_sizes = runtime_loop_tiling_sizes
self.omit_auto_broadcast = omit_auto_broadcast
self.channel_multiplexing = channel_multiplexing
self.trace_offset = trace_offset
self.trace_size = trace_size

def run_test(
self,
Expand All @@ -88,6 +96,10 @@ def run_test(
lower_linalg_to_func=self.lower_linalg_to_func,
air_loop_fusion=self.air_loop_fusion,
runtime_loop_tiling_sizes=self.runtime_loop_tiling_sizes,
omit_auto_broadcast=self.omit_auto_broadcast,
channel_multiplexing=self.channel_multiplexing,
trace_offset=self.trace_offset,
trace_size=self.trace_size,
)

# run the module - slots are input/output for now, assume non-overlapping inputs/outputs
Expand Down
21 changes: 18 additions & 3 deletions python/air/compiler/aircc/cl_arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,10 @@ def parse_args(args=None):
)
parser.add_argument(
"--lower-linalg-to-func",
type=str,
dest="lower_linalg_to_func",
default=False,
action="store_true",
help="Whether to run pass which lowers linalg.generic ops to function calls. If False, then they lower to loops.",
default=None,
help="Whether to run pass which lowers linalg.generic ops to function calls. If a string is passed in, then register the string value as the object file name to link with.",
)
parser.add_argument(
"--air-loop-fusion",
Expand All @@ -145,6 +145,21 @@ def parse_args(args=None):
default=[4, 4],
help="Adds tiling factors to be applied to the runtime host affine loop nest. It is an experimental pass which enforces extra innermost tilings at runtime, to comply with constraints of certain hardware",
)
parser.add_argument(
"--omit-auto-broadcast",
dest="omit_auto_broadcast",
default=False,
action="store_true",
help="Omits the air-dependency-schedule-opt pass, which detects and lowers broadcasts",
)
parser.add_argument(
"--air-channel-multiplexing",
type=str,
nargs="*", # Accept zero or more strings
dest="channel_multiplexing",
default=[],
help="Adds memory spaces to which air channels shall get time-multiplexed, if operating on them",
)

opts = parser.parse_args(args)
return opts
38 changes: 31 additions & 7 deletions python/air/compiler/aircc/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,22 @@ def get_L2_splitting_analysis_pass():


def get_air_optimization_pass(
device, omit_pingpong=True, lower_linalg_to_func=False, air_loop_fusion=False
device,
omit_pingpong=False,
lower_linalg_to_func=None,
air_loop_fusion=False,
omit_auto_broadcast=False,
channel_multiplexing=[],
):
OPTIMIZATION_PASSES = [
"air-dependency",
"air-dependency-schedule-opt",
"air-specialize-dma-broadcast",
]
if not omit_auto_broadcast:
OPTIMIZATION_PASSES += [
"air-dependency-schedule-opt",
"air-specialize-dma-broadcast",
]
OPTIMIZATION_PASSES += [
"air-dma-to-channel",
"canonicalize",
"cse",
Expand All @@ -53,7 +63,18 @@ def get_air_optimization_pass(
"air-isolate-async-dma-loop-nests",
"canonicalize",
"cse",
"air-fuse-channels",
]
if len(channel_multiplexing) != 0:
OPTIMIZATION_PASSES += [
"air-fuse-channels{aggressive-mode="
+ ",".join(s for s in channel_multiplexing)
+ "}",
]
else:
OPTIMIZATION_PASSES += [
"air-fuse-channels",
]
OPTIMIZATION_PASSES += [
"canonicalize",
"cse",
]
Expand All @@ -75,9 +96,9 @@ def get_air_optimization_pass(
"canonicalize",
"cse",
]
if lower_linalg_to_func:
if lower_linalg_to_func != None:
OPTIMIZATION_PASSES += [
"air-linalg-to-func",
"air-linalg-to-func{link-with=" + f"{lower_linalg_to_func}" + "}",
]
else:
OPTIMIZATION_PASSES += [
Expand Down Expand Up @@ -415,12 +436,13 @@ def run(mlir_module, args=None):
air_collapse_herd_to_cols_pass = (
"func.func(air-collapse-herd{" + f"max-col-size={4} " + "})"
)
trace_col_offset = 1 if int(opts.trace_size) > 0 else 0
air_place_pass = (
"air-place-herds{"
+ f"num-rows={opts.num_rows} "
+ f"num-cols={opts.num_cols} "
+ f"row-anchor={opts.row_offset} "
+ f"col-anchor={opts.col_offset}"
+ f"col-anchor={opts.col_offset + trace_col_offset}"
+ "}"
)

Expand All @@ -436,6 +458,8 @@ def run(mlir_module, args=None):
opts.omit_pingpong,
opts.lower_linalg_to_func,
opts.air_loop_fusion,
opts.omit_auto_broadcast,
opts.channel_multiplexing,
)
if "npu" in opts.device
else []
Expand Down
147 changes: 17 additions & 130 deletions test/xrt/01_air_to_npu/aie.py → test/xrt/01_air_to_npu/gen.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
import air
import air.compiler.util
# gen.py -*- Python -*-
#
# Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved.
# SPDX-License-Identifier: MIT

from air.dialects import linalg, arith, func, memref
from air.ir import *
import air.passmanager
from air.dialects.air import module_builder
from air.dialects.linalg.opdsl.lang import *
from air.compiler.util import run_transform
import argparse
import sys

from air.backend.xrt import XRTBackend
from air.ir import *
import air.passmanager


@linalg_structured_op
Expand Down Expand Up @@ -117,130 +121,13 @@ def forward(lhs, rhs):
pm = air.passmanager.PassManager.parse(pipeline, context=context)
pm.run(air_module.operation)

with open("air_sync.mlir", "w") as f:
f.write(str(air_module))

################################################
## Extract event dependency and optimize schedule
################################################
###############################################
# Run compile and load
###############################################

pipeline = (
"builtin.module("
+ ",".join(
[
"air-dependency",
"air-dependency-schedule-opt",
"air-specialize-dma-broadcast",
"air-dma-to-channel",
"canonicalize",
"cse",
"air-dependency-canonicalize",
"canonicalize",
"cse",
"air-label-scf-for-to-ping-pong",
]
)
+ ")"
backend = XRTBackend(
air_loop_fusion=True,
trace_offset=opts.trace_offset,
trace_size=opts.trace_size,
)
pm = air.passmanager.PassManager.parse(pipeline, context=context)
pm.run(air_module.operation)
# Not sure why parsing the ir solves the segmentation fault...
air_module = Module.parse(str(air_module), context=context)
pipeline = (
"builtin.module("
+ ",".join(
[
"air-ping-pong-transform",
"air-dealias-memref",
"canonicalize",
"cse",
"air-isolate-async-dma-loop-nests",
"func.func(air-opt-memtile-dma-bds{device=npu1_4col})",
"canonicalize",
"cse",
]
)
+ ")"
)
pm = air.passmanager.PassManager.parse(pipeline, context=context)
pm.run(air_module.operation)
with open("aircc_input.mlir", "w") as f:
f.write(str(air_module))

################################################
## Place herd to segment
################################################

air_async_module = Module.parse(str(air_module), context=context)
col_anchor = 1 if opts.trace_size > 0 else 0
pipeline = (
"builtin.module("
+ ",".join(
[
"func.func(air-collapse-herd)",
"canonicalize",
"cse",
"air-place-herds{num-rows=4 num-cols=1 row-anchor=2 col-anchor="
+ str(col_anchor)
+ "}",
"canonicalize",
"cse",
"func.func(air-renumber-dma)",
"func.func(convert-linalg-to-loops)",
]
)
+ ")"
)

pm = air.passmanager.PassManager.parse(pipeline, context=context)
pm.run(air_module.operation)
with open("air_placed.mlir", "w") as f:
f.write(str(air_module))

# ################################################
# ## MLIR-AIR to MLIR-AIE
# ################################################

air_to_aie_pass = (
"air-to-aie{row-offset=2 col-offset=0 device=npu1_4col emit-while-loop=true"
)
if opts.trace_size > 0:
air_to_aie_pass = air_to_aie_pass + " insert-trace-packet-flow=true"
air_to_aie_pass = air_to_aie_pass + "}"
pipeline = (
"builtin.module("
+ ",".join(
[
air_to_aie_pass,
"canonicalize",
]
)
+ ")"
)
pm = air.passmanager.PassManager.parse(pipeline, context=context)
pm.run(air_module.operation)
with open("aircc_decomp_aiecc.mlir", "w") as f:
f.write(str(air_module))

################################################
## MLIR-AIR runtime lowering
################################################

pipeline = (
"builtin.module("
+ ",".join(
[
"func.func(air-opt-shim-dma-bds{device=npu1_4col})",
"air-to-std",
"airrt-to-npu{"
+ f"trace-offset={opts.trace_offset} trace-size={opts.trace_size}"
+ "}",
"canonicalize",
]
)
+ ")"
)
pm = air.passmanager.PassManager.parse(pipeline, context=context)
pm.run(air_module.operation)
with open("aie.mlir", "w") as f:
f.write(str(air_module))
module_function = backend.compile_and_load(air_module)
Loading
Loading