Skip to content

Commit

Permalink
Fixed trace.py and modified traced designs to use new fix. (#2058)
Browse files: browse the repository at this point in the history
  • Loading branch information
jackl-xilinx authored Feb 21, 2025
1 parent ca0478a commit 8799e99
Show file tree
Hide file tree
Showing 19 changed files with 129 additions and 75 deletions.
2 changes: 2 additions & 0 deletions aie_kernels/aie2/bf16_softmax.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
void softmax_simple_bf16(bfloat16 *restrict input_vector,
bfloat16 *restrict output_vector,
const int32_t vector_size) {
event0();
// Find maximum for numerical stability
float max_val = (float)input_vector[0];
for (uint32_t i = 1; i < vector_size; i++) {
Expand Down Expand Up @@ -47,6 +48,7 @@ void softmax_simple_bf16(bfloat16 *restrict input_vector,
float val = (float)output_vector[i] * inv_sum;
output_vector[i] = (bfloat16)val;
}
event1();
return;
}

Expand Down
10 changes: 6 additions & 4 deletions programming_examples/basic/passthrough_kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ build/aie2_lineBased_8b_${data_size}.mlir: ${srcdir}/${aie_py_src}
mkdir -p ${@D}
python3 $< ${device} ${data_size} 0 > $@

build/aie_trace__lineBased_8b_${data_size}.mlir: ${srcdir}/${aie_py_src}
build/aie2_trace_lineBased_8b_${data_size}.mlir: ${srcdir}/${aie_py_src}
mkdir -p ${@D}
python3 $< ${device} ${data_size} ${trace_size} > $@

Expand All @@ -59,7 +59,7 @@ else
--xclbin-name=${@F} --npu-insts-name=insts_${data_size}.txt $(<:%=../%)
endif

build/final_trace_${data_size}.xclbin: build/aie2_lineBased_8b_${data_size}.mlir build/passThrough.cc.o
build/final_trace_${data_size}.xclbin: build/aie2_trace_lineBased_8b_${data_size}.mlir build/passThrough.cc.o
mkdir -p ${@D}
ifeq ($(device),npu)
cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \
Expand Down Expand Up @@ -93,11 +93,13 @@ run_py: build/final_${data_size}.xclbin build/insts_${data_size}.txt

trace: ${targetname}_${data_size}.exe build/final_trace_${data_size}.xclbin build/insts_${data_size}.txt
${powershell} ./$< -x build/final_trace_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE -t ${trace_size}
../../utils/parse_trace.py --filename trace.txt --mlir build/aie_trace_${data_size}.mlir --colshift 1 > trace_vs.json
${srcdir}/../../utils/parse_trace.py --filename trace.txt --mlir build/aie2_trace_lineBased_8b_${data_size}.mlir --colshift 1 > trace_passthrough_kernel.json
${srcdir}/../../utils/get_trace_summary.py --filename trace_passthrough_kernel.json

trace_py: build/final_trace_${data_size}.xclbin build/insts_${data_size}.txt
${powershell} python3 ${srcdir}/test.py -x build/final_trace_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE -t ${trace_size} -s ${data_size}
../../utils/parse_trace.py --filename trace.txt --mlir build/aie_trace_${data_size}.mlir --colshift 1 > trace_vs.json
${srcdir}/../../utils/parse_trace.py --filename trace.txt --mlir build/aie2_trace_lineBased_8b_${data_size}.mlir --colshift 1 > trace_passthrough_kernel.json
${srcdir}/../../utils/get_trace_summary.py --filename trace_passthrough_kernel.json

clean_trace:
rm -rf tmpTrace trace.txt parse*json trace*json
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,10 @@ def device_body():
ShimTile = tile(0, 0)
ComputeTile2 = tile(0, 2)

# Set up a circuit-switched flow from core to shim for tracing information
# Set up a packet-switched flow from core to shim for tracing information
tiles_to_trace = [ComputeTile2]
if trace_size > 0:
flow(ComputeTile2, WireBundle.Trace, 0, ShimTile, WireBundle.DMA, 1)
trace_utils.configure_packet_tracing_flow(tiles_to_trace, ShimTile)

# AIE-array data movement with object fifos
of_in = object_fifo("in", ShimTile, ComputeTile2, 2, line_ty)
Expand All @@ -60,13 +61,14 @@ def core_body():
@runtime_sequence(vector_ty, vector_ty, vector_ty)
def sequence(inTensor, outTensor, notUsed):
if trace_size > 0:
trace_utils.configure_simple_tracing_aie2(
ComputeTile2,
ShimTile,
trace_utils.configure_packet_tracing_aie2(
tiles_to_trace=tiles_to_trace,
shim=ShimTile,
trace_size=trace_size,
trace_offset=N,
ddr_id=1,
size=trace_size,
offset=N,
)

in_task = shim_dma_single_bd_task(
of_in, inTensor, sizes=[1, 1, 1, N], issue_token=True
)
Expand All @@ -77,6 +79,8 @@ def sequence(inTensor, outTensor, notUsed):
dma_start_task(in_task, out_task)
dma_await_task(in_task, out_task)

trace_utils.gen_trace_done_aie2(ShimTile)


try:
device_name = str(sys.argv[1])
Expand Down
2 changes: 1 addition & 1 deletion programming_examples/basic/passthrough_kernel/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ int main(int argc, const char *argv[]) {
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3));
auto bo_out =
xrt::bo(device, PASSTHROUGH_SIZE * sizeof(DATATYPE) + trace_size,
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3));
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4));

if (verbosity >= 1)
std::cout << "Writing data into buffer objects.\n";
Expand Down
14 changes: 11 additions & 3 deletions programming_examples/basic/vector_scalar_mul/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ device = npu
targetname = vector_scalar_mul
data_size = 4096
trace_size = 8192
CHESS ?= true
CHESS ?= false

aie_py_src=${targetname}.py
use_alt?=0
Expand All @@ -29,8 +29,6 @@ endif

all: build/final_${data_size}.xclbin build/insts_${data_size}.txt

kristof: build/insts_${data_size}.txt

build/%.o: %.cc
mkdir -p ${@D}
ifeq ($(device),npu)
Expand All @@ -56,6 +54,7 @@ build/aie_trace_${data_size}.mlir: ${srcdir}/${aie_py_src}
#build/insts_${data_size}.txt: build/final_${data_size}.xclbin
build/final_${data_size}.xclbin: build/aie_${data_size}.mlir build/scale.o
mkdir -p ${@D}
ifeq ($(device),npu)
ifeq ($(CHESS), true)
cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \
--aie-generate-npu --npu-insts-name=insts_${data_size}.txt $(<:%=../%)
Expand All @@ -64,9 +63,14 @@ else
--no-xchesscc --no-xbridge \
--aie-generate-npu --npu-insts-name=insts_${data_size}.txt $(<:%=../%)
endif
else
cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \
--aie-generate-npu --npu-insts-name=insts_${data_size}.txt $(<:%=../%)
endif

build/final_trace_${data_size}.xclbin: build/aie_trace_${data_size}.mlir build/scale.o
mkdir -p ${@D}
ifeq ($(device),npu)
ifeq ($(CHESS), true)
cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \
--aie-generate-npu --npu-insts-name=insts_${data_size}.txt $(<:%=../%)
Expand All @@ -75,6 +79,10 @@ else
--no-xchesscc --no-xbridge \
--aie-generate-npu --npu-insts-name=insts_${data_size}.txt $(<:%=../%)
endif
else
cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \
--aie-generate-npu --npu-insts-name=insts_${data_size}.txt $(<:%=../%)
endif

${targetname}_${data_size}.exe: ${srcdir}/test.cpp
rm -rf _build
Expand Down
1 change: 1 addition & 0 deletions programming_examples/ml/conv2d/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ run_py: build/final.xclbin
trace_py: build/final_trace.xclbin
${powershell} python3 ${srcdir}/test.py -x build/final_trace.xclbin -i build/insts_trace.txt -k MLIR_AIE -wd ${width} -ht ${height} -ic ${in_channels} -oc ${out_channels} -t ${trace_size}
${srcdir}/../../utils/parse_trace.py --filename log/trace_conv2d.txt --mlir build/aie_trace.mlir --colshift 1 > log/trace_conv2d.json
${srcdir}/../../utils/get_trace_summary.py --filename log/trace_conv2d.json

clean:
rm -rf build *.elf* *.lst *.bif ${mlirFileName}.mlir.prj log* *.xclbin sim \
Expand Down
2 changes: 2 additions & 0 deletions programming_examples/ml/conv2d/conv2d_alt.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ def sequence(I, W, O):
dma_start_task(in_act_task, in_wts_task, out_task)
dma_await_task(in_act_task, in_wts_task, out_task)

trace_utils.gen_trace_done_aie2(ShimTile)

# print(ctx.module.operation.verify())
print(ctx.module)

Expand Down
5 changes: 3 additions & 2 deletions programming_examples/ml/eltwise_add/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,10 @@ build/final_trace.xclbin: build/aie_trace.mlir build/add.o
mkdir -p ${@D}
ifeq ($(CHESS), true)
cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \
--no-xchesscc --no-xbridge \
--xclbin-name=${@F} --npu-insts-name=insts.txt ${<F}
else
cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \
--no-xchesscc --no-xbridge \
--xclbin-name=${@F} --npu-insts-name=insts.txt ${<F}
endif

Expand All @@ -83,7 +83,8 @@ run: ${targetname}.exe build/final.xclbin

trace: ${targetname}.exe build/final_trace.xclbin
${powershell} ./$< -x build/final_trace.xclbin -i build/insts.txt -k MLIR_AIE -t ${trace_size}
../../utils/parse_trace.py --filename trace.txt --mlir build/aie_trace.mlir --colshift 1 > trace_eltwise_add.json
${srcdir}/../../utils/parse_trace.py --filename trace.txt --mlir build/aie_trace.mlir --colshift 1 > trace_eltwise_add.json
${srcdir}/../../utils/get_trace_summary.py --filename trace_eltwise_add.json


clean_trace:
Expand Down
15 changes: 7 additions & 8 deletions programming_examples/ml/eltwise_add/eltwise_add_alt.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,10 @@ def device_body():
MemTile = tile(0, 1)
cores = [tile(0, 2 + i) for i in range(n_cores)]

# Set up a circuit-switched flow from core to shim for tracing information
# Set up a packet-switched flow from core to shim for tracing information
tiles_to_trace = [cores[0]]
if trace_size > 0:
flow(cores[0], WireBundle.Trace, 0, ShimTile, WireBundle.DMA, 1)
trace_utils.configure_packet_tracing_flow(tiles_to_trace, ShimTile)

inA_fifos = []
inB_fifos = []
Expand Down Expand Up @@ -137,12 +138,8 @@ def core_body():
def sequence(A, B, C):

if trace_size > 0:
trace_utils.configure_simple_tracing_aie2(
cores[0],
ShimTile,
ddr_id=2,
size=trace_size,
offset=N_in_bytes,
trace_utils.configure_packet_tracing_aie2(
tiles_to_trace, ShimTile, trace_size, N_in_bytes
)

a_task = shim_dma_single_bd_task(
Expand All @@ -164,6 +161,8 @@ def sequence(A, B, C):
dma_start_task(a_task, b_task, c_task)
dma_await_task(a_task, b_task, c_task)

trace_utils.gen_trace_done_aie2(ShimTile)


try:
trace_size = 0 if (len(sys.argv) < 2) else int(sys.argv[1])
Expand Down
4 changes: 3 additions & 1 deletion programming_examples/ml/eltwise_mul/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ run: ${targetname}.exe build/final.xclbin

trace: ${targetname}.exe build/final_trace.xclbin
${powershell} ./$< -x build/final_trace.xclbin -i build/insts.txt -k MLIR_AIE -t ${trace_size}
../../utils/parse_eventIR.py --filename trace.txt --mlir build/aie_trace.mlir --colshift 1 > parse_eventIR_vs.json
${srcdir}/../../utils/parse_trace.py --filename trace.txt --mlir build/aie_trace.mlir --colshift 1 > trace_eltwise_mul.json
${srcdir}/../../utils/get_trace_summary.py --filename trace_eltwise_mul.json

clean:
rm -rf build _build ${targetname}.exe
15 changes: 7 additions & 8 deletions programming_examples/ml/eltwise_mul/eltwise_mul_alt.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,10 @@ def device_body():
MemTile = tile(0, 1)
cores = [tile(0, 2 + i) for i in range(n_cores)]

# Set up a circuit-switched flow from core to shim for tracing information
# Set up a packet-switched flow from core to shim for tracing information
tiles_to_trace = [cores[0]]
if trace_size > 0:
flow(cores[0], WireBundle.Trace, 0, ShimTile, WireBundle.DMA, 1)
trace_utils.configure_packet_tracing_flow(tiles_to_trace, ShimTile)

inA_fifos = []
inB_fifos = []
Expand Down Expand Up @@ -136,12 +137,8 @@ def core_body():
def sequence(A, B, C):

if trace_size > 0:
trace_utils.configure_simple_tracing_aie2(
cores[0],
ShimTile,
ddr_id=2,
size=trace_size,
offset=N_in_bytes,
trace_utils.configure_packet_tracing_aie2(
tiles_to_trace, ShimTile, trace_size, N_in_bytes
)

a_task = shim_dma_single_bd_task(
Expand All @@ -163,6 +160,8 @@ def sequence(A, B, C):
dma_start_task(a_task, b_task, c_task)
dma_await_task(a_task, b_task, c_task)

trace_utils.gen_trace_done_aie2(ShimTile)


try:
trace_size = 0 if (len(sys.argv) < 2) else int(sys.argv[1])
Expand Down
3 changes: 2 additions & 1 deletion programming_examples/ml/relu/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ run: ${targetname}.exe build/final.xclbin

trace: ${targetname}.exe build/final_trace.xclbin
${powershell} ./$< -x build/final_trace.xclbin -i build/insts.txt -k MLIR_AIE -t ${trace_size}
../../utils/parse_trace.py --filename trace.txt --mlir build/aie_trace.mlir --colshift 1 > trace_relu.json
${srcdir}/../../utils/parse_trace.py --filename trace.txt --mlir build/aie_trace.mlir --colshift 1 > trace_relu.json
${srcdir}/../../utils/get_trace_summary.py --filename trace_relu.json

clean_trace:
rm -rf trace.txt trace_relu.json
Expand Down
17 changes: 10 additions & 7 deletions programming_examples/ml/relu/relu_alt.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,10 @@ def device_body():
of_offsets = []
object_fifo_link(outC_fifos, outC, of_offsets, [])

# Set up a circuit-switched flow from core to shim for tracing information
# Set up a packet-switched flow from core to shim for tracing information
tiles_to_trace = [cores[0]]
if trace_size > 0:
flow(cores[0], WireBundle.Trace, 0, ShimTile, WireBundle.DMA, 1)
trace_utils.configure_packet_tracing_flow(tiles_to_trace, ShimTile)

# Set up compute tiles
for i in range(n_cores):
Expand All @@ -113,12 +114,12 @@ def core_body():
def sequence(A, C):

if trace_size > 0:
trace_utils.configure_simple_tracing_aie2(
cores[0],
ShimTile,
trace_utils.configure_packet_tracing_aie2(
tiles_to_trace=tiles_to_trace,
shim=ShimTile,
trace_size=trace_size,
trace_offset=N_in_bytes,
ddr_id=1,
size=trace_size,
offset=N_in_bytes,
)

in_task = shim_dma_single_bd_task(
Expand All @@ -134,6 +135,8 @@ def sequence(A, C):
dma_start_task(in_task, out_task)
dma_await_task(in_task, out_task)

trace_utils.gen_trace_done_aie2(ShimTile)


try:
trace_size = 0 if (len(sys.argv) != 2) else int(sys.argv[1])
Expand Down
3 changes: 2 additions & 1 deletion programming_examples/ml/softmax/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ profile: ${targetname}.exe build/final.xclbin

trace: ${targetname}.exe build/final_trace.xclbin
${powershell} ./$< -x build/final_trace.xclbin -i build/insts.txt -k MLIR_AIE -t ${trace_size}
../../utils/parse_trace.py --filename trace.txt --mlir build/aie_trace.mlir --colshift 1 > trace_softmax.json
${srcdir}/../../utils/parse_trace.py --filename trace.txt --mlir build/aie_trace.mlir --colshift 1 > trace_softmax.json
${srcdir}/../../utils/get_trace_summary.py --filename trace_softmax.json

clean_trace:
rm -rf trace.txt trace_softmax.json
Expand Down
17 changes: 10 additions & 7 deletions programming_examples/ml/softmax/softmax_alt.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,10 @@ def device_body():
object_fifo_link(inA, inA_fifos, [], of_a_offsets)
object_fifo_link(outC_fifos, outC, of_c_offsets, [])

# Set up a circuit-switched flow from core to shim for tracing information
# Set up a packet-switched flow from core to shim for tracing information
tiles_to_trace = [cores[0]]
if trace_size > 0:
flow(cores[0], WireBundle.Trace, 0, ShimTile, WireBundle.DMA, 1)
trace_utils.configure_packet_tracing_flow(tiles_to_trace, ShimTile)

# Set up compute tiles
for i in range(n_cores):
Expand All @@ -112,12 +113,12 @@ def core_body():
def sequence(A, C):

if trace_size > 0:
trace_utils.configure_simple_tracing_aie2(
cores[0],
ShimTile,
trace_utils.configure_packet_tracing_aie2(
tiles_to_trace=tiles_to_trace,
shim=ShimTile,
trace_size=trace_size,
trace_offset=N_in_bytes,
ddr_id=1,
size=trace_size,
offset=N_in_bytes,
)

in_task = shim_dma_single_bd_task(
Expand All @@ -132,6 +133,8 @@ def sequence(A, C):
dma_start_task(in_task, out_task)
dma_await_task(in_task, out_task)

trace_utils.gen_trace_done_aie2(ShimTile)


try:
trace_size = 0 if (len(sys.argv) != 2) else int(sys.argv[1])
Expand Down
2 changes: 1 addition & 1 deletion programming_guide/section-4/section-4b/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ all: build/final.xclbin

targetname = myFirstProgram
trace_size = 8192
CHESS ?= true
CHESS ?= false

build/aie.mlir: ${srcdir}/aie2.py
mkdir -p ${@D}
Expand Down
2 changes: 2 additions & 0 deletions programming_guide/section-4/section-4b/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ def sequence(A, F, C):
)
npu_dma_memcpy_nd(metadata=of_out, bd_id=0, mem=C, sizes=[1, 1, 1, 4096])
dma_wait(of_in, of_factor, of_out)
if enableTrace:
trace_utils.gen_trace_done_aie2(ShimTile)


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit 8799e99

Please sign in to comment.