Skip to content

Commit

Permalink
Refactor host test code and Makefile to help prevent buffer mismatch …
Browse files Browse the repository at this point in the history
…hangs (#2059)
  • Loading branch information
jackl-xilinx authored Feb 22, 2025
1 parent 18c0fdc commit 9214068
Show file tree
Hide file tree
Showing 9 changed files with 444 additions and 194 deletions.
8 changes: 6 additions & 2 deletions programming_examples/basic/vector_scalar_mul/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ else()
set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
endif()

set(VECTORSCALARMUL_SIZE 4096 CACHE STRING "vector size")
# Buffer sizes handed to the test executable as the IN1_SIZE / IN2_SIZE /
# OUT_SIZE compile definitions. The accompanying Makefile passes these values
# in bytes (-DIN1_SIZE=..., -DIN2_SIZE=..., -DOUT_SIZE=...).
set(IN1_SIZE 8192 CACHE STRING "in1 buffer size")
set(IN2_SIZE 4 CACHE STRING "in2 buffer size")
set(OUT_SIZE 8192 CACHE STRING "out buffer size")
set(TARGET_NAME test CACHE STRING "Target to be built")

SET (ProjectName ${TARGET_NAME})
Expand All @@ -51,7 +53,9 @@ add_executable(${currentTarget}
)

target_compile_definitions(${currentTarget} PUBLIC
VECTORSCALARMUL_SIZE=${VECTORSCALARMUL_SIZE}
IN1_SIZE=${IN1_SIZE}
IN2_SIZE=${IN2_SIZE}
OUT_SIZE=${OUT_SIZE}
DISABLE_ABI_CHECK=1
)

Expand Down
25 changes: 15 additions & 10 deletions programming_examples/basic/vector_scalar_mul/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,14 @@ VPATH := ${srcdir}/../../../aie_kernels/aie2

device = npu
targetname = vector_scalar_mul
data_size = 4096
# Buffer sizes, all in bytes. The notes live on their own lines because make
# keeps the whitespace between a value and a trailing '#' comment as part of
# the value (e.g. "8192 "), which corrupts any file name built from it.
in1_size = 8192
# in2_size should always be 4 (1x int32)
in2_size = 4
# out_size should always be equal to in1_size
out_size = 8192
trace_size = 8192
CHESS ?= false

# Tag used in generated artifact names; follows in1_size. strip drops any
# whitespace left in the value by a trailing comment on its definition.
data_size = $(strip ${in1_size})
aie_py_src=${targetname}.py
use_alt?=0

Expand All @@ -45,11 +49,11 @@ endif

build/aie_${data_size}.mlir: ${srcdir}/${aie_py_src}
mkdir -p ${@D}
python3 $< ${device} ${data_size} 0 > $@
python3 $< ${device} ${in1_size} ${in2_size} ${out_size} 0 > $@

build/aie_trace_${data_size}.mlir: ${srcdir}/${aie_py_src}
mkdir -p ${@D}
python3 $< ${device} ${data_size} ${trace_size} > $@
python3 $< ${device} ${in1_size} ${in2_size} ${out_size} ${trace_size} > $@

#build/insts_${data_size}.txt: build/final_${data_size}.xclbin
build/final_${data_size}.xclbin: build/aie_${data_size}.mlir build/scale.o
Expand Down Expand Up @@ -87,7 +91,7 @@ endif
${targetname}_${data_size}.exe: ${srcdir}/test.cpp
rm -rf _build
mkdir -p _build
cd _build && ${powershell} cmake ${srcdir} -DTARGET_NAME=${targetname}_${data_size} -DVECTORSCALARMUL_SIZE=${data_size}
cd _build && ${powershell} cmake ${srcdir} -DTARGET_NAME=${targetname}_${data_size} -DIN1_SIZE=${in1_size} -DIN2_SIZE=${in2_size} -DOUT_SIZE=${out_size}
cd _build && ${powershell} cmake --build . --config Release
ifeq "${powershell}" "powershell.exe"
cp _build/${targetname}_${data_size}.exe $@
Expand All @@ -99,17 +103,18 @@ run: ${targetname}_${data_size}.exe build/final_${data_size}.xclbin build/insts_
${powershell} ./$< -x build/final_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE

run_py: build/final_${data_size}.xclbin build/insts_${data_size}.txt
${powershell} python3 ${srcdir}/test.py -x build/final_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE -s ${data_size}
${powershell} python3 ${srcdir}/test.py -x build/final_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE -i1s ${in1_size} -i2s ${in2_size} -os ${out_size}

trace: ${targetname}_${data_size}.exe build/final_trace_${data_size}.xclbin build/insts_${data_size}.txt
${powershell} ./$< -x build/final_trace_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE -t ${trace_size}
${srcdir}/../../utils/parse_trace.py --filename trace.txt --mlir build/aie_trace_${data_size}.mlir --colshift 1 > trace_vs.json
${srcdir}/../../utils/get_trace_summary.py --filename trace_vs.json
${srcdir}/../../utils/parse_trace.py --filename trace.txt --mlir build/aie_trace_${data_size}.mlir --colshift 1 > trace_vector_scalar_mul.json
${srcdir}/../../utils/get_trace_summary.py --filename trace_vector_scalar_mul.json

trace_py: build/final_trace_${data_size}.xclbin build/insts_${data_size}.txt
${powershell} python3 ${srcdir}/test.py -x build/final_trace_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE -t ${trace_size} -s ${data_size}
${srcdir}/../../utils/parse_trace.py --filename trace.txt --mlir build/aie_trace_${data_size}.mlir --colshift 1 > trace_vs.json
${srcdir}/../../utils/get_trace_summary.py --filename trace_vs.json
#${powershell} python3 ${srcdir}/test_orig.py -x build/final_trace_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE -t ${trace_size} -i1s ${in1_size} -i2s ${in2_size} -os ${out_size} --size 4096
${powershell} python3 ${srcdir}/test.py -x build/final_trace_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE -t ${trace_size} -i1s ${in1_size} -i2s ${in2_size} -os ${out_size}
${srcdir}/../../utils/parse_trace.py --filename trace.txt --mlir build/aie_trace_${data_size}.mlir --colshift 1 > trace_vector_scalar_mul.json
${srcdir}/../../utils/get_trace_summary.py --filename trace_vector_scalar_mul.json


clean_trace:
Expand Down
160 changes: 50 additions & 110 deletions programming_examples/basic/vector_scalar_mul/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,115 +8,48 @@
//
//===----------------------------------------------------------------------===//

#include "xrt_test_wrapper.h"
#include <cstdint>
#include <fstream>
#include <iostream>
#include <sstream>

#include "test_utils.h"
#include "xrt/xrt_bo.h"
//*****************************************************************************
// Modify this section to customize buffer datatypes, initialization functions,
// and verify function. The other place to reconfigure your design is the
// Makefile.
//*****************************************************************************

#ifndef DATATYPES_USING_DEFINED
#define DATATYPES_USING_DEFINED
// ------------------------------------------------------
// Configure this to match your buffer data type
// ------------------------------------------------------
// using DATATYPE = std::uint8_t;
// using DATATYPE = std::uint32_t;
using DATATYPE = std::uint16_t;
using DATATYPE_IN1 = std::uint16_t;
using DATATYPE_IN2 = std::int32_t;
using DATATYPE_OUT = std::uint16_t;
#endif

const int scaleFactor = 3;

namespace po = boost::program_options;

int main(int argc, const char *argv[]) {

// Program arguments parsing
po::options_description desc("Allowed options");
po::variables_map vm;
test_utils::add_default_options(desc);

test_utils::parse_options(argc, argv, desc, vm);
int verbosity = vm["verbosity"].as<int>();
int trace_size = vm["trace_sz"].as<int>();

constexpr bool VERIFY = true;
constexpr int IN_VOLUME = VECTORSCALARMUL_SIZE;
constexpr int OUT_VOLUME = IN_VOLUME;

int IN_SIZE = IN_VOLUME * sizeof(DATATYPE);
int OUT_SIZE = OUT_VOLUME * sizeof(DATATYPE) + trace_size;

// Load instruction sequence
std::vector<uint32_t> instr_v =
test_utils::load_instr_sequence(vm["instr"].as<std::string>());

if (verbosity >= 1)
std::cout << "Sequence instr count: " << instr_v.size() << "\n";

// Start the XRT context and load the kernel
xrt::device device;
xrt::kernel kernel;

test_utils::init_xrt_load_kernel(device, kernel, verbosity,
vm["xclbin"].as<std::string>(),
vm["kernel"].as<std::string>());

// set up the buffer objects
auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int),
XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1));
auto bo_inA =
xrt::bo(device, IN_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3));
auto bo_inFactor = xrt::bo(device, 1 * sizeof(int32_t),
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4));
auto bo_outC =
xrt::bo(device, OUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(5));

if (verbosity >= 1)
std::cout << "Writing data into buffer objects.\n";

// Copy instruction stream to xrt buffer object
void *bufInstr = bo_instr.map<void *>();
memcpy(bufInstr, instr_v.data(), instr_v.size() * sizeof(int));

// Initialize buffer bo_inA
DATATYPE *bufInA = bo_inA.map<DATATYPE *>();
for (int i = 0; i < IN_VOLUME; i++)
bufInA[i] = i + 1;

// Initialize buffer bo_inFactor
int32_t *bufInFactor = bo_inFactor.map<int32_t *>();
*bufInFactor = (DATATYPE)scaleFactor;

// Zero out buffer bo_outC
DATATYPE *bufOut = bo_outC.map<DATATYPE *>();
memset(bufOut, 0, OUT_SIZE);

// sync host to device memories
bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE);
bo_inA.sync(XCL_BO_SYNC_BO_TO_DEVICE);
bo_inFactor.sync(XCL_BO_SYNC_BO_TO_DEVICE);
bo_outC.sync(XCL_BO_SYNC_BO_TO_DEVICE);
// Fill input buffer 1 with the ramp 1, 2, ..., SIZE (element i holds i + 1,
// converted to DATATYPE_IN1).
void initialize_bufIn1(DATATYPE_IN1 *bufIn1, int SIZE) {
  int idx = 0;
  while (idx < SIZE) {
    bufIn1[idx] = idx + 1;
    ++idx;
  }
}

// Execute the kernel and wait to finish
if (verbosity >= 1)
std::cout << "Running Kernel.\n";
unsigned int opcode = 3;
auto run =
kernel(opcode, bo_instr, instr_v.size(), bo_inA, bo_inFactor, bo_outC);
run.wait();
// Store the scale factor (3) in the first element of input buffer 2.
// SIZE is not used: only element 0 is written.
void initialize_bufIn2(DATATYPE_IN2 *bufIn2, int SIZE) {
  *bufIn2 = 3; // scaleFactor
}

// Sync device to host memories
bo_outC.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
// Initialize Output buffer: zero-fill it.
// memset's third argument is a byte count, so SIZE is interpreted as bytes.
// NOTE(review): if the caller passes an element count instead, only the first
// SIZE bytes are cleared rather than the whole buffer -- confirm against the
// caller in xrt_test_wrapper.h.
void initialize_bufOut(DATATYPE_OUT *bufOut, int SIZE) {
memset(bufOut, 0, SIZE);
}

// Compare out to golden
// Functional correctness verifier
int verify_vector_scalar_mul(DATATYPE_IN1 *bufIn1, DATATYPE_IN2 *bufIn2,
DATATYPE_OUT *bufOut, int SIZE, int verbosity) {
int errors = 0;
if (verbosity >= 1) {
std::cout << "Verifying results ..." << std::endl;
}
for (uint32_t i = 0; i < IN_VOLUME; i++) {
int32_t ref = bufInA[i] * scaleFactor;

for (int i = 0; i < SIZE; i++) {
int32_t ref = bufIn1[i] * bufIn2[0];
int32_t test = bufOut[i];
if (test != ref) {
if (verbosity >= 1)
Expand All @@ -127,21 +60,28 @@ int main(int argc, const char *argv[]) {
std::cout << "Correct output " << test << " == " << ref << std::endl;
}
}
return errors;
}

if (trace_size > 0) {
test_utils::write_out_trace(((char *)bufOut) + IN_SIZE, trace_size,
vm["trace_file"].as<std::string>());
}
//*****************************************************************************
// Should not need to modify below section
//*****************************************************************************

// Print Pass/Fail result of our test
if (!errors) {
std::cout << std::endl << "PASS!" << std::endl << std::endl;
return 0;
} else {
std::cout << std::endl
<< errors << " mismatches." << std::endl
<< std::endl;
std::cout << std::endl << "fail." << std::endl << std::endl;
return 1;
}
int main(int argc, const char *argv[]) {

// constexpr int IN1_VOLUME = VECTORSCALARMUL_SIZE; // 1024; define via
// Makefile constexpr int IN2_VOLUME = 1; constexpr int OUT_VOLUME =
// IN1_VOLUME; // define via Makefile

constexpr int IN1_VOLUME = IN1_SIZE / sizeof(DATATYPE_IN1);
constexpr int IN2_VOLUME = IN2_SIZE / sizeof(DATATYPE_IN2);
constexpr int OUT_VOLUME = OUT_SIZE / sizeof(DATATYPE_OUT);

args myargs = parse_args(argc, argv);

int res = xrt_test_run<DATATYPE_IN1, DATATYPE_IN2, DATATYPE_OUT,
initialize_bufIn1, initialize_bufIn2,
initialize_bufOut, verify_vector_scalar_mul>(
IN1_VOLUME, IN2_VOLUME, OUT_VOLUME, myargs);
return res;
}
102 changes: 45 additions & 57 deletions programming_examples/basic/vector_scalar_mul/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,79 +7,67 @@
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
import numpy as np
import sys
import time
from aie.utils.xrt import setup_aie, write_out_trace, execute
import aie.utils.xrt as xrt_utils
import aie.utils.test as test_utils


def main(opts):
print("Running...\n")
in1_size = int(opts.in1_size) # in bytes
in2_size = int(opts.in2_size) # in bytes
out_size = int(opts.out_size) # in bytes

data_size = int(opts.size)
vector_dtype = np.int16
scalar_dtype = np.int32
scale_factor = 3
size_out = data_size * 2
print("output buffer size: " + str(size_out))
# --------------------------------------------------------------------------
# ----- Edit your data types -----------------------------------------------
# --------------------------------------------------------------------------

enable_trace = opts.trace_size > 0
in1_dtype = np.int16
in2_dtype = np.int32
out_dtype = in1_dtype

app = setup_aie(
opts.xclbin,
opts.instr,
data_size,
vector_dtype,
1,
scalar_dtype,
data_size,
vector_dtype,
enable_trace=enable_trace,
trace_size=opts.trace_size,
)
input_vector = np.arange(1, data_size + 1, dtype=vector_dtype)
input_factor = np.array([3], dtype=scalar_dtype)
# aie_output = execute_on_aie(app, input_vector, input_factor)
# --------------------------------------------------------------------------

in1_volume = in1_size // np.dtype(in1_dtype).itemsize
in2_volume = in2_size // np.dtype(in2_dtype).itemsize
out_volume = out_size // np.dtype(out_dtype).itemsize

start = time.time_ns()
full_output = execute(app, input_vector, input_factor)
stop = time.time_ns()
npu_time = stop - start
print("npu_time: ", npu_time)
# --------------------------------------------------------------------------
# ----- Edit your data init and reference data here ------------------------
# --------------------------------------------------------------------------

# aie_output = full_output[:size_out].view(np.int8)
# aie_output = full_output[:size_out].view(np.uint8)
aie_output = full_output[:size_out].view(np.int16)
if enable_trace:
trace_buffer = full_output[size_out:].view(np.uint32)
# check buffer sizes
assert in2_size == 4
assert out_size == in1_size

ref = np.arange(1, data_size + 1, dtype=vector_dtype) * scale_factor
scale_factor = 3

# Initialize data
in1_data = np.arange(1, in1_volume + 1, dtype=in1_dtype)
in2_data = np.array([scale_factor], dtype=in2_dtype)
out_data = np.zeros([out_volume], dtype=out_dtype)

if enable_trace:
# trace_buffer = full_output[3920:]
print("trace_buffer shape: ", trace_buffer.shape)
print("trace_buffer dtype: ", trace_buffer.dtype)
# write_out_trace(trace_buffer, str(opts.trace_file))
write_out_trace(trace_buffer, "trace.txt")
# Define reference data
ref = np.arange(1, in1_volume + 1, dtype=out_dtype) * scale_factor

# Copy output results and verify they are correct
errors = 0
if opts.verify:
if opts.verbosity >= 1:
print("Verifying results ...")
e = np.equal(ref, aie_output)
errors = np.size(e) - np.count_nonzero(e)
# --------------------------------------------------------------------------

if not errors:
print("\nPASS!\n")
sys.exit(0)
else:
print("\nError count: ", errors)
print("\nFailed.\n")
sys.exit(1)
print("Running...\n")
res = xrt_utils.xrt_test_run(
in1_dtype,
in2_dtype,
out_dtype,
in1_data,
in2_data,
out_data,
in1_volume,
in2_volume,
out_volume,
ref,
opts,
)
sys.exit(res)


if __name__ == "__main__":
p = test_utils.create_default_argparser()
p.add_argument("-s", "--size", required=True, dest="size", help="Vector size")
opts = p.parse_args(sys.argv[1:])
main(opts)
Loading

0 comments on commit 9214068

Please sign in to comment.