Skip to content

Commit

Permalink
Add Arkouda IB performance, correctness scripts (#26707)
Browse files Browse the repository at this point in the history
... and, while there, also improve debugging/maintenance capabilities.
Commits in this PR should be squashed.

- Add a `log` function so that messages can be printed from Arkouda's
`sub_test`.
- Use `ARKOUDA_SERVER_LAUNCH_PREFIX` to wire `chpl_launchcmd` into the
server launch command. This will be supported by
Bears-R-Us/arkouda#4088.
- Add the following environment variables to disable parts of Arkouda
testing. This has been very valuable during development, where I could
simply make Jenkins use an existing Arkouda build to reduce the
turnaround time significantly:
  - `CHPL_TEST_ARKOUDA_SKIP_RM_CLONE`
  - `CHPL_TEST_ARKOUDA_SKIP_CHAPEL_PY`
  - `CHPL_TEST_ARKOUDA_SKIP_BUILD`
  - `CHPL_TEST_ARKOUDA_SKIP_MAKE_CHECK`
  - `CHPL_TEST_ARKOUDA_SKIP_UNIT_TESTS` (this will be used by the new
    performance testing to avoid unit testing in those configs)
- While there, this also adds a `-nobuild` flag to `nightly` to avoid
rebuilding Chapel in each run. Obviously, this is for maintenance only.
 - In total, this PR adds 4 jobs:
   - `util/cron/test-perf.hpe-apollo-hdr.arkouda.bash`
   - `util/cron/test-perf.hpe-apollo-hdr.arkouda.release.bash`
   - `util/cron/test.hpe-apollo-hdr.arkouda.bash`
   - `util/cron/test.hpe-apollo-hdr.arkouda.release.bash`

Corresponding Jenkins config PR:
https://github.hpe.com/hpe/hpc-chapel-ci-config/pull/1361

[Reviewed by @ShreyasKhandekar]
  • Loading branch information
e-kayrakli authored Feb 15, 2025
2 parents 0f2f4f0 + 498dd90 commit 7369e34
Show file tree
Hide file tree
Showing 20 changed files with 305 additions and 93 deletions.
5 changes: 5 additions & 0 deletions test/studies/arkouda/functions.bash
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,8 @@ function log_success() {
local msg=$@
echo "[Success matching ${msg}]"
}

# Print a bracketed informational message on stdout.
# Arguments: any number of words; they are joined with single spaces.
# Outputs:   "[<message>]" followed by a newline.
function log() {
  # Force a space as the join character for "$*", whatever the caller's IFS is.
  local IFS=' '
  printf '[%s]\n' "$*"
}
90 changes: 58 additions & 32 deletions test/studies/arkouda/sub_test
Original file line number Diff line number Diff line change
Expand Up @@ -16,28 +16,43 @@ subtest_start
DFLT_TIMEOUT=${CHPL_TEST_TIMEOUT:-300}
export ARKOUDA_CLIENT_TIMEOUT=${ARKOUDA_CLIENT_TIMEOUT:-$DFLT_TIMEOUT}

if [ -n "${CHPL_TEST_LAUNCHCMD}" ] ; then
export ARKOUDA_SERVER_LAUNCH_PREFIX=$(echo "$CHPL_TEST_LAUNCHCMD --walltime=$ARKOUDA_CLIENT_TIMEOUT" | envsubst)
fi

# Arkouda needs chpl in PATH
bin_subdir=$($CHPL_HOME/util/chplenv/chpl_bin_subdir.py)
export "PATH=$CHPL_HOME/bin/$bin_subdir:$PATH"
chpl --version

export ARKOUDA_HOME=$CWD/arkouda
rm -rf ${ARKOUDA_HOME}

# Clone Arkouda
if ! git clone --depth=1 ${ARKOUDA_URL} --branch=${ARKOUDA_BRANCH} ; then
log_fatal_error "cloning Arkouda"
if [ -z "${CHPL_TEST_ARKOUDA_SKIP_RM_CLONE}" ] ; then
rm -rf ${ARKOUDA_HOME}

# Clone Arkouda
log "cloning Arkouda (URL=$ARKOUDA_URL, branch=$ARKOUDA_BRANCH"
if ! git clone --depth=1 ${ARKOUDA_URL} --branch=${ARKOUDA_BRANCH} ; then
log_fatal_error "cloning Arkouda"
fi
fi

cd ${ARKOUDA_HOME}

# Install dependencies if needed
if make check-deps 2>/dev/null ; then
export ARKOUDA_SKIP_CHECK_DEPS=true
else
if ! nice make -j $($CHPL_HOME/util/buildRelease/chpl-make-cpu_count) install-deps ; then
log_fatal_error "installing dependencies"
if [ -z "${ARKOUDA_SKIP_CHECK_DEPS}" ] ; then
# Install dependencies if needed
if make check-deps ; then
export ARKOUDA_SKIP_CHECK_DEPS=true
else
if ! nice make -j $($CHPL_HOME/util/buildRelease/chpl-make-cpu_count) install-deps ; then
log_fatal_error "installing dependencies"
fi
fi
else
log "Skipping dependency checks"
log "Dependency dir: $ARKOUDA_DEP_DIR"
fi

export "PATH=${ARKOUDA_HOME}/dep/hdf5-install/bin:$PATH"

# CHPL_TEST_ARKOUDA_DISABLE_MODULES is a colon separated list of modules to
Expand All @@ -52,7 +67,9 @@ if [ -n "${CHPL_TEST_ARKOUDA_DISABLE_MODULES}" ] ; then
fi

# install frontend python bindings
(cd $CHPL_HOME && make chapel-py-venv)
if [ -z "${CHPL_TEST_ARKOUDA_SKIP_CHAPEL_PY}" ] ; then
(cd $CHPL_HOME && make chapel-py-venv)
fi

# Compile Arkouda
if [ "${CHPL_TEST_ARKOUDA_PERF}" = "true" ] ; then
Expand All @@ -62,17 +79,21 @@ if [ "${CHPL_TEST_ARKOUDA_PERF}" = "true" ] ; then
export ARKOUDA_EMITTED_CODE_SIZE_FILE="$PERF_SUB_DIR/emitted-code-size"
export CHPL_DEBUG_FLAGS="${CHPL_DEBUG_FLAGS} --print-emitted-code-size"

make 2>&1 | tee $ARKOUDA_EMITTED_CODE_SIZE_FILE.tmp
if [ ${PIPESTATUS[0]} -ne "0" ] ; then
log_fatal_error "compiling arkouda"
fi
if grep -q "Statements emitted:" $ARKOUDA_EMITTED_CODE_SIZE_FILE.tmp ; then
grep "Statements emitted:" $ARKOUDA_EMITTED_CODE_SIZE_FILE.tmp > $ARKOUDA_EMITTED_CODE_SIZE_FILE
rm -f $ARKOUDA_EMITTED_CODE_SIZE_FILE.tmp
if [ -z "${CHPL_TEST_ARKOUDA_SKIP_BUILD}" ] ; then
make 2>&1 | tee $ARKOUDA_EMITTED_CODE_SIZE_FILE.tmp
if [ ${PIPESTATUS[0]} -ne "0" ] ; then
log_fatal_error "compiling arkouda"
fi
if grep -q "Statements emitted:" $ARKOUDA_EMITTED_CODE_SIZE_FILE.tmp ; then
grep "Statements emitted:" $ARKOUDA_EMITTED_CODE_SIZE_FILE.tmp > $ARKOUDA_EMITTED_CODE_SIZE_FILE
rm -f $ARKOUDA_EMITTED_CODE_SIZE_FILE.tmp
fi
fi
else
if ! make ; then
log_fatal_error "compiling arkouda"
if [ -z "${CHPL_TEST_ARKOUDA_SKIP_BUILD}" ] ; then
if ! make ; then
log_fatal_error "compiling arkouda"
fi
fi
fi

Expand All @@ -81,27 +102,32 @@ if [ ${CHPL_TEST_ARKOUDA_STOP_AFTER_BUILD} = "false" ]; then
export PYTHONUSERBASE=$ARKOUDA_HOME/python-deps
# If Arkouda deps use any of our test deps, try to use the versions we want
AK_PIP_CONTRAINTS="--constraint $CHPL_HOME/third-party/chpl-venv/test-requirements.txt"
log "Installing Arkouda using python3 at: $(which python3)"
if ! python3 -m pip install --force-reinstall --timeout 60 $AK_PIP_CONTRAINTS -e .[dev] --user ; then
log_fatal_error "installing arkouda"
fi

# Check installation
test_start "make check"
if make check ; then
log_success "make check output"
else
log_fatal_error "running make check"
if [ -z "${CHPL_TEST_ARKOUDA_SKIP_MAKE_CHECK}" ] ; then
test_start "make check"
if make check ; then
log_success "make check output"
else
log_fatal_error "running make check"
fi
test_end
fi
test_end

# Run Python unit tests
test_start "make test-python"
if make test-python ; then
log_success "make test-python output"
else
log_error "running make test-python"
if [ -z "${CHPL_TEST_ARKOUDA_SKIP_UNIT_TESTS}" ] ; then
test_start "make test-python"
if make test-python ; then
log_success "make test-python output"
else
log_error "running make test-python"
fi
test_end
fi
test_end

# Run benchmarks
if [ "${CHPL_TEST_ARKOUDA_PERF}" = "true" ] ; then
Expand Down
34 changes: 27 additions & 7 deletions util/cron/common-arkouda.bash
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ export CHPL_TEST_ARKOUDA=true

# HPCDC doesn't seem to be accessible to compute nodes at the moment
# so we made a mirror on lustre where compute nodes can access
ARKOUDA_DEP_DIR=/lus/scratch/chapelu/arkouda-deps
ARKOUDA_DEP_DIR=${ARKOUDA_DEP_DIR:-/lus/scratch/chapelu/arkouda-deps}
if [ ! -d "$ARKOUDA_DEP_DIR" ]; then
ARKOUDA_DEP_DIR=$COMMON_DIR/arkouda-deps
fi
Expand All @@ -43,32 +43,52 @@ fi
# enable arrow/parquet support
export ARKOUDA_SERVER_PARQUET_SUPPORT=true

SETUP_PYTHON=$COMMON_DIR/setup_python39.bash
if [ -f "$SETUP_PYTHON" ]; then
echo "Setting up Python using $SETUP_PYTHON"
source $SETUP_PYTHON
echo "Using Python $(which python3)"
else
echo "Can't find Python setup script $SETUP_PYTHON"
fi

export CHPL_WHICH_RELEASE_FOR_ARKOUDA="2.3.0"
# test against Chapel release (checking out current test/cron directories)
function test_release() {
export CHPL_TEST_PERF_DESCRIPTION=release
export CHPL_TEST_PERF_CONFIGS="release:v,nightly:v"

# Move the working tree to the Chapel release named by
# CHPL_WHICH_RELEASE_FOR_ARKOUDA, then restore the testing infrastructure
# (test/, util/cron/, util/test/perf/, util/test/computePerfStats and the
# test-venv requirements file) from the commit that was checked out on entry.
#
# Globals:  CHPL_WHICH_RELEASE_FOR_ARKOUDA (read), CHPL_HOME (read)
# Returns:  0 on success; the failing git command's status otherwise. The
#           function stops at the first failure instead of carrying on with
#           a tree in an unexpected state.
function partial_checkout_release() {
  local current_sha
  local rel_path

  # Remember the current commit before HEAD is moved to the release.
  current_sha=$(git rev-parse HEAD) || return

  git checkout "$CHPL_WHICH_RELEASE_FOR_ARKOUDA" || return

  # Bring the test/cron infrastructure back from the original commit.
  for rel_path in \
      test/ \
      util/cron/ \
      util/test/perf/ \
      util/test/computePerfStats \
      third-party/chpl-venv/test-requirements.txt; do
    git checkout "$current_sha" -- "$CHPL_HOME/$rel_path" || return
  done
}

# Test against the Chapel release (checking out current test/cron directories)
# and record the results under the "release" performance description.
#
# Globals:  CHPL_TEST_PERF_DESCRIPTION, CHPL_TEST_PERF_CONFIGS (exported),
#           UTIL_CRON_DIR, nightly_args (read)
# Returns:  1 if the release checkout fails (nightly is not run in that case);
#           otherwise the exit status of the nightly script.
function test_release_performance() {
  export CHPL_TEST_PERF_DESCRIPTION=release
  export CHPL_TEST_PERF_CONFIGS="release:v,nightly:v"

  # Results would be recorded as "release" while measuring some other tree,
  # so refuse to run when the checkout step reports failure.
  if ! partial_checkout_release; then
    echo "[Error: release checkout failed; not running nightly]" >&2
    return 1
  fi

  # nightly_args is a whitespace-separated list of flags; splitting is intended.
  # shellcheck disable=SC2086
  "$UTIL_CRON_DIR/nightly" -cron ${nightly_args}
}

# test against Chapel nightly
function test_nightly() {
function test_nightly_performance() {
  # Run the nightly script on the current checkout and record the results
  # under the "nightly" performance description.
  # Globals: CHPL_TEST_PERF_DESCRIPTION, CHPL_TEST_PERF_CONFIGS (exported),
  #          UTIL_CRON_DIR, nightly_args (read)
  # Returns: the exit status of the nightly script.
  export CHPL_TEST_PERF_DESCRIPTION=nightly
  export CHPL_TEST_PERF_CONFIGS="release:v,nightly:v"

  # The script path is quoted so a directory containing whitespace still works.
  # nightly_args is a whitespace-separated list of flags; splitting is intended.
  # shellcheck disable=SC2086
  "$UTIL_CRON_DIR/nightly" -cron ${nightly_args}
}

function test_correctness() {
function test_release_correctness() {
  # Run the nightly script after switching the tree to the Chapel release.
  # Globals: UTIL_CRON_DIR, nightly_args (read)
  # Returns: 1 if the release checkout fails (nightly is not run in that
  #          case); otherwise the exit status of the nightly script.

  # Testing some other tree while reporting it as the release would be
  # misleading, so stop when the checkout step reports failure.
  if ! partial_checkout_release; then
    echo "[Error: release checkout failed; not running nightly]" >&2
    return 1
  fi

  # nightly_args is a whitespace-separated list of flags; splitting is intended.
  # shellcheck disable=SC2086
  "$UTIL_CRON_DIR/nightly" -cron ${nightly_args}
}

# Run the nightly script on the current checkout.
# Globals: UTIL_CRON_DIR, nightly_args (read)
# Returns: the exit status of the nightly script.
function test_nightly_correctness() {
  # The script path is quoted so a directory containing whitespace still works.
  # nightly_args is a whitespace-separated list of flags; splitting is intended.
  # shellcheck disable=SC2086
  "$UTIL_CRON_DIR/nightly" -cron ${nightly_args}
}


# Invoke util/cron/syncPerfGraphs.py with the html directory under
# CHPL_TEST_PERF_DIR and the destination "arkouda/<perf config name>".
# Globals: CHPL_HOME, CHPL_TEST_PERF_DIR, CHPL_TEST_PERF_CONFIG_NAME (read)
# Returns: the exit status of syncPerfGraphs.py.
function sync_graphs() {
  # Every expansion is quoted so that each one stays a single argument even
  # when a directory or config name contains whitespace.
  "$CHPL_HOME/util/cron/syncPerfGraphs.py" \
    "$CHPL_TEST_PERF_DIR/html/" \
    "arkouda/$CHPL_TEST_PERF_CONFIG_NAME"
}
4 changes: 4 additions & 0 deletions util/cron/common-hpe-apollo.bash
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,12 @@ UTIL_CRON_DIR=$(cd $(dirname ${BASH_SOURCE[0]}) ; pwd)
export CHPL_HOST_PLATFORM=hpe-apollo
export CHPL_TEST_LAUNCHCMD=\$CHPL_HOME/util/test/chpl_launchcmd.py
export CHPL_LAUNCHER_TIMEOUT=pbs
export CHPL_LAUNCHCMD_NUM_CPUS=144
export CHPL_LAUNCHCMD_QUEUE=f2401THP

export CHPL_SYSTEM_PREDIFF=$CHPL_HOME/util/test/prediff-for-gasnet


module purge
source $UTIL_CRON_DIR/load-base-deps.bash
module load gcc
2 changes: 0 additions & 2 deletions util/cron/common-perf-hpe-apollo-hdr.bash
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
#
# Configure settings for HPE Apollo HDR performance testing.

export CHPL_LAUNCHCMD_NUM_CPUS=144
export CHPL_LAUNCHCMD_QUEUE=f2401THP
export CHPL_TARGET_CPU=none

perf_hpe_apollo_args="-performance-configs gn-ibv-large:v,gn-ibv-fast:v -perflabel ml- -startdate 03/11/21"
4 changes: 4 additions & 0 deletions util/cron/common.bash
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ export CHPL_TARGET_CPU=none
explicit_prefix=${CHPL_NIGHTLY_LOG_PREFIX}
default_prefix=${TMPDIR:-/tmp}/chapel_logs
css_prefix=/hpcdc/project/chapel
log_info "About to set log prefix. explicit: ${explicit_prefix}"
log_info "About to set log prefix. default: ${default_prefix}"
log_info "About to set log prefix. css: ${css_prefix}"

if [ -n "$explicit_prefix" ]; then
LOGDIR_PREFIX=$explicit_prefix
elif [ -d $css_prefix ] ; then
Expand Down
91 changes: 48 additions & 43 deletions util/cron/nightly
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ $debug = 1;
$asserts = 0;
$warnings = 1;
$runtests = 1;
$build=1;
$buildruntime = 1;
$allhellos = 0;
$examples = 0;
Expand Down Expand Up @@ -94,6 +95,8 @@ while (@ARGV) {
$runtests = 0;
} elsif ($flag eq "-noruntime") {
$buildruntime = 0;
} elsif ($flag eq "-nobuild") {
$build = 0;
} elsif ($flag eq "-hellos") {
$allhellos = 1;
} elsif ($flag eq "-examples") {
Expand Down Expand Up @@ -538,58 +541,60 @@ if ($python2 == 0) {
}


print "Making $make_vars_opt compiler\n";
$makestat = mysystem("cd $chplhomedir && $make -j$num_procs $make_vars_opt compiler", "making chapel compiler", $exitOnError);
if ($build == 1) {
print "Making $make_vars_opt compiler\n";
$makestat = mysystem("cd $chplhomedir && $make -j$num_procs $make_vars_opt compiler", "making chapel compiler", $exitOnError);

# Speculatively build a couple third-party libraries. This command should not
# fail, even if it fails to build the libraries.
print "Making $make_vars_opt third-party-try-opt\n";
mysystem("cd $chplhomedir && $make -j$num_procs $make_vars_opt third-party-try-opt", "make chapel third-party-try-opt", $exitOnError);
# Speculatively build a couple third-party libraries. This command should not
# fail, even if it fails to build the libraries.
print "Making $make_vars_opt third-party-try-opt\n";
mysystem("cd $chplhomedir && $make -j$num_procs $make_vars_opt third-party-try-opt", "make chapel third-party-try-opt", $exitOnError);

# if we are using python2 or a deprecated version of python3, we cannot build
# the test-venv and/or chpldoc
if ($python2 == 0 && $pythonDep == 0) {
# Build chpldoc. Do not fail the build if it does not succeed. Do not send
# mail either.
print "Making $make_vars_opt chpldoc\n";
mysystem("cd $chplhomedir && $make -j$num_procs $make_vars_opt chpldoc", "make chapel chpldoc", $ignoreErrors);
# if we are using python2 or a deprecated version of python3, we cannot build
# the test-venv and/or chpldoc
if ($python2 == 0 && $pythonDep == 0) {
# Build chpldoc. Do not fail the build if it does not succeed. Do not send
# mail either.
print "Making $make_vars_opt chpldoc\n";
mysystem("cd $chplhomedir && $make -j$num_procs $make_vars_opt chpldoc", "make chapel chpldoc", $ignoreErrors);

# Build test virtualenv. Fail if the build does not succeed as virtualenv is
# needed for start_test. Send mail on failure
print "Making $make_var_opt test-venv\n";
mysystem("cd $chplhomedir && $make -j$num_procs $make_vars_opt test-venv", "make chapel test-venv", $exitOnError);
}
# Build test virtualenv. Fail if the build does not succeed as virtualenv is
# needed for start_test. Send mail on failure
print "Making $make_var_opt test-venv\n";
mysystem("cd $chplhomedir && $make -j$num_procs $make_vars_opt test-venv", "make chapel test-venv", $exitOnError);
}

if ($buildruntime == 0) {
print "Built compiler but not runtime, and did not run tests\n";
exit 0;
}
if ($buildruntime == 0) {
print "Built compiler but not runtime, and did not run tests\n";
exit 0;
}

if ($chplcheck == 1) {
# Build chplcheck if it's requested. If something is wrong with chapel-py,
# this can fail. It's not fatal, since it will only throw
# off the chplcheck-specific tests. However, we definitely want to send
# mail, since this indicates a problem with chplcheck.
print "Making chplcheck\n";
mysystem("cd $chplhomedir && $make -j$num_procs $make_vars_opt chplcheck", "make chplcheck", $emailOnError);
}
if ($chplcheck == 1) {
# Build chplcheck if it's requested. If something is wrong with chapel-py,
# this can fail. It's not fatal, since it will only throw
# off the chplcheck-specific tests. However, we definitely want to send
# mail, since this indicates a problem with chplcheck.
print "Making chplcheck\n";
mysystem("cd $chplhomedir && $make -j$num_procs $make_vars_opt chplcheck", "make chplcheck", $emailOnError);
}

print "Making $make_vars_opt runtime\n";
$makestat = mysystem("cd $chplhomedir && $make -j$num_procs $make_vars_opt runtime", "making chapel runtime", $exitOnError);
print "Making $make_vars_opt runtime\n";
$makestat = mysystem("cd $chplhomedir && $make -j$num_procs $make_vars_opt runtime", "making chapel runtime", $exitOnError);

print "Making modules\n";
$makestat = mysystem("cd $chplhomedir && $make -j$num_procs $make_vars_opt modules", "making chapel modules", $exitOnError);
print "Making modules\n";
$makestat = mysystem("cd $chplhomedir && $make -j$num_procs $make_vars_opt modules", "making chapel modules", $exitOnError);

# Build mason
if ($mason_build == 1) {
print "Making mason\n";
mysystem("cd $chplhomedir && $make -j$num_procs mason", "making mason", $exitOnError);
}
# Build mason
if ($mason_build == 1) {
print "Making mason\n";
mysystem("cd $chplhomedir && $make -j$num_procs mason", "making mason", $exitOnError);
}

# Build the protobuf Chapel plugin
if ($protobuf_build == 1) {
print "Making the protobuf Chapel plugin\n";
mysystem("cd $chplhomedir && $make protoc-gen-chpl", "making protoc-gen-chpl", $exitOnError);
# Build the protobuf Chapel plugin
if ($protobuf_build == 1) {
print "Making the protobuf Chapel plugin\n";
mysystem("cd $chplhomedir && $make protoc-gen-chpl", "making protoc-gen-chpl", $exitOnError);
}
}

#
Expand Down
2 changes: 1 addition & 1 deletion util/cron/test-cray-xc-arkouda.bash
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,5 @@ export CHPL_LAUNCHER_CORES_PER_LOCALE=96
export CHPL_LAUNCHER=slurm-srun
nightly_args="${nightly_args} -no-buildcheck"

test_nightly
test_nightly_performance
sync_graphs
2 changes: 1 addition & 1 deletion util/cron/test-cray-xc-arkouda.release.bash
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,5 @@ export CHPL_LAUNCHER_CORES_PER_LOCALE=96
export CHPL_LAUNCHER=slurm-srun
nightly_args="${nightly_args} -no-buildcheck"

test_release
test_release_performance
sync_graphs
2 changes: 1 addition & 1 deletion util/cron/test-cray-xc-gpu-arkouda.bash
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,4 @@ export CHPL_TEST_ARKOUDA_STOP_AFTER_BUILD="true"
module list
nightly_args="${nightly_args} -no-buildcheck"

test_nightly
test_nightly_correctness
2 changes: 1 addition & 1 deletion util/cron/test-gasnet-asan.arkouda.bash
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,4 @@ source $UTIL_CRON_DIR/common-arkouda.bash

export CHPL_FLAGS="--parallel-make 16 --ccflags -Og"

test_correctness
test_nightly_correctness
Loading

0 comments on commit 7369e34

Please sign in to comment.