Skip to content

Commit

Permalink
contrib/intel/jenkins: Disable ZE IPC for OneCCL-GPU-v3 tests
Browse the repository at this point in the history
The ZE IPC path is slow and needs to be debugged. Disabling it
(by passing FI_HMEM_DISABLE_P2P=1 to the tests through the new
user_env argument) forces the oneccl-gpu tests to take the SAR
path instead.

Signed-off-by: Zach Dworkin <[email protected]>
  • Loading branch information
zachdworkin committed Mar 19, 2024
1 parent 6bc86f5 commit ff2a621
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 8 deletions.
22 changes: 14 additions & 8 deletions contrib/intel/jenkins/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def run_fabtests(stage_name, hw, partition, node_num, prov, util=null,
}

def run_middleware(providers, stage_name, test, hw, partition, node_num,
mpi=null, imb_grp=null) {
mpi=null, imb_grp=null, user_env=null) {
def base_cmd = "python3.9 ${RUN_LOCATION}/runtests.py --test=${test} --build_hw=${hw}"
def opts = ""
def prefix = "${env.LOG_DIR}/${stage_name}_"
Expand All @@ -74,7 +74,10 @@ def run_middleware(providers, stage_name, test, hw, partition, node_num,

if (env.WEEKLY.toBoolean())
base_cmd = "${base_cmd} --weekly=${env.WEEKLY}"


if (user_env)
base_cmd = "${base_cmd} --user_env ${user_env}"

for (prov in providers) {
if (prov[1]) {
echo "Running ${prov[0]}-${prov[1]} ${stage_name}"
Expand Down Expand Up @@ -713,12 +716,15 @@ pipeline {
steps {
script {
dir (RUN_LOCATION) {
run_middleware([["tcp", null]], "oneCCL-GPU-v3", "onecclgpu",
"gpu", "fabrics-ci", "2")
run_middleware([["psm3", null]], "oneCCL-GPU-v3", "onecclgpu",
"gpu", "fabrics-ci", "2")
run_middleware([["verbs", null]], "oneCCL-GPU-v3", "onecclgpu",
"gpu", "fabrics-ci", "2")
run_middleware([["tcp", null]], "oneCCL-GPU-v3", "onecclgpu",
"gpu", "fabrics-ci", "2", null, null,
"FI_HMEM_DISABLE_P2P=1")
run_middleware([["psm3", null]], "oneCCL-GPU-v3", "onecclgpu",
"gpu", "fabrics-ci", "2", null, null,
"FI_HMEM_DISABLE_P2P=1")
run_middleware([["verbs", null]], "oneCCL-GPU-v3", "onecclgpu",
"gpu", "fabrics-ci", "2", null, null,
"FI_HMEM_DISABLE_P2P=1")
}
}
}
Expand Down
4 changes: 4 additions & 0 deletions contrib/intel/jenkins/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -850,6 +850,10 @@ def __init__(self, jobname, buildno, testname, hw, core_prov, fabric,
'CCL_ATL_TRANSPORT_LIST' : 'ofi'
}

if self.env:
for key in self.env:
self.oneccl_environ[key] = self.env[key]

self.ld_library = [
f'{self.libfab_installpath}/lib',
f'{self.oneccl_path}/build/_install/lib'
Expand Down

0 comments on commit ff2a621

Please sign in to comment.