FIX Without OPENMP, LM_distributable_computation doesn't write output. #1566

Merged
5 commits merged on Feb 26, 2025
36 changes: 23 additions & 13 deletions .github/workflows/build-test.yml
@@ -47,6 +47,15 @@ jobs:
parallelproj: "ON"
ROOT: "ON"
ITK: "OFF"
- os: ubuntu-24.04
compiler: gcc
# compiler_version: 9
cuda_version: "0"
BUILD_FLAGS: "-DSTIR_OPENMP=OFF"
BUILD_TYPE: "Release"
parallelproj: "ON"
ROOT: "OFF"
ITK: "OFF"
- os: ubuntu-24.04
compiler: clang
#compiler_version:
@@ -94,15 +103,15 @@ jobs:
BUILD_TYPE: "Debug"
ROOT: "OFF"
ITK: "OFF"
# Currently disabled due to problems with listmode recon, see https://github.com/UCL/STIR/issues/1200
#- os: macOS-latest
# compiler: clang
# compiler_version: 16
# BUILD_FLAGS: "-DSTIR_OPENMP=OFF"
# parallelproj: "OFF"
# BUILD_TYPE: "Release"
# ROOT: "OFF"
# ITK: "OFF"
- os: macOS-latest
compiler: clang
compiler_version: 16
cuda_version: "0"
BUILD_FLAGS: "-DSTIR_OPENMP=OFF"
parallelproj: "OFF"
BUILD_TYPE: "Release"
ROOT: "OFF"
ITK: "OFF"

# let's run all of them, as opposed to aborting when one fails
fail-fast: false
@@ -168,11 +177,12 @@ jobs:
if test XX${HOMEBREW_PREFIX} = XX; then
HOMEBREW_PREFIX=/usr/local
fi
LDFLAGS="-L$HOMEBREW_PREFIX/opt/llvm/lib/c++ -Wl,-rpath,$HOMEBREW_PREFIX/opt/llvm/lib/c++"
LLVMDIR=$HOMEBREW_PREFIX/opt/llvm@${{ matrix.compiler_version }}
LDFLAGS="-L${LLVMDIR}/lib/c++ -Wl,-rpath,${LLVMDIR}/lib/c++"
# make available to jobs below
echo LDFLAGS="$LDFLAGS" >> $GITHUB_ENV
CC="$HOMEBREW_PREFIX/opt/llvm/bin/clang"
CXX="$HOMEBREW_PREFIX/opt/llvm/bin/clang++"
CC="${LLVMDIR}/bin/clang"
CXX="${LLVMDIR}/bin/clang++"
fi
fi
export CC CXX
@@ -359,7 +369,7 @@ jobs:
set -vx
cd ${GITHUB_WORKSPACE}/build
# find test exclusions. Complicated due to need of single -E argument and | pattern
if test ${{matrix.cuda_version}} != "0"; then
if test "${{matrix.cuda_version}}" != "0"; then
# No CUDA drivers on GitHub Actions
EXCLUDE="parallelproj|test_blocks_on_cylindrical_projectors"
fi
4 changes: 4 additions & 0 deletions documentation/release_6.3.htm
@@ -76,6 +76,10 @@ <h3>Bug fixes</h3>
See <a href="https://github.com/UCL/STIR/issues/1532">Issue #1532</a> for more detail. Fixed by using averaging functionality of SSRB instead of adding segments for attenuation correction factors.
<a href=https://github.com/UCL/STIR/pull/1531>PR #1531</a>
</li>
<li>
Fixed a bug in the distributed LM computation code (introduced in 6.1) that neglected to accumulate outputs when not built with OpenMP.
See <a href="https://github.com/UCL/STIR/pull/1566">PR #1566</a>.
</li>
</ul>

<h3>Build system</h3>
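The failure mode described in the release-notes entry above comes from wrapping the final per-thread reduction in an `#ifdef STIR_OPENMP` guard, so a build without OpenMP compiled the reduction out and the output was never written. A simplified sketch of that bug pattern, offered as a hypothetical illustration rather than STIR's actual code:

```cpp
#include <vector>

#ifdef STIR_OPENMP
#  include <omp.h>
#endif

// buggy_sum: per-thread partial results are reduced into the output only
// when STIR_OPENMP is defined, so a serial build silently returns 0.
double buggy_sum(const std::vector<double>& data)
{
#ifdef STIR_OPENMP
  std::vector<double> partial(omp_get_max_threads(), 0.0);
#else
  std::vector<double> partial(1, 0.0);
#endif

#ifdef STIR_OPENMP
#  pragma omp parallel for
#endif
  for (long i = 0; i < static_cast<long>(data.size()); ++i)
    {
#ifdef STIR_OPENMP
      partial[omp_get_thread_num()] += data[i];
#else
      partial[0] += data[i];
#endif
    }

  double output = 0.0;
#ifdef STIR_OPENMP // BUG: without OpenMP this reduction is compiled out,
  for (const double p : partial) // so the partial sums never reach 'output'
    output += p;
#endif
  return output;
}
```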
44 changes: 21 additions & 23 deletions src/include/stir/recon_buildblock/distributable.txx
@@ -1,6 +1,7 @@
/*
Copyright (C) 2024 University College London
Copyright (C) 2020, 2022, Univeristy of Pennsylvania
Copyright (C) 2024, 2025 University College London
Copyright (C) 2020, 2022, University of Pennsylvania
Copyright (C) 2025, Commonwealth Scientific and Industrial Research Organisation
This file is part of STIR.

SPDX-License-Identifier: Apache-2.0 AND License-ref-PARAPET-license
@@ -16,6 +17,7 @@

\author Nikos Efthimiou
\author Kris Thielemans
\author Ashley Gillman
*/
#include "stir/shared_ptr.h"
#include "stir/recon_buildblock/distributable.h"
@@ -71,31 +73,29 @@ LM_distributable_computation(const shared_ptr<ProjMatrixByBin> PM_sptr,
{
#ifdef STIR_OPENMP
# pragma omp single
#endif
// allocate "local" vectors
{
info("Listmode gradient calculation: starting loop with " + std::to_string(omp_get_num_threads()) + " threads", 2);
local_output_image_sptrs.resize(omp_get_max_threads(), shared_ptr<DiscretisedDensity<3, float>>());
local_double_out_ptrs.resize(omp_get_max_threads(), 0);
#ifdef STIR_OPENMP
const auto num_threads = omp_get_num_threads();
#else
const int num_threads = 1;
#endif
info("Listmode gradient calculation: starting loop with " + std::to_string(num_threads) + " threads", 2);
local_output_image_sptrs.resize(get_max_num_threads(), shared_ptr<DiscretisedDensity<3, float>>());
local_double_out_ptrs.resize(get_max_num_threads(), 0);
if (double_out_ptr)
{
local_double_outs.resize(omp_get_max_threads(), 0.);
for (int t = 0; t < omp_get_max_threads(); ++t)
local_double_outs.resize(get_max_num_threads(), 0.);
for (int t = 0; t < get_max_num_threads(); ++t)
local_double_out_ptrs[t] = &local_double_outs[t];
}
local_counts.resize(omp_get_max_threads(), 0);
local_count2s.resize(omp_get_max_threads(), 0);
local_row.resize(omp_get_max_threads(), ProjMatrixElemsForOneBin());
local_counts.resize(get_max_num_threads(), 0);
local_count2s.resize(get_max_num_threads(), 0);
local_row.resize(get_max_num_threads(), ProjMatrixElemsForOneBin());
}

#ifdef STIR_OPENMP
# pragma omp for schedule(dynamic)
#else
{
info("Listmode gradient calculation: starting loop with 1 thread", 2);
local_output_image_sptrs.resize(1, shared_ptr<DiscretisedDensity<3, float>>());
local_double_out_ptrs.resize(1, double_out_ptr);
local_counts.resize(1, 0);
local_count2s.resize(1, 0);
local_row.resize(1, ProjMatrixElemsForOneBin());
}
#endif
// note: VC uses OpenMP 2.0, so need signed integer for loop
for (long int ievent = 0; ievent < static_cast<long>(record_ptr.size()); ++ievent)
@@ -139,8 +139,7 @@ LM_distributable_computation(const shared_ptr<ProjMatrixByBin> PM_sptr,
local_double_out_ptrs[thread_num]);
}
}
#ifdef STIR_OPENMP
// flatten data constructed by threads
// flatten data constructed by threads (or collapse unitary dim if no threading)
{
if (double_out_ptr != NULL)
{
@@ -157,7 +156,6 @@ LM_distributable_computation(const shared_ptr<ProjMatrixByBin> PM_sptr,
*output_image_ptr += *(local_output_image_sptrs[i]);
}
}
#endif
CPU_timer.stop();
wall_clock_timer.stop();
info(boost::format("Computation times for distributable_computation, CPU %1%s, wall-clock %2%s") % CPU_timer.value()
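The diff above removes the OpenMP-only guard around the flattening step and sizes the per-thread buffers via get_max_num_threads(), so serial and parallel builds share one code path. A minimal sketch of the resulting pattern, assuming wrapper helpers analogous to the ones used in the diff (the names below are hypothetical stand-ins):

```cpp
#include <vector>

#ifdef STIR_OPENMP
#  include <omp.h>
#endif

// Hypothetical stand-ins for the OpenMP wrappers used above:
// they fall back to a single "thread 0" when OpenMP is disabled.
inline int max_num_threads()
{
#ifdef STIR_OPENMP
  return omp_get_max_threads();
#else
  return 1;
#endif
}

inline int current_thread()
{
#ifdef STIR_OPENMP
  return omp_get_thread_num();
#else
  return 0;
#endif
}

double fixed_sum(const std::vector<double>& data)
{
  // one accumulator slot per thread; a serial build gets exactly one slot
  std::vector<double> partial(max_num_threads(), 0.0);

#ifdef STIR_OPENMP
#  pragma omp parallel for
#endif
  for (long i = 0; i < static_cast<long>(data.size()); ++i)
    partial[current_thread()] += data[i];

  // flatten the per-thread results unconditionally: with one thread this
  // simply collapses the single slot into the output
  double output = 0.0;
  for (const double p : partial)
    output += p;
  return output;
}
```

Keeping a single code path means a serial build simply collapses a one-element buffer into the output, which is exactly what removing the #ifdef/#endif pair around the flattening loop achieves.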