FIX Without OPENMP, LM_distributable_computation doesn't write output. #1566

Merged
5 commits merged on Feb 26, 2025
36 changes: 23 additions & 13 deletions .github/workflows/build-test.yml
@@ -47,6 +47,15 @@ jobs:
parallelproj: "ON"
ROOT: "ON"
ITK: "OFF"
- os: ubuntu-24.04
compiler: gcc
# compiler_version: 9
cuda_version: "0"
BUILD_FLAGS: "-DSTIR_OPENMP=OFF"
BUILD_TYPE: "Release"
parallelproj: "ON"
ROOT: "OFF"
ITK: "OFF"
- os: ubuntu-24.04
compiler: clang
#compiler_version:
@@ -94,15 +103,15 @@ jobs:
BUILD_TYPE: "Debug"
ROOT: "OFF"
ITK: "OFF"
# Currently disabled due to problems with listmode recon, see https://github.com/UCL/STIR/issues/1200
#- os: macOS-latest
# compiler: clang
# compiler_version: 16
# BUILD_FLAGS: "-DSTIR_OPENMP=OFF"
# parallelproj: "OFF"
# BUILD_TYPE: "Release"
# ROOT: "OFF"
# ITK: "OFF"
- os: macOS-latest
compiler: clang
compiler_version: 16
cuda_version: "0"
BUILD_FLAGS: "-DSTIR_OPENMP=OFF"
parallelproj: "OFF"
BUILD_TYPE: "Release"
ROOT: "OFF"
ITK: "OFF"

# let's run all of them, as opposed to aborting when one fails
fail-fast: false
@@ -168,11 +177,12 @@ jobs:
if test XX${HOMEBREW_PREFIX} = XX; then
HOMEBREW_PREFIX=/usr/local
fi
LDFLAGS="-L$HOMEBREW_PREFIX/opt/llvm/lib/c++ -Wl,-rpath,$HOMEBREW_PREFIX/opt/llvm/lib/c++"
LLVMDIR=$HOMEBREW_PREFIX/opt/llvm@${{ matrix.compiler_version }}
LDFLAGS="-L${LLVMDIR}/lib/c++ -Wl,-rpath,${LLVMDIR}/lib/c++"
# make available to jobs below
echo LDFLAGS="$LDFLAGS" >> $GITHUB_ENV
CC="$HOMEBREW_PREFIX/opt/llvm/bin/clang"
CXX="$HOMEBREW_PREFIX/opt/llvm/bin/clang++"
CC="${LLVMDIR}/bin/clang"
CXX="${LLVMDIR}/bin/clang++"
fi
fi
export CC CXX
@@ -359,7 +369,7 @@ jobs:
set -vx
cd ${GITHUB_WORKSPACE}/build
# find test exclusions. Complicated due to need of single -E argument and | pattern
if test ${{matrix.cuda_version}} != "0"; then
if test "${{matrix.cuda_version}}" != "0"; then
# No CUDA drivers on GitHub Actions
EXCLUDE="parallelproj|test_blocks_on_cylindrical_projectors"
fi
4 changes: 4 additions & 0 deletions documentation/release_6.3.htm
@@ -76,6 +76,10 @@ <h3>Bug fixes</h3>
See <a href="https://github.com/UCL/STIR/issues/1532">Issue #1532</a> for more detail. Fixed by using averaging functionality of SSRB instead of adding segments for attenuation correction factors.
<a href=https://github.com/UCL/STIR/pull/1531>PR #1531</a>
</li>
<li>
Fixed a bug in the distributed LM computation code (introduced in 6.1) that neglected to accumulate outputs when not built with OpenMP.
See <a href="https://github.com/UCL/STIR/pull/1566">PR #1566</a>.
</li>
</ul>

<h3>Build system</h3>
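The failure mode described in the release-notes entry above comes from wrapping the final per-thread reduction in an `#ifdef STIR_OPENMP` guard, so a build without OpenMP compiled the reduction out and the output was never written. A simplified sketch of that bug pattern, offered as a hypothetical illustration rather than STIR's actual code:

```cpp
#include <vector>

#ifdef STIR_OPENMP
#  include <omp.h>
#endif

// buggy_sum: per-thread partial results are reduced into the output only
// when STIR_OPENMP is defined, so a serial build silently returns 0.
double buggy_sum(const std::vector<double>& data)
{
#ifdef STIR_OPENMP
  std::vector<double> partial(omp_get_max_threads(), 0.0);
#else
  std::vector<double> partial(1, 0.0);
#endif

#ifdef STIR_OPENMP
#  pragma omp parallel for
#endif
  for (long i = 0; i < static_cast<long>(data.size()); ++i)
    {
#ifdef STIR_OPENMP
      partial[omp_get_thread_num()] += data[i];
#else
      partial[0] += data[i];
#endif
    }

  double output = 0.0;
#ifdef STIR_OPENMP // BUG: without OpenMP this reduction is compiled out,
  for (const double p : partial) // so the partial sums never reach 'output'
    output += p;
#endif
  return output;
}
```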
44 changes: 21 additions & 23 deletions src/include/stir/recon_buildblock/distributable.txx
@@ -1,6 +1,7 @@
/*
Copyright (C) 2024 University College London
Copyright (C) 2020, 2022, Univeristy of Pennsylvania
Copyright (C) 2024, 2025 University College London
Copyright (C) 2020, 2022, University of Pennsylvania
Copyright (C) 2025, Commonwealth Scientific and Industrial Research Organisation
This file is part of STIR.

SPDX-License-Identifier: Apache-2.0 AND License-ref-PARAPET-license
@@ -16,6 +17,7 @@

\author Nikos Efthimiou
\author Kris Thielemans
\author Ashley Gillman
*/
#include "stir/shared_ptr.h"
#include "stir/recon_buildblock/distributable.h"
@@ -71,31 +73,29 @@ LM_distributable_computation(const shared_ptr<ProjMatrixByBin> PM_sptr,
{
#ifdef STIR_OPENMP
# pragma omp single
#endif
// allocate "local" vectors
{
info("Listmode gradient calculation: starting loop with " + std::to_string(omp_get_num_threads()) + " threads", 2);
local_output_image_sptrs.resize(omp_get_max_threads(), shared_ptr<DiscretisedDensity<3, float>>());
local_double_out_ptrs.resize(omp_get_max_threads(), 0);
#ifdef STIR_OPENMP
const auto num_threads = omp_get_num_threads();
#else
const int num_threads = 1;
#endif
info("Listmode gradient calculation: starting loop with " + std::to_string(num_threads) + " threads", 2);
local_output_image_sptrs.resize(get_max_num_threads(), shared_ptr<DiscretisedDensity<3, float>>());
local_double_out_ptrs.resize(get_max_num_threads(), 0);
if (double_out_ptr)
{
local_double_outs.resize(omp_get_max_threads(), 0.);
for (int t = 0; t < omp_get_max_threads(); ++t)
local_double_outs.resize(get_max_num_threads(), 0.);
for (int t = 0; t < get_max_num_threads(); ++t)
local_double_out_ptrs[t] = &local_double_outs[t];
}
local_counts.resize(omp_get_max_threads(), 0);
local_count2s.resize(omp_get_max_threads(), 0);
local_row.resize(omp_get_max_threads(), ProjMatrixElemsForOneBin());
local_counts.resize(get_max_num_threads(), 0);
local_count2s.resize(get_max_num_threads(), 0);
local_row.resize(get_max_num_threads(), ProjMatrixElemsForOneBin());
}

#ifdef STIR_OPENMP
# pragma omp for schedule(dynamic)
#else
{
info("Listmode gradient calculation: starting loop with 1 thread", 2);
local_output_image_sptrs.resize(1, shared_ptr<DiscretisedDensity<3, float>>());
local_double_out_ptrs.resize(1, double_out_ptr);
local_counts.resize(1, 0);
local_count2s.resize(1, 0);
local_row.resize(1, ProjMatrixElemsForOneBin());
}
#endif
// note: VC uses OpenMP 2.0, so need signed integer for loop
for (long int ievent = 0; ievent < static_cast<long>(record_ptr.size()); ++ievent)
@@ -139,8 +139,7 @@ LM_distributable_computation(const shared_ptr<ProjMatrixByBin> PM_sptr,
local_double_out_ptrs[thread_num]);
}
}
#ifdef STIR_OPENMP
// flatten data constructed by threads
// flatten data constructed by threads (or collapse unitary dim if no threading)
{
if (double_out_ptr != NULL)
{
@@ -157,7 +156,6 @@ LM_distributable_computation(const shared_ptr<ProjMatrixByBin> PM_sptr,
*output_image_ptr += *(local_output_image_sptrs[i]);
}
}
#endif
CPU_timer.stop();
wall_clock_timer.stop();
info(boost::format("Computation times for distributable_computation, CPU %1%s, wall-clock %2%s") % CPU_timer.value()
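The diff above removes the OpenMP-only guard around the flattening step and sizes the per-thread buffers via get_max_num_threads(), so serial and parallel builds share one code path. A minimal sketch of the resulting pattern, assuming wrapper helpers analogous to the ones used in the diff (the names below are hypothetical stand-ins):

```cpp
#include <vector>

#ifdef STIR_OPENMP
#  include <omp.h>
#endif

// Hypothetical stand-ins for the OpenMP wrappers used above:
// they fall back to a single "thread 0" when OpenMP is disabled.
inline int max_num_threads()
{
#ifdef STIR_OPENMP
  return omp_get_max_threads();
#else
  return 1;
#endif
}

inline int current_thread()
{
#ifdef STIR_OPENMP
  return omp_get_thread_num();
#else
  return 0;
#endif
}

double fixed_sum(const std::vector<double>& data)
{
  // one accumulator slot per thread; a serial build gets exactly one slot
  std::vector<double> partial(max_num_threads(), 0.0);

#ifdef STIR_OPENMP
#  pragma omp parallel for
#endif
  for (long i = 0; i < static_cast<long>(data.size()); ++i)
    partial[current_thread()] += data[i];

  // flatten the per-thread results unconditionally: with one thread this
  // simply collapses the single slot into the output
  double output = 0.0;
  for (const double p : partial)
    output += p;
  return output;
}
```

Keeping a single code path means a serial build simply collapses a one-element buffer into the output, which is exactly what removing the #ifdef/#endif pair around the flattening loop achieves.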