From ee2ce3fef6d6bd073eb31303808618db88cec2e1 Mon Sep 17 00:00:00 2001
From: Huy Do
Date: Fri, 18 Nov 2022 18:55:33 +0000
Subject: [PATCH] Set make max load when building libtorch (#89237)

The nccl build still sometimes fails with an out-of-memory (OOM) error when
using `$(MAKE)`:

```
virtual memory exhausted: Cannot allocate memory
Makefile:73: recipe for target '/var/lib/jenkins/cpp-build/caffe2/build/nccl/obj/collectives/device/devlink.o' failed
make[5]: *** [/var/lib/jenkins/cpp-build/caffe2/build/nccl/obj/collectives/device/devlink.o] Error 1
make[5]: Leaving directory '/var/lib/jenkins/workspace/third_party/nccl/nccl/src/collectives/device'
```

* https://github.com/pytorch/pytorch/actions/runs/3476485191/jobs/5811758058
* https://github.com/pytorch/pytorch/actions/runs/3422228421/jobs/5702153639

So set the same load limit here as is used when building with ninja.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/89237
Approved by: https://github.com/malfet
---
 cmake/External/nccl.cmake | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/cmake/External/nccl.cmake b/cmake/External/nccl.cmake
index cb928baf3a595..160d2b648c051 100644
--- a/cmake/External/nccl.cmake
+++ b/cmake/External/nccl.cmake
@@ -15,23 +15,24 @@ if(NOT __NCCL_INCLUDED)
     # this second replacement is needed when there are multiple archs
     string(REPLACE ";-gencode" " -gencode" NVCC_GENCODE "${NVCC_GENCODE}")
 
-    if("${CMAKE_GENERATOR}" MATCHES "Make")
-      # Recursive make with jobserver for parallelism
-      set(MAKE_COMMAND "$(MAKE)")
+    if(DEFINED ENV{MAX_JOBS})
+      set(MAX_JOBS "$ENV{MAX_JOBS}")
     else()
-      if(DEFINED ENV{MAX_JOBS})
-        set(MAX_JOBS "$ENV{MAX_JOBS}")
-      else()
-        include(ProcessorCount)
-        ProcessorCount(NUM_HARDWARE_THREADS)
-        # Assume 2 hardware threads per cpu core
-        math(EXPR MAX_JOBS "${NUM_HARDWARE_THREADS} / 2")
-        # ProcessorCount might return 0, set to a positive number
-        if(MAX_JOBS LESS 2)
-          set(MAX_JOBS 2)
-        endif()
+      include(ProcessorCount)
+      ProcessorCount(NUM_HARDWARE_THREADS)
+      # Assume 2 hardware threads per cpu core
+      math(EXPR MAX_JOBS "${NUM_HARDWARE_THREADS} / 2")
+      # ProcessorCount might return 0, set to a positive number
+      if(MAX_JOBS LESS 2)
+        set(MAX_JOBS 2)
       endif()
+    endif()
 
+    if("${CMAKE_GENERATOR}" MATCHES "Make")
+      # Recursive make with jobserver for parallelism, and also put a load limit
+      # here to avoid flaky OOM, https://www.gnu.org/software/make/manual/html_node/Parallel.html
+      set(MAKE_COMMAND "$(MAKE)" "-l${MAX_JOBS}")
+    else()
       # Parallel build with CPU load limit to avoid oversubscription
       set(MAKE_COMMAND "make" "-j${MAX_JOBS}" "-l${MAX_JOBS}")
     endif()