xref: /aosp_15_r20/external/pytorch/cmake/External/nccl.cmake (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1if(NOT __NCCL_INCLUDED)
2  set(__NCCL_INCLUDED TRUE)
3
4  if(USE_SYSTEM_NCCL)
5    # NCCL_ROOT, NCCL_LIB_DIR, NCCL_INCLUDE_DIR will be accounted in the following line.
6    find_package(NCCL REQUIRED)
7    if(NCCL_FOUND)
8      add_library(__caffe2_nccl INTERFACE)
9      target_link_libraries(__caffe2_nccl INTERFACE ${NCCL_LIBRARIES})
10      target_include_directories(__caffe2_nccl INTERFACE ${NCCL_INCLUDE_DIRS})
11    endif()
12  else()
13    torch_cuda_get_nvcc_gencode_flag(NVCC_GENCODE)
14    string(REPLACE "-gencode;" "-gencode=" NVCC_GENCODE "${NVCC_GENCODE}")
15    # this second replacement is needed when there are multiple archs
16    string(REPLACE ";-gencode" " -gencode" NVCC_GENCODE "${NVCC_GENCODE}")
17
18    if(DEFINED ENV{MAX_JOBS})
19      set(MAX_JOBS "$ENV{MAX_JOBS}")
20    else()
21      include(ProcessorCount)
22      ProcessorCount(NUM_HARDWARE_THREADS)
23      # Assume 2 hardware threads per cpu core
24      math(EXPR MAX_JOBS "${NUM_HARDWARE_THREADS} / 2")
25      # ProcessorCount might return 0, set to a positive number
26      if(MAX_JOBS LESS 2)
27        set(MAX_JOBS 2)
28      endif()
29    endif()
30
31    if("${CMAKE_GENERATOR}" MATCHES "Make")
32      # Recursive make with jobserver for parallelism, and also put a load limit
33      # here to avoid flaky OOM, https://www.gnu.org/software/make/manual/html_node/Parallel.html
34      set(MAKE_COMMAND "$(MAKE)" "-l${MAX_JOBS}")
35    else()
36      # Parallel build with CPU load limit to avoid oversubscription
37      set(MAKE_COMMAND "make" "-j${MAX_JOBS}" "-l${MAX_JOBS}")
38    endif()
39
40    set(__NCCL_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/nccl")
41    ExternalProject_Add(nccl_external
42      SOURCE_DIR ${PROJECT_SOURCE_DIR}/third_party/nccl/nccl
43      BUILD_IN_SOURCE 1
44      CONFIGURE_COMMAND ""
45      BUILD_COMMAND
46        ${MAKE_COMMAND}
47        "CXX=${CMAKE_CXX_COMPILER}"
48        "CUDA_HOME=${CUDA_TOOLKIT_ROOT_DIR}"
49        "NVCC=${CUDA_NVCC_EXECUTABLE}"
50        "NVCC_GENCODE=${NVCC_GENCODE}"
51        "BUILDDIR=${__NCCL_BUILD_DIR}"
52        "VERBOSE=0"
53        "DEBUG=0"
54      BUILD_BYPRODUCTS "${__NCCL_BUILD_DIR}/lib/libnccl_static.a"
55      INSTALL_COMMAND ""
56      )
57
58    # Detect objcopy version
59    execute_process(COMMAND "${CMAKE_OBJCOPY}" "--version" OUTPUT_VARIABLE OBJCOPY_VERSION_STR)
60    string(REGEX REPLACE "GNU objcopy .+ ([0-9])\\.([0-9]+).*" "\\1" OBJCOPY_VERSION_MAJOR ${OBJCOPY_VERSION_STR})
61    string(REGEX REPLACE "GNU objcopy .+ ([0-9])\\.([0-9]+).*" "\\2" OBJCOPY_VERSION_MINOR ${OBJCOPY_VERSION_STR})
62
63    # TODO: Replace me with SKIP_NCCL_SLIMMING option (and investigate why it does not work on newer compilers)
64    if("$ENV{BUILD_ENVIRONMENT}" MATCHES ".*-libtorch-cxx11-abi$")
65      # See https://github.com/pytorch/pytorch/issues/83887
66      message(WARNING "Skip NCCL library slimming for cxx11-abi builds")
67      set(__NCCL_LIBRARY_DEP nccl_external)
68      set(NCCL_LIBRARIES ${__NCCL_BUILD_DIR}/lib/libnccl_static.a)
69    elseif((${OBJCOPY_VERSION_MAJOR} GREATER 2) OR ((${OBJCOPY_VERSION_MAJOR} EQUAL 2) AND (${OBJCOPY_VERSION_MINOR} GREATER 27)))
70      message(WARNING "Enabling NCCL library slimming")
71      add_custom_command(
72        OUTPUT "${__NCCL_BUILD_DIR}/lib/libnccl_slim_static.a"
73        DEPENDS nccl_external
74        COMMAND "${CMAKE_COMMAND}" -E make_directory "${__NCCL_BUILD_DIR}/objects"
75        COMMAND cd objects
76        COMMAND "${CMAKE_AR}" x "${__NCCL_BUILD_DIR}/lib/libnccl_static.a"
77        COMMAND for obj in all_gather_* all_reduce_* broadcast_* reduce_*.o$<SEMICOLON> do "${CMAKE_OBJCOPY}" --remove-relocations .nvFatBinSegment --remove-section __nv_relfatbin $$obj$<SEMICOLON> done
78        COMMAND "${CMAKE_AR}" cr "${__NCCL_BUILD_DIR}/lib/libnccl_slim_static.a" "*.o"
79        COMMAND "${CMAKE_AR}" xN 1 "${__NCCL_BUILD_DIR}/lib/libnccl_static.a" net.o
80        COMMAND "${CMAKE_AR}" q "${__NCCL_BUILD_DIR}/lib/libnccl_slim_static.a" net.o
81        COMMAND cd -
82        COMMAND "${CMAKE_COMMAND}" -E remove_directory "${__NCCL_BUILD_DIR}/objects"
83        WORKING_DIRECTORY "${__NCCL_BUILD_DIR}"
84        COMMENT "Slimming NCCL"
85        )
86      add_custom_target(nccl_slim_external DEPENDS "${__NCCL_BUILD_DIR}/lib/libnccl_slim_static.a")
87      set(__NCCL_LIBRARY_DEP nccl_slim_external)
88      set(NCCL_LIBRARIES ${__NCCL_BUILD_DIR}/lib/libnccl_slim_static.a)
89    else()
90      message(WARNING "Objcopy version is too old to support NCCL library slimming")
91      set(__NCCL_LIBRARY_DEP nccl_external)
92      set(NCCL_LIBRARIES ${__NCCL_BUILD_DIR}/lib/libnccl_static.a)
93    endif()
94
95    set(NCCL_FOUND TRUE)
96    add_library(__caffe2_nccl INTERFACE)
97    # The following old-style variables are set so that other libs, such as Gloo,
98    # can still use it.
99    set(NCCL_INCLUDE_DIRS ${__NCCL_BUILD_DIR}/include)
100    add_dependencies(__caffe2_nccl ${__NCCL_LIBRARY_DEP})
101    target_link_libraries(__caffe2_nccl INTERFACE ${NCCL_LIBRARIES})
102    target_include_directories(__caffe2_nccl INTERFACE ${NCCL_INCLUDE_DIRS})
103    # nccl includes calls to shm_open/shm_close and therefore must depend on librt on Linux
104    if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
105      target_link_libraries(__caffe2_nccl INTERFACE rt)
106    endif()
107  endif()
108endif()
109