# xref: /aosp_15_r20/external/pytorch/cmake/External/nccl.cmake (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
#
# Defines the INTERFACE library target `__caffe2_nccl`.
#
#   * USE_SYSTEM_NCCL set:   the target wraps whatever find_package(NCCL)
#                            reports in NCCL_LIBRARIES / NCCL_INCLUDE_DIRS.
#   * USE_SYSTEM_NCCL unset: the sources under third_party/nccl/nccl are built
#                            with their own Makefile through ExternalProject,
#                            and NCCL_FOUND, NCCL_LIBRARIES and
#                            NCCL_INCLUDE_DIRS are set here to point at the
#                            result.
if(NOT __NCCL_INCLUDED)
  # Include guard: the body must run only once because it creates targets.
  set(__NCCL_INCLUDED TRUE)

  if(USE_SYSTEM_NCCL)
    # NCCL_ROOT, NCCL_LIB_DIR, NCCL_INCLUDE_DIR will be accounted in the following line.
    find_package(NCCL REQUIRED)
    if(NCCL_FOUND)
      add_library(__caffe2_nccl INTERFACE)
      target_link_libraries(__caffe2_nccl INTERFACE ${NCCL_LIBRARIES})
      target_include_directories(__caffe2_nccl INTERFACE ${NCCL_INCLUDE_DIRS})
    endif()
  else()
    # ExternalProject_Add is used below. Including the module again is
    # harmless if an including file has already loaded it.
    include(ExternalProject)

    # Turn the list form "-gencode;arch=...;-gencode;arch=..." into the single
    # space-separated string "-gencode=arch=... -gencode=arch=..." that is
    # handed to the NCCL Makefile as NVCC_GENCODE.
    torch_cuda_get_nvcc_gencode_flag(NVCC_GENCODE)
    string(REPLACE "-gencode;" "-gencode=" NVCC_GENCODE "${NVCC_GENCODE}")
    # this second replacement is needed when there are multiple archs
    string(REPLACE ";-gencode" " -gencode" NVCC_GENCODE "${NVCC_GENCODE}")

    # Parallelism: honour the MAX_JOBS environment variable (read at configure
    # time), otherwise derive a value from the processor count.
    if(DEFINED ENV{MAX_JOBS})
      set(MAX_JOBS "$ENV{MAX_JOBS}")
    else()
      include(ProcessorCount)
      ProcessorCount(NUM_HARDWARE_THREADS)
      # Assume 2 hardware threads per cpu core
      math(EXPR MAX_JOBS "${NUM_HARDWARE_THREADS} / 2")
      # ProcessorCount might return 0, set to a positive number
      if(MAX_JOBS LESS 2)
        set(MAX_JOBS 2)
      endif()
    endif()

    if("${CMAKE_GENERATOR}" MATCHES "Make")
      # Recursive make with jobserver for parallelism, and also put a load limit
      # here to avoid flaky OOM, https://www.gnu.org/software/make/manual/html_node/Parallel.html
      set(MAKE_COMMAND "$(MAKE)" "-l${MAX_JOBS}")
    else()
      # Parallel build with CPU load limit to avoid oversubscription
      set(MAKE_COMMAND "make" "-j${MAX_JOBS}" "-l${MAX_JOBS}")
    endif()

    # NCCL is compiled in its source directory (BUILD_IN_SOURCE) but BUILDDIR
    # redirects its outputs into the binary tree. There is no configure or
    # install step.
    set(__NCCL_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/nccl")
    ExternalProject_Add(nccl_external
      SOURCE_DIR ${PROJECT_SOURCE_DIR}/third_party/nccl/nccl
      BUILD_IN_SOURCE 1
      CONFIGURE_COMMAND ""
      BUILD_COMMAND
        ${MAKE_COMMAND}
        "CXX=${CMAKE_CXX_COMPILER}"
        "CUDA_HOME=${CUDA_TOOLKIT_ROOT_DIR}"
        "NVCC=${CUDA_NVCC_EXECUTABLE}"
        "NVCC_GENCODE=${NVCC_GENCODE}"
        "BUILDDIR=${__NCCL_BUILD_DIR}"
        "VERBOSE=0"
        "DEBUG=0"
      # Declared so the generator knows which step produces the archive.
      BUILD_BYPRODUCTS "${__NCCL_BUILD_DIR}/lib/libnccl_static.a"
      INSTALL_COMMAND ""
      )

    # Detect objcopy version.
    # Start from 0.0 so that a missing objcopy, a non-GNU objcopy, or output
    # that cannot be parsed selects the "too old" fallback below instead of
    # aborting the configure step with a malformed string()/if() call.
    set(OBJCOPY_VERSION_MAJOR 0)
    set(OBJCOPY_VERSION_MINOR 0)
    set(OBJCOPY_VERSION_STR "")
    if(CMAKE_OBJCOPY)
      execute_process(
        COMMAND "${CMAKE_OBJCOPY}" "--version"
        OUTPUT_VARIABLE OBJCOPY_VERSION_STR
        ERROR_QUIET)
    endif()
    # The version is the last "<major>.<minor>" preceded by a space on the
    # "GNU objcopy ..." banner line. The expansion is quoted because the
    # banner text can contain ';' and may be empty.
    if("${OBJCOPY_VERSION_STR}" MATCHES "GNU objcopy[^\n]* ([0-9]+)\\.([0-9]+)")
      set(OBJCOPY_VERSION_MAJOR "${CMAKE_MATCH_1}")
      set(OBJCOPY_VERSION_MINOR "${CMAKE_MATCH_2}")
    endif()

    # TODO: Replace me with SKIP_NCCL_SLIMMING option (and investigate why it does not work on newer compilers)
    if("$ENV{BUILD_ENVIRONMENT}" MATCHES ".*-libtorch-cxx11-abi$")
      # See https://github.com/pytorch/pytorch/issues/83887
      message(WARNING "Skip NCCL library slimming for cxx11-abi builds")
      set(__NCCL_LIBRARY_DEP nccl_external)
      set(NCCL_LIBRARIES ${__NCCL_BUILD_DIR}/lib/libnccl_static.a)
    elseif(OBJCOPY_VERSION_MAJOR GREATER 2 OR (OBJCOPY_VERSION_MAJOR EQUAL 2 AND OBJCOPY_VERSION_MINOR GREATER 27))
      # Slimming requires objcopy newer than 2.27.
      message(WARNING "Enabling NCCL library slimming")
      # Unpack libnccl_static.a, strip the .nvFatBinSegment relocations and
      # the __nv_relfatbin section from the collective objects, and repack
      # everything as libnccl_slim_static.a. The second net.o (ar xN 1) is
      # extracted and appended separately.
      # NOTE(review): the shell fragments below ($$obj, $<SEMICOLON>, the
      # unquoted glob) rely on non-VERBATIM escaping, and whether `cd objects`
      # carries over to the following COMMAND lines depends on how the
      # generator chains commands -- confirm on every generator in use before
      # changing this.
      add_custom_command(
        OUTPUT "${__NCCL_BUILD_DIR}/lib/libnccl_slim_static.a"
        DEPENDS nccl_external
        COMMAND "${CMAKE_COMMAND}" -E make_directory "${__NCCL_BUILD_DIR}/objects"
        COMMAND cd objects
        COMMAND "${CMAKE_AR}" x "${__NCCL_BUILD_DIR}/lib/libnccl_static.a"
        COMMAND for obj in all_gather_* all_reduce_* broadcast_* reduce_*.o$<SEMICOLON> do "${CMAKE_OBJCOPY}" --remove-relocations .nvFatBinSegment --remove-section __nv_relfatbin $$obj$<SEMICOLON> done
        COMMAND "${CMAKE_AR}" cr "${__NCCL_BUILD_DIR}/lib/libnccl_slim_static.a" "*.o"
        COMMAND "${CMAKE_AR}" xN 1 "${__NCCL_BUILD_DIR}/lib/libnccl_static.a" net.o
        COMMAND "${CMAKE_AR}" q "${__NCCL_BUILD_DIR}/lib/libnccl_slim_static.a" net.o
        COMMAND cd -
        COMMAND "${CMAKE_COMMAND}" -E remove_directory "${__NCCL_BUILD_DIR}/objects"
        WORKING_DIRECTORY "${__NCCL_BUILD_DIR}"
        COMMENT "Slimming NCCL"
        )
      add_custom_target(nccl_slim_external DEPENDS "${__NCCL_BUILD_DIR}/lib/libnccl_slim_static.a")
      set(__NCCL_LIBRARY_DEP nccl_slim_external)
      set(NCCL_LIBRARIES ${__NCCL_BUILD_DIR}/lib/libnccl_slim_static.a)
    else()
      message(WARNING "Objcopy version is too old to support NCCL library slimming")
      set(__NCCL_LIBRARY_DEP nccl_external)
      set(NCCL_LIBRARIES ${__NCCL_BUILD_DIR}/lib/libnccl_static.a)
    endif()

    set(NCCL_FOUND TRUE)
    add_library(__caffe2_nccl INTERFACE)
    # The following old-style variables are set so that other libs, such as Gloo,
    # can still use it.
    set(NCCL_INCLUDE_DIRS ${__NCCL_BUILD_DIR}/include)
    # add_dependencies only orders the build (archive before consumers); the
    # actual linking comes from target_link_libraries below.
    add_dependencies(__caffe2_nccl ${__NCCL_LIBRARY_DEP})
    target_link_libraries(__caffe2_nccl INTERFACE ${NCCL_LIBRARIES})
    target_include_directories(__caffe2_nccl INTERFACE ${NCCL_INCLUDE_DIRS})
    # nccl includes calls to shm_open/shm_close and therefore must depend on librt on Linux
    if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
      target_link_libraries(__caffe2_nccl INTERFACE rt)
    endif()
  endif()
endif()