xref: /aosp_15_r20/external/pytorch/CMakeLists.txt (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
2# cmake_policy(SET CMP0022 NEW) cmake_policy(SET CMP0023 NEW)
3
# Use compiler ID "AppleClang" instead of "Clang" for Xcode. Without this, the
# Xcode C compiler is sometimes detected as "Clang" even when the C++ one is
# detected as "AppleClang".
7cmake_policy(SET CMP0010 NEW)
8cmake_policy(SET CMP0025 NEW)
9
10# Enables CMake to set LTO on compilers other than Intel.
11cmake_policy(SET CMP0069 NEW)
12# Enable the policy for CMake subprojects. protobuf currently causes issues
13# set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
14
15# Suppress warning flags in default MSVC configuration.  It's not mandatory that
16# we do this (and we don't if cmake is old), but it's nice when it's possible,
17# and it's possible on our Windows configs.
18cmake_policy(SET CMP0092 NEW)
19
# Prohibit in-source builds. Both directories are quoted so that each one is
# compared as a single literal string: an unquoted expansion would be split on
# semicolons and could be dereferenced again as a variable name by if().
if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
  message(FATAL_ERROR "In-source build are not supported")
endif()
24
25# ---[ Project and semantic versioning.
26project(Torch CXX C)
27
# Define LINUX as TRUE/FALSE for the platform checks further down this file.
# CMAKE_SYSTEM_NAME is passed by name so that if() dereferences it exactly
# once; the previous `${CMAKE_SYSTEM_NAME}` form expanded the value first and
# then let if() look it up again as a variable name.
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
  set(LINUX TRUE)
else()
  set(LINUX FALSE)
endif()
33
34set(CMAKE_INSTALL_MESSAGE NEVER)
35
# Check and set CMAKE_CXX_STANDARD / CMAKE_C_STANDARD.
#
# Warn when CMAKE_CXX_FLAGS already carries a -std=c++NN flag, because it
# competes with the standard requested below.
string(FIND "${CMAKE_CXX_FLAGS}" "-std=c++" env_cxx_standard)
if(env_cxx_standard GREATER -1)
  # message() joins its arguments without a separator, so the first fragment
  # ends with a space to keep the two sentences apart.
  message(
    WARNING
      "C++ standard version definition detected in environment variable. "
      "PyTorch requires -std=c++17. Please remove -std=c++ settings in your environment."
  )
endif()
# Cache entries (no FORCE): a value supplied by the user takes precedence.
set(CMAKE_CXX_STANDARD
    17
    CACHE STRING
          "The C++ standard whose features are requested to build this target.")
set(CMAKE_C_STANDARD
    11
    CACHE STRING
          "The C standard whose features are requested to build this target.")
53
54# ---[ Utils
55include(cmake/public/utils.cmake)
56
# ---[ Require GCC 9.3 or newer when building with the GNU C++ compiler.
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.3)
    message(
      FATAL_ERROR
        "GCC-9.3 or newer is required to compile PyTorch, but found ${CMAKE_CXX_COMPILER_VERSION}"
    )
  endif()
endif()
64
# This define is needed to preserve behavior given anticipated changes to
# cccl/thrust. See:
# https://nvidia.github.io/libcudacxx/standard_api/numerics_library/complex.html
68string(APPEND CMAKE_CUDA_FLAGS
69       " -DLIBCUDACXX_ENABLE_SIMPLIFIED_COMPLEX_OPERATIONS")
70
# On Linux, propagate the libstdc++ dual-ABI selection to both the host C++
# and the CUDA compile flags.
if(LINUX)
  # NOTE(review): cmake/CheckAbi.cmake is expected to define
  # GLIBCXX_USE_CXX11_ABI (0 or 1) -- confirm against that module.
  include(cmake/CheckAbi.cmake)
  string(APPEND CMAKE_CXX_FLAGS
         " -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}")
  string(APPEND CMAKE_CUDA_FLAGS
         " -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}")
  # Quoted so that an empty/undefined value evaluates to false instead of
  # producing a malformed `if( EQUAL 1)`.
  if("${GLIBCXX_USE_CXX11_ABI}" EQUAL 1)
    # CMAKE_CXX_STANDARD_REQUIRED is the variable CMake uses to initialise the
    # CXX_STANDARD_REQUIRED target property; a plain variable named
    # CXX_STANDARD_REQUIRED is not consulted by CMake.
    set(CMAKE_CXX_STANDARD_REQUIRED ON)
    # Kept for backward compatibility in case other project scripts read the
    # old variable name.
    set(CXX_STANDARD_REQUIRED ON)
  else()
    # Please note this is required in order to ensure compatibility between
    # gcc 9 and gcc 7. This could be removed when all Linux PyTorch binary
    # builds are compiled by the same toolchain again.
    append_cxx_flag_if_supported("-fabi-version=11" CMAKE_CXX_FLAGS)
  endif()
endif()
86
87set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
88set(CMAKE_LINK_WHAT_YOU_USE TRUE)
89
90# One variable that determines whether the current cmake process is being run
91# with the main Caffe2 library. This is useful for building modules - if modules
92# are built with the main Caffe2 library then one does not need to do find
93# caffe2 in the cmake script. One can usually guard it in some way like if(NOT
94# CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO) find_package(Caffe2 REQUIRED) endif()
95set(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO ON)
96
97# Googletest's cmake files are going to set it on once they are processed. Let's
98# set it at the very beginning so that the entire build is deterministic.
99set(THREADS_PREFER_PTHREAD_FLAG ON)
100
# Record, the first time through, whether BLAS was already defined when this
# file ran, and persist that answer in the cache.
if(NOT DEFINED BLAS_SET_BY_USER)
  if(NOT DEFINED BLAS)
    message(STATUS "Not forcing any particular BLAS to be found")
    set(BLAS_SET_BY_USER FALSE)
  else()
    set(BLAS_SET_BY_USER TRUE)
  endif()
  set(BLAS_SET_BY_USER ${BLAS_SET_BY_USER} CACHE STRING
      "Marks whether BLAS was manually set by user or auto-detected")
endif()
113
# Apple specific: framework search order, clang version string, RPATH, and
# detection of the MetalPerformanceShaders(Graph) framework (sets MPS_FOUND).
if(APPLE)
  # These lines are an attempt to make find_package(cuda) pick up libcuda.dylib,
  # and not cuda.framework.  It doesn't work all the time, but it seems to help
  # for some users. TODO: replace this with a more robust fix
  set(CMAKE_FIND_FRAMEWORK LAST)
  set(CMAKE_FIND_APPBUNDLE LAST)

  # Get clang version on macOS
  execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version
                  OUTPUT_VARIABLE clang_full_version_string)
  # The compiler output is quoted: unquoted, an empty output leaves
  # string(REGEX REPLACE) without its input argument (a hard error), and any
  # semicolons in the output would be silently dropped.
  string(REGEX REPLACE "Apple (.*) version ([0-9]+\\.[0-9]+).*" "\\2"
                       CLANG_VERSION_STRING "${clang_full_version_string}")
  message(STATUS "CLANG_VERSION_STRING:         ${CLANG_VERSION_STRING}")

  # RPATH stuff
  set(CMAKE_MACOSX_RPATH ON)
  if(NOT IOS)
    # Determine if we can link against MPSGraph
    set(MPS_FOUND OFF)
    execute_process(
      COMMAND bash -c "xcrun --sdk macosx --show-sdk-version"
      RESULT_VARIABLE _exit_code
      OUTPUT_VARIABLE _macosx_sdk_version
      OUTPUT_STRIP_TRAILING_WHITESPACE)
    if(_exit_code EQUAL 0)
      # MPS is only considered when the SDK version is at least 12.3.
      set(_MPS_supported_os_version OFF)
      if(_macosx_sdk_version VERSION_GREATER_EQUAL 12.3)
        set(_MPS_supported_os_version ON)
      endif()
      message(
        STATUS
          "sdk version: ${_macosx_sdk_version}, mps supported: ${_MPS_supported_os_version}"
      )
      execute_process(
        COMMAND bash -c "xcrun --sdk macosx --show-sdk-path"
        OUTPUT_VARIABLE _macosx_sdk_path
        OUTPUT_STRIP_TRAILING_WHITESPACE)
      set(_SDK_SEARCH_PATH "${_macosx_sdk_path}/System/Library/Frameworks/")
      set(_FRAMEWORK_SEARCH_PATH "/System/Library/Frameworks/")

      # The framework has to be present both on the running system and in the
      # SDK; NO_DEFAULT_PATH restricts each search to the given directory.
      find_library(
        _MPS_fwrk_path_
        NAMES MetalPerformanceShadersGraph MetalPerformanceShaders
        PATHS "${_FRAMEWORK_SEARCH_PATH}"
        NO_DEFAULT_PATH)
      find_library(
        _MPS_sdk_path_
        NAMES MetalPerformanceShadersGraph MetalPerformanceShaders
        PATHS "${_SDK_SEARCH_PATH}"
        NO_DEFAULT_PATH)

      if(_MPS_supported_os_version
         AND _MPS_fwrk_path_
         AND _MPS_sdk_path_)
        set(MPS_FOUND ON)
        message(STATUS "MPSGraph framework found")
      else()
        message(STATUS "MPSGraph framework not found")
      endif()
    else()
      message(STATUS "MPS: unable to get MacOS sdk version")
      message(STATUS "MPSGraph framework not found")
    endif()
  endif()
endif()
180
# Classify the target processor: exactly one of CPU_INTEL / CPU_AARCH64 is ON
# for x86-64 and 64-bit Arm respectively; both are OFF for anything else.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(AMD64|x86_64)")
  set(CPU_INTEL ON)
  set(CPU_AARCH64 OFF)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)")
  set(CPU_INTEL OFF)
  set(CPU_AARCH64 ON)
else()
  set(CPU_INTEL OFF)
  set(CPU_AARCH64 OFF)
endif()
189
# Platforms other than Linux and Windows are untested for distributed builds
# and likely need additional changes, so USE_DISTRIBUTED defaults to OFF there.
if(NOT (LINUX OR WIN32))
  set(USE_DISTRIBUTED OFF CACHE STRING "Use distributed")
  # If the user explicitly enabled USE_DISTRIBUTED on macOS, have Gloo build
  # with the libuv transport.
  if(APPLE AND USE_DISTRIBUTED)
    set(USE_LIBUV ON CACHE STRING "")
  endif()
endif()
204
205# ---[ Options. Note to developers: if you add an option below, make sure you
206# also add it to cmake/Summary.cmake so that the summary prints out the option
207# values.
208include(CMakeDependentOption)
209option(ATEN_NO_TEST "Do not build ATen test binaries" OFF)
210option(BUILD_BINARY "Build C++ binaries" OFF)
211option(BUILD_CUSTOM_PROTOBUF
212       "Build and use Caffe2's own protobuf under third_party" ON)
213option(BUILD_PYTHON "Build Python binaries" ON)
214option(BUILD_LITE_INTERPRETER "Master flag to build Lite Interpreter" OFF)
215option(BUILD_SHARED_LIBS "Build libcaffe2.so" ON)
216cmake_dependent_option(
217  CAFFE2_LINK_LOCAL_PROTOBUF "If set, build protobuf inside libcaffe2.so." ON
218  "BUILD_SHARED_LIBS AND BUILD_CUSTOM_PROTOBUF" OFF)
219cmake_dependent_option(
220  CAFFE2_USE_MSVC_STATIC_RUNTIME "Using MSVC static runtime libraries" ON
221  "NOT BUILD_SHARED_LIBS" OFF)
222option(BUILD_TEST "Build C++ test binaries (need gtest and gbenchmark)" OFF)
223option(BUILD_AOT_INDUCTOR_TEST "Build C++ test binaries for aot-inductor" OFF)
224option(BUILD_STATIC_RUNTIME_BENCHMARK
225       "Build C++ binaries for static runtime benchmarks (need gbenchmark)" OFF)
226option(
227  BUILD_MOBILE_BENCHMARK
228  "Build C++ test binaries for mobile (ARM) targets(need gtest and gbenchmark)"
229  OFF)
230option(
231  BUILD_MOBILE_TEST
232  "Build C++ test binaries for mobile (ARM) targets(need gtest and gbenchmark)"
233  OFF)
234option(BUILD_JNI "Build JNI bindings" OFF)
235option(BUILD_MOBILE_AUTOGRAD
236       "Build autograd function in mobile build (in development)" OFF)
237cmake_dependent_option(INSTALL_TEST "Install test binaries if BUILD_TEST is on"
238                       ON "BUILD_TEST" OFF)
239option(USE_CPP_CODE_COVERAGE "Compile C/C++ with code coverage flags" OFF)
240option(USE_COLORIZE_OUTPUT "Colorize output during compilation" ON)
241option(USE_ASAN "Use Address+Undefined Sanitizers" OFF)
242option(USE_TSAN "Use Thread Sanitizer" OFF)
243option(USE_CUDA "Use CUDA" ON)
244option(USE_XPU "Use XPU" ON)
245cmake_dependent_option(
246  BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON
247  "USE_CUDA AND LINUX AND BUILD_PYTHON" OFF)
248cmake_dependent_option(USE_ROCM "Use ROCm" ON "LINUX" OFF)
249option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF)
250cmake_dependent_option(USE_CUDNN "Use cuDNN" ON "USE_CUDA" OFF)
251cmake_dependent_option(USE_STATIC_CUDNN "Use cuDNN static libraries" OFF
252                       "USE_CUDNN" OFF)
253cmake_dependent_option(USE_CUSPARSELT "Use cuSPARSELt" ON "USE_CUDA" OFF)
254cmake_dependent_option(USE_CUDSS "Use cuDSS" ON "USE_CUDA" OFF)
255# Binary builds will fail for cufile due to https://github.com/pytorch/builder/issues/1924
256# Using TH_BINARY_BUILD to check whether is binary build.
257# USE_ROCM is guarded against in Dependencies.cmake because USE_ROCM is not properly defined here
258if(DEFINED ENV{TH_BINARY_BUILD})
259  cmake_dependent_option(USE_CUFILE "Use cuFile" OFF
260                         "USE_CUDA AND NOT $ENV{TH_BINARY_BUILD} AND NOT WIN32" OFF)
261else()
262  cmake_dependent_option(USE_CUFILE "Use cuFile" OFF "USE_CUDA AND NOT WIN32" OFF)
263endif()
264option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON)
265option(USE_KINETO "Use Kineto profiling library" ON)
266option(USE_CUPTI_SO "Use CUPTI as a shared library" ON)
267option(USE_FAKELOWP "Use FakeLowp operators" OFF)
268option(USE_GFLAGS "Use GFLAGS" OFF)
269option(USE_GLOG "Use GLOG" OFF)
270option(USE_LITE_PROTO "Use lite protobuf instead of full." OFF)
271option(USE_MAGMA "Use MAGMA" ON)
272option(USE_PYTORCH_METAL "Use Metal for PyTorch iOS build" OFF)
273option(USE_PYTORCH_METAL_EXPORT "Export Metal models on MacOSX desktop" OFF)
274option(USE_NATIVE_ARCH "Use -march=native" OFF)
275cmake_dependent_option(USE_MPS "Use MPS for macOS build" ON "MPS_FOUND" OFF)
276cmake_dependent_option(USE_NCCL "Use NCCL" ON
277                       "USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF)
278cmake_dependent_option(USE_RCCL "Use RCCL" ON USE_NCCL OFF)
279cmake_dependent_option(USE_STATIC_NCCL "Use static NCCL" OFF "USE_NCCL" OFF)
280cmake_dependent_option(USE_SYSTEM_NCCL "Use system-wide NCCL" OFF "USE_NCCL"
281                       OFF)
282option(USE_NNAPI "Use NNAPI" OFF)
283option(USE_NNPACK "Use NNPACK" ON)
284cmake_dependent_option(USE_NUMA "Use NUMA. Only available on Linux." ON "LINUX"
285                       OFF)
286cmake_dependent_option(USE_NVRTC "Use NVRTC. Only available if USE_CUDA is on."
287                       OFF "USE_CUDA" OFF)
288option(USE_NUMPY "Use NumPy" ON)
289option(USE_OBSERVERS "Use observers module." OFF)
290option(USE_OPENCL "Use OpenCL" OFF)
291option(USE_OPENMP "Use OpenMP for parallel code" ON)
292option(USE_PRECOMPILED_HEADERS "Use pre-compiled headers to accelerate build."
293       OFF)
294
295option(USE_PROF "Use profiling" OFF)
296option(USE_PYTORCH_QNNPACK "Use ATen/QNNPACK (quantized 8-bit operators)" ON)
297option(USE_SNPE "Use Qualcomm's SNPE library" OFF)
298option(USE_SYSTEM_EIGEN_INSTALL
299    "Use system Eigen instead of the one under third_party" OFF)
300cmake_dependent_option(
301    USE_VALGRIND "Use Valgrind. Only available on Linux." ON
302    "LINUX" OFF)
303
304if(NOT DEFINED USE_VULKAN)
305  cmake_dependent_option(USE_VULKAN "Use Vulkan GPU backend" ON "ANDROID" OFF)
306endif()
307
308option(USE_SLEEF_FOR_ARM_VEC256 "Use sleef for arm" OFF)
309option(USE_SOURCE_DEBUG_ON_MOBILE "Enable" ON)
310option(USE_LITE_INTERPRETER_PROFILER "Enable" ON)
311cmake_dependent_option(
312  USE_LITE_AOTI "Include AOTI sources" OFF
313  "BUILD_LITE_INTERPRETER" OFF)
314option(USE_VULKAN_FP16_INFERENCE "Vulkan - Use fp16 inference" OFF)
315option(USE_VULKAN_RELAXED_PRECISION
316       "Vulkan - Use relaxed precision math in the kernels (mediump)" OFF)
317# option USE_XNNPACK: try to enable xnnpack by default.
318option(USE_XNNPACK "Use XNNPACK" ON)
319option(USE_ROCM_KERNEL_ASSERT "Use Kernel Assert for ROCm" OFF)
320# Ensure that an ITT build is the default for x86 CPUs
321cmake_dependent_option(USE_ITT "Use Intel(R) VTune Profiler ITT functionality"
322                       ON "CPU_INTEL" OFF)
323# Ensure that an MKLDNN build is the default for x86 CPUs but optional for
324# AArch64 (dependent on -DUSE_MKLDNN).
325cmake_dependent_option(
326  USE_MKLDNN "Use MKLDNN. Only available on x86, x86_64, and AArch64."
327  "${CPU_INTEL}" "CPU_INTEL OR CPU_AARCH64" OFF)
328cmake_dependent_option(
329  USE_MKLDNN_ACL "Use Compute Library for the Arm architecture." OFF
330  "USE_MKLDNN AND CPU_AARCH64" OFF)
331set(MKLDNN_ENABLE_CONCURRENT_EXEC ${USE_MKLDNN})
332cmake_dependent_option(USE_MKLDNN_CBLAS "Use CBLAS in MKLDNN" OFF "USE_MKLDNN"
333                       OFF)
334option(USE_STATIC_MKL "Prefer to link with MKL statically (Unix only)" OFF)
# Distributed-training options. Every backend below depends on USE_DISTRIBUTED
# and is forced to OFF when that option is disabled. Each option is declared
# exactly once (the previous verbatim duplicates of the USE_GLOO* / USE_C10D_*
# declarations were redundant).
option(USE_DISTRIBUTED "Use distributed" ON)
cmake_dependent_option(
  USE_MPI "Use MPI for Caffe2. Only available if USE_DISTRIBUTED is on." ON
  "USE_DISTRIBUTED" OFF)
cmake_dependent_option(
  USE_UCC "Use UCC. Only available if USE_DISTRIBUTED is on." OFF
  "USE_DISTRIBUTED" OFF)
cmake_dependent_option(USE_SYSTEM_UCC "Use system-wide UCC" OFF "USE_UCC" OFF)
cmake_dependent_option(USE_C10D_UCC "USE C10D UCC" ON "USE_DISTRIBUTED;USE_UCC"
                       OFF)
cmake_dependent_option(
  USE_GLOO "Use Gloo. Only available if USE_DISTRIBUTED is on." ON
  "USE_DISTRIBUTED" OFF)
cmake_dependent_option(
  USE_GLOO_WITH_OPENSSL
  "Use Gloo with OpenSSL. Only available if USE_GLOO is on." OFF
  "USE_GLOO AND LINUX AND NOT INTERN_BUILD_MOBILE" OFF)
cmake_dependent_option(USE_C10D_GLOO "USE C10D GLOO" ON
                       "USE_DISTRIBUTED;USE_GLOO" OFF)
cmake_dependent_option(USE_C10D_NCCL "USE C10D NCCL" ON
                       "USE_DISTRIBUTED;USE_NCCL" OFF)
cmake_dependent_option(USE_C10D_MPI "USE C10D MPI" ON "USE_DISTRIBUTED;USE_MPI"
                       OFF)
cmake_dependent_option(
  USE_TENSORPIPE "Use TensorPipe. Only available if USE_DISTRIBUTED is on." ON
  "USE_DISTRIBUTED" OFF)
373option(ONNX_ML "Enable traditional ONNX ML API." ON)
374option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" OFF)
375option(BUILD_LIBTORCH_CPU_WITH_DEBUG
376       "Enable RelWithDebInfo for libtorch_cpu target only" OFF)
377cmake_dependent_option(
378  USE_CCACHE "Attempt using CCache to wrap the compilation" ON "UNIX" OFF)
379option(WERROR "Build with -Werror supported by the compiler" OFF)
380option(
381  DEBUG_CUDA
382  "When compiling DEBUG, also attempt to compile CUDA with debug flags (may cause nvcc to OOM)"
383  OFF)
384option(USE_COREML_DELEGATE "Use the CoreML backend through delegate APIs" OFF)
385option(USE_PER_OPERATOR_HEADERS
386       "Whether ATen should generate separate headers for each operator" ON)
387cmake_dependent_option(
388  BUILD_LAZY_TS_BACKEND
389  "Build the lazy Torchscript backend, not compatible with mobile builds" ON
390  "NOT INTERN_BUILD_MOBILE" OFF)
391cmake_dependent_option(BUILD_FUNCTORCH "Build Functorch" ON "BUILD_PYTHON" OFF)
# Only selectable when USE_CUDA is on; forced to OFF otherwise.
cmake_dependent_option(BUILD_BUNDLE_PTXAS "Bundle PTX into torch/bin folder"
                       OFF "USE_CUDA" OFF)
394
395option(USE_MIMALLOC "Use mimalloc" OFF)
396# Enable third party mimalloc library to improve memory allocation performance
397# on Windows.
398if(WIN32)
399  set(USE_MIMALLOC ON)
400endif()
401
# When USE_CCACHE is on and a ccache executable is found, register it as the
# compiler launcher for C, C++ and CUDA; otherwise just print a hint.
if(USE_CCACHE)
  find_program(CCACHE_PROGRAM ccache)
  if(NOT CCACHE_PROGRAM)
    message(
      STATUS
        "Could not find ccache. Consider installing ccache to speed up compilation."
    )
  else()
    foreach(_launcher_lang C CXX CUDA)
      set(CMAKE_${_launcher_lang}_COMPILER_LAUNCHER
          "${CCACHE_PROGRAM}"
          CACHE STRING "${_launcher_lang} compiler launcher")
    endforeach()
  endif()
endif()
421
# TensorPipe does not support Windows, so it is switched off whenever WIN32 is
# detected. In addition, if distributed support is requested but the
# libuv_ROOT environment variable is not set, look for libuv inside the conda
# environment; when it cannot be found there, USE_DISTRIBUTED (and USE_GLOO)
# are turned off.
if(WIN32)
  set(USE_TENSORPIPE OFF)
  message(WARNING "TensorPipe cannot be used on Windows. Set it to OFF")

  if(USE_DISTRIBUTED AND NOT DEFINED ENV{libuv_ROOT})
    find_library(
      libuv_tmp_LIBRARY
      NAMES uv libuv
      HINTS $ENV{CONDA_PREFIX}\\Library $ENV{PREFIX}\\Library
      PATH_SUFFIXES lib
      NO_DEFAULT_PATH)
    if(libuv_tmp_LIBRARY)
      # Point libuv_ROOT two directory levels above the library that was found.
      set(ENV{libuv_ROOT} ${libuv_tmp_LIBRARY}/../../)
    else()
      set(USE_DISTRIBUTED OFF)
      set(USE_GLOO OFF)
      message(
        WARNING
          "Libuv is not installed in current conda env. Set USE_DISTRIBUTED to OFF. "
          "Please run command 'conda install -c conda-forge libuv=1.39' to install libuv."
      )
    endif()
  endif()
endif()
449
450if(USE_GLOO_WITH_OPENSSL)
451  set(USE_TCP_OPENSSL_LOAD
452      ON
453      CACHE STRING "")
454endif()
455
456# Linux distributions do not want too many embedded sources, in that sense we
457# need to be able to build pytorch with an (almost) empty third_party directory.
458# USE_SYSTEM_LIBS is a shortcut variable to toggle all the # USE_SYSTEM_*
459# variables on. Individual USE_SYSTEM_* variables can be toggled with
460# USE_SYSTEM_LIBS being "OFF".
461option(USE_SYSTEM_LIBS "Use all available system-provided libraries." OFF)
462option(USE_SYSTEM_CPUINFO "Use system-provided cpuinfo." OFF)
463option(USE_SYSTEM_SLEEF "Use system-provided sleef." OFF)
464option(USE_SYSTEM_GLOO "Use system-provided gloo." OFF)
465option(USE_SYSTEM_FP16 "Use system-provided fp16." OFF)
466option(USE_SYSTEM_PYBIND11 "Use system-provided PyBind11." OFF)
467option(USE_SYSTEM_PTHREADPOOL "Use system-provided pthreadpool." OFF)
468option(USE_SYSTEM_PSIMD "Use system-provided psimd." OFF)
469option(USE_SYSTEM_FXDIV "Use system-provided fxdiv." OFF)
470option(USE_SYSTEM_BENCHMARK "Use system-provided google benchmark." OFF)
471option(USE_SYSTEM_ONNX "Use system-provided onnx." OFF)
472option(USE_SYSTEM_XNNPACK "Use system-provided xnnpack." OFF)
473option(USE_GOLD_LINKER "Use ld.gold to link" OFF)
# USE_SYSTEM_LIBS is a shortcut: it turns every individual USE_SYSTEM_* switch
# on and disables the bundled protobuf build.
if(USE_SYSTEM_LIBS)
  foreach(
    _system_lib_switch
    USE_SYSTEM_CPUINFO
    USE_SYSTEM_SLEEF
    USE_SYSTEM_GLOO
    USE_SYSTEM_EIGEN_INSTALL
    USE_SYSTEM_FP16
    USE_SYSTEM_PTHREADPOOL
    USE_SYSTEM_PSIMD
    USE_SYSTEM_FXDIV
    USE_SYSTEM_BENCHMARK
    USE_SYSTEM_ONNX
    USE_SYSTEM_XNNPACK
    USE_SYSTEM_PYBIND11)
    set(${_system_lib_switch} ON)
  endforeach()
  set(BUILD_CUSTOM_PROTOBUF OFF)
  # The system NCCL is only requested when NCCL is in use at all.
  if(USE_NCCL)
    set(USE_SYSTEM_NCCL ON)
  endif()
endif()
492
# /Z7 override option. When generating debug symbols, CMake defaults to the
# /Zi flag, which is not compatible with sccache, so it is rewritten to /Z7.
# Users who do not use sccache can turn this override OFF.
496cmake_dependent_option(
497  MSVC_Z7_OVERRIDE
498  "Work around sccache bug by replacing /Zi and /ZI with /Z7 when using MSVC (if you are not using sccache, you can turn this OFF)"
499  ON
500  "MSVC"
501  OFF)
502
# Default ONNX namespace: "onnx" when the system-provided ONNX is used,
# "onnx_torch" for the bundled copy. A value already in the cache is kept.
if(USE_SYSTEM_ONNX)
  set(_onnx_namespace_default "onnx")
else()
  set(_onnx_namespace_default "onnx_torch")
endif()
set(ONNX_NAMESPACE
    "${_onnx_namespace_default}"
    CACHE
      STRING
      "A namespace for ONNX; needed to build with other frameworks that share ONNX."
)
unset(_onnx_namespace_default)
518set(SELECTED_OP_LIST
519    ""
520    CACHE
521      STRING
522      "Path to the yaml file that contains the list of operators to include for custom build. Include all operators by default."
523)
# Name of the backend for static dispatch (e.g. CPU). This is a string, not a
# boolean, so it is declared as a STRING cache entry: option() only models
# ON/OFF values and would type the entry as BOOL. The default stays empty,
# which if() treats as false, and a value passed with -D is preserved.
set(STATIC_DISPATCH_BACKEND
    ""
    CACHE
      STRING
      "Name of the backend for which static dispatch code is generated, e.g.: CPU."
)
528option(
529  USE_LIGHTWEIGHT_DISPATCH
530  "Enable codegen unboxing for ATen ops, need to work with static dispatch in order to work properly."
531  OFF)
532if(USE_LIGHTWEIGHT_DISPATCH AND NOT STATIC_DISPATCH_BACKEND)
533  message(
534    FATAL_ERROR
535      "Need to enable static dispatch after enabling USE_LIGHTWEIGHT_DISPATCH.")
536endif()
537option(TRACING_BASED
538       "Master flag to build Lite Interpreter with tracing build option" OFF)
539option(BUILD_EXECUTORCH "Master flag to build Executorch" ON)
# This is a fix for a rare build issue on Ubuntu: symbol lookup error:
# miniconda3/envs/pytorch-py3.7/lib/libmkl_intel_lp64.so: undefined symbol:
# mkl_blas_dsyrk
# https://software.intel.com/en-us/articles/symbol-lookup-error-when-linking-intel-mkl-with-gcc-on-ubuntu
if(LINUX)
  string(APPEND CMAKE_SHARED_LINKER_FLAGS " -Wl,--no-as-needed")

  set(ENV_LDFLAGS "$ENV{LDFLAGS}")
  string(STRIP "${ENV_LDFLAGS}" ENV_LDFLAGS)
  # Do not append linker flags passed via the LDFLAGS env var if they are
  # already there. A literal substring search is used on purpose: linker flags
  # routinely contain regex metacharacters (e.g. `$ORIGIN`, `+`, `(`), so
  # treating them as a MATCHES pattern gives wrong answers or a regex compile
  # error. An empty LDFLAGS is always "found", so nothing is appended.
  string(FIND "${CMAKE_SHARED_LINKER_FLAGS}" "${ENV_LDFLAGS}"
              _env_ldflags_position)
  if(_env_ldflags_position EQUAL -1)
    string(APPEND CMAKE_SHARED_LINKER_FLAGS " ${ENV_LDFLAGS}")
  endif()
  unset(_env_ldflags_position)
endif()
556
# MSVC-only adjustments to compiler, linker and CUDA host-compiler flags.
if(MSVC)
  # MSVC by default does not report the __cplusplus value that matches the
  # requested language standard. /Zc:__cplusplus makes it do so, which avoids
  # compilation mismatches between GCC/Clang and MSVC.
  #
  # See:
  # * https://learn.microsoft.com/en-us/cpp/build/reference/zc-cplusplus?view=msvc-170
  # * https://en.cppreference.com/w/cpp/preprocessor/replace#Predefined_macros
  string(APPEND CMAKE_CXX_FLAGS " /Zc:__cplusplus")
  string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler  /Zc:__cplusplus")

  set(CMAKE_NINJA_CMCLDEPS_RC OFF)

  # Runtime-library switch to rewrite: /MD -> /MT for the static runtime,
  # /MT -> /MD otherwise. Decided once, applied to every flag variable below.
  if(${CAFFE2_USE_MSVC_STATIC_RUNTIME})
    set(_msvc_crt_from "/MD")
    set(_msvc_crt_to "/MT")
  else()
    set(_msvc_crt_from "/MT")
    set(_msvc_crt_to "/MD")
  endif()

  set(_msvc_compile_flag_vars
      CMAKE_C_FLAGS
      CMAKE_C_FLAGS_DEBUG
      CMAKE_C_FLAGS_RELEASE
      CMAKE_C_FLAGS_MINSIZEREL
      CMAKE_C_FLAGS_RELWITHDEBINFO
      CMAKE_CXX_FLAGS
      CMAKE_CXX_FLAGS_DEBUG
      CMAKE_CXX_FLAGS_RELEASE
      CMAKE_CXX_FLAGS_MINSIZEREL
      CMAKE_CXX_FLAGS_RELWITHDEBINFO)
  foreach(flags_name IN LISTS _msvc_compile_flag_vars)
    # Replace /Zi and /ZI with /Z7
    if(MSVC_Z7_OVERRIDE AND ${flags_name} MATCHES "/Z[iI]")
      string(REGEX REPLACE "/Z[iI]" "/Z7" ${flags_name} "${${flags_name}}")
    endif()

    # Select the requested C runtime flavour.
    if(${flags_name} MATCHES "${_msvc_crt_from}")
      string(REGEX REPLACE "${_msvc_crt_from}" "${_msvc_crt_to}" ${flags_name}
                           "${${flags_name}}")
    endif()

    # /bigobj increases the number of sections in .obj files, which is needed
    # to link against libraries in Python 2.7 under Windows. For Visual Studio
    # generators /MP is added when it is not already present; other generators
    # such as ninja handle this themselves, so /MP is not added for them.
    if(CMAKE_GENERATOR MATCHES "Visual Studio" AND NOT ${flags_name} MATCHES
                                                   "/MP")
      string(APPEND ${flags_name} " /MP")
    endif()
    string(APPEND ${flags_name} " /bigobj")
  endforeach()

  # Remove any debug-information switch from the base, Release and MinSizeRel
  # compile flags.
  set(_msvc_nodebug_flag_vars
      CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_MINSIZEREL
      CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_MINSIZEREL)
  foreach(flags_name IN LISTS _msvc_nodebug_flag_vars)
    if(${flags_name} MATCHES "/Z[iI7]")
      string(REGEX REPLACE "/Z[iI7]" "" ${flags_name} "${${flags_name}}")
    endif()
  endforeach()

  # Switch off incremental linking in debug/relwithdebinfo builds
  foreach(_msvc_link_kind SHARED STATIC EXE MODULE)
    foreach(_msvc_link_config RELWITHDEBINFO DEBUG)
      set(flags_name CMAKE_${_msvc_link_kind}_LINKER_FLAGS_${_msvc_link_config})
      if(${flags_name} MATCHES "/INCREMENTAL"
         AND NOT ${flags_name} MATCHES "/INCREMENTAL:NO")
        string(REGEX REPLACE "/INCREMENTAL" "/INCREMENTAL:NO" ${flags_name}
                             "${${flags_name}}")
      endif()
    endforeach()
  endforeach()

  # Silence linker warnings 4049, 4217 and 4099 for every link type.
  foreach(_msvc_link_kind SHARED STATIC EXE MODULE)
    string(APPEND CMAKE_${_msvc_link_kind}_LINKER_FLAGS
           " /ignore:4049 /ignore:4217 /ignore:4099")
  endforeach()

  # https://github.com/pytorch/pytorch/issues/91933: not setting the manifest
  # filename explicitly helps fix the linker error when linking
  # torch_python.dll. The manifest file would still be there in the correct
  # format torch_python.dll.manifest
  if(CMAKE_SHARED_LINKER_FLAGS MATCHES "/MANIFESTFILE:.*\\.manifest")
    string(REGEX REPLACE "/MANIFESTFILE:.*\\.manifest" ""
                         CMAKE_SHARED_LINKER_FLAGS
                         "${CMAKE_SHARED_LINKER_FLAGS}")
  endif()

  # Try harder: suppress warnings for CUDA, and pass /FS to both the host
  # compiler and the CUDA host compiler.
  string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler /w -w" " -Xcompiler /FS")
  string(APPEND CMAKE_CXX_FLAGS " /FS")

  unset(flags_name)
  unset(_msvc_crt_from)
  unset(_msvc_crt_to)
  unset(_msvc_compile_flag_vars)
  unset(_msvc_nodebug_flag_vars)
endif()
661
662string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all")
663
# INTERN_BUILD_MOBILE is switched on for every mobile build (Android, iOS) and
# disables components that do not apply to mobile. Defining the
# `BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN` environment variable forces a
# mobile build with the host toolchain, which is useful for testing.
if(ANDROID OR IOS OR DEFINED ENV{BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN})
  set(INTERN_BUILD_MOBILE ON)
  message(WARNING "INTERN_BUILD_MOBILE is on, disabling BUILD_LAZY_TS_BACKEND")
  set(BUILD_LAZY_TS_BACKEND OFF)

  # Give every function and data object its own section so that the linker
  # can drop the unused ones when -Wl,-gc-sections is passed at link time.
  foreach(_mobile_lang CXX C)
    string(APPEND CMAKE_${_mobile_lang}_FLAGS
           " -ffunction-sections -fdata-sections")
  endforeach()

  # Note for consumers: linking against libtorch_cpu.a in mobile builds
  # requires
  #   -Wl,--whole-archive -ltorch_cpu -Wl,--no-whole-archive
  # so that global constructors are included and run. Global constructors are
  # used for operator/kernel registration with the PyTorch Dispatcher.

  # C10_MOBILE is normally derived from Android/iOS toolchain macros in
  # c10/macros/Macros.h, so a host-toolchain mobile build has to define it
  # explicitly.
  if(DEFINED ENV{BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN})
    string(APPEND CMAKE_CXX_FLAGS " -DC10_MOBILE")
  endif()

  # If the PYTORCH_MOBILE_TRIM_DISPATCH_KEY_SET env var is defined, define
  # C10_MOBILE_TRIM_DISPATCH_KEYS, which limits the number of dispatch keys in
  # OperatorEntry::dispatchTable_ to reduce peak memory during library
  # initialization.
  if(DEFINED ENV{PYTORCH_MOBILE_TRIM_DISPATCH_KEY_SET})
    string(APPEND CMAKE_CXX_FLAGS " -DC10_MOBILE_TRIM_DISPATCH_KEYS")
  endif()
endif()
704
705# INTERN_BUILD_ATEN_OPS is used to control whether to build ATen/TH operators.
706set(INTERN_BUILD_ATEN_OPS ON)
707
708if(NOT DEFINED USE_BLAS)
709  set(USE_BLAS ON)
710endif()
711
# Settings for the libtorch mobile library, which contains ATen/TH ops and
# native support for TorchScript models, but not the not-yet-unified caffe2
# ops.
if(INTERN_BUILD_MOBILE)
  # Components that are always switched off for mobile.
  set(BUILD_PYTHON OFF)
  set(BUILD_FUNCTORCH OFF)
  set(USE_DISTRIBUTED OFF)
  set(USE_FBGEMM OFF)
  set(NO_API ON)
  set(INTERN_DISABLE_ONNX ON)
  # Disable developing mobile interpreter for actual mobile build. Enable it
  # elsewhere to capture build error.
  set(INTERN_DISABLE_MOBILE_INTERP ON)

  # Autograd is disabled unless BUILD_MOBILE_AUTOGRAD asks for it.
  if(NOT BUILD_MOBILE_AUTOGRAD)
    set(INTERN_DISABLE_AUTOGRAD ON)
  else()
    set(INTERN_DISABLE_AUTOGRAD OFF)
  endif()

  # INTERN_USE_EIGEN_BLAS follows USE_BLAS.
  if(NOT USE_BLAS)
    set(INTERN_USE_EIGEN_BLAS OFF)
  else()
    set(INTERN_USE_EIGEN_BLAS ON)
  endif()

  # Static custom builds (an operator list was selected) compile with
  # NO_EXPORT defined.
  if(NOT BUILD_SHARED_LIBS AND NOT "${SELECTED_OP_LIST}" STREQUAL "")
    string(APPEND CMAKE_CXX_FLAGS " -DNO_EXPORT")
  endif()
endif()
738
# ---[ Version numbers for generated libraries
#
# The default version comes from version.txt. It can be overridden through the
# TORCH_BUILD_VERSION cache entry or the PYTORCH_BUILD_VERSION environment
# variable (the environment variable wins).
file(READ version.txt TORCH_DEFAULT_VERSION)
# Strip surrounding whitespace, including a trailing "\n" or "\r\n".
string(STRIP "${TORCH_DEFAULT_VERSION}" TORCH_DEFAULT_VERSION)
if("${TORCH_DEFAULT_VERSION}" STREQUAL "")
  message(WARNING "Could not get version from base 'version.txt'")
  # If we can't get the version from the version file, fall back to something
  # nonsensical like 0.0.0. (The variable name must not carry a trailing
  # comma, otherwise a different variable is set and the fallback is lost.)
  set(TORCH_DEFAULT_VERSION "0.0.0")
endif()
set(TORCH_BUILD_VERSION
    "${TORCH_DEFAULT_VERSION}"
    CACHE STRING "Torch build version")
if(DEFINED ENV{PYTORCH_BUILD_VERSION})
  set(TORCH_BUILD_VERSION
      "$ENV{PYTORCH_BUILD_VERSION}"
      CACHE STRING "Torch build version" FORCE)
endif()
if(NOT TORCH_BUILD_VERSION)
  # An empty string was specified so force version to the default
  set(TORCH_BUILD_VERSION
      "${TORCH_DEFAULT_VERSION}"
      CACHE STRING "Torch build version" FORCE)
endif()
# NOTE(review): caffe2_parse_version_str is defined outside this file
# (presumably cmake/public/utils.cmake); it is expected to provide
# TORCH_VERSION_MAJOR / TORCH_VERSION_MINOR used below -- confirm.
caffe2_parse_version_str(TORCH ${TORCH_BUILD_VERSION})
set(TORCH_SOVERSION "${TORCH_VERSION_MAJOR}.${TORCH_VERSION_MINOR}")
765
# ---[ CMake scripts + modules
# Make the project's own Find*/helper modules visible to include()/find_package().
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/Modules")

# ---[ CMake build directories
# Static and shared libraries are emitted into lib/, executables into bin/.
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")

enable_testing()

# ---[ Build variables set within the cmake tree
include(cmake/BuildVariables.cmake)
set(CAFFE2_ALLOWLIST "" CACHE STRING
    "A allowlist file of files that one should build.")

# Fall back to a Release build when the user did not pick a build type.
if(NOT CMAKE_BUILD_TYPE)
  message(STATUS "Build type not set - defaulting to Release")
  set(CMAKE_BUILD_TYPE "Release" CACHE STRING
      "Choose the type of build from: Debug Release RelWithDebInfo MinSizeRel Coverage."
      FORCE)
endif()
792
# Cross compiling for arm64 or x86_64 on macOS (but not iOS): build a universal
# host protoc up front so that the protobuf build does not fail.
if(NOT IOS
   AND CMAKE_SYSTEM_NAME STREQUAL "Darwin"
   AND CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64)$")
  set(CROSS_COMPILING_MACOSX TRUE)

  # CMAKE_TRY_COMPILE_TARGET_TYPE is set to STATIC_LIBRARY (instead of an
  # executable) so that the cmake compiler check succeeds when cross-compiling.
  set(protoc_build_command
      "./scripts/build_host_protoc.sh --other-flags -DCMAKE_OSX_ARCHITECTURES=\"x86_64;arm64\" -DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY -DCMAKE_C_COMPILER_WORKS=1 -DCMAKE_CXX_COMPILER_WORKS=1"
  )

  # CMake COMMAND dislikes double quotes in commands, so the command line is
  # written to a temporary script, copied into scripts/ with owner
  # read/write/execute permissions, executed from the source root, and both
  # copies are removed afterwards.
  file(WRITE "${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh"
       "#!/bin/bash\n${protoc_build_command}")
  file(
    COPY "${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh"
    DESTINATION "${PROJECT_SOURCE_DIR}/scripts/"
    FILE_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE)
  execute_process(
    COMMAND ./scripts/tmp_protoc_script.sh
    WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
    RESULT_VARIABLE BUILD_HOST_PROTOC_RESULT)
  file(REMOVE
       "${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh"
       "${PROJECT_SOURCE_DIR}/scripts/tmp_protoc_script.sh")

  # A non-zero exit status from the script aborts configuration.
  if(NOT BUILD_HOST_PROTOC_RESULT EQUAL 0)
    message(FATAL_ERROR "Could not compile universal protoc.")
  endif()

  # Both protoc variables point at the freshly built host protoc.
  set(PROTOBUF_PROTOC_EXECUTABLE
      "${PROJECT_SOURCE_DIR}/build_host_protoc/bin/protoc")
  set(CAFFE2_CUSTOM_PROTOC_EXECUTABLE "${PROTOBUF_PROTOC_EXECUTABLE}")
endif()
826
# ---[ Misc checks to cope with various compiler modes
include(cmake/MiscCheck.cmake)

# External projects
include(ExternalProject)

# ---[ Dependencies
# FBGEMM doesn't work on x86 32bit, and CMAKE_SYSTEM_PROCESSOR may claim
# "x86_64" on such hosts, so the pointer size is checked as well.
if(USE_FBGEMM)
  if((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SIZEOF_VOID_P EQUAL 4)
     OR CMAKE_SYSTEM_PROCESSOR STREQUAL "x86")
    set(USE_FBGEMM OFF)
  endif()
endif()

set(BUILD_ONEDNN_GRAPH OFF)

# The source code is in utf-8 encoding; tell MSVC so.
if(MSVC)
  append_cxx_flag_if_supported("/utf-8" CMAKE_CXX_FLAGS)
endif()
848
# Note for the ROCM platform:
#   1. USE_ROCM is always ON until include(cmake/Dependencies.cmake)
#   2. USE_CUDA will become OFF during re-configuration
#
# Truth table:
#   CUDA 1st pass: USE_CUDA=True;USE_ROCM=True, FLASH evaluates to ON by default
#   CUDA 2nd pass: USE_CUDA=True;USE_ROCM=False, FLASH evaluates to ON by default
#   ROCM 1st pass: USE_CUDA=True;USE_ROCM=True, FLASH evaluates to ON by default
#   ROCM 2nd pass: USE_CUDA=False;USE_ROCM=True, FLASH evaluates to ON by default
#   CPU 1st pass: USE_CUDA=False(Cmd Option);USE_ROCM=True, FLASH evaluates to OFF by default
#   CPU 2nd pass: USE_CUDA=False(Cmd Option);USE_ROCM=False, FLASH evaluates to OFF by default
#
# Thus we cannot tell the ROCM 2nd pass and the CPU 1st pass apart.
#
# The only solution is to include(cmake/Dependencies.cmake) first, and defer
# the aotriton build decision until later.
include(cmake/Dependencies.cmake)

# Flash attention defaults to ON only for CUDA/ROCM builds that are not MSVC.
cmake_dependent_option(
  USE_FLASH_ATTENTION
  "Whether to build the flash_attention kernel for scaled dot product attention.\
  Will be disabled if not supported by the platform"
  ON "USE_CUDA OR USE_ROCM;NOT MSVC" OFF)

# We are currently not using alibi attention for Flash, so this feature is
# disabled by default. It is not documented because we don't expect users
# building from source to need it.
add_definitions(-DFLASHATTENTION_DISABLE_ALIBI)

# CAVEAT: Again, Flash Attention2 will error while building for sm52 while Mem
# Eff Attention won't
cmake_dependent_option(
  USE_MEM_EFF_ATTENTION
  "Enable memory-efficient attention for scaled dot product attention.\
  Will be disabled if not supported by the platform"
  ON "USE_CUDA OR USE_ROCM" OFF)

# This cannot be put into Dependencies.cmake due to a circular dependency:
# USE_FLASH_ATTENTION -> USE_ROCM -> Dependencies.cmake -> aotriton.cmake
if(USE_ROCM AND (USE_FLASH_ATTENTION OR USE_MEM_EFF_ATTENTION))
  include(cmake/External/aotriton.cmake)
endif()
895
# DEBUG_CUDA adds line info (and, where the compiler tolerates it, the source
# embedded in PTX) to the Debug and RelWithDebInfo CUDA flags.
if(DEBUG_CUDA)
  foreach(torch_cuda_debug_config IN ITEMS DEBUG RELWITHDEBINFO)
    string(APPEND CMAKE_CUDA_FLAGS_${torch_cuda_debug_config} " -lineinfo")
  endforeach()
  # CUDA-12.1 crashes when trying to compile with --source-in-ptx See
  # https://github.com/pytorch/pytorch/issues/102372#issuecomment-1572526893
  if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12.1)
    foreach(torch_cuda_debug_config IN ITEMS DEBUG RELWITHDEBINFO)
      string(APPEND CMAKE_CUDA_FLAGS_${torch_cuda_debug_config}
             " --source-in-ptx")
    endforeach()
  endif()
endif()
906
# Each of these options, when enabled, is exposed to C++ as a preprocessor
# definition carrying the option's own name.
foreach(torch_quant_backend_option IN ITEMS USE_FBGEMM USE_PYTORCH_QNNPACK)
  if(${torch_quant_backend_option})
    string(APPEND CMAKE_CXX_FLAGS " -D${torch_quant_backend_option}")
  endif()
endforeach()
914
# AT_BUILD_ARM_VEC256_WITH_SLEEF is defined (both in CMAKE_CXX_FLAGS and as a
# directory-level definition) when USE_SLEEF_FOR_ARM_VEC256 is set.
if(USE_SLEEF_FOR_ARM_VEC256)
  string(APPEND CMAKE_CXX_FLAGS " -DAT_BUILD_ARM_VEC256_WITH_SLEEF")
  add_definitions(-DAT_BUILD_ARM_VEC256_WITH_SLEEF)
endif()

# Enable sleef on macOS with Apple silicon by default. The variable expansions
# are quoted so that if() compares them as plain strings.
if("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin"
   AND "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64")
  message(STATUS "Running on macOS with Apple silicon")
  # Skip the definition when the explicit option above already added it, so
  # the flag does not end up on the command line twice.
  if(NOT USE_SLEEF_FOR_ARM_VEC256)
    string(APPEND CMAKE_CXX_FLAGS " -DAT_BUILD_ARM_VEC256_WITH_SLEEF")
    add_definitions(-DAT_BUILD_ARM_VEC256_WITH_SLEEF)
  endif()
endif()
926
# Translate enabled build options into preprocessor definitions appended to
# CMAKE_CXX_FLAGS. The order of the appended flags is significant only in that
# it matches the order of the checks below.
if(USE_XNNPACK)
  string(APPEND CMAKE_CXX_FLAGS " -DUSE_XNNPACK")
endif()

if(USE_VULKAN)
  string(APPEND CMAKE_CXX_FLAGS " -DUSE_VULKAN" " -DUSE_VULKAN_API")

  # Optional Vulkan sub-features; the definition name equals the option name.
  foreach(torch_vulkan_option IN ITEMS USE_VULKAN_FP16_INFERENCE
                                       USE_VULKAN_RELAXED_PRECISION)
    if(${torch_vulkan_option})
      string(APPEND CMAKE_CXX_FLAGS " -D${torch_vulkan_option}")
    endif()
  endforeach()
endif()

# Options whose definition name is identical to the option name.
foreach(torch_feature_option IN ITEMS BUILD_LITE_INTERPRETER TRACING_BASED
                                      USE_PYTORCH_METAL USE_PYTORCH_METAL_EXPORT)
  if(${torch_feature_option})
    string(APPEND CMAKE_CXX_FLAGS " -D${torch_feature_option}")
  endif()
endforeach()

# Options that map onto a differently named definition.
if(USE_SOURCE_DEBUG_ON_MOBILE)
  string(APPEND CMAKE_CXX_FLAGS " -DSYMBOLICATE_MOBILE_DEBUG_HANDLE")
endif()

if(BUILD_LITE_INTERPRETER AND USE_LITE_INTERPRETER_PROFILER)
  string(APPEND CMAKE_CXX_FLAGS " -DEDGE_PROFILER_USE_KINETO")
endif()

if(USE_COREML_DELEGATE)
  string(APPEND CMAKE_CXX_FLAGS " -DUSE_COREML_DELEGATE")
endif()
972
# ---[ Allowlist file if allowlist is specified
include(cmake/Allowlist.cmake)

# ---[ Set link flag, handle additional deps for gcc 4.8 and above
# GCC builds outside Android link gcc_s and gcc explicitly.
if(CMAKE_COMPILER_IS_GNUCXX AND NOT ANDROID)
  message(STATUS
          "GCC ${CMAKE_CXX_COMPILER_VERSION}: Adding gcc and gcc_s libs to link line")
  list(APPEND Caffe2_DEPENDENCY_LIBS gcc_s gcc)
endif()
984
# ---[ Build flags
# Re-include to override append_cxx_flag_if_supported from third_party/FBGEMM
include(cmake/public/utils.cmake)
if(MSVC)
  add_compile_definitions(
    # skip unwanted includes from windows.h
    WIN32_LEAN_AND_MEAN
    # Windows SDK broke compatibility since version 25131, but introduced this
    # define for backward compatibility.
    _UCRT_LEGACY_INFINITY
    # disable min/max macros
    NOMINMAX)

  # Turn off these warnings on Windows.
  foreach(torch_msvc_disabled_warning IN ITEMS
          "/wd4624" # destructor was implicitly defined as delete
          "/wd4068" # unknown pragma
          "/wd4067" # unexpected tokens following preprocessor directive -
                    # expected a newline
          "/wd4267" # conversion from 'size_t' to 'unsigned int', possible loss
                    # of data
          "/wd4661" # no suitable definition provided for explicit template
                    # instantiation request
          "/wd4717" # recursive on all control paths, function will cause
                    # runtime stack overflow
          "/wd4244" # conversion from '_Ty' to '_Ty', possible loss of data
          "/wd4804" # unsafe use of type 'bool' in operation
          "/wd4273" # inconsistent dll linkage
  )
    append_cxx_flag_if_supported("${torch_msvc_disabled_warning}"
                                 CMAKE_CXX_FLAGS)
  endforeach()
else()
  # Eigen fails to build with some versions, so convert this to a warning
  # Details at http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1459
  string(APPEND CMAKE_CXX_FLAGS " -O2 -fPIC" " -Wall" " -Wextra")

  # Warning configuration; each flag is only added when the helper accepts it.
  foreach(torch_cxx_warning_flag IN ITEMS
          "-Werror=return-type"
          "-Werror=non-virtual-dtor"
          "-Werror=braced-scalar-init"
          "-Werror=range-loop-construct"
          "-Werror=bool-operation"
          "-Wnarrowing"
          "-Wno-missing-field-initializers"
          "-Wno-type-limits"
          "-Wno-array-bounds"
          "-Wno-unknown-pragmas"
          "-Wno-unused-parameter"
          "-Wno-strict-overflow"
          "-Wno-strict-aliasing"
          "-Wno-stringop-overflow"
          "-Wvla-extension"
          "-Wsuggest-override"
          "-Wnewline-eof"
          "-Winconsistent-missing-override"
          "-Winconsistent-missing-destructor-override")
    append_cxx_flag_if_supported("${torch_cxx_warning_flag}" CMAKE_CXX_FLAGS)
  endforeach()

  if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
    string(APPEND CMAKE_CXX_FLAGS " -Wno-pass-failed")
  endif()
  if(CMAKE_COMPILER_IS_GNUCXX)
    # Suppress "The ABI for passing parameters with 64-byte alignment has
    # changed in GCC 4.6"
    string(APPEND CMAKE_CXX_FLAGS " -Wno-psabi")
  endif()

  # Use ld.gold if available, fall back to ld.bfd (the default ld) if not
  if(USE_GOLD_LINKER)
    if(USE_DISTRIBUTED AND USE_MPI)
      # Same issue as here with default MPI on Ubuntu
      # https://bugs.launchpad.net/ubuntu/+source/deal.ii/+bug/1841577
      message(WARNING "Refusing to use gold when USE_MPI=1")
    else()
      # Ask the C compiler driver which linker -fuse-ld=gold resolves to.
      execute_process(
        COMMAND "${CMAKE_C_COMPILER}" -fuse-ld=gold -Wl,--version
        ERROR_QUIET
        OUTPUT_VARIABLE LD_VERSION)
      if("${LD_VERSION}" MATCHES "GNU gold")
        message(STATUS "ld.gold is available, using it to link")
        string(APPEND CMAKE_EXE_LINKER_FLAGS " -fuse-ld=gold")
        string(APPEND CMAKE_SHARED_LINKER_FLAGS " -fuse-ld=gold")
        string(APPEND CMAKE_MODULE_LINKER_FLAGS " -fuse-ld=gold")
      else()
        message(
          WARNING
            "USE_GOLD_LINKER was set but ld.gold isn't available, turning it off"
        )
        set(USE_GOLD_LINKER OFF)
      endif()
    endif()
  endif()

  foreach(torch_cxx_warning_flag IN ITEMS
          "-Wno-error=old-style-cast"
          "-Wconstant-conversion"
          "-Wno-aligned-allocation-unavailable"
          "-Wno-missing-braces"
          "-Qunused-arguments")
    append_cxx_flag_if_supported("${torch_cxx_warning_flag}" CMAKE_CXX_FLAGS)
  endforeach()

  if(${USE_COLORIZE_OUTPUT})
    # Why compiler checks are necessary even when `try_compile` is used:
    # because of the bug in ccache that can incorrectly identify
    # `-fcolor-diagnostics` as supported by GCC, see
    # https://github.com/ccache/ccache/issues/740 (for older ccache) and
    # https://github.com/ccache/ccache/issues/1275 (for newer ones)
    if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
      append_cxx_flag_if_supported("-fdiagnostics-color=always" CMAKE_CXX_FLAGS)
    else()
      append_cxx_flag_if_supported("-fcolor-diagnostics" CMAKE_CXX_FLAGS)
    endif()
  endif()

  append_cxx_flag_if_supported("-faligned-new" CMAKE_CXX_FLAGS)

  if(WERROR)
    append_cxx_flag_if_supported("-Werror" CMAKE_CXX_FLAGS)
    # NOTE(review): COMPILER_SUPPORT_WERROR is not assigned anywhere in this
    # section; confirm that something actually sets it, otherwise WERROR is
    # always reset to FALSE here.
    if(NOT COMPILER_SUPPORT_WERROR)
      set(WERROR FALSE)
    endif()
  endif()

  append_cxx_flag_if_supported("-Wno-unused-but-set-variable" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS)

  # Debug-configuration specific flags.
  append_cxx_flag_if_supported("-fstandalone-debug" CMAKE_CXX_FLAGS_DEBUG)
  string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
  # NOTE(review): CMAKE_LINKER_FLAGS_DEBUG is not one of CMake's documented
  # linker flag variables (CMAKE_{EXE,SHARED,MODULE}_LINKER_FLAGS_DEBUG);
  # confirm this append has the intended effect.
  string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")

  foreach(torch_cxx_codegen_flag IN ITEMS
          "-fno-math-errno"
          "-fno-trapping-math"
          "-Werror=format")
    append_cxx_flag_if_supported("${torch_cxx_codegen_flag}" CMAKE_CXX_FLAGS)
  endforeach()
endif()
1117
# On aarch64, probe whether the C compiler provides the vst1q_f32_x2 and
# vld1q_f32_x2 NEON intrinsics; a MISSING_ARM_* definition is added for each
# probe that fails to compile.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
  include(CheckCSourceCompiles)

  check_c_source_compiles(
    "#include <arm_neon.h>
int main() {
  float a[] = {1.0, 1.0};
  float32x4x2_t v;
  v.val[0] = vcombine_f32 (vcreate_f32 (0UL), vcreate_f32 (0UL));
  v.val[1] = vcombine_f32 (vcreate_f32 (0UL), vcreate_f32 (0UL));
  vst1q_f32_x2(a, v);
  return 0;
}"
    HAS_VST1)
  if(NOT HAS_VST1)
    string(APPEND CMAKE_CXX_FLAGS " -DMISSING_ARM_VST1")
  endif()

  check_c_source_compiles(
    "#include <arm_neon.h>
int main() {
  float a[] = {1.0, 1.0};
  vld1q_f32_x2(a);
  return 0;
}"
    HAS_VLD1)
  if(NOT HAS_VLD1)
    string(APPEND CMAKE_CXX_FLAGS " -DMISSING_ARM_VLD1")
  endif()
endif()
1152
# Add code coverage flags to supported compilers.
#   GNU:   gcov-style instrumentation (--coverage) with absolute profile paths.
#   Clang: source-based coverage (-fprofile-instr-generate -fcoverage-mapping).
# Any other compiler cannot honour USE_CPP_CODE_COVERAGE, so configuration is
# aborted instead of silently producing an uninstrumented build.
if(USE_CPP_CODE_COVERAGE)
  if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
    string(APPEND CMAKE_C_FLAGS " --coverage -fprofile-abs-path")
    string(APPEND CMAKE_CXX_FLAGS " --coverage -fprofile-abs-path")
  elseif("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
    string(APPEND CMAKE_C_FLAGS " -fprofile-instr-generate -fcoverage-mapping")
    string(APPEND CMAKE_CXX_FLAGS
           " -fprofile-instr-generate -fcoverage-mapping")
  else()
    # "ERROR" is not a message() mode (it would just be printed as text);
    # FATAL_ERROR is what actually stops the configure step.
    message(
      FATAL_ERROR
      "Code coverage for compiler ${CMAKE_CXX_COMPILER_ID} is unsupported")
  endif()

endif()
1169
# Apple-specific flags: MPS support (when enabled) plus two warning
# suppressions that apply to every Apple build.
if(APPLE)
  if(USE_MPS)
    string(APPEND CMAKE_OBJCXX_FLAGS " -DUSE_MPS -fno-objc-arc")
    string(APPEND CMAKE_CXX_FLAGS " -DUSE_MPS")
    # The frameworks are linked weakly (-weak_framework).
    string(APPEND CMAKE_SHARED_LINKER_FLAGS
           " -weak_framework Foundation"
           " -weak_framework MetalPerformanceShaders"
           " -weak_framework MetalPerformanceShadersGraph"
           " -weak_framework Metal")
    # To suppress MPSGraph availability warnings
    append_cxx_flag_if_supported("-Wno-unguarded-availability-new"
                                 CMAKE_OBJCXX_FLAGS)
  endif()

  foreach(torch_apple_warning_flag IN ITEMS "-Wno-unused-private-field"
                                            "-Wno-missing-braces")
    append_cxx_flag_if_supported("${torch_apple_warning_flag}" CMAKE_CXX_FLAGS)
  endforeach()
endif()
1186
# XPU builds are flagged to C++ through USE_XPU.
if(USE_XPU)
  string(APPEND CMAKE_CXX_FLAGS " -DUSE_XPU")
endif()

# Emscripten builds get their own definition and keep exception catching on.
if(EMSCRIPTEN)
  string(APPEND CMAKE_CXX_FLAGS
         " -Wno-implicit-function-declaration"
         " -DEMSCRIPTEN"
         " -s DISABLE_EXCEPTION_CATCHING=0")
endif()

append_cxx_flag_if_supported("-Wno-stringop-overflow" CMAKE_CXX_FLAGS)

# Android builds without ANDROID_DEBUG_SYMBOLS drop symbol/debug information;
# the flag used depends on the compiler.
if(ANDROID AND NOT ANDROID_DEBUG_SYMBOLS)
  if(CMAKE_COMPILER_IS_GNUCXX)
    string(APPEND CMAKE_CXX_FLAGS " -s")
  elseif("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
    string(APPEND CMAKE_CXX_FLAGS " -g0")
  else()
    string(APPEND CMAKE_EXE_LINKER_FLAGS " -s")
  endif()
endif()

# Non-Apple UNIX platforms link against dl.
if(UNIX AND NOT APPLE)
  list(APPEND Caffe2_DEPENDENCY_LIBS dl)
endif()
1214
# Prepend the project's own include roots, one at a time, so that each later
# entry ends up in front of the earlier ones.
foreach(torch_priority_include_dir IN ITEMS
        # Prefix path to Caffe2 headers. If a directory containing installed
        # Caffe2 headers was inadvertently added to the list of include
        # directories, prefixing PROJECT_SOURCE_DIR means this source tree
        # always takes precedence.
        ${PROJECT_SOURCE_DIR}
        # Prefix path to generated Caffe2 headers. These need to take
        # precedence over their empty counterparts located in
        # PROJECT_SOURCE_DIR.
        ${PROJECT_BINARY_DIR}
        # ATen sources, followed by their counterpart in the build tree.
        ${PROJECT_SOURCE_DIR}/aten/src/
        ${CMAKE_BINARY_DIR}/aten/src/)
  include_directories(BEFORE ${torch_priority_include_dir})
endforeach()
1226
# Optional mimalloc: switch off the listed mimalloc build options, then pull
# in the bundled copy together with its headers.
if(USE_MIMALLOC)
  foreach(torch_mimalloc_option IN ITEMS MI_OVERRIDE MI_BUILD_SHARED
                                         MI_BUILD_OBJECT MI_BUILD_TESTS)
    set(${torch_mimalloc_option} OFF)
  endforeach()
  add_definitions(-DUSE_MIMALLOC)
  add_subdirectory(third_party/mimalloc)
  include_directories(third_party/mimalloc/include)
endif()
1236
# ---[ Main build
add_subdirectory(c10)
add_subdirectory(caffe2)

# ---[ CMake related files
# Uninstall option: generate the uninstall script from its template and expose
# it through the caffe2_uninstall target, unless that target already exists.
if(NOT TARGET caffe2_uninstall)
  configure_file(
    ${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_uninstall.cmake.in
    ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake
    IMMEDIATE @ONLY)

  add_custom_target(
    caffe2_uninstall
    COMMAND ${CMAKE_COMMAND} -P
            ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake)
endif()
1251
# ---[ Make configuration files for cmake to allow dependent libraries easier
# access to Caffe2.

# Warn when glog or gflags is disabled, or when the custom protobuf is built.
if(NOT (USE_GLOG AND USE_GFLAGS) OR BUILD_CUSTOM_PROTOBUF)
  message(WARNING "Generated cmake files are only fully tested if one builds "
                  "with system glog, gflags, and protobuf. Other settings may "
                  "generate files that are not well tested.")
endif()

if(USE_CUDA OR USE_ROCM)
  # TODO: check if we should include other cuda dependency libraries to the
  # interface as well.

endif()
1268
# Note(jiayq): when building static libraries, all PRIVATE dependencies will
# also become interface libraries, and as a result if there are any dependency
# libraries that are not exported, the following install export script will
# fail. As a result, we will only provide the targets cmake files for shared lib
# installation. For more info, read:
# https://cmake.org/pipermail/cmake/2016-May/063400.html
if(NOT BUILD_SHARED_LIBS)
  message(WARNING "Generated cmake files are only available when building "
                  "shared libs.")
else()
  # Package config file, generated from its template.
  configure_file(${PROJECT_SOURCE_DIR}/cmake/Caffe2Config.cmake.in
                 ${PROJECT_BINARY_DIR}/Caffe2Config.cmake @ONLY)
  install(
    FILES ${PROJECT_BINARY_DIR}/Caffe2Config.cmake
    DESTINATION share/cmake/Caffe2
    COMPONENT dev)

  # Public helper scripts.
  install(
    FILES ${PROJECT_SOURCE_DIR}/cmake/public/cuda.cmake
          ${PROJECT_SOURCE_DIR}/cmake/public/xpu.cmake
          ${PROJECT_SOURCE_DIR}/cmake/public/glog.cmake
          ${PROJECT_SOURCE_DIR}/cmake/public/gflags.cmake
          ${PROJECT_SOURCE_DIR}/cmake/public/mkl.cmake
          ${PROJECT_SOURCE_DIR}/cmake/public/mkldnn.cmake
          ${PROJECT_SOURCE_DIR}/cmake/public/protobuf.cmake
          ${PROJECT_SOURCE_DIR}/cmake/public/utils.cmake
          ${PROJECT_SOURCE_DIR}/cmake/public/LoadHIP.cmake
    DESTINATION share/cmake/Caffe2/public
    COMPONENT dev)

  # CUDA fix-up modules (whole directory) and the individual Find modules.
  install(
    DIRECTORY ${PROJECT_SOURCE_DIR}/cmake/Modules_CUDA_fix
    DESTINATION share/cmake/Caffe2/
    COMPONENT dev)
  install(
    FILES ${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUDAToolkit.cmake
          ${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUSPARSELT.cmake
          ${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUDSS.cmake
          ${PROJECT_SOURCE_DIR}/cmake/Modules/FindSYCLToolkit.cmake
    DESTINATION share/cmake/Caffe2/
    COMPONENT dev)

  # The exported targets file is skipped for BUILD_LIBTORCHLESS builds.
  if(NOT BUILD_LIBTORCHLESS)
    install(
      EXPORT Caffe2Targets
      DESTINATION share/cmake/Caffe2
      FILE Caffe2Targets.cmake
      COMPONENT dev)
  endif()
endif()
1325
# ---[ Binaries
# Binaries will be built after the Caffe2 main libraries and the modules are
# built. For the binaries, they will be linked to the Caffe2 main libraries, as
# well as all the modules that are built with Caffe2 (the ones built in the
# previous Modules section above).
if(BUILD_BINARY)
  add_subdirectory(binaries)
endif()

# ---[ JNI
# The Android JNI bindings are built with fbjni tests skipped; outside MSVC,
# unused-variable warnings are silenced first.
if(BUILD_JNI)
  if(NOT MSVC)
    string(APPEND CMAKE_CXX_FLAGS " -Wno-unused-variable")
  endif()
  set(BUILD_LIBTORCH_WITH_JNI 1)
  set(FBJNI_SKIP_TESTS 1)
  add_subdirectory(android/pytorch_android)
endif()

# Print the configuration summary.
include(cmake/Summary.cmake)
caffe2_print_configuration_summary()

if(BUILD_FUNCTORCH)
  add_subdirectory(functorch)
endif()
1350
# Parse custom debug info: USE_CUSTOM_DEBINFO lists source files (separated by
# ';' and/or spaces) that are compiled with -g.
if(DEFINED USE_CUSTOM_DEBINFO)
  # Normalise to a space separated string for the log message ...
  string(REPLACE ";" " " SOURCE_FILES "${USE_CUSTOM_DEBINFO}")
  message(STATUS "Source files with custom debug infos: ${SOURCE_FILES}")

  # ... and back to a CMake list, collapsing runs of spaces.
  string(REGEX REPLACE " +" ";" SOURCE_FILES_LIST "${SOURCE_FILES}")

  # Set the COMPILE_FLAGS property for each source file
  foreach(SOURCE_FILE ${SOURCE_FILES_LIST})
    # We have to specify the scope here. We do this by specifying the targets we
    # care about and caffe2/ for all test targets defined there
    if(NOT BUILD_LIBTORCHLESS)
      # @todo test if we can remove this
      set(ALL_PT_TARGETS "torch_python;c10;torch_cpu;torch")
    else()
      caffe2_update_option(USE_CUDA OFF)
      set(ALL_PT_TARGETS "torch_python;${C10_LIB};${TORCH_CPU_LIB};${TORCH_LIB}")
    endif()
    set_source_files_properties(
      ${SOURCE_FILE}
      DIRECTORY "caffe2/"
      TARGET_DIRECTORY ${ALL_PT_TARGETS}
      PROPERTIES COMPILE_FLAGS "-g")
  endforeach()

  # Link everything with debug info when any file is in debug mode
  string(APPEND CMAKE_EXE_LINKER_FLAGS " -g")
  string(APPEND CMAKE_SHARED_LINKER_FLAGS " -g")
endif()
1378
# Bundle PTXAS if needed: copy ptxas from the CUDA toolkit into the build tree
# (once) and install it alongside the other programs.
if(BUILD_BUNDLE_PTXAS AND USE_CUDA)
  # Test for the file at the location it is copied to and installed from, so
  # that the check and the install rule always agree.
  if(NOT EXISTS "${PROJECT_BINARY_DIR}/ptxas")
    message(STATUS "Copying PTXAS into the bin folder")
    file(COPY "${CUDAToolkit_BIN_DIR}/ptxas"
         DESTINATION "${PROJECT_BINARY_DIR}")
  endif()
  install(PROGRAMS "${PROJECT_BINARY_DIR}/ptxas"
          DESTINATION "${CMAKE_INSTALL_BINDIR}")
endif()
1389