cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
# cmake_policy(SET CMP0022 NEW) cmake_policy(SET CMP0023 NEW)

# Use compiler ID "AppleClang" instead of "Clang" for Xcode. Not setting this
# sometimes makes the Xcode C compiler get detected as "Clang", even when the
# C++ one is detected as "AppleClang".
cmake_policy(SET CMP0010 NEW)
cmake_policy(SET CMP0025 NEW)

# Enables CMake to set LTO on compilers other than Intel.
cmake_policy(SET CMP0069 NEW)
# Enable the policy for CMake subprojects. protobuf currently causes issues
# set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)

# Suppress warning flags in default MSVC configuration. It's not mandatory that
# we do this (and we don't if cmake is old), but it's nice when it's possible,
# and it's possible on our Windows configs.
cmake_policy(SET CMP0092 NEW)

# Prohibit in-source builds. Both operands are quoted so that a path that is
# empty or contains ';' cannot change the shape of the condition.
if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
  message(FATAL_ERROR "In-source build are not supported")
endif()

# ---[ Project and semantic versioning.
project(Torch CXX C)

# LINUX is consulted by many of the dependent options declared later on.
if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
  set(LINUX TRUE)
else()
  set(LINUX FALSE)
endif()

set(CMAKE_INSTALL_MESSAGE NEVER)

# Check and set CMAKE_CXX_STANDARD. A -std=c++ switch that is already present
# in CMAKE_CXX_FLAGS only triggers a warning; it is not removed.
string(FIND "${CMAKE_CXX_FLAGS}" "-std=c++" env_cxx_standard)
if(env_cxx_standard GREATER -1)
  # message() concatenates its arguments without a separator, hence the
  # trailing space at the end of the first sentence.
  message(
    WARNING
      "C++ standard version definition detected in environment variable. "
      "PyTorch requires -std=c++17. Please remove -std=c++ settings in your environment."
  )
endif()
set(CMAKE_CXX_STANDARD
    17
    CACHE STRING
          "The C++ standard whose features are requested to build this target.")
set(CMAKE_C_STANDARD
    11
    CACHE STRING
          "The C standard whose features are requested to build this target.")

# ---[ Utils
include(cmake/public/utils.cmake)

# ---[ Check that minimal gcc version is 9.3+
if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.3)
  message(
    FATAL_ERROR
      "GCC-9.3 or newer is required to compile PyTorch, but found ${CMAKE_CXX_COMPILER_VERSION}"
  )
endif()

# This define is needed to preserve behavior given anticipated changes to
# cccl/thrust
# https://nvidia.github.io/libcudacxx/standard_api/numerics_library/complex.html
string(APPEND CMAKE_CUDA_FLAGS
       " -DLIBCUDACXX_ENABLE_SIMPLIFIED_COMPLEX_OPERATIONS")

if(LINUX)
  # cmake/CheckAbi.cmake is expected to provide GLIBCXX_USE_CXX11_ABI, which is
  # forwarded to both the host and the CUDA compiler.
  include(cmake/CheckAbi.cmake)
  string(APPEND CMAKE_CXX_FLAGS
         " -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}")
  string(APPEND CMAKE_CUDA_FLAGS
         " -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}")
  # Quoted so that an empty/undefined value evaluates to false instead of
  # producing a malformed if() expression.
  if("${GLIBCXX_USE_CXX11_ABI}" EQUAL 1)
    # NOTE(review): CXX_STANDARD_REQUIRED is the name of a target property; as
    # a plain variable it appears to have no effect. CMAKE_CXX_STANDARD_REQUIRED
    # was probably intended - confirm before changing, it alters the build.
    set(CXX_STANDARD_REQUIRED ON)
  else()
    # Please note this is required in order to ensure compatibility between gcc
    # 9 and gcc 7. This could be removed when all Linux PyTorch binary builds
    # are compiled by the same toolchain again.
    append_cxx_flag_if_supported("-fabi-version=11" CMAKE_CXX_FLAGS)
  endif()
endif()

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_LINK_WHAT_YOU_USE TRUE)

# One variable that determines whether the current cmake process is being run
# with the main Caffe2 library. This is useful for building modules - if modules
# are built with the main Caffe2 library then one does not need to do find
# caffe2 in the cmake script. One can usually guard it in some way like
#   if(NOT CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
#     find_package(Caffe2 REQUIRED)
#   endif()
set(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO ON)

# Googletest's cmake files are going to set it on once they are processed. Let's
# set it at the very beginning so that the entire build is deterministic.
set(THREADS_PREFER_PTHREAD_FLAG ON)

# Record (once, in the cache) whether BLAS was already defined when this file
# was first configured.
if(NOT DEFINED BLAS_SET_BY_USER)
  if(DEFINED BLAS)
    set(BLAS_SET_BY_USER TRUE)
  else()
    message(STATUS "Not forcing any particular BLAS to be found")
    set(BLAS_SET_BY_USER FALSE)
  endif()
  set(BLAS_SET_BY_USER
      ${BLAS_SET_BY_USER}
      CACHE STRING
            "Marks whether BLAS was manually set by user or auto-detected")
endif()

# Apple specific
if(APPLE)
  # These lines are an attempt to make find_package(cuda) pick up libcuda.dylib,
  # and not cuda.framework. It doesn't work all the time, but it seems to help
  # for some users. TODO: replace this with a more robust fix
  set(CMAKE_FIND_FRAMEWORK LAST)
  set(CMAKE_FIND_APPBUNDLE LAST)

  # Get clang version on macOS
  execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version
                  OUTPUT_VARIABLE clang_full_version_string)
  # The input is quoted so that an empty compiler output cannot make the call
  # fail with "too few arguments".
  string(REGEX REPLACE "Apple (.*) version ([0-9]+\\.[0-9]+).*" "\\2"
                       CLANG_VERSION_STRING "${clang_full_version_string}")
  message(STATUS "CLANG_VERSION_STRING: " ${CLANG_VERSION_STRING})

  # RPATH stuff
  set(CMAKE_MACOSX_RPATH ON)
  if(NOT IOS)
    # Determine if we can link against MPSGraph. MPS_FOUND ends up ON only when
    # the SDK is at least 12.3 and the framework is found both on the host and
    # inside the SDK.
    set(MPS_FOUND OFF)
    execute_process(
      COMMAND bash -c "xcrun --sdk macosx --show-sdk-version"
      RESULT_VARIABLE _exit_code
      OUTPUT_VARIABLE _macosx_sdk_version
      OUTPUT_STRIP_TRAILING_WHITESPACE)
    if(_exit_code EQUAL 0)
      set(_MPS_supported_os_version OFF)
      if(_macosx_sdk_version VERSION_GREATER_EQUAL 12.3)
        set(_MPS_supported_os_version ON)
      endif()
      message(
        STATUS
          "sdk version: ${_macosx_sdk_version}, mps supported: ${_MPS_supported_os_version}"
      )
      execute_process(
        COMMAND bash -c "xcrun --sdk macosx --show-sdk-path"
        OUTPUT_VARIABLE _macosx_sdk_path
        OUTPUT_STRIP_TRAILING_WHITESPACE)
      set(_SDK_SEARCH_PATH "${_macosx_sdk_path}/System/Library/Frameworks/")
      set(_FRAMEWORK_SEARCH_PATH "/System/Library/Frameworks/")

      find_library(
        _MPS_fwrk_path_
        NAMES MetalPerformanceShadersGraph MetalPerformanceShaders
        PATHS ${_FRAMEWORK_SEARCH_PATH}
        NO_DEFAULT_PATH)
      find_library(
        _MPS_sdk_path_
        NAMES MetalPerformanceShadersGraph MetalPerformanceShaders
        PATHS ${_SDK_SEARCH_PATH}
        NO_DEFAULT_PATH)

      if(_MPS_supported_os_version
         AND _MPS_fwrk_path_
         AND _MPS_sdk_path_)
        set(MPS_FOUND ON)
        message(STATUS "MPSGraph framework found")
      else()
        message(STATUS "MPSGraph framework not found")
      endif()
    else()
      message(STATUS "MPS: unable to get MacOS sdk version")
      message(STATUS "MPSGraph framework not found")
    endif()
  endif()
endif()

# Coarse CPU family detection used by the ITT / MKLDNN option defaults.
set(CPU_AARCH64 OFF)
set(CPU_INTEL OFF)

if(CMAKE_SYSTEM_PROCESSOR MATCHES "(AMD64|x86_64)")
  set(CPU_INTEL ON)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)")
  set(CPU_AARCH64 ON)
endif()

# For non-supported platforms, turn USE_DISTRIBUTED off by default. It is not
# tested and likely won't work without additional changes.
if(NOT LINUX AND NOT WIN32)
  set(USE_DISTRIBUTED
      OFF
      CACHE STRING "Use distributed")
  # On macOS, if USE_DISTRIBUTED is enabled (specified by the user), then make
  # Gloo build with the libuv transport.
  if(APPLE AND USE_DISTRIBUTED)
    set(USE_LIBUV
        ON
        CACHE STRING "")
  endif()
endif()

# ---[ Options. Note to developers: if you add an option below, make sure you
# also add it to cmake/Summary.cmake so that the summary prints out the option
# values.
include(CMakeDependentOption)

# -- What gets built ----------------------------------------------------------
option(ATEN_NO_TEST "Do not build ATen test binaries" OFF)
option(BUILD_BINARY "Build C++ binaries" OFF)
option(BUILD_CUSTOM_PROTOBUF
       "Build and use Caffe2's own protobuf under third_party" ON)
option(BUILD_PYTHON "Build Python binaries" ON)
option(BUILD_LITE_INTERPRETER "Master flag to build Lite Interpreter" OFF)
option(BUILD_SHARED_LIBS "Build libcaffe2.so" ON)

# Only selectable for shared builds that also use the bundled protobuf;
# otherwise forced OFF.
cmake_dependent_option(
  CAFFE2_LINK_LOCAL_PROTOBUF
  "If set, build protobuf inside libcaffe2.so."
  ON
  "BUILD_SHARED_LIBS AND BUILD_CUSTOM_PROTOBUF"
  OFF)

# Only selectable for static builds; otherwise forced OFF.
cmake_dependent_option(
  CAFFE2_USE_MSVC_STATIC_RUNTIME
  "Using MSVC static runtime libraries"
  ON
  "NOT BUILD_SHARED_LIBS"
  OFF)

# -- Tests and benchmarks -----------------------------------------------------
option(BUILD_TEST "Build C++ test binaries (need gtest and gbenchmark)" OFF)
option(BUILD_AOT_INDUCTOR_TEST "Build C++ test binaries for aot-inductor" OFF)
option(BUILD_STATIC_RUNTIME_BENCHMARK
       "Build C++ binaries for static runtime benchmarks (need gbenchmark)" OFF)
option(
  BUILD_MOBILE_BENCHMARK
  "Build C++ test binaries for mobile (ARM) targets(need gtest and gbenchmark)"
  OFF)
option(
  BUILD_MOBILE_TEST
  "Build C++ test binaries for mobile (ARM) targets(need gtest and gbenchmark)"
  OFF)
option(BUILD_JNI "Build JNI bindings" OFF)
option(BUILD_MOBILE_AUTOGRAD
       "Build autograd function in mobile build (in development)" OFF)

# Follows BUILD_TEST: selectable (default ON) when tests are built, else OFF.
cmake_dependent_option(
  INSTALL_TEST
  "Install test binaries if BUILD_TEST is on"
  ON
  "BUILD_TEST"
  OFF)

# -- Instrumentation ----------------------------------------------------------
option(USE_CPP_CODE_COVERAGE "Compile C/C++ with code coverage flags" OFF)
option(USE_COLORIZE_OUTPUT "Colorize output during compilation" ON)
option(USE_ASAN "Use Address+Undefined Sanitizers" OFF)
option(USE_TSAN "Use Thread Sanitizer" OFF)

# -- Accelerator backends -----------------------------------------------------
option(USE_CUDA "Use CUDA" ON)
option(USE_XPU "Use XPU" ON)

# Requires CUDA, Linux and the Python build all at once; otherwise forced OFF.
cmake_dependent_option(
  BUILD_LAZY_CUDA_LINALG
  "Build cuda linalg ops as separate library"
  ON
  "USE_CUDA AND LINUX AND BUILD_PYTHON"
  OFF)
cmake_dependent_option(USE_ROCM "Use ROCm" ON "LINUX" OFF)
option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF)
cmake_dependent_option(USE_CUDNN "Use cuDNN" ON "USE_CUDA" OFF)
cmake_dependent_option(USE_STATIC_CUDNN "Use cuDNN static libraries" OFF
                       "USE_CUDNN" OFF)
cmake_dependent_option(USE_CUSPARSELT "Use cuSPARSELt" ON "USE_CUDA" OFF)
cmake_dependent_option(USE_CUDSS "Use cuDSS" ON "USE_CUDA" OFF)
# Binary builds will fail for cufile due to
# https://github.com/pytorch/builder/issues/1924
# Using TH_BINARY_BUILD to check whether this is a binary build.
# USE_ROCM is guarded against in Dependencies.cmake because USE_ROCM is not
# properly defined here.
if(DEFINED ENV{TH_BINARY_BUILD})
  cmake_dependent_option(
    USE_CUFILE "Use cuFile" OFF
    "USE_CUDA AND NOT $ENV{TH_BINARY_BUILD} AND NOT WIN32" OFF)
else()
  cmake_dependent_option(USE_CUFILE "Use cuFile" OFF "USE_CUDA AND NOT WIN32"
                         OFF)
endif()
option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON)
option(USE_KINETO "Use Kineto profiling library" ON)
option(USE_CUPTI_SO "Use CUPTI as a shared library" ON)
option(USE_FAKELOWP "Use FakeLowp operators" OFF)
option(USE_GFLAGS "Use GFLAGS" OFF)
option(USE_GLOG "Use GLOG" OFF)
option(USE_LITE_PROTO "Use lite protobuf instead of full." OFF)
option(USE_MAGMA "Use MAGMA" ON)
option(USE_PYTORCH_METAL "Use Metal for PyTorch iOS build" OFF)
option(USE_PYTORCH_METAL_EXPORT "Export Metal models on MacOSX desktop" OFF)
option(USE_NATIVE_ARCH "Use -march=native" OFF)
cmake_dependent_option(USE_MPS "Use MPS for macOS build" ON "MPS_FOUND" OFF)
cmake_dependent_option(USE_NCCL "Use NCCL" ON
                       "USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF)
cmake_dependent_option(USE_RCCL "Use RCCL" ON USE_NCCL OFF)
cmake_dependent_option(USE_STATIC_NCCL "Use static NCCL" OFF "USE_NCCL" OFF)
cmake_dependent_option(USE_SYSTEM_NCCL "Use system-wide NCCL" OFF "USE_NCCL"
                       OFF)
option(USE_NNAPI "Use NNAPI" OFF)
option(USE_NNPACK "Use NNPACK" ON)
cmake_dependent_option(USE_NUMA "Use NUMA. Only available on Linux." ON "LINUX"
                       OFF)
cmake_dependent_option(USE_NVRTC "Use NVRTC. Only available if USE_CUDA is on."
                       OFF "USE_CUDA" OFF)
option(USE_NUMPY "Use NumPy" ON)
option(USE_OBSERVERS "Use observers module." OFF)
option(USE_OPENCL "Use OpenCL" OFF)
option(USE_OPENMP "Use OpenMP for parallel code" ON)
option(USE_PRECOMPILED_HEADERS "Use pre-compiled headers to accelerate build."
       OFF)

option(USE_PROF "Use profiling" OFF)
option(USE_PYTORCH_QNNPACK "Use ATen/QNNPACK (quantized 8-bit operators)" ON)
option(USE_SNPE "Use Qualcomm's SNPE library" OFF)
option(USE_SYSTEM_EIGEN_INSTALL
       "Use system Eigen instead of the one under third_party" OFF)
cmake_dependent_option(
  USE_VALGRIND "Use Valgrind. Only available on Linux." ON
  "LINUX" OFF)

if(NOT DEFINED USE_VULKAN)
  cmake_dependent_option(USE_VULKAN "Use Vulkan GPU backend" ON "ANDROID" OFF)
endif()

option(USE_SLEEF_FOR_ARM_VEC256 "Use sleef for arm" OFF)
option(USE_SOURCE_DEBUG_ON_MOBILE "Enable" ON)
option(USE_LITE_INTERPRETER_PROFILER "Enable" ON)
cmake_dependent_option(
  USE_LITE_AOTI "Include AOTI sources" OFF
  "BUILD_LITE_INTERPRETER" OFF)
option(USE_VULKAN_FP16_INFERENCE "Vulkan - Use fp16 inference" OFF)
option(USE_VULKAN_RELAXED_PRECISION
       "Vulkan - Use relaxed precision math in the kernels (mediump)" OFF)
# option USE_XNNPACK: try to enable xnnpack by default.
option(USE_XNNPACK "Use XNNPACK" ON)
option(USE_ROCM_KERNEL_ASSERT "Use Kernel Assert for ROCm" OFF)
# Ensure that an ITT build is the default for x86 CPUs
cmake_dependent_option(USE_ITT "Use Intel(R) VTune Profiler ITT functionality"
                       ON "CPU_INTEL" OFF)
# Ensure that an MKLDNN build is the default for x86 CPUs but optional for
# AArch64 (dependent on -DUSE_MKLDNN).
cmake_dependent_option(
  USE_MKLDNN "Use MKLDNN. Only available on x86, x86_64, and AArch64."
  "${CPU_INTEL}" "CPU_INTEL OR CPU_AARCH64" OFF)
cmake_dependent_option(
  USE_MKLDNN_ACL "Use Compute Library for the Arm architecture." OFF
  "USE_MKLDNN AND CPU_AARCH64" OFF)
set(MKLDNN_ENABLE_CONCURRENT_EXEC ${USE_MKLDNN})
cmake_dependent_option(USE_MKLDNN_CBLAS "Use CBLAS in MKLDNN" OFF "USE_MKLDNN"
                       OFF)
option(USE_STATIC_MKL "Prefer to link with MKL statically (Unix only)" OFF)

# ---[ Distributed options. Each of these is declared exactly once.
option(USE_DISTRIBUTED "Use distributed" ON)
cmake_dependent_option(
  USE_MPI "Use MPI for Caffe2. Only available if USE_DISTRIBUTED is on." ON
  "USE_DISTRIBUTED" OFF)
cmake_dependent_option(
  USE_UCC "Use UCC. Only available if USE_DISTRIBUTED is on." OFF
  "USE_DISTRIBUTED" OFF)
cmake_dependent_option(USE_SYSTEM_UCC "Use system-wide UCC" OFF "USE_UCC" OFF)
cmake_dependent_option(USE_C10D_UCC "USE C10D UCC" ON "USE_DISTRIBUTED;USE_UCC"
                       OFF)
cmake_dependent_option(
  USE_GLOO "Use Gloo. Only available if USE_DISTRIBUTED is on." ON
  "USE_DISTRIBUTED" OFF)
cmake_dependent_option(
  USE_GLOO_WITH_OPENSSL
  "Use Gloo with OpenSSL. Only available if USE_GLOO is on." OFF
  "USE_GLOO AND LINUX AND NOT INTERN_BUILD_MOBILE" OFF)
cmake_dependent_option(USE_C10D_GLOO "USE C10D GLOO" ON
                       "USE_DISTRIBUTED;USE_GLOO" OFF)
cmake_dependent_option(USE_C10D_NCCL "USE C10D NCCL" ON
                       "USE_DISTRIBUTED;USE_NCCL" OFF)
cmake_dependent_option(USE_C10D_MPI "USE C10D MPI" ON "USE_DISTRIBUTED;USE_MPI"
                       OFF)
cmake_dependent_option(
  USE_TENSORPIPE "Use TensorPipe. Only available if USE_DISTRIBUTED is on." ON
  "USE_DISTRIBUTED" OFF)
option(ONNX_ML "Enable traditional ONNX ML API." ON)
option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" OFF)
option(BUILD_LIBTORCH_CPU_WITH_DEBUG
       "Enable RelWithDebInfo for libtorch_cpu target only" OFF)
cmake_dependent_option(
  USE_CCACHE "Attempt using CCache to wrap the compilation" ON "UNIX" OFF)
option(WERROR "Build with -Werror supported by the compiler" OFF)
option(
  DEBUG_CUDA
  "When compiling DEBUG, also attempt to compile CUDA with debug flags (may cause nvcc to OOM)"
  OFF)
option(USE_COREML_DELEGATE "Use the CoreML backend through delegate APIs" OFF)
option(USE_PER_OPERATOR_HEADERS
       "Whether ATen should generate separate headers for each operator" ON)
cmake_dependent_option(
  BUILD_LAZY_TS_BACKEND
  "Build the lazy Torchscript backend, not compatible with mobile builds" ON
  "NOT INTERN_BUILD_MOBILE" OFF)
cmake_dependent_option(BUILD_FUNCTORCH "Build Functorch" ON "BUILD_PYTHON" OFF)
cmake_dependent_option(BUILD_BUNDLE_PTXAS "Bundle PTX into torch/bin fodler"
                       OFF "USE_CUDA" OFF)

option(USE_MIMALLOC "Use mimalloc" OFF)
# Enable third party mimalloc library to improve memory allocation performance
# on Windows.
if(WIN32)
  set(USE_MIMALLOC ON)
endif()

# Route C, C++ and CUDA compilation through ccache when it is installed.
if(USE_CCACHE)
  find_program(CCACHE_PROGRAM ccache)
  if(CCACHE_PROGRAM)
    set(CMAKE_C_COMPILER_LAUNCHER
        "${CCACHE_PROGRAM}"
        CACHE STRING "C compiler launcher")
    set(CMAKE_CXX_COMPILER_LAUNCHER
        "${CCACHE_PROGRAM}"
        CACHE STRING "CXX compiler launcher")
    set(CMAKE_CUDA_COMPILER_LAUNCHER
        "${CCACHE_PROGRAM}"
        CACHE STRING "CUDA compiler launcher")
  else()
    message(
      STATUS
        "Could not find ccache. Consider installing ccache to speed up compilation."
    )
  endif()
endif()

# Since TensorPipe does not support Windows, set it to OFF when WIN32 is
# detected. On Windows, if the user neither installed libuv in the build conda
# env nor set the libuv_ROOT environment variable, set USE_DISTRIBUTED to OFF.
if(WIN32)
  set(USE_TENSORPIPE OFF)
  message(WARNING "TensorPipe cannot be used on Windows. Set it to OFF")

  if(USE_DISTRIBUTED AND NOT DEFINED ENV{libuv_ROOT})
    find_library(
      libuv_tmp_LIBRARY
      NAMES uv libuv
      HINTS $ENV{CONDA_PREFIX}\\Library $ENV{PREFIX}\\Library
      PATH_SUFFIXES lib
      NO_DEFAULT_PATH)
    if(NOT libuv_tmp_LIBRARY)
      set(USE_DISTRIBUTED OFF)
      set(USE_GLOO OFF)
      message(
        WARNING
          "Libuv is not installed in current conda env. Set USE_DISTRIBUTED to OFF. "
          "Please run command 'conda install -c conda-forge libuv=1.39' to install libuv."
      )
    else()
      # Two levels above the library file found by find_library().
      set(ENV{libuv_ROOT} ${libuv_tmp_LIBRARY}/../../)
    endif()
  endif()
endif()

if(USE_GLOO_WITH_OPENSSL)
  set(USE_TCP_OPENSSL_LOAD
      ON
      CACHE STRING "")
endif()

# Linux distributions do not want too many embedded sources, in that sense we
# need to be able to build pytorch with an (almost) empty third_party directory.
# USE_SYSTEM_LIBS is a shortcut variable to toggle all the USE_SYSTEM_*
# variables on. Individual USE_SYSTEM_* variables can be toggled with
# USE_SYSTEM_LIBS being "OFF".
option(USE_SYSTEM_LIBS "Use all available system-provided libraries." OFF)
option(USE_SYSTEM_CPUINFO "Use system-provided cpuinfo." OFF)
option(USE_SYSTEM_SLEEF "Use system-provided sleef." OFF)
option(USE_SYSTEM_GLOO "Use system-provided gloo." OFF)
option(USE_SYSTEM_FP16 "Use system-provided fp16." OFF)
option(USE_SYSTEM_PYBIND11 "Use system-provided PyBind11." OFF)
option(USE_SYSTEM_PTHREADPOOL "Use system-provided pthreadpool." OFF)
option(USE_SYSTEM_PSIMD "Use system-provided psimd." OFF)
option(USE_SYSTEM_FXDIV "Use system-provided fxdiv." OFF)
option(USE_SYSTEM_BENCHMARK "Use system-provided google benchmark." OFF)
option(USE_SYSTEM_ONNX "Use system-provided onnx." OFF)
option(USE_SYSTEM_XNNPACK "Use system-provided xnnpack." OFF)
option(USE_GOLD_LINKER "Use ld.gold to link" OFF)
if(USE_SYSTEM_LIBS)
  set(USE_SYSTEM_CPUINFO ON)
  set(USE_SYSTEM_SLEEF ON)
  set(USE_SYSTEM_GLOO ON)
  set(BUILD_CUSTOM_PROTOBUF OFF)
  set(USE_SYSTEM_EIGEN_INSTALL ON)
  set(USE_SYSTEM_FP16 ON)
  set(USE_SYSTEM_PTHREADPOOL ON)
  set(USE_SYSTEM_PSIMD ON)
  set(USE_SYSTEM_FXDIV ON)
  set(USE_SYSTEM_BENCHMARK ON)
  set(USE_SYSTEM_ONNX ON)
  set(USE_SYSTEM_XNNPACK ON)
  set(USE_SYSTEM_PYBIND11 ON)
  if(USE_NCCL)
    set(USE_SYSTEM_NCCL ON)
  endif()
endif()

# /Z7 override option. When generating debug symbols, CMake defaults to the
# flag /Zi. However, it is not compatible with sccache, so it is rewritten.
# Some users don't use sccache; this override is for them.
cmake_dependent_option(
  MSVC_Z7_OVERRIDE
  "Work around sccache bug by replacing /Zi and /ZI with /Z7 when using MSVC (if you are not using sccache, you can turn this OFF)"
  ON
  "MSVC"
  OFF)

if(NOT USE_SYSTEM_ONNX)
  set(ONNX_NAMESPACE
      "onnx_torch"
      CACHE
        STRING
        "A namespace for ONNX; needed to build with other frameworks that share ONNX."
  )
else()
  set(ONNX_NAMESPACE
      "onnx"
      CACHE
        STRING
        "A namespace for ONNX; needed to build with other frameworks that share ONNX."
  )
endif()
set(SELECTED_OP_LIST
    ""
    CACHE
      STRING
      "Path to the yaml file that contains the list of operators to include for custom build. Include all operators by default."
)
# NOTE(review): option() only creates boolean cache entries, while the help
# text describes a backend name. A `set(... CACHE STRING ...)` entry may have
# been intended - confirm before changing, since the default would change.
option(
  STATIC_DISPATCH_BACKEND
  "Name of the backend for which static dispatch code is generated, e.g.: CPU."
  "")
option(
  USE_LIGHTWEIGHT_DISPATCH
  "Enable codegen unboxing for ATen ops, need to work with static dispatch in order to work properly."
  OFF)
if(USE_LIGHTWEIGHT_DISPATCH AND NOT STATIC_DISPATCH_BACKEND)
  message(
    FATAL_ERROR
      "Need to enable static dispatch after enabling USE_LIGHTWEIGHT_DISPATCH.")
endif()
option(TRACING_BASED
       "Master flag to build Lite Interpreter with tracing build option" OFF)
option(BUILD_EXECUTORCH "Master flag to build Executorch" ON)
# This is a fix for a rare build issue on Ubuntu: symbol lookup error:
# miniconda3/envs/pytorch-py3.7/lib/libmkl_intel_lp64.so: undefined symbol:
# mkl_blas_dsyrk
# https://software.intel.com/en-us/articles/symbol-lookup-error-when-linking-intel-mkl-with-gcc-on-ubuntu
if(LINUX)
  string(APPEND CMAKE_SHARED_LINKER_FLAGS " -Wl,--no-as-needed")

  set(ENV_LDFLAGS "$ENV{LDFLAGS}")
  string(STRIP "${ENV_LDFLAGS}" ENV_LDFLAGS)
  # Do not append linker flags passed via env var if they are already there.
  # A literal substring search is used: LDFLAGS is user text, not a regular
  # expression, and may contain characters such as '+', '(' or '['.
  string(FIND "${CMAKE_SHARED_LINKER_FLAGS}" "${ENV_LDFLAGS}"
              _env_ldflags_pos)
  if(_env_ldflags_pos EQUAL -1)
    string(APPEND CMAKE_SHARED_LINKER_FLAGS " ${ENV_LDFLAGS}")
  endif()
endif()

if(MSVC)
  # MSVC by default does not apply the correct __cplusplus version as specified
  # by the C++ standard because MSVC is not a completely compliant
  # implementation. This option forces MSVC to use the appropriate value given
  # the requested --std option. This fixes a compilation issue mismatch between
  # GCC/Clang and MSVC.
  #
  # See:
  # * https://learn.microsoft.com/en-us/cpp/build/reference/zc-cplusplus?view=msvc-170
  # * https://en.cppreference.com/w/cpp/preprocessor/replace#Predefined_macros
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zc:__cplusplus")
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /Zc:__cplusplus")

  set(CMAKE_NINJA_CMCLDEPS_RC OFF)
  # In the loops below `${flag_var}` expands to the *name* of a flags variable,
  # which if() then dereferences; `${${flag_var}}` is its current value.
  foreach(
    flag_var
    CMAKE_C_FLAGS
    CMAKE_C_FLAGS_DEBUG
    CMAKE_C_FLAGS_RELEASE
    CMAKE_C_FLAGS_MINSIZEREL
    CMAKE_C_FLAGS_RELWITHDEBINFO
    CMAKE_CXX_FLAGS
    CMAKE_CXX_FLAGS_DEBUG
    CMAKE_CXX_FLAGS_RELEASE
    CMAKE_CXX_FLAGS_MINSIZEREL
    CMAKE_CXX_FLAGS_RELWITHDEBINFO)
    # Replace /Zi and /ZI with /Z7
    if(MSVC_Z7_OVERRIDE)
      if(${flag_var} MATCHES "/Z[iI]")
        string(REGEX REPLACE "/Z[iI]" "/Z7" ${flag_var} "${${flag_var}}")
      endif()
    endif()

    # Select the static (/MT) or dynamic (/MD) MSVC runtime.
    if(CAFFE2_USE_MSVC_STATIC_RUNTIME)
      if(${flag_var} MATCHES "/MD")
        string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
      endif()
    else()
      if(${flag_var} MATCHES "/MT")
        string(REGEX REPLACE "/MT" "/MD" ${flag_var} "${${flag_var}}")
      endif()
    endif()

    # /bigobj increases number of sections in .obj file, which is needed to link
    # against libraries in Python 2.7 under Windows. For Visual Studio
    # generators, if /MP is not added, then we may need to add /MP to the flags.
    # For other generators like ninja, we don't need to add /MP because it is
    # already handled by the generator itself.
    if(CMAKE_GENERATOR MATCHES "Visual Studio" AND NOT ${flag_var} MATCHES
                                                   "/MP")
      set(${flag_var} "${${flag_var}} /MP /bigobj")
    else()
      set(${flag_var} "${${flag_var}} /bigobj")
    endif()
  endforeach()

  # Strip every debug-information switch from the base, Release and MinSizeRel
  # flag sets.
  foreach(flag_var
          CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_MINSIZEREL
          CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_MINSIZEREL)
    if(${flag_var} MATCHES "/Z[iI7]")
      string(REGEX REPLACE "/Z[iI7]" "" ${flag_var} "${${flag_var}}")
    endif()
  endforeach()

  foreach(
    flag_var
    CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO
    CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO
    CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO
    CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO
    CMAKE_SHARED_LINKER_FLAGS_DEBUG
    CMAKE_STATIC_LINKER_FLAGS_DEBUG
    CMAKE_EXE_LINKER_FLAGS_DEBUG
    CMAKE_MODULE_LINKER_FLAGS_DEBUG)
    # Switch off incremental linking in debug/relwithdebinfo builds
    if(${flag_var} MATCHES "/INCREMENTAL" AND NOT ${flag_var} MATCHES
                                              "/INCREMENTAL:NO")
      string(REGEX REPLACE "/INCREMENTAL" "/INCREMENTAL:NO" ${flag_var}
                           "${${flag_var}}")
    endif()
  endforeach()

  foreach(flag_var CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS
                   CMAKE_EXE_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS)
    string(APPEND ${flag_var} " /ignore:4049 /ignore:4217 /ignore:4099")
  endforeach()

  foreach(flag_var CMAKE_SHARED_LINKER_FLAGS)
    # https://github.com/pytorch/pytorch/issues/91933: Not setting the manifest
    # filename explicitly helps fix the linker error when linking
    # torch_python.dll. The manifest file would still be there in the correct
    # format torch_python.dll.manifest
    if(${flag_var} MATCHES "/MANIFESTFILE:.*\\.manifest")
      string(REGEX REPLACE "/MANIFESTFILE:.*\\.manifest" "" ${flag_var}
                           "${${flag_var}}")
    endif()
  endforeach()

  # Try harder
  string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler /w -w")

  string(APPEND CMAKE_CXX_FLAGS " /FS")
  string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler /FS")
endif()

string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all")

# Set INTERN_BUILD_MOBILE for all mobile builds. Components that are not
# applicable to mobile are disabled by this variable. Setting
# `BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN` environment variable can force it
# to do mobile build with host toolchain - which is useful for testing purpose.
if(ANDROID
   OR IOS
   OR DEFINED ENV{BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN})
  set(INTERN_BUILD_MOBILE ON)
  message(WARNING "INTERN_BUILD_MOBILE is on, disabling BUILD_LAZY_TS_BACKEND")
  set(BUILD_LAZY_TS_BACKEND OFF)

  # Set -ffunction-sections and -fdata-sections so that each method has its own
  # text section. This allows the linker to remove unused section when the flag
  # -Wl,-gc-sections is provided at link time.
  string(APPEND CMAKE_CXX_FLAGS " -ffunction-sections")
  string(APPEND CMAKE_C_FLAGS " -ffunction-sections")
  string(APPEND CMAKE_CXX_FLAGS " -fdata-sections")
  string(APPEND CMAKE_C_FLAGS " -fdata-sections")

  # Please note that the use of the following flags is required when linking
  # against libtorch_cpu.a for mobile builds:
  #   -Wl,--whole-archive -ltorch_cpu -Wl,--no-whole-archive
  #
  # This allows global constructors to be included and run. Global constructors
  # are used for operator/kernel registration with the PyTorch Dispatcher.

  if(DEFINED ENV{BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN})
    # C10_MOBILE is derived from Android/iOS toolchain macros in
    # c10/macros/Macros.h, so it needs to be explicitly set here.
    string(APPEND CMAKE_CXX_FLAGS " -DC10_MOBILE")
  endif()

  if(DEFINED ENV{PYTORCH_MOBILE_TRIM_DISPATCH_KEY_SET})
    # If PYTORCH_MOBILE_TRIM_DISPATCH_KEY_SET is defined (env var), then define
    # C10_MOBILE_TRIM_DISPATCH_KEYS, which limits the number of dispatch keys in
    # OperatorEntry::dispatchTable_ to reduce peak memory during library
    # initialization.
    string(APPEND CMAKE_CXX_FLAGS " -DC10_MOBILE_TRIM_DISPATCH_KEYS")
  endif()
endif()

# INTERN_BUILD_ATEN_OPS is used to control whether to build ATen/TH operators.
set(INTERN_BUILD_ATEN_OPS ON)

if(NOT DEFINED USE_BLAS)
  set(USE_BLAS ON)
endif()

# Build libtorch mobile library, which contains ATen/TH ops and native support
# for TorchScript model, but doesn't contain not-yet-unified caffe2 ops;
if(INTERN_BUILD_MOBILE)
  if(NOT BUILD_SHARED_LIBS AND NOT "${SELECTED_OP_LIST}" STREQUAL "")
    string(APPEND CMAKE_CXX_FLAGS " -DNO_EXPORT")
  endif()
  if(BUILD_MOBILE_AUTOGRAD)
    set(INTERN_DISABLE_AUTOGRAD OFF)
  else()
    set(INTERN_DISABLE_AUTOGRAD ON)
  endif()
  set(BUILD_PYTHON OFF)
  set(BUILD_FUNCTORCH OFF)
  set(USE_DISTRIBUTED OFF)
  set(NO_API ON)
  set(USE_FBGEMM OFF)
  set(INTERN_DISABLE_ONNX ON)
  if(USE_BLAS)
    set(INTERN_USE_EIGEN_BLAS ON)
  else()
    set(INTERN_USE_EIGEN_BLAS OFF)
  endif()
  # Disable developing mobile interpreter for actual mobile build. Enable it
  # elsewhere to capture build error.
  set(INTERN_DISABLE_MOBILE_INTERP ON)
endif()

# ---[ Version numbers for generated libraries
file(READ version.txt TORCH_DEFAULT_VERSION)
# Strip trailing newline
string(REGEX REPLACE "\n$" "" TORCH_DEFAULT_VERSION "${TORCH_DEFAULT_VERSION}")
if("${TORCH_DEFAULT_VERSION} " STREQUAL " ")
  message(WARNING "Could not get version from base 'version.txt'")
  # If we can't get the version from the version file, fall back to an
  # obviously nonsensical value. (No comma after the variable name: a comma
  # would become part of the name and the fallback would never take effect.)
  set(TORCH_DEFAULT_VERSION "0.0.0")
endif()
set(TORCH_BUILD_VERSION
    "${TORCH_DEFAULT_VERSION}"
    CACHE STRING "Torch build version")
if(DEFINED ENV{PYTORCH_BUILD_VERSION})
  set(TORCH_BUILD_VERSION
      "$ENV{PYTORCH_BUILD_VERSION}"
      CACHE STRING "Torch build version" FORCE)
endif()
if(NOT TORCH_BUILD_VERSION)
  # An empty string was specified so force version to the default
  set(TORCH_BUILD_VERSION
      "${TORCH_DEFAULT_VERSION}"
      CACHE STRING "Torch build version" FORCE)
endif()
caffe2_parse_version_str(TORCH ${TORCH_BUILD_VERSION})
set(TORCH_SOVERSION "${TORCH_VERSION_MAJOR}.${TORCH_VERSION_MINOR}")

# ---[ CMake scripts + modules
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)

# ---[ CMake build directories
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

enable_testing()

# ---[ Build variables set within the cmake tree
include(cmake/BuildVariables.cmake)
set(CAFFE2_ALLOWLIST
    ""
    CACHE STRING "A allowlist file of files that one should build.")

# Set default build type
if(NOT CMAKE_BUILD_TYPE)
  message(STATUS "Build type not set - defaulting to Release")
  set(CMAKE_BUILD_TYPE
      "Release"
      CACHE
        STRING
        "Choose the type of build from: Debug Release RelWithDebInfo MinSizeRel Coverage."
        FORCE)
endif()

# The below means we are cross compiling for arm64 or x86_64 on MacOSX
if(NOT IOS
   AND CMAKE_SYSTEM_NAME STREQUAL "Darwin"
   AND CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64)$")
  set(CROSS_COMPILING_MACOSX TRUE)
  # We need to compile a universal protoc to not fail the protobuf build. We set
  # CMAKE_TRY_COMPILE_TARGET_TYPE to STATIC_LIBRARY (vs executable) to succeed
  # the cmake compiler check for cross-compiling.
  set(protoc_build_command
      "./scripts/build_host_protoc.sh --other-flags -DCMAKE_OSX_ARCHITECTURES=\"x86_64;arm64\" -DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY -DCMAKE_C_COMPILER_WORKS=1 -DCMAKE_CXX_COMPILER_WORKS=1"
  )
  # We write to a temp scriptfile because CMake COMMAND dislikes double quotes
  # in commands. The script is written, copied into scripts/ with owner
  # permissions, executed, and both copies are removed afterwards.
  file(WRITE ${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh
       "#!/bin/bash\n${protoc_build_command}")
  file(
    COPY ${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh
    DESTINATION ${PROJECT_SOURCE_DIR}/scripts/
    FILE_PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ)
  execute_process(
    COMMAND ./scripts/tmp_protoc_script.sh
    WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
    RESULT_VARIABLE BUILD_HOST_PROTOC_RESULT)
  file(REMOVE ${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh
       ${PROJECT_SOURCE_DIR}/scripts/tmp_protoc_script.sh)
  if(NOT BUILD_HOST_PROTOC_RESULT EQUAL "0")
    message(FATAL_ERROR "Could not compile universal protoc.")
  endif()
  set(PROTOBUF_PROTOC_EXECUTABLE
      "${PROJECT_SOURCE_DIR}/build_host_protoc/bin/protoc")
  set(CAFFE2_CUSTOM_PROTOC_EXECUTABLE
      "${PROJECT_SOURCE_DIR}/build_host_protoc/bin/protoc")
endif()

# ---[ Misc checks to cope with various compiler modes
include(cmake/MiscCheck.cmake)

# External projects
include(ExternalProject)

# ---[ Dependencies
# ---[ FBGEMM doesn't work on x86 32bit and CMAKE_SYSTEM_PROCESSOR thinks it's
# 64bit
if(USE_FBGEMM
   AND((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SIZEOF_VOID_P EQUAL
                                                     4)
       OR CMAKE_SYSTEM_PROCESSOR STREQUAL "x86"))
  set(USE_FBGEMM OFF)
endif()

set(BUILD_ONEDNN_GRAPH OFF)

if(MSVC)
  # The source code is in utf-8 encoding
  append_cxx_flag_if_supported("/utf-8" CMAKE_CXX_FLAGS)
endif()

# Note for ROCM platform:
# 1. USE_ROCM is always ON until include(cmake/Dependencies.cmake)
# 2. USE_CUDA will become OFF during re-configuration
#
# Truth Table:
# CUDA 1st pass: USE_CUDA=True;USE_ROCM=True, FLASH evaluates to ON by default
# CUDA 2nd pass: USE_CUDA=True;USE_ROCM=False, FLASH evaluates to ON by default
# ROCM 1st pass: USE_CUDA=True;USE_ROCM=True, FLASH evaluates to ON by default
# ROCM 2nd pass: USE_CUDA=False;USE_ROCM=True, FLASH evaluates to ON by default
# CPU 1st pass: USE_CUDA=False(Cmd Option);USE_ROCM=True, FLASH evaluates to OFF by default
# CPU 2nd pass: USE_CUDA=False(Cmd Option);USE_ROCM=False, FLASH evaluates to OFF by default
# Thus we cannot tell ROCM 2nd pass and CPU 1st pass apart.
#
# The only solution is to include(cmake/Dependencies.cmake), and defer the
# aotriton build decision later.

# ---[ Third-party dependencies
# Must come before the attention options below, whose defaults depend on
# USE_CUDA / USE_ROCM.
include(cmake/Dependencies.cmake)

cmake_dependent_option(
  USE_FLASH_ATTENTION
  "Whether to build the flash_attention kernel for scaled dot product attention.\
  Will be disabled if not supported by the platform"
  ON
  "USE_CUDA OR USE_ROCM;NOT MSVC"
  OFF)

# We are currently not using alibi attention for Flash, so we disable this
# feature by default. We don't currently document this feature because we don't
# suspect users building from source will need it.
add_definitions(-DFLASHATTENTION_DISABLE_ALIBI)

# CAVEAT: Again, Flash Attention2 will error while building for sm52 while Mem
# Eff Attention won't
cmake_dependent_option(
  USE_MEM_EFF_ATTENTION
  "Enable memory-efficient attention for scaled dot product attention.\
  Will be disabled if not supported by the platform" ON
  "USE_CUDA OR USE_ROCM" OFF)

#
# Cannot be put into Dependencies.cmake due circular dependency:
# USE_FLASH_ATTENTION -> USE_ROCM -> Dependencies.cmake -> aotriton.cmake
#
if(USE_ROCM)
  if(USE_FLASH_ATTENTION OR USE_MEM_EFF_ATTENTION)
    include(cmake/External/aotriton.cmake)
  endif()
endif()

# ---[ Extra CUDA debug information
if(DEBUG_CUDA)
  string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -lineinfo")
  string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -lineinfo")
  # CUDA-12.1 crashes when trying to compile with --source-in-ptx See
  # https://github.com/pytorch/pytorch/issues/102372#issuecomment-1572526893
  if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12.1)
    string(APPEND CMAKE_CUDA_FLAGS_DEBUG " --source-in-ptx")
    string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " --source-in-ptx")
  endif()
endif()

# ---[ Feature switches forwarded to the C++ sources as preprocessor defines
if(USE_FBGEMM)
  string(APPEND CMAKE_CXX_FLAGS " -DUSE_FBGEMM")
endif()

if(USE_PYTORCH_QNNPACK)
  string(APPEND CMAKE_CXX_FLAGS " -DUSE_PYTORCH_QNNPACK")
endif()

if(USE_SLEEF_FOR_ARM_VEC256)
  string(APPEND CMAKE_CXX_FLAGS " -DAT_BUILD_ARM_VEC256_WITH_SLEEF")
  add_definitions(-DAT_BUILD_ARM_VEC256_WITH_SLEEF)
endif()

# Enable sleef on macOS with Apple silicon by default. Both sides of each
# comparison are quoted so that the expanded value is always compared as a
# string and never re-interpreted as a variable name.
if(("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin")
   AND ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64"))
  message(STATUS "Running on macOS with Apple silicon")
  string(APPEND CMAKE_CXX_FLAGS " -DAT_BUILD_ARM_VEC256_WITH_SLEEF")
  add_definitions(-DAT_BUILD_ARM_VEC256_WITH_SLEEF)
endif()

if(USE_XNNPACK)
  string(APPEND CMAKE_CXX_FLAGS " -DUSE_XNNPACK")
endif()

if(USE_VULKAN)
  string(APPEND CMAKE_CXX_FLAGS " -DUSE_VULKAN")
  string(APPEND CMAKE_CXX_FLAGS " -DUSE_VULKAN_API")

  if(USE_VULKAN_FP16_INFERENCE)
    string(APPEND CMAKE_CXX_FLAGS " -DUSE_VULKAN_FP16_INFERENCE")
  endif()

  if(USE_VULKAN_RELAXED_PRECISION)
    string(APPEND CMAKE_CXX_FLAGS " -DUSE_VULKAN_RELAXED_PRECISION")
  endif()

endif()

if(BUILD_LITE_INTERPRETER)
  string(APPEND CMAKE_CXX_FLAGS " -DBUILD_LITE_INTERPRETER")
endif()

if(TRACING_BASED)
  string(APPEND CMAKE_CXX_FLAGS " -DTRACING_BASED")
endif()

if(USE_PYTORCH_METAL)
  string(APPEND CMAKE_CXX_FLAGS " -DUSE_PYTORCH_METAL")
endif()

if(USE_PYTORCH_METAL_EXPORT)
  string(APPEND CMAKE_CXX_FLAGS " -DUSE_PYTORCH_METAL_EXPORT")
endif()

if(USE_SOURCE_DEBUG_ON_MOBILE)
  string(APPEND CMAKE_CXX_FLAGS " -DSYMBOLICATE_MOBILE_DEBUG_HANDLE")
endif()

if(BUILD_LITE_INTERPRETER AND USE_LITE_INTERPRETER_PROFILER)
  string(APPEND CMAKE_CXX_FLAGS " -DEDGE_PROFILER_USE_KINETO")
endif()

if(USE_COREML_DELEGATE)
  string(APPEND CMAKE_CXX_FLAGS " -DUSE_COREML_DELEGATE")
endif()

# ---[ Allowlist file if allowlist is specified
include(cmake/Allowlist.cmake)

# ---[ Set link flag, handle additional deps for gcc 4.8 and above
if(CMAKE_COMPILER_IS_GNUCXX AND NOT ANDROID)
  message(
    STATUS
      "GCC ${CMAKE_CXX_COMPILER_VERSION}: Adding gcc and gcc_s libs to link line"
  )
  list(APPEND Caffe2_DEPENDENCY_LIBS gcc_s gcc)
endif()

# ---[ Build flags
# Re-include to override append_cxx_flag_if_supported from third_party/FBGEMM
include(cmake/public/utils.cmake)
if(NOT MSVC)
  string(APPEND CMAKE_CXX_FLAGS " -O2 -fPIC")
  # Eigen fails to build with some versions, so convert this to a warning
  # Details at http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1459
  string(APPEND CMAKE_CXX_FLAGS " -Wall")
  string(APPEND CMAKE_CXX_FLAGS " -Wextra")
  # Each of the following flags is only added if the compiler accepts it.
  append_cxx_flag_if_supported("-Werror=return-type" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Werror=non-virtual-dtor" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Werror=braced-scalar-init" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Werror=range-loop-construct" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Werror=bool-operation" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Wnarrowing" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Wno-missing-field-initializers"
                               CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Wno-type-limits" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Wno-array-bounds" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Wno-unknown-pragmas" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Wno-unused-parameter" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Wno-strict-overflow" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Wno-strict-aliasing" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Wno-stringop-overflow" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Wvla-extension" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Wsuggest-override" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Wnewline-eof" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Winconsistent-missing-override"
                               CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Winconsistent-missing-destructor-override"
                               CMAKE_CXX_FLAGS)
  if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
    string(APPEND CMAKE_CXX_FLAGS " -Wno-pass-failed")
  endif()
  if(CMAKE_COMPILER_IS_GNUCXX)
    # Suppress "The ABI for passing parameters with 64-byte alignment has
    # changed in GCC 4.6"
    string(APPEND CMAKE_CXX_FLAGS " -Wno-psabi")
  endif()

  # Use ld.gold if available, fall back to ld.bfd (the default ld) if not
  if(USE_GOLD_LINKER)
    if(USE_DISTRIBUTED AND USE_MPI)
      # Same issue as here with default MPI on Ubuntu
      # https://bugs.launchpad.net/ubuntu/+source/deal.ii/+bug/1841577
      message(WARNING "Refusing to use gold when USE_MPI=1")
    else()
      # Ask the compiler driver which linker -fuse-ld=gold resolves to.
      execute_process(
        COMMAND "${CMAKE_C_COMPILER}" -fuse-ld=gold -Wl,--version
        ERROR_QUIET
        OUTPUT_VARIABLE LD_VERSION)
      if(NOT "${LD_VERSION}" MATCHES "GNU gold")
        message(
          WARNING
            "USE_GOLD_LINKER was set but ld.gold isn't available, turning it off"
        )
        set(USE_GOLD_LINKER OFF)
      else()
        message(STATUS "ld.gold is available, using it to link")
        string(APPEND CMAKE_EXE_LINKER_FLAGS " -fuse-ld=gold")
        string(APPEND CMAKE_SHARED_LINKER_FLAGS " -fuse-ld=gold")
        string(APPEND CMAKE_MODULE_LINKER_FLAGS " -fuse-ld=gold")
      endif()
    endif()
  endif()

  append_cxx_flag_if_supported("-Wno-error=old-style-cast" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Wconstant-conversion" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Wno-aligned-allocation-unavailable"
                               CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Wno-missing-braces" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Qunused-arguments" CMAKE_CXX_FLAGS)

  if(USE_COLORIZE_OUTPUT)
    # Why the compiler ID is checked even though the flag is probed with
    # `try_compile`: a bug in ccache can make it incorrectly report
    # `-fcolor-diagnostics` as supported by GCC, see
    # https://github.com/ccache/ccache/issues/740 (for older ccache) and
    # https://github.com/ccache/ccache/issues/1275 (for newer ones)
    if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
      append_cxx_flag_if_supported("-fdiagnostics-color=always" CMAKE_CXX_FLAGS)
    else()
      append_cxx_flag_if_supported("-fcolor-diagnostics" CMAKE_CXX_FLAGS)
    endif()
  endif()

  append_cxx_flag_if_supported("-faligned-new" CMAKE_CXX_FLAGS)

  if(WERROR)
    append_cxx_flag_if_supported("-Werror" CMAKE_CXX_FLAGS)
    # NOTE(review): COMPILER_SUPPORT_WERROR is not set anywhere in this part of
    # the file. Confirm that append_cxx_flag_if_supported (or another module)
    # defines it; otherwise WERROR is always reset to FALSE here.
    if(NOT COMPILER_SUPPORT_WERROR)
      set(WERROR FALSE)
    endif()
  endif()
  append_cxx_flag_if_supported("-Wno-unused-but-set-variable" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-fstandalone-debug" CMAKE_CXX_FLAGS_DEBUG)
  string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
  # NOTE(review): CMAKE_LINKER_FLAGS_DEBUG is not one of the per-configuration
  # linker flag variables CMake consumes (those are
  # CMAKE_{EXE,SHARED,MODULE,STATIC}_LINKER_FLAGS_DEBUG). Kept as is; confirm
  # whether something else in the tree reads this variable.
  string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
  append_cxx_flag_if_supported("-fno-math-errno" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-fno-trapping-math" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Werror=format" CMAKE_CXX_FLAGS)
else()
  # skip unwanted includes from windows.h
  add_compile_definitions(WIN32_LEAN_AND_MEAN)
  # Windows SDK broke compatibility since version 25131, but introduced this
  # define for backward compatibility.
  add_compile_definitions(_UCRT_LEGACY_INFINITY)
  # disable min/max macros
  add_compile_definitions(NOMINMAX)
  # Turn off these warnings on Windows.
  # destructor was implicitly defined as delete
  append_cxx_flag_if_supported("/wd4624" CMAKE_CXX_FLAGS)
  # unknown pragma
  append_cxx_flag_if_supported("/wd4068" CMAKE_CXX_FLAGS)
  # unexpected tokens following preprocessor directive - expected a newline
  append_cxx_flag_if_supported("/wd4067" CMAKE_CXX_FLAGS)
  # conversion from 'size_t' to 'unsigned int', possible loss of data
  append_cxx_flag_if_supported("/wd4267" CMAKE_CXX_FLAGS)
  # no suitable definition provided for explicit template instantiation request
  append_cxx_flag_if_supported("/wd4661" CMAKE_CXX_FLAGS)
  # recursive on all control paths, function will cause runtime stack overflow
  append_cxx_flag_if_supported("/wd4717" CMAKE_CXX_FLAGS)
  # conversion from '_Ty' to '_Ty', possible loss of data
  append_cxx_flag_if_supported("/wd4244" CMAKE_CXX_FLAGS)
  # unsafe use of type 'bool' in operation
  append_cxx_flag_if_supported("/wd4804" CMAKE_CXX_FLAGS)
  # inconsistent dll linkage
  append_cxx_flag_if_supported("/wd4273" CMAKE_CXX_FLAGS)
endif()

# ---[ aarch64: probe for the vst1q_f32_x2 / vld1q_f32_x2 NEON intrinsics and
# define MISSING_ARM_VST1 / MISSING_ARM_VLD1 when the C compiler cannot build
# a program using them.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
  include(CheckCSourceCompiles)
  check_c_source_compiles(
    "#include <arm_neon.h>
int main() {
  float a[] = {1.0, 1.0};
  float32x4x2_t v;
  v.val[0] = vcombine_f32 (vcreate_f32 (0UL), vcreate_f32 (0UL));
  v.val[1] = vcombine_f32 (vcreate_f32 (0UL), vcreate_f32 (0UL));
  vst1q_f32_x2(a, v);
  return 0;
}"
    HAS_VST1)

  if(NOT HAS_VST1)
    string(APPEND CMAKE_CXX_FLAGS " -DMISSING_ARM_VST1")
  endif()

  check_c_source_compiles(
    "#include <arm_neon.h>
int main() {
  float a[] = {1.0, 1.0};
  vld1q_f32_x2(a);
  return 0;
}"
    HAS_VLD1)

  if(NOT HAS_VLD1)
    string(APPEND CMAKE_CXX_FLAGS " -DMISSING_ARM_VLD1")
  endif()
endif()

# Add code
# coverage flags to supported compilers
# (GCC uses gcov-style instrumentation, Clang uses source-based coverage; any
# other compiler is reported as a configuration error.)
if(USE_CPP_CODE_COVERAGE)
  if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
    string(APPEND CMAKE_C_FLAGS " --coverage -fprofile-abs-path")
    string(APPEND CMAKE_CXX_FLAGS " --coverage -fprofile-abs-path")
  elseif("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
    string(APPEND CMAKE_C_FLAGS " -fprofile-instr-generate -fcoverage-mapping")
    string(APPEND CMAKE_CXX_FLAGS
           " -fprofile-instr-generate -fcoverage-mapping")
  else()
    # SEND_ERROR (not the non-existent "ERROR" mode, which would merely be
    # printed as part of the text) marks the configuration as failed.
    message(
      SEND_ERROR
        "Code coverage for compiler ${CMAKE_CXX_COMPILER_ID} is unsupported")
  endif()

endif()

if(APPLE)
  if(USE_MPS)
    string(APPEND CMAKE_OBJCXX_FLAGS " -DUSE_MPS -fno-objc-arc")
    string(APPEND CMAKE_CXX_FLAGS " -DUSE_MPS")
    string(
      APPEND
      CMAKE_SHARED_LINKER_FLAGS
      " -weak_framework Foundation -weak_framework MetalPerformanceShaders -weak_framework MetalPerformanceShadersGraph -weak_framework Metal"
    )
    # To suppress MPSGraph availability warnings
    append_cxx_flag_if_supported("-Wno-unguarded-availability-new"
                                 CMAKE_OBJCXX_FLAGS)
  endif()
  append_cxx_flag_if_supported("-Wno-unused-private-field" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Wno-missing-braces" CMAKE_CXX_FLAGS)
endif()

if(USE_XPU)
  string(APPEND CMAKE_CXX_FLAGS " -DUSE_XPU")
endif()

if(EMSCRIPTEN)
  string(
    APPEND
    CMAKE_CXX_FLAGS
    " -Wno-implicit-function-declaration -DEMSCRIPTEN -s DISABLE_EXCEPTION_CATCHING=0"
  )
endif()

append_cxx_flag_if_supported("-Wno-stringop-overflow" CMAKE_CXX_FLAGS)

# Strip symbols from Android builds unless debug symbols were requested.
if(ANDROID AND (NOT ANDROID_DEBUG_SYMBOLS))
  if(CMAKE_COMPILER_IS_GNUCXX)
    string(APPEND CMAKE_CXX_FLAGS " -s")
  elseif("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
    string(APPEND CMAKE_CXX_FLAGS " -g0")
  else()
    string(APPEND CMAKE_EXE_LINKER_FLAGS " -s")
  endif()
endif()

if(NOT APPLE AND UNIX)
  list(APPEND Caffe2_DEPENDENCY_LIBS dl)
endif()

# Prefix path to Caffe2 headers. If a directory containing installed Caffe2
# headers was inadvertently added to the list of include directories, prefixing
# PROJECT_SOURCE_DIR means this source tree always takes precedence.
include_directories(BEFORE ${PROJECT_SOURCE_DIR})

# Prefix path to generated Caffe2 headers. These need to take precedence over
# their empty counterparts located in PROJECT_SOURCE_DIR.
include_directories(BEFORE ${PROJECT_BINARY_DIR})

include_directories(BEFORE ${PROJECT_SOURCE_DIR}/aten/src/)
include_directories(BEFORE ${CMAKE_BINARY_DIR}/aten/src/)

if(USE_MIMALLOC)
  # Configure the bundled mimalloc before adding it as a subdirectory.
  set(MI_OVERRIDE OFF)
  set(MI_BUILD_SHARED OFF)
  set(MI_BUILD_OBJECT OFF)
  set(MI_BUILD_TESTS OFF)
  add_definitions(-DUSE_MIMALLOC)
  add_subdirectory(third_party/mimalloc)
  include_directories(third_party/mimalloc/include)
endif()

# ---[ Main build
add_subdirectory(c10)
add_subdirectory(caffe2)

# ---[ CMake related files
# Uninstall option.
if(NOT TARGET caffe2_uninstall)
  configure_file(
    ${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_uninstall.cmake.in
    ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake IMMEDIATE @ONLY)

  add_custom_target(
    caffe2_uninstall COMMAND ${CMAKE_COMMAND} -P
                             ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake)
endif()

# ---[ Make configuration files for cmake to allow dependent libraries easier
# access to Caffe2.

if((NOT USE_GLOG)
   OR (NOT USE_GFLAGS)
   OR BUILD_CUSTOM_PROTOBUF)
  message(WARNING "Generated cmake files are only fully tested if one builds "
                  "with system glog, gflags, and protobuf. Other settings may "
                  "generate files that are not well tested.")
endif()

if(USE_CUDA OR USE_ROCM)
  # TODO: check if we should include other cuda dependency libraries to the
  # interface as well.

endif()

# Note(jiayq): when building static libraries, all PRIVATE dependencies will
# also become interface libraries, and as a result if there are any dependency
# libraries that are not exported, the following install export script will
# fail. As a result, we will only provide the targets cmake files for shared lib
# installation. For more info, read:
# https://cmake.org/pipermail/cmake/2016-May/063400.html
if(BUILD_SHARED_LIBS)
  configure_file(${PROJECT_SOURCE_DIR}/cmake/Caffe2Config.cmake.in
                 ${PROJECT_BINARY_DIR}/Caffe2Config.cmake @ONLY)
  install(
    FILES ${PROJECT_BINARY_DIR}/Caffe2Config.cmake
    DESTINATION share/cmake/Caffe2
    COMPONENT dev)
  install(
    FILES ${PROJECT_SOURCE_DIR}/cmake/public/cuda.cmake
          ${PROJECT_SOURCE_DIR}/cmake/public/xpu.cmake
          ${PROJECT_SOURCE_DIR}/cmake/public/glog.cmake
          ${PROJECT_SOURCE_DIR}/cmake/public/gflags.cmake
          ${PROJECT_SOURCE_DIR}/cmake/public/mkl.cmake
          ${PROJECT_SOURCE_DIR}/cmake/public/mkldnn.cmake
          ${PROJECT_SOURCE_DIR}/cmake/public/protobuf.cmake
          ${PROJECT_SOURCE_DIR}/cmake/public/utils.cmake
          ${PROJECT_SOURCE_DIR}/cmake/public/LoadHIP.cmake
    DESTINATION share/cmake/Caffe2/public
    COMPONENT dev)
  install(
    DIRECTORY ${PROJECT_SOURCE_DIR}/cmake/Modules_CUDA_fix
    DESTINATION share/cmake/Caffe2/
    COMPONENT dev)
  # Find modules shipped next to the package configuration file.
  install(
    FILES ${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUDAToolkit.cmake
          ${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUSPARSELT.cmake
          ${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUDSS.cmake
          ${PROJECT_SOURCE_DIR}/cmake/Modules/FindSYCLToolkit.cmake
    DESTINATION share/cmake/Caffe2/
    COMPONENT dev)
  if(NOT BUILD_LIBTORCHLESS)
    install(
      EXPORT Caffe2Targets
      DESTINATION share/cmake/Caffe2
      FILE Caffe2Targets.cmake
      COMPONENT dev)
  endif()
else()
  message(WARNING "Generated cmake files are only available when building "
                  "shared libs.")
endif()

# ---[ Binaries
# Binaries will be built after the Caffe2 main libraries and the modules are
# built. For the binaries, they will be linked to the Caffe2 main libraries, as
# well as all the modules that are built with Caffe2 (the ones built in the
# previous Modules section above).
if(BUILD_BINARY)
  add_subdirectory(binaries)
endif()

# ---[ JNI
if(BUILD_JNI)
  if(NOT MSVC)
    string(APPEND CMAKE_CXX_FLAGS " -Wno-unused-variable")
  endif()
  set(BUILD_LIBTORCH_WITH_JNI 1)
  set(FBJNI_SKIP_TESTS 1)
  add_subdirectory(android/pytorch_android)
endif()

include(cmake/Summary.cmake)
caffe2_print_configuration_summary()

if(BUILD_FUNCTORCH)
  add_subdirectory(functorch)
endif()

# Parse custom debug info
# USE_CUSTOM_DEBINFO holds source files (separated by ";" and/or spaces) that
# are compiled with "-g" regardless of the build type.
if(DEFINED USE_CUSTOM_DEBINFO)
  string(REPLACE ";" " " SOURCE_FILES "${USE_CUSTOM_DEBINFO}")
  message(STATUS "Source files with custom debug infos: ${SOURCE_FILES}")

  # Normalise to a CMake list: any run of spaces becomes one separator.
  string(REGEX REPLACE " +" ";" SOURCE_FILES_LIST "${SOURCE_FILES}")

  # Set the COMPILE_FLAGS property for each source file
  foreach(SOURCE_FILE ${SOURCE_FILES_LIST})
    # We have to specify the scope here. We do this by specifying the targets we
    # care about and caffe2/ for all test targets defined there
    if(BUILD_LIBTORCHLESS)
      # NOTE(review): switching USE_CUDA off from inside this loop looks
      # unrelated to debug info -- confirm this is intentional.
      caffe2_update_option(USE_CUDA OFF)
      set(ALL_PT_TARGETS "torch_python;${C10_LIB};${TORCH_CPU_LIB};${TORCH_LIB}")
    else()
      # @todo test if we can remove this
      set(ALL_PT_TARGETS "torch_python;c10;torch_cpu;torch")
    endif()
    set_source_files_properties(
      ${SOURCE_FILE} DIRECTORY "caffe2/" TARGET_DIRECTORY ${ALL_PT_TARGETS}
      PROPERTIES COMPILE_FLAGS "-g")
  endforeach()

  # Link everything with debug info when any file is in debug mode
  string(APPEND CMAKE_EXE_LINKER_FLAGS " -g")
  string(APPEND CMAKE_SHARED_LINKER_FLAGS " -g")
endif()

# Bundle PTXAS if needed
if(BUILD_BUNDLE_PTXAS AND USE_CUDA)
  # Always stage ptxas where the install rule below picks it up. file(COPY)
  # preserves timestamps and skips the copy when the destination already holds
  # a file with the same timestamp, so re-configuring stays cheap while a
  # changed toolkit still refreshes the staged binary.
  message(STATUS "Copying PTXAS into the bin folder")
  file(COPY "${CUDAToolkit_BIN_DIR}/ptxas"
       DESTINATION "${PROJECT_BINARY_DIR}")
  install(PROGRAMS "${PROJECT_BINARY_DIR}/ptxas"
          DESTINATION "${CMAKE_INSTALL_BINDIR}")
endif()