1# ---[ Generate and install header and cpp files 2include(../cmake/Codegen.cmake) 3 4# ---[ Vulkan code gen 5if(USE_VULKAN) 6 include(../cmake/VulkanCodegen.cmake) 7endif() 8 9# Debug messages - if you want to get a list of source files and examine 10# target information, enable the following by -DPRINT_CMAKE_DEBUG_INFO=ON. 11set(PRINT_CMAKE_DEBUG_INFO FALSE CACHE BOOL "print cmake debug information") 12if(PRINT_CMAKE_DEBUG_INFO) 13 include(../cmake/DebugHelper.cmake) 14endif() 15 16# ATen parallelism settings 17# OMP - OpenMP for intra-op, native thread pool for inter-op parallelism 18# NATIVE - using native thread pool for intra- and inter-op parallelism 19if(INTERN_BUILD_MOBILE) 20 set(ATEN_THREADING "NATIVE" CACHE STRING "ATen parallel backend") 21else() 22 if(USE_OPENMP) 23 set(ATEN_THREADING "OMP" CACHE STRING "ATen parallel backend") 24 else() 25 set(ATEN_THREADING "NATIVE" CACHE STRING "ATen parallel backend") 26 endif() 27endif() 28 29set(AT_PARALLEL_OPENMP 0) 30set(AT_PARALLEL_NATIVE 0) 31 32message(STATUS "Using ATen parallel backend: ${ATEN_THREADING}") 33if("${ATEN_THREADING}" STREQUAL "OMP") 34 set(AT_PARALLEL_OPENMP 1) 35elseif("${ATEN_THREADING}" STREQUAL "NATIVE") 36 set(AT_PARALLEL_NATIVE 1) 37else() 38 message(FATAL_ERROR "Unknown ATen parallel backend: ${ATEN_THREADING}") 39endif() 40 41# ---[ Declare source file lists 42 43# ---[ ATen build 44if(INTERN_BUILD_ATEN_OPS) 45 set(__torch_CMAKE_POSITION_INDEPENDENT_CODE ${CMAKE_POSITION_INDEPENDENT_CODE}) 46 set(CMAKE_POSITION_INDEPENDENT_CODE ON) 47 add_subdirectory(../aten aten) 48 set(CMAKE_POSITION_INDEPENDENT_CODE ${__torch_CMAKE_POSITION_INDEPENDENT_CODE}) 49 50 # Generate the headers wrapped by our operator 51 file(GLOB_RECURSE torchgen_python "${PROJECT_SOURCE_DIR}/torchgen/*.py") 52 53 54 # Add source, includes, and libs to lists 55 list(APPEND Caffe2_CPU_SRCS ${ATen_CPU_SRCS}) 56 list(APPEND Caffe2_GPU_SRCS ${ATen_CUDA_CPP_SRCS}) 57 list(APPEND Caffe2_XPU_SRCS ${ATen_XPU_SRCS}) 58 list(APPEND Caffe2_XPU_INCLUDE ${ATen_XPU_INCLUDE}) 59 list(APPEND Caffe2_XPU_DEPENDENCY_LIBS ${ATen_XPU_DEPENDENCY_LIBS}) 60 list(APPEND Caffe2_GPU_SRCS_W_SORT_BY_KEY ${ATen_CUDA_SRCS_W_SORT_BY_KEY}) 61 list(APPEND Caffe2_GPU_CU_SRCS ${ATen_CUDA_CU_SRCS}) 62 list(APPEND Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY ${ATen_CUDA_CU_SRCS_W_SORT_BY_KEY}) 63 list(APPEND Caffe2_HIP_SRCS ${ATen_HIP_SRCS}) 64 list(APPEND Caffe2_MPS_SRCS ${ATen_MPS_SRCS}) 65 list(APPEND Caffe2_XPU_SRCS ${ATen_XPU_SRCS}) 66 list(APPEND Caffe2_HIP_SRCS ${ATen_HIP_SRCS_W_SORT_BY_KEY}) 67 list(APPEND Caffe2_CPU_TEST_SRCS ${ATen_CPU_TEST_SRCS}) 68 list(APPEND Caffe2_MPS_TEST_SRCS ${ATen_MPS_TEST_SRCS}) 69 list(APPEND Caffe2_GPU_TEST_SRCS ${ATen_CUDA_TEST_SRCS}) 70 list(APPEND Caffe2_HIP_TEST_SRCS ${ATen_HIP_TEST_SRCS}) 71 list(APPEND Caffe2_XPU_TEST_SRCS ${ATen_XPU_TEST_SRCS}) 72 list(APPEND Caffe2_CPU_TEST_SRCS ${ATen_CORE_TEST_SRCS}) 73 list(APPEND Caffe2_VULKAN_TEST_SRCS ${ATen_VULKAN_TEST_SRCS}) 74 list(APPEND Caffe2_CPU_INCLUDE ${ATen_CPU_INCLUDE}) 75 list(APPEND Caffe2_GPU_INCLUDE ${ATen_CUDA_INCLUDE}) 76 list(APPEND Caffe2_HIP_INCLUDE ${ATen_HIP_INCLUDE}) 77 list(APPEND Caffe2_XPU_INCLUDE ${ATen_XPU_INCLUDE}) 78 list(APPEND Caffe2_VULKAN_INCLUDE ${ATen_VULKAN_INCLUDE}) 79 list(APPEND Caffe2_DEPENDENCY_LIBS ${ATen_CPU_DEPENDENCY_LIBS}) 80 list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS ${ATen_CUDA_DEPENDENCY_LIBS}) 81 list(APPEND Caffe2_HIP_DEPENDENCY_LIBS ${ATen_HIP_DEPENDENCY_LIBS}) 82 list(APPEND Caffe2_DEPENDENCY_INCLUDE ${ATen_THIRD_PARTY_INCLUDE}) 83 set(Caffe2_CUDA_DEPENDENCY_LIBS ${Caffe2_CUDA_DEPENDENCY_LIBS} PARENT_SCOPE) 84endif() 85 86# ---[ Caffe2 build 87# Note: the folders that are being commented out have not been properly 88# addressed yet. 89 90if(NOT MSVC AND USE_XNNPACK) 91 if(NOT TARGET fxdiv) 92 set(FXDIV_BUILD_TESTS OFF CACHE BOOL "") 93 set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "") 94 add_subdirectory( 95 "${FXDIV_SOURCE_DIR}" 96 "${CMAKE_BINARY_DIR}/FXdiv") 97 endif() 98endif() 99 100add_subdirectory(core) 101add_subdirectory(serialize) 102add_subdirectory(utils) 103if(NOT USE_FBGEMM) 104 add_subdirectory(perfkernels) 105endif() 106 107# Advanced: if we have allow list specified, we will do intersections for all 108# main lib srcs. 109if(CAFFE2_ALLOWLISTED_FILES) 110 caffe2_do_allowlist(Caffe2_CPU_SRCS CAFFE2_ALLOWLISTED_FILES) 111 caffe2_do_allowlist(Caffe2_GPU_SRCS CAFFE2_ALLOWLISTED_FILES) 112 caffe2_do_allowlist(Caffe2_XPU_SRCS CAFFE2_ALLOWLISTED_FILES) 113 caffe2_do_allowlist(Caffe2_GPU_SRCS_W_SORT_BY_KEY CAFFE2_ALLOWLISTED_FILES) 114 caffe2_do_allowlist(Caffe2_GPU_CU_SRCS CAFFE2_ALLOWLISTED_FILES) 115 caffe2_do_allowlist(Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY CAFFE2_ALLOWLISTED_FILES) 116 caffe2_do_allowlist(Caffe2_HIP_SRCS CAFFE2_ALLOWLISTED_FILES) 117endif() 118 119if(PRINT_CMAKE_DEBUG_INFO) 120 message(STATUS "CPU sources: ") 121 foreach(tmp ${Caffe2_CPU_SRCS}) 122 message(STATUS " " ${tmp}) 123 endforeach() 124 125 message(STATUS "GPU sources: (for torch_cuda_cpp)") 126 foreach(tmp ${Caffe2_GPU_SRCS}) 127 message(STATUS " " ${tmp}) 128 endforeach() 129 130 message(STATUS "GPU sources: (for torch_cuda_cu)") 131 foreach(tmp ${Caffe2_GPU_CU_SRCS}) 132 message(STATUS " " ${tmp}) 133 endforeach() 134 135 message(STATUS "torch_cuda_cu GPU sources (w/ sort by key): ") 136 foreach(tmp ${Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY}) 137 message(STATUS " " ${tmp}) 138 endforeach() 139 140 message(STATUS "torch_cuda_cpp GPU sources (w/ sort by key): ") 141 foreach(tmp ${Caffe2_GPU_SRCS_W_SORT_BY_KEY}) 142 message(STATUS " " ${tmp}) 143 endforeach() 144 145 message(STATUS "CPU include: ") 146 foreach(tmp ${Caffe2_CPU_INCLUDE}) 147 message(STATUS " " ${tmp}) 148 endforeach() 149 150 message(STATUS "GPU include: ") 151 foreach(tmp ${Caffe2_GPU_INCLUDE}) 152 message(STATUS " " ${tmp}) 153 endforeach() 154 155 message(STATUS "CPU test sources: ") 156 foreach(tmp ${Caffe2_CPU_TEST_SRCS}) 157 message(STATUS " " ${tmp}) 158 endforeach() 159 160 message(STATUS "GPU test sources: ") 161 foreach(tmp ${Caffe2_GPU_TEST_SRCS}) 162 message(STATUS " " ${tmp}) 163 endforeach() 164 165 message(STATUS "HIP sources: ") 166 foreach(tmp ${Caffe2_HIP_SRCS}) 167 message(STATUS " " ${tmp}) 168 endforeach() 169 170 message(STATUS "MPS sources: ") 171 foreach(tmp ${Caffe2_MPS_SRCS}) 172 message(STATUS " " ${tmp}) 173 endforeach() 174 175 message(STATUS "XPU sources: ") 176 foreach(tmp ${Caffe2_XPU_SRCS}) 177 message(STATUS " " ${tmp}) 178 endforeach() 179 180 message(STATUS "HIP test sources: ") 181 foreach(tmp ${Caffe2_HIP_TEST_SRCS}) 182 message(STATUS " " ${tmp}) 183 endforeach() 184 185 message(STATUS "ATen CPU test sources: ") 186 foreach(tmp ${ATen_CPU_TEST_SRCS}) 187 message(STATUS " " ${tmp}) 188 endforeach() 189 190 message(STATUS "ATen MPS test sources: ") 191 foreach(tmp ${ATen_MPS_TEST_SRCS}) 192 message(STATUS " " ${tmp}) 193 endforeach() 194 195 message(STATUS "ATen CUDA test sources: ") 196 foreach(tmp ${ATen_CUDA_TEST_SRCS}) 197 message(STATUS " " ${tmp}) 198 endforeach() 199 200 message(STATUS "ATen HIP test sources: ") 201 foreach(tmp ${ATen_HIP_TEST_SRCS}) 202 message(STATUS " " ${tmp}) 203 endforeach() 204 205 message(STATUS "ATen XPU test sources: ") 206 foreach(tmp ${ATen_XPU_TEST_SRCS}) 207 message(STATUS " " ${tmp}) 208 endforeach() 209 210 message(STATUS "ATen Vulkan test sources: ") 211 foreach(tmp ${ATen_VULKAN_TEST_SRCS}) 212 message(STATUS " " ${tmp}) 213 endforeach() 214 215endif() 216 217# ========================================================== 218# formerly-libtorch 219# ========================================================== 220 221set(TORCH_SRC_DIR "${PROJECT_SOURCE_DIR}/torch") 222set(TORCH_ROOT "${PROJECT_SOURCE_DIR}") 223 224if(NOT TORCH_INSTALL_BIN_DIR) 225 set(TORCH_INSTALL_BIN_DIR bin) 226endif() 227 228if(NOT TORCH_INSTALL_INCLUDE_DIR) 229 set(TORCH_INSTALL_INCLUDE_DIR include) 230endif() 231 232if(NOT TORCH_INSTALL_LIB_DIR) 233 set(TORCH_INSTALL_LIB_DIR lib) 234endif() 235 236set(CMAKE_POSITION_INDEPENDENT_CODE TRUE) 237 238# Generate files 239set(TOOLS_PATH "${TORCH_ROOT}/tools") 240 241configure_file("${TORCH_SRC_DIR}/_utils_internal.py" 242 "${TOOLS_PATH}/shared/_utils_internal.py" 243 COPYONLY) 244 245# Generate header with version info 246configure_file("${TORCH_SRC_DIR}/csrc/api/include/torch/version.h.in" 247 "${TORCH_SRC_DIR}/csrc/api/include/torch/version.h" 248 @ONLY) 249 250set(GENERATED_CXX_TORCH 251 "${TORCH_SRC_DIR}/csrc/autograd/generated/Functions.cpp" 252 "${TORCH_SRC_DIR}/csrc/autograd/generated/ViewFuncs.cpp" 253 ) 254 255if(NOT INTERN_DISABLE_AUTOGRAD AND NOT BUILD_LITE_INTERPRETER) 256 list(APPEND GENERATED_CXX_TORCH 257 "${TORCH_SRC_DIR}/csrc/autograd/generated/VariableType_0.cpp" 258 "${TORCH_SRC_DIR}/csrc/autograd/generated/VariableType_1.cpp" 259 "${TORCH_SRC_DIR}/csrc/autograd/generated/VariableType_2.cpp" 260 "${TORCH_SRC_DIR}/csrc/autograd/generated/VariableType_3.cpp" 261 "${TORCH_SRC_DIR}/csrc/autograd/generated/VariableType_4.cpp" 262 "${TORCH_SRC_DIR}/csrc/autograd/generated/TraceType_0.cpp" 263 "${TORCH_SRC_DIR}/csrc/autograd/generated/TraceType_1.cpp" 264 "${TORCH_SRC_DIR}/csrc/autograd/generated/TraceType_2.cpp" 265 "${TORCH_SRC_DIR}/csrc/autograd/generated/TraceType_3.cpp" 266 "${TORCH_SRC_DIR}/csrc/autograd/generated/TraceType_4.cpp" 267 "${TORCH_SRC_DIR}/csrc/autograd/generated/ADInplaceOrViewType_0.cpp" 268 "${TORCH_SRC_DIR}/csrc/autograd/generated/ADInplaceOrViewType_1.cpp" 269 "${TORCH_SRC_DIR}/csrc/inductor/aoti_torch/generated/c_shim_cpu.cpp" 270 ) 271 if(BUILD_LAZY_TS_BACKEND) 272 list(APPEND GENERATED_CXX_TORCH 273 "${TORCH_SRC_DIR}/csrc/lazy/generated/LazyNativeFunctions.cpp" 274 "${TORCH_SRC_DIR}/csrc/lazy/generated/RegisterAutogradLazy.cpp" 275 "${TORCH_SRC_DIR}/csrc/lazy/generated/RegisterLazy.cpp" 276 ) 277 endif() 278endif() 279 280set(GENERATED_H_TORCH 281 "${TORCH_SRC_DIR}/csrc/autograd/generated/Functions.h" 282 "${TORCH_SRC_DIR}/csrc/autograd/generated/variable_factories.h" 283 "${TORCH_SRC_DIR}/csrc/autograd/generated/ViewFuncs.h" 284 ) 285 286if(NOT INTERN_DISABLE_AUTOGRAD) 287 list(APPEND GENERATED_H_TORCH 288 "${TORCH_SRC_DIR}/csrc/autograd/generated/VariableType.h" 289 "${TORCH_SRC_DIR}/csrc/lazy/generated/LazyIr.h" 290 "${TORCH_SRC_DIR}/csrc/lazy/generated/LazyNonNativeIr.h" 291 "${TORCH_SRC_DIR}/csrc/lazy/generated/LazyNativeFunctions.h" 292 ) 293endif() 294 295set(GENERATED_CXX_PYTHON 296 "${TORCH_SRC_DIR}/csrc/autograd/generated/python_functions_0.cpp" 297 "${TORCH_SRC_DIR}/csrc/autograd/generated/python_functions_1.cpp" 298 "${TORCH_SRC_DIR}/csrc/autograd/generated/python_functions_2.cpp" 299 "${TORCH_SRC_DIR}/csrc/autograd/generated/python_functions_3.cpp" 300 "${TORCH_SRC_DIR}/csrc/autograd/generated/python_functions_4.cpp" 301 "${TORCH_SRC_DIR}/csrc/autograd/generated/python_variable_methods.cpp" 302 "${TORCH_SRC_DIR}/csrc/autograd/generated/python_torch_functions_0.cpp" 303 "${TORCH_SRC_DIR}/csrc/autograd/generated/python_torch_functions_1.cpp" 304 "${TORCH_SRC_DIR}/csrc/autograd/generated/python_torch_functions_2.cpp" 305 "${TORCH_SRC_DIR}/csrc/autograd/generated/python_nn_functions.cpp" 306 "${TORCH_SRC_DIR}/csrc/autograd/generated/python_fft_functions.cpp" 307 "${TORCH_SRC_DIR}/csrc/autograd/generated/python_linalg_functions.cpp" 308 "${TORCH_SRC_DIR}/csrc/autograd/generated/python_nested_functions.cpp" 309 "${TORCH_SRC_DIR}/csrc/autograd/generated/python_sparse_functions.cpp" 310 "${TORCH_SRC_DIR}/csrc/autograd/generated/python_special_functions.cpp" 311 "${TORCH_SRC_DIR}/csrc/autograd/generated/python_return_types.cpp" 312 "${TORCH_SRC_DIR}/csrc/autograd/generated/python_enum_tag.cpp" 313 ) 314 315set(GENERATED_H_PYTHON 316 "${TORCH_SRC_DIR}/csrc/autograd/generated/python_functions.h" 317 "${TORCH_SRC_DIR}/csrc/autograd/generated/python_return_types.h" 318 ) 319 320set(GENERATED_TESTING_PYTHON 321 "${TORCH_SRC_DIR}/testing/_internal/generated/annotated_fn_args.py" 322 ) 323 324set(GENERATED_CXX_TORCH_CUDA 325 "${TORCH_SRC_DIR}/csrc/inductor/aoti_torch/generated/c_shim_cuda.cpp" 326 ) 327 328set(TORCH_GENERATED_CODE 329 ${GENERATED_CXX_TORCH} 330 ${GENERATED_H_TORCH} 331 ${GENERATED_CXX_PYTHON} 332 ${GENERATED_H_PYTHON} 333 ${GENERATED_TESTING_PYTHON} 334 ${GENERATED_CXX_TORCH_CUDA} 335 ) 336 337set(GEN_PER_OPERATOR_FLAG) 338if(USE_PER_OPERATOR_HEADERS) 339 list(APPEND GEN_PER_OPERATOR_FLAG "--per_operator_headers") 340endif() 341 342file(GLOB_RECURSE autograd_python "${TOOLS_PATH}/autograd/*.py") 343file(GLOB_RECURSE autograd_yaml "${TOOLS_PATH}/autograd/*.yaml") 344file(GLOB_RECURSE autograd_templates "${TOOLS_PATH}/autograd/templates/*") 345add_custom_command( 346 OUTPUT 347 ${TORCH_GENERATED_CODE} 348 COMMAND 349 Python::Interpreter tools/setup_helpers/generate_code.py 350 --native-functions-path "aten/src/ATen/native/native_functions.yaml" 351 --tags-path "aten/src/ATen/native/tags.yaml" 352 $<$<BOOL:${INTERN_DISABLE_AUTOGRAD}>:--disable-autograd> 353 $<$<BOOL:${SELECTED_OP_LIST}>:--selected-op-list-path="${SELECTED_OP_LIST}"> 354 --force_schema_registration 355 --gen_lazy_ts_backend 356 ${GEN_PER_OPERATOR_FLAG} 357 DEPENDS 358 "${TORCH_ROOT}/aten/src/ATen/native/native_functions.yaml" 359 "${TORCH_ROOT}/aten/src/ATen/native/tags.yaml" 360 "${TORCH_ROOT}/aten/src/ATen/native/ts_native_functions.yaml" 361 "${TORCH_ROOT}/torch/csrc/lazy/core/shape_inference.h" 362 "${TORCH_ROOT}/torch/csrc/lazy/ts_backend/ts_native_functions.cpp" 363 "${TORCH_ROOT}/aten/src/ATen/templates/DispatchKeyNativeFunctions.h" 364 "${TORCH_ROOT}/aten/src/ATen/templates/DispatchKeyNativeFunctions.cpp" 365 "${TORCH_ROOT}/aten/src/ATen/templates/LazyIr.h" 366 "${TORCH_ROOT}/aten/src/ATen/templates/LazyNonNativeIr.h" 367 "${TORCH_ROOT}/aten/src/ATen/templates/RegisterDispatchKey.cpp" 368 ${autograd_python} 369 ${autograd_yaml} 370 ${autograd_templates} 371 ${torchgen_python} 372 WORKING_DIRECTORY "${TORCH_ROOT}") 373 374 375# Required workaround for libtorch_python.so build 376# see https://samthursfield.wordpress.com/2015/11/21/cmake-dependencies-between-targets-and-files-and-custom-commands/#custom-commands-in-different-directories 377add_custom_target( 378 generate-torch-sources 379 DEPENDS ${TORCH_GENERATED_CODE} 380 ) 381 382set(TORCH_SRCS ${GENERATED_CXX_TORCH}) 383list(APPEND TORCH_SRCS ${GENERATED_H_TORCH}) 384list(APPEND LIBTORCH_CMAKE_SRCS "") 385 386list(APPEND LITE_EAGER_SYMOBLICATION_SRCS "") 387if(USE_SOURCE_DEBUG_ON_MOBILE) 388 append_filelist("libtorch_lite_eager_symbolication" LITE_EAGER_SYMOBLICATION_SRCS) 389 # For source debug on lite interpreter, we have to add dependency on pickling 390 # but references to read/writeArchiveAndTensor is not built for mobile 391 # so this condition specifically says we are building for source debug 392 # on mobile. 393 if(BUILD_LITE_INTERPRETER) 394 set_source_files_properties(${TORCH_SRC_DIR}/csrc/jit/serialization/pickle.cpp PROPERTIES COMPILE_FLAGS "-DC10_MOBILE -DFEATURE_TORCH_MOBILE") 395 endif() 396endif() 397 398list(APPEND LITE_PROFILER_SRCS "") 399if(USE_LITE_INTERPRETER_PROFILER) 400 append_filelist("libtorch_edge_profiler_sources " LITE_PROFILER_SRCS) 401endif() 402 403# Switch between the full jit interpreter and lite interpreter 404if(BUILD_LITE_INTERPRETER) 405 append_filelist("libtorch_lite_cmake_sources" LIBTORCH_CMAKE_SRCS) 406 list(APPEND LIBTORCH_CMAKE_SRCS ${LITE_EAGER_SYMOBLICATION_SRCS}) 407 list(APPEND LIBTORCH_CMAKE_SRCS ${LITE_PROFILER_SRCS}) 408 if(USE_LITE_AOTI) 409 append_filelist("inductor_core_resources" LIBTORCH_CMAKE_SRCS) 410 endif() 411 set(CMAKE_POSITION_INDEPENDENT_CODE TRUE) 412else() 413 append_filelist("libtorch_cmake_sources" LIBTORCH_CMAKE_SRCS) 414 list(APPEND LIBTORCH_CMAKE_SRCS ${LITE_EAGER_SYMOBLICATION_SRCS}) 415 if(BUILD_LAZY_TS_BACKEND) 416 append_filelist("lazy_tensor_ts_sources" LIBTORCH_CMAKE_SRCS) 417 endif() 418 if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU") 419 # TODO: Delete this when https://github.com/pytorch/pytorch/issues/35026 is fixed 420 set_source_files_properties(../torch/csrc/autograd/record_function_ops.cpp PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations) 421 endif() 422endif() 423list(APPEND TORCH_SRCS ${LIBTORCH_CMAKE_SRCS}) 424 425if(PRINT_CMAKE_DEBUG_INFO) 426 message(STATUS "Interpreter sources: ") 427 foreach(tmp ${LIBTORCH_CMAKE_SRCS}) 428 message(STATUS " " ${tmp}) 429 endforeach() 430endif() 431 432# Mobile backend delegate srcs 433if(INTERN_BUILD_MOBILE) 434 set(DELEGATE_SRCS 435 ${TORCH_SRC_DIR}/csrc/jit/backends/backend_debug_info.cpp 436 ${TORCH_SRC_DIR}/csrc/jit/backends/backend_interface.cpp 437 ) 438 list(APPEND TORCH_SRCS ${DELEGATE_SRCS}) 439 if(IOS AND USE_COREML_DELEGATE) 440 set(COREML_DELEGATE_SRCS 441 ${TORCH_SRC_DIR}/csrc/jit/backends/coreml/cpp/context.cpp 442 ${TORCH_SRC_DIR}/csrc/jit/backends/coreml/objc/PTMCoreMLBackend.mm 443 ${TORCH_SRC_DIR}/csrc/jit/backends/coreml/objc/PTMCoreMLExecutor.mm 444 ${TORCH_SRC_DIR}/csrc/jit/backends/coreml/objc/PTMCoreMLCompiler.mm 445 ${TORCH_SRC_DIR}/csrc/jit/backends/coreml/objc/PTMCoreMLFeatureProvider.mm 446 ) 447 set_source_files_properties(${TORCH_SRC_DIR}/csrc/jit/backends/coreml/objc/PTMCoreMLBackend.mm PROPERTIES COMPILE_FLAGS "-fno-objc-arc") 448 include_directories(${TORCH_ROOT}/third_party/nlohmann/single_include) 449 list(APPEND TORCH_SRCS ${COREML_DELEGATE_SRCS}) 450 endif() 451endif() 452 453# Required workaround for LLVM 9 includes. 454if(NOT MSVC) 455 set_source_files_properties(${TORCH_SRC_DIR}/csrc/jit/tensorexpr/llvm_jit.cpp PROPERTIES COMPILE_FLAGS -Wno-noexcept-type) 456endif() 457# Disable certain warnings for GCC-9.X 458if(CMAKE_COMPILER_IS_GNUCXX) 459 # See https://github.com/pytorch/pytorch/issues/38856 460 set_source_files_properties(${TORCH_SRC_DIR}/csrc/jit/tensorexpr/llvm_jit.cpp PROPERTIES COMPILE_FLAGS "-Wno-redundant-move -Wno-noexcept-type") 461 set_source_files_properties(${TORCH_SRC_DIR}/csrc/jit/tensorexpr/llvm_codegen.cpp PROPERTIES COMPILE_FLAGS "-Wno-init-list-lifetime") 462endif() 463 464# Enable conditional FP16 arithmetic intrinsics 465if(CPU_AARCH64 AND LINUX) 466set_source_files_properties(${TORCH_ROOT}/aten/src/ATen/native/BlasKernel.cpp PROPERTIES COMPILE_FLAGS "-march=armv8.2-a+fp16") 467endif() 468 469 470if(NOT INTERN_DISABLE_MOBILE_INTERP) 471 set(MOBILE_SRCS 472 ${TORCH_SRC_DIR}/csrc/jit/mobile/function.cpp 473 ${TORCH_SRC_DIR}/csrc/jit/mobile/import.cpp 474 ${TORCH_SRC_DIR}/csrc/jit/mobile/import_data.cpp 475 ${TORCH_SRC_DIR}/csrc/jit/mobile/interpreter.cpp 476 ${TORCH_SRC_DIR}/csrc/jit/mobile/compatibility/model_compatibility.cpp 477 ${TORCH_SRC_DIR}/csrc/jit/mobile/module.cpp 478 ${TORCH_SRC_DIR}/csrc/jit/mobile/flatbuffer_loader.cpp 479 ${TORCH_SRC_DIR}/csrc/jit/mobile/observer.cpp 480 ${TORCH_SRC_DIR}/csrc/jit/mobile/parse_bytecode.cpp 481 ${TORCH_SRC_DIR}/csrc/jit/mobile/parse_operators.cpp 482 ${TORCH_SRC_DIR}/csrc/jit/mobile/quantization.cpp 483 ${TORCH_SRC_DIR}/csrc/jit/mobile/train/export_data.cpp 484 ${TORCH_SRC_DIR}/csrc/jit/mobile/train/optim/sgd.cpp 485 ${TORCH_SRC_DIR}/csrc/jit/mobile/train/random.cpp 486 ${TORCH_SRC_DIR}/csrc/jit/mobile/train/sequential.cpp 487 ${TORCH_SRC_DIR}/csrc/jit/mobile/upgrader_mobile.cpp 488 ${TORCH_SRC_DIR}/csrc/jit/serialization/flatbuffer_serializer.cpp 489 ) 490 list(APPEND TORCH_SRCS ${MOBILE_SRCS}) 491 list(APPEND TORCH_SRCS ${LITE_EAGER_SYMOBLICATION_SRCS}) 492endif() 493 494# This one needs to be unconditionally added as Functions.cpp is also unconditionally added 495list(APPEND TORCH_SRCS 496 ${TORCH_SRC_DIR}/csrc/autograd/FunctionsManual.cpp 497 ${TORCH_SRC_DIR}/csrc/utils/out_types.cpp 498) 499 500if(NOT INTERN_DISABLE_AUTOGRAD AND NOT BUILD_LITE_INTERPRETER) 501 list(APPEND TORCH_SRCS 502 ${TORCH_SRC_DIR}/csrc/autograd/TraceTypeManual.cpp 503 ${TORCH_SRC_DIR}/csrc/autograd/VariableTypeManual.cpp 504 ) 505endif() 506 507if(${USE_ITT}) 508 list(APPEND TORCH_SRCS 509 ${TORCH_SRC_DIR}/csrc/itt_wrapper.cpp 510 ${TORCH_SRC_DIR}/csrc/profiler/stubs/itt.cpp 511 ) 512endif() 513 514if(NOT INTERN_BUILD_MOBILE AND NOT BUILD_LITE_INTERPRETER) 515 list(APPEND TORCH_SRCS 516 ${TORCH_SRC_DIR}/csrc/api/src/jit.cpp 517 ${TORCH_SRC_DIR}/csrc/jit/mobile/compatibility/backport.cpp 518 ${TORCH_SRC_DIR}/csrc/jit/mobile/compatibility/backport_manager.cpp 519 ${TORCH_SRC_DIR}/csrc/jit/serialization/onnx.cpp 520 ${TORCH_SRC_DIR}/csrc/jit/serialization/export.cpp 521 ${TORCH_SRC_DIR}/csrc/jit/serialization/export_bytecode.cpp 522 ${TORCH_SRC_DIR}/csrc/jit/serialization/export_module.cpp 523 ${TORCH_SRC_DIR}/csrc/jit/serialization/flatbuffer_serializer.cpp 524 ${TORCH_SRC_DIR}/csrc/jit/codegen/fuser/cpu/fused_kernel.cpp 525 ${TORCH_SRC_DIR}/csrc/jit/api/module_save.cpp 526 ${TORCH_SRC_DIR}/csrc/utils/byte_order.cpp 527 ) 528 529 if(USE_DISTRIBUTED) 530 append_filelist("libtorch_distributed_base_sources" TORCH_SRCS) 531 if(NOT WIN32) 532 append_filelist("libtorch_distributed_extra_sources" TORCH_SRCS) 533 endif() 534 endif() 535endif() 536 537if(USE_CUDA OR USE_ROCM) 538 append_filelist("libtorch_cuda_core_sources" Caffe2_GPU_HIP_JIT_FUSERS_SRCS) 539endif() 540 541if(USE_CUDA) 542 list(APPEND Caffe2_GPU_CU_SRCS ${Caffe2_GPU_HIP_JIT_FUSERS_SRCS}) 543 add_library(caffe2_nvrtc SHARED ${ATen_NVRTC_STUB_SRCS}) 544 if(MSVC) 545 # Delay load nvcuda.dll so we can import torch compiled with cuda on a CPU-only machine 546 set(DELAY_LOAD_FLAGS "-DELAYLOAD:nvcuda.dll;delayimp.lib") 547 else() 548 set(DELAY_LOAD_FLAGS "") 549 endif() 550 551 target_link_libraries(caffe2_nvrtc PRIVATE caffe2::nvrtc ${DELAY_LOAD_FLAGS}) 552 install(TARGETS caffe2_nvrtc DESTINATION "${TORCH_INSTALL_LIB_DIR}") 553 if(USE_NCCL) 554 list(APPEND Caffe2_GPU_SRCS 555 ${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp) 556 endif() 557 if(USE_DISTRIBUTED) 558 append_filelist("libtorch_cuda_distributed_base_sources" Caffe2_GPU_SRCS) 559 if(NOT WIN32) 560 append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_GPU_SRCS) 561 set_source_files_properties( 562 ${TORCH_SRC_DIR}/csrc/distributed/c10d/intra_node_comm.cpp 563 ${TORCH_SRC_DIR}/csrc/distributed/c10d/CudaDMAConnectivity.cpp 564 ${TORCH_SRC_DIR}/csrc/distributed/c10d/CUDASymmetricMemory.cu 565 PROPERTIES COMPILE_FLAGS "-DPYTORCH_C10_DRIVER_API_SUPPORTED=1" 566 ) 567 endif() 568 endif() 569 set_source_files_properties( 570 ${TORCH_ROOT}/aten/src/ATen/cuda/detail/LazyNVRTC.cpp 571 PROPERTIES COMPILE_DEFINITIONS "NVRTC_SHORTHASH=${CUDA_NVRTC_SHORTHASH}" 572 ) 573 set_source_files_properties(${TORCH_SRC_DIR}/csrc/jit/passes/frozen_conv_add_relu_fusion.cpp PROPERTIES COMPILE_FLAGS "-DUSE_CUDA=1") 574 set_source_files_properties(${TORCH_SRC_DIR}/csrc/jit/codegen/cuda/interface.cpp PROPERTIES COMPILE_FLAGS "-DUSE_CUDA=1") 575endif() 576 577if(BUILD_ONEDNN_GRAPH) 578 list(APPEND Caffe2_CPU_SRCS 579 ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/LlgaTensorImpl.cpp 580 ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/graph_fuser.cpp 581 ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/graph_rewriter.cpp 582 ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/graph_helper.cpp 583 ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/register_interface.cpp 584 ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/decompose_silu.cpp 585 ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/interface.cpp 586 ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/kernel.cpp 587 ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/defer_size_check.cpp 588 ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/layout_propagation.cpp 589 ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/prepare_binary.cpp 590 ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/guard_shape.cpp 591 ) 592endif() 593 594if(USE_ROCM) 595 list(APPEND Caffe2_HIP_SRCS ${Caffe2_GPU_HIP_JIT_FUSERS_SRCS}) 596 if(USE_NCCL) 597 list(APPEND Caffe2_HIP_SRCS 598 ${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp) 599 endif() 600 if(USE_DISTRIBUTED) 601 append_filelist("libtorch_cuda_distributed_base_sources" Caffe2_HIP_SRCS) 602 if(NOT WIN32) 603 append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_HIP_SRCS) 604 endif() 605 endif() 606 # caffe2_nvrtc's stubs to driver APIs are useful for HIP. 607 # See NOTE [ ATen NVRTC Stub and HIP ] 608 add_library(caffe2_nvrtc SHARED ${ATen_NVRTC_STUB_SRCS}) 609 target_link_libraries(caffe2_nvrtc ${PYTORCH_HIP_LIBRARIES} ${ROCM_HIPRTC_LIB}) 610 target_include_directories(caffe2_nvrtc PRIVATE ${CMAKE_BINARY_DIR}) 611 target_compile_definitions(caffe2_nvrtc PRIVATE USE_ROCM __HIP_PLATFORM_AMD__) 612 install(TARGETS caffe2_nvrtc DESTINATION "${TORCH_INSTALL_LIB_DIR}") 613endif() 614 615if(NOT NO_API AND NOT BUILD_LITE_INTERPRETER) 616 list(APPEND TORCH_SRCS 617 ${TORCH_SRC_DIR}/csrc/api/src/cuda.cpp 618 ${TORCH_SRC_DIR}/csrc/api/src/data/datasets/mnist.cpp 619 ${TORCH_SRC_DIR}/csrc/api/src/data/samplers/distributed.cpp 620 ${TORCH_SRC_DIR}/csrc/api/src/data/samplers/random.cpp 621 ${TORCH_SRC_DIR}/csrc/api/src/data/samplers/sequential.cpp 622 ${TORCH_SRC_DIR}/csrc/api/src/data/samplers/stream.cpp 623 ${TORCH_SRC_DIR}/csrc/api/src/enum.cpp 624 ${TORCH_SRC_DIR}/csrc/api/src/imethod.cpp 625 ${TORCH_SRC_DIR}/csrc/api/src/serialize.cpp 626 ${TORCH_SRC_DIR}/csrc/api/src/jit.cpp 627 ${TORCH_SRC_DIR}/csrc/api/src/mps.cpp 628 ${TORCH_SRC_DIR}/csrc/api/src/nn/init.cpp 629 ${TORCH_SRC_DIR}/csrc/api/src/nn/module.cpp 630 ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/_functions.cpp 631 ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/activation.cpp 632 ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/adaptive.cpp 633 ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/batchnorm.cpp 634 ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/normalization.cpp 635 ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/instancenorm.cpp 636 ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/conv.cpp 637 ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/dropout.cpp 638 ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/distance.cpp 639 ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/embedding.cpp 640 ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/fold.cpp 641 ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/linear.cpp 642 ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/loss.cpp 643 ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/padding.cpp 644 ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/pixelshuffle.cpp 645 ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/pooling.cpp 646 ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/rnn.cpp 647 ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/upsampling.cpp 648 ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/transformer.cpp 649 ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/container/functional.cpp 650 ${TORCH_SRC_DIR}/csrc/api/src/nn/options/activation.cpp 651 ${TORCH_SRC_DIR}/csrc/api/src/nn/options/adaptive.cpp 652 ${TORCH_SRC_DIR}/csrc/api/src/nn/options/batchnorm.cpp 653 ${TORCH_SRC_DIR}/csrc/api/src/nn/options/embedding.cpp 654 ${TORCH_SRC_DIR}/csrc/api/src/nn/options/instancenorm.cpp 655 ${TORCH_SRC_DIR}/csrc/api/src/nn/options/normalization.cpp 656 ${TORCH_SRC_DIR}/csrc/api/src/nn/options/conv.cpp 657 ${TORCH_SRC_DIR}/csrc/api/src/nn/options/dropout.cpp 658 ${TORCH_SRC_DIR}/csrc/api/src/nn/options/linear.cpp 659 ${TORCH_SRC_DIR}/csrc/api/src/nn/options/padding.cpp 660 ${TORCH_SRC_DIR}/csrc/api/src/nn/options/pooling.cpp 661 ${TORCH_SRC_DIR}/csrc/api/src/nn/options/rnn.cpp 662 ${TORCH_SRC_DIR}/csrc/api/src/nn/options/vision.cpp 663 ${TORCH_SRC_DIR}/csrc/api/src/nn/options/transformer.cpp 664 ${TORCH_SRC_DIR}/csrc/api/src/optim/adagrad.cpp 665 ${TORCH_SRC_DIR}/csrc/api/src/optim/adam.cpp 666 ${TORCH_SRC_DIR}/csrc/api/src/optim/adamw.cpp 667 ${TORCH_SRC_DIR}/csrc/api/src/optim/lbfgs.cpp 668 ${TORCH_SRC_DIR}/csrc/api/src/optim/optimizer.cpp 669 ${TORCH_SRC_DIR}/csrc/api/src/optim/rmsprop.cpp 670 ${TORCH_SRC_DIR}/csrc/api/src/optim/serialize.cpp 671 ${TORCH_SRC_DIR}/csrc/api/src/optim/sgd.cpp 672 ${TORCH_SRC_DIR}/csrc/api/src/optim/schedulers/lr_scheduler.cpp 673 ${TORCH_SRC_DIR}/csrc/api/src/optim/schedulers/step_lr.cpp 674 ${TORCH_SRC_DIR}/csrc/api/src/optim/schedulers/reduce_on_plateau_scheduler.cpp 675 ${TORCH_SRC_DIR}/csrc/api/src/serialize/input-archive.cpp 676 ${TORCH_SRC_DIR}/csrc/api/src/serialize/output-archive.cpp 677 ${TORCH_SRC_DIR}/csrc/api/src/xpu.cpp 678 ) 679endif() 680 681list(APPEND Caffe2_CPU_SRCS ${TORCH_SRCS}) 682 683if(USE_MPS) 684 list(APPEND Caffe2_CPU_SRCS ${Caffe2_MPS_SRCS}) 685endif() 686 687# NOTE [ Linking AVX and non-AVX files ] 688# 689# Regardless of the CPU capabilities, we build some files with AVX2, and AVX512 690# instruction set. If the host CPU doesn't support those, we simply ignore their 691# functions at runtime during dispatch. 692# 693# We must make sure that those files are at the end of the input list when 694# linking the torch_cpu library. Otherwise, the following error scenario might 695# occur: 696# 1. A non-AVX2 and an AVX2 file both call a function defined with the `inline` 697# keyword 698# 2. The compiler decides not to inline this function 699# 3. Two different versions of the machine code are generated for this function: 700# one without AVX2 instructions and one with AVX2. 701# 4. When linking, the AVX2 version is found earlier in the input object files, 702# so the linker makes the entire library use it, even in code not guarded by 703# the dispatcher. 704# 5. A CPU without AVX2 support executes this function, encounters an AVX2 705# instruction and crashes. 706# 707# Thus we organize the input files in the following order: 708# 1. All files with no AVX-n support 709# 2. All files with AVX2 support ('*AVX2.cpp') 710# 3. All files with AVX512 support ('*AVX512.cpp') 711set(Caffe2_CPU_SRCS_NON_AVX) 712set(Caffe2_CPU_SRCS_AVX2) 713set(Caffe2_CPU_SRCS_AVX512) 714foreach(input_filename ${Caffe2_CPU_SRCS}) 715 if(${input_filename} MATCHES "AVX2\\.cpp") 716 list(APPEND Caffe2_CPU_SRCS_AVX2 ${input_filename}) 717 elseif(${input_filename} MATCHES "AVX512\\.cpp") 718 list(APPEND Caffe2_CPU_SRCS_AVX512 ${input_filename}) 719 else() 720 list(APPEND Caffe2_CPU_SRCS_NON_AVX ${input_filename}) 721 endif() 722endforeach(input_filename) 723set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS_NON_AVX} ${Caffe2_CPU_SRCS_AVX2} ${Caffe2_CPU_SRCS_AVX512}) 724 725# ========================================================== 726# END formerly-libtorch sources 727# ========================================================== 728 729if(BUILD_LIBTORCHLESS) 730 find_library(TORCH_LIB torch PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH) 731 find_library(TORCH_CPU_LIB torch_cpu PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH) 732 733 if(USE_CUDA) 734 find_library(TORCH_CUDA_LIB torch_cuda PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH) 735 endif() 736 737 if(USE_ROCM) 738 find_library(TORCH_HIP_LIB torch_hip PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH) 739 endif() 740 741 if(USE_XPU) 742 find_library(TORCH_XPU_LIB torch_xpu PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH) 743 endif() 744 add_subdirectory(../torch torch) 745 # ---[ Torch python bindings build 746 set(TORCH_PYTHON_COMPILE_OPTIONS ${TORCH_PYTHON_COMPILE_OPTIONS} PARENT_SCOPE) 747 set(TORCH_PYTHON_LINK_FLAGS ${TORCH_PYTHON_LINK_FLAGS} PARENT_SCOPE) 748else() 749 set(TORCH_LIB torch) 750 set(TORCH_CPU_LIB torch_cpu) 751 set(TORCH_CUDA_LIB torch_cuda) 752 set(TORCH_HIP_LIB torch_hip) 753 set(TORCH_XPU_LIB torch_xpu) 754endif() 755 756 757if(NOT BUILD_LIBTORCHLESS) 758add_library(torch_cpu ${Caffe2_CPU_SRCS}) 759if(HAVE_SOVERSION) 760 set_target_properties(torch_cpu PROPERTIES 761 VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION}) 762endif() 763torch_compile_options(torch_cpu) # see cmake/public/utils.cmake 764 765# Ignore Wdeprecated-XXX errors from third-party libraries 766if(NOT MSVC) 767 set_source_files_properties(${PROJECT_SOURCE_DIR}/torch/csrc/distributed/c10d/socket.cpp PROPERTIES COMPILE_OPTIONS "-Wno-error=deprecated") 768endif() 769 770if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND NOT USE_IOS AND NOT USE_COREML_DELEGATE) 771 target_compile_options_if_supported(torch_cpu "-Wmissing-prototypes") 772 target_compile_options_if_supported(torch_cpu "-Werror=missing-prototypes") 773 get_target_property(TORCH_CPU_SOURCES torch_cpu SOURCES) 774 foreach(generated_file IN LISTS GENERATED_CXX_TORCH) 775 set_source_files_properties(${generated_file} PROPERTIES COMPILE_OPTIONS "-Wno-missing-prototypes;-Wno-error=missing-prototypes") 776 endforeach() 777 foreach(source_file IN LISTS TORCH_CPU_SOURCES) 778 get_filename_component(source_file "${source_file}" REALPATH) 779 string(FIND "${source_file}" "${CMAKE_BINARY_DIR}" res) 780 if(res GREATER -1) 781 set_source_files_properties(${source_file} PROPERTIES COMPILE_OPTIONS "-Wno-missing-prototypes;-Wno-error=missing-prototypes") 782 continue() 783 endif() 784 string(FIND "${source_file}" "caffe2" res) 785 if(res GREATER -1) 786 set_source_files_properties(${source_file} PROPERTIES COMPILE_OPTIONS "-Wno-missing-prototypes;-Wno-error=missing-prototypes") 787 endif() 788 endforeach() 789endif() 790 791option(TORCH_USE_IWYU "Use include-what-you-use to clean up header inclusion" OFF) 792if(TORCH_USE_IWYU) 793 find_program(iwyu NAMES include-what-you-use) 794 if(iwyu) 795 set(iwyu_cmd 796 "include-what-you-use" 797 "-Xiwyu" 798 "--transitive_includes_only" 799 "-Xiwyu" 800 "--no_fwd_decls" 801 "-Xiwyu" 802 "--prefix_header_includes=keep" 803 "-Xiwyu" 804 "--mapping_file=${CMAKE_CURRENT_LIST_DIR}/../tools/iwyu/all.imp" 805 ) 806 set_property(TARGET torch_cpu PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${iwyu_cmd}) 807 endif() 808endif() 809 810set_property(SOURCE ${ATen_CORE_SRCS} APPEND 811 PROPERTY COMPILE_DEFINITIONS "TORCH_ASSERT_ONLY_METHOD_OPERATORS") 812set_property(SOURCE ${ATen_ATTENTION_KERNEL_SRCS} APPEND 813 PROPERTY COMPILE_DEFINITIONS "TORCH_ASSERT_NO_OPERATORS") 814 815if(USE_MPS OR USE_PYTORCH_METAL) 816 enable_language(OBJC OBJCXX) 817endif() 818 819if(USE_PRECOMPILED_HEADERS) 820 target_precompile_headers(torch_cpu PRIVATE 821 "$<$<COMPILE_LANGUAGE:CXX>:ATen/core/ATen_pch.h>") 822 # Exclude some files from using PCH 823 set_source_files_properties( 824 # Not built with OpenMP, so PCH is invalid 825 ${Torch_SOURCE_DIR}/aten/src/ATen/MapAllocator.cpp 826 # Builds with incompatible compiler flags 827 ${Caffe2_CPU_SRCS_AVX2} 828 ${Caffe2_CPU_SRCS_AVX512} 829 PROPERTIES SKIP_PRECOMPILE_HEADERS ON) 830endif() 831 832# Pass path to PocketFFT 833if(AT_POCKETFFT_ENABLED) 834 set_source_files_properties( 835 "${PROJECT_SOURCE_DIR}/aten/src/ATen/native/mkl/SpectralOps.cpp" 836 PROPERTIES INCLUDE_DIRECTORIES "${POCKETFFT_INCLUDE_DIR}") 837endif() 838 839if(CMAKE_COMPILER_IS_GNUCXX AND BUILD_LIBTORCH_CPU_WITH_DEBUG) 840 # To enable debug fission we need to build libtorch_cpu with debug info on, 841 # but this increases link time and peak memory usage if we use the 842 # REL_WITH_DEB_INFO env var since that enables it for everything, but it's 843 # only really necessary for libtorch_cpu. 844 target_compile_options(torch_cpu PRIVATE "-g") 845endif() 846 847if(USE_LLVM AND LLVM_FOUND) 848 llvm_map_components_to_libnames(LLVM_LINK_LIBS 849 support core analysis executionengine instcombine 850 scalaropts transformutils ${LLVM_TARGETS_TO_BUILD} orcjit) 851 target_link_libraries(torch_cpu PRIVATE ${LLVM_LINK_LIBS}) 852 if(APPLE) 853 set(LINKER_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/unexported_symbols.lds") 854 set_target_properties(torch_cpu PROPERTIES LINK_DEPENDS ${LINKER_SCRIPT}) 855 set_target_properties(torch_cpu PROPERTIES LINK_FLAGS "-Wl,-unexported_symbols_list,${LINKER_SCRIPT}") 856 elseif(UNIX) 857 set(LINKER_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/version_script.lds") 858 set_target_properties(torch_cpu PROPERTIES LINK_DEPENDS ${LINKER_SCRIPT}) 859 target_link_libraries(torch_cpu PRIVATE "-Wl,--version-script=${LINKER_SCRIPT}") 860 endif() 861endif(USE_LLVM AND LLVM_FOUND) 862 863# This is required for older versions of CMake, which don't allow 864# specifying add_library() without a list of source files 865set(DUMMY_EMPTY_FILE ${CMAKE_BINARY_DIR}/empty.cpp) 866 867if(MSVC) 868 set(DUMMY_FILE_CONTENT "__declspec(dllexport) int ignore_this_library_placeholder(){return 0\\;}") 869else() 870 set(DUMMY_FILE_CONTENT "") 871endif() 872 873file(WRITE ${DUMMY_EMPTY_FILE} ${DUMMY_FILE_CONTENT}) 874 875# Wrapper library for people who link against torch and expect both CPU and CUDA support 876# Contains "torch_cpu" and "torch_cuda" 877add_library(torch ${DUMMY_EMPTY_FILE}) 878if(HAVE_SOVERSION) 879 set_target_properties(torch PROPERTIES 880 VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION}) 881endif() 882 883if(USE_ROCM) 884 filter_list(__caffe2_hip_srcs_cpp Caffe2_HIP_SRCS "\\.(cu|hip)$") 885 set_source_files_properties(${__caffe2_hip_srcs_cpp} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) 886endif() 887 888# Compile exposed libraries. 889if(USE_ROCM) 890 set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE) 891 list(APPEND Caffe2_HIP_SRCS ${GENERATED_CXX_TORCH_CUDA}) 892 hip_add_library(torch_hip ${Caffe2_HIP_SRCS}) 893 if(USE_FLASH_ATTENTION) 894 target_link_libraries(torch_hip PRIVATE __caffe2_aotriton) 895 endif() 896 set(CUDA_LINK_LIBRARIES_KEYWORD) 897 torch_compile_options(torch_hip) # see cmake/public/utils.cmake 898 # TODO: Not totally sure if this is live or not 899 if(USE_NCCL) 900 target_link_libraries(torch_hip PRIVATE __caffe2_nccl) 901 target_compile_definitions(torch_hip PRIVATE USE_NCCL) 902 endif() 903 904 if(USE_PRECOMPILED_HEADERS) 905 target_precompile_headers(torch_hip PRIVATE 906 "$<$<COMPILE_LANGUAGE:CXX>:ATen/core/ATen_pch.h>") 907 endif() 908elseif(USE_CUDA) 909 set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE) 910 list(APPEND Caffe2_GPU_SRCS ${GENERATED_CXX_TORCH_CUDA}) 911 if(CUDA_SEPARABLE_COMPILATION) 912 # Separate compilation fails when kernels using `thrust::sort_by_key` 913 # are linked with the rest of CUDA code. Workaround by linking them separately. 914 add_library(torch_cuda ${Caffe2_GPU_SRCS} ${Caffe2_GPU_CU_SRCS}) 915 set_property(TARGET torch_cuda PROPERTY CUDA_SEPARABLE_COMPILATION ON) 916 917 add_library(torch_cuda_w_sort_by_key OBJECT 918 ${Caffe2_GPU_SRCS_W_SORT_BY_KEY} 919 ${Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY}) 920 set_property(TARGET torch_cuda_w_sort_by_key PROPERTY CUDA_SEPARABLE_COMPILATION OFF) 921 target_link_libraries(torch_cuda PRIVATE torch_cuda_w_sort_by_key) 922 else() 923 add_library(torch_cuda 924 ${Caffe2_GPU_SRCS} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY} 925 ${Caffe2_GPU_CU_SRCS} ${Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY}) 926 endif() 927 set(CUDA_LINK_LIBRARIES_KEYWORD) 928 torch_compile_options(torch_cuda) # see cmake/public/utils.cmake 929 target_compile_definitions(torch_cuda PRIVATE USE_CUDA) 930 931 if(USE_CUFILE) 932 target_link_libraries(torch_cuda PRIVATE torch::cufile) 933 target_compile_definitions(torch_cuda PRIVATE USE_CUFILE) 934 endif() 935 if(USE_CUSPARSELT) 936 target_link_libraries(torch_cuda PRIVATE torch::cusparselt) 937 target_compile_definitions(torch_cuda PRIVATE USE_CUSPARSELT) 938 endif() 939 if(USE_CUDSS) 940 target_link_libraries(torch_cuda PRIVATE torch::cudss) 941 target_compile_definitions(torch_cuda PRIVATE USE_CUDSS) 942 endif() 943 if(USE_NCCL) 944 target_link_libraries(torch_cuda PRIVATE __caffe2_nccl) 945 target_compile_definitions(torch_cuda PRIVATE USE_NCCL) 946 endif() 947 if(USE_UCC) 948 target_link_libraries(torch_cuda PRIVATE __caffe2_ucc) 949 target_compile_definitions(torch_cuda PRIVATE USE_UCC) 950 endif() 951 if(USE_FLASH_ATTENTION) 952 target_compile_definitions(torch_cuda PRIVATE USE_FLASH_ATTENTION) 953 endif() 954 if(USE_MEM_EFF_ATTENTION) 955 target_compile_definitions(torch_cuda PRIVATE USE_MEM_EFF_ATTENTION) 956 endif() 957 if(BUILD_LAZY_CUDA_LINALG) 958 add_library(torch_cuda_linalg ${ATen_CUDA_LINALG_SRCS}) 959 target_compile_definitions(torch_cuda_linalg PRIVATE USE_CUDA BUILD_LAZY_CUDA_LINALG) 960 # Library order is important during static linking 961 # `torch::magma` should be mentioned before other CUDA 962 # to transitively include all symbols present in torch_cuda/torch_cpu 963 if(USE_MAGMA) 964 target_link_libraries(torch_cuda_linalg PRIVATE torch::magma) 965 # CUDAHooks reports version of MAGMA PyTorch was compiled against, i.e. needs to be able to include magma headers 966 get_target_property(HOOKS_INCLUDE_DIRECTORIES torch_cuda INCLUDE_DIRECTORIES) 967 if(NOT "${MAGMA_INCLUDE_DIR}" IN_LIST HOOKS_INCLUDE_DIRECTORIES) 968 set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/cuda/detail/CUDAHooks.cpp PROPERTIES INCLUDE_DIRECTORIES "${MAGMA_INCLUDE_DIR}") 969 endif() 970 endif() 971 target_link_libraries(torch_cuda_linalg PRIVATE 972 torch_cpu 973 torch_cuda 974 ) 975 if($ENV{ATEN_STATIC_CUDA}) 976 if(CUDA_VERSION_MAJOR LESS_EQUAL 11) 977 target_link_libraries(torch_cuda_linalg PRIVATE 978 CUDA::cusolver_static 979 ${CUDAToolkit_LIBRARY_DIR}/liblapack_static.a # needed for libcusolver_static 980 ) 981 elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 12) 982 target_link_libraries(torch_cuda_linalg PRIVATE 983 CUDA::cusolver_static 984 ${CUDAToolkit_LIBRARY_DIR}/libcusolver_lapack_static.a # needed for libcusolver_static 985 ) 986 endif() 987 else() 988 target_link_libraries(torch_cuda_linalg PRIVATE 989 CUDA::cusolver 990 ) 991 endif() 992 # NS: TODO, is this really necessary? 993 if(USE_MAGMA AND CAFFE2_STATIC_LINK_CUDA) 994 target_link_libraries(torch_cuda_linalg PRIVATE 995 CUDA::culibos ${CMAKE_DL_LIBS}) 996 endif() 997 set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/cuda/LinearAlgebraStubs.cpp PROPERTIES COMPILE_FLAGS "-DBUILD_LAZY_CUDA_LINALG") 998 install(TARGETS torch_cuda_linalg DESTINATION "${TORCH_INSTALL_LIB_DIR}") 999 endif() 1000 1001 if(USE_PRECOMPILED_HEADERS) 1002 target_precompile_headers(torch_cuda PRIVATE 1003 "$<$<COMPILE_LANGUAGE:CXX>:ATen/core/ATen_pch.h>") 1004 endif() 1005 1006 # Apply suggestion from comment https://github.com/pytorch/pytorch/issues/113053#issuecomment-2115375714 1007 if(LINUX) 1008 set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/cuda/CUDASparseDescriptors.cpp PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations) 1009 set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/cuda/CUDASparseBlas.cpp PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations) 1010 set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cpp PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations) 1011 set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/sparse/cuda/SparseBlasImpl.cpp PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations) 1012 endif() 1013endif() 1014 1015if(USE_XPU) 1016 add_library(torch_xpu ${Caffe2_XPU_SRCS}) 1017 torch_compile_options(torch_xpu) # see cmake/public/utils.cmake 1018 target_compile_definitions(torch_xpu PRIVATE USE_XPU) 1019 1020 # ATen XPU implementation 1021 set(TORCH_XPU_OPS_DIR ${TORCH_ROOT}/third_party/torch-xpu-ops) 1022 set(TORCH_XPU_OPS_REPO_URL https://github.com/intel/torch-xpu-ops.git) 1023 file(READ "${TORCH_ROOT}/third_party/xpu.txt" TORCH_XPU_OPS_COMMIT) 1024 string(REGEX REPLACE "\n$" "" TORCH_XPU_OPS_COMMIT "${TORCH_XPU_OPS_COMMIT}") 1025 if(NOT EXISTS "${TORCH_XPU_OPS_DIR}/.git") 1026 execute_process( 1027 COMMAND git clone --quiet ${TORCH_XPU_OPS_REPO_URL} ${TORCH_XPU_OPS_DIR} 1028 RESULT_VARIABLE _exitcode) 1029 if(NOT _exitcode EQUAL 0) 1030 message(FATAL_ERROR "Fail to clone ${TORCH_XPU_OPS_REPO_URL}") 1031 endif() 1032 endif() 1033 execute_process( 1034 COMMAND git fetch --quiet 1035 WORKING_DIRECTORY ${TORCH_XPU_OPS_DIR} 1036 RESULT_VARIABLE _exitcode) 1037 if(NOT _exitcode EQUAL 0) 1038 message(FATAL_ERROR "Fail to fetch ${TORCH_XPU_OPS_REPO_URL}") 1039 endif() 1040 execute_process( 1041 COMMAND git checkout --quiet ${TORCH_XPU_OPS_COMMIT} 1042 WORKING_DIRECTORY ${TORCH_XPU_OPS_DIR} 1043 RESULT_VARIABLE _exitcode) 1044 if(NOT _exitcode EQUAL 0) 1045 message(FATAL_ERROR "Fail to checkout ${TORCH_XPU_OPS_REPO_URL} to ${TORCH_XPU_OPS_COMMIT}") 1046 endif() 1047 1048 set(TORCH_XPU_OPS_INCLUDE_DIRS 1049 ${TORCH_SRC_DIR}/csrc/api 1050 ${TORCH_SRC_DIR}/csrc/api/include 1051 ${Caffe2_CPU_INCLUDE} 1052 ${Caffe2_XPU_INCLUDE}) 1053 # Pass the target as a dependency so that ATen headers generation 1054 # could be followed by torch-xpu-ops build. 1055 # 1. Sources in torch-xpu-ops depend on generated ATen headers. 1056 # 2. Using add_custom_command in torch-xpu-ops to define sycl device sources 1057 # compilation. add_custom_command requires an explicit dependency. 1058 list(APPEND ${Caffe2_XPU_INCLUDE} ${TORCH_XPU_OPS_DIR}/src/ATen/) 1059 set(TORCH_XPU_OPS_PYTORCH_DEPS ATEN_CPU_FILES_GEN_TARGET) 1060 1061 add_subdirectory(${TORCH_ROOT}/third_party/torch-xpu-ops 1062 ${CMAKE_BINARY_DIR}/caffe2/aten_xpu) 1063 if(NOT TARGET torch_xpu_ops) 1064 message(WARNING "Failed to include ATen XPU implementation target") 1065 else() 1066 target_link_libraries(torch_xpu PRIVATE torch_xpu_ops) 1067 if(MSVC) 1068 # Windows 1069 target_link_libraries(torch_xpu PRIVATE 1070 "-WHOLEARCHIVE:\"$<TARGET_FILE:torch_xpu_ops>\"") 1071 else() 1072 # Linux 1073 target_link_libraries(torch_xpu PRIVATE 1074 "-Wl,--whole-archive,\"$<TARGET_FILE:torch_xpu_ops>\" -Wl,--no-whole-archive") 1075 endif() 1076 1077 # Set cached ${ATen_XPU_INCLUDE_DIRS} to torch 1078 include_directories(SYSTEM ${ATen_XPU_INCLUDE_DIRS}) 1079 1080 endif() 1081endif() 1082 1083if(NOT MSVC AND USE_XNNPACK) 1084 TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv) 1085endif() 1086 1087# ========================================================== 1088# formerly-libtorch flags 1089# ========================================================== 1090 1091 1092# Build model tracer for tracing-based selective build 1093if(TRACING_BASED AND NOT BUILD_LITE_INTERPRETER AND NOT INTERN_BUILD_MOBILE) 1094 add_subdirectory( 1095 ${TORCH_ROOT}/torch/csrc/jit/mobile/model_tracer 1096 ${CMAKE_BINARY_DIR}/model_tracer 1097 ) 1098 string(APPEND CMAKE_CXX_FLAGS " -DENABLE_RECORD_KERNEL_FUNCTION_DTYPE") 1099endif() 1100 1101# Codegen selected_mobile_ops.h for template selective build 1102if(BUILD_LITE_INTERPRETER AND SELECTED_OP_LIST) 1103 message("running gen_selected_mobile_ops_header for: '${SELECTED_OP_LIST}'") 1104 file(GLOB lite_interpreter_python "${TOOLS_PATH}/lite_interpreter/*.py") 1105 if(${TRACING_BASED}) 1106 file(GLOB code_analyzer_python "${TOOLS_PATH}/code_analyzer/*.py") 1107 add_custom_command( 1108 OUTPUT ${CMAKE_BINARY_DIR}/aten/src/ATen/selected_mobile_ops.h 1109 COMMAND 1110 Python::Interpreter 1111 -m tools.code_analyzer.gen_oplist 1112 --model_file_list_path "${SELECTED_OP_LIST}" 1113 --output_dir "${CMAKE_BINARY_DIR}/aten/src/ATen" 1114 DEPENDS 1115 ${torchgen_python} 1116 ${lite_interpreter_python} 1117 ${code_analyzer_python} 1118 "${SELECTED_OP_LIST}" 1119 "${TORCH_ROOT}/aten/src/ATen/native/native_functions.yaml" 1120 WORKING_DIRECTORY "${TORCH_ROOT}") 1121 else() 1122 add_custom_command( 1123 OUTPUT ${CMAKE_BINARY_DIR}/aten/src/ATen/selected_mobile_ops.h 1124 COMMAND 1125 Python::Interpreter 1126 -m tools.lite_interpreter.gen_selected_mobile_ops_header 1127 --yaml_file_path "${SELECTED_OP_LIST}" 1128 --output_file_path "${CMAKE_BINARY_DIR}/aten/src/ATen" 1129 DEPENDS 1130 ${torchgen_python} 1131 ${lite_interpreter_python} 1132 "${SELECTED_OP_LIST}" 1133 "${TORCH_ROOT}/aten/src/ATen/native/native_functions.yaml" 1134 WORKING_DIRECTORY "${TORCH_ROOT}") 1135 endif() 1136 1137 add_custom_target( 1138 __selected_mobile_ops_header_gen 1139 DEPENDS ${CMAKE_BINARY_DIR}/aten/src/ATen/selected_mobile_ops.h) 1140 add_dependencies(torch_cpu __selected_mobile_ops_header_gen) 1141endif() 1142 1143if(NOT NO_API) 1144 target_include_directories(torch_cpu PRIVATE 1145 ${TORCH_SRC_DIR}/csrc/api 1146 ${TORCH_SRC_DIR}/csrc/api/include) 1147endif() 1148 1149if(USE_CUDA AND MSVC) 1150 # -INCLUDE is used to ensure torch_cuda is linked against in a project that relies on them. 1151 # Related issue: https://github.com/pytorch/pytorch/issues/31611 1152 target_link_libraries(torch_cuda INTERFACE "-INCLUDE:?warp_size@cuda@at@@YAHXZ") 1153endif() 1154 1155if(NOT BUILD_LITE_INTERPRETER) 1156 set(TH_CPU_INCLUDE 1157 # dense 1158 aten/src/TH 1159 ${CMAKE_CURRENT_BINARY_DIR}/aten/src/TH 1160 ${TORCH_ROOT}/aten/src 1161 ${CMAKE_CURRENT_BINARY_DIR}/aten/src 1162 1163 ${CMAKE_BINARY_DIR}/aten/src) 1164 target_include_directories(torch_cpu PRIVATE ${TH_CPU_INCLUDE}) 1165endif() 1166 1167set(ATen_CPU_INCLUDE 1168 ${TORCH_ROOT}/aten/src 1169 ${CMAKE_CURRENT_BINARY_DIR}/../aten/src 1170 ${CMAKE_BINARY_DIR}/aten/src) 1171 1172if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU") 1173 set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/QuantizedLinear.cpp PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations) 1174 set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/RNN.cpp PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations) 1175 set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/quantized/cpu/qlinear_prepack.cpp PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations) 1176 set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/quantized/qlinear_unpack.cpp PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations) 1177endif() 1178 1179target_include_directories(torch_cpu PRIVATE ${ATen_CPU_INCLUDE}) 1180 1181target_include_directories(torch_cpu PRIVATE 1182 ${TORCH_SRC_DIR}/csrc) 1183 1184target_include_directories(torch_cpu PRIVATE 1185 ${TORCH_ROOT}/third_party/miniz-2.1.0) 1186 1187target_include_directories(torch_cpu PRIVATE 1188 ${TORCH_ROOT}/third_party/kineto/libkineto/include) 1189 1190if(USE_KINETO) 1191 target_include_directories(torch_cpu PRIVATE 1192 ${TORCH_ROOT}/third_party/kineto/libkineto/src) 1193endif() 1194 1195target_include_directories(torch_cpu PRIVATE 1196 ${TORCH_ROOT}/third_party/cpp-httplib) 1197 1198target_include_directories(torch_cpu PRIVATE 1199 ${TORCH_ROOT}/third_party/nlohmann/include) 1200 1201install(DIRECTORY "${TORCH_SRC_DIR}/csrc" 1202 DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch 1203 FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp") 1204install(FILES 1205 "${TORCH_SRC_DIR}/script.h" 1206 "${TORCH_SRC_DIR}/extension.h" 1207 "${TORCH_SRC_DIR}/custom_class.h" 1208 "${TORCH_SRC_DIR}/library.h" 1209 "${TORCH_SRC_DIR}/custom_class_detail.h" 1210 DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch) 1211if(BUILD_TEST) 1212 if(BUILD_EXECUTORCH) 1213 add_subdirectory( 1214 ${TORCH_ROOT}/test/edge 1215 ${CMAKE_BINARY_DIR}/test_edge_op_registration 1216 ) 1217 endif() 1218 if(BUILD_LITE_INTERPRETER) 1219 add_subdirectory( 1220 ${TORCH_ROOT}/test/cpp/lite_interpreter_runtime 1221 ${CMAKE_BINARY_DIR}/test_lite_interpreter_runtime 1222 ) 1223 add_subdirectory( 1224 ${TORCH_ROOT}/test/mobile/lightweight_dispatch 1225 ${CMAKE_BINARY_DIR}/test_codegen_unboxing 1226 ) 1227 else() 1228 add_subdirectory(${TORCH_ROOT}/test/cpp/jit ${CMAKE_BINARY_DIR}/test_jit) 1229 add_subdirectory(${TORCH_ROOT}/test/inductor ${CMAKE_BINARY_DIR}/test_inductor) 1230 add_subdirectory( 1231 ${TORCH_ROOT}/test/cpp/tensorexpr 1232 ${CMAKE_BINARY_DIR}/test_tensorexpr 1233 ) 1234 if(USE_DISTRIBUTED) 1235 add_subdirectory(${TORCH_ROOT}/test/cpp/c10d ${CMAKE_BINARY_DIR}/test_cpp_c10d) 1236 if(NOT WIN32) 1237 add_subdirectory(${TORCH_ROOT}/test/cpp/dist_autograd ${CMAKE_BINARY_DIR}/dist_autograd) 1238 add_subdirectory(${TORCH_ROOT}/test/cpp/rpc ${CMAKE_BINARY_DIR}/test_cpp_rpc) 1239 endif() 1240 endif() 1241 if(NOT NO_API) 1242 add_subdirectory(${TORCH_ROOT}/test/cpp/api ${CMAKE_BINARY_DIR}/test_api) 1243 endif() 1244 1245 if(USE_LLVM AND LLVM_FOUND) 1246 add_subdirectory( 1247 ${TORCH_ROOT}/test/mobile/nnc 1248 ${CMAKE_BINARY_DIR}/test_mobile_nnc 1249 ) 1250 endif() 1251 add_subdirectory(${TORCH_ROOT}/test/cpp/lazy 1252 ${CMAKE_BINARY_DIR}/test_lazy) 1253 endif() 1254 if(BUILD_AOT_INDUCTOR_TEST) 1255 add_subdirectory( 1256 ${TORCH_ROOT}/test/cpp/aoti_abi_check 1257 ${CMAKE_BINARY_DIR}/test_aoti_abi_check) 1258 add_subdirectory( 1259 ${TORCH_ROOT}/test/cpp/aoti_inference 1260 ${CMAKE_BINARY_DIR}/test_aoti_inference) 1261 endif() 1262endif() 1263 1264if(CMAKE_SYSTEM_NAME STREQUAL "Linux") 1265 include(../cmake/CheckAbi.cmake) 1266endif() 1267 1268# CMake config for external projects. 1269configure_file( 1270 ${PROJECT_SOURCE_DIR}/cmake/TorchConfigVersion.cmake.in 1271 ${PROJECT_BINARY_DIR}/TorchConfigVersion.cmake 1272 @ONLY) 1273configure_file( 1274 ${TORCH_ROOT}/cmake/TorchConfig.cmake.in 1275 ${PROJECT_BINARY_DIR}/TorchConfig.cmake 1276 @ONLY) 1277install(FILES 1278 ${PROJECT_BINARY_DIR}/TorchConfigVersion.cmake 1279 ${PROJECT_BINARY_DIR}/TorchConfig.cmake 1280 DESTINATION share/cmake/Torch) 1281 1282# ---[ Torch python bindings build 1283add_subdirectory(../torch torch) 1284set(TORCH_PYTHON_COMPILE_OPTIONS ${TORCH_PYTHON_COMPILE_OPTIONS} PARENT_SCOPE) 1285set(TORCH_PYTHON_LINK_FLAGS ${TORCH_PYTHON_LINK_FLAGS} PARENT_SCOPE) 1286# ========================================================== 1287# END formerly-libtorch flags 1288# ========================================================== 1289 1290if(NOT NO_API) 1291 target_include_directories(torch_cpu PUBLIC 1292 $<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/api> 1293 $<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/api/include>) 1294endif() 1295 1296if(USE_ROCM) 1297 target_compile_definitions(torch_hip PRIVATE 1298 USE_ROCM 1299 __HIP_PLATFORM_AMD__ 1300 ) 1301 # NB: Massive hack. torch/csrc/jit/codegen/fuser/codegen.cpp includes 1302 # torch/csrc/jit/codegen/fuser/cuda/resource_strings.h which changes the 1303 # strings depending on if you're __HIP_PLATFORM_AMD__ or not. 1304 # But that file is in torch_cpu! So, against all odds, this macro 1305 # has to be set on torch_cpu too. I also added it to torch for 1306 # better luck 1307 target_compile_definitions(torch_cpu PRIVATE 1308 USE_ROCM 1309 __HIP_PLATFORM_AMD__ 1310 ) 1311 target_compile_definitions(torch PRIVATE 1312 USE_ROCM 1313 __HIP_PLATFORM_AMD__ 1314 ) 1315 1316 if(NOT ROCM_SOURCE_DIR) 1317 set(ROCM_SOURCE_DIR "$ENV{ROCM_SOURCE_DIR}") 1318 endif() 1319 if($ROCM_SOURCE_DIR STREQUAL "") 1320 set(ROCM_SOURCE_DIR "/opt/rocm") 1321 endif() 1322 message(INFO "caffe2 ROCM_SOURCE_DIR = ${ROCM_SOURCE_DIR}") 1323 target_include_directories(torch_hip PRIVATE 1324 ${ROCM_SOURCE_DIR}/include 1325 ${ROCM_SOURCE_DIR}/hcc/include 1326 ${ROCM_SOURCE_DIR}/rocblas/include 1327 ${ROCM_SOURCE_DIR}/hipsparse/include 1328 ) 1329 if(USE_FLASH_ATTENTION) 1330 target_compile_definitions(torch_hip PRIVATE USE_FLASH_ATTENTION) 1331 endif() 1332 if(USE_MEM_EFF_ATTENTION) 1333 target_compile_definitions(torch_hip PRIVATE USE_MEM_EFF_ATTENTION) 1334 endif() 1335endif() 1336 1337if(BUILD_LITE_INTERPRETER) 1338 target_compile_definitions(torch_cpu PRIVATE BUILD_LITE_INTERPRETER) 1339 # Enable template selective build only when SELECTED_OP_LIST is provided. 1340 if(SELECTED_OP_LIST) 1341 target_compile_definitions(torch_cpu PRIVATE TEMPLATE_SELECTIVE_BUILD) 1342 endif() 1343endif() 1344 1345 1346# Pass USE_DISTRIBUTED to torch_cpu, as some codes in jit/pickler.cpp and 1347# jit/unpickler.cpp need to be compiled only when USE_DISTRIBUTED is set 1348if(USE_DISTRIBUTED) 1349 target_compile_definitions(torch_cpu PUBLIC USE_DISTRIBUTED) 1350 if(USE_GLOO AND USE_C10D_GLOO) 1351 target_compile_definitions(torch_cpu PUBLIC USE_C10D_GLOO) 1352 endif() 1353 if(USE_UCC AND USE_C10D_UCC) 1354 target_compile_definitions(torch_cpu PUBLIC USE_C10D_UCC) 1355 if(USE_CUDA) 1356 target_compile_definitions(torch_cuda PUBLIC USE_C10D_UCC) 1357 endif() 1358 endif() 1359 if(USE_NCCL AND USE_C10D_NCCL) 1360 if(USE_ROCM) 1361 target_compile_definitions(torch_hip PUBLIC USE_C10D_NCCL) 1362 else() 1363 target_compile_definitions(torch_cuda PUBLIC USE_C10D_NCCL) 1364 endif() 1365 endif() 1366 if(USE_MPI AND USE_C10D_MPI) 1367 if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU") 1368 set_source_files_properties( 1369 "${TORCH_SRC_DIR}/csrc/distributed/c10d/ProcessGroupMPI.cpp" 1370 PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations) 1371 endif() 1372 target_compile_definitions(torch_cpu PUBLIC USE_C10D_MPI) 1373 endif() 1374 # Pass USE_RPC in order to reduce use of 1375 # #if defined(USE_DISTRIBUTED) && !defined(_WIN32) 1376 # need to be removed when RPC is supported 1377 if(NOT WIN32) 1378 target_compile_definitions(torch_cpu PUBLIC USE_RPC) 1379 endif() 1380 # Pass USE_TENSORPIPE to torch_cpu as some parts of rpc/utils.cpp 1381 # can only be compiled with USE_TENSORPIPE is set. 1382 if(USE_TENSORPIPE) 1383 target_compile_definitions(torch_cpu PUBLIC USE_TENSORPIPE) 1384 endif() 1385endif() 1386 1387if(NOT INTERN_BUILD_MOBILE) 1388 if(${CAFFE2_LINK_LOCAL_PROTOBUF}) 1389 target_link_libraries(torch_cpu INTERFACE protobuf::libprotobuf) 1390 else() 1391 target_link_libraries(torch_cpu PUBLIC protobuf::libprotobuf) 1392 endif() 1393endif() 1394 1395if($ENV{TH_BINARY_BUILD}) 1396 if(NOT MSVC AND USE_CUDA AND NOT APPLE) 1397 # Note [Extra MKL symbols for MAGMA in torch_cpu] 1398 # 1399 # When we build CUDA libraries and link against MAGMA, MAGMA makes use of 1400 # some BLAS symbols in its CPU fallbacks when it has no GPU versions 1401 # of kernels. Previously, we ensured the BLAS symbols were filled in by 1402 # MKL by linking torch_cuda with BLAS, but when we are statically linking 1403 # against MKL (when we do wheel builds), this actually ends up pulling in a 1404 # decent chunk of MKL into torch_cuda, inflating our torch_cuda binary 1405 # size by 8M. torch_cpu exposes most of the MKL symbols we need, but 1406 # empirically we determined that there are four which it doesn't provide. If 1407 # we link torch_cpu with these --undefined symbols, we can ensure they 1408 # do get pulled in, and then we can avoid statically linking in MKL to 1409 # torch_cuda at all! 1410 # 1411 # We aren't really optimizing for binary size on Windows (and this link 1412 # line doesn't work on Windows), so don't do it there. 1413 # 1414 # These linker commands do not work on OS X, do not attempt this there. 1415 # (It shouldn't matter anyway, though, because OS X has dropped CUDA support) 1416 foreach(_symb slaed0 daled0 dormql sormql zheevd cheevd) 1417 STRING(APPEND _undefined_link_flags " -Wl,--undefined=mkl_lapack_${_symb}") 1418 endforeach(_symb) 1419 set_target_properties(torch_cpu PROPERTIES LINK_FLAGS ${_undefined_link_flags}) 1420 1421 endif() 1422endif() 1423 1424target_link_libraries(torch_cpu PUBLIC c10) 1425target_link_libraries(torch_cpu PUBLIC ${Caffe2_PUBLIC_DEPENDENCY_LIBS}) 1426target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_LIBS}) 1427target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS}) 1428if(USE_MPI) 1429 target_link_libraries(torch_cpu PRIVATE MPI::MPI_CXX) 1430endif() 1431target_include_directories(torch_cpu INTERFACE $<INSTALL_INTERFACE:include>) 1432target_include_directories(torch_cpu PRIVATE ${Caffe2_CPU_INCLUDE}) 1433target_include_directories(torch_cpu SYSTEM PRIVATE "${Caffe2_DEPENDENCY_INCLUDE}") 1434 1435target_compile_definitions(torch_cpu PRIVATE CAFFE2_BUILD_MAIN_LIB) 1436if(USE_CUDA) 1437 target_compile_definitions(torch_cuda PRIVATE TORCH_CUDA_BUILD_MAIN_LIB) 1438elseif(USE_ROCM) 1439 target_compile_definitions(torch_hip PRIVATE TORCH_HIP_BUILD_MAIN_LIB) 1440endif() 1441 1442if(USE_XPU) 1443 target_compile_definitions(torch_xpu PRIVATE TORCH_XPU_BUILD_MAIN_LIB) 1444endif() 1445 1446set(EXPERIMENTAL_SINGLE_THREAD_POOL "0" CACHE STRING 1447 "Experimental option to use a single thread pool for inter- and intra-op parallelism") 1448if("${EXPERIMENTAL_SINGLE_THREAD_POOL}") 1449 target_compile_definitions(torch_cpu PUBLIC "-DAT_EXPERIMENTAL_SINGLE_THREAD_POOL=1") 1450endif() 1451 1452if(MSVC AND BUILD_SHARED_LIBS) 1453 # ONNX is linked statically and needs to be exported from this library 1454 # to be used externally. Make sure that references match the export. 1455 target_compile_options(torch_cpu PRIVATE "-DONNX_BUILD_MAIN_LIB") 1456endif() 1457 1458caffe2_interface_library(torch_cpu torch_cpu_library) 1459 1460if(USE_CUDA) 1461 caffe2_interface_library(torch_cuda torch_cuda_library) 1462elseif(USE_ROCM) 1463 caffe2_interface_library(torch_hip torch_hip_library) 1464elseif(USE_XPU) 1465 caffe2_interface_library(torch_xpu torch_xpu_library) 1466endif() 1467 1468caffe2_interface_library(torch torch_library) 1469 1470install(TARGETS torch_cpu torch_cpu_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}") 1471 1472if(USE_CUDA) 1473 install(TARGETS torch_cuda torch_cuda_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}") 1474elseif(USE_ROCM) 1475 install(TARGETS torch_hip torch_hip_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}") 1476elseif(USE_XPU) 1477 install(TARGETS torch_xpu torch_xpu_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}") 1478endif() 1479 1480install(TARGETS torch torch_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}") 1481 1482target_link_libraries(torch PUBLIC torch_cpu_library) 1483 1484if(USE_CUDA) 1485 target_link_libraries(torch PUBLIC torch_cuda_library) 1486elseif(USE_ROCM) 1487 target_link_libraries(torch PUBLIC torch_hip_library) 1488endif() 1489 1490if(USE_XPU) 1491 target_link_libraries(torch PUBLIC torch_xpu_library) 1492endif() 1493 1494if(PRINT_CMAKE_DEBUG_INFO) 1495 print_target_properties(torch) 1496 print_target_properties(torch_cpu) 1497endif() 1498 1499# Install PDB files for MSVC builds 1500if(MSVC AND BUILD_SHARED_LIBS) 1501 install(FILES $<TARGET_PDB_FILE:torch_cpu> DESTINATION "${TORCH_INSTALL_LIB_DIR}" OPTIONAL) 1502 if(USE_CUDA) 1503 install(FILES $<TARGET_PDB_FILE:torch_cuda> DESTINATION "${TORCH_INSTALL_LIB_DIR}" OPTIONAL) 1504 elseif(USE_ROCM) 1505 install(FILES $<TARGET_PDB_FILE:torch_hip> DESTINATION "${TORCH_INSTALL_LIB_DIR}" OPTIONAL) 1506 endif() 1507endif() 1508 1509# ---[ CUDA library. 1510if(USE_CUDA) 1511 # FIXME: If kineto is linked with CUPTI it pollutes torch_cpu with CUDA dependencies 1512 # Even worse, it never declares that it depends on cudart, but calls the API, see 1513 # https://github.com/pytorch/kineto/blob/aef2f5c0f15e3be52406ac0b885e8689de6bc9f6/libkineto/src/CudaDeviceProperties.cpp#L24 1514 if(USE_KINETO AND NOT MSVC AND NOT LIBKINETO_NOCUPTI) 1515 target_link_libraries(torch_cpu PRIVATE torch::cudart) 1516 endif() 1517 target_link_libraries(torch_cuda INTERFACE torch::cudart) 1518 target_link_libraries(torch_cuda PUBLIC c10_cuda) 1519 if(TARGET torch::nvtx3) 1520 target_link_libraries(torch_cuda PRIVATE torch::nvtx3) 1521 else() 1522 target_link_libraries(torch_cuda PUBLIC torch::nvtoolsext) 1523 endif() 1524 1525 target_include_directories( 1526 torch_cuda INTERFACE $<INSTALL_INTERFACE:include>) 1527 target_include_directories( 1528 torch_cuda PRIVATE ${Caffe2_GPU_INCLUDE}) 1529 target_link_libraries( 1530 torch_cuda PRIVATE ${Caffe2_CUDA_DEPENDENCY_LIBS}) 1531 1532 # These public dependencies must go after the previous dependencies, as the 1533 # order of the libraries in the linker call matters here when statically 1534 # linking; libculibos and cublas must be last. 1535 target_link_libraries(torch_cuda PUBLIC torch_cpu_library ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS}) 1536endif() 1537 1538# ---[ XPU library. 1539if(USE_XPU) 1540 target_link_libraries(torch_xpu INTERFACE torch::xpurt) 1541 1542 target_link_libraries(torch_xpu PUBLIC c10_xpu) 1543 1544 target_include_directories( 1545 torch_xpu INTERFACE $<INSTALL_INTERFACE:include>) 1546 target_include_directories( 1547 torch_xpu PRIVATE ${Caffe2_XPU_INCLUDE}) 1548 target_link_libraries( 1549 torch_xpu PRIVATE ${Caffe2_XPU_DEPENDENCY_LIBS}) 1550 1551 # Ensure that torch_cpu is ready before being linked by torch_xpu. 1552 add_dependencies(torch_xpu torch_cpu) 1553 1554 if(MSVC) 1555 target_link_libraries(torch_xpu PUBLIC torch_cpu_library) 1556 else() 1557 include(CheckLinkerFlag) 1558 1559 # Check whether the compiler supports '--no-as-needed' and '--as-needed' 1560 check_linker_flag(CXX "-Wl,--no-as-needed" HAVE_NO_AS_NEEDED) 1561 check_linker_flag(CXX "-Wl,--as-needed" HAVE_AS_NEEDED) 1562 1563 if(HAVE_NO_AS_NEEDED AND HAVE_AS_NEEDED) 1564 target_link_libraries(torch_xpu PRIVATE 1565 "-Wl,--no-as-needed,\"$<TARGET_FILE:torch_cpu>\" -Wl,--as-needed") 1566 else() 1567 target_link_libraries(torch_xpu PRIVATE "$<TARGET_FILE:torch_cpu>") 1568 endif() 1569 endif() 1570endif() 1571 1572# ---[ Metal(OSX) modification 1573if(APPLE AND USE_PYTORCH_METAL) 1574 if(NOT INTERN_BUILD_MOBILE) 1575 include(../cmake/Metal.cmake) 1576 # We need to link the system frameworks explicitly 1577 find_library(metal NAMES Metal) 1578 find_library(mps NAMES MetalPerformanceShaders) 1579 find_library(foundation NAMES Foundation) 1580 find_library(accelerate NAMES Accelerate) 1581 target_link_libraries(torch_cpu PUBLIC ${metal} ${mps} ${foundation} ${accelerate}) 1582 endif() 1583endif() 1584 1585 1586target_link_libraries(torch_cpu PRIVATE flatbuffers) 1587 1588# Note [Global dependencies] 1589# Some libraries (e.g. OpenMPI) like to dlopen plugins after they're initialized, 1590# and they assume that all of their symbols will be available in the global namespace. 1591# On the other hand we try to be good citizens and avoid polluting the symbol 1592# namespaces, so libtorch is loaded with all its dependencies in a local scope. 1593# That usually leads to missing symbol errors at run-time, so to avoid a situation like 1594# this we have to preload those libs in a global namespace. 1595if(BUILD_SHARED_LIBS) 1596 add_library(torch_global_deps SHARED ${TORCH_SRC_DIR}/csrc/empty.c) 1597 if(HAVE_SOVERSION) 1598 set_target_properties(torch_global_deps PROPERTIES 1599 VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION}) 1600 endif() 1601 set_target_properties(torch_global_deps PROPERTIES LINKER_LANGUAGE C) 1602 if(USE_MPI) 1603 target_link_libraries(torch_global_deps MPI::MPI_CXX) 1604 endif() 1605 if(CAFFE2_USE_MKL) 1606 target_link_libraries(torch_global_deps caffe2::mkl) 1607 endif() 1608 # The CUDA libraries are linked here for a different reason: in some 1609 # cases we load these libraries with ctypes, and if they weren't opened 1610 # with RTLD_GLOBAL, we'll do the "normal" search process again (and 1611 # not find them, because they're usually in non-standard locations) 1612 if(USE_CUDA) 1613 target_link_libraries(torch_global_deps ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS}) 1614 target_link_libraries(torch_global_deps torch::cudart) 1615 if(TARGET torch::nvtoolsext) 1616 target_link_libraries(torch_global_deps torch::nvtoolsext) 1617 endif() 1618 endif() 1619 install(TARGETS torch_global_deps DESTINATION "${TORCH_INSTALL_LIB_DIR}") 1620endif() 1621 1622# ---[ Caffe2 HIP sources. 1623if(USE_ROCM) 1624 # Call again since Caffe2_HIP_INCLUDE is extended with ATen include dirs. 1625 # Get Compile Definitions from the directory (FindHIP.cmake bug) 1626 get_directory_property(MY_DEFINITIONS COMPILE_DEFINITIONS) 1627 if(MY_DEFINITIONS) 1628 foreach(_item ${MY_DEFINITIONS}) 1629 list(APPEND HIP_CLANG_FLAGS "-D${_item}") 1630 endforeach() 1631 endif() 1632 1633 # Call again since Caffe2_HIP_INCLUDE is extended with ATen include dirs. 1634 hip_include_directories(${Caffe2_HIP_INCLUDE}) 1635 1636 # Since PyTorch files contain HIP headers, these flags are required for the necessary definitions to be added. 1637 target_compile_options(torch_hip PUBLIC ${HIP_CXX_FLAGS}) # experiment 1638 1639 target_link_libraries(torch_hip PUBLIC c10_hip) 1640 1641 if(NOT INTERN_BUILD_MOBILE) 1642 # TODO: Cut this over to ATEN_HIP_FILES_GEN_LIB. At the moment, we 1643 # only generate CUDA files 1644 # NB: This dependency must be PRIVATE, because we don't install 1645 # ATEN_CUDA_FILES_GEN_LIB (it's a synthetic target just to get the 1646 # correct dependency from generated files.) 1647 target_link_libraries(torch_hip PRIVATE ATEN_CUDA_FILES_GEN_LIB) 1648 endif() 1649 target_link_libraries(torch_hip PUBLIC torch_cpu_library ${Caffe2_PUBLIC_HIP_DEPENDENCY_LIBS}) 1650 target_link_libraries(torch_hip PRIVATE ${Caffe2_HIP_DEPENDENCY_LIBS}) 1651 1652 # Since PyTorch files contain HIP headers, this is also needed to capture the includes. 1653 target_include_directories(torch_hip PRIVATE ${Caffe2_HIP_INCLUDE}) 1654 target_include_directories(torch_hip INTERFACE $<INSTALL_INTERFACE:include>) 1655endif() 1656 1657if(BUILD_STATIC_RUNTIME_BENCHMARK) 1658 add_subdirectory(${TORCH_ROOT}/benchmarks/static_runtime ${PROJECT_BINARY_DIR}/bin) 1659 add_executable(static_runtime_bench "${STATIC_RUNTIME_BENCHMARK_SRCS}") 1660 add_executable(static_runtime_test "${STATIC_RUNTIME_TEST_SRCS}") 1661 target_link_libraries(static_runtime_bench torch_library benchmark) 1662 target_link_libraries(static_runtime_test torch_library gtest_main) 1663endif() 1664 1665if(BUILD_MOBILE_BENCHMARK) 1666 foreach(benchmark_src ${ATen_MOBILE_BENCHMARK_SRCS}) 1667 get_filename_component(benchmark_name ${benchmark_src} NAME_WE) 1668 add_executable(${benchmark_name} "${benchmark_src}") 1669 target_link_libraries(${benchmark_name} torch_library benchmark) 1670 target_include_directories(${benchmark_name} PRIVATE $<INSTALL_INTERFACE:include>) 1671 target_include_directories(${benchmark_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>) 1672 target_include_directories(${benchmark_name} PRIVATE ${ATen_CPU_INCLUDE}) 1673 target_link_options(${benchmark_name} PRIVATE "LINKER:--allow-multiple-definition") 1674 endforeach() 1675endif() 1676 1677if(BUILD_MOBILE_TEST) 1678 foreach(test_src ${ATen_MOBILE_TEST_SRCS}) 1679 get_filename_component(test_name ${test_src} NAME_WE) 1680 add_executable(${test_name} "${test_src}") 1681 target_link_libraries(${test_name} torch_library gtest_main) 1682 target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>) 1683 target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>) 1684 target_include_directories(${test_name} PRIVATE ${ATen_CPU_INCLUDE}) 1685 add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>) 1686 endforeach() 1687endif() 1688 1689# ---[ Test binaries. 1690if(BUILD_TEST) 1691 1692 foreach(test_src ${ATen_VEC_TEST_SRCS}) 1693 foreach(i RANGE ${NUM_CPU_CAPABILITY_NAMES}) 1694 get_filename_component(test_name ${test_src} NAME_WE) 1695 list(GET CPU_CAPABILITY_NAMES ${i} CPU_CAPABILITY) 1696 list(GET CPU_CAPABILITY_FLAGS ${i} FLAGS) 1697 separate_arguments(FLAGS UNIX_COMMAND "${FLAGS}") 1698 # Build vec with minimal dependencies on all platforms but Windows 1699 if(NOT MSVC) 1700 add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}" ../aten/src/ATen/native/quantized/AffineQuantizerBase.cpp) 1701 # TODO: Get rid of c10 dependency (which is only needed for the implementation of AT_ERROR) 1702 target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main nlohmann) 1703 if(USE_FBGEMM) 1704 target_link_libraries(${test_name}_${CPU_CAPABILITY} fbgemm) 1705 endif() 1706 if(USE_ASAN) 1707 if(TARGET Sanitizer::address) 1708 target_link_libraries(${test_name}_${CPU_CAPABILITY} Sanitizer::address) 1709 endif() 1710 if(TARGET Sanitizer::undefined) 1711 target_link_libraries(${test_name}_${CPU_CAPABILITY} Sanitizer::undefined) 1712 endif() 1713 endif() 1714 else() 1715 add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}") 1716 target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library sleef gtest_main) 1717 endif() 1718 target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<INSTALL_INTERFACE:include>) 1719 target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>) 1720 target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE ${ATen_CPU_INCLUDE}) 1721 target_compile_definitions(${test_name}_${CPU_CAPABILITY} PRIVATE CPU_CAPABILITY=${CPU_CAPABILITY} CPU_CAPABILITY_${CPU_CAPABILITY}) 1722 target_compile_options(${test_name}_${CPU_CAPABILITY} PRIVATE ${FLAGS}) 1723 if(NOT MSVC) 1724 target_compile_options(${test_name}_${CPU_CAPABILITY} PRIVATE -Wno-ignored-qualifiers) 1725 endif(NOT MSVC) 1726 add_test(NAME ${test_name}_${CPU_CAPABILITY} COMMAND $<TARGET_FILE:${test_name}_${CPU_CAPABILITY}>) 1727 endforeach() 1728 endforeach() 1729 1730 foreach(test_src ${Caffe2_CPU_TEST_SRCS}) 1731 get_filename_component(test_name ${test_src} NAME_WE) 1732 add_executable(${test_name} "${test_src}") 1733 target_link_libraries(${test_name} torch_library gtest_main) 1734 if(NOT MSVC) 1735 target_link_libraries(${test_name} stdc++) 1736 endif() 1737 target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>) 1738 target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>) 1739 target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE}) 1740 add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>) 1741 if(INSTALL_TEST) 1742 install(TARGETS ${test_name} DESTINATION test) 1743 # Install PDB files for MSVC builds 1744 if(MSVC AND BUILD_SHARED_LIBS) 1745 install(FILES $<TARGET_PDB_FILE:${test_name}> DESTINATION test OPTIONAL) 1746 endif() 1747 endif() 1748 endforeach() 1749 1750 if(USE_MPS) 1751 foreach(test_src ${Caffe2_MPS_TEST_SRCS}) 1752 get_filename_component(test_name ${test_src} NAME_WE) 1753 add_executable(${test_name} "${test_src}") 1754 find_library(metal NAMES Metal) 1755 find_library(foundation NAMES Foundation) 1756 target_link_libraries(${test_name} torch_library gtest_main ${metal} ${foundation}) 1757 target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>) 1758 target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>) 1759 target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE}) 1760 add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>) 1761 if(INSTALL_TEST) 1762 install(TARGETS ${test_name} DESTINATION test) 1763 # Install PDB files for MSVC builds 1764 if(MSVC AND BUILD_SHARED_LIBS) 1765 install(FILES $<TARGET_PDB_FILE:${test_name}> DESTINATION test OPTIONAL) 1766 endif() 1767 endif() 1768 endforeach() 1769 endif() 1770 1771 if(USE_CUDA) 1772 foreach(test_src ${Caffe2_GPU_TEST_SRCS}) 1773 get_filename_component(test_name ${test_src} NAME_WE) 1774 add_executable(${test_name} "${test_src}") 1775 target_link_libraries(${test_name} torch_library gtest_main) 1776 if(USE_CUDNN AND ${test_name} MATCHES "cudnn") 1777 target_link_libraries(${test_name} torch::cudnn) 1778 endif() 1779 target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>) 1780 target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE}) 1781 add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>) 1782 if(INSTALL_TEST) 1783 install(TARGETS ${test_name} DESTINATION test) 1784 # Install PDB files for MSVC builds 1785 if(MSVC AND BUILD_SHARED_LIBS) 1786 install(FILES $<TARGET_PDB_FILE:${test_name}> DESTINATION test OPTIONAL) 1787 endif() 1788 endif() 1789 endforeach() 1790 if(TARGET context_gpu_test) 1791 target_link_libraries(context_gpu_test caffe2::curand caffe2::cublas) 1792 endif() 1793 endif() 1794 1795 if(USE_XPU) 1796 foreach(test_src ${Caffe2_XPU_TEST_SRCS}) 1797 get_filename_component(test_name ${test_src} NAME_WE) 1798 add_executable(${test_name} "${test_src}") 1799 target_link_libraries(${test_name} torch_library gtest_main) 1800 target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>) 1801 target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE}) 1802 add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>) 1803 if(INSTALL_TEST) 1804 install(TARGETS ${test_name} DESTINATION test) 1805 endif() 1806 endforeach() 1807 endif() 1808 1809 if(USE_VULKAN) 1810 foreach(test_src ${Caffe2_VULKAN_TEST_SRCS}) 1811 get_filename_component(test_name ${test_src} NAME_WE) 1812 add_executable(${test_name} "${test_src}") 1813 target_link_libraries(${test_name} torch_library gtest_main) 1814 target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>) 1815 target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE}) 1816 add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>) 1817 if(INSTALL_TEST) 1818 install(TARGETS ${test_name} DESTINATION test) 1819 # Install PDB files for MSVC builds 1820 if(MSVC AND BUILD_SHARED_LIBS) 1821 install(FILES $<TARGET_PDB_FILE:${test_name}> DESTINATION test OPTIONAL) 1822 endif() 1823 endif() 1824 endforeach() 1825 endif() 1826 1827 if(USE_ROCM) 1828 foreach(test_src ${Caffe2_HIP_TEST_SRCS}) 1829 get_filename_component(test_name ${test_src} NAME_WE) 1830 add_executable(${test_name} "${test_src}") 1831 target_link_libraries(${test_name} torch_library gtest_main) 1832 target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>) 1833 target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDE}) 1834 target_compile_options(${test_name} PRIVATE ${HIP_CXX_FLAGS}) 1835 add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>) 1836 if(INSTALL_TEST) 1837 install(TARGETS ${test_name} DESTINATION test) 1838 endif() 1839 endforeach() 1840 endif() 1841endif() 1842 1843if(MSVC) 1844 # This is used to enable the conforming lambda processor in MSVC 1845 # Which allows us to capture constexpr in lambdas 1846 # Note that this will be turned on by default for std=c++20 and above 1847 # This should be applied globally when https://github.com/pytorch/pytorch/issues/92600 is fixed 1848 foreach(tmp ${MEM_EFF_ATTENTION_CUDA_SOURCES}) 1849 # MEM_EFF_ATTENTION_CUDA is populated in pytorch/aten/src/ATen/CMakeLists.txt 1850 # We iterate over these files, updating paths and adding the compile flag 1851 FILE(RELATIVE_PATH tmp_path "${PROJECT_SOURCE_DIR}" "${tmp}") 1852 SET(tmp_path "../${tmp_path}") 1853 set_source_files_properties(${tmp_path} PROPERTIES COMPILE_FLAGS "-Xcompiler /Zc:lambda") 1854 endforeach() 1855endif() 1856endif() 1857