xref: /aosp_15_r20/external/pytorch/caffe2/CMakeLists.txt (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1# ---[ Generate and install header and cpp files
2include(../cmake/Codegen.cmake)
3
4# ---[ Vulkan code gen
5if(USE_VULKAN)
6  include(../cmake/VulkanCodegen.cmake)
7endif()
8
9# Debug messages - if you want to get a list of source files and examine
10# target information, enable the following by -DPRINT_CMAKE_DEBUG_INFO=ON.
11set(PRINT_CMAKE_DEBUG_INFO FALSE CACHE BOOL "print cmake debug information")
12if(PRINT_CMAKE_DEBUG_INFO)
13  include(../cmake/DebugHelper.cmake)
14endif()
15
16# ATen parallelism settings
17#  OMP - OpenMP for intra-op, native thread pool for inter-op parallelism
18#  NATIVE - using native thread pool for intra- and inter-op parallelism
19if(INTERN_BUILD_MOBILE)
20  set(ATEN_THREADING "NATIVE" CACHE STRING "ATen parallel backend")
21else()
22  if(USE_OPENMP)
23    set(ATEN_THREADING "OMP" CACHE STRING "ATen parallel backend")
24  else()
25    set(ATEN_THREADING "NATIVE" CACHE STRING "ATen parallel backend")
26  endif()
27endif()
28
29set(AT_PARALLEL_OPENMP 0)
30set(AT_PARALLEL_NATIVE 0)
31
32message(STATUS "Using ATen parallel backend: ${ATEN_THREADING}")
33if("${ATEN_THREADING}" STREQUAL "OMP")
34  set(AT_PARALLEL_OPENMP 1)
35elseif("${ATEN_THREADING}" STREQUAL "NATIVE")
36  set(AT_PARALLEL_NATIVE 1)
37else()
38  message(FATAL_ERROR "Unknown ATen parallel backend: ${ATEN_THREADING}")
39endif()
40
41# ---[ Declare source file lists
42
43# ---[ ATen build
44if(INTERN_BUILD_ATEN_OPS)
45  set(__torch_CMAKE_POSITION_INDEPENDENT_CODE ${CMAKE_POSITION_INDEPENDENT_CODE})
46  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
47  add_subdirectory(../aten aten)
48  set(CMAKE_POSITION_INDEPENDENT_CODE ${__torch_CMAKE_POSITION_INDEPENDENT_CODE})
49
50  # Generate the headers wrapped by our operator
51  file(GLOB_RECURSE torchgen_python "${PROJECT_SOURCE_DIR}/torchgen/*.py")
52
53
54  # Add source, includes, and libs to lists
55  list(APPEND Caffe2_CPU_SRCS ${ATen_CPU_SRCS})
56  list(APPEND Caffe2_GPU_SRCS ${ATen_CUDA_CPP_SRCS})
57  list(APPEND Caffe2_XPU_SRCS ${ATen_XPU_SRCS})
58  list(APPEND Caffe2_XPU_INCLUDE ${ATen_XPU_INCLUDE})
59  list(APPEND Caffe2_XPU_DEPENDENCY_LIBS ${ATen_XPU_DEPENDENCY_LIBS})
60  list(APPEND Caffe2_GPU_SRCS_W_SORT_BY_KEY ${ATen_CUDA_SRCS_W_SORT_BY_KEY})
61  list(APPEND Caffe2_GPU_CU_SRCS ${ATen_CUDA_CU_SRCS})
62  list(APPEND Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY ${ATen_CUDA_CU_SRCS_W_SORT_BY_KEY})
63  list(APPEND Caffe2_HIP_SRCS ${ATen_HIP_SRCS})
64  list(APPEND Caffe2_MPS_SRCS ${ATen_MPS_SRCS})
65  list(APPEND Caffe2_XPU_SRCS ${ATen_XPU_SRCS})
66  list(APPEND Caffe2_HIP_SRCS ${ATen_HIP_SRCS_W_SORT_BY_KEY})
67  list(APPEND Caffe2_CPU_TEST_SRCS ${ATen_CPU_TEST_SRCS})
68  list(APPEND Caffe2_MPS_TEST_SRCS ${ATen_MPS_TEST_SRCS})
69  list(APPEND Caffe2_GPU_TEST_SRCS ${ATen_CUDA_TEST_SRCS})
70  list(APPEND Caffe2_HIP_TEST_SRCS ${ATen_HIP_TEST_SRCS})
71  list(APPEND Caffe2_XPU_TEST_SRCS ${ATen_XPU_TEST_SRCS})
72  list(APPEND Caffe2_CPU_TEST_SRCS ${ATen_CORE_TEST_SRCS})
73  list(APPEND Caffe2_VULKAN_TEST_SRCS ${ATen_VULKAN_TEST_SRCS})
74  list(APPEND Caffe2_CPU_INCLUDE ${ATen_CPU_INCLUDE})
75  list(APPEND Caffe2_GPU_INCLUDE ${ATen_CUDA_INCLUDE})
76  list(APPEND Caffe2_HIP_INCLUDE ${ATen_HIP_INCLUDE})
77  list(APPEND Caffe2_XPU_INCLUDE ${ATen_XPU_INCLUDE})
78  list(APPEND Caffe2_VULKAN_INCLUDE ${ATen_VULKAN_INCLUDE})
79  list(APPEND Caffe2_DEPENDENCY_LIBS ${ATen_CPU_DEPENDENCY_LIBS})
80  list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS ${ATen_CUDA_DEPENDENCY_LIBS})
81  list(APPEND Caffe2_HIP_DEPENDENCY_LIBS ${ATen_HIP_DEPENDENCY_LIBS})
82  list(APPEND Caffe2_DEPENDENCY_INCLUDE ${ATen_THIRD_PARTY_INCLUDE})
83  set(Caffe2_CUDA_DEPENDENCY_LIBS ${Caffe2_CUDA_DEPENDENCY_LIBS} PARENT_SCOPE)
84endif()
85
86# ---[ Caffe2 build
87# Note: the folders that are being commented out have not been properly
88# addressed yet.
89
90if(NOT MSVC AND USE_XNNPACK)
91  if(NOT TARGET fxdiv)
92    set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
93    set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
94    add_subdirectory(
95      "${FXDIV_SOURCE_DIR}"
96      "${CMAKE_BINARY_DIR}/FXdiv")
97  endif()
98endif()
99
100add_subdirectory(core)
101add_subdirectory(serialize)
102add_subdirectory(utils)
103if(NOT USE_FBGEMM)
104  add_subdirectory(perfkernels)
105endif()
106
107# Advanced: if we have allow list specified, we will do intersections for all
108# main lib srcs.
109if(CAFFE2_ALLOWLISTED_FILES)
110  caffe2_do_allowlist(Caffe2_CPU_SRCS CAFFE2_ALLOWLISTED_FILES)
111  caffe2_do_allowlist(Caffe2_GPU_SRCS CAFFE2_ALLOWLISTED_FILES)
112  caffe2_do_allowlist(Caffe2_XPU_SRCS CAFFE2_ALLOWLISTED_FILES)
113  caffe2_do_allowlist(Caffe2_GPU_SRCS_W_SORT_BY_KEY CAFFE2_ALLOWLISTED_FILES)
114  caffe2_do_allowlist(Caffe2_GPU_CU_SRCS CAFFE2_ALLOWLISTED_FILES)
115  caffe2_do_allowlist(Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY CAFFE2_ALLOWLISTED_FILES)
116  caffe2_do_allowlist(Caffe2_HIP_SRCS CAFFE2_ALLOWLISTED_FILES)
117endif()
118
119if(PRINT_CMAKE_DEBUG_INFO)
120  message(STATUS "CPU sources: ")
121  foreach(tmp ${Caffe2_CPU_SRCS})
122    message(STATUS "  " ${tmp})
123  endforeach()
124
125  message(STATUS "GPU sources: (for torch_cuda_cpp)")
126  foreach(tmp ${Caffe2_GPU_SRCS})
127    message(STATUS "  " ${tmp})
128  endforeach()
129
130  message(STATUS "GPU sources: (for torch_cuda_cu)")
131  foreach(tmp ${Caffe2_GPU_CU_SRCS})
132    message(STATUS "  " ${tmp})
133  endforeach()
134
135  message(STATUS "torch_cuda_cu GPU sources (w/ sort by key): ")
136  foreach(tmp ${Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY})
137    message(STATUS "  " ${tmp})
138  endforeach()
139
140  message(STATUS "torch_cuda_cpp GPU sources (w/ sort by key): ")
141  foreach(tmp ${Caffe2_GPU_SRCS_W_SORT_BY_KEY})
142    message(STATUS "  " ${tmp})
143  endforeach()
144
145  message(STATUS "CPU include: ")
146  foreach(tmp ${Caffe2_CPU_INCLUDE})
147    message(STATUS "  " ${tmp})
148  endforeach()
149
150  message(STATUS "GPU include: ")
151  foreach(tmp ${Caffe2_GPU_INCLUDE})
152    message(STATUS "  " ${tmp})
153  endforeach()
154
155  message(STATUS "CPU test sources: ")
156  foreach(tmp ${Caffe2_CPU_TEST_SRCS})
157    message(STATUS "  " ${tmp})
158  endforeach()
159
160  message(STATUS "GPU test sources: ")
161  foreach(tmp ${Caffe2_GPU_TEST_SRCS})
162    message(STATUS "  " ${tmp})
163  endforeach()
164
165  message(STATUS "HIP sources: ")
166  foreach(tmp ${Caffe2_HIP_SRCS})
167    message(STATUS "  " ${tmp})
168  endforeach()
169
170  message(STATUS "MPS sources: ")
171  foreach(tmp ${Caffe2_MPS_SRCS})
172    message(STATUS "  " ${tmp})
173  endforeach()
174
175  message(STATUS "XPU sources: ")
176  foreach(tmp ${Caffe2_XPU_SRCS})
177    message(STATUS "  " ${tmp})
178  endforeach()
179
180  message(STATUS "HIP test sources: ")
181  foreach(tmp ${Caffe2_HIP_TEST_SRCS})
182    message(STATUS "  " ${tmp})
183  endforeach()
184
185  message(STATUS "ATen CPU test sources: ")
186  foreach(tmp ${ATen_CPU_TEST_SRCS})
187    message(STATUS "  " ${tmp})
188  endforeach()
189
190  message(STATUS "ATen MPS test sources: ")
191  foreach(tmp ${ATen_MPS_TEST_SRCS})
192    message(STATUS "  " ${tmp})
193  endforeach()
194
195  message(STATUS "ATen CUDA test sources: ")
196  foreach(tmp ${ATen_CUDA_TEST_SRCS})
197    message(STATUS "  " ${tmp})
198  endforeach()
199
200  message(STATUS "ATen HIP test sources: ")
201  foreach(tmp ${ATen_HIP_TEST_SRCS})
202    message(STATUS "  " ${tmp})
203  endforeach()
204
205  message(STATUS "ATen XPU test sources: ")
206  foreach(tmp ${ATen_XPU_TEST_SRCS})
207    message(STATUS "  " ${tmp})
208  endforeach()
209
210  message(STATUS "ATen Vulkan test sources: ")
211  foreach(tmp ${ATen_VULKAN_TEST_SRCS})
212    message(STATUS "  " ${tmp})
213  endforeach()
214
215endif()
216
217# ==========================================================
218# formerly-libtorch
219# ==========================================================
220
221set(TORCH_SRC_DIR "${PROJECT_SOURCE_DIR}/torch")
222set(TORCH_ROOT "${PROJECT_SOURCE_DIR}")
223
224if(NOT TORCH_INSTALL_BIN_DIR)
225  set(TORCH_INSTALL_BIN_DIR bin)
226endif()
227
228if(NOT TORCH_INSTALL_INCLUDE_DIR)
229  set(TORCH_INSTALL_INCLUDE_DIR include)
230endif()
231
232if(NOT TORCH_INSTALL_LIB_DIR)
233  set(TORCH_INSTALL_LIB_DIR lib)
234endif()
235
236set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
237
238# Generate files
239set(TOOLS_PATH "${TORCH_ROOT}/tools")
240
241configure_file("${TORCH_SRC_DIR}/_utils_internal.py"
242  "${TOOLS_PATH}/shared/_utils_internal.py"
243  COPYONLY)
244
245# Generate header with version info
246configure_file("${TORCH_SRC_DIR}/csrc/api/include/torch/version.h.in"
247  "${TORCH_SRC_DIR}/csrc/api/include/torch/version.h"
248  @ONLY)
249
250set(GENERATED_CXX_TORCH
251  "${TORCH_SRC_DIR}/csrc/autograd/generated/Functions.cpp"
252  "${TORCH_SRC_DIR}/csrc/autograd/generated/ViewFuncs.cpp"
253  )
254
255if(NOT INTERN_DISABLE_AUTOGRAD AND NOT BUILD_LITE_INTERPRETER)
256  list(APPEND GENERATED_CXX_TORCH
257    "${TORCH_SRC_DIR}/csrc/autograd/generated/VariableType_0.cpp"
258    "${TORCH_SRC_DIR}/csrc/autograd/generated/VariableType_1.cpp"
259    "${TORCH_SRC_DIR}/csrc/autograd/generated/VariableType_2.cpp"
260    "${TORCH_SRC_DIR}/csrc/autograd/generated/VariableType_3.cpp"
261    "${TORCH_SRC_DIR}/csrc/autograd/generated/VariableType_4.cpp"
262    "${TORCH_SRC_DIR}/csrc/autograd/generated/TraceType_0.cpp"
263    "${TORCH_SRC_DIR}/csrc/autograd/generated/TraceType_1.cpp"
264    "${TORCH_SRC_DIR}/csrc/autograd/generated/TraceType_2.cpp"
265    "${TORCH_SRC_DIR}/csrc/autograd/generated/TraceType_3.cpp"
266    "${TORCH_SRC_DIR}/csrc/autograd/generated/TraceType_4.cpp"
267    "${TORCH_SRC_DIR}/csrc/autograd/generated/ADInplaceOrViewType_0.cpp"
268    "${TORCH_SRC_DIR}/csrc/autograd/generated/ADInplaceOrViewType_1.cpp"
269    "${TORCH_SRC_DIR}/csrc/inductor/aoti_torch/generated/c_shim_cpu.cpp"
270  )
271  if(BUILD_LAZY_TS_BACKEND)
272    list(APPEND GENERATED_CXX_TORCH
273      "${TORCH_SRC_DIR}/csrc/lazy/generated/LazyNativeFunctions.cpp"
274      "${TORCH_SRC_DIR}/csrc/lazy/generated/RegisterAutogradLazy.cpp"
275      "${TORCH_SRC_DIR}/csrc/lazy/generated/RegisterLazy.cpp"
276    )
277  endif()
278endif()
279
280set(GENERATED_H_TORCH
281  "${TORCH_SRC_DIR}/csrc/autograd/generated/Functions.h"
282  "${TORCH_SRC_DIR}/csrc/autograd/generated/variable_factories.h"
283  "${TORCH_SRC_DIR}/csrc/autograd/generated/ViewFuncs.h"
284  )
285
286if(NOT INTERN_DISABLE_AUTOGRAD)
287  list(APPEND GENERATED_H_TORCH
288    "${TORCH_SRC_DIR}/csrc/autograd/generated/VariableType.h"
289    "${TORCH_SRC_DIR}/csrc/lazy/generated/LazyIr.h"
290    "${TORCH_SRC_DIR}/csrc/lazy/generated/LazyNonNativeIr.h"
291    "${TORCH_SRC_DIR}/csrc/lazy/generated/LazyNativeFunctions.h"
292  )
293endif()
294
295set(GENERATED_CXX_PYTHON
296  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_functions_0.cpp"
297  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_functions_1.cpp"
298  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_functions_2.cpp"
299  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_functions_3.cpp"
300  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_functions_4.cpp"
301  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_variable_methods.cpp"
302  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_torch_functions_0.cpp"
303  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_torch_functions_1.cpp"
304  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_torch_functions_2.cpp"
305  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_nn_functions.cpp"
306  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_fft_functions.cpp"
307  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_linalg_functions.cpp"
308  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_nested_functions.cpp"
309  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_sparse_functions.cpp"
310  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_special_functions.cpp"
311  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_return_types.cpp"
312  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_enum_tag.cpp"
313  )
314
315set(GENERATED_H_PYTHON
316  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_functions.h"
317  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_return_types.h"
318  )
319
320set(GENERATED_TESTING_PYTHON
321  "${TORCH_SRC_DIR}/testing/_internal/generated/annotated_fn_args.py"
322  )
323
324set(GENERATED_CXX_TORCH_CUDA
325  "${TORCH_SRC_DIR}/csrc/inductor/aoti_torch/generated/c_shim_cuda.cpp"
326  )
327
328set(TORCH_GENERATED_CODE
329  ${GENERATED_CXX_TORCH}
330  ${GENERATED_H_TORCH}
331  ${GENERATED_CXX_PYTHON}
332  ${GENERATED_H_PYTHON}
333  ${GENERATED_TESTING_PYTHON}
334  ${GENERATED_CXX_TORCH_CUDA}
335  )
336
337set(GEN_PER_OPERATOR_FLAG)
338if(USE_PER_OPERATOR_HEADERS)
339  list(APPEND GEN_PER_OPERATOR_FLAG "--per_operator_headers")
340endif()
341
342file(GLOB_RECURSE autograd_python "${TOOLS_PATH}/autograd/*.py")
343file(GLOB_RECURSE autograd_yaml "${TOOLS_PATH}/autograd/*.yaml")
344file(GLOB_RECURSE autograd_templates "${TOOLS_PATH}/autograd/templates/*")
345add_custom_command(
346  OUTPUT
347  ${TORCH_GENERATED_CODE}
348  COMMAND
349  Python::Interpreter tools/setup_helpers/generate_code.py
350    --native-functions-path "aten/src/ATen/native/native_functions.yaml"
351    --tags-path "aten/src/ATen/native/tags.yaml"
352    $<$<BOOL:${INTERN_DISABLE_AUTOGRAD}>:--disable-autograd>
353    $<$<BOOL:${SELECTED_OP_LIST}>:--selected-op-list-path="${SELECTED_OP_LIST}">
354    --force_schema_registration
355    --gen_lazy_ts_backend
356    ${GEN_PER_OPERATOR_FLAG}
357  DEPENDS
358    "${TORCH_ROOT}/aten/src/ATen/native/native_functions.yaml"
359    "${TORCH_ROOT}/aten/src/ATen/native/tags.yaml"
360    "${TORCH_ROOT}/aten/src/ATen/native/ts_native_functions.yaml"
361    "${TORCH_ROOT}/torch/csrc/lazy/core/shape_inference.h"
362    "${TORCH_ROOT}/torch/csrc/lazy/ts_backend/ts_native_functions.cpp"
363    "${TORCH_ROOT}/aten/src/ATen/templates/DispatchKeyNativeFunctions.h"
364    "${TORCH_ROOT}/aten/src/ATen/templates/DispatchKeyNativeFunctions.cpp"
365    "${TORCH_ROOT}/aten/src/ATen/templates/LazyIr.h"
366    "${TORCH_ROOT}/aten/src/ATen/templates/LazyNonNativeIr.h"
367    "${TORCH_ROOT}/aten/src/ATen/templates/RegisterDispatchKey.cpp"
368    ${autograd_python}
369    ${autograd_yaml}
370    ${autograd_templates}
371    ${torchgen_python}
372  WORKING_DIRECTORY "${TORCH_ROOT}")
373
374
375# Required workaround for libtorch_python.so build
376# see https://samthursfield.wordpress.com/2015/11/21/cmake-dependencies-between-targets-and-files-and-custom-commands/#custom-commands-in-different-directories
377add_custom_target(
378  generate-torch-sources
379  DEPENDS ${TORCH_GENERATED_CODE}
380  )
381
382set(TORCH_SRCS ${GENERATED_CXX_TORCH})
383list(APPEND TORCH_SRCS ${GENERATED_H_TORCH})
384list(APPEND LIBTORCH_CMAKE_SRCS "")
385
386list(APPEND LITE_EAGER_SYMOBLICATION_SRCS "")
387if(USE_SOURCE_DEBUG_ON_MOBILE)
388  append_filelist("libtorch_lite_eager_symbolication" LITE_EAGER_SYMOBLICATION_SRCS)
389  # For source debug on lite interpreter, we have to add dependency on pickling
390  # but references to read/writeArchiveAndTensor is not built for mobile
391  # so this condition specifically says we are building for source debug
392  # on mobile.
393  if(BUILD_LITE_INTERPRETER)
394    set_source_files_properties(${TORCH_SRC_DIR}/csrc/jit/serialization/pickle.cpp PROPERTIES COMPILE_FLAGS "-DC10_MOBILE -DFEATURE_TORCH_MOBILE")
395  endif()
396endif()
397
398list(APPEND LITE_PROFILER_SRCS "")
399if(USE_LITE_INTERPRETER_PROFILER)
400  append_filelist("libtorch_edge_profiler_sources " LITE_PROFILER_SRCS)
401endif()
402
403# Switch between the full jit interpreter and lite interpreter
404if(BUILD_LITE_INTERPRETER)
405  append_filelist("libtorch_lite_cmake_sources" LIBTORCH_CMAKE_SRCS)
406  list(APPEND LIBTORCH_CMAKE_SRCS ${LITE_EAGER_SYMOBLICATION_SRCS})
407  list(APPEND LIBTORCH_CMAKE_SRCS ${LITE_PROFILER_SRCS})
408  if(USE_LITE_AOTI)
409    append_filelist("inductor_core_resources" LIBTORCH_CMAKE_SRCS)
410  endif()
411  set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
412else()
413  append_filelist("libtorch_cmake_sources" LIBTORCH_CMAKE_SRCS)
414  list(APPEND LIBTORCH_CMAKE_SRCS ${LITE_EAGER_SYMOBLICATION_SRCS})
415  if(BUILD_LAZY_TS_BACKEND)
416    append_filelist("lazy_tensor_ts_sources" LIBTORCH_CMAKE_SRCS)
417  endif()
418  if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
419    # TODO: Delete this when https://github.com/pytorch/pytorch/issues/35026 is fixed
420    set_source_files_properties(../torch/csrc/autograd/record_function_ops.cpp PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations)
421  endif()
422endif()
423list(APPEND TORCH_SRCS ${LIBTORCH_CMAKE_SRCS})
424
425if(PRINT_CMAKE_DEBUG_INFO)
426  message(STATUS "Interpreter sources: ")
427  foreach(tmp ${LIBTORCH_CMAKE_SRCS})
428    message(STATUS "  " ${tmp})
429  endforeach()
430endif()
431
432# Mobile backend delegate srcs
433if(INTERN_BUILD_MOBILE)
434  set(DELEGATE_SRCS
435    ${TORCH_SRC_DIR}/csrc/jit/backends/backend_debug_info.cpp
436    ${TORCH_SRC_DIR}/csrc/jit/backends/backend_interface.cpp
437  )
438  list(APPEND TORCH_SRCS ${DELEGATE_SRCS})
439  if(IOS AND USE_COREML_DELEGATE)
440    set(COREML_DELEGATE_SRCS
441      ${TORCH_SRC_DIR}/csrc/jit/backends/coreml/cpp/context.cpp
442      ${TORCH_SRC_DIR}/csrc/jit/backends/coreml/objc/PTMCoreMLBackend.mm
443      ${TORCH_SRC_DIR}/csrc/jit/backends/coreml/objc/PTMCoreMLExecutor.mm
444      ${TORCH_SRC_DIR}/csrc/jit/backends/coreml/objc/PTMCoreMLCompiler.mm
445      ${TORCH_SRC_DIR}/csrc/jit/backends/coreml/objc/PTMCoreMLFeatureProvider.mm
446    )
447    set_source_files_properties(${TORCH_SRC_DIR}/csrc/jit/backends/coreml/objc/PTMCoreMLBackend.mm PROPERTIES COMPILE_FLAGS "-fno-objc-arc")
448    include_directories(${TORCH_ROOT}/third_party/nlohmann/single_include)
449    list(APPEND TORCH_SRCS ${COREML_DELEGATE_SRCS})
450  endif()
451endif()
452
453# Required workaround for LLVM 9 includes.
454if(NOT MSVC)
455  set_source_files_properties(${TORCH_SRC_DIR}/csrc/jit/tensorexpr/llvm_jit.cpp PROPERTIES COMPILE_FLAGS -Wno-noexcept-type)
456endif()
457# Disable certain warnings for GCC-9.X
458if(CMAKE_COMPILER_IS_GNUCXX)
459  # See https://github.com/pytorch/pytorch/issues/38856
460  set_source_files_properties(${TORCH_SRC_DIR}/csrc/jit/tensorexpr/llvm_jit.cpp PROPERTIES COMPILE_FLAGS "-Wno-redundant-move -Wno-noexcept-type")
461  set_source_files_properties(${TORCH_SRC_DIR}/csrc/jit/tensorexpr/llvm_codegen.cpp PROPERTIES COMPILE_FLAGS "-Wno-init-list-lifetime")
462endif()
463
464# Enable conditional FP16 arithmetic intrinsics
465if(CPU_AARCH64 AND LINUX)
466set_source_files_properties(${TORCH_ROOT}/aten/src/ATen/native/BlasKernel.cpp PROPERTIES COMPILE_FLAGS "-march=armv8.2-a+fp16")
467endif()
468
469
470if(NOT INTERN_DISABLE_MOBILE_INTERP)
471  set(MOBILE_SRCS
472     ${TORCH_SRC_DIR}/csrc/jit/mobile/function.cpp
473     ${TORCH_SRC_DIR}/csrc/jit/mobile/import.cpp
474     ${TORCH_SRC_DIR}/csrc/jit/mobile/import_data.cpp
475     ${TORCH_SRC_DIR}/csrc/jit/mobile/interpreter.cpp
476     ${TORCH_SRC_DIR}/csrc/jit/mobile/compatibility/model_compatibility.cpp
477     ${TORCH_SRC_DIR}/csrc/jit/mobile/module.cpp
478     ${TORCH_SRC_DIR}/csrc/jit/mobile/flatbuffer_loader.cpp
479     ${TORCH_SRC_DIR}/csrc/jit/mobile/observer.cpp
480     ${TORCH_SRC_DIR}/csrc/jit/mobile/parse_bytecode.cpp
481     ${TORCH_SRC_DIR}/csrc/jit/mobile/parse_operators.cpp
482     ${TORCH_SRC_DIR}/csrc/jit/mobile/quantization.cpp
483     ${TORCH_SRC_DIR}/csrc/jit/mobile/train/export_data.cpp
484     ${TORCH_SRC_DIR}/csrc/jit/mobile/train/optim/sgd.cpp
485     ${TORCH_SRC_DIR}/csrc/jit/mobile/train/random.cpp
486     ${TORCH_SRC_DIR}/csrc/jit/mobile/train/sequential.cpp
487     ${TORCH_SRC_DIR}/csrc/jit/mobile/upgrader_mobile.cpp
488     ${TORCH_SRC_DIR}/csrc/jit/serialization/flatbuffer_serializer.cpp
489     )
490  list(APPEND TORCH_SRCS ${MOBILE_SRCS})
491  list(APPEND TORCH_SRCS ${LITE_EAGER_SYMOBLICATION_SRCS})
492endif()
493
494# This one needs to be unconditionally added as Functions.cpp is also unconditionally added
495list(APPEND TORCH_SRCS
496  ${TORCH_SRC_DIR}/csrc/autograd/FunctionsManual.cpp
497  ${TORCH_SRC_DIR}/csrc/utils/out_types.cpp
498)
499
500if(NOT INTERN_DISABLE_AUTOGRAD AND NOT BUILD_LITE_INTERPRETER)
501  list(APPEND TORCH_SRCS
502    ${TORCH_SRC_DIR}/csrc/autograd/TraceTypeManual.cpp
503    ${TORCH_SRC_DIR}/csrc/autograd/VariableTypeManual.cpp
504  )
505endif()
506
507if(${USE_ITT})
508  list(APPEND TORCH_SRCS
509    ${TORCH_SRC_DIR}/csrc/itt_wrapper.cpp
510    ${TORCH_SRC_DIR}/csrc/profiler/stubs/itt.cpp
511  )
512endif()
513
514if(NOT INTERN_BUILD_MOBILE AND NOT BUILD_LITE_INTERPRETER)
515  list(APPEND TORCH_SRCS
516    ${TORCH_SRC_DIR}/csrc/api/src/jit.cpp
517    ${TORCH_SRC_DIR}/csrc/jit/mobile/compatibility/backport.cpp
518    ${TORCH_SRC_DIR}/csrc/jit/mobile/compatibility/backport_manager.cpp
519    ${TORCH_SRC_DIR}/csrc/jit/serialization/onnx.cpp
520    ${TORCH_SRC_DIR}/csrc/jit/serialization/export.cpp
521    ${TORCH_SRC_DIR}/csrc/jit/serialization/export_bytecode.cpp
522    ${TORCH_SRC_DIR}/csrc/jit/serialization/export_module.cpp
523    ${TORCH_SRC_DIR}/csrc/jit/serialization/flatbuffer_serializer.cpp
524    ${TORCH_SRC_DIR}/csrc/jit/codegen/fuser/cpu/fused_kernel.cpp
525    ${TORCH_SRC_DIR}/csrc/jit/api/module_save.cpp
526    ${TORCH_SRC_DIR}/csrc/utils/byte_order.cpp
527  )
528
529  if(USE_DISTRIBUTED)
530    append_filelist("libtorch_distributed_base_sources" TORCH_SRCS)
531    if(NOT WIN32)
532      append_filelist("libtorch_distributed_extra_sources" TORCH_SRCS)
533    endif()
534  endif()
535endif()
536
537if(USE_CUDA OR USE_ROCM)
538  append_filelist("libtorch_cuda_core_sources" Caffe2_GPU_HIP_JIT_FUSERS_SRCS)
539endif()
540
541if(USE_CUDA)
542  list(APPEND Caffe2_GPU_CU_SRCS ${Caffe2_GPU_HIP_JIT_FUSERS_SRCS})
543  add_library(caffe2_nvrtc SHARED ${ATen_NVRTC_STUB_SRCS})
544  if(MSVC)
545    # Delay load nvcuda.dll so we can import torch compiled with cuda on a CPU-only machine
546    set(DELAY_LOAD_FLAGS "-DELAYLOAD:nvcuda.dll;delayimp.lib")
547  else()
548    set(DELAY_LOAD_FLAGS "")
549  endif()
550
551  target_link_libraries(caffe2_nvrtc PRIVATE caffe2::nvrtc ${DELAY_LOAD_FLAGS})
552  install(TARGETS caffe2_nvrtc DESTINATION "${TORCH_INSTALL_LIB_DIR}")
553  if(USE_NCCL)
554    list(APPEND Caffe2_GPU_SRCS
555      ${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp)
556  endif()
557  if(USE_DISTRIBUTED)
558    append_filelist("libtorch_cuda_distributed_base_sources" Caffe2_GPU_SRCS)
559    if(NOT WIN32)
560      append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_GPU_SRCS)
561      set_source_files_properties(
562        ${TORCH_SRC_DIR}/csrc/distributed/c10d/intra_node_comm.cpp
563        ${TORCH_SRC_DIR}/csrc/distributed/c10d/CudaDMAConnectivity.cpp
564        ${TORCH_SRC_DIR}/csrc/distributed/c10d/CUDASymmetricMemory.cu
565        PROPERTIES COMPILE_FLAGS "-DPYTORCH_C10_DRIVER_API_SUPPORTED=1"
566      )
567    endif()
568  endif()
569  set_source_files_properties(
570    ${TORCH_ROOT}/aten/src/ATen/cuda/detail/LazyNVRTC.cpp
571    PROPERTIES COMPILE_DEFINITIONS "NVRTC_SHORTHASH=${CUDA_NVRTC_SHORTHASH}"
572  )
573  set_source_files_properties(${TORCH_SRC_DIR}/csrc/jit/passes/frozen_conv_add_relu_fusion.cpp PROPERTIES COMPILE_FLAGS "-DUSE_CUDA=1")
574  set_source_files_properties(${TORCH_SRC_DIR}/csrc/jit/codegen/cuda/interface.cpp PROPERTIES COMPILE_FLAGS "-DUSE_CUDA=1")
575endif()
576
577if(BUILD_ONEDNN_GRAPH)
578  list(APPEND Caffe2_CPU_SRCS
579    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/LlgaTensorImpl.cpp
580    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/graph_fuser.cpp
581    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/graph_rewriter.cpp
582    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/graph_helper.cpp
583    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/register_interface.cpp
584    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/decompose_silu.cpp
585    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/interface.cpp
586    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/kernel.cpp
587    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/defer_size_check.cpp
588    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/layout_propagation.cpp
589    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/prepare_binary.cpp
590    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/guard_shape.cpp
591  )
592endif()
593
594if(USE_ROCM)
595  list(APPEND Caffe2_HIP_SRCS ${Caffe2_GPU_HIP_JIT_FUSERS_SRCS})
596  if(USE_NCCL)
597    list(APPEND Caffe2_HIP_SRCS
598      ${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp)
599  endif()
600  if(USE_DISTRIBUTED)
601    append_filelist("libtorch_cuda_distributed_base_sources" Caffe2_HIP_SRCS)
602    if(NOT WIN32)
603      append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_HIP_SRCS)
604    endif()
605  endif()
606  # caffe2_nvrtc's stubs to driver APIs are useful for HIP.
607  # See NOTE [ ATen NVRTC Stub and HIP ]
608  add_library(caffe2_nvrtc SHARED ${ATen_NVRTC_STUB_SRCS})
609  target_link_libraries(caffe2_nvrtc ${PYTORCH_HIP_LIBRARIES} ${ROCM_HIPRTC_LIB})
610  target_include_directories(caffe2_nvrtc PRIVATE ${CMAKE_BINARY_DIR})
611  target_compile_definitions(caffe2_nvrtc PRIVATE USE_ROCM __HIP_PLATFORM_AMD__)
612  install(TARGETS caffe2_nvrtc DESTINATION "${TORCH_INSTALL_LIB_DIR}")
613endif()
614
615if(NOT NO_API AND NOT BUILD_LITE_INTERPRETER)
616  list(APPEND TORCH_SRCS
617    ${TORCH_SRC_DIR}/csrc/api/src/cuda.cpp
618    ${TORCH_SRC_DIR}/csrc/api/src/data/datasets/mnist.cpp
619    ${TORCH_SRC_DIR}/csrc/api/src/data/samplers/distributed.cpp
620    ${TORCH_SRC_DIR}/csrc/api/src/data/samplers/random.cpp
621    ${TORCH_SRC_DIR}/csrc/api/src/data/samplers/sequential.cpp
622    ${TORCH_SRC_DIR}/csrc/api/src/data/samplers/stream.cpp
623    ${TORCH_SRC_DIR}/csrc/api/src/enum.cpp
624    ${TORCH_SRC_DIR}/csrc/api/src/imethod.cpp
625    ${TORCH_SRC_DIR}/csrc/api/src/serialize.cpp
626    ${TORCH_SRC_DIR}/csrc/api/src/jit.cpp
627    ${TORCH_SRC_DIR}/csrc/api/src/mps.cpp
628    ${TORCH_SRC_DIR}/csrc/api/src/nn/init.cpp
629    ${TORCH_SRC_DIR}/csrc/api/src/nn/module.cpp
630    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/_functions.cpp
631    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/activation.cpp
632    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/adaptive.cpp
633    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/batchnorm.cpp
634    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/normalization.cpp
635    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/instancenorm.cpp
636    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/conv.cpp
637    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/dropout.cpp
638    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/distance.cpp
639    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/embedding.cpp
640    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/fold.cpp
641    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/linear.cpp
642    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/loss.cpp
643    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/padding.cpp
644    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/pixelshuffle.cpp
645    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/pooling.cpp
646    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/rnn.cpp
647    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/upsampling.cpp
648    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/transformer.cpp
649    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/container/functional.cpp
650    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/activation.cpp
651    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/adaptive.cpp
652    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/batchnorm.cpp
653    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/embedding.cpp
654    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/instancenorm.cpp
655    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/normalization.cpp
656    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/conv.cpp
657    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/dropout.cpp
658    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/linear.cpp
659    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/padding.cpp
660    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/pooling.cpp
661    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/rnn.cpp
662    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/vision.cpp
663    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/transformer.cpp
664    ${TORCH_SRC_DIR}/csrc/api/src/optim/adagrad.cpp
665    ${TORCH_SRC_DIR}/csrc/api/src/optim/adam.cpp
666    ${TORCH_SRC_DIR}/csrc/api/src/optim/adamw.cpp
667    ${TORCH_SRC_DIR}/csrc/api/src/optim/lbfgs.cpp
668    ${TORCH_SRC_DIR}/csrc/api/src/optim/optimizer.cpp
669    ${TORCH_SRC_DIR}/csrc/api/src/optim/rmsprop.cpp
670    ${TORCH_SRC_DIR}/csrc/api/src/optim/serialize.cpp
671    ${TORCH_SRC_DIR}/csrc/api/src/optim/sgd.cpp
672    ${TORCH_SRC_DIR}/csrc/api/src/optim/schedulers/lr_scheduler.cpp
673    ${TORCH_SRC_DIR}/csrc/api/src/optim/schedulers/step_lr.cpp
674    ${TORCH_SRC_DIR}/csrc/api/src/optim/schedulers/reduce_on_plateau_scheduler.cpp
675    ${TORCH_SRC_DIR}/csrc/api/src/serialize/input-archive.cpp
676    ${TORCH_SRC_DIR}/csrc/api/src/serialize/output-archive.cpp
677    ${TORCH_SRC_DIR}/csrc/api/src/xpu.cpp
678  )
679endif()
680
681list(APPEND Caffe2_CPU_SRCS ${TORCH_SRCS})
682
683if(USE_MPS)
684  list(APPEND Caffe2_CPU_SRCS ${Caffe2_MPS_SRCS})
685endif()
686
687# NOTE [ Linking AVX and non-AVX files ]
688#
689# Regardless of the CPU capabilities, we build some files with AVX2, and AVX512
690# instruction set. If the host CPU doesn't support those, we simply ignore their
691# functions at runtime during dispatch.
692#
693# We must make sure that those files are at the end of the input list when
694# linking the torch_cpu library. Otherwise, the following error scenario might
695# occur:
696# 1. A non-AVX2 and an AVX2 file both call a function defined with the `inline`
697#    keyword
698# 2. The compiler decides not to inline this function
699# 3. Two different versions of the machine code are generated for this function:
700#    one without AVX2 instructions and one with AVX2.
701# 4. When linking, the AVX2 version is found earlier in the input object files,
702#    so the linker makes the entire library use it, even in code not guarded by
703#    the dispatcher.
704# 5. A CPU without AVX2 support executes this function, encounters an AVX2
705#    instruction and crashes.
706#
707# Thus we organize the input files in the following order:
708# 1. All files with no AVX-n support
709# 2. All files with AVX2 support ('*AVX2.cpp')
710# 3. All files with AVX512 support ('*AVX512.cpp')
711set(Caffe2_CPU_SRCS_NON_AVX)
712set(Caffe2_CPU_SRCS_AVX2)
713set(Caffe2_CPU_SRCS_AVX512)
714foreach(input_filename ${Caffe2_CPU_SRCS})
715  if(${input_filename} MATCHES "AVX2\\.cpp")
716    list(APPEND Caffe2_CPU_SRCS_AVX2 ${input_filename})
717  elseif(${input_filename} MATCHES "AVX512\\.cpp")
718    list(APPEND Caffe2_CPU_SRCS_AVX512 ${input_filename})
719  else()
720    list(APPEND Caffe2_CPU_SRCS_NON_AVX ${input_filename})
721  endif()
722endforeach(input_filename)
723set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS_NON_AVX} ${Caffe2_CPU_SRCS_AVX2} ${Caffe2_CPU_SRCS_AVX512})
724
725# ==========================================================
726# END formerly-libtorch sources
727# ==========================================================
728
729if(BUILD_LIBTORCHLESS)
730  find_library(TORCH_LIB torch PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
731  find_library(TORCH_CPU_LIB torch_cpu PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
732
733  if(USE_CUDA)
734    find_library(TORCH_CUDA_LIB torch_cuda PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
735  endif()
736
737  if(USE_ROCM)
738    find_library(TORCH_HIP_LIB torch_hip PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
739  endif()
740
741  if(USE_XPU)
742    find_library(TORCH_XPU_LIB torch_xpu PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
743  endif()
744  add_subdirectory(../torch torch)
745  # ---[ Torch python bindings build
746  set(TORCH_PYTHON_COMPILE_OPTIONS ${TORCH_PYTHON_COMPILE_OPTIONS} PARENT_SCOPE)
747  set(TORCH_PYTHON_LINK_FLAGS ${TORCH_PYTHON_LINK_FLAGS} PARENT_SCOPE)
748else()
749  set(TORCH_LIB torch)
750  set(TORCH_CPU_LIB torch_cpu)
751  set(TORCH_CUDA_LIB torch_cuda)
752  set(TORCH_HIP_LIB torch_hip)
753  set(TORCH_XPU_LIB torch_xpu)
754endif()
755
756
757if(NOT BUILD_LIBTORCHLESS)
758add_library(torch_cpu ${Caffe2_CPU_SRCS})
759if(HAVE_SOVERSION)
760  set_target_properties(torch_cpu PROPERTIES
761      VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION})
762endif()
763torch_compile_options(torch_cpu)  # see cmake/public/utils.cmake
764
765# Ignore Wdeprecated-XXX errors from third-party libraries
766if(NOT MSVC)
767  set_source_files_properties(${PROJECT_SOURCE_DIR}/torch/csrc/distributed/c10d/socket.cpp PROPERTIES COMPILE_OPTIONS "-Wno-error=deprecated")
768endif()
769
770if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND NOT USE_IOS AND NOT USE_COREML_DELEGATE)
771  target_compile_options_if_supported(torch_cpu "-Wmissing-prototypes")
772  target_compile_options_if_supported(torch_cpu "-Werror=missing-prototypes")
773  get_target_property(TORCH_CPU_SOURCES torch_cpu SOURCES)
774  foreach(generated_file IN LISTS GENERATED_CXX_TORCH)
775    set_source_files_properties(${generated_file} PROPERTIES COMPILE_OPTIONS "-Wno-missing-prototypes;-Wno-error=missing-prototypes")
776  endforeach()
777  foreach(source_file IN LISTS TORCH_CPU_SOURCES)
778    get_filename_component(source_file "${source_file}" REALPATH)
779    string(FIND "${source_file}" "${CMAKE_BINARY_DIR}" res)
780    if(res GREATER -1)
781      set_source_files_properties(${source_file} PROPERTIES COMPILE_OPTIONS "-Wno-missing-prototypes;-Wno-error=missing-prototypes")
782      continue()
783    endif()
784    string(FIND "${source_file}" "caffe2" res)
785    if(res GREATER -1)
786      set_source_files_properties(${source_file} PROPERTIES COMPILE_OPTIONS "-Wno-missing-prototypes;-Wno-error=missing-prototypes")
787    endif()
788  endforeach()
789endif()
790
791option(TORCH_USE_IWYU "Use include-what-you-use to clean up header inclusion" OFF)
792if(TORCH_USE_IWYU)
793  find_program(iwyu NAMES include-what-you-use)
794  if(iwyu)
795    set(iwyu_cmd
796        "include-what-you-use"
797        "-Xiwyu"
798        "--transitive_includes_only"
799        "-Xiwyu"
800        "--no_fwd_decls"
801        "-Xiwyu"
802        "--prefix_header_includes=keep"
803        "-Xiwyu"
804        "--mapping_file=${CMAKE_CURRENT_LIST_DIR}/../tools/iwyu/all.imp"
805        )
806    set_property(TARGET torch_cpu PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${iwyu_cmd})
807  endif()
808endif()
809
810set_property(SOURCE ${ATen_CORE_SRCS} APPEND
811    PROPERTY COMPILE_DEFINITIONS "TORCH_ASSERT_ONLY_METHOD_OPERATORS")
812set_property(SOURCE ${ATen_ATTENTION_KERNEL_SRCS} APPEND
813    PROPERTY COMPILE_DEFINITIONS "TORCH_ASSERT_NO_OPERATORS")
814
815if(USE_MPS OR USE_PYTORCH_METAL)
816  enable_language(OBJC OBJCXX)
817endif()
818
819if(USE_PRECOMPILED_HEADERS)
820  target_precompile_headers(torch_cpu PRIVATE
821      "$<$<COMPILE_LANGUAGE:CXX>:ATen/core/ATen_pch.h>")
822  # Exclude some files from using PCH
823  set_source_files_properties(
824      # Not built with OpenMP, so PCH is invalid
825      ${Torch_SOURCE_DIR}/aten/src/ATen/MapAllocator.cpp
826      # Builds with incompatible compiler flags
827      ${Caffe2_CPU_SRCS_AVX2}
828      ${Caffe2_CPU_SRCS_AVX512}
829      PROPERTIES SKIP_PRECOMPILE_HEADERS ON)
830endif()
831
832# Pass path to PocketFFT
833if(AT_POCKETFFT_ENABLED)
834  set_source_files_properties(
835      "${PROJECT_SOURCE_DIR}/aten/src/ATen/native/mkl/SpectralOps.cpp"
836      PROPERTIES INCLUDE_DIRECTORIES "${POCKETFFT_INCLUDE_DIR}")
837endif()
838
839if(CMAKE_COMPILER_IS_GNUCXX AND BUILD_LIBTORCH_CPU_WITH_DEBUG)
840  # To enable debug fission we need to build libtorch_cpu with debug info on,
841  # but this increases link time and peak memory usage if we use the
842  # REL_WITH_DEB_INFO env var since that enables it for everything, but it's
843  # only really necessary for libtorch_cpu.
844  target_compile_options(torch_cpu PRIVATE "-g")
845endif()
846
847if(USE_LLVM AND LLVM_FOUND)
848  llvm_map_components_to_libnames(LLVM_LINK_LIBS
849    support core analysis executionengine instcombine
850    scalaropts transformutils ${LLVM_TARGETS_TO_BUILD} orcjit)
851  target_link_libraries(torch_cpu PRIVATE ${LLVM_LINK_LIBS})
852  if(APPLE)
853    set(LINKER_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/unexported_symbols.lds")
854    set_target_properties(torch_cpu PROPERTIES LINK_DEPENDS ${LINKER_SCRIPT})
855    set_target_properties(torch_cpu PROPERTIES LINK_FLAGS "-Wl,-unexported_symbols_list,${LINKER_SCRIPT}")
856  elseif(UNIX)
857    set(LINKER_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/version_script.lds")
858    set_target_properties(torch_cpu PROPERTIES LINK_DEPENDS ${LINKER_SCRIPT})
859    target_link_libraries(torch_cpu PRIVATE "-Wl,--version-script=${LINKER_SCRIPT}")
860  endif()
861endif(USE_LLVM AND LLVM_FOUND)
862
863# This is required for older versions of CMake, which don't allow
864# specifying add_library() without a list of source files
865set(DUMMY_EMPTY_FILE ${CMAKE_BINARY_DIR}/empty.cpp)
866
867if(MSVC)
868  set(DUMMY_FILE_CONTENT "__declspec(dllexport) int ignore_this_library_placeholder(){return 0\\;}")
869else()
870  set(DUMMY_FILE_CONTENT "")
871endif()
872
873file(WRITE ${DUMMY_EMPTY_FILE} ${DUMMY_FILE_CONTENT})
874
875# Wrapper library for people who link against torch and expect both CPU and CUDA support
876# Contains "torch_cpu" and "torch_cuda"
877add_library(torch ${DUMMY_EMPTY_FILE})
878if(HAVE_SOVERSION)
879  set_target_properties(torch PROPERTIES
880      VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION})
881endif()
882
883if(USE_ROCM)
884  filter_list(__caffe2_hip_srcs_cpp Caffe2_HIP_SRCS "\\.(cu|hip)$")
885  set_source_files_properties(${__caffe2_hip_srcs_cpp} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
886endif()
887
888# Compile exposed libraries.
889if(USE_ROCM)
890  set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
891  list(APPEND Caffe2_HIP_SRCS ${GENERATED_CXX_TORCH_CUDA})
892  hip_add_library(torch_hip ${Caffe2_HIP_SRCS})
893  if(USE_FLASH_ATTENTION)
894    target_link_libraries(torch_hip PRIVATE __caffe2_aotriton)
895  endif()
896  set(CUDA_LINK_LIBRARIES_KEYWORD)
897  torch_compile_options(torch_hip)  # see cmake/public/utils.cmake
898  # TODO: Not totally sure if this is live or not
899  if(USE_NCCL)
900    target_link_libraries(torch_hip PRIVATE __caffe2_nccl)
901    target_compile_definitions(torch_hip PRIVATE USE_NCCL)
902  endif()
903
904  if(USE_PRECOMPILED_HEADERS)
905    target_precompile_headers(torch_hip PRIVATE
906        "$<$<COMPILE_LANGUAGE:CXX>:ATen/core/ATen_pch.h>")
907  endif()
908elseif(USE_CUDA)
909  set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
910  list(APPEND Caffe2_GPU_SRCS ${GENERATED_CXX_TORCH_CUDA})
911  if(CUDA_SEPARABLE_COMPILATION)
912    # Separate compilation fails when kernels using `thrust::sort_by_key`
913    # are linked with the rest of CUDA code. Workaround by linking them separately.
914    add_library(torch_cuda ${Caffe2_GPU_SRCS} ${Caffe2_GPU_CU_SRCS})
915    set_property(TARGET torch_cuda PROPERTY CUDA_SEPARABLE_COMPILATION ON)
916
917    add_library(torch_cuda_w_sort_by_key OBJECT
918        ${Caffe2_GPU_SRCS_W_SORT_BY_KEY}
919        ${Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY})
920    set_property(TARGET torch_cuda_w_sort_by_key PROPERTY CUDA_SEPARABLE_COMPILATION OFF)
921    target_link_libraries(torch_cuda PRIVATE torch_cuda_w_sort_by_key)
922  else()
923    add_library(torch_cuda
924        ${Caffe2_GPU_SRCS} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY}
925        ${Caffe2_GPU_CU_SRCS} ${Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY})
926  endif()
927  set(CUDA_LINK_LIBRARIES_KEYWORD)
928  torch_compile_options(torch_cuda)  # see cmake/public/utils.cmake
929  target_compile_definitions(torch_cuda PRIVATE USE_CUDA)
930
931  if(USE_CUFILE)
932    target_link_libraries(torch_cuda PRIVATE torch::cufile)
933    target_compile_definitions(torch_cuda PRIVATE USE_CUFILE)
934  endif()
935  if(USE_CUSPARSELT)
936      target_link_libraries(torch_cuda PRIVATE torch::cusparselt)
937      target_compile_definitions(torch_cuda PRIVATE USE_CUSPARSELT)
938  endif()
939  if(USE_CUDSS)
940    target_link_libraries(torch_cuda PRIVATE torch::cudss)
941    target_compile_definitions(torch_cuda PRIVATE USE_CUDSS)
942  endif()
943  if(USE_NCCL)
944    target_link_libraries(torch_cuda PRIVATE __caffe2_nccl)
945    target_compile_definitions(torch_cuda PRIVATE USE_NCCL)
946  endif()
947  if(USE_UCC)
948    target_link_libraries(torch_cuda PRIVATE __caffe2_ucc)
949    target_compile_definitions(torch_cuda PRIVATE USE_UCC)
950  endif()
951  if(USE_FLASH_ATTENTION)
952    target_compile_definitions(torch_cuda PRIVATE USE_FLASH_ATTENTION)
953  endif()
954  if(USE_MEM_EFF_ATTENTION)
955    target_compile_definitions(torch_cuda PRIVATE USE_MEM_EFF_ATTENTION)
956  endif()
957  if(BUILD_LAZY_CUDA_LINALG)
958    add_library(torch_cuda_linalg ${ATen_CUDA_LINALG_SRCS})
959    target_compile_definitions(torch_cuda_linalg PRIVATE USE_CUDA BUILD_LAZY_CUDA_LINALG)
960    # Library order is important during static linking
961    # `torch::magma` should be mentioned before other CUDA
962    # to transitively include all symbols present in torch_cuda/torch_cpu
963    if(USE_MAGMA)
964      target_link_libraries(torch_cuda_linalg PRIVATE torch::magma)
965      # CUDAHooks reports version of MAGMA PyTorch was compiled against, i.e. needs to be able to include magma headers
966      get_target_property(HOOKS_INCLUDE_DIRECTORIES torch_cuda INCLUDE_DIRECTORIES)
967      if(NOT "${MAGMA_INCLUDE_DIR}" IN_LIST HOOKS_INCLUDE_DIRECTORIES)
968        set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/cuda/detail/CUDAHooks.cpp PROPERTIES INCLUDE_DIRECTORIES  "${MAGMA_INCLUDE_DIR}")
969      endif()
970    endif()
971    target_link_libraries(torch_cuda_linalg PRIVATE
972        torch_cpu
973        torch_cuda
974    )
975    if($ENV{ATEN_STATIC_CUDA})
976      if(CUDA_VERSION_MAJOR LESS_EQUAL 11)
977        target_link_libraries(torch_cuda_linalg PRIVATE
978            CUDA::cusolver_static
979            ${CUDAToolkit_LIBRARY_DIR}/liblapack_static.a     # needed for libcusolver_static
980        )
981      elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 12)
982        target_link_libraries(torch_cuda_linalg PRIVATE
983            CUDA::cusolver_static
984            ${CUDAToolkit_LIBRARY_DIR}/libcusolver_lapack_static.a     # needed for libcusolver_static
985        )
986      endif()
987    else()
988      target_link_libraries(torch_cuda_linalg PRIVATE
989          CUDA::cusolver
990      )
991    endif()
992    # NS: TODO, is this really necessary?
993    if(USE_MAGMA AND CAFFE2_STATIC_LINK_CUDA)
994      target_link_libraries(torch_cuda_linalg PRIVATE
995          CUDA::culibos ${CMAKE_DL_LIBS})
996    endif()
997    set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/cuda/LinearAlgebraStubs.cpp PROPERTIES COMPILE_FLAGS "-DBUILD_LAZY_CUDA_LINALG")
998    install(TARGETS torch_cuda_linalg DESTINATION "${TORCH_INSTALL_LIB_DIR}")
999  endif()
1000
1001  if(USE_PRECOMPILED_HEADERS)
1002    target_precompile_headers(torch_cuda PRIVATE
1003        "$<$<COMPILE_LANGUAGE:CXX>:ATen/core/ATen_pch.h>")
1004  endif()
1005
1006  # Apply suggestion from comment https://github.com/pytorch/pytorch/issues/113053#issuecomment-2115375714
1007  if(LINUX)
1008    set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/cuda/CUDASparseDescriptors.cpp PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations)
1009    set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/cuda/CUDASparseBlas.cpp PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations)
1010    set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cpp PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations)
1011    set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/sparse/cuda/SparseBlasImpl.cpp PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations)
1012  endif()
1013endif()
1014
1015if(USE_XPU)
1016  add_library(torch_xpu ${Caffe2_XPU_SRCS})
1017  torch_compile_options(torch_xpu)  # see cmake/public/utils.cmake
1018  target_compile_definitions(torch_xpu PRIVATE USE_XPU)
1019
1020  # ATen XPU implementation
1021  set(TORCH_XPU_OPS_DIR ${TORCH_ROOT}/third_party/torch-xpu-ops)
1022  set(TORCH_XPU_OPS_REPO_URL https://github.com/intel/torch-xpu-ops.git)
1023  file(READ "${TORCH_ROOT}/third_party/xpu.txt" TORCH_XPU_OPS_COMMIT)
1024  string(REGEX REPLACE "\n$" "" TORCH_XPU_OPS_COMMIT "${TORCH_XPU_OPS_COMMIT}")
1025  if(NOT EXISTS "${TORCH_XPU_OPS_DIR}/.git")
1026    execute_process(
1027      COMMAND git clone --quiet ${TORCH_XPU_OPS_REPO_URL} ${TORCH_XPU_OPS_DIR}
1028      RESULT_VARIABLE _exitcode)
1029    if(NOT _exitcode EQUAL 0)
1030      message(FATAL_ERROR "Fail to clone ${TORCH_XPU_OPS_REPO_URL}")
1031    endif()
1032  endif()
1033  execute_process(
1034    COMMAND git fetch --quiet
1035    WORKING_DIRECTORY ${TORCH_XPU_OPS_DIR}
1036    RESULT_VARIABLE _exitcode)
1037  if(NOT _exitcode EQUAL 0)
1038    message(FATAL_ERROR "Fail to fetch ${TORCH_XPU_OPS_REPO_URL}")
1039  endif()
1040  execute_process(
1041    COMMAND git checkout --quiet ${TORCH_XPU_OPS_COMMIT}
1042    WORKING_DIRECTORY ${TORCH_XPU_OPS_DIR}
1043    RESULT_VARIABLE _exitcode)
1044  if(NOT _exitcode EQUAL 0)
1045    message(FATAL_ERROR "Fail to checkout ${TORCH_XPU_OPS_REPO_URL} to ${TORCH_XPU_OPS_COMMIT}")
1046  endif()
1047
1048  set(TORCH_XPU_OPS_INCLUDE_DIRS
1049      ${TORCH_SRC_DIR}/csrc/api
1050      ${TORCH_SRC_DIR}/csrc/api/include
1051      ${Caffe2_CPU_INCLUDE}
1052      ${Caffe2_XPU_INCLUDE})
1053  # Pass the target as a dependency so that ATen headers generation
1054  # could be followed by torch-xpu-ops build.
1055  # 1. Sources in torch-xpu-ops depend on generated ATen headers.
1056  # 2. Using add_custom_command in torch-xpu-ops to define sycl device sources
1057  #    compilation. add_custom_command requires an explicit dependency.
1058  list(APPEND ${Caffe2_XPU_INCLUDE} ${TORCH_XPU_OPS_DIR}/src/ATen/)
1059  set(TORCH_XPU_OPS_PYTORCH_DEPS ATEN_CPU_FILES_GEN_TARGET)
1060
1061  add_subdirectory(${TORCH_ROOT}/third_party/torch-xpu-ops
1062      ${CMAKE_BINARY_DIR}/caffe2/aten_xpu)
1063  if(NOT TARGET torch_xpu_ops)
1064    message(WARNING "Failed to include ATen XPU implementation target")
1065  else()
1066    target_link_libraries(torch_xpu PRIVATE torch_xpu_ops)
1067    if(MSVC)
1068      # Windows
1069      target_link_libraries(torch_xpu PRIVATE
1070      "-WHOLEARCHIVE:\"$<TARGET_FILE:torch_xpu_ops>\"")
1071    else()
1072      # Linux
1073      target_link_libraries(torch_xpu PRIVATE
1074        "-Wl,--whole-archive,\"$<TARGET_FILE:torch_xpu_ops>\" -Wl,--no-whole-archive")
1075    endif()
1076
1077    # Set cached ${ATen_XPU_INCLUDE_DIRS} to torch
1078    include_directories(SYSTEM ${ATen_XPU_INCLUDE_DIRS})
1079
1080  endif()
1081endif()
1082
1083if(NOT MSVC AND USE_XNNPACK)
1084  TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)
1085endif()
1086
1087# ==========================================================
1088# formerly-libtorch flags
1089# ==========================================================
1090
1091
1092# Build model tracer for tracing-based selective build
1093if(TRACING_BASED AND NOT BUILD_LITE_INTERPRETER AND NOT INTERN_BUILD_MOBILE)
1094  add_subdirectory(
1095    ${TORCH_ROOT}/torch/csrc/jit/mobile/model_tracer
1096    ${CMAKE_BINARY_DIR}/model_tracer
1097  )
1098  string(APPEND CMAKE_CXX_FLAGS " -DENABLE_RECORD_KERNEL_FUNCTION_DTYPE")
1099endif()
1100
1101# Codegen selected_mobile_ops.h for template selective build
1102if(BUILD_LITE_INTERPRETER AND SELECTED_OP_LIST)
1103  message("running gen_selected_mobile_ops_header for:  '${SELECTED_OP_LIST}'")
1104  file(GLOB lite_interpreter_python "${TOOLS_PATH}/lite_interpreter/*.py")
1105  if(${TRACING_BASED})
1106    file(GLOB code_analyzer_python "${TOOLS_PATH}/code_analyzer/*.py")
1107    add_custom_command(
1108      OUTPUT ${CMAKE_BINARY_DIR}/aten/src/ATen/selected_mobile_ops.h
1109      COMMAND
1110        Python::Interpreter
1111        -m tools.code_analyzer.gen_oplist
1112        --model_file_list_path "${SELECTED_OP_LIST}"
1113        --output_dir "${CMAKE_BINARY_DIR}/aten/src/ATen"
1114      DEPENDS
1115        ${torchgen_python}
1116        ${lite_interpreter_python}
1117        ${code_analyzer_python}
1118        "${SELECTED_OP_LIST}"
1119        "${TORCH_ROOT}/aten/src/ATen/native/native_functions.yaml"
1120      WORKING_DIRECTORY "${TORCH_ROOT}")
1121  else()
1122    add_custom_command(
1123      OUTPUT ${CMAKE_BINARY_DIR}/aten/src/ATen/selected_mobile_ops.h
1124      COMMAND
1125        Python::Interpreter
1126        -m tools.lite_interpreter.gen_selected_mobile_ops_header
1127        --yaml_file_path "${SELECTED_OP_LIST}"
1128        --output_file_path "${CMAKE_BINARY_DIR}/aten/src/ATen"
1129      DEPENDS
1130        ${torchgen_python}
1131        ${lite_interpreter_python}
1132        "${SELECTED_OP_LIST}"
1133        "${TORCH_ROOT}/aten/src/ATen/native/native_functions.yaml"
1134      WORKING_DIRECTORY "${TORCH_ROOT}")
1135  endif()
1136
1137  add_custom_target(
1138    __selected_mobile_ops_header_gen
1139    DEPENDS ${CMAKE_BINARY_DIR}/aten/src/ATen/selected_mobile_ops.h)
1140  add_dependencies(torch_cpu __selected_mobile_ops_header_gen)
1141endif()
1142
1143if(NOT NO_API)
1144  target_include_directories(torch_cpu PRIVATE
1145    ${TORCH_SRC_DIR}/csrc/api
1146    ${TORCH_SRC_DIR}/csrc/api/include)
1147endif()
1148
1149if(USE_CUDA AND MSVC)
1150  # -INCLUDE is used to ensure torch_cuda is linked against in a project that relies on them.
1151  # Related issue: https://github.com/pytorch/pytorch/issues/31611
1152  target_link_libraries(torch_cuda INTERFACE "-INCLUDE:?warp_size@cuda@at@@YAHXZ")
1153endif()
1154
1155if(NOT BUILD_LITE_INTERPRETER)
1156  set(TH_CPU_INCLUDE
1157    # dense
1158    aten/src/TH
1159    ${CMAKE_CURRENT_BINARY_DIR}/aten/src/TH
1160    ${TORCH_ROOT}/aten/src
1161    ${CMAKE_CURRENT_BINARY_DIR}/aten/src
1162
1163    ${CMAKE_BINARY_DIR}/aten/src)
1164    target_include_directories(torch_cpu PRIVATE ${TH_CPU_INCLUDE})
1165endif()
1166
1167set(ATen_CPU_INCLUDE
1168  ${TORCH_ROOT}/aten/src
1169  ${CMAKE_CURRENT_BINARY_DIR}/../aten/src
1170  ${CMAKE_BINARY_DIR}/aten/src)
1171
1172if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
1173  set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/QuantizedLinear.cpp PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations)
1174  set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/RNN.cpp PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations)
1175  set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/quantized/cpu/qlinear_prepack.cpp PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations)
1176  set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/quantized/qlinear_unpack.cpp PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations)
1177endif()
1178
1179target_include_directories(torch_cpu PRIVATE ${ATen_CPU_INCLUDE})
1180
1181target_include_directories(torch_cpu PRIVATE
1182  ${TORCH_SRC_DIR}/csrc)
1183
1184target_include_directories(torch_cpu PRIVATE
1185  ${TORCH_ROOT}/third_party/miniz-2.1.0)
1186
1187target_include_directories(torch_cpu PRIVATE
1188  ${TORCH_ROOT}/third_party/kineto/libkineto/include)
1189
1190if(USE_KINETO)
1191  target_include_directories(torch_cpu PRIVATE
1192    ${TORCH_ROOT}/third_party/kineto/libkineto/src)
1193endif()
1194
1195target_include_directories(torch_cpu PRIVATE
1196  ${TORCH_ROOT}/third_party/cpp-httplib)
1197
1198target_include_directories(torch_cpu PRIVATE
1199  ${TORCH_ROOT}/third_party/nlohmann/include)
1200
1201install(DIRECTORY "${TORCH_SRC_DIR}/csrc"
1202  DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch
1203  FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp")
1204install(FILES
1205  "${TORCH_SRC_DIR}/script.h"
1206  "${TORCH_SRC_DIR}/extension.h"
1207  "${TORCH_SRC_DIR}/custom_class.h"
1208  "${TORCH_SRC_DIR}/library.h"
1209  "${TORCH_SRC_DIR}/custom_class_detail.h"
1210  DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch)
1211if(BUILD_TEST)
1212  if(BUILD_EXECUTORCH)
1213    add_subdirectory(
1214            ${TORCH_ROOT}/test/edge
1215            ${CMAKE_BINARY_DIR}/test_edge_op_registration
1216    )
1217  endif()
1218  if(BUILD_LITE_INTERPRETER)
1219    add_subdirectory(
1220      ${TORCH_ROOT}/test/cpp/lite_interpreter_runtime
1221      ${CMAKE_BINARY_DIR}/test_lite_interpreter_runtime
1222    )
1223    add_subdirectory(
1224      ${TORCH_ROOT}/test/mobile/lightweight_dispatch
1225      ${CMAKE_BINARY_DIR}/test_codegen_unboxing
1226    )
1227  else()
1228    add_subdirectory(${TORCH_ROOT}/test/cpp/jit ${CMAKE_BINARY_DIR}/test_jit)
1229    add_subdirectory(${TORCH_ROOT}/test/inductor ${CMAKE_BINARY_DIR}/test_inductor)
1230    add_subdirectory(
1231      ${TORCH_ROOT}/test/cpp/tensorexpr
1232      ${CMAKE_BINARY_DIR}/test_tensorexpr
1233    )
1234    if(USE_DISTRIBUTED)
1235      add_subdirectory(${TORCH_ROOT}/test/cpp/c10d ${CMAKE_BINARY_DIR}/test_cpp_c10d)
1236      if(NOT WIN32)
1237        add_subdirectory(${TORCH_ROOT}/test/cpp/dist_autograd ${CMAKE_BINARY_DIR}/dist_autograd)
1238        add_subdirectory(${TORCH_ROOT}/test/cpp/rpc ${CMAKE_BINARY_DIR}/test_cpp_rpc)
1239      endif()
1240    endif()
1241    if(NOT NO_API)
1242      add_subdirectory(${TORCH_ROOT}/test/cpp/api ${CMAKE_BINARY_DIR}/test_api)
1243    endif()
1244
1245    if(USE_LLVM AND LLVM_FOUND)
1246      add_subdirectory(
1247        ${TORCH_ROOT}/test/mobile/nnc
1248        ${CMAKE_BINARY_DIR}/test_mobile_nnc
1249      )
1250    endif()
1251    add_subdirectory(${TORCH_ROOT}/test/cpp/lazy
1252                     ${CMAKE_BINARY_DIR}/test_lazy)
1253  endif()
1254  if(BUILD_AOT_INDUCTOR_TEST)
1255    add_subdirectory(
1256      ${TORCH_ROOT}/test/cpp/aoti_abi_check
1257      ${CMAKE_BINARY_DIR}/test_aoti_abi_check)
1258    add_subdirectory(
1259      ${TORCH_ROOT}/test/cpp/aoti_inference
1260      ${CMAKE_BINARY_DIR}/test_aoti_inference)
1261  endif()
1262endif()
1263
1264if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
1265  include(../cmake/CheckAbi.cmake)
1266endif()
1267
1268# CMake config for external projects.
1269configure_file(
1270  ${PROJECT_SOURCE_DIR}/cmake/TorchConfigVersion.cmake.in
1271  ${PROJECT_BINARY_DIR}/TorchConfigVersion.cmake
1272  @ONLY)
1273configure_file(
1274  ${TORCH_ROOT}/cmake/TorchConfig.cmake.in
1275  ${PROJECT_BINARY_DIR}/TorchConfig.cmake
1276  @ONLY)
1277install(FILES
1278  ${PROJECT_BINARY_DIR}/TorchConfigVersion.cmake
1279  ${PROJECT_BINARY_DIR}/TorchConfig.cmake
1280  DESTINATION share/cmake/Torch)
1281
1282# ---[ Torch python bindings build
1283add_subdirectory(../torch torch)
1284set(TORCH_PYTHON_COMPILE_OPTIONS ${TORCH_PYTHON_COMPILE_OPTIONS} PARENT_SCOPE)
1285set(TORCH_PYTHON_LINK_FLAGS ${TORCH_PYTHON_LINK_FLAGS} PARENT_SCOPE)
1286# ==========================================================
1287# END formerly-libtorch flags
1288# ==========================================================
1289
1290if(NOT NO_API)
1291  target_include_directories(torch_cpu PUBLIC
1292    $<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/api>
1293    $<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/api/include>)
1294endif()
1295
1296if(USE_ROCM)
1297  target_compile_definitions(torch_hip PRIVATE
1298    USE_ROCM
1299    __HIP_PLATFORM_AMD__
1300    )
1301  # NB: Massive hack.  torch/csrc/jit/codegen/fuser/codegen.cpp includes
1302  # torch/csrc/jit/codegen/fuser/cuda/resource_strings.h which changes the
1303  # strings depending on if you're __HIP_PLATFORM_AMD__ or not.
1304  # But that file is in torch_cpu!  So, against all odds, this macro
1305  # has to be set on torch_cpu too.  I also added it to torch for
1306  # better luck
1307  target_compile_definitions(torch_cpu PRIVATE
1308    USE_ROCM
1309    __HIP_PLATFORM_AMD__
1310    )
1311  target_compile_definitions(torch PRIVATE
1312    USE_ROCM
1313    __HIP_PLATFORM_AMD__
1314    )
1315
1316  if(NOT ROCM_SOURCE_DIR)
1317    set(ROCM_SOURCE_DIR "$ENV{ROCM_SOURCE_DIR}")
1318  endif()
1319  if($ROCM_SOURCE_DIR STREQUAL "")
1320    set(ROCM_SOURCE_DIR "/opt/rocm")
1321  endif()
1322  message(INFO "caffe2 ROCM_SOURCE_DIR = ${ROCM_SOURCE_DIR}")
1323  target_include_directories(torch_hip PRIVATE
1324    ${ROCM_SOURCE_DIR}/include
1325    ${ROCM_SOURCE_DIR}/hcc/include
1326    ${ROCM_SOURCE_DIR}/rocblas/include
1327    ${ROCM_SOURCE_DIR}/hipsparse/include
1328    )
1329  if(USE_FLASH_ATTENTION)
1330    target_compile_definitions(torch_hip PRIVATE USE_FLASH_ATTENTION)
1331  endif()
1332  if(USE_MEM_EFF_ATTENTION)
1333    target_compile_definitions(torch_hip PRIVATE USE_MEM_EFF_ATTENTION)
1334  endif()
1335endif()
1336
1337if(BUILD_LITE_INTERPRETER)
1338  target_compile_definitions(torch_cpu PRIVATE BUILD_LITE_INTERPRETER)
1339  # Enable template selective build only when SELECTED_OP_LIST is provided.
1340  if(SELECTED_OP_LIST)
1341    target_compile_definitions(torch_cpu PRIVATE TEMPLATE_SELECTIVE_BUILD)
1342  endif()
1343endif()
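# A minimal sketch of enabling the selective lite-interpreter build (the YAML
# path below is a placeholder; SELECTED_OP_LIST is expected to point at an
# operator list generated for the target models):
#   cmake -DBUILD_LITE_INTERPRETER=ON -DSELECTED_OP_LIST=/path/to/selected_ops.yaml ...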
1344
1345
1346# Pass USE_DISTRIBUTED to torch_cpu, as some code in jit/pickler.cpp and
1347# jit/unpickler.cpp is compiled only when USE_DISTRIBUTED is set.
1348if(USE_DISTRIBUTED)
1349  target_compile_definitions(torch_cpu PUBLIC USE_DISTRIBUTED)
1350  if(USE_GLOO AND USE_C10D_GLOO)
1351    target_compile_definitions(torch_cpu PUBLIC USE_C10D_GLOO)
1352  endif()
1353  if(USE_UCC AND USE_C10D_UCC)
1354    target_compile_definitions(torch_cpu PUBLIC USE_C10D_UCC)
1355    if(USE_CUDA)
1356      target_compile_definitions(torch_cuda PUBLIC USE_C10D_UCC)
1357    endif()
1358  endif()
1359  if(USE_NCCL AND USE_C10D_NCCL)
1360    if(USE_ROCM)
1361      target_compile_definitions(torch_hip PUBLIC USE_C10D_NCCL)
1362    else()
1363      target_compile_definitions(torch_cuda PUBLIC USE_C10D_NCCL)
1364    endif()
1365  endif()
1366  if(USE_MPI AND USE_C10D_MPI)
1367    if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
1368      set_source_files_properties(
1369        "${TORCH_SRC_DIR}/csrc/distributed/c10d/ProcessGroupMPI.cpp"
1370        PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations)
1371    endif()
1372    target_compile_definitions(torch_cpu PUBLIC USE_C10D_MPI)
1373  endif()
1374  # Pass USE_RPC in order to reduce use of
1375  # #if defined(USE_DISTRIBUTED) && !defined(_WIN32)
1376  # guards; this needs to be removed once RPC is supported on Windows.
1377  if(NOT WIN32)
1378    target_compile_definitions(torch_cpu PUBLIC USE_RPC)
1379  endif()
1380  # Pass USE_TENSORPIPE to torch_cpu, as some parts of rpc/utils.cpp
1381  # can only be compiled when USE_TENSORPIPE is set.
1382  if(USE_TENSORPIPE)
1383    target_compile_definitions(torch_cpu PUBLIC USE_TENSORPIPE)
1384  endif()
1385endif()
1386
1387if(NOT INTERN_BUILD_MOBILE)
1388  if(${CAFFE2_LINK_LOCAL_PROTOBUF})
1389    target_link_libraries(torch_cpu INTERFACE protobuf::libprotobuf)
1390  else()
1391    target_link_libraries(torch_cpu PUBLIC protobuf::libprotobuf)
1392  endif()
1393endif()
1394
1395if($ENV{TH_BINARY_BUILD})
1396  if(NOT MSVC AND USE_CUDA AND NOT APPLE)
1397    # Note [Extra MKL symbols for MAGMA in torch_cpu]
1398    #
1399    # When we build CUDA libraries and link against MAGMA, MAGMA makes use of
1400    # some BLAS symbols in its CPU fallbacks when it has no GPU versions
1401    # of kernels.  Previously, we ensured the BLAS symbols were filled in by
1402    # MKL by linking torch_cuda with BLAS, but when we are statically linking
1403    # against MKL (when we do wheel builds), this actually ends up pulling in a
1404    # decent chunk of MKL into torch_cuda, inflating our torch_cuda binary
1405    # size by 8M.  torch_cpu exposes most of the MKL symbols we need, but
1406    # empirically we determined that there are a few which it doesn't provide.  If
1407    # we link torch_cpu with these --undefined symbols, we can ensure they
1408    # do get pulled in, and then we can avoid statically linking in MKL to
1409    # torch_cuda at all!
1410    #
1411    # We aren't really optimizing for binary size on Windows (and this link
1412    # line doesn't work on Windows), so don't do it there.
1413    #
1414    # These linker commands do not work on macOS, so do not attempt this there.
1415    # (It shouldn't matter anyway, because macOS has dropped CUDA support.)
1416    foreach(_symb slaed0 dlaed0 dormql sormql zheevd cheevd)
1417      string(APPEND _undefined_link_flags " -Wl,--undefined=mkl_lapack_${_symb}")
1418    endforeach()
1419    set_target_properties(torch_cpu PROPERTIES LINK_FLAGS ${_undefined_link_flags})
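    # For illustration (assuming GNU ld semantics), the accumulated value is a
    # single flags string along the lines of:
    #   " -Wl,--undefined=mkl_lapack_slaed0 -Wl,--undefined=mkl_lapack_dlaed0 ..."
    # Each --undefined entry forces the linker to treat that symbol as needed,
    # so the corresponding MKL archive member gets pulled into torch_cpu.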
1420
1421  endif()
1422endif()
1423
1424target_link_libraries(torch_cpu PUBLIC c10)
1425target_link_libraries(torch_cpu PUBLIC ${Caffe2_PUBLIC_DEPENDENCY_LIBS})
1426target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_LIBS})
1427target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS})
1428if(USE_MPI)
1429  target_link_libraries(torch_cpu PRIVATE MPI::MPI_CXX)
1430endif()
1431target_include_directories(torch_cpu INTERFACE $<INSTALL_INTERFACE:include>)
1432target_include_directories(torch_cpu PRIVATE ${Caffe2_CPU_INCLUDE})
1433target_include_directories(torch_cpu SYSTEM PRIVATE "${Caffe2_DEPENDENCY_INCLUDE}")
1434
1435target_compile_definitions(torch_cpu PRIVATE CAFFE2_BUILD_MAIN_LIB)
1436if(USE_CUDA)
1437  target_compile_definitions(torch_cuda PRIVATE TORCH_CUDA_BUILD_MAIN_LIB)
1438elseif(USE_ROCM)
1439  target_compile_definitions(torch_hip PRIVATE TORCH_HIP_BUILD_MAIN_LIB)
1440endif()
1441
1442if(USE_XPU)
1443  target_compile_definitions(torch_xpu PRIVATE TORCH_XPU_BUILD_MAIN_LIB)
1444endif()
1445
1446set(EXPERIMENTAL_SINGLE_THREAD_POOL "0" CACHE STRING
1447  "Experimental option to use a single thread pool for inter- and intra-op parallelism")
1448if("${EXPERIMENTAL_SINGLE_THREAD_POOL}")
1449  target_compile_definitions(torch_cpu PUBLIC "-DAT_EXPERIMENTAL_SINGLE_THREAD_POOL=1")
1450endif()
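# A minimal sketch of opting in (experimental; off by default):
#   cmake -DEXPERIMENTAL_SINGLE_THREAD_POOL=1 ...
# which, per the block above, defines AT_EXPERIMENTAL_SINGLE_THREAD_POOL=1 for
# torch_cpu and its dependents.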
1451
1452if(MSVC AND BUILD_SHARED_LIBS)
1453  # ONNX is linked statically and needs to be exported from this library
1454  # to be used externally. Make sure that references match the export.
1455  target_compile_options(torch_cpu PRIVATE "-DONNX_BUILD_MAIN_LIB")
1456endif()
1457
1458caffe2_interface_library(torch_cpu torch_cpu_library)
1459
1460if(USE_CUDA)
1461  caffe2_interface_library(torch_cuda torch_cuda_library)
1462elseif(USE_ROCM)
1463  caffe2_interface_library(torch_hip torch_hip_library)
1464elseif(USE_XPU)
1465  caffe2_interface_library(torch_xpu torch_xpu_library)
1466endif()
1467
1468caffe2_interface_library(torch torch_library)
1469
1470install(TARGETS torch_cpu torch_cpu_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}")
1471
1472if(USE_CUDA)
1473  install(TARGETS torch_cuda torch_cuda_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}")
1474elseif(USE_ROCM)
1475  install(TARGETS torch_hip torch_hip_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}")
1476elseif(USE_XPU)
1477  install(TARGETS torch_xpu torch_xpu_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}")
1478endif()
1479
1480install(TARGETS torch torch_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}")
1481
1482target_link_libraries(torch PUBLIC torch_cpu_library)
1483
1484if(USE_CUDA)
1485  target_link_libraries(torch PUBLIC torch_cuda_library)
1486elseif(USE_ROCM)
1487  target_link_libraries(torch PUBLIC torch_hip_library)
1488endif()
1489
1490if(USE_XPU)
1491  target_link_libraries(torch PUBLIC torch_xpu_library)
1492endif()
1493
1494if(PRINT_CMAKE_DEBUG_INFO)
1495  print_target_properties(torch)
1496  print_target_properties(torch_cpu)
1497endif()
1498
1499# Install PDB files for MSVC builds
1500if(MSVC AND BUILD_SHARED_LIBS)
1501  install(FILES $<TARGET_PDB_FILE:torch_cpu> DESTINATION "${TORCH_INSTALL_LIB_DIR}" OPTIONAL)
1502  if(USE_CUDA)
1503    install(FILES $<TARGET_PDB_FILE:torch_cuda> DESTINATION "${TORCH_INSTALL_LIB_DIR}" OPTIONAL)
1504  elseif(USE_ROCM)
1505    install(FILES $<TARGET_PDB_FILE:torch_hip> DESTINATION "${TORCH_INSTALL_LIB_DIR}" OPTIONAL)
1506  endif()
1507endif()
1508
1509# ---[ CUDA library.
1510if(USE_CUDA)
1511  # FIXME: If kineto is linked with CUPTI, it pollutes torch_cpu with CUDA dependencies.
1512  # Even worse, it never declares that it depends on cudart, yet it calls its API; see
1513  # https://github.com/pytorch/kineto/blob/aef2f5c0f15e3be52406ac0b885e8689de6bc9f6/libkineto/src/CudaDeviceProperties.cpp#L24
1514  if(USE_KINETO AND NOT MSVC AND NOT LIBKINETO_NOCUPTI)
1515    target_link_libraries(torch_cpu PRIVATE torch::cudart)
1516  endif()
1517  target_link_libraries(torch_cuda INTERFACE torch::cudart)
1518  target_link_libraries(torch_cuda PUBLIC c10_cuda)
1519  if(TARGET torch::nvtx3)
1520    target_link_libraries(torch_cuda PRIVATE torch::nvtx3)
1521  else()
1522    target_link_libraries(torch_cuda PUBLIC torch::nvtoolsext)
1523  endif()
1524
1525  target_include_directories(
1526      torch_cuda INTERFACE $<INSTALL_INTERFACE:include>)
1527  target_include_directories(
1528      torch_cuda PRIVATE ${Caffe2_GPU_INCLUDE})
1529  target_link_libraries(
1530      torch_cuda PRIVATE ${Caffe2_CUDA_DEPENDENCY_LIBS})
1531
1532  # These public dependencies must go after the previous dependencies, as the
1533  # order of the libraries in the linker call matters here when statically
1534  # linking; libculibos and cublas must be last.
1535  target_link_libraries(torch_cuda PUBLIC torch_cpu_library ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS})
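  # For illustration (assuming static cuBLAS), a traditional linker resolves
  # symbols left to right, so the effective link line must keep the CUDA math
  # libraries at the end, conceptually:
  #   ... torch_cuda objects ... libtorch_cpu ... libcublas_static.a libculibos.a
  # Placing them earlier could leave references from torch_cuda unresolved.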
1536endif()
1537
1538# ---[ XPU library.
1539if(USE_XPU)
1540  target_link_libraries(torch_xpu INTERFACE torch::xpurt)
1541
1542  target_link_libraries(torch_xpu PUBLIC c10_xpu)
1543
1544  target_include_directories(
1545      torch_xpu INTERFACE $<INSTALL_INTERFACE:include>)
1546  target_include_directories(
1547      torch_xpu PRIVATE ${Caffe2_XPU_INCLUDE})
1548  target_link_libraries(
1549      torch_xpu PRIVATE ${Caffe2_XPU_DEPENDENCY_LIBS})
1550
1551  # Ensure that torch_cpu is ready before being linked by torch_xpu.
1552  add_dependencies(torch_xpu torch_cpu)
1553
1554  if(MSVC)
1555    target_link_libraries(torch_xpu PUBLIC torch_cpu_library)
1556  else()
1557    include(CheckLinkerFlag)
1558
1559    # Check whether the linker supports '--no-as-needed' and '--as-needed'
1560    check_linker_flag(CXX "-Wl,--no-as-needed" HAVE_NO_AS_NEEDED)
1561    check_linker_flag(CXX "-Wl,--as-needed" HAVE_AS_NEEDED)
1562
1563    if(HAVE_NO_AS_NEEDED AND HAVE_AS_NEEDED)
1564      target_link_libraries(torch_xpu PRIVATE
1565          "-Wl,--no-as-needed,\"$<TARGET_FILE:torch_cpu>\" -Wl,--as-needed")
1566    else()
1567      target_link_libraries(torch_xpu PRIVATE "$<TARGET_FILE:torch_cpu>")
1568    endif()
1569  endif()
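  # For illustration (assuming GNU ld semantics), the wrapped form above expands
  # to a link-line fragment roughly like:
  #   -Wl,--no-as-needed /path/to/libtorch_cpu.so -Wl,--as-needed
  # which forces a DT_NEEDED entry for torch_cpu even if torch_xpu does not
  # reference any of its symbols directly.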
1570endif()
1571
1572# ---[ Metal(OSX) modification
1573if(APPLE AND USE_PYTORCH_METAL)
1574  if(NOT INTERN_BUILD_MOBILE)
1575    include(../cmake/Metal.cmake)
1576    # We need to link the system frameworks explicitly
1577    find_library(metal NAMES Metal)
1578    find_library(mps NAMES MetalPerformanceShaders)
1579    find_library(foundation NAMES Foundation)
1580    find_library(accelerate NAMES Accelerate)
1581    target_link_libraries(torch_cpu PUBLIC ${metal} ${mps} ${foundation} ${accelerate})
1582  endif()
1583endif()
1584
1585
1586target_link_libraries(torch_cpu PRIVATE flatbuffers)
1587
1588# Note [Global dependencies]
1589# Some libraries (e.g. OpenMPI) like to dlopen plugins after they're initialized,
1590# and they assume that all of their symbols will be available in the global namespace.
1591# On the other hand, we try to be good citizens and avoid polluting the symbol
1592# namespace, so libtorch is loaded with all its dependencies in a local scope.
1593# That usually leads to missing-symbol errors at run time, so to avoid this
1594# situation we have to preload those libs in the global namespace.
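# For illustration, the preloading referred to above amounts to opening the
# library with RTLD_GLOBAL before libtorch itself, e.g. from Python
# (a sketch, not the exact loader code):
#   import ctypes
#   ctypes.CDLL("libtorch_global_deps.so", mode=ctypes.RTLD_GLOBAL)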
1595if(BUILD_SHARED_LIBS)
1596  add_library(torch_global_deps SHARED ${TORCH_SRC_DIR}/csrc/empty.c)
1597  if(HAVE_SOVERSION)
1598    set_target_properties(torch_global_deps PROPERTIES
1599        VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION})
1600  endif()
1601  set_target_properties(torch_global_deps PROPERTIES LINKER_LANGUAGE C)
1602  if(USE_MPI)
1603    target_link_libraries(torch_global_deps MPI::MPI_CXX)
1604  endif()
1605  if(CAFFE2_USE_MKL)
1606    target_link_libraries(torch_global_deps caffe2::mkl)
1607  endif()
1608  # The CUDA libraries are linked here for a different reason: in some
1609  # cases we load these libraries with ctypes, and if they weren't opened
1610  # with RTLD_GLOBAL, we'll do the "normal" search process again (and
1611  # not find them, because they're usually in non-standard locations)
1612  if(USE_CUDA)
1613    target_link_libraries(torch_global_deps ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS})
1614    target_link_libraries(torch_global_deps torch::cudart)
1615    if(TARGET torch::nvtoolsext)
1616      target_link_libraries(torch_global_deps torch::nvtoolsext)
1617    endif()
1618  endif()
1619  install(TARGETS torch_global_deps DESTINATION "${TORCH_INSTALL_LIB_DIR}")
1620endif()
1621
1622# ---[ Caffe2 HIP sources.
1623if(USE_ROCM)
1625  # Get Compile Definitions from the directory (FindHIP.cmake bug)
1626  get_directory_property(MY_DEFINITIONS COMPILE_DEFINITIONS)
1627  if(MY_DEFINITIONS)
1628    foreach(_item ${MY_DEFINITIONS})
1629      list(APPEND HIP_CLANG_FLAGS "-D${_item}")
1630    endforeach()
1631  endif()
1632
1633  # Call again since Caffe2_HIP_INCLUDE is extended with ATen include dirs.
1634  hip_include_directories(${Caffe2_HIP_INCLUDE})
1635
1636  # Since PyTorch files contain HIP headers, these flags are required for the necessary definitions to be added.
1637  target_compile_options(torch_hip PUBLIC ${HIP_CXX_FLAGS})  # experiment
1638
1639  target_link_libraries(torch_hip PUBLIC c10_hip)
1640
1641  if(NOT INTERN_BUILD_MOBILE)
1642    # TODO: Cut this over to ATEN_HIP_FILES_GEN_LIB.  At the moment, we
1643    # only generate CUDA files
1644    # NB: This dependency must be PRIVATE, because we don't install
1645    # ATEN_CUDA_FILES_GEN_LIB (it's a synthetic target just to get the
1646    # correct dependency from generated files.)
1647    target_link_libraries(torch_hip PRIVATE ATEN_CUDA_FILES_GEN_LIB)
1648  endif()
1649  target_link_libraries(torch_hip PUBLIC torch_cpu_library ${Caffe2_PUBLIC_HIP_DEPENDENCY_LIBS})
1650  target_link_libraries(torch_hip PRIVATE ${Caffe2_HIP_DEPENDENCY_LIBS})
1651
1652  # Since PyTorch files contain HIP headers, this is also needed to capture the includes.
1653  target_include_directories(torch_hip PRIVATE ${Caffe2_HIP_INCLUDE})
1654  target_include_directories(torch_hip INTERFACE $<INSTALL_INTERFACE:include>)
1655endif()
1656
1657if(BUILD_STATIC_RUNTIME_BENCHMARK)
1658  add_subdirectory(${TORCH_ROOT}/benchmarks/static_runtime ${PROJECT_BINARY_DIR}/bin)
1659  add_executable(static_runtime_bench "${STATIC_RUNTIME_BENCHMARK_SRCS}")
1660  add_executable(static_runtime_test "${STATIC_RUNTIME_TEST_SRCS}")
1661  target_link_libraries(static_runtime_bench torch_library benchmark)
1662  target_link_libraries(static_runtime_test torch_library gtest_main)
1663endif()
1664
1665if(BUILD_MOBILE_BENCHMARK)
1666  foreach(benchmark_src ${ATen_MOBILE_BENCHMARK_SRCS})
1667    get_filename_component(benchmark_name ${benchmark_src} NAME_WE)
1668    add_executable(${benchmark_name} "${benchmark_src}")
1669    target_link_libraries(${benchmark_name} torch_library benchmark)
1670    target_include_directories(${benchmark_name} PRIVATE $<INSTALL_INTERFACE:include>)
1671    target_include_directories(${benchmark_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
1672    target_include_directories(${benchmark_name} PRIVATE ${ATen_CPU_INCLUDE})
1673    target_link_options(${benchmark_name} PRIVATE "LINKER:--allow-multiple-definition")
1674  endforeach()
1675endif()
1676
1677if(BUILD_MOBILE_TEST)
1678  foreach(test_src ${ATen_MOBILE_TEST_SRCS})
1679    get_filename_component(test_name ${test_src} NAME_WE)
1680    add_executable(${test_name} "${test_src}")
1681    target_link_libraries(${test_name} torch_library gtest_main)
1682    target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
1683    target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
1684    target_include_directories(${test_name} PRIVATE ${ATen_CPU_INCLUDE})
1685    add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
1686  endforeach()
1687endif()
1688
1689# ---[ Test binaries.
1690if(BUILD_TEST)
1691
1692  foreach(test_src ${ATen_VEC_TEST_SRCS})
1693    foreach(i RANGE ${NUM_CPU_CAPABILITY_NAMES})
1694        get_filename_component(test_name ${test_src} NAME_WE)
1695        list(GET CPU_CAPABILITY_NAMES ${i} CPU_CAPABILITY)
1696        list(GET CPU_CAPABILITY_FLAGS ${i} FLAGS)
1697        separate_arguments(FLAGS UNIX_COMMAND "${FLAGS}")
1698        # Build vec with minimal dependencies on all platforms but Windows
1699        if(NOT MSVC)
1700          add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}" ../aten/src/ATen/native/quantized/AffineQuantizerBase.cpp)
1701          # TODO: Get rid of c10 dependency (which is only needed for the implementation of AT_ERROR)
1702          target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main nlohmann)
1703          if(USE_FBGEMM)
1704            target_link_libraries(${test_name}_${CPU_CAPABILITY} fbgemm)
1705          endif()
1706          if(USE_ASAN)
1707            if(TARGET Sanitizer::address)
1708              target_link_libraries(${test_name}_${CPU_CAPABILITY} Sanitizer::address)
1709            endif()
1710            if(TARGET Sanitizer::undefined)
1711              target_link_libraries(${test_name}_${CPU_CAPABILITY} Sanitizer::undefined)
1712            endif()
1713          endif()
1714        else()
1715          add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}")
1716          target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library sleef gtest_main)
1717        endif()
1718        target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<INSTALL_INTERFACE:include>)
1719        target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
1720        target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE ${ATen_CPU_INCLUDE})
1721        target_compile_definitions(${test_name}_${CPU_CAPABILITY} PRIVATE CPU_CAPABILITY=${CPU_CAPABILITY}  CPU_CAPABILITY_${CPU_CAPABILITY})
1722        target_compile_options(${test_name}_${CPU_CAPABILITY} PRIVATE  ${FLAGS})
1723        if(NOT MSVC)
1724          target_compile_options(${test_name}_${CPU_CAPABILITY} PRIVATE -Wno-ignored-qualifiers)
1725        endif()
1726        add_test(NAME ${test_name}_${CPU_CAPABILITY} COMMAND $<TARGET_FILE:${test_name}_${CPU_CAPABILITY}>)
1727    endforeach()
1728  endforeach()
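  # For example (illustrative names), a source file vec_test_all_types.cpp
  # combined with the AVX2 capability yields a target and ctest entry named
  # vec_test_all_types_AVX2, compiled with CPU_CAPABILITY=AVX2 and
  # CPU_CAPABILITY_AVX2 defined plus that capability's FLAGS.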
1729
1730  foreach(test_src ${Caffe2_CPU_TEST_SRCS})
1731    get_filename_component(test_name ${test_src} NAME_WE)
1732    add_executable(${test_name} "${test_src}")
1733    target_link_libraries(${test_name} torch_library gtest_main)
1734    if(NOT MSVC)
1735      target_link_libraries(${test_name} stdc++)
1736    endif()
1737    target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
1738    target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
1739    target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
1740    add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
1741    if(INSTALL_TEST)
1742      install(TARGETS ${test_name} DESTINATION test)
1743      # Install PDB files for MSVC builds
1744      if(MSVC AND BUILD_SHARED_LIBS)
1745        install(FILES $<TARGET_PDB_FILE:${test_name}> DESTINATION test OPTIONAL)
1746      endif()
1747    endif()
1748  endforeach()
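  # The tests registered above can be run through CTest from the build
  # directory, e.g. (hypothetical test name):
  #   ctest -R atest --output-on-failure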
1749
1750  if(USE_MPS)
1751    foreach(test_src ${Caffe2_MPS_TEST_SRCS})
1752      get_filename_component(test_name ${test_src} NAME_WE)
1753      add_executable(${test_name} "${test_src}")
1754      find_library(metal NAMES Metal)
1755      find_library(foundation NAMES Foundation)
1756      target_link_libraries(${test_name} torch_library gtest_main ${metal} ${foundation})
1757      target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
1758      target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
1759      target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
1760      add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
1761      if(INSTALL_TEST)
1762        install(TARGETS ${test_name} DESTINATION test)
1763        # Install PDB files for MSVC builds
1764        if(MSVC AND BUILD_SHARED_LIBS)
1765          install(FILES $<TARGET_PDB_FILE:${test_name}> DESTINATION test OPTIONAL)
1766        endif()
1767      endif()
1768    endforeach()
1769  endif()
1770
1771  if(USE_CUDA)
1772    foreach(test_src ${Caffe2_GPU_TEST_SRCS})
1773      get_filename_component(test_name ${test_src} NAME_WE)
1774      add_executable(${test_name} "${test_src}")
1775      target_link_libraries(${test_name} torch_library gtest_main)
1776      if(USE_CUDNN AND ${test_name} MATCHES "cudnn")
1777        target_link_libraries(${test_name} torch::cudnn)
1778      endif()
1779      target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
1780      target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
1781      add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
1782      if(INSTALL_TEST)
1783        install(TARGETS ${test_name} DESTINATION test)
1784        # Install PDB files for MSVC builds
1785        if(MSVC AND BUILD_SHARED_LIBS)
1786          install(FILES $<TARGET_PDB_FILE:${test_name}> DESTINATION test OPTIONAL)
1787        endif()
1788      endif()
1789    endforeach()
1790    if(TARGET context_gpu_test)
1791      target_link_libraries(context_gpu_test caffe2::curand caffe2::cublas)
1792    endif()
1793  endif()
1794
1795  if(USE_XPU)
1796    foreach(test_src ${Caffe2_XPU_TEST_SRCS})
1797      get_filename_component(test_name ${test_src} NAME_WE)
1798      add_executable(${test_name} "${test_src}")
1799      target_link_libraries(${test_name} torch_library gtest_main)
1800      target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
1801      target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
1802      add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
1803      if(INSTALL_TEST)
1804        install(TARGETS ${test_name} DESTINATION test)
1805      endif()
1806    endforeach()
1807  endif()
1808
1809  if(USE_VULKAN)
1810    foreach(test_src ${Caffe2_VULKAN_TEST_SRCS})
1811      get_filename_component(test_name ${test_src} NAME_WE)
1812      add_executable(${test_name} "${test_src}")
1813      target_link_libraries(${test_name} torch_library gtest_main)
1814      target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
1815      target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
1816      add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
1817      if(INSTALL_TEST)
1818        install(TARGETS ${test_name} DESTINATION test)
1819        # Install PDB files for MSVC builds
1820        if(MSVC AND BUILD_SHARED_LIBS)
1821          install(FILES $<TARGET_PDB_FILE:${test_name}> DESTINATION test OPTIONAL)
1822        endif()
1823      endif()
1824    endforeach()
1825  endif()
1826
1827  if(USE_ROCM)
1828    foreach(test_src ${Caffe2_HIP_TEST_SRCS})
1829      get_filename_component(test_name ${test_src} NAME_WE)
1830      add_executable(${test_name} "${test_src}")
1831      target_link_libraries(${test_name} torch_library gtest_main)
1832      target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
1833      target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDE})
1834      target_compile_options(${test_name} PRIVATE ${HIP_CXX_FLAGS})
1835      add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
1836      if(INSTALL_TEST)
1837        install(TARGETS ${test_name} DESTINATION test)
1838      endif()
1839    endforeach()
1840  endif()
1841endif()
1842
1843if(MSVC)
1844  # This enables the conforming lambda processor in MSVC, which allows us to
1845  # capture constexpr variables in lambdas.  Note that it is turned on by
1846  # default for std=c++20 and above.  This should be applied globally once
1847  # https://github.com/pytorch/pytorch/issues/92600 is fixed.
1848  foreach(tmp ${MEM_EFF_ATTENTION_CUDA_SOURCES})
1849    # MEM_EFF_ATTENTION_CUDA_SOURCES is populated in pytorch/aten/src/ATen/CMakeLists.txt.
1850    # We iterate over these files, rewriting their paths and adding the compile flag.
1851    file(RELATIVE_PATH tmp_path "${PROJECT_SOURCE_DIR}" "${tmp}")
1852    set(tmp_path "../${tmp_path}")
1853    set_source_files_properties(${tmp_path} PROPERTIES COMPILE_FLAGS "-Xcompiler /Zc:lambda")
1854  endforeach()
1855endif()
1856endif()
1857