xref: /aosp_15_r20/external/pytorch/cmake/Codegen.cmake (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1# This ill-named file does a number of things:
2# - Installs Caffe2 header files (this has nothing to do with code generation)
3# - Configures caffe2/core/macros.h
4# - Creates an ATen target for its generated C++ files and adds it
5#   as a dependency
6# - Reads build lists defined in build_variables.bzl
7
8################################################################################
9# Helper functions
10################################################################################
11
# filter_list(<output> <input> <pattern>...)
#
# Stores in <output> (in the caller's scope) every element of the list
# variable named <input> that matches at least one of the regular expressions
# passed as trailing arguments. The input order is preserved and each element
# is emitted at most once, even when several patterns match it. When nothing
# matches, <output> is unset in the caller's scope.
function(filter_list output input)
    unset(result)
    foreach(filename ${${input}})
        foreach(pattern ${ARGN})
            if("${filename}" MATCHES "${pattern}")
                list(APPEND result "${filename}")
                # One match is enough: stop here so the element is not
                # appended again for every further pattern it also matches.
                break()
            endif()
        endforeach()
    endforeach()
    set(${output} ${result} PARENT_SCOPE)
endfunction()
23
# filter_list_exclude(<output> <input> <pattern>...)
#
# Stores in <output> (in the caller's scope) every element of the list
# variable named <input> that matches NONE of the regular expressions passed
# as trailing arguments. The input order is preserved and each kept element
# appears exactly once. When every element is excluded, <output> is unset in
# the caller's scope.
#
# With a single pattern this behaves exactly like the previous
# implementation. With several patterns the previous implementation kept an
# element once for every pattern it failed to match, which both duplicated
# entries and let through elements that matched only some of the patterns.
# With no patterns at all, nothing is excluded and the input is returned
# unchanged (the previous implementation returned an empty result).
function(filter_list_exclude output input)
    unset(result)
    foreach(filename ${${input}})
        set(excluded FALSE)
        foreach(pattern ${ARGN})
            if("${filename}" MATCHES "${pattern}")
                set(excluded TRUE)
                break()
            endif()
        endforeach()
        if(NOT excluded)
            list(APPEND result "${filename}")
        endif()
    endforeach()
    set(${output} ${result} PARENT_SCOPE)
endfunction()
35
36################################################################################
37
# ---[ Configure caffe2/core/macros.h
# The template is read from the source tree; the configured header is written
# into the build tree.
configure_file(
  ${CMAKE_CURRENT_LIST_DIR}/../caffe2/core/macros.h.in
  ${CMAKE_BINARY_DIR}/caffe2/core/macros.h
)

# ---[ Header install rules
# The caffe2 directory tree is installed below include/, restricted to *.h
# files.
install(
  DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../caffe2
  DESTINATION include
  FILES_MATCHING
  PATTERN "*.h"
)

# The ATen/core headers get an install rule from this file only when
# INTERN_BUILD_ATEN_OPS is off.
if(NOT INTERN_BUILD_ATEN_OPS)
  install(
    DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/core
    DESTINATION include/ATen
    FILES_MATCHING
    PATTERN "*.h"
  )
endif()

# The configured macros.h lives in the build tree, so the directory rule
# above (which walks the source tree) does not pick it up.
install(
  FILES ${CMAKE_BINARY_DIR}/caffe2/core/macros.h
  DESTINATION include/caffe2/core
)
54
55# ---[ ATen specific
56if(INTERN_BUILD_ATEN_OPS)
57  if(MSVC)
58    set(OPT_FLAG "/fp:strict ")
59  else(MSVC)
60    set(OPT_FLAG "-O3 ")
61    if("${CMAKE_BUILD_TYPE}" MATCHES "Debug")
62      set(OPT_FLAG " ")
63    endif()
64  endif(MSVC)
65
66  if(NOT MSVC AND NOT "${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
67    set_source_files_properties(${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/MapAllocator.cpp PROPERTIES COMPILE_FLAGS "-fno-openmp")
68  endif()
69
70  file(GLOB_RECURSE all_python "${CMAKE_CURRENT_LIST_DIR}/../torchgen/*.py")
71
72  # RowwiseScaled.cu requires sm90a flags
73  if(USE_CUDA)
74    set(ROWWISE_SCALED_MM_FILE "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/cuda/RowwiseScaledMM.cu")
75
76    # Get existing arch flags
77    torch_cuda_get_nvcc_gencode_flag(EXISTING_ARCH_FLAGS)
78
79    # Check NVCC version and existing arch flags
80    if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0 AND
81      EXISTING_ARCH_FLAGS MATCHES ".*compute_90.*")
82      set_source_files_properties(${ROWWISE_SCALED_MM_FILE}
83        PROPERTIES COMPILE_FLAGS "-gencode arch=compute_90a,code=sm_90a")
84    endif()
85  endif()
86
87  set(GEN_ROCM_FLAG)
88  if(USE_ROCM)
89    set(GEN_ROCM_FLAG --rocm)
90  endif()
91
92  set(GEN_MPS_FLAG)
93  if(USE_MPS)
94    set(GEN_MPS_FLAG --mps)
95  endif()
96
97  set(CUSTOM_BUILD_FLAGS)
98  if(INTERN_BUILD_MOBILE)
99    if(USE_VULKAN)
100      list(APPEND CUSTOM_BUILD_FLAGS --backend_whitelist CPU QuantizedCPU Vulkan)
101    else()
102      list(APPEND CUSTOM_BUILD_FLAGS --backend_whitelist CPU QuantizedCPU)
103    endif()
104  endif()
105
106  if(SELECTED_OP_LIST)
107    if(TRACING_BASED)
108      message(STATUS "Running tracing-based selective build given operator list: ${SELECTED_OP_LIST}")
109      list(APPEND CUSTOM_BUILD_FLAGS
110        --op_selection_yaml_path ${SELECTED_OP_LIST})
111    elseif(NOT STATIC_DISPATCH_BACKEND)
112      message(WARNING
113        "You have to run tracing-based selective build with dynamic dispatch.\n"
114        "Switching to STATIC_DISPATCH_BACKEND=CPU."
115      )
116      set(STATIC_DISPATCH_BACKEND CPU)
117    endif()
118  endif()
119
120  if(STATIC_DISPATCH_BACKEND)
121    message(STATUS "Custom build with static dispatch backends: ${STATIC_DISPATCH_BACKEND}")
122    list(LENGTH STATIC_DISPATCH_BACKEND len)
123    list(APPEND CUSTOM_BUILD_FLAGS
124      --static_dispatch_backend ${STATIC_DISPATCH_BACKEND})
125  endif()
126
127  # Codegen unboxing
128  if(USE_LIGHTWEIGHT_DISPATCH)
129    file(GLOB_RECURSE all_unboxing_script "${CMAKE_CURRENT_LIST_DIR}/../tools/jit/*.py")
130    list(APPEND CUSTOM_BUILD_FLAGS --skip_dispatcher_op_registration)
131    set(GEN_UNBOXING_COMMAND
132        "${Python_EXECUTABLE}" -m tools.jit.gen_unboxing
133        --source-path ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen
134        --install_dir ${CMAKE_BINARY_DIR}/aten/src/ATen
135        )
136    if(SELECTED_OP_LIST)
137      list(APPEND GEN_UNBOXING_COMMAND
138              --TEST_ONLY_op_registration_allowlist_yaml_path "${SELECTED_OP_LIST}")
139    endif()
140    set("GEN_UNBOXING_COMMAND_sources"
141        ${GEN_UNBOXING_COMMAND}
142        --output-dependencies ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake
143        )
144    message(STATUS "Generating sources for lightweight dispatch")
145    execute_process(
146        COMMAND ${GEN_UNBOXING_COMMAND_sources} --dry-run
147        RESULT_VARIABLE RETURN_VALUE
148        WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
149    )
150    if(NOT RETURN_VALUE EQUAL 0)
151      message(FATAL_ERROR "Failed to get generated_unboxing_sources list")
152    endif()
153
154    include("${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake")
155    add_custom_command(
156        COMMENT "Generating ATen unboxing sources"
157        OUTPUT
158        ${generated_unboxing_sources}
159        ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake
160        COMMAND ${GEN_UNBOXING_COMMAND_sources}
161        DEPENDS ${all_unboxing_script} ${sources_templates}
162        ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/native_functions.yaml
163        ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/tags.yaml
164        WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
165    )
166  else() # Otherwise do not generate or include sources into build.
167    set(generated_unboxing_sources "")
168  endif()
169
170  set(GEN_PER_OPERATOR_FLAG)
171  if(USE_PER_OPERATOR_HEADERS)
172    list(APPEND GEN_PER_OPERATOR_FLAG "--per-operator-headers")
173  endif()
174
175  set(GEN_COMMAND
176      "${Python_EXECUTABLE}" -m torchgen.gen
177      --source-path ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen
178      --install_dir ${CMAKE_BINARY_DIR}/aten/src/ATen
179      ${GEN_PER_OPERATOR_FLAG}
180      ${GEN_ROCM_FLAG}
181      ${GEN_MPS_FLAG}
182      ${CUSTOM_BUILD_FLAGS}
183  )
184
185  file(GLOB_RECURSE headers_templates "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/templates/*\.h")
186  file(GLOB_RECURSE sources_templates "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/templates/*\.cpp")
187  set(declarations_yaml_templates "")
188
189  foreach(gen_type "headers" "sources" "declarations_yaml")
190    # The codegen outputs may change dynamically as PyTorch is
191    # developed, but add_custom_command only supports dynamic inputs.
192    #
193    # We work around this by generating a .cmake file which is
194    # included below to set the list of output files. If that file
195    # ever changes then cmake will be re-run automatically because it
196    # was included and so we get fully dynamic outputs.
197
198    set("GEN_COMMAND_${gen_type}"
199        ${GEN_COMMAND}
200        --generate ${gen_type}
201        --output-dependencies ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_${gen_type}.cmake
202    )
203
204    # Dry run to bootstrap the output variables
205    execute_process(
206        COMMAND ${GEN_COMMAND_${gen_type}} --dry-run
207        RESULT_VARIABLE RETURN_VALUE
208        WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
209    )
210
211    if(NOT RETURN_VALUE EQUAL 0)
212      message(FATAL_ERROR "Failed to get generated_${gen_type} list")
213    endif()
214
215    include("${CMAKE_BINARY_DIR}/aten/src/ATen/generated_${gen_type}.cmake")
216    include("${CMAKE_BINARY_DIR}/aten/src/ATen/core_generated_${gen_type}.cmake")
217    include("${CMAKE_BINARY_DIR}/aten/src/ATen/cpu_vec_generated_${gen_type}.cmake")
218    include("${CMAKE_BINARY_DIR}/aten/src/ATen/cuda_generated_${gen_type}.cmake")
219    include("${CMAKE_BINARY_DIR}/aten/src/ATen/ops_generated_${gen_type}.cmake")
220
221    message(STATUS "${gen_type} outputs: ${gen_outputs}")
222
223    add_custom_command(
224      COMMENT "Generating ATen ${gen_type}"
225      OUTPUT
226        ${generated_${gen_type}}
227        ${cuda_generated_${gen_type}}
228        ${core_generated_${gen_type}}
229        ${cpu_vec_generated_${gen_type}}
230        ${ops_generated_${gen_type}}
231        ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_${gen_type}.cmake
232        ${CMAKE_BINARY_DIR}/aten/src/ATen/ops_generated_${gen_type}.cmake
233        ${CMAKE_BINARY_DIR}/aten/src/ATen/core_generated_${gen_type}.cmake
234        ${CMAKE_BINARY_DIR}/aten/src/ATen/cpu_vec_generated_${gen_type}.cmake
235        ${CMAKE_BINARY_DIR}/aten/src/ATen/cuda_generated_${gen_type}.cmake
236      COMMAND ${GEN_COMMAND_${gen_type}}
237      DEPENDS ${all_python} ${${gen_type}_templates}
238        ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/native_functions.yaml
239        ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/tags.yaml
240      WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
241    )
242  endforeach()
243
244  # Generated headers used from a CUDA (.cu) file are
245  # not tracked correctly in CMake. We make the libATen.so depend explicitly
246  # on building the generated ATen files to workaround.
247  add_custom_target(ATEN_CPU_FILES_GEN_TARGET DEPENDS
248      ${generated_headers} ${core_generated_headers} ${cpu_vec_generated_headers} ${ops_generated_headers}
249      ${generated_sources} ${core_generated_sources} ${cpu_vec_generated_sources} ${ops_generated_sources}
250      ${generated_declarations_yaml} ${generated_unboxing_sources})
251  add_custom_target(ATEN_CUDA_FILES_GEN_TARGET DEPENDS
252      ${cuda_generated_headers} ${cuda_generated_sources})
253  add_library(ATEN_CPU_FILES_GEN_LIB INTERFACE)
254  add_library(ATEN_CUDA_FILES_GEN_LIB INTERFACE)
255  add_dependencies(ATEN_CPU_FILES_GEN_LIB ATEN_CPU_FILES_GEN_TARGET)
256  add_dependencies(ATEN_CUDA_FILES_GEN_LIB ATEN_CUDA_FILES_GEN_TARGET)
257
258  if(USE_PER_OPERATOR_HEADERS)
259    target_compile_definitions(ATEN_CPU_FILES_GEN_LIB INTERFACE AT_PER_OPERATOR_HEADERS)
260    target_compile_definitions(ATEN_CUDA_FILES_GEN_LIB INTERFACE AT_PER_OPERATOR_HEADERS)
261  endif()
262
263  # Handle source files that need to be compiled multiple times for
264  # different vectorization options
265  file(GLOB cpu_kernel_cpp_in "${PROJECT_SOURCE_DIR}/aten/src/ATen/native/cpu/*.cpp" "${PROJECT_SOURCE_DIR}/aten/src/ATen/native/quantized/cpu/kernels/*.cpp")
266
267  list(APPEND CPU_CAPABILITY_NAMES "DEFAULT")
268  list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}")
269
270  if(CXX_AVX512_FOUND)
271    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_AVX512_CPU_DEFINITION")
272    list(APPEND CPU_CAPABILITY_NAMES "AVX512")
273    if(MSVC)
274      list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}/arch:AVX512")
275    else(MSVC)
276      list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -mavx512f -mavx512bw -mavx512vl -mavx512dq -mfma")
277    endif(MSVC)
278  endif(CXX_AVX512_FOUND)
279
280  if(CXX_AVX2_FOUND)
281    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_AVX2_CPU_DEFINITION")
282
283    # Some versions of GCC pessimistically split unaligned load and store
284    # instructions when using the default tuning. This is a bad choice on
285    # new Intel and AMD processors so we disable it when compiling with AVX2.
286    # See https://stackoverflow.com/questions/52626726/why-doesnt-gcc-resolve-mm256-loadu-pd-as-single-vmovupd#tab-top
287    check_cxx_compiler_flag("-mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store" COMPILER_SUPPORTS_NO_AVX256_SPLIT)
288    if(COMPILER_SUPPORTS_NO_AVX256_SPLIT)
289      set(CPU_NO_AVX256_SPLIT_FLAGS "-mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store")
290    endif(COMPILER_SUPPORTS_NO_AVX256_SPLIT)
291
292    list(APPEND CPU_CAPABILITY_NAMES "AVX2")
293    if(DEFINED ENV{ATEN_AVX512_256})
294      if($ENV{ATEN_AVX512_256} MATCHES "TRUE")
295        if(CXX_AVX512_FOUND)
296          message("-- ATen AVX2 kernels will use 32 ymm registers")
297          if(MSVC)
298            list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}/arch:AVX512")
299          else(MSVC)
300            list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -march=native ${CPU_NO_AVX256_SPLIT_FLAGS}")
301          endif(MSVC)
302        endif(CXX_AVX512_FOUND)
303      endif()
304    else()
305      if(MSVC)
306        list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}/arch:AVX2")
307      else(MSVC)
308        list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -mavx2 -mfma -mf16c ${CPU_NO_AVX256_SPLIT_FLAGS}")
309      endif(MSVC)
310    endif()
311  endif(CXX_AVX2_FOUND)
312
313  if(CXX_VSX_FOUND)
314    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_VSX_CPU_DEFINITION")
315    LIST(APPEND CPU_CAPABILITY_NAMES "VSX")
316    LIST(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}  ${CXX_VSX_FLAGS}")
317  endif(CXX_VSX_FOUND)
318
319  if(CXX_ZVECTOR_FOUND)
320    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_ZVECTOR_CPU_DEFINITION")
321    LIST(APPEND CPU_CAPABILITY_NAMES "ZVECTOR")
322    LIST(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}  ${CXX_ZVECTOR_FLAGS}")
323  endif(CXX_ZVECTOR_FOUND)
324
325  list(LENGTH CPU_CAPABILITY_NAMES NUM_CPU_CAPABILITY_NAMES)
326  math(EXPR NUM_CPU_CAPABILITY_NAMES "${NUM_CPU_CAPABILITY_NAMES}-1")
327
328  # The sources list might get reordered later based on the capabilites.
329  # See NOTE [ Linking AVX and non-AVX files ]
330  foreach(i RANGE ${NUM_CPU_CAPABILITY_NAMES})
331    function(process_vec NAME)
332      list(GET CPU_CAPABILITY_NAMES ${i} CPU_CAPABILITY)
333      set(NEW_IMPL ${CMAKE_BINARY_DIR}/aten/src/ATen/${NAME}.${CPU_CAPABILITY}.cpp)
334      configure_file("${PROJECT_SOURCE_DIR}/cmake/IncludeSource.cpp.in" ${NEW_IMPL})
335      set(cpu_kernel_cpp ${NEW_IMPL} ${cpu_kernel_cpp} PARENT_SCOPE) # Create list of copies
336      list(GET CPU_CAPABILITY_FLAGS ${i} FLAGS)
337      if(MSVC)
338        set(EXTRA_FLAGS "/DCPU_CAPABILITY=${CPU_CAPABILITY} /DCPU_CAPABILITY_${CPU_CAPABILITY}")
339      else(MSVC)
340        set(EXTRA_FLAGS "-DCPU_CAPABILITY=${CPU_CAPABILITY} -DCPU_CAPABILITY_${CPU_CAPABILITY}")
341      endif(MSVC)
342      # Disable certain warnings for GCC-9.X
343      if(CMAKE_COMPILER_IS_GNUCXX)
344        if(("${NAME}" STREQUAL "native/cpu/GridSamplerKernel.cpp") AND ("${CPU_CAPABILITY}" STREQUAL "DEFAULT"))
345          # See https://github.com/pytorch/pytorch/issues/38855
346          set(EXTRA_FLAGS "${EXTRA_FLAGS} -Wno-uninitialized")
347        endif()
348        if("${NAME}" STREQUAL "native/quantized/cpu/kernels/QuantizedOpKernels.cpp")
349          # See https://github.com/pytorch/pytorch/issues/38854
350          set(EXTRA_FLAGS "${EXTRA_FLAGS} -Wno-deprecated-copy")
351        endif()
352      endif()
353      set_source_files_properties(${NEW_IMPL} PROPERTIES COMPILE_FLAGS "${FLAGS} ${EXTRA_FLAGS}")
354    endfunction()
355    foreach(IMPL ${cpu_kernel_cpp_in})
356      file(RELATIVE_PATH NAME "${PROJECT_SOURCE_DIR}/aten/src/ATen/" "${IMPL}")
357      process_vec("${NAME}")
358    endforeach()
359    foreach(IMPL ${cpu_vec_generated_sources})
360      file(RELATIVE_PATH NAME "${CMAKE_BINARY_DIR}/aten/src/ATen/" "${IMPL}")
361      process_vec("${NAME}")
362    endforeach()
363  endforeach()
364  list(APPEND ATen_CPU_SRCS ${cpu_kernel_cpp})
365endif()
366
# append_filelist(<name> <outputvar>)
#
# Reads the Python list called <name> from build_variables.bzl, prefixes each
# entry with "${Torch_SOURCE_DIR}/", and appends the resulting paths to the
# list variable <outputvar> in the caller's scope. Configuration stops with a
# fatal error when the Python helper exits with a non-zero status.
function(append_filelist name outputvar)
  # configure_file adds its input to the list of CMAKE_RERUN dependencies
  configure_file(
      ${PROJECT_SOURCE_DIR}/build_variables.bzl
      ${PROJECT_BINARY_DIR}/caffe2/build_variables.bzl)

  set(source_root "${Torch_SOURCE_DIR}/")
  # Python one-liner: execute the .bzl file, then print the requested list as
  # a ';'-separated string with every entry prefixed by the source root.
  set(python_snippet
      "exec(open('${PROJECT_SOURCE_DIR}/build_variables.bzl').read());print(';'.join(['${source_root}' + x for x in ${name}]))")

  execute_process(
    COMMAND "${Python_EXECUTABLE}" -c "${python_snippet}"
    WORKING_DIRECTORY "${source_root}"
    RESULT_VARIABLE exit_status
    OUTPUT_VARIABLE raw_filelist)
  if(NOT exit_status EQUAL 0)
    message(FATAL_ERROR "Failed to fetch filelist ${name} from build_variables.bzl")
  endif()

  # Drop the newline written by print() so the last entry stays clean.
  string(REPLACE "\n" "" raw_filelist "${raw_filelist}")
  list(APPEND ${outputvar} ${raw_filelist})
  set(${outputvar} "${${outputvar}}" PARENT_SCOPE)
endfunction()
386
# Publish the capability flags and the index of the last capability to the
# parent scope. The expansions are deliberately left unquoted: when a
# variable is not set here, the statement unsets it in the parent scope
# rather than defining it as an empty string.
set(CPU_CAPABILITY_FLAGS ${CPU_CAPABILITY_FLAGS} PARENT_SCOPE)
set(NUM_CPU_CAPABILITY_NAMES ${NUM_CPU_CAPABILITY_NAMES} PARENT_SCOPE)
389