# This ill-named file does a number of things:
# - Installs Caffe2 header files (this has nothing to do with code generation)
# - Configures caffe2/core/macros.h
# - Creates an ATen target for its generated C++ files and adds it
#   as a dependency
# - Reads build lists defined in build_variables.bzl

################################################################################
# Helper functions
################################################################################

# filter_list(<output> <input> <pattern>...)
# Keep only the entries of the list variable named <input> that match at
# least one of the regex <pattern>s; store the result in <output> in the
# caller's scope.
function(filter_list output input)
  unset(result)
  foreach(filename ${${input}})
    foreach(pattern ${ARGN})
      if("${filename}" MATCHES "${pattern}")
        list(APPEND result "${filename}")
      endif()
    endforeach()
  endforeach()
  set(${output} ${result} PARENT_SCOPE)
endfunction()

# filter_list_exclude(<output> <input> <pattern>...)
# Keep only the entries of the list variable named <input> that do NOT match
# the regex <pattern>s; store the result in <output> in the caller's scope.
function(filter_list_exclude output input)
  unset(result)
  foreach(filename ${${input}})
    foreach(pattern ${ARGN})
      if(NOT "${filename}" MATCHES "${pattern}")
        list(APPEND result "${filename}")
      endif()
    endforeach()
  endforeach()
  set(${output} ${result} PARENT_SCOPE)
endfunction()

################################################################################

# ---[ Write the macros file
configure_file(
    ${CMAKE_CURRENT_LIST_DIR}/../caffe2/core/macros.h.in
    ${CMAKE_BINARY_DIR}/caffe2/core/macros.h)

# ---[ Installing the header files
install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../caffe2
        DESTINATION include
        FILES_MATCHING PATTERN "*.h")
if(NOT INTERN_BUILD_ATEN_OPS)
  install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/core
          DESTINATION include/ATen
          FILES_MATCHING PATTERN "*.h")
endif()
install(FILES ${CMAKE_BINARY_DIR}/caffe2/core/macros.h
        DESTINATION include/caffe2/core)

# ---[ ATen specific
if(INTERN_BUILD_ATEN_OPS)
  # Base optimization flag prepended to every per-capability flag set below.
  # The trailing space is intentional: flags are concatenated as strings.
  if(MSVC)
    set(OPT_FLAG "/fp:strict ")
  else()
    set(OPT_FLAG "-O3 ")
    if("${CMAKE_BUILD_TYPE}" MATCHES "Debug")
      set(OPT_FLAG " ")
    endif()
  endif()

  if(NOT MSVC AND NOT "${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
    set_source_files_properties(${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/MapAllocator.cpp PROPERTIES COMPILE_FLAGS "-fno-openmp")
  endif()

  # Every torchgen script is a codegen input: touching any of them must
  # retrigger the generation commands below.
  file(GLOB_RECURSE all_python "${CMAKE_CURRENT_LIST_DIR}/../torchgen/*.py")

  # RowwiseScaled.cu requires sm90a flags
  if(USE_CUDA)
    set(ROWWISE_SCALED_MM_FILE "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/cuda/RowwiseScaledMM.cu")

    # Get existing arch flags
    torch_cuda_get_nvcc_gencode_flag(EXISTING_ARCH_FLAGS)

    # Check NVCC version and existing arch flags: sm_90a codegen needs
    # CUDA 12+ and is only useful when compute_90 is already targeted.
    if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0 AND
       EXISTING_ARCH_FLAGS MATCHES ".*compute_90.*")
      set_source_files_properties(${ROWWISE_SCALED_MM_FILE}
          PROPERTIES COMPILE_FLAGS "-gencode arch=compute_90a,code=sm_90a")
    endif()
  endif()

  set(GEN_ROCM_FLAG)
  if(USE_ROCM)
    set(GEN_ROCM_FLAG --rocm)
  endif()

  set(GEN_MPS_FLAG)
  if(USE_MPS)
    set(GEN_MPS_FLAG --mps)
  endif()

  set(CUSTOM_BUILD_FLAGS)
  if(INTERN_BUILD_MOBILE)
    if(USE_VULKAN)
      list(APPEND CUSTOM_BUILD_FLAGS --backend_whitelist CPU QuantizedCPU Vulkan)
    else()
      list(APPEND CUSTOM_BUILD_FLAGS --backend_whitelist CPU QuantizedCPU)
    endif()
  endif()

  if(SELECTED_OP_LIST)
    if(TRACING_BASED)
      message(STATUS "Running tracing-based selective build given operator list: ${SELECTED_OP_LIST}")
      list(APPEND CUSTOM_BUILD_FLAGS
          --op_selection_yaml_path ${SELECTED_OP_LIST})
    elseif(NOT STATIC_DISPATCH_BACKEND)
      message(WARNING
          "You have to run tracing-based selective build with dynamic dispatch.\n"
          "Switching to STATIC_DISPATCH_BACKEND=CPU."
      )
      set(STATIC_DISPATCH_BACKEND CPU)
    endif()
  endif()

  if(STATIC_DISPATCH_BACKEND)
    message(STATUS "Custom build with static dispatch backends: ${STATIC_DISPATCH_BACKEND}")
    # NOTE(review): `len` is not read anywhere in this file; kept because this
    # file is include()d and the variable may be read by the includer — confirm.
    list(LENGTH STATIC_DISPATCH_BACKEND len)
    list(APPEND CUSTOM_BUILD_FLAGS
        --static_dispatch_backend ${STATIC_DISPATCH_BACKEND})
  endif()

  # Template inputs for the codegen custom commands. These must be globbed
  # BEFORE the lightweight-dispatch section below, which lists
  # ${sources_templates} in a DEPENDS clause; previously the glob ran after
  # that use, so the dependency expanded empty and template edits did not
  # retrigger unboxing codegen.
  file(GLOB_RECURSE headers_templates "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/templates/*.h")
  file(GLOB_RECURSE sources_templates "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/templates/*.cpp")
  set(declarations_yaml_templates "")

  # Codegen unboxing
  if(USE_LIGHTWEIGHT_DISPATCH)
    file(GLOB_RECURSE all_unboxing_script "${CMAKE_CURRENT_LIST_DIR}/../tools/jit/*.py")
    list(APPEND CUSTOM_BUILD_FLAGS --skip_dispatcher_op_registration)
    set(GEN_UNBOXING_COMMAND
        "${Python_EXECUTABLE}" -m tools.jit.gen_unboxing
        --source-path ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen
        --install_dir ${CMAKE_BINARY_DIR}/aten/src/ATen
    )
    if(SELECTED_OP_LIST)
      list(APPEND GEN_UNBOXING_COMMAND
          --TEST_ONLY_op_registration_allowlist_yaml_path "${SELECTED_OP_LIST}")
    endif()
    set("GEN_UNBOXING_COMMAND_sources"
        ${GEN_UNBOXING_COMMAND}
        --output-dependencies ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake
    )
    message(STATUS "Generating sources for lightweight dispatch")
    # Dry run at configure time to learn the output file list (written to
    # generated_unboxing_sources.cmake and include()d below).
    execute_process(
        COMMAND ${GEN_UNBOXING_COMMAND_sources} --dry-run
        RESULT_VARIABLE RETURN_VALUE
        WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
    )
    if(NOT RETURN_VALUE EQUAL 0)
      message(FATAL_ERROR "Failed to get generated_unboxing_sources list")
    endif()

    include("${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake")
    add_custom_command(
        COMMENT "Generating ATen unboxing sources"
        OUTPUT
          ${generated_unboxing_sources}
          ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake
        COMMAND ${GEN_UNBOXING_COMMAND_sources}
        DEPENDS ${all_unboxing_script} ${sources_templates}
          ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/native_functions.yaml
          ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/tags.yaml
        WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
    )
  else() # Otherwise do not generate or include sources into build.
    set(generated_unboxing_sources "")
  endif()

  set(GEN_PER_OPERATOR_FLAG)
  if(USE_PER_OPERATOR_HEADERS)
    list(APPEND GEN_PER_OPERATOR_FLAG "--per-operator-headers")
  endif()

  set(GEN_COMMAND
      "${Python_EXECUTABLE}" -m torchgen.gen
      --source-path ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen
      --install_dir ${CMAKE_BINARY_DIR}/aten/src/ATen
      ${GEN_PER_OPERATOR_FLAG}
      ${GEN_ROCM_FLAG}
      ${GEN_MPS_FLAG}
      ${CUSTOM_BUILD_FLAGS}
  )

  foreach(gen_type "headers" "sources" "declarations_yaml")
    # The codegen outputs may change dynamically as PyTorch is
    # developed, but add_custom_command only supports statically-known
    # outputs.
    #
    # We work around this by generating a .cmake file which is
    # included below to set the list of output files. If that file
    # ever changes then cmake will be re-run automatically because it
    # was included and so we get fully dynamic outputs.

    set("GEN_COMMAND_${gen_type}"
        ${GEN_COMMAND}
        --generate ${gen_type}
        --output-dependencies ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_${gen_type}.cmake
    )

    # Dry run to bootstrap the output variables
    execute_process(
        COMMAND ${GEN_COMMAND_${gen_type}} --dry-run
        RESULT_VARIABLE RETURN_VALUE
        WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
    )

    if(NOT RETURN_VALUE EQUAL 0)
      message(FATAL_ERROR "Failed to get generated_${gen_type} list")
    endif()

    include("${CMAKE_BINARY_DIR}/aten/src/ATen/generated_${gen_type}.cmake")
    include("${CMAKE_BINARY_DIR}/aten/src/ATen/core_generated_${gen_type}.cmake")
    include("${CMAKE_BINARY_DIR}/aten/src/ATen/cpu_vec_generated_${gen_type}.cmake")
    include("${CMAKE_BINARY_DIR}/aten/src/ATen/cuda_generated_${gen_type}.cmake")
    include("${CMAKE_BINARY_DIR}/aten/src/ATen/ops_generated_${gen_type}.cmake")

    message(STATUS "${gen_type} outputs: ${gen_outputs}")

    add_custom_command(
        COMMENT "Generating ATen ${gen_type}"
        OUTPUT
          ${generated_${gen_type}}
          ${cuda_generated_${gen_type}}
          ${core_generated_${gen_type}}
          ${cpu_vec_generated_${gen_type}}
          ${ops_generated_${gen_type}}
          ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_${gen_type}.cmake
          ${CMAKE_BINARY_DIR}/aten/src/ATen/ops_generated_${gen_type}.cmake
          ${CMAKE_BINARY_DIR}/aten/src/ATen/core_generated_${gen_type}.cmake
          ${CMAKE_BINARY_DIR}/aten/src/ATen/cpu_vec_generated_${gen_type}.cmake
          ${CMAKE_BINARY_DIR}/aten/src/ATen/cuda_generated_${gen_type}.cmake
        COMMAND ${GEN_COMMAND_${gen_type}}
        DEPENDS ${all_python} ${${gen_type}_templates}
          ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/native_functions.yaml
          ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/tags.yaml
        WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
    )
  endforeach()

  # Generated headers used from a CUDA (.cu) file are
  # not tracked correctly in CMake. We make the libATen.so depend explicitly
  # on building the generated ATen files to workaround.
  add_custom_target(ATEN_CPU_FILES_GEN_TARGET DEPENDS
      ${generated_headers} ${core_generated_headers} ${cpu_vec_generated_headers} ${ops_generated_headers}
      ${generated_sources} ${core_generated_sources} ${cpu_vec_generated_sources} ${ops_generated_sources}
      ${generated_declarations_yaml} ${generated_unboxing_sources})
  add_custom_target(ATEN_CUDA_FILES_GEN_TARGET DEPENDS
      ${cuda_generated_headers} ${cuda_generated_sources})
  # INTERFACE libraries so consumers can "link" the codegen dependency and
  # pick up its usage requirements (AT_PER_OPERATOR_HEADERS below).
  add_library(ATEN_CPU_FILES_GEN_LIB INTERFACE)
  add_library(ATEN_CUDA_FILES_GEN_LIB INTERFACE)
  add_dependencies(ATEN_CPU_FILES_GEN_LIB ATEN_CPU_FILES_GEN_TARGET)
  add_dependencies(ATEN_CUDA_FILES_GEN_LIB ATEN_CUDA_FILES_GEN_TARGET)

  if(USE_PER_OPERATOR_HEADERS)
    target_compile_definitions(ATEN_CPU_FILES_GEN_LIB INTERFACE AT_PER_OPERATOR_HEADERS)
    target_compile_definitions(ATEN_CUDA_FILES_GEN_LIB INTERFACE AT_PER_OPERATOR_HEADERS)
  endif()

  # Handle source files that need to be compiled multiple times for
  # different vectorization options
  file(GLOB cpu_kernel_cpp_in "${PROJECT_SOURCE_DIR}/aten/src/ATen/native/cpu/*.cpp" "${PROJECT_SOURCE_DIR}/aten/src/ATen/native/quantized/cpu/kernels/*.cpp")

  # CPU_CAPABILITY_NAMES and CPU_CAPABILITY_FLAGS are parallel lists:
  # entry i of FLAGS is the compile-flag string for capability i of NAMES.
  list(APPEND CPU_CAPABILITY_NAMES "DEFAULT")
  list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}")

  if(CXX_AVX512_FOUND)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_AVX512_CPU_DEFINITION")
    list(APPEND CPU_CAPABILITY_NAMES "AVX512")
    if(MSVC)
      list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}/arch:AVX512")
    else()
      list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -mavx512f -mavx512bw -mavx512vl -mavx512dq -mfma")
    endif()
  endif()

  if(CXX_AVX2_FOUND)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_AVX2_CPU_DEFINITION")

    # Some versions of GCC pessimistically split unaligned load and store
    # instructions when using the default tuning. This is a bad choice on
    # new Intel and AMD processors so we disable it when compiling with AVX2.
    # See https://stackoverflow.com/questions/52626726/why-doesnt-gcc-resolve-mm256-loadu-pd-as-single-vmovupd#tab-top
    check_cxx_compiler_flag("-mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store" COMPILER_SUPPORTS_NO_AVX256_SPLIT)
    if(COMPILER_SUPPORTS_NO_AVX256_SPLIT)
      set(CPU_NO_AVX256_SPLIT_FLAGS "-mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store")
    endif()

    # NOTE(review): if ATEN_AVX512_256 is set in the environment but is not
    # "TRUE" (or AVX512 is unavailable), "AVX2" is appended to NAMES with no
    # matching FLAGS entry, desynchronizing the parallel lists — confirm
    # whether that combination is ever exercised.
    list(APPEND CPU_CAPABILITY_NAMES "AVX2")
    if(DEFINED ENV{ATEN_AVX512_256})
      if($ENV{ATEN_AVX512_256} MATCHES "TRUE")
        if(CXX_AVX512_FOUND)
          message("-- ATen AVX2 kernels will use 32 ymm registers")
          if(MSVC)
            list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}/arch:AVX512")
          else()
            list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -march=native ${CPU_NO_AVX256_SPLIT_FLAGS}")
          endif()
        endif()
      endif()
    else()
      if(MSVC)
        list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}/arch:AVX2")
      else()
        list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -mavx2 -mfma -mf16c ${CPU_NO_AVX256_SPLIT_FLAGS}")
      endif()
    endif()
  endif()

  if(CXX_VSX_FOUND)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_VSX_CPU_DEFINITION")
    list(APPEND CPU_CAPABILITY_NAMES "VSX")
    list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} ${CXX_VSX_FLAGS}")
  endif()

  if(CXX_ZVECTOR_FOUND)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_ZVECTOR_CPU_DEFINITION")
    list(APPEND CPU_CAPABILITY_NAMES "ZVECTOR")
    list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} ${CXX_ZVECTOR_FLAGS}")
  endif()

  list(LENGTH CPU_CAPABILITY_NAMES NUM_CPU_CAPABILITY_NAMES)
  math(EXPR NUM_CPU_CAPABILITY_NAMES "${NUM_CPU_CAPABILITY_NAMES}-1")

  # The sources list might get reordered later based on the capabilities.
  # See NOTE [ Linking AVX and non-AVX files ]
  foreach(i RANGE ${NUM_CPU_CAPABILITY_NAMES})
    # process_vec(<NAME>): wrap kernel source <NAME> (path relative to
    # aten/src/ATen) in a generated IncludeSource stub compiled once for the
    # capability selected by the enclosing loop index `i` (functions see the
    # caller's variables, so `i` is resolved at call time), and attach the
    # matching per-capability flags.
    function(process_vec NAME)
      list(GET CPU_CAPABILITY_NAMES ${i} CPU_CAPABILITY)
      set(NEW_IMPL ${CMAKE_BINARY_DIR}/aten/src/ATen/${NAME}.${CPU_CAPABILITY}.cpp)
      configure_file("${PROJECT_SOURCE_DIR}/cmake/IncludeSource.cpp.in" ${NEW_IMPL})
      set(cpu_kernel_cpp ${NEW_IMPL} ${cpu_kernel_cpp} PARENT_SCOPE) # Create list of copies
      list(GET CPU_CAPABILITY_FLAGS ${i} FLAGS)
      if(MSVC)
        set(EXTRA_FLAGS "/DCPU_CAPABILITY=${CPU_CAPABILITY} /DCPU_CAPABILITY_${CPU_CAPABILITY}")
      else()
        set(EXTRA_FLAGS "-DCPU_CAPABILITY=${CPU_CAPABILITY} -DCPU_CAPABILITY_${CPU_CAPABILITY}")
      endif()
      # Disable certain warnings for GCC-9.X
      if(CMAKE_COMPILER_IS_GNUCXX)
        if(("${NAME}" STREQUAL "native/cpu/GridSamplerKernel.cpp") AND ("${CPU_CAPABILITY}" STREQUAL "DEFAULT"))
          # See https://github.com/pytorch/pytorch/issues/38855
          set(EXTRA_FLAGS "${EXTRA_FLAGS} -Wno-uninitialized")
        endif()
        if("${NAME}" STREQUAL "native/quantized/cpu/kernels/QuantizedOpKernels.cpp")
          # See https://github.com/pytorch/pytorch/issues/38854
          set(EXTRA_FLAGS "${EXTRA_FLAGS} -Wno-deprecated-copy")
        endif()
      endif()
      set_source_files_properties(${NEW_IMPL} PROPERTIES COMPILE_FLAGS "${FLAGS} ${EXTRA_FLAGS}")
    endfunction()
    foreach(IMPL ${cpu_kernel_cpp_in})
      file(RELATIVE_PATH NAME "${PROJECT_SOURCE_DIR}/aten/src/ATen/" "${IMPL}")
      process_vec("${NAME}")
    endforeach()
    foreach(IMPL ${cpu_vec_generated_sources})
      file(RELATIVE_PATH NAME "${CMAKE_BINARY_DIR}/aten/src/ATen/" "${IMPL}")
      process_vec("${NAME}")
    endforeach()
  endforeach()
  list(APPEND ATen_CPU_SRCS ${cpu_kernel_cpp})
endif()

# append_filelist(<name> <outputvar>)
# Evaluate the Python list named <name> inside build_variables.bzl and append
# its entries, rooted at Torch_SOURCE_DIR, to <outputvar> in the caller's
# scope. Fails the configure step if the list cannot be read.
function(append_filelist name outputvar)
  set(_rootdir "${Torch_SOURCE_DIR}/")
  # configure_file adds its input to the list of CMAKE_RERUN dependencies,
  # so edits to build_variables.bzl retrigger configuration.
  configure_file(
      ${PROJECT_SOURCE_DIR}/build_variables.bzl
      ${PROJECT_BINARY_DIR}/caffe2/build_variables.bzl)
  execute_process(
      COMMAND "${Python_EXECUTABLE}" -c
              "exec(open('${PROJECT_SOURCE_DIR}/build_variables.bzl').read());print(';'.join(['${_rootdir}' + x for x in ${name}]))"
      WORKING_DIRECTORY "${_rootdir}"
      RESULT_VARIABLE _retval
      OUTPUT_VARIABLE _tempvar)
  if(NOT _retval EQUAL 0)
    message(FATAL_ERROR "Failed to fetch filelist ${name} from build_variables.bzl")
  endif()
  string(REPLACE "\n" "" _tempvar "${_tempvar}")
  list(APPEND ${outputvar} ${_tempvar})
  set(${outputvar} "${${outputvar}}" PARENT_SCOPE)
endfunction()

# These file-level PARENT_SCOPE sets imply this file is include()d from inside
# a function scope; propagate the capability lists to that caller.
set(NUM_CPU_CAPABILITY_NAMES ${NUM_CPU_CAPABILITY_NAMES} PARENT_SCOPE)
set(CPU_CAPABILITY_FLAGS ${CPU_CAPABILITY_FLAGS} PARENT_SCOPE)