# Bazel build description for the oneDNN ("mkl-dnn") CPU library.
#
# Two headers that upstream produces from `*.h.in` templates are generated here
# with `template_rule`, then a single `cc_library` compiles the common and CPU
# sources against them.

load("@rules_cc//cc:defs.bzl", "cc_library")
load("@pytorch//third_party:substitution.bzl", "template_rule")

# Paths of the two generated headers; shared by the rules that produce them and
# by the library that exports them.
_DNNL_CONFIG_H = "include/oneapi/dnnl/dnnl_config.h"
_DNNL_VERSION_H = "include/oneapi/dnnl/dnnl_version.h"

# Sub-directories of src/cpu/ that are left out of this build.
_EXCLUDED_CPU_ARCH_DIRS = [
    "aarch64",
    "rv64",
]

# `#cmakedefine` lines of dnnl_config.h.in paired with their replacement text.
#
# Keep this order: some keys are prefixes of others (for example
# `#cmakedefine DNNL_EXPERIMENTAL` vs. `..._SPARSE` / `..._PROFILING`), so the
# result may depend on the order in which template_rule applies substitutions
# -- TODO confirm against substitution.bzl before reordering.
_CMAKEDEFINE_SUBSTITUTIONS = [
    # Runtime selection: OMP on the CPU side, no GPU runtime.
    (
        "#cmakedefine DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_${DNNL_CPU_THREADING_RUNTIME}",
        "#define DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_OMP",
    ),
    (
        "#cmakedefine DNNL_CPU_RUNTIME DNNL_RUNTIME_${DNNL_CPU_RUNTIME}",
        "#define DNNL_CPU_RUNTIME DNNL_RUNTIME_OMP",
    ),
    (
        "#cmakedefine DNNL_GPU_RUNTIME DNNL_RUNTIME_${DNNL_GPU_RUNTIME}",
        "#define DNNL_GPU_RUNTIME DNNL_RUNTIME_NONE",
    ),
    # Optional features. Entries replaced by a `/* ... */` comment leave any
    # command-line definition untouched; entries replaced by a bare `#undef`
    # actively remove the macro once the header is included.
    (
        "#cmakedefine DNNL_USE_RT_OBJECTS_IN_PRIMITIVE_CACHE",
        "/* undef DNNL_USE_RT_OBJECTS_IN_PRIMITIVE_CACHE */",
    ),
    ("#cmakedefine DNNL_WITH_SYCL", "/* #undef DNNL_WITH_SYCL */"),
    ("#cmakedefine DNNL_WITH_LEVEL_ZERO", "/* #undef DNNL_WITH_LEVEL_ZERO */"),
    ("#cmakedefine DNNL_SYCL_CUDA", "/* #undef DNNL_SYCL_CUDA */"),
    ("#cmakedefine DNNL_SYCL_HIP", "/* #undef DNNL_SYCL_HIP */"),
    ("#cmakedefine DNNL_ENABLE_STACK_CHECKER", "#undef DNNL_ENABLE_STACK_CHECKER"),
    ("#cmakedefine DNNL_EXPERIMENTAL_UKERNEL", "/* undef DNNL_EXPERIMENTAL_UKERNEL */"),
    ("#cmakedefine DNNL_EXPERIMENTAL", "#undef DNNL_EXPERIMENTAL"),
    ("#cmakedefine DNNL_EXPERIMENTAL_SPARSE", "#undef DNNL_EXPERIMENTAL_SPARSE"),
    # NOTE(review): the library below also lists ONEDNN_BUILD_GRAPH in
    # `defines`; this bare `#undef` would cancel it after dnnl_config.h is
    # included -- confirm that is intended.
    ("#cmakedefine ONEDNN_BUILD_GRAPH", "#undef ONEDNN_BUILD_GRAPH"),
    ("#cmakedefine DNNL_EXPERIMENTAL_PROFILING", "#undef DNNL_EXPERIMENTAL_PROFILING"),
]

# (macro, value) pairs for the `#cmakedefine01 <macro>` lines of
# dnnl_config.h.in. Each pair expands to `#define <macro> <value>`.
_CMAKEDEFINE01_FLAGS = [
    # Workload and primitive selection.
    ("BUILD_TRAINING", 1),
    ("BUILD_INFERENCE", 0),
    ("BUILD_PRIMITIVE_ALL", 1),
    ("BUILD_BATCH_NORMALIZATION", 0),
    ("BUILD_BINARY", 0),
    ("BUILD_CONCAT", 0),
    ("BUILD_CONVOLUTION", 0),
    ("BUILD_DECONVOLUTION", 0),
    ("BUILD_ELTWISE", 0),
    ("BUILD_GROUP_NORMALIZATION", 0),
    ("BUILD_INNER_PRODUCT", 0),
    ("BUILD_LAYER_NORMALIZATION", 0),
    ("BUILD_LRN", 0),
    ("BUILD_MATMUL", 0),
    ("BUILD_POOLING", 0),
    ("BUILD_PRELU", 0),
    ("BUILD_REDUCTION", 0),
    ("BUILD_REORDER", 0),
    ("BUILD_RESAMPLING", 0),
    ("BUILD_RNN", 0),
    ("BUILD_SHUFFLE", 0),
    ("BUILD_SOFTMAX", 0),
    ("BUILD_SUM", 0),
    # CPU ISA selection.
    ("BUILD_PRIMITIVE_CPU_ISA_ALL", 1),
    ("BUILD_SSE41", 0),
    ("BUILD_AVX2", 0),
    ("BUILD_AVX512", 0),
    ("BUILD_AMX", 0),
    # GPU ISA selection.
    ("BUILD_PRIMITIVE_GPU_ISA_ALL", 1),
    ("BUILD_GEN9", 0),
    ("BUILD_GEN11", 0),
    ("BUILD_XELP", 0),
    ("BUILD_XEHPG", 0),
    ("BUILD_XEHPC", 0),
    ("BUILD_XEHP", 0),
    ("BUILD_XE2", 0),
    # GEMM kernel selection.
    ("BUILD_GEMM_KERNELS_ALL", 0),
    ("BUILD_GEMM_KERNELS_NONE", 0),
    ("BUILD_GEMM_SSE41", 0),
    ("BUILD_GEMM_AVX2", 0),
    ("BUILD_GEMM_AVX512", 0),
]

# Full substitution table for dnnl_config.h.in: the literal `#cmakedefine`
# pairs first, followed by the expanded `#cmakedefine01` pairs, in list order.
_DNNL_RUNTIME_OMP = dict(
    _CMAKEDEFINE_SUBSTITUTIONS + [
        ("#cmakedefine01 " + flag, "#define %s %d" % (flag, value))
        for flag, value in _CMAKEDEFINE01_FLAGS
    ],
)

# Generates dnnl_version.h by filling the @DNNL_VERSION_*@ placeholders.
template_rule(
    name = "include_dnnl_version",
    src = "include/oneapi/dnnl/dnnl_version.h.in",
    out = _DNNL_VERSION_H,
    substitutions = {
        "@DNNL_VERSION_MAJOR@": "3",
        "@DNNL_VERSION_MINOR@": "5",
        "@DNNL_VERSION_PATCH@": "3",
        "@DNNL_VERSION_HASH@": "66f0cb9eb66affd2da3bf5f8d897376f04aae6af",
    },
)

# Generates dnnl_config.h from the substitution table assembled above.
template_rule(
    name = "include_dnnl_config",
    src = "include/oneapi/dnnl/dnnl_config.h.in",
    out = _DNNL_CONFIG_H,
    substitutions = _DNNL_RUNTIME_OMP,
)

# The oneDNN library: common sources plus every CPU source outside the
# excluded architecture directories, built with OpenMP and linked against MKL.
cc_library(
    name = "mkl-dnn",
    srcs = glob(
        [
            "src/common/*.cpp",
            "src/cpu/**/*.cpp",
        ],
        exclude = [
            "src/cpu/%s/**/*.cpp" % arch
            for arch in _EXCLUDED_CPU_ARCH_DIRS
        ],
    ),
    hdrs = glob(
        [
            "include/oneapi/dnnl/*.h",
            "include/oneapi/dnnl/*.hpp",
            "include/*.h",
            "include/*.hpp",
            "src/cpu/**/*.hpp",
            "src/cpu/**/*.h",
            "src/common/*.hpp",
            "src/common/ittnotify/jitprofiling.h",
        ],
        exclude = [
            "src/cpu/%s/**/*.%s" % (arch, ext)
            for arch in _EXCLUDED_CPU_ARCH_DIRS
            for ext in ["hpp", "h"]
        ],
    ) + [
        # Outputs of the two template_rule targets above.
        _DNNL_CONFIG_H,
        _DNNL_VERSION_H,
    ],
    copts = [
        "-DDNNL_DLL",
        "-DDNNL_DLL_EXPORTS",
        # NOTE(review): also present in `defines` below, so this flag looks
        # redundant -- confirm before removing.
        "-DDNNL_ENABLE_CONCURRENT_EXEC",
        "-D__STDC_CONSTANT_MACROS",
        "-D__STDC_LIMIT_MACROS",
        "-fno-strict-overflow",
        "-fopenmp",
    ] + select({
        # NOTE(review): the generated dnnl_config.h also #defines
        # DNNL_CPU_RUNTIME; 0 and 2 are presumably the numeric runtime ids
        # used by oneDNN -- confirm how the two definitions interact.
        "@pytorch//tools/config:thread_sanitizer": ["-DDNNL_CPU_RUNTIME=0"],
        "//conditions:default": ["-DDNNL_CPU_RUNTIME=2"],
    }),
    defines = [
        "DNNL_ENABLE_MAX_CPU_ISA",
        "DNNL_ENABLE_CONCURRENT_EXEC",
        "DNNL_ENABLE_PRIMITIVE_CACHE",
        "DNNL_ENABLE_CPU_ISA_HINTS",
        "DNNL_EXPERIMENTAL_UKERNEL",
        "ONEDNN_BUILD_GRAPH",
    ],
    includes = [
        "include/",
        "include/oneapi/",
        "include/oneapi/dnnl/",
        "src/",
        "src/common/",
        "src/cpu/",
        "src/cpu/x64/xbyak/",
    ],
    linkopts = [
        "-lgomp",
    ],
    visibility = ["//visibility:public"],
    deps = [
        "@mkl",
    ],
)