# detect-intrinsics.cmake -- Detect compiler intrinsics support
# Licensed under the Zlib license, see LICENSE.md for details
#
# Every check below is a macro(), so the variables it sets (the *FLAG
# variables and the HAVE_* results) land in the caller's scope. Each macro
# sets CMAKE_REQUIRED_FLAGS for its own probes and clears it again before
# returning so the flags do not leak into unrelated checks.
#
# check_c_source_compile_or_run() is not defined in this file; it must be
# provided by the including project before these macros are called.

# Probe for a compiler flag that enables the ARM ACLE (CRC) extension.
# Unless NATIVEFLAG is set, tries "-march=armv8-a+crc" first and falls back
# to "-march=armv8-a+crc+simd".
# Sets: ACLEFLAG (cache), HAVE_ACLE_FLAG.
macro(check_acle_compiler_flag)
    if(NOT NATIVEFLAG AND NOT HAVE_ACLE_FLAG)
        set(ACLEFLAG "-march=armv8-a+crc" CACHE INTERNAL "Compiler option to enable ACLE support")
    endif()
    # Check whether compiler supports ACLE flag
    set(CMAKE_REQUIRED_FLAGS "${ACLEFLAG} ${NATIVEFLAG}")
    check_c_source_compiles(
        "int main() { return 0; }"
        HAVE_ACLE_FLAG FAIL_REGEX "not supported")
    if(NOT NATIVEFLAG AND NOT HAVE_ACLE_FLAG)
        # First candidate was rejected; retry with the +simd variant
        set(ACLEFLAG "-march=armv8-a+crc+simd" CACHE INTERNAL "Compiler option to enable ACLE support" FORCE)
        # Check whether compiler supports ACLE flag
        set(CMAKE_REQUIRED_FLAGS "${ACLEFLAG}")
        check_c_source_compiles(
            "int main() { return 0; }"
            HAVE_ACLE_FLAG2 FAIL_REGEX "not supported")
        set(HAVE_ACLE_FLAG ${HAVE_ACLE_FLAG2} CACHE INTERNAL "Have compiler option to enable ACLE intrinsics" FORCE)
        unset(HAVE_ACLE_FLAG2 CACHE) # Don't cache this internal variable
    endif()
    set(CMAKE_REQUIRED_FLAGS)
endmacro()

# Probe for AVX512 (F/DQ/BW/VL) intrinsics and for the mask intrinsics.
# Sets: AVX512FLAG, HAVE_AVX512_INTRIN, HAVE_MASK_INTRIN.
macro(check_avx512_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
        if(CMAKE_HOST_UNIX OR APPLE)
            set(AVX512FLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl")
        else()
            set(AVX512FLAG "/arch:AVX512")
        endif()
    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
        if(NOT NATIVEFLAG)
            # For CPUs that can benefit from AVX512, it seems GCC generates suboptimal
            # instruction scheduling unless you specify a reasonable -mtune= target
            set(AVX512FLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl")
            if(NOT CMAKE_GENERATOR_TOOLSET MATCHES "ClangCl")
                set(AVX512FLAG "${AVX512FLAG} -mtune=cascadelake")
            endif()
        endif()
    elseif(MSVC)
        set(AVX512FLAG "/arch:AVX512")
    endif()
    # Check whether compiler supports AVX512 intrinsics
    set(CMAKE_REQUIRED_FLAGS "${AVX512FLAG} ${NATIVEFLAG}")
    check_c_source_compile_or_run(
        "#include <immintrin.h>
        int main(void) {
            __m512i x = _mm512_set1_epi8(2);
            const __m512i y = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                                              20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
                                              38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
                                              56, 57, 58, 59, 60, 61, 62, 63, 64);
            x = _mm512_sub_epi8(x, y);
            (void)x;
            return 0;
        }"
        HAVE_AVX512_INTRIN
    )

    # Evidently both GCC and clang were late to implementing these
    check_c_source_compile_or_run(
        "#include <immintrin.h>
        int main(void) {
            __mmask16 a = 0xFF;
            a = _knot_mask16(a);
            (void)a;
            return 0;
        }"
        HAVE_MASK_INTRIN
    )
    set(CMAKE_REQUIRED_FLAGS)
endmacro()

# Probe for AVX512 VNNI intrinsics (_mm512_dpbusd_epi32).
# Sets: AVX512VNNIFLAG, HAVE_AVX512VNNI_INTRIN.
macro(check_avx512vnni_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
        if(CMAKE_HOST_UNIX OR APPLE)
            set(AVX512VNNIFLAG "-mavx512f -mavx512bw -mavx512dq -mavx512vl -mavx512vnni")
        else()
            set(AVX512VNNIFLAG "/arch:AVX512")
        endif()
    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
        if(NOT NATIVEFLAG)
            set(AVX512VNNIFLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vnni")
            if(NOT CMAKE_GENERATOR_TOOLSET MATCHES "ClangCl")
                set(AVX512VNNIFLAG "${AVX512VNNIFLAG} -mtune=cascadelake")
            endif()
        endif()
    elseif(MSVC)
        set(AVX512VNNIFLAG "/arch:AVX512")
    endif()

    # Check whether compiler supports AVX512vnni intrinsics
    set(CMAKE_REQUIRED_FLAGS "${AVX512VNNIFLAG} ${NATIVEFLAG}")
    check_c_source_compile_or_run(
        "#include <immintrin.h>
        int main(void) {
            __m512i x = _mm512_set1_epi8(2);
            const __m512i y = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                                              20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
                                              38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
                                              56, 57, 58, 59, 60, 61, 62, 63, 64);
            __m512i z = _mm512_setzero_epi32();
            z = _mm512_dpbusd_epi32(z, x, y);
            (void)z;
            return 0;
        }"
        HAVE_AVX512VNNI_INTRIN
    )
    set(CMAKE_REQUIRED_FLAGS)
endmacro()

# Probe for AVX2 intrinsics.
# Sets: AVX2FLAG, HAVE_AVX2_INTRIN.
macro(check_avx2_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
        if(CMAKE_HOST_UNIX OR APPLE)
            set(AVX2FLAG "-mavx2")
        else()
            set(AVX2FLAG "/arch:AVX2")
        endif()
    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
        if(NOT NATIVEFLAG)
            set(AVX2FLAG "-mavx2")
        endif()
    endif()
    # Check whether compiler supports AVX2 intrinsics
    set(CMAKE_REQUIRED_FLAGS "${AVX2FLAG} ${NATIVEFLAG}")
    check_c_source_compile_or_run(
        "#include <immintrin.h>
        int main(void) {
            __m256i x = _mm256_set1_epi16(2);
            const __m256i y = _mm256_set1_epi16(1);
            x = _mm256_subs_epu16(x, y);
            (void)x;
            return 0;
        }"
        HAVE_AVX2_INTRIN
    )
    set(CMAKE_REQUIRED_FLAGS)
endmacro()

# Probe whether the compiler accepts the NEON enabling flag.
# The flag is "-march=armv8-a+simd" when ARCH matches aarch64, otherwise
# "-mfpu=neon"; it is only chosen for GNU/Clang when NATIVEFLAG is unset.
# Sets: NEONFLAG, MFPU_NEON_AVAILABLE.
macro(check_neon_compiler_flag)
    if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
        if(NOT NATIVEFLAG)
            if("${ARCH}" MATCHES "aarch64")
                set(NEONFLAG "-march=armv8-a+simd")
            else()
                set(NEONFLAG "-mfpu=neon")
            endif()
        endif()
    endif()
    # Check whether compiler supports NEON flag
    set(CMAKE_REQUIRED_FLAGS "${NEONFLAG} ${NATIVEFLAG}")
    check_c_source_compiles(
        "int main() { return 0; }"
        MFPU_NEON_AVAILABLE FAIL_REGEX "not supported")
    set(CMAKE_REQUIRED_FLAGS)
endmacro()

# Probe for the vld1q_s32_x4 NEON intrinsic.
# Sets: NEONFLAG, NEON_HAS_LD4.
macro(check_neon_ld4_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
        if(NOT NATIVEFLAG)
            if("${ARCH}" MATCHES "aarch64")
                set(NEONFLAG "-march=armv8-a+simd")
            else()
                set(NEONFLAG "-mfpu=neon")
            endif()
        endif()
    endif()
    # Check whether compiler supports loading 4 neon vecs into a register range.
    # NATIVEFLAG is passed as well: when it is set, NEONFLAG is not assigned
    # above, and the probe would otherwise run without any target flag.
    set(CMAKE_REQUIRED_FLAGS "${NEONFLAG} ${NATIVEFLAG}")
    check_c_source_compiles(
        "#ifdef _M_ARM64
        #  include <arm64_neon.h>
        #else
        #  include <arm_neon.h>
        #endif
        int main(void) {
            int stack_var[16];
            int32x4x4_t v = vld1q_s32_x4(stack_var);
            (void)v;
            return 0;
        }"
        NEON_HAS_LD4)
    set(CMAKE_REQUIRED_FLAGS)
endmacro()

# Probe for the PCLMULQDQ intrinsic (_mm_clmulepi64_si128).
# The check is skipped, and the result forced OFF, on Apple i386.
# Sets: PCLMULFLAG, HAVE_PCLMULQDQ_INTRIN.
macro(check_pclmulqdq_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
        if(NOT NATIVEFLAG)
            set(PCLMULFLAG "-mpclmul")
        endif()
    endif()
    # Check whether compiler supports PCLMULQDQ intrinsics
    if(NOT (APPLE AND "${ARCH}" MATCHES "i386"))
        # The pclmul code currently crashes on Mac in 32bit mode. Avoid for now.
        set(CMAKE_REQUIRED_FLAGS "${PCLMULFLAG} ${NATIVEFLAG}")
        check_c_source_compile_or_run(
            "#include <immintrin.h>
            int main(void) {
                __m128i a = _mm_setzero_si128();
                __m128i b = _mm_setzero_si128();
                __m128i c = _mm_clmulepi64_si128(a, b, 0x10);
                (void)c;
                return 0;
            }"
            HAVE_PCLMULQDQ_INTRIN
        )
        set(CMAKE_REQUIRED_FLAGS)
    else()
        set(HAVE_PCLMULQDQ_INTRIN OFF)
    endif()
endmacro()

# Probe for the VPCLMULQDQ intrinsic (_mm512_clmulepi64_epi128).
# The check is skipped, and the result forced OFF, on Apple i386.
# Sets: VPCLMULFLAG, HAVE_VPCLMULQDQ_INTRIN.
macro(check_vpclmulqdq_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
        if(NOT NATIVEFLAG)
            set(VPCLMULFLAG "-mvpclmulqdq")
        endif()
    endif()
    # Check whether compiler supports VPCLMULQDQ intrinsics
    if(NOT (APPLE AND "${ARCH}" MATCHES "i386"))
        set(CMAKE_REQUIRED_FLAGS "${VPCLMULFLAG} ${NATIVEFLAG}")
        check_c_source_compile_or_run(
            "#include <immintrin.h>
            int main(void) {
                __m512i a = _mm512_setzero_si512();
                __m512i b = _mm512_setzero_si512();
                __m512i c = _mm512_clmulepi64_epi128(a, b, 0x10);
                (void)c;
                return 0;
            }"
            HAVE_VPCLMULQDQ_INTRIN
        )
        set(CMAKE_REQUIRED_FLAGS)
    else()
        set(HAVE_VPCLMULQDQ_INTRIN OFF)
    endif()
endmacro()

# Probe for PowerPC AltiVec support.
# Builds PPCFLAGS from "-maltivec" and "-mno-vsx" according to which of them
# the compiler accepts, then checks that getauxval()/PPC_FEATURE_HAS_ALTIVEC
# are usable with those flags.
# Sets: PPCFLAGS, HAVE_ALTIVEC, HAVE_NOVSX, HAVE_VMX.
macro(check_ppc_intrinsics)
    # Check if compiler supports AltiVec
    set(CMAKE_REQUIRED_FLAGS "-maltivec")
    check_c_source_compiles(
        "#include <altivec.h>
        int main(void)
        {
            vector int a = vec_splats(0);
            vector int b = vec_splats(0);
            a = vec_add(a, b);
            return 0;
        }"
        HAVE_ALTIVEC
    )
    set(CMAKE_REQUIRED_FLAGS)

    if(HAVE_ALTIVEC)
        set(PPCFLAGS "-maltivec")
    endif()

    # Check if the same code still compiles with VSX disabled
    set(CMAKE_REQUIRED_FLAGS "-maltivec -mno-vsx")
    check_c_source_compiles(
        "#include <altivec.h>
        int main(void)
        {
            vector int a = vec_splats(0);
            vector int b = vec_splats(0);
            a = vec_add(a, b);
            return 0;
        }"
        HAVE_NOVSX
    )
    set(CMAKE_REQUIRED_FLAGS)

    if(HAVE_NOVSX)
        set(PPCFLAGS "${PPCFLAGS} -mno-vsx")
    endif()

    # Check if we have what we need for AltiVec optimizations
    set(CMAKE_REQUIRED_FLAGS "${PPCFLAGS} ${NATIVEFLAG}")
    check_c_source_compiles(
        "#include <sys/auxv.h>
        int main() {
            return (getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
        }"
        HAVE_VMX
    )
    set(CMAKE_REQUIRED_FLAGS)
endmacro()

# Probe for what the POWER8 optimizations need: "-mcpu=power8" (GNU/Clang
# without NATIVEFLAG) plus getauxval()/PPC_FEATURE2_ARCH_2_07.
# Sets: POWER8FLAG, HAVE_POWER8_INTRIN.
macro(check_power8_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
        if(NOT NATIVEFLAG)
            set(POWER8FLAG "-mcpu=power8")
        endif()
    endif()
    # Check if we have what we need for POWER8 optimizations
    set(CMAKE_REQUIRED_FLAGS "${POWER8FLAG} ${NATIVEFLAG}")
    check_c_source_compiles(
        "#include <sys/auxv.h>
        int main() {
            return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07);
        }"
        HAVE_POWER8_INTRIN
    )
    set(CMAKE_REQUIRED_FLAGS)
endmacro()

# Probe for getauxval()/HWCAP_S390_VX availability. No extra compiler flags
# are used for this check.
# Sets: HAVE_S390_INTRIN.
macro(check_s390_intrinsics)
    check_c_source_compiles(
        "#include <sys/auxv.h>
        int main() {
            return (getauxval(AT_HWCAP) & HWCAP_S390_VX);
        }"
        HAVE_S390_INTRIN
    )
endmacro()

# Probe whether the compiler accepts "-mcpu=power9" (GNU/Clang without
# NATIVEFLAG) by compiling a trivial program.
# Sets: POWER9FLAG, HAVE_POWER9_INTRIN.
macro(check_power9_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
        if(NOT NATIVEFLAG)
            set(POWER9FLAG "-mcpu=power9")
        endif()
    endif()
    # Check if we have what we need for POWER9 optimizations
    set(CMAKE_REQUIRED_FLAGS "${POWER9FLAG} ${NATIVEFLAG}")
    check_c_source_compiles(
        "int main() {
            return 0;
        }"
        HAVE_POWER9_INTRIN
    )
    set(CMAKE_REQUIRED_FLAGS)
endmacro()

# Probe for SSE2 intrinsics.
# Sets: SSE2FLAG, HAVE_SSE2_INTRIN.
macro(check_sse2_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
        if(CMAKE_HOST_UNIX OR APPLE)
            set(SSE2FLAG "-msse2")
        else()
            set(SSE2FLAG "/arch:SSE2")
        endif()
    elseif(MSVC)
        # /arch:SSE2 is only passed when ARCH does not match x86_64
        if(NOT "${ARCH}" MATCHES "x86_64")
            set(SSE2FLAG "/arch:SSE2")
        endif()
    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
        if(NOT NATIVEFLAG)
            set(SSE2FLAG "-msse2")
        endif()
    endif()
    # Check whether compiler supports SSE2 intrinsics
    set(CMAKE_REQUIRED_FLAGS "${SSE2FLAG} ${NATIVEFLAG}")
    check_c_source_compile_or_run(
        "#include <immintrin.h>
        int main(void) {
            __m128i zero = _mm_setzero_si128();
            (void)zero;
            return 0;
        }"
        HAVE_SSE2_INTRIN
    )
    set(CMAKE_REQUIRED_FLAGS)
endmacro()

# Probe for SSSE3 intrinsics.
# Sets: SSSE3FLAG, HAVE_SSSE3_INTRIN.
macro(check_ssse3_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
        if(CMAKE_HOST_UNIX OR APPLE)
            set(SSSE3FLAG "-mssse3")
        else()
            set(SSSE3FLAG "/arch:SSSE3")
        endif()
    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
        if(NOT NATIVEFLAG)
            set(SSSE3FLAG "-mssse3")
        endif()
    endif()
    # Check whether compiler supports SSSE3 intrinsics
    set(CMAKE_REQUIRED_FLAGS "${SSSE3FLAG} ${NATIVEFLAG}")
    check_c_source_compile_or_run(
        "#include <immintrin.h>
        int main(void) {
            __m128i u, v, w;
            u = _mm_set1_epi32(1);
            v = _mm_set1_epi32(2);
            w = _mm_hadd_epi32(u, v);
            (void)w;
            return 0;
        }"
        HAVE_SSSE3_INTRIN
    )
    # Clear the probe flags so they do not leak into the caller's later checks
    set(CMAKE_REQUIRED_FLAGS)
endmacro()

# Probe for SSE4.1 intrinsics.
# Sets: SSE41FLAG, HAVE_SSE41_INTRIN.
macro(check_sse41_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
        if(CMAKE_HOST_UNIX OR APPLE)
            set(SSE41FLAG "-msse4.1")
        else()
            set(SSE41FLAG "/arch:SSE4.1")
        endif()
    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
        if(NOT NATIVEFLAG)
            set(SSE41FLAG "-msse4.1")
        endif()
    endif()
    # Check whether compiler supports SSE4.1 intrinsics.
    # _mm_min_epi8 is used because it is an SSE4.1 intrinsic; an SSE2-only
    # intrinsic here would let the check pass without SSE4.1 support.
    set(CMAKE_REQUIRED_FLAGS "${SSE41FLAG} ${NATIVEFLAG}")
    check_c_source_compile_or_run(
        "#include <immintrin.h>
        int main(void) {
            __m128i u, v, w;
            u = _mm_set1_epi8(1);
            v = _mm_set1_epi8(2);
            w = _mm_min_epi8(u, v);
            (void)w;
            return 0;
        }"
        HAVE_SSE41_INTRIN
    )
    # Clear the probe flags so they do not leak into the caller's later checks
    set(CMAKE_REQUIRED_FLAGS)
endmacro()

# Probe for SSE4.2 support: CRC32 via inline asm, CRC32 via intrinsics, and
# the compare-string intrinsic (_mm_cmpestri). All three probes share the
# same CMAKE_REQUIRED_FLAGS.
# Sets: SSE42FLAG, HAVE_SSE42CRC_INLINE_ASM, HAVE_SSE42CRC_INTRIN,
#       HAVE_SSE42CMPSTR_INTRIN.
macro(check_sse42_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
        if(CMAKE_HOST_UNIX OR APPLE)
            set(SSE42FLAG "-msse4.2")
        else()
            set(SSE42FLAG "/arch:SSE4.2")
        endif()
    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
        if(NOT NATIVEFLAG)
            set(SSE42FLAG "-msse4.2")
        endif()
    endif()
    # Check whether compiler supports SSE4 CRC inline asm
    set(CMAKE_REQUIRED_FLAGS "${SSE42FLAG} ${NATIVEFLAG}")
    check_c_source_compile_or_run(
        "int main(void) {
            unsigned val = 0, h = 0;
        #if defined(_MSC_VER)
            { __asm mov edx, h __asm mov eax, val __asm crc32 eax, edx __asm mov h, eax }
        #else
            __asm__ __volatile__ ( \"crc32 %1,%0\" : \"+r\" (h) : \"r\" (val) );
        #endif
            return (int)h;
        }"
        HAVE_SSE42CRC_INLINE_ASM
    )
    # Check whether compiler supports SSE4 CRC intrinsics
    check_c_source_compile_or_run(
        "#include <immintrin.h>
        int main(void) {
            unsigned crc = 0;
            char c = 'c';
        #if defined(_MSC_VER)
            crc = _mm_crc32_u32(crc, c);
        #else
            crc = __builtin_ia32_crc32qi(crc, c);
        #endif
            (void)crc;
            return 0;
        }"
        HAVE_SSE42CRC_INTRIN
    )
    # Check whether compiler supports SSE4.2 compare string intrinsics
    check_c_source_compile_or_run(
        "#include <immintrin.h>
        int main(void) {
            unsigned char a[64] = { 0 };
            unsigned char b[64] = { 0 };
            __m128i xmm_src0, xmm_src1;
            xmm_src0 = _mm_loadu_si128((__m128i *)(char *)a);
            xmm_src1 = _mm_loadu_si128((__m128i *)(char *)b);
            return _mm_cmpestri(xmm_src0, 16, xmm_src1, 16, 0);
        }"
        HAVE_SSE42CMPSTR_INTRIN
    )
    set(CMAKE_REQUIRED_FLAGS)
endmacro()

# Probe for the s390 vec_gfmsum_accum_128 intrinsic.
# Unless NATIVEFLAG is set, uses "-march=z13", plus "-mzarch" for GNU and
# "-fzvector" for Clang.
# Sets: VGFMAFLAG, HAVE_VGFMA_INTRIN.
macro(check_vgfma_intrinsics)
    if(NOT NATIVEFLAG)
        set(VGFMAFLAG "-march=z13")
        if(CMAKE_C_COMPILER_ID MATCHES "GNU")
            set(VGFMAFLAG "${VGFMAFLAG} -mzarch")
        endif()
        if(CMAKE_C_COMPILER_ID MATCHES "Clang")
            set(VGFMAFLAG "${VGFMAFLAG} -fzvector")
        endif()
    endif()
    # Check whether compiler supports "VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE" intrinsic
    set(CMAKE_REQUIRED_FLAGS "${VGFMAFLAG} ${NATIVEFLAG}")
    check_c_source_compiles(
        "#include <vecintrin.h>
        int main(void) {
            unsigned long long a __attribute__((vector_size(16))) = { 0 };
            unsigned long long b __attribute__((vector_size(16))) = { 0 };
            unsigned char c __attribute__((vector_size(16))) = { 0 };
            c = vec_gfmsum_accum_128(a, b, c);
            return c[0];
        }"
        HAVE_VGFMA_INTRIN FAIL_REGEX "not supported")
    set(CMAKE_REQUIRED_FLAGS)
endmacro()