1# detect-intrinsics.cmake -- Detect compiler intrinsics support
2# Licensed under the Zlib license, see LICENSE.md for details
3
# check_acle_compiler_flag()
#
# Probes for a -march option that enables ACLE support.
#   Reads:  NATIVEFLAG (when set, no -march candidate is selected here)
#   Writes: ACLEFLAG       (cache) - the -march candidate most recently selected
#           HAVE_ACLE_FLAG (cache) - result of the probe
macro(check_acle_compiler_flag)
    # First candidate: ARMv8-A with the CRC extension.
    if(NOT (NATIVEFLAG OR HAVE_ACLE_FLAG))
        set(ACLEFLAG "-march=armv8-a+crc" CACHE INTERNAL "Compiler option to enable ACLE support")
    endif()
    set(CMAKE_REQUIRED_FLAGS "${ACLEFLAG} ${NATIVEFLAG}")
    check_c_source_compiles(
        "int main() { return 0; }"
        HAVE_ACLE_FLAG
        FAIL_REGEX "not supported"
    )
    # Second candidate, tried only when the first one was rejected: +crc+simd.
    if(NOT (NATIVEFLAG OR HAVE_ACLE_FLAG))
        set(ACLEFLAG "-march=armv8-a+crc+simd" CACHE INTERNAL "Compiler option to enable ACLE support" FORCE)
        set(CMAKE_REQUIRED_FLAGS "${ACLEFLAG}")
        check_c_source_compiles(
            "int main() { return 0; }"
            HAVE_ACLE_FLAG2
            FAIL_REGEX "not supported"
        )
        # Publish the retry result under the public name; the scratch cache
        # entry is dropped so that it does not persist between configure runs.
        set(HAVE_ACLE_FLAG ${HAVE_ACLE_FLAG2} CACHE INTERNAL "Have compiler option to enable ACLE intrinsics" FORCE)
        unset(HAVE_ACLE_FLAG2 CACHE)
    endif()
    unset(CMAKE_REQUIRED_FLAGS)
endmacro()
25
# check_avx512_intrinsics()
#
# Selects the AVX512 compiler option for the active compiler and probes two
# groups of intrinsics with it.
#   Reads:  NATIVEFLAG, CMAKE_C_COMPILER_ID, CMAKE_GENERATOR_TOOLSET
#   Writes: AVX512FLAG, HAVE_AVX512_INTRIN, HAVE_MASK_INTRIN
macro(check_avx512_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
        if(NOT (CMAKE_HOST_UNIX OR APPLE))
            set(AVX512FLAG "/arch:AVX512")
        else()
            set(AVX512FLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl")
        endif()
    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang")
        if(NOT NATIVEFLAG)
            if(CMAKE_GENERATOR_TOOLSET MATCHES "ClangCl")
                set(AVX512FLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl")
            else()
                # For CPUs that can benefit from AVX512, it seems GCC generates
                # suboptimal instruction scheduling unless a reasonable -mtune=
                # target is specified.
                set(AVX512FLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mtune=cascadelake")
            endif()
        endif()
    elseif(MSVC)
        set(AVX512FLAG "/arch:AVX512")
    endif()

    set(CMAKE_REQUIRED_FLAGS "${AVX512FLAG} ${NATIVEFLAG}")

    # Probe 1: basic 512-bit integer intrinsics.
    check_c_source_compile_or_run(
        "#include <immintrin.h>
        int main(void) {
            __m512i x = _mm512_set1_epi8(2);
            const __m512i y = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                                              20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
                                              38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
                                              56, 57, 58, 59, 60, 61, 62, 63, 64);
            x = _mm512_sub_epi8(x, y);
            (void)x;
            return 0;
        }"
        HAVE_AVX512_INTRIN
    )

    # Probe 2: mask-register intrinsics, which evidently both GCC and clang
    # were late to implement.
    check_c_source_compile_or_run(
        "#include <immintrin.h>
        int main(void) {
            __mmask16 a = 0xFF;
            a = _knot_mask16(a);
            (void)a;
            return 0;
        }"
        HAVE_MASK_INTRIN
    )

    unset(CMAKE_REQUIRED_FLAGS)
endmacro()
75
# check_avx512vnni_intrinsics()
#
# Selects the AVX512-VNNI compiler option for the active compiler and probes
# the _mm512_dpbusd_epi32 intrinsic with it.
#   Reads:  NATIVEFLAG, CMAKE_C_COMPILER_ID, CMAKE_GENERATOR_TOOLSET
#   Writes: AVX512VNNIFLAG, HAVE_AVX512VNNI_INTRIN
macro(check_avx512vnni_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
        if(NOT (CMAKE_HOST_UNIX OR APPLE))
            set(AVX512VNNIFLAG "/arch:AVX512")
        else()
            set(AVX512VNNIFLAG "-mavx512f -mavx512bw -mavx512dq -mavx512vl -mavx512vnni")
        endif()
    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang")
        if(NOT NATIVEFLAG)
            if(CMAKE_GENERATOR_TOOLSET MATCHES "ClangCl")
                set(AVX512VNNIFLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vnni")
            else()
                # Outside the ClangCl toolset a -mtune= target is added as well.
                set(AVX512VNNIFLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vnni -mtune=cascadelake")
            endif()
        endif()
    elseif(MSVC)
        set(AVX512VNNIFLAG "/arch:AVX512")
    endif()

    # Compile (or run) a small program that uses a VNNI dot-product intrinsic.
    set(CMAKE_REQUIRED_FLAGS "${AVX512VNNIFLAG} ${NATIVEFLAG}")
    check_c_source_compile_or_run(
        "#include <immintrin.h>
        int main(void) {
            __m512i x = _mm512_set1_epi8(2);
            const __m512i y = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                                              20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
                                              38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
                                              56, 57, 58, 59, 60, 61, 62, 63, 64);
            __m512i z = _mm512_setzero_epi32();
            z = _mm512_dpbusd_epi32(z, x, y);
            (void)z;
            return 0;
        }"
        HAVE_AVX512VNNI_INTRIN
    )
    unset(CMAKE_REQUIRED_FLAGS)
endmacro()
113
# check_avx2_intrinsics()
#
# Selects the AVX2 compiler option for the active compiler and probes a
# 256-bit integer intrinsic with it.
#   Reads:  NATIVEFLAG, CMAKE_C_COMPILER_ID
#   Writes: AVX2FLAG, HAVE_AVX2_INTRIN
macro(check_avx2_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
        if(NOT (CMAKE_HOST_UNIX OR APPLE))
            set(AVX2FLAG "/arch:AVX2")
        else()
            set(AVX2FLAG "-mavx2")
        endif()
    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang" AND NOT NATIVEFLAG)
        set(AVX2FLAG "-mavx2")
    endif()

    # Check whether the compiler supports AVX2 intrinsics
    set(CMAKE_REQUIRED_FLAGS "${AVX2FLAG} ${NATIVEFLAG}")
    check_c_source_compile_or_run(
        "#include <immintrin.h>
        int main(void) {
            __m256i x = _mm256_set1_epi16(2);
            const __m256i y = _mm256_set1_epi16(1);
            x = _mm256_subs_epu16(x, y);
            (void)x;
            return 0;
        }"
        HAVE_AVX2_INTRIN
    )
    unset(CMAKE_REQUIRED_FLAGS)
endmacro()
141
# check_neon_compiler_flag()
#
# Selects the NEON compiler option for GNU/Clang and verifies that a trivial
# program compiles with it.
#   Reads:  NATIVEFLAG, ARCH, CMAKE_C_COMPILER_ID
#   Writes: NEONFLAG, MFPU_NEON_AVAILABLE
macro(check_neon_compiler_flag)
    if(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang" AND NOT NATIVEFLAG)
        if(NOT "${ARCH}" MATCHES "aarch64")
            set(NEONFLAG "-mfpu=neon")
        else()
            set(NEONFLAG "-march=armv8-a+simd")
        endif()
    endif()

    # The probe fails if the compiler output contains "not supported".
    set(CMAKE_REQUIRED_FLAGS "${NEONFLAG} ${NATIVEFLAG}")
    check_c_source_compiles(
        "int main() { return 0; }"
        MFPU_NEON_AVAILABLE
        FAIL_REGEX "not supported"
    )
    unset(CMAKE_REQUIRED_FLAGS)
endmacro()
159
# check_neon_ld4_intrinsics()
#
# Checks whether the compiler provides vld1q_s32_x4, i.e. whether it can load
# four NEON vectors into a register range with one intrinsic.
#   Reads:  NATIVEFLAG, ARCH, CMAKE_C_COMPILER_ID
#   Writes: NEONFLAG (only for GNU/Clang without NATIVEFLAG), NEON_HAS_LD4
macro(check_neon_ld4_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
        if(NOT NATIVEFLAG)
            if("${ARCH}" MATCHES "aarch64")
                set(NEONFLAG "-march=armv8-a+simd")
            else()
                set(NEONFLAG "-mfpu=neon")
            endif()
        endif()
    endif()
    # Check whether compiler supports loading 4 neon vecs into a register range.
    # NATIVEFLAG is passed along with NEONFLAG: when NATIVEFLAG is set, NEONFLAG
    # is not assigned above, and the probe would otherwise be compiled without
    # any architecture option.
    set(CMAKE_REQUIRED_FLAGS "${NEONFLAG} ${NATIVEFLAG}")
    check_c_source_compiles(
        "#ifdef _M_ARM64
        #  include <arm64_neon.h>
        #else
        #  include <arm_neon.h>
        #endif
        int main(void) {
            int stack_var[16];
            int32x4x4_t v = vld1q_s32_x4(stack_var);
            (void)v;
            return 0;
        }"
        NEON_HAS_LD4)
    set(CMAKE_REQUIRED_FLAGS)
endmacro()
187
# check_pclmulqdq_intrinsics()
#
# Selects the PCLMULQDQ compiler option for GNU/Clang and probes the
# _mm_clmulepi64_si128 intrinsic with it.
#   Reads:  NATIVEFLAG, ARCH, APPLE, CMAKE_C_COMPILER_ID
#   Writes: PCLMULFLAG, HAVE_PCLMULQDQ_INTRIN
macro(check_pclmulqdq_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang" AND NOT NATIVEFLAG)
        set(PCLMULFLAG "-mpclmul")
    endif()

    if(APPLE AND "${ARCH}" MATCHES "i386")
        # The pclmul code currently crashes on Mac in 32bit mode. Avoid for now.
        set(HAVE_PCLMULQDQ_INTRIN OFF)
    else()
        # Check whether compiler supports PCLMULQDQ intrinsics
        set(CMAKE_REQUIRED_FLAGS "${PCLMULFLAG} ${NATIVEFLAG}")
        check_c_source_compile_or_run(
            "#include <immintrin.h>
            int main(void) {
                __m128i a = _mm_setzero_si128();
                __m128i b = _mm_setzero_si128();
                __m128i c = _mm_clmulepi64_si128(a, b, 0x10);
                (void)c;
                return 0;
            }"
            HAVE_PCLMULQDQ_INTRIN
        )
        unset(CMAKE_REQUIRED_FLAGS)
    endif()
endmacro()
214
# check_vpclmulqdq_intrinsics()
#
# Selects the VPCLMULQDQ compiler options for GNU/Clang and probes the 512-bit
# _mm512_clmulepi64_epi128 intrinsic with them.
#   Reads:  NATIVEFLAG, ARCH, APPLE, CMAKE_C_COMPILER_ID
#   Writes: VPCLMULFLAG, HAVE_VPCLMULQDQ_INTRIN
macro(check_vpclmulqdq_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
        if(NOT NATIVEFLAG)
            # The probe below (and any code using the 512-bit carry-less
            # multiply) relies on __m512i and _mm512_* intrinsics, which need
            # AVX-512F in addition to VPCLMULQDQ, so both are requested.
            set(VPCLMULFLAG "-mvpclmulqdq -mavx512f")
        endif()
    endif()
    # Check whether compiler supports VPCLMULQDQ intrinsics
    if(NOT (APPLE AND "${ARCH}" MATCHES "i386"))
        set(CMAKE_REQUIRED_FLAGS "${VPCLMULFLAG} ${NATIVEFLAG}")
        check_c_source_compile_or_run(
            "#include <immintrin.h>
            int main(void) {
                __m512i a = _mm512_setzero_si512();
                __m512i b = _mm512_setzero_si512();
                __m512i c = _mm512_clmulepi64_epi128(a, b, 0x10);
                (void)c;
                return 0;
            }"
            HAVE_VPCLMULQDQ_INTRIN
        )
        set(CMAKE_REQUIRED_FLAGS)
    else()
        # Skipped on 32-bit Apple targets; the feature is reported unavailable.
        set(HAVE_VPCLMULQDQ_INTRIN OFF)
    endif()
endmacro()
240
# check_ppc_intrinsics()
#
# Runs three compile probes for PowerPC AltiVec support and assembles the
# compiler options that passed.
#   Reads:  NATIVEFLAG
#   Writes: HAVE_ALTIVEC - AltiVec code compiles with -maltivec
#           HAVE_NOVSX   - AltiVec code compiles with -maltivec -mno-vsx
#           PPCFLAGS     - options accepted by the two probes above
#           HAVE_VMX     - getauxval(AT_HWCAP) / PPC_FEATURE_HAS_ALTIVEC compile
macro(check_ppc_intrinsics)
    # Probe 1: AltiVec with -maltivec only.
    set(CMAKE_REQUIRED_FLAGS "-maltivec")
    check_c_source_compiles(
        "#include <altivec.h>
        int main(void)
        {
            vector int a = vec_splats(0);
            vector int b = vec_splats(0);
            a = vec_add(a, b);
            return 0;
        }"
        HAVE_ALTIVEC
    )

    # Probe 2: the same program with VSX explicitly disabled.
    set(CMAKE_REQUIRED_FLAGS "-maltivec -mno-vsx")
    check_c_source_compiles(
        "#include <altivec.h>
        int main(void)
        {
            vector int a = vec_splats(0);
            vector int b = vec_splats(0);
            a = vec_add(a, b);
            return 0;
        }"
        HAVE_NOVSX
    )

    # Collect whichever options were accepted.
    if(HAVE_ALTIVEC)
        set(PPCFLAGS "-maltivec")
    endif()
    if(HAVE_NOVSX)
        set(PPCFLAGS "${PPCFLAGS} -mno-vsx")
    endif()

    # Probe 3: check if we have what we need for AltiVec optimizations,
    # i.e. the auxiliary-vector query used to look up the AltiVec capability.
    set(CMAKE_REQUIRED_FLAGS "${PPCFLAGS} ${NATIVEFLAG}")
    check_c_source_compiles(
        "#include <sys/auxv.h>
        int main() {
            return (getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
        }"
        HAVE_VMX
    )
    unset(CMAKE_REQUIRED_FLAGS)
endmacro()
290
# check_power8_intrinsics()
#
# Selects the POWER8 compiler option for GNU/Clang and checks that the
# auxiliary-vector query for PPC_FEATURE2_ARCH_2_07 compiles with it.
#   Reads:  NATIVEFLAG, CMAKE_C_COMPILER_ID
#   Writes: POWER8FLAG, HAVE_POWER8_INTRIN
macro(check_power8_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang" AND NOT NATIVEFLAG)
        set(POWER8FLAG "-mcpu=power8")
    endif()

    # Check if we have what we need for POWER8 optimizations
    set(CMAKE_REQUIRED_FLAGS "${POWER8FLAG} ${NATIVEFLAG}")
    check_c_source_compiles(
        "#include <sys/auxv.h>
        int main() {
            return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07);
        }"
        HAVE_POWER8_INTRIN
    )
    unset(CMAKE_REQUIRED_FLAGS)
endmacro()
308
# check_s390_intrinsics()
#
# Checks that a program querying getauxval(AT_HWCAP) for HWCAP_S390_VX
# compiles. No compiler option is added for this probe.
#   Writes: HAVE_S390_INTRIN
macro(check_s390_intrinsics)
    check_c_source_compiles(
        "#include <sys/auxv.h>
        int main() {
            return (getauxval(AT_HWCAP) & HWCAP_S390_VX);
        }"
        HAVE_S390_INTRIN)
endmacro()
318
# check_power9_intrinsics()
#
# Selects the POWER9 compiler option for GNU/Clang and checks that a trivial
# program compiles with it.
#   Reads:  NATIVEFLAG, CMAKE_C_COMPILER_ID
#   Writes: POWER9FLAG, HAVE_POWER9_INTRIN
macro(check_power9_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang" AND NOT NATIVEFLAG)
        set(POWER9FLAG "-mcpu=power9")
    endif()

    # Check if we have what we need for POWER9 optimizations
    set(CMAKE_REQUIRED_FLAGS "${POWER9FLAG} ${NATIVEFLAG}")
    check_c_source_compiles(
        "int main() {
            return 0;
        }"
        HAVE_POWER9_INTRIN
    )
    unset(CMAKE_REQUIRED_FLAGS)
endmacro()
335
# check_sse2_intrinsics()
#
# Selects the SSE2 compiler option for the active compiler and probes a basic
# 128-bit integer intrinsic with it.
#   Reads:  NATIVEFLAG, ARCH, MSVC, CMAKE_C_COMPILER_ID
#   Writes: SSE2FLAG, HAVE_SSE2_INTRIN
macro(check_sse2_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
        if(NOT (CMAKE_HOST_UNIX OR APPLE))
            set(SSE2FLAG "/arch:SSE2")
        else()
            set(SSE2FLAG "-msse2")
        endif()
    elseif(MSVC)
        # No option is set when ARCH matches x86_64.
        if(NOT "${ARCH}" MATCHES "x86_64")
            set(SSE2FLAG "/arch:SSE2")
        endif()
    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang" AND NOT NATIVEFLAG)
        set(SSE2FLAG "-msse2")
    endif()

    # Check whether the compiler supports SSE2 intrinsics
    set(CMAKE_REQUIRED_FLAGS "${SSE2FLAG} ${NATIVEFLAG}")
    check_c_source_compile_or_run(
        "#include <immintrin.h>
        int main(void) {
            __m128i zero = _mm_setzero_si128();
            (void)zero;
            return 0;
        }"
        HAVE_SSE2_INTRIN
    )
    unset(CMAKE_REQUIRED_FLAGS)
endmacro()
365
# check_ssse3_intrinsics()
#
# Selects the SSSE3 compiler option for the active compiler and probes the
# _mm_hadd_epi32 intrinsic with it.
#   Reads:  NATIVEFLAG, CMAKE_C_COMPILER_ID
#   Writes: SSSE3FLAG, HAVE_SSSE3_INTRIN
macro(check_ssse3_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
        if(CMAKE_HOST_UNIX OR APPLE)
            set(SSSE3FLAG "-mssse3")
        else()
            set(SSSE3FLAG "/arch:SSSE3")
        endif()
    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
        if(NOT NATIVEFLAG)
            set(SSSE3FLAG "-mssse3")
        endif()
    endif()
    # Check whether compiler supports SSSE3 intrinsics
    set(CMAKE_REQUIRED_FLAGS "${SSSE3FLAG} ${NATIVEFLAG}")
    check_c_source_compile_or_run(
        "#include <immintrin.h>
        int main(void) {
            __m128i u, v, w;
            u = _mm_set1_epi32(1);
            v = _mm_set1_epi32(2);
            w = _mm_hadd_epi32(u, v);
            (void)w;
            return 0;
        }"
        HAVE_SSSE3_INTRIN
    )
    # Clear the probe flags so they do not leak into checks run afterwards.
    set(CMAKE_REQUIRED_FLAGS)
endmacro()
393
# check_sse41_intrinsics()
#
# Selects the SSE4.1 compiler option for the active compiler and probes an
# SSE4.1 intrinsic with it.
#   Reads:  NATIVEFLAG, CMAKE_C_COMPILER_ID
#   Writes: SSE41FLAG, HAVE_SSE41_INTRIN
macro(check_sse41_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
        if(CMAKE_HOST_UNIX OR APPLE)
            set(SSE41FLAG "-msse4.1")
        else()
            set(SSE41FLAG "/arch:SSE4.1")
        endif()
    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
        if(NOT NATIVEFLAG)
            set(SSE41FLAG "-msse4.1")
        endif()
    endif()
    # Check whether compiler supports SSE4.1 intrinsics.
    # _mm_min_epi32 (PMINSD) is used because it was introduced with SSE4.1;
    # an SSE2-level intrinsic such as _mm_sad_epu8 would let the probe pass
    # even when SSE4.1 is not actually enabled.
    set(CMAKE_REQUIRED_FLAGS "${SSE41FLAG} ${NATIVEFLAG}")
    check_c_source_compile_or_run(
        "#include <immintrin.h>
        int main(void) {
            __m128i u, v, w;
            u = _mm_set1_epi32(1);
            v = _mm_set1_epi32(2);
            w = _mm_min_epi32(u, v);
            (void)w;
            return 0;
        }"
        HAVE_SSE41_INTRIN
    )
    # Clear the probe flags so they do not leak into checks run afterwards.
    set(CMAKE_REQUIRED_FLAGS)
endmacro()
421
# check_sse42_intrinsics()
#
# Selects the SSE4.2 compiler option for the active compiler and runs three
# probes with it: CRC32 via inline assembly, CRC32 via intrinsics/builtins,
# and the explicit-length string compare intrinsic.
#   Reads:  NATIVEFLAG, CMAKE_C_COMPILER_ID
#   Writes: SSE42FLAG, HAVE_SSE42CRC_INLINE_ASM, HAVE_SSE42CRC_INTRIN,
#           HAVE_SSE42CMPSTR_INTRIN
macro(check_sse42_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
        if(NOT (CMAKE_HOST_UNIX OR APPLE))
            set(SSE42FLAG "/arch:SSE4.2")
        else()
            set(SSE42FLAG "-msse4.2")
        endif()
    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang" AND NOT NATIVEFLAG)
        set(SSE42FLAG "-msse4.2")
    endif()

    # All three probes below share the same flags.
    set(CMAKE_REQUIRED_FLAGS "${SSE42FLAG} ${NATIVEFLAG}")

    # Probe 1: SSE4 CRC through inline assembly
    check_c_source_compile_or_run(
        "int main(void) {
            unsigned val = 0, h = 0;
        #if defined(_MSC_VER)
            { __asm mov edx, h __asm mov eax, val __asm crc32 eax, edx __asm mov h, eax }
        #else
            __asm__ __volatile__ ( \"crc32 %1,%0\" : \"+r\" (h) : \"r\" (val) );
        #endif
            return (int)h;
        }"
        HAVE_SSE42CRC_INLINE_ASM
    )

    # Probe 2: SSE4 CRC through intrinsics
    check_c_source_compile_or_run(
        "#include <immintrin.h>
        int main(void) {
            unsigned crc = 0;
            char c = 'c';
        #if defined(_MSC_VER)
            crc = _mm_crc32_u32(crc, c);
        #else
            crc = __builtin_ia32_crc32qi(crc, c);
        #endif
            (void)crc;
            return 0;
        }"
        HAVE_SSE42CRC_INTRIN
    )

    # Probe 3: SSE4.2 compare-string intrinsics
    check_c_source_compile_or_run(
        "#include <immintrin.h>
        int main(void) {
            unsigned char a[64] = { 0 };
            unsigned char b[64] = { 0 };
            __m128i xmm_src0, xmm_src1;
            xmm_src0 = _mm_loadu_si128((__m128i *)(char *)a);
            xmm_src1 = _mm_loadu_si128((__m128i *)(char *)b);
            return _mm_cmpestri(xmm_src0, 16, xmm_src1, 16, 0);
        }"
        HAVE_SSE42CMPSTR_INTRIN
    )

    unset(CMAKE_REQUIRED_FLAGS)
endmacro()
479
# check_vgfma_intrinsics()
#
# Builds the compiler options for the z13 vector facility and checks whether
# the "VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE" intrinsic
# (vec_gfmsum_accum_128) compiles with them.
#   Reads:  NATIVEFLAG, CMAKE_C_COMPILER_ID
#   Writes: VGFMAFLAG, HAVE_VGFMA_INTRIN
macro(check_vgfma_intrinsics)
    if(NOT NATIVEFLAG)
        set(VGFMAFLAG "-march=z13")
        # Compiler-specific additions on top of the base -march option.
        if(CMAKE_C_COMPILER_ID MATCHES "GNU")
            string(APPEND VGFMAFLAG " -mzarch")
        endif()
        if(CMAKE_C_COMPILER_ID MATCHES "Clang")
            string(APPEND VGFMAFLAG " -fzvector")
        endif()
    endif()

    # The probe fails if the compiler output contains "not supported".
    set(CMAKE_REQUIRED_FLAGS "${VGFMAFLAG} ${NATIVEFLAG}")
    check_c_source_compiles(
        "#include <vecintrin.h>
        int main(void) {
            unsigned long long a __attribute__((vector_size(16))) = { 0 };
            unsigned long long b __attribute__((vector_size(16))) = { 0 };
            unsigned char c __attribute__((vector_size(16))) = { 0 };
            c = vec_gfmsum_accum_128(a, b, c);
            return c[0];
        }"
        HAVE_VGFMA_INTRIN
        FAIL_REGEX "not supported"
    )
    unset(CMAKE_REQUIRED_FLAGS)
endmacro()
504