; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl -mattr=+avx512ifma | FileCheck %s

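; These tests cover the AVX512IFMA (52-bit integer fused multiply-add) intrinsics
; under AVX512VL: vpmadd52huq and vpmadd52luq, in mask (merge) and maskz (zeroing)
; forms, at 128-bit and 256-bit vector widths.
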
declare <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

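; Merge-masking forms of the 128-bit VPMADD52HUQ intrinsic (high 52 bits of the
; 52x52-bit product): a masked call, masked calls with zeroed operands, and an
; all-ones-mask call, summed so each result stays live.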
define <2 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm4
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1}
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpaddq %xmm2, %xmm4, %xmm1
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq

  %res = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res3 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res4 = add <2 x i64> %res, %res1
  %res5 = add <2 x i64> %res3, %res2
  %res6 = add <2 x i64> %res5, %res4
  ret <2 x i64> %res6
}

declare <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

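; Merge-masking forms of the 256-bit VPMADD52HUQ intrinsic, same variants as
; above on ymm operands.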
define <4 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm3 {%k1}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm4
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1}
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpaddq %ymm2, %ymm4, %ymm1
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq

  %res = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res3 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res4 = add <4 x i64> %res, %res1
  %res5 = add <4 x i64> %res3, %res2
  %res6 = add <4 x i64> %res5, %res4
  ret <4 x i64> %res6
}

declare <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

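; Zero-masking forms of the 128-bit VPMADD52HUQ intrinsic: inactive lanes are
; zeroed rather than merged.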
define <2 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm4
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpaddq %xmm2, %xmm4, %xmm1
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq

  %res = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res3 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res4 = add <2 x i64> %res, %res1
  %res5 = add <2 x i64> %res3, %res2
  %res6 = add <2 x i64> %res5, %res4
  ret <2 x i64> %res6
}

declare <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

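; Zero-masking forms of the 256-bit VPMADD52HUQ intrinsic.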
define <4 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm3 {%k1} {z}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm4
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpaddq %ymm2, %ymm4, %ymm1
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq

  %res = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res3 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res4 = add <4 x i64> %res, %res1
  %res5 = add <4 x i64> %res3, %res2
  %res6 = add <4 x i64> %res5, %res4
  ret <4 x i64> %res6
}

declare <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

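; Merge-masking forms of the 128-bit VPMADD52LUQ intrinsic (low 52 bits of the
; 52x52-bit product).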
define <2 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm4
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1}
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpaddq %xmm2, %xmm4, %xmm1
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq

  %res = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res3 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res4 = add <2 x i64> %res, %res1
  %res5 = add <2 x i64> %res3, %res2
  %res6 = add <2 x i64> %res5, %res4
  ret <2 x i64> %res6
}

declare <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

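; Merge-masking forms of the 256-bit VPMADD52LUQ intrinsic.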
define <4 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm3 {%k1}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm4
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1}
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpaddq %ymm2, %ymm4, %ymm1
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq

  %res = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res3 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res4 = add <4 x i64> %res, %res1
  %res5 = add <4 x i64> %res3, %res2
  %res6 = add <4 x i64> %res5, %res4
  ret <4 x i64> %res6
}

declare <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

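; Zero-masking forms of the 128-bit VPMADD52LUQ intrinsic.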
define <2 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm4
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpaddq %xmm2, %xmm4, %xmm1
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq

  %res = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res3 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res4 = add <2 x i64> %res, %res1
  %res5 = add <2 x i64> %res3, %res2
  %res6 = add <2 x i64> %res5, %res4
  ret <2 x i64> %res6
}

declare <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

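; Zero-masking forms of the 256-bit VPMADD52LUQ intrinsic.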
define <4 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm3 {%k1} {z}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm4
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpaddq %ymm2, %ymm4, %ymm1
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq

  %res = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res3 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res4 = add <4 x i64> %res, %res1
  %res5 = add <4 x i64> %res3, %res2
  %res6 = add <4 x i64> %res5, %res4
  ret <4 x i64> %res6
}