xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/avx512-vbroadcast.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512F
3*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
4*9880d681SAndroid Build Coastguard Worker
; Splat of a scalar i32 argument to <16 x i32>: %a is inserted into lane 0 of
; an undef vector and then replicated with an all-zero shuffle mask.
; Both run configurations expect one vpbroadcastd from %edi into %zmm0.
5*9880d681SAndroid Build Coastguard Workerdefine   <16 x i32> @_inreg16xi32(i32 %a) {
6*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: _inreg16xi32:
7*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
8*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpbroadcastd %edi, %zmm0
9*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
10*9880d681SAndroid Build Coastguard Worker  %b = insertelement <16 x i32> undef, i32 %a, i32 0
11*9880d681SAndroid Build Coastguard Worker  %c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
12*9880d681SAndroid Build Coastguard Worker  ret <16 x i32> %c
13*9880d681SAndroid Build Coastguard Worker}
14*9880d681SAndroid Build Coastguard Worker
; Splat of a scalar i64 argument to <8 x i64> (insert into lane 0, then an
; all-zero shuffle mask). Both run configurations expect one vpbroadcastq
; from %rdi into %zmm0.
15*9880d681SAndroid Build Coastguard Workerdefine   <8 x i64> @_inreg8xi64(i64 %a) {
16*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: _inreg8xi64:
17*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
18*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpbroadcastq %rdi, %zmm0
19*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
20*9880d681SAndroid Build Coastguard Worker  %b = insertelement <8 x i64> undef, i64 %a, i32 0
21*9880d681SAndroid Build Coastguard Worker  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
22*9880d681SAndroid Build Coastguard Worker  ret <8 x i64> %c
23*9880d681SAndroid Build Coastguard Worker}
24*9880d681SAndroid Build Coastguard Worker
; Widening splat: element 0 of a <4 x float> argument is replicated into a
; <16 x float> result by a single shufflevector with an all-zero mask.
; Expected output is one vbroadcastss from %xmm0 into %zmm0.
25*9880d681SAndroid Build Coastguard Workerdefine   <16 x float> @_ss16xfloat_v4(<4 x float> %a) {
26*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: _ss16xfloat_v4:
27*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
28*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vbroadcastss %xmm0, %zmm0
29*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
30*9880d681SAndroid Build Coastguard Worker  %b = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> zeroinitializer
31*9880d681SAndroid Build Coastguard Worker  ret <16 x float> %b
32*9880d681SAndroid Build Coastguard Worker}
33*9880d681SAndroid Build Coastguard Worker
; Splat of a scalar float argument to <16 x float> (insert into lane 0, then
; an all-zero shuffle mask). Expected output is one vbroadcastss from %xmm0
; into %zmm0.
34*9880d681SAndroid Build Coastguard Workerdefine   <16 x float> @_inreg16xfloat(float %a) {
35*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: _inreg16xfloat:
36*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
37*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vbroadcastss %xmm0, %zmm0
38*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
39*9880d681SAndroid Build Coastguard Worker  %b = insertelement <16 x float> undef, float %a, i32 0
40*9880d681SAndroid Build Coastguard Worker  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
41*9880d681SAndroid Build Coastguard Worker  ret <16 x float> %c
42*9880d681SAndroid Build Coastguard Worker}
43*9880d681SAndroid Build Coastguard Worker
; Merge-masked float splat: lanes where %mask1 is non-zero take the broadcast
; of %a, the remaining lanes keep the pass-through vector %i.
; The expected code builds the mask with vpxord + vpcmpneqd into %k1, does a
; masked vbroadcastss into %zmm1 (the register holding %i), and then copies
; %zmm1 to %zmm0 for the return.
44*9880d681SAndroid Build Coastguard Workerdefine   <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %mask1) {
45*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: _ss16xfloat_mask:
46*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
47*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpxord %zmm3, %zmm3, %zmm3
48*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
49*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1}
50*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovaps %zmm1, %zmm0
51*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
52*9880d681SAndroid Build Coastguard Worker  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
53*9880d681SAndroid Build Coastguard Worker  %b = insertelement <16 x float> undef, float %a, i32 0
54*9880d681SAndroid Build Coastguard Worker  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
55*9880d681SAndroid Build Coastguard Worker  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i
56*9880d681SAndroid Build Coastguard Worker  ret <16 x float> %r
57*9880d681SAndroid Build Coastguard Worker}
58*9880d681SAndroid Build Coastguard Worker
; Zero-masked float splat: lanes where %mask1 is non-zero take the broadcast
; of %a, the remaining lanes are zero (select against zeroinitializer).
; The expected code folds the select into vbroadcastss with {%k1} {z}.
59*9880d681SAndroid Build Coastguard Workerdefine   <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) {
60*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: _ss16xfloat_maskz:
61*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
62*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpxord %zmm2, %zmm2, %zmm2
63*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
64*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z}
65*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
66*9880d681SAndroid Build Coastguard Worker  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
67*9880d681SAndroid Build Coastguard Worker  %b = insertelement <16 x float> undef, float %a, i32 0
68*9880d681SAndroid Build Coastguard Worker  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
69*9880d681SAndroid Build Coastguard Worker  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer
70*9880d681SAndroid Build Coastguard Worker  ret <16 x float> %r
71*9880d681SAndroid Build Coastguard Worker}
72*9880d681SAndroid Build Coastguard Worker
; Splat of a float loaded from memory. The expected code folds the load into
; the broadcast: a single vbroadcastss with the (%rdi) memory operand.
73*9880d681SAndroid Build Coastguard Workerdefine   <16 x float> @_ss16xfloat_load(float* %a.ptr) {
74*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: _ss16xfloat_load:
75*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
76*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vbroadcastss (%rdi), %zmm0
77*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
78*9880d681SAndroid Build Coastguard Worker  %a = load float, float* %a.ptr
79*9880d681SAndroid Build Coastguard Worker  %b = insertelement <16 x float> undef, float %a, i32 0
80*9880d681SAndroid Build Coastguard Worker  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
81*9880d681SAndroid Build Coastguard Worker  ret <16 x float> %c
82*9880d681SAndroid Build Coastguard Worker}
83*9880d681SAndroid Build Coastguard Worker
; Merge-masked splat of a float loaded from memory: lanes where %mask1 is
; non-zero take the loaded value, the remaining lanes keep %i.
; The expected code is a masked vbroadcastss with a (%rdi) memory operand
; written directly into %zmm0, so no trailing register copy is expected.
84*9880d681SAndroid Build Coastguard Workerdefine   <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) {
85*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: _ss16xfloat_mask_load:
86*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
87*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpxord %zmm2, %zmm2, %zmm2
88*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
89*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1}
90*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
91*9880d681SAndroid Build Coastguard Worker  %a = load float, float* %a.ptr
92*9880d681SAndroid Build Coastguard Worker  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
93*9880d681SAndroid Build Coastguard Worker  %b = insertelement <16 x float> undef, float %a, i32 0
94*9880d681SAndroid Build Coastguard Worker  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
95*9880d681SAndroid Build Coastguard Worker  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i
96*9880d681SAndroid Build Coastguard Worker  ret <16 x float> %r
97*9880d681SAndroid Build Coastguard Worker}
98*9880d681SAndroid Build Coastguard Worker
; Zero-masked splat of a float loaded from memory: lanes where %mask1 is
; non-zero take the loaded value, the remaining lanes are zero.
; The expected code is vbroadcastss from (%rdi) with {%k1} {z}.
99*9880d681SAndroid Build Coastguard Workerdefine   <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) {
100*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: _ss16xfloat_maskz_load:
101*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
102*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
103*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpcmpneqd %zmm1, %zmm0, %k1
104*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} {z}
105*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
106*9880d681SAndroid Build Coastguard Worker  %a = load float, float* %a.ptr
107*9880d681SAndroid Build Coastguard Worker  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
108*9880d681SAndroid Build Coastguard Worker  %b = insertelement <16 x float> undef, float %a, i32 0
109*9880d681SAndroid Build Coastguard Worker  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
110*9880d681SAndroid Build Coastguard Worker  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer
111*9880d681SAndroid Build Coastguard Worker  ret <16 x float> %r
112*9880d681SAndroid Build Coastguard Worker}
113*9880d681SAndroid Build Coastguard Worker
; Splat of a scalar double argument to <8 x double> (insert into lane 0, then
; an all-zero shuffle mask). Expected output is one vbroadcastsd from %xmm0
; into %zmm0.
114*9880d681SAndroid Build Coastguard Workerdefine   <8 x double> @_inreg8xdouble(double %a) {
115*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: _inreg8xdouble:
116*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
117*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vbroadcastsd %xmm0, %zmm0
118*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
119*9880d681SAndroid Build Coastguard Worker  %b = insertelement <8 x double> undef, double %a, i32 0
120*9880d681SAndroid Build Coastguard Worker  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
121*9880d681SAndroid Build Coastguard Worker  ret <8 x double> %c
122*9880d681SAndroid Build Coastguard Worker}
123*9880d681SAndroid Build Coastguard Worker
; Merge-masked double splat: lanes where the <8 x i32> %mask1 is non-zero
; take the broadcast of %a, the remaining lanes keep %i.
; The mask is only 8 x i32, yet the expected compare (vpcmpneqd) is on %zmm
; registers; the checks also expect a "# kill" annotation relating %YMM2 to
; %ZMM2 and a ymm-width vpxor for the zero operand.
124*9880d681SAndroid Build Coastguard Workerdefine   <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %mask1) {
125*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: _sd8xdouble_mask:
126*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
127*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    # kill: %YMM2<def> %YMM2<kill> %ZMM2<def>
128*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpxor %ymm3, %ymm3, %ymm3
129*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
130*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1}
131*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovaps %zmm1, %zmm0
132*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
133*9880d681SAndroid Build Coastguard Worker  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
134*9880d681SAndroid Build Coastguard Worker  %b = insertelement <8 x double> undef, double %a, i32 0
135*9880d681SAndroid Build Coastguard Worker  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
136*9880d681SAndroid Build Coastguard Worker  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i
137*9880d681SAndroid Build Coastguard Worker  ret <8 x double> %r
138*9880d681SAndroid Build Coastguard Worker}
139*9880d681SAndroid Build Coastguard Worker
; Zero-masked double splat: lanes where the <8 x i32> %mask1 is non-zero take
; the broadcast of %a, the remaining lanes are zero.
; Expected output ends in vbroadcastsd with {%k1} {z}; as in the merge-masked
; variant, the compare is expected on %zmm registers after a "# kill" line.
140*9880d681SAndroid Build Coastguard Workerdefine   <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) {
141*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: _sd8xdouble_maskz:
142*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
143*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
144*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpxor %ymm2, %ymm2, %ymm2
145*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
146*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z}
147*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
148*9880d681SAndroid Build Coastguard Worker  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
149*9880d681SAndroid Build Coastguard Worker  %b = insertelement <8 x double> undef, double %a, i32 0
150*9880d681SAndroid Build Coastguard Worker  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
151*9880d681SAndroid Build Coastguard Worker  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer
152*9880d681SAndroid Build Coastguard Worker  ret <8 x double> %r
153*9880d681SAndroid Build Coastguard Worker}
154*9880d681SAndroid Build Coastguard Worker
; Splat of a double loaded from memory. The expected code folds the load into
; the broadcast: a single vbroadcastsd with the (%rdi) memory operand.
155*9880d681SAndroid Build Coastguard Workerdefine   <8 x double> @_sd8xdouble_load(double* %a.ptr) {
156*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: _sd8xdouble_load:
157*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
158*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vbroadcastsd (%rdi), %zmm0
159*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
160*9880d681SAndroid Build Coastguard Worker  %a = load double, double* %a.ptr
161*9880d681SAndroid Build Coastguard Worker  %b = insertelement <8 x double> undef, double %a, i32 0
162*9880d681SAndroid Build Coastguard Worker  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
163*9880d681SAndroid Build Coastguard Worker  ret <8 x double> %c
164*9880d681SAndroid Build Coastguard Worker}
165*9880d681SAndroid Build Coastguard Worker
; Merge-masked splat of a double loaded from memory: lanes where the
; <8 x i32> %mask1 is non-zero take the loaded value, the rest keep %i.
; Expected output is a masked vbroadcastsd from (%rdi) written directly into
; %zmm0, preceded by the zmm-width compare that produces %k1.
166*9880d681SAndroid Build Coastguard Workerdefine   <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 x i32> %mask1) {
167*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: _sd8xdouble_mask_load:
168*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
169*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
170*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpxor %ymm2, %ymm2, %ymm2
171*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
172*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1}
173*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
174*9880d681SAndroid Build Coastguard Worker  %a = load double, double* %a.ptr
175*9880d681SAndroid Build Coastguard Worker  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
176*9880d681SAndroid Build Coastguard Worker  %b = insertelement <8 x double> undef, double %a, i32 0
177*9880d681SAndroid Build Coastguard Worker  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
178*9880d681SAndroid Build Coastguard Worker  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i
179*9880d681SAndroid Build Coastguard Worker  ret <8 x double> %r
180*9880d681SAndroid Build Coastguard Worker}
181*9880d681SAndroid Build Coastguard Worker
; Zero-masked splat of a double loaded from memory: lanes where the
; <8 x i32> %mask1 is non-zero take the loaded value, the rest are zero.
; Expected output is vbroadcastsd from (%rdi) with {%k1} {z}.
182*9880d681SAndroid Build Coastguard Workerdefine   <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) {
183*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: _sd8xdouble_maskz_load:
184*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
185*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
186*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
187*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpcmpneqd %zmm1, %zmm0, %k1
188*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} {z}
189*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
190*9880d681SAndroid Build Coastguard Worker  %a = load double, double* %a.ptr
191*9880d681SAndroid Build Coastguard Worker  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
192*9880d681SAndroid Build Coastguard Worker  %b = insertelement <8 x double> undef, double %a, i32 0
193*9880d681SAndroid Build Coastguard Worker  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
194*9880d681SAndroid Build Coastguard Worker  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer
195*9880d681SAndroid Build Coastguard Worker  ret <8 x double> %r
196*9880d681SAndroid Build Coastguard Worker}
197*9880d681SAndroid Build Coastguard Worker
; Same-width splat: element 0 of a <16 x i32> argument is replicated to every
; lane with an all-zero shuffle mask. Expected output is one vpbroadcastd
; reading %xmm0 and writing %zmm0.
198*9880d681SAndroid Build Coastguard Workerdefine   <16 x i32> @_xmm16xi32(<16 x i32> %a) {
199*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: _xmm16xi32:
200*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
201*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpbroadcastd %xmm0, %zmm0
202*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
203*9880d681SAndroid Build Coastguard Worker  %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> zeroinitializer
204*9880d681SAndroid Build Coastguard Worker  ret <16 x i32> %b
205*9880d681SAndroid Build Coastguard Worker}
206*9880d681SAndroid Build Coastguard Worker
; Same-width splat: element 0 of a <16 x float> argument is replicated to
; every lane with an all-zero shuffle mask. Expected output is one
; vbroadcastss reading %xmm0 and writing %zmm0.
207*9880d681SAndroid Build Coastguard Workerdefine   <16 x float> @_xmm16xfloat(<16 x float> %a) {
208*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: _xmm16xfloat:
209*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
210*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vbroadcastss %xmm0, %zmm0
211*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
212*9880d681SAndroid Build Coastguard Worker  %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> zeroinitializer
213*9880d681SAndroid Build Coastguard Worker  ret <16 x float> %b
214*9880d681SAndroid Build Coastguard Worker}
215*9880d681SAndroid Build Coastguard Worker
; Takes no arguments. Builds a <16 x i1> condition from an unordered compare
; of an undef vector against zero, then selects between the sign-extension of
; an all-false constant (%0) and the sign-extension of the condition itself
; (%2). The checks pin the exact sequence currently emitted: a zeroing
; vpxord, vcmpunordps into %k1, an all-ones vpternlogd, and two zero-masked
; vmovdqa32 moves separated by knotw.
216*9880d681SAndroid Build Coastguard Workerdefine <16 x i32> @test_vbroadcast() {
217*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: test_vbroadcast:
218*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0: # %entry
219*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpxord %zmm0, %zmm0, %zmm0
220*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vcmpunordps %zmm0, %zmm0, %k1
221*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
222*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
223*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    knotw %k1, %k1
224*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
225*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
226*9880d681SAndroid Build Coastguard Workerentry:
227*9880d681SAndroid Build Coastguard Worker  %0 = sext <16 x i1> zeroinitializer to <16 x i32>
228*9880d681SAndroid Build Coastguard Worker  %1 = fcmp uno <16 x float> undef, zeroinitializer
229*9880d681SAndroid Build Coastguard Worker  %2 = sext <16 x i1> %1 to <16 x i32>
230*9880d681SAndroid Build Coastguard Worker  %3 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> %2
231*9880d681SAndroid Build Coastguard Worker  ret <16 x i32> %3
232*9880d681SAndroid Build Coastguard Worker}
233*9880d681SAndroid Build Coastguard Worker
234*9880d681SAndroid Build Coastguard Worker; We implement the set1 intrinsics with vector initializers.  Verify that the
235*9880d681SAndroid Build Coastguard Worker; IR generated will produce broadcasts at the end.
; Builds an <8 x double> by inserting the same scalar %d into each of the
; eight lanes in turn (a chain of insertelement instructions instead of a
; shufflevector splat). The whole chain is expected to collapse into one
; vbroadcastsd. The function references attribute group #2, whose definition
; is not in the visible part of this file.
236*9880d681SAndroid Build Coastguard Workerdefine <8 x double> @test_set1_pd(double %d) #2 {
237*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: test_set1_pd:
238*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0: # %entry
239*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vbroadcastsd %xmm0, %zmm0
240*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
241*9880d681SAndroid Build Coastguard Workerentry:
242*9880d681SAndroid Build Coastguard Worker  %vecinit.i = insertelement <8 x double> undef, double %d, i32 0
243*9880d681SAndroid Build Coastguard Worker  %vecinit1.i = insertelement <8 x double> %vecinit.i, double %d, i32 1
244*9880d681SAndroid Build Coastguard Worker  %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %d, i32 2
245*9880d681SAndroid Build Coastguard Worker  %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %d, i32 3
246*9880d681SAndroid Build Coastguard Worker  %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %d, i32 4
247*9880d681SAndroid Build Coastguard Worker  %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %d, i32 5
248*9880d681SAndroid Build Coastguard Worker  %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %d, i32 6
249*9880d681SAndroid Build Coastguard Worker  %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %d, i32 7
250*9880d681SAndroid Build Coastguard Worker  ret <8 x double> %vecinit7.i
251*9880d681SAndroid Build Coastguard Worker}
252*9880d681SAndroid Build Coastguard Worker
; Builds an <8 x i64> by inserting the same scalar %d into each of the eight
; lanes in turn. The insertelement chain is expected to collapse into one
; vpbroadcastq from %rdi. The function references attribute group #2, whose
; definition is not in the visible part of this file.
253*9880d681SAndroid Build Coastguard Workerdefine <8 x i64> @test_set1_epi64(i64 %d) #2 {
254*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: test_set1_epi64:
255*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0: # %entry
256*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpbroadcastq %rdi, %zmm0
257*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
258*9880d681SAndroid Build Coastguard Workerentry:
259*9880d681SAndroid Build Coastguard Worker  %vecinit.i = insertelement <8 x i64> undef, i64 %d, i32 0
260*9880d681SAndroid Build Coastguard Worker  %vecinit1.i = insertelement <8 x i64> %vecinit.i, i64 %d, i32 1
261*9880d681SAndroid Build Coastguard Worker  %vecinit2.i = insertelement <8 x i64> %vecinit1.i, i64 %d, i32 2
262*9880d681SAndroid Build Coastguard Worker  %vecinit3.i = insertelement <8 x i64> %vecinit2.i, i64 %d, i32 3
263*9880d681SAndroid Build Coastguard Worker  %vecinit4.i = insertelement <8 x i64> %vecinit3.i, i64 %d, i32 4
264*9880d681SAndroid Build Coastguard Worker  %vecinit5.i = insertelement <8 x i64> %vecinit4.i, i64 %d, i32 5
265*9880d681SAndroid Build Coastguard Worker  %vecinit6.i = insertelement <8 x i64> %vecinit5.i, i64 %d, i32 6
266*9880d681SAndroid Build Coastguard Worker  %vecinit7.i = insertelement <8 x i64> %vecinit6.i, i64 %d, i32 7
267*9880d681SAndroid Build Coastguard Worker  ret <8 x i64> %vecinit7.i
268*9880d681SAndroid Build Coastguard Worker}
269*9880d681SAndroid Build Coastguard Worker
; Builds a <16 x float> by inserting the same scalar %f into each of the
; sixteen lanes in turn. The insertelement chain is expected to collapse into
; one vbroadcastss. The function references attribute group #2, whose
; definition is not in the visible part of this file.
270*9880d681SAndroid Build Coastguard Workerdefine <16 x float> @test_set1_ps(float %f) #2 {
271*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: test_set1_ps:
272*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0: # %entry
273*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vbroadcastss %xmm0, %zmm0
274*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
275*9880d681SAndroid Build Coastguard Workerentry:
276*9880d681SAndroid Build Coastguard Worker  %vecinit.i = insertelement <16 x float> undef, float %f, i32 0
277*9880d681SAndroid Build Coastguard Worker  %vecinit1.i = insertelement <16 x float> %vecinit.i, float %f, i32 1
278*9880d681SAndroid Build Coastguard Worker  %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %f, i32 2
279*9880d681SAndroid Build Coastguard Worker  %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %f, i32 3
280*9880d681SAndroid Build Coastguard Worker  %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %f, i32 4
281*9880d681SAndroid Build Coastguard Worker  %vecinit5.i = insertelement <16 x float> %vecinit4.i, float %f, i32 5
282*9880d681SAndroid Build Coastguard Worker  %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %f, i32 6
283*9880d681SAndroid Build Coastguard Worker  %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %f, i32 7
284*9880d681SAndroid Build Coastguard Worker  %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %f, i32 8
285*9880d681SAndroid Build Coastguard Worker  %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %f, i32 9
286*9880d681SAndroid Build Coastguard Worker  %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %f, i32 10
287*9880d681SAndroid Build Coastguard Worker  %vecinit11.i = insertelement <16 x float> %vecinit10.i, float %f, i32 11
288*9880d681SAndroid Build Coastguard Worker  %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %f, i32 12
289*9880d681SAndroid Build Coastguard Worker  %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %f, i32 13
290*9880d681SAndroid Build Coastguard Worker  %vecinit14.i = insertelement <16 x float> %vecinit13.i, float %f, i32 14
291*9880d681SAndroid Build Coastguard Worker  %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %f, i32 15
292*9880d681SAndroid Build Coastguard Worker  ret <16 x float> %vecinit15.i
293*9880d681SAndroid Build Coastguard Worker}
294*9880d681SAndroid Build Coastguard Worker
; Builds a <16 x i32> by inserting the same scalar %f into each of the
; sixteen lanes in turn. The insertelement chain is expected to collapse into
; one vpbroadcastd from %edi. The function references attribute group #2,
; whose definition is not in the visible part of this file.
295*9880d681SAndroid Build Coastguard Workerdefine <16 x i32> @test_set1_epi32(i32 %f) #2 {
296*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: test_set1_epi32:
297*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0: # %entry
298*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpbroadcastd %edi, %zmm0
299*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
300*9880d681SAndroid Build Coastguard Workerentry:
301*9880d681SAndroid Build Coastguard Worker  %vecinit.i = insertelement <16 x i32> undef, i32 %f, i32 0
302*9880d681SAndroid Build Coastguard Worker  %vecinit1.i = insertelement <16 x i32> %vecinit.i, i32 %f, i32 1
303*9880d681SAndroid Build Coastguard Worker  %vecinit2.i = insertelement <16 x i32> %vecinit1.i, i32 %f, i32 2
304*9880d681SAndroid Build Coastguard Worker  %vecinit3.i = insertelement <16 x i32> %vecinit2.i, i32 %f, i32 3
305*9880d681SAndroid Build Coastguard Worker  %vecinit4.i = insertelement <16 x i32> %vecinit3.i, i32 %f, i32 4
306*9880d681SAndroid Build Coastguard Worker  %vecinit5.i = insertelement <16 x i32> %vecinit4.i, i32 %f, i32 5
307*9880d681SAndroid Build Coastguard Worker  %vecinit6.i = insertelement <16 x i32> %vecinit5.i, i32 %f, i32 6
308*9880d681SAndroid Build Coastguard Worker  %vecinit7.i = insertelement <16 x i32> %vecinit6.i, i32 %f, i32 7
309*9880d681SAndroid Build Coastguard Worker  %vecinit8.i = insertelement <16 x i32> %vecinit7.i, i32 %f, i32 8
310*9880d681SAndroid Build Coastguard Worker  %vecinit9.i = insertelement <16 x i32> %vecinit8.i, i32 %f, i32 9
311*9880d681SAndroid Build Coastguard Worker  %vecinit10.i = insertelement <16 x i32> %vecinit9.i, i32 %f, i32 10
312*9880d681SAndroid Build Coastguard Worker  %vecinit11.i = insertelement <16 x i32> %vecinit10.i, i32 %f, i32 11
313*9880d681SAndroid Build Coastguard Worker  %vecinit12.i = insertelement <16 x i32> %vecinit11.i, i32 %f, i32 12
314*9880d681SAndroid Build Coastguard Worker  %vecinit13.i = insertelement <16 x i32> %vecinit12.i, i32 %f, i32 13
315*9880d681SAndroid Build Coastguard Worker  %vecinit14.i = insertelement <16 x i32> %vecinit13.i, i32 %f, i32 14
316*9880d681SAndroid Build Coastguard Worker  %vecinit15.i = insertelement <16 x i32> %vecinit14.i, i32 %f, i32 15
317*9880d681SAndroid Build Coastguard Worker  ret <16 x i32> %vecinit15.i
318*9880d681SAndroid Build Coastguard Worker}
319*9880d681SAndroid Build Coastguard Worker
320*9880d681SAndroid Build Coastguard Worker; We implement the scalar broadcast intrinsics with vector initializers.
321*9880d681SAndroid Build Coastguard Worker; Verify that the IR generated will produce the broadcast at the end.
; Extracts element 0 of a <2 x double> argument and inserts it into each of
; the eight lanes of an <8 x double> in turn. The extract plus insertelement
; chain is expected to collapse into one vbroadcastsd from %xmm0.
322*9880d681SAndroid Build Coastguard Workerdefine <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a) {
323*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: test_mm512_broadcastsd_pd:
324*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0: # %entry
325*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vbroadcastsd %xmm0, %zmm0
326*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
327*9880d681SAndroid Build Coastguard Workerentry:
328*9880d681SAndroid Build Coastguard Worker  %0 = extractelement <2 x double> %a, i32 0
329*9880d681SAndroid Build Coastguard Worker  %vecinit.i = insertelement <8 x double> undef, double %0, i32 0
330*9880d681SAndroid Build Coastguard Worker  %vecinit1.i = insertelement <8 x double> %vecinit.i, double %0, i32 1
331*9880d681SAndroid Build Coastguard Worker  %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %0, i32 2
332*9880d681SAndroid Build Coastguard Worker  %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %0, i32 3
333*9880d681SAndroid Build Coastguard Worker  %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %0, i32 4
334*9880d681SAndroid Build Coastguard Worker  %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %0, i32 5
335*9880d681SAndroid Build Coastguard Worker  %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %0, i32 6
336*9880d681SAndroid Build Coastguard Worker  %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %0, i32 7
337*9880d681SAndroid Build Coastguard Worker  ret <8 x double> %vecinit7.i
338*9880d681SAndroid Build Coastguard Worker}
339*9880d681SAndroid Build Coastguard Worker
; Widening splat: element 0 of an <8 x float> argument is replicated into a
; <16 x float> result by one shufflevector with an all-zero mask.
; Expected output is one vbroadcastss from %xmm0 into %zmm0.
340*9880d681SAndroid Build Coastguard Workerdefine <16 x float> @test1(<8 x float>%a)  {
341*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: test1:
342*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
343*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vbroadcastss %xmm0, %zmm0
344*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
345*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <8 x float> %a, <8 x float> undef, <16 x i32> zeroinitializer
346*9880d681SAndroid Build Coastguard Worker  ret <16 x float>%res
347*9880d681SAndroid Build Coastguard Worker}
348*9880d681SAndroid Build Coastguard Worker
; Widening splat: element 0 of a <4 x double> argument is replicated into an
; <8 x double> result by one shufflevector with an all-zero mask.
; Expected output is one vbroadcastsd from %xmm0 into %zmm0.
349*9880d681SAndroid Build Coastguard Workerdefine <8 x double> @test2(<4 x double>%a)  {
350*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: test2:
351*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
352*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vbroadcastsd %xmm0, %zmm0
353*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
354*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <4 x double> %a, <4 x double> undef, <8 x i32> zeroinitializer
355*9880d681SAndroid Build Coastguard Worker  ret <8 x double>%res
356*9880d681SAndroid Build Coastguard Worker}
357*9880d681SAndroid Build Coastguard Worker
; Widening byte splat: element 0 of a <32 x i8> argument is replicated into a
; <64 x i8> result. The expected code differs between the two run lines:
; with +avx512f the checks expect a ymm-width vpbroadcastb followed by a
; register copy into %zmm1, while with +avx512bw they expect a single
; zmm-width vpbroadcastb.
358*9880d681SAndroid Build Coastguard Workerdefine <64 x i8> @_invec32xi8(<32 x i8>%a)  {
359*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: _invec32xi8:
360*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
361*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpbroadcastb %xmm0, %ymm0
362*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovaps %zmm0, %zmm1
363*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
364*9880d681SAndroid Build Coastguard Worker;
365*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: _invec32xi8:
366*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
367*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpbroadcastb %xmm0, %zmm0
368*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
369*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <32 x i8> %a, <32 x i8> undef, <64 x i32> zeroinitializer
370*9880d681SAndroid Build Coastguard Worker  ret <64 x i8>%res
371*9880d681SAndroid Build Coastguard Worker}
372*9880d681SAndroid Build Coastguard Worker
; Widening word splat: element 0 of a <16 x i16> argument is replicated into
; a <32 x i16> result. The expected code differs between the two run lines:
; with +avx512f the checks expect a ymm-width vpbroadcastw followed by a
; register copy into %zmm1, while with +avx512bw they expect a single
; zmm-width vpbroadcastw.
373*9880d681SAndroid Build Coastguard Workerdefine <32 x i16> @_invec16xi16(<16 x i16>%a)  {
374*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: _invec16xi16:
375*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
376*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpbroadcastw %xmm0, %ymm0
377*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovaps %zmm0, %zmm1
378*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
379*9880d681SAndroid Build Coastguard Worker;
380*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: _invec16xi16:
381*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
382*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpbroadcastw %xmm0, %zmm0
383*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
384*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <16 x i16> %a, <16 x i16> undef, <32 x i32> zeroinitializer
385*9880d681SAndroid Build Coastguard Worker  ret <32 x i16>%res
386*9880d681SAndroid Build Coastguard Worker}
387*9880d681SAndroid Build Coastguard Worker
; Splat of lane 0 of an <8 x i32> input into a <16 x i32> result: the
; shufflevector mask is an all-zero <16 x i32>, so each of the 16 output lanes
; selects element 0 of %a.
; Both RUN configurations share one expectation: a single vpbroadcastd from
; xmm0 into zmm0 followed by the return.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
388*9880d681SAndroid Build Coastguard Workerdefine <16 x i32> @_invec8xi32(<8 x i32>%a)  {
389*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: _invec8xi32:
390*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
391*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpbroadcastd %xmm0, %zmm0
392*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
393*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> zeroinitializer
394*9880d681SAndroid Build Coastguard Worker  ret <16 x i32>%res
395*9880d681SAndroid Build Coastguard Worker}
396*9880d681SAndroid Build Coastguard Worker
; Splat of lane 0 of a <4 x i64> input into an <8 x i64> result: the
; shufflevector mask is an all-zero <8 x i32>, so each of the 8 output lanes
; selects element 0 of %a.
; Both RUN configurations share one expectation: a single vpbroadcastq from
; xmm0 into zmm0 followed by the return.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
397*9880d681SAndroid Build Coastguard Workerdefine <8 x i64> @_invec4xi64(<4 x i64>%a)  {
398*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: _invec4xi64:
399*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
400*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpbroadcastq %xmm0, %zmm0
401*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
402*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <4 x i64> %a, <4 x i64> undef, <8 x i32> zeroinitializer
403*9880d681SAndroid Build Coastguard Worker  ret <8 x i64>%res
404*9880d681SAndroid Build Coastguard Worker}
405*9880d681SAndroid Build Coastguard Worker
; External callee used by broadcast_ss_spill; only its declaration appears in
; this file.
406*9880d681SAndroid Build Coastguard Workerdeclare void @func_f32(float)
; Float broadcast whose scalar source is live across a call: %a = %x + %x is
; passed to @func_f32 and afterwards splatted into all 16 lanes of the result
; (insertelement into lane 0, then a shufflevector with an all-zero mask).
; Both RUN configurations share one expectation: the sum is stored to a stack
; slot before the call ("4-byte Folded Spill") and vbroadcastss then reads
; that slot directly as its memory operand ("4-byte Folded Reload").
; The stack offset is matched with a {{[0-9]+}} pattern rather than a literal.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
407*9880d681SAndroid Build Coastguard Workerdefine <16 x float> @broadcast_ss_spill(float %x) {
408*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: broadcast_ss_spill:
409*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
410*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    pushq %rax
411*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:  .Ltmp0:
412*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    .cfi_def_cfa_offset 16
413*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vaddss %xmm0, %xmm0, %xmm0
414*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Folded Spill
415*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    callq func_f32
416*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vbroadcastss {{[0-9]+}}(%rsp), %zmm0 # 4-byte Folded Reload
417*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    popq %rax
418*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
419*9880d681SAndroid Build Coastguard Worker  %a  = fadd float %x, %x
  ; %a is used again after this call, so its value has to be kept across it.
420*9880d681SAndroid Build Coastguard Worker  call void @func_f32(float %a)
421*9880d681SAndroid Build Coastguard Worker  %b = insertelement <16 x float> undef, float %a, i32 0
422*9880d681SAndroid Build Coastguard Worker  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
423*9880d681SAndroid Build Coastguard Worker  ret <16 x float> %c
424*9880d681SAndroid Build Coastguard Worker}
425*9880d681SAndroid Build Coastguard Worker
; External callee used by broadcast_sd_spill; only its declaration appears in
; this file.
426*9880d681SAndroid Build Coastguard Workerdeclare void @func_f64(double)
; Double broadcast whose scalar source is live across a call: %a = %x + %x is
; passed to @func_f64 and afterwards splatted into all 8 lanes of the result
; (insertelement into lane 0, then a shufflevector with an all-zero mask).
; Both RUN configurations share one expectation: the sum is stored to (%rsp)
; before the call ("8-byte Folded Spill") and vbroadcastsd then reads (%rsp)
; directly as its memory operand ("8-byte Folded Reload").
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
427*9880d681SAndroid Build Coastguard Workerdefine <8 x double> @broadcast_sd_spill(double %x) {
428*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: broadcast_sd_spill:
429*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
430*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    pushq %rax
431*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:  .Ltmp1:
432*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    .cfi_def_cfa_offset 16
433*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vaddsd %xmm0, %xmm0, %xmm0
434*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovsd %xmm0, (%rsp) # 8-byte Folded Spill
435*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    callq func_f64
436*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vbroadcastsd (%rsp), %zmm0 # 8-byte Folded Reload
437*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    popq %rax
438*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
439*9880d681SAndroid Build Coastguard Worker  %a  = fadd double %x, %x
  ; %a is used again after this call, so its value has to be kept across it.
440*9880d681SAndroid Build Coastguard Worker  call void @func_f64(double %a)
441*9880d681SAndroid Build Coastguard Worker  %b = insertelement <8 x double> undef, double %a, i32 0
442*9880d681SAndroid Build Coastguard Worker  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
443*9880d681SAndroid Build Coastguard Worker  ret <8 x double> %c
444*9880d681SAndroid Build Coastguard Worker}
445