xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/avx-splat.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s
3*9880d681SAndroid Build Coastguard Worker
; funcA: splat byte element 5 of a <32 x i8> argument across all 32 lanes.
; The shuffle mask is 32 copies of index 5 and the second operand is undef.
; Expected lowering (see the assertions below): one vpshufb on the low xmm
; register that replicates byte 5, followed by vinsertf128 to copy that xmm
; into the upper half of ymm0.
define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcA:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <32 x i8> %shuffle
}
14*9880d681SAndroid Build Coastguard Worker
; funcB: splat i16 element 5 of a <16 x i16> argument across all 16 lanes.
; The shuffle mask is 16 copies of index 5 and the second operand is undef.
; Expected lowering (see the assertions below): vpshufhw replicates word 5
; within the high four words of xmm0, vpshufd spreads those dwords over the
; whole xmm register, and vinsertf128 copies the xmm into the upper half of
; ymm0.
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcB:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i16> %shuffle
}
26*9880d681SAndroid Build Coastguard Worker
; funcC: build a <4 x i64> whose four lanes all hold the scalar argument %q,
; using a chain of four insertelement instructions starting from undef.
; Expected lowering (see the assertions below): vmovq moves %rdi into xmm0,
; vpshufd duplicates the low 64 bits into both halves of xmm0, and
; vinsertf128 copies the xmm into the upper half of ymm0.
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcC:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmovq %rdi, %xmm0
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}
41*9880d681SAndroid Build Coastguard Worker
; funcD: build a <4 x double> whose four lanes all hold the scalar argument
; %q, using a chain of four insertelement instructions starting from undef.
; Expected lowering (see the assertions below): vmovddup duplicates element 0
; of xmm0 into both lanes, then vinsertf128 copies the xmm into the upper
; half of ymm0.
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcD:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}
55*9880d681SAndroid Build Coastguard Worker
; Test this turns into a broadcast:
;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
;
; funcE details:
;  * %udx495 is a 32-byte-aligned [18 x [18 x float]] stack array; the load
;    reads element [1][1] of it as an i32 (the float pointer is bitcast to
;    i32* first).
;  * The loaded value is inserted only into lanes 6 and 7 of an otherwise
;    undef <8 x i32>, which is then bitcast to <8 x float>.
;  * Both conditional branches test undef, so the control flow exists only to
;    place the load in its own block and merge it with undef through a phi.
; Expected lowering (see the assertions below): a single vbroadcastss from a
; stack slot into ymm0 inside the load block; the assertions accept any
; numeric displacement off %rsp.
define <8 x float> @funcE() nounwind {
; CHECK-LABEL: funcE:
; CHECK:       ## BB#0: ## %for_exit499
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    ## implicit-def: %YMM0
; CHECK-NEXT:    testb %al, %al
; CHECK-NEXT:    jne LBB4_2
; CHECK-NEXT:  ## BB#1: ## %load.i1247
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    andq $-32, %rsp
; CHECK-NEXT:    subq $1312, %rsp ## imm = 0x520
; CHECK-NEXT:    vbroadcastss {{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:  LBB4_2: ## %__load_and_broadcast_32.exit1249
; CHECK-NEXT:    retq
allocas:
  %udx495 = alloca [18 x [18 x float]], align 32
  br label %for_test505.preheader

for_test505.preheader:                            ; preds = %for_test505.preheader, %allocas
  br i1 undef, label %for_exit499, label %for_test505.preheader

for_exit499:                                      ; preds = %for_test505.preheader
  br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247

load.i1247:                                       ; preds = %for_exit499
  %ptr1227 = getelementptr [18 x [18 x float]], [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
  %ptr.i1237 = bitcast float* %ptr1227 to i32*
  %val.i1238 = load i32, i32* %ptr.i1237, align 4
  %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
  %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
  %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
  br label %__load_and_broadcast_32.exit1249

__load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_exit499
  %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
  ret <8 x float> %load_broadcast12281250
}
99*9880d681SAndroid Build Coastguard Worker
; funcF: insert the i32 argument %val into lanes 6 and 7 only of an otherwise
; undef <8 x i32>, then bitcast the result to <8 x float>. Lanes 0-5 stay
; undef, so the backend is free to fill them with anything.
; Expected lowering (see the assertions below): vmovd moves %edi into xmm0,
; vpshufd places element 0 in the top two dword lanes of xmm0, and
; vinsertf128 copies the xmm into the upper half of ymm0.
define <8 x float> @funcF(i32 %val) nounwind {
; CHECK-LABEL: funcF:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd %edi, %xmm0
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
  %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
  %tmp = bitcast <8 x i32> %ret7 to <8 x float>
  ret <8 x float> %tmp
}
112*9880d681SAndroid Build Coastguard Worker
; funcG: splat float element 0 of an <8 x float> argument across all 8 lanes.
; The shuffle mask is 8 copies of index 0 and the second operand is undef.
; Expected lowering (see the assertions below): vpermilps replicates element 0
; within xmm0, then vinsertf128 copies the xmm into the upper half of ymm0.
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcG:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}
123*9880d681SAndroid Build Coastguard Worker
; funcH: splat float element 5 of an <8 x float> argument across all 8 lanes.
; The shuffle mask is 8 copies of index 5 and the second operand is undef.
; Expected lowering (see the assertions below): a 256-bit vpermilps replicates
; element 1 in the low half and element 5 in the high half, then vperm2f128
; selects the [2,3] half for both halves of ymm0.
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcH:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; CHECK-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %shuffle
}
134*9880d681SAndroid Build Coastguard Worker
; splat_load_2f64_11: load a <2 x double> from %ptr and splat its element 1
; into both lanes.
; Expected lowering (see the assertions below): the vector load and the
; shuffle fold into a single vmovddup with a memory operand. The address
; operand is absorbed by the regex in the assertion, so the displacement is
; not pinned by this test.
define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
; CHECK-LABEL: splat_load_2f64_11:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; CHECK-NEXT:    retq
  %x = load <2 x double>, <2 x double>* %ptr
  %x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %x1
}
144*9880d681SAndroid Build Coastguard Worker
; splat_load_4f64_2222: load a <4 x double> from %ptr and splat its element 2
; into all four lanes.
; Expected lowering (see the assertions below): the vector load and the
; shuffle fold into a single vbroadcastsd from 16(%rdi); 16 is the byte
; offset of element 2 (2 * 8 bytes).
define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
; CHECK-LABEL: splat_load_4f64_2222:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastsd 16(%rdi), %ymm0
; CHECK-NEXT:    retq
  %x = load <4 x double>, <4 x double>* %ptr
  %x1 = shufflevector <4 x double> %x, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %x1
}
154*9880d681SAndroid Build Coastguard Worker
; splat_load_4f32_0000: load a <4 x float> from %ptr and splat its element 0
; into all four lanes.
; Expected lowering (see the assertions below): the vector load and the
; shuffle fold into a single 128-bit vbroadcastss from (%rdi), with no
; displacement because element 0 sits at the start of the vector.
define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
; CHECK-LABEL: splat_load_4f32_0000:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0
; CHECK-NEXT:    retq
  %x = load <4 x float>, <4 x float>* %ptr
  %x1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x float> %x1
}
164*9880d681SAndroid Build Coastguard Worker
; splat_load_8f32_77777777: load an <8 x float> from %ptr and splat its
; element 7 into all eight lanes.
; Expected lowering (see the assertions below): the vector load and the
; shuffle fold into a single vbroadcastss from 28(%rdi); 28 is the byte
; offset of element 7 (7 * 4 bytes).
define <8 x float> @splat_load_8f32_77777777(<8 x float>* %ptr) {
; CHECK-LABEL: splat_load_8f32_77777777:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastss 28(%rdi), %ymm0
; CHECK-NEXT:    retq
  %x = load <8 x float>, <8 x float>* %ptr
  %x1 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  ret <8 x float> %x1
}
174