xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/2012-04-26-sdglue.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s
3*9880d681SAndroid Build Coastguard Worker
4*9880d681SAndroid Build Coastguard Worker; rdar://11314175: SD Scheduler, BuildSchedUnits assert:
5*9880d681SAndroid Build Coastguard Worker;                  N->getNodeId() == -1 && "Node already inserted!"
6*9880d681SAndroid Build Coastguard Worker
; @func is the reduced reproducer for the SD scheduler (BuildSchedUnits)
; assertion named in the header comment of this file. It takes no arguments
; and returns void. Many operands are undef and the pointers are null/undef,
; so the test only exercises instruction selection and scheduling; the
; computed values are not meaningful.
; The expected-assembly lines inside the body are autogenerated (see the NOTE
; on the first line of the test), so regenerate them with that script rather
; than editing them by hand.
7*9880d681SAndroid Build Coastguard Workerdefine void @func() nounwind ssp {
8*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: func:
9*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
10*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmovups 0, %xmm0
11*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vxorps %ymm1, %ymm1, %ymm1
12*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vblendps {{.*#+}} ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
13*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
14*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vbroadcastss 32, %xmm3
15*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
16*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmulps %ymm0, %ymm2, %ymm2
17*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmulps %ymm0, %ymm0, %ymm0
18*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vaddps %ymm0, %ymm2, %ymm0
19*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vaddps %ymm0, %ymm0, %ymm0
20*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmulps %xmm0, %xmm0, %xmm0
21*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
22*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vaddps %ymm0, %ymm0, %ymm0
23*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vhaddps %ymm0, %ymm0, %ymm0
24*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vsubps %ymm0, %ymm0, %ymm0
25*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vhaddps %ymm0, %ymm1, %ymm0
26*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmovaps %ymm0, (%rax)
27*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vzeroupper
28*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retq
; Two unaligned (align 1) <4 x float> loads: one from the null pointer and one
; from element index 2 past null (the expected assembly addresses these as 0
; and 32 respectively).
29*9880d681SAndroid Build Coastguard Worker  %tmp = load <4 x float>, <4 x float>* null, align 1
30*9880d681SAndroid Build Coastguard Worker  %tmp14 = getelementptr <4 x float>, <4 x float>* null, i32 2
31*9880d681SAndroid Build Coastguard Worker  %tmp15 = load <4 x float>, <4 x float>* %tmp14, align 1
; Widen %tmp to 8 lanes: lanes 0-3 come from %tmp, lanes 4-7 all pick element 0
; of the second operand (0.0). The upper half is then replaced through
; vinsertf128 with an undef <4 x float>.
32*9880d681SAndroid Build Coastguard Worker  %tmp16 = shufflevector <4 x float> %tmp, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
33*9880d681SAndroid Build Coastguard Worker  %tmp17 = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %tmp16, <4 x float> undef, i8 1)
; Byte-level shuffle of %tmp: the mask 4..19 keeps bytes 4-15 of the first
; operand and takes bytes 0-3 from the second operand (undef here).
34*9880d681SAndroid Build Coastguard Worker  %tmp18 = bitcast <4 x float> %tmp to <16 x i8>
35*9880d681SAndroid Build Coastguard Worker  %tmp19 = shufflevector <16 x i8> %tmp18, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
36*9880d681SAndroid Build Coastguard Worker  %tmp20 = bitcast <16 x i8> %tmp19 to <4 x float>
; Same mask applied to %tmp15, but with the first operand undef, so only the
; last four result bytes (bytes 0-3 of %tmp21) are defined.
37*9880d681SAndroid Build Coastguard Worker  %tmp21 = bitcast <4 x float> %tmp15 to <16 x i8>
38*9880d681SAndroid Build Coastguard Worker  %tmp22 = shufflevector <16 x i8> undef, <16 x i8> %tmp21, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
39*9880d681SAndroid Build Coastguard Worker  %tmp23 = bitcast <16 x i8> %tmp22 to <4 x float>
; Widen the shuffled %tmp20 the same way as %tmp16 above, then insert %tmp23
; through vinsertf128 with immediate 1.
40*9880d681SAndroid Build Coastguard Worker  %tmp24 = shufflevector <4 x float> %tmp20, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
41*9880d681SAndroid Build Coastguard Worker  %tmp25 = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %tmp24, <4 x float> %tmp23, i8 1)
; Chain of 8-lane multiplies/adds combining the two widened vectors; every
; "other" operand is undef.
42*9880d681SAndroid Build Coastguard Worker  %tmp26 = fmul <8 x float> %tmp17, undef
43*9880d681SAndroid Build Coastguard Worker  %tmp27 = fmul <8 x float> %tmp25, undef
44*9880d681SAndroid Build Coastguard Worker  %tmp28 = fadd <8 x float> %tmp26, %tmp27
45*9880d681SAndroid Build Coastguard Worker  %tmp29 = fadd <8 x float> %tmp28, undef
; Take lanes 0-3 of the sum, multiply as a 4-lane vector, and insert the result
; into a zero 8-lane vector via vinsertf128 with immediate 1.
46*9880d681SAndroid Build Coastguard Worker  %tmp30 = shufflevector <8 x float> %tmp29, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
47*9880d681SAndroid Build Coastguard Worker  %tmp31 = fmul <4 x float> undef, %tmp30
48*9880d681SAndroid Build Coastguard Worker  %tmp32 = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> zeroinitializer, <4 x float> %tmp31, i8 1)
; Final add / hadd / sub / hadd sequence feeding the stored value.
49*9880d681SAndroid Build Coastguard Worker  %tmp33 = fadd <8 x float> undef, %tmp32
50*9880d681SAndroid Build Coastguard Worker  %tmp34 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %tmp33, <8 x float> undef) nounwind
51*9880d681SAndroid Build Coastguard Worker  %tmp35 = fsub <8 x float> %tmp34, undef
52*9880d681SAndroid Build Coastguard Worker  %tmp36 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> zeroinitializer, <8 x float> %tmp35) nounwind
; The result is stored through an undef pointer with 32-byte alignment; the
; store keeps the whole computation live.
53*9880d681SAndroid Build Coastguard Worker  store <8 x float> %tmp36, <8 x float>* undef, align 32
54*9880d681SAndroid Build Coastguard Worker  ret void
55*9880d681SAndroid Build Coastguard Worker}
56*9880d681SAndroid Build Coastguard Worker
; X86 AVX intrinsic called three times by @func, always with i8 1 as the last
; argument. Declared nounwind readnone.
57*9880d681SAndroid Build Coastguard Workerdeclare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone
58*9880d681SAndroid Build Coastguard Worker
; X86 AVX intrinsic called twice by @func; takes two <8 x float> operands and
; returns an <8 x float>. Declared nounwind readnone.
59*9880d681SAndroid Build Coastguard Workerdeclare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
60