; xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/stack-folding-int-avx2.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx2 < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests.
;
; By including a nop call with sideeffects we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.
;
; Each test below pairs a CHECK-LABEL with one CHECK pattern that expects the
; named instruction to use an (%rsp)-relative memory operand annotated with a
; "Folded Reload" (or "Folded Spill") comment of the stated size.

; Splats element 0 of %a0 into a <4 x double>. The CHECK line expects this to
; be emitted as vbroadcastsd reading its source from a stack slot (16-byte
; folded reload). The inline-asm nop clobbers xmm1-xmm15 and flags.
define <4 x double> @stack_fold_broadcastsd_ymm(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_broadcastsd_ymm
  ;CHECK:       vbroadcastsd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x double> %a0, <2 x double> undef, <4 x i32> zeroinitializer
  ; fadd forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}
; Declared but not called: the test above expresses the broadcast as a shufflevector.
declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly

; Splats element 0 of %a0 across a <4 x float>. The CHECK line expects
; vbroadcastss into an xmm register with the source folded from a stack slot
; (16-byte folded reload). The inline-asm nop clobbers xmm1-xmm15 and flags.
define <4 x float> @stack_fold_broadcastss(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_broadcastss
  ;CHECK:       vbroadcastss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> zeroinitializer
  ; fadd forces execution domain
  %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %3
}
; Declared but not called: the test above expresses the broadcast as a shufflevector.
declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly

; Splats element 0 of %a0 into an <8 x float>. The CHECK line expects
; vbroadcastss into a ymm register with the source folded from a stack slot
; (16-byte folded reload). The inline-asm nop clobbers xmm1-xmm15 and flags.
define <8 x float> @stack_fold_broadcastss_ymm(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_broadcastss_ymm
  ;CHECK:       vbroadcastss {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> undef, <8 x i32> zeroinitializer
  ; fadd forces execution domain
  %3 = fadd <8 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
  ret <8 x float> %3
}
; Declared but not called: the test above expresses the broadcast as a shufflevector.
declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly

; Spill-folding variant: the shuffle selects elements 4-7 (the upper 128 bits)
; of %1, and the result is live across the nop that clobbers xmm1-xmm15. The
; CHECK line expects vextracti128 $1 to write straight to the stack slot
; (16-byte folded spill) rather than to a register first.
define <4 x i32> @stack_fold_extracti128(<8 x i32> %a0, <8 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_extracti128
  ;CHECK:       vextracti128 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
  ; add forces execution domain
  %1 = add <8 x i32> %a0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %2 = shufflevector <8 x i32> %1, <8 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ret <4 x i32> %2
}

; Concatenates %a0 (low half) and %a1 (high half) into an <8 x i32>. The CHECK
; line expects vinserti128 $1 with the inserted half read from a stack slot
; (16-byte folded reload). The inline-asm nop clobbers xmm2-xmm15 and flags.
define <8 x i32> @stack_fold_inserti128(<4 x i32> %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_inserti128
  ;CHECK:       vinserti128 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ; add forces execution domain
  %3 = add <8 x i32> %2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %3
}

; Calls the AVX2 mpsadbw intrinsic with immediate 7. The CHECK line expects
; vmpsadbw $7 with one source folded from a stack slot (32-byte folded reload).
; The inline-asm nop clobbers xmm2-xmm15 and flags.
define <16 x i16> @stack_fold_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_mpsadbw
  ;CHECK:       vmpsadbw $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7)
  ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone

; Calls the AVX2 pabs.b intrinsic on %a0. The CHECK line expects vpabsb with
; its source folded from a stack slot (32-byte folded reload). The inline-asm
; nop clobbers xmm1-xmm15 and flags.
define <32 x i8> @stack_fold_pabsb(<32 x i8> %a0) {
  ;CHECK-LABEL: stack_fold_pabsb
  ;CHECK:       vpabsb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0)
  ret <32 x i8> %2
}
declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone

; Calls the AVX2 pabs.d intrinsic on %a0. The CHECK line expects vpabsd with
; its source folded from a stack slot (32-byte folded reload). The inline-asm
; nop clobbers xmm1-xmm15 and flags.
define <8 x i32> @stack_fold_pabsd(<8 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_pabsd
  ;CHECK:       vpabsd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0)
  ret <8 x i32> %2
}
declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone

; Calls the AVX2 pabs.w intrinsic on %a0. The CHECK line expects vpabsw with
; its source folded from a stack slot (32-byte folded reload). The inline-asm
; nop clobbers xmm1-xmm15 and flags.
define <16 x i16> @stack_fold_pabsw(<16 x i16> %a0) {
  ;CHECK-LABEL: stack_fold_pabsw
  ;CHECK:       vpabsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0)
  ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone

; Calls the AVX2 packssdw intrinsic on %a0 and %a1. The CHECK line expects
; vpackssdw with one source folded from a stack slot (32-byte folded reload).
; The inline-asm nop clobbers xmm2-xmm15 and flags.
define <16 x i16> @stack_fold_packssdw(<8 x i32> %a0, <8 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_packssdw
  ;CHECK:       vpackssdw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1)
  ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone

; Calls the AVX2 packsswb intrinsic on %a0 and %a1. The CHECK line expects
; vpacksswb with one source folded from a stack slot (32-byte folded reload).
; The inline-asm nop clobbers xmm2-xmm15 and flags.
define <32 x i8> @stack_fold_packsswb(<16 x i16> %a0, <16 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_packsswb
  ;CHECK:       vpacksswb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1)
  ret <32 x i8> %2
}
declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone

; Calls the AVX2 packusdw intrinsic on %a0 and %a1. The CHECK line expects
; vpackusdw with one source folded from a stack slot (32-byte folded reload).
; The inline-asm nop clobbers xmm2-xmm15 and flags.
define <16 x i16> @stack_fold_packusdw(<8 x i32> %a0, <8 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_packusdw
  ;CHECK:       vpackusdw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1)
  ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone

; Calls the AVX2 packuswb intrinsic on %a0 and %a1. The CHECK line expects
; vpackuswb with one source folded from a stack slot (32-byte folded reload).
; The inline-asm nop clobbers xmm2-xmm15 and flags.
define <32 x i8> @stack_fold_packuswb(<16 x i16> %a0, <16 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_packuswb
  ;CHECK:       vpackuswb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1)
  ret <32 x i8> %2
}
declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone

; Plain IR add of two <32 x i8> vectors. The CHECK line expects vpaddb with one
; source folded from a stack slot (32-byte folded reload). The inline-asm nop
; clobbers xmm2-xmm15 and flags.
define <32 x i8> @stack_fold_paddb(<32 x i8> %a0, <32 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_paddb
  ;CHECK:       vpaddb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = add <32 x i8> %a0, %a1
  ret <32 x i8> %2
}

; Plain IR add of two <8 x i32> vectors. The CHECK line expects vpaddd with one
; source folded from a stack slot (32-byte folded reload). The inline-asm nop
; clobbers xmm2-xmm15 and flags.
define <8 x i32> @stack_fold_paddd(<8 x i32> %a0, <8 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_paddd
  ;CHECK:       vpaddd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = add <8 x i32> %a0, %a1
  ret <8 x i32> %2
}

; Plain IR add of two <4 x i64> vectors. The CHECK line expects vpaddq with one
; source folded from a stack slot (32-byte folded reload). The inline-asm nop
; clobbers xmm2-xmm15 and flags.
define <4 x i64> @stack_fold_paddq(<4 x i64> %a0, <4 x i64> %a1) {
  ;CHECK-LABEL: stack_fold_paddq
  ;CHECK:       vpaddq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = add <4 x i64> %a0, %a1
  ret <4 x i64> %2
}

; Calls the AVX2 padds.b intrinsic on %a0 and %a1. The CHECK line expects
; vpaddsb with one source folded from a stack slot (32-byte folded reload).
; The inline-asm nop clobbers xmm2-xmm15 and flags.
define <32 x i8> @stack_fold_paddsb(<32 x i8> %a0, <32 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_paddsb
  ;CHECK:       vpaddsb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %2
}
declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone

; Calls the AVX2 padds.w intrinsic on %a0 and %a1. The CHECK line expects
; vpaddsw with one source folded from a stack slot (32-byte folded reload).
; The inline-asm nop clobbers xmm2-xmm15 and flags.
define <16 x i16> @stack_fold_paddsw(<16 x i16> %a0, <16 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_paddsw
  ;CHECK:       vpaddsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone

; Calls the AVX2 paddus.b intrinsic on %a0 and %a1. The CHECK line expects
; vpaddusb with one source folded from a stack slot (32-byte folded reload).
; The inline-asm nop clobbers xmm2-xmm15 and flags.
define <32 x i8> @stack_fold_paddusb(<32 x i8> %a0, <32 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_paddusb
  ;CHECK:       vpaddusb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %2
}
declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone

; Calls the AVX2 paddus.w intrinsic on %a0 and %a1. The CHECK line expects
; vpaddusw with one source folded from a stack slot (32-byte folded reload).
; The inline-asm nop clobbers xmm2-xmm15 and flags.
define <16 x i16> @stack_fold_paddusw(<16 x i16> %a0, <16 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_paddusw
  ;CHECK:       vpaddusw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone

; Plain IR add of two <16 x i16> vectors. The CHECK line expects vpaddw with
; one source folded from a stack slot (32-byte folded reload). The inline-asm
; nop clobbers xmm2-xmm15 and flags.
define <16 x i16> @stack_fold_paddw(<16 x i16> %a0, <16 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_paddw
  ;CHECK:       vpaddw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = add <16 x i16> %a0, %a1
  ret <16 x i16> %2
}

; Byte shuffle written so that, within each 16-byte half, the result is bytes
; 1-15 of %a1 followed by byte 0 of the matching half of %a0 (mask indices 32
; and 48 select from the second shuffle operand, %a0). The CHECK line expects
; vpalignr $1 with one source folded from a stack slot (32-byte folded reload).
; The inline-asm nop clobbers xmm2-xmm15 and flags.
define <32 x i8> @stack_fold_palignr(<32 x i8> %a0, <32 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_palignr
  ;CHECK:       vpalignr $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <32 x i8> %a1, <32 x i8> %a0, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
  ret <32 x i8> %2
}

; Bitwise AND of %a0 and %a1 followed by an integer add. The CHECK line expects
; vpand with one source folded from a stack slot (32-byte folded reload). The
; inline-asm nop clobbers xmm2-xmm15 and flags.
define <32 x i8> @stack_fold_pand(<32 x i8> %a0, <32 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_pand
  ;CHECK:       vpand {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = and <32 x i8> %a0, %a1
  ; add forces execution domain
  %3 = add <32 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <32 x i8> %3
}

; Computes (~%a0) & %a1 (xor with all-ones, then and) followed by an integer
; add. The CHECK line expects vpandn with one source folded from a stack slot
; (32-byte folded reload). The inline-asm nop clobbers xmm2-xmm15 and flags.
define <32 x i8> @stack_fold_pandn(<32 x i8> %a0, <32 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_pandn
  ;CHECK:       vpandn {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = xor <32 x i8> %a0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %3 = and <32 x i8> %2, %a1
  ; add forces execution domain
  %4 = add <32 x i8> %3, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <32 x i8> %4
}

; Calls the AVX2 pavg.b intrinsic on %a0 and %a1. The CHECK line expects vpavgb
; with one source folded from a stack slot (32-byte folded reload). The
; inline-asm nop clobbers xmm2-xmm15 and flags.
define <32 x i8> @stack_fold_pavgb(<32 x i8> %a0, <32 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_pavgb
  ;CHECK:       vpavgb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %2
}
declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) nounwind readnone

; Calls the AVX2 pavg.w intrinsic on %a0 and %a1. The CHECK line expects vpavgw
; with one source folded from a stack slot (32-byte folded reload). The
; inline-asm nop clobbers xmm2-xmm15 and flags.
define <16 x i16> @stack_fold_pavgw(<16 x i16> %a0, <16 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_pavgw
  ;CHECK:       vpavgw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone

; Blend written as a shuffle: elements 0-2 come from %a1 (mask indices 4-6) and
; element 3 from %a0. The CHECK line expects vpblendd $7 on xmm registers with
; one source folded from a stack slot (16-byte folded reload). The inline-asm
; nop clobbers xmm2-xmm15 and flags.
define <4 x i32> @stack_fold_pblendd(<4 x i32> %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_pblendd
  ;CHECK:       vpblendd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
  ; add forces execution domain
  %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %3
}

; Blend written as a shuffle: elements 0-2 come from %a1 (mask indices 8-10)
; and elements 3-7 from %a0. The CHECK line expects vpblendd $7 on ymm
; registers with one source folded from a stack slot (32-byte folded reload).
; The inline-asm nop clobbers xmm2-xmm15 and flags.
define <8 x i32> @stack_fold_pblendd_ymm(<8 x i32> %a0, <8 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_pblendd_ymm
  ;CHECK:       vpblendd $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
  ; add forces execution domain
  %3 = add <8 x i32> %2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %3
}

; Calls the AVX2 pblendvb intrinsic with operands (%a1, %c, %a0). The CHECK
; line expects vpblendvb with its memory operand folded from a stack slot
; (32-byte folded reload). Three vector arguments are live here, and the
; inline-asm nop clobbers only xmm3-xmm15 and flags.
define <32 x i8> @stack_fold_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %c) {
  ;CHECK-LABEL: stack_fold_pblendvb
  ;CHECK:       vpblendvb {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a1, <32 x i8> %c, <32 x i8> %a0)
  ret <32 x i8> %2
}
declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone

; Calls the AVX2 pblendw intrinsic with immediate 7. The CHECK line expects
; vpblendw $7 with one source folded from a stack slot (32-byte folded reload).
; The inline-asm nop clobbers xmm2-xmm15 and flags.
define <16 x i16> @stack_fold_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_pblendw
  ;CHECK:       vpblendw $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i8 7)
  ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i8) nounwind readnone

; Splats byte 0 of %a0 across a <16 x i8>. The CHECK line expects vpbroadcastb
; into an xmm register with the source folded from a stack slot (16-byte folded
; reload). The inline-asm nop clobbers xmm1-xmm15 and flags.
define <16 x i8> @stack_fold_pbroadcastb(<16 x i8> %a0) {
  ;CHECK-LABEL: stack_fold_pbroadcastb
  ;CHECK:       vpbroadcastb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %2
}

; Stack-fold test for vpbroadcastb (ymm destination).
; The "nop" asm clobbers xmm1-xmm15 and flags. The <16 x i8> input is splat
; to 32 lanes by a zero-mask shufflevector; the FileCheck pattern requires
; the 16-byte stack reload to be folded into vpbroadcastb.
define <32 x i8> @stack_fold_pbroadcastb_ymm(<16 x i8> %a0) {
  ;CHECK-LABEL: stack_fold_pbroadcastb_ymm
  ;CHECK:       vpbroadcastb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <32 x i32> zeroinitializer
  ret <32 x i8> %2
}

; Stack-fold test for vpbroadcastd (xmm destination).
; The "nop" asm clobbers xmm1-xmm15 and flags. Element 0 of %a0 is splat by
; a zero-mask shufflevector; the FileCheck pattern requires the 16-byte
; stack reload to be folded into vpbroadcastd.
define <4 x i32> @stack_fold_pbroadcastd(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_pbroadcastd
  ;CHECK:       vpbroadcastd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> zeroinitializer
  ; add forces execution domain
  %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %3
}

; Stack-fold test for vpbroadcastd (ymm destination).
; The "nop" asm clobbers xmm1-xmm15 and flags. The <4 x i32> input is splat
; to 8 lanes; the FileCheck pattern requires the 16-byte stack reload to be
; folded into vpbroadcastd.
define <8 x i32> @stack_fold_pbroadcastd_ymm(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_pbroadcastd_ymm
  ;CHECK:       vpbroadcastd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <8 x i32> zeroinitializer
  ; add forces execution domain
  %3 = add <8 x i32> %2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %3
}

; Stack-fold test for vpbroadcastq (xmm destination).
; The "nop" asm clobbers xmm1-xmm15 and flags. Element 0 of %a0 is splat by
; a zero-mask shufflevector; the FileCheck pattern requires the 16-byte
; stack reload to be folded into vpbroadcastq.
define <2 x i64> @stack_fold_pbroadcastq(<2 x i64> %a0) {
  ;CHECK-LABEL: stack_fold_pbroadcastq
  ;CHECK:       vpbroadcastq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> zeroinitializer
  ; add forces execution domain
  %3 = add <2 x i64> %2, <i64 1, i64 1>
  ret <2 x i64> %3
}

; Stack-fold test for vpbroadcastq (ymm destination).
; The "nop" asm clobbers xmm1-xmm15 and flags. The <2 x i64> input is splat
; to 4 lanes; the FileCheck pattern requires the 16-byte stack reload to be
; folded into vpbroadcastq.
define <4 x i64> @stack_fold_pbroadcastq_ymm(<2 x i64> %a0) {
  ;CHECK-LABEL: stack_fold_pbroadcastq_ymm
  ;CHECK:       vpbroadcastq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x i64> %a0, <2 x i64> undef, <4 x i32> zeroinitializer
  ; add forces execution domain
  %3 = add <4 x i64> %2, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %3
}

; Stack-fold test for vpbroadcastw (xmm destination).
; The "nop" asm clobbers xmm1-xmm15 and flags. Element 0 of %a0 is splat by
; a zero-mask shufflevector; the FileCheck pattern requires the 16-byte
; stack reload to be folded into vpbroadcastw.
define <8 x i16> @stack_fold_pbroadcastw(<8 x i16> %a0) {
  ;CHECK-LABEL: stack_fold_pbroadcastw
  ;CHECK:       vpbroadcastw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %2
}

; Stack-fold test for vpbroadcastw (ymm destination).
; The "nop" asm clobbers xmm1-xmm15 and flags. The <8 x i16> input is splat
; to 16 lanes; the FileCheck pattern requires the 16-byte stack reload to be
; folded into vpbroadcastw.
define <16 x i16> @stack_fold_pbroadcastw_ymm(<8 x i16> %a0) {
  ;CHECK-LABEL: stack_fold_pbroadcastw_ymm
  ;CHECK:       vpbroadcastw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <16 x i32> zeroinitializer
  ret <16 x i16> %2
}

; Stack-fold test for vpcmpeqb (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags. The compare is written as
; icmp eq + sext to a byte mask; the FileCheck pattern requires the 32-byte
; stack reload to be folded into vpcmpeqb.
define <32 x i8> @stack_fold_pcmpeqb(<32 x i8> %a0, <32 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_pcmpeqb
  ;CHECK:       vpcmpeqb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp eq <32 x i8> %a0, %a1
  %3 = sext <32 x i1> %2 to <32 x i8>
  ret <32 x i8> %3
}

; Stack-fold test for vpcmpeqd (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags. The compare is written as
; icmp eq + sext to a dword mask; the FileCheck pattern requires the 32-byte
; stack reload to be folded into vpcmpeqd.
define <8 x i32> @stack_fold_pcmpeqd(<8 x i32> %a0, <8 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_pcmpeqd
  ;CHECK:       vpcmpeqd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp eq <8 x i32> %a0, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

; Stack-fold test for vpcmpeqq (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags. The compare is written as
; icmp eq + sext to a qword mask; the FileCheck pattern requires the 32-byte
; stack reload to be folded into vpcmpeqq.
define <4 x i64> @stack_fold_pcmpeqq(<4 x i64> %a0, <4 x i64> %a1) {
  ;CHECK-LABEL: stack_fold_pcmpeqq
  ;CHECK:       vpcmpeqq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp eq <4 x i64> %a0, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

; Stack-fold test for vpcmpeqw (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags. The compare is written as
; icmp eq + sext to a word mask; the FileCheck pattern requires the 32-byte
; stack reload to be folded into vpcmpeqw.
define <16 x i16> @stack_fold_pcmpeqw(<16 x i16> %a0, <16 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_pcmpeqw
  ;CHECK:       vpcmpeqw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp eq <16 x i16> %a0, %a1
  %3 = sext <16 x i1> %2 to <16 x i16>
  ret <16 x i16> %3
}

; Stack-fold test for vpcmpgtb (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags. The signed compare is written
; as icmp sgt + sext; the FileCheck pattern requires the 32-byte stack
; reload to be folded into vpcmpgtb.
define <32 x i8> @stack_fold_pcmpgtb(<32 x i8> %a0, <32 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_pcmpgtb
  ;CHECK:       vpcmpgtb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <32 x i8> %a0, %a1
  %3 = sext <32 x i1> %2 to <32 x i8>
  ret <32 x i8> %3
}

; Stack-fold test for vpcmpgtd (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags. The signed compare is written
; as icmp sgt + sext; the FileCheck pattern requires the 32-byte stack
; reload to be folded into vpcmpgtd.
define <8 x i32> @stack_fold_pcmpgtd(<8 x i32> %a0, <8 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_pcmpgtd
  ;CHECK:       vpcmpgtd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <8 x i32> %a0, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

; Stack-fold test for vpcmpgtq (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags. The signed compare is written
; as icmp sgt + sext; the FileCheck pattern requires the 32-byte stack
; reload to be folded into vpcmpgtq.
define <4 x i64> @stack_fold_pcmpgtq(<4 x i64> %a0, <4 x i64> %a1) {
  ;CHECK-LABEL: stack_fold_pcmpgtq
  ;CHECK:       vpcmpgtq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <4 x i64> %a0, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

; Stack-fold test for vpcmpgtw (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags. The signed compare is written
; as icmp sgt + sext; the FileCheck pattern requires the 32-byte stack
; reload to be folded into vpcmpgtw.
define <16 x i16> @stack_fold_pcmpgtw(<16 x i16> %a0, <16 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_pcmpgtw
  ;CHECK:       vpcmpgtw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <16 x i16> %a0, %a1
  %3 = sext <16 x i1> %2 to <16 x i16>
  ret <16 x i16> %3
}

; Stack-fold test for vperm2i128 (immediate 33).
; The "nop" asm clobbers xmm2-xmm15 and flags. The shuffle mask 4..11 takes
; the upper four elements of %a0 followed by the lower four of %a1; the
; FileCheck pattern requires the 32-byte stack reload to be folded into
; vperm2i128.
define <8 x i32> @stack_fold_perm2i128(<8 x i32> %a0, <8 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_perm2i128
  ;CHECK:   vperm2i128 $33, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ; add forces execution domain
  %3 = add <8 x i32> %2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %3
}

; Stack-fold test for vpermd (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags; the FileCheck pattern requires
; the 32-byte stack reload to be folded into vpermd.
; Note: the intrinsic is called with (%a1, %a0), i.e. the function arguments
; in swapped order.
define <8 x i32> @stack_fold_permd(<8 x i32> %a0, <8 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_permd
  ;CHECK:   vpermd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a1, <8 x i32> %a0)
  ret <8 x i32> %2
}
; Intrinsic used above: two <8 x i32> operands, <8 x i32> result.
declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly

; Stack-fold test for vpermpd (immediate 235).
; The "nop" asm clobbers xmm1-xmm15 and flags. The shuffle mask <3,2,2,3>
; encodes as 3 | 2<<2 | 2<<4 | 3<<6 = 235; the FileCheck pattern requires
; the 32-byte stack reload to be folded into vpermpd.
define <4 x double> @stack_fold_permpd(<4 x double> %a0) {
  ;CHECK-LABEL: stack_fold_permpd
  ;CHECK:   vpermpd $235, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
  ; fadd forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

; Stack-fold test for vpermps (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags; the FileCheck pattern requires
; the 32-byte stack reload to be folded into vpermps.
; Note: the intrinsic is called with (%a1, %a0): the <8 x float> argument
; first, then the <8 x i32> argument.
define <8 x float> @stack_fold_permps(<8 x i32> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_permps
  ;CHECK:       vpermps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a1, <8 x i32> %a0)
  ret <8 x float> %2
}
; Intrinsic used above: <8 x float> and <8 x i32> operands, <8 x float> result.
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly

; Stack-fold test for vpermq (immediate 235).
; The "nop" asm clobbers xmm1-xmm15 and flags. The shuffle mask <3,2,2,3>
; encodes as 3 | 2<<2 | 2<<4 | 3<<6 = 235; the FileCheck pattern requires
; the 32-byte stack reload to be folded into vpermq.
define <4 x i64> @stack_fold_permq(<4 x i64> %a0) {
  ;CHECK-LABEL: stack_fold_permq
  ;CHECK:   vpermq $235, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
  ; add forces execution domain
  %3 = add <4 x i64> %2, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %3
}

; Stack-fold test for vphaddd (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags; the FileCheck pattern requires
; the 32-byte stack reload to be folded into vphaddd.
define <8 x i32> @stack_fold_phaddd(<8 x i32> %a0, <8 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_phaddd
  ;CHECK:       vphaddd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %2
}
; Intrinsic used above: two <8 x i32> operands, <8 x i32> result.
declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone

; Stack-fold test for vphaddsw (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags; the FileCheck pattern requires
; the 32-byte stack reload to be folded into vphaddsw.
define <16 x i16> @stack_fold_phaddsw(<16 x i16> %a0, <16 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_phaddsw
  ;CHECK:       vphaddsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %2
}
; Intrinsic used above: two <16 x i16> operands, <16 x i16> result.
declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone

; Stack-fold test for vphaddw (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags; the FileCheck pattern requires
; the 32-byte stack reload to be folded into vphaddw.
define <16 x i16> @stack_fold_phaddw(<16 x i16> %a0, <16 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_phaddw
  ;CHECK:       vphaddw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %2
}
; Intrinsic used above: two <16 x i16> operands, <16 x i16> result.
declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone

; Stack-fold test for vphsubd (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags; the FileCheck pattern requires
; the 32-byte stack reload to be folded into vphsubd.
define <8 x i32> @stack_fold_phsubd(<8 x i32> %a0, <8 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_phsubd
  ;CHECK:       vphsubd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %2
}
; Intrinsic used above: two <8 x i32> operands, <8 x i32> result.
declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone

; Stack-fold test for vphsubsw (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags; the FileCheck pattern requires
; the 32-byte stack reload to be folded into vphsubsw.
define <16 x i16> @stack_fold_phsubsw(<16 x i16> %a0, <16 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_phsubsw
  ;CHECK:       vphsubsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %2
}
; Intrinsic used above: two <16 x i16> operands, <16 x i16> result.
declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone

; Stack-fold test for vphsubw (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags; the FileCheck pattern requires
; the 32-byte stack reload to be folded into vphsubw.
define <16 x i16> @stack_fold_phsubw(<16 x i16> %a0, <16 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_phsubw
  ;CHECK:       vphsubw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %2
}
; Intrinsic used above: two <16 x i16> operands, <16 x i16> result.
declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone

; Stack-fold test for vpmaddubsw (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags; the FileCheck pattern requires
; the 32-byte stack reload to be folded into vpmaddubsw.
define <16 x i16> @stack_fold_pmaddubsw(<32 x i8> %a0, <32 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_pmaddubsw
  ;CHECK:       vpmaddubsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1)
  ret <16 x i16> %2
}
; Intrinsic used above: two <32 x i8> operands, <16 x i16> result.
declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone

; Stack-fold test for vpmaddwd (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags; the FileCheck pattern requires
; the 32-byte stack reload to be folded into vpmaddwd.
define <8 x i32> @stack_fold_pmaddwd(<16 x i16> %a0, <16 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_pmaddwd
  ;CHECK:       vpmaddwd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1)
  ret <8 x i32> %2
}
; Intrinsic used above: two <16 x i16> operands, <8 x i32> result.
declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone

; Stack-fold test for vpmaxsb (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags; the FileCheck pattern requires
; the 32-byte stack reload to be folded into vpmaxsb.
define <32 x i8> @stack_fold_pmaxsb(<32 x i8> %a0, <32 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_pmaxsb
  ;CHECK:       vpmaxsb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %2
}
; Intrinsic used above: two <32 x i8> operands, <32 x i8> result.
declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone

; Stack-fold test for vpmaxsd (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags; the FileCheck pattern requires
; the 32-byte stack reload to be folded into vpmaxsd.
define <8 x i32> @stack_fold_pmaxsd(<8 x i32> %a0, <8 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_pmaxsd
  ;CHECK:       vpmaxsd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %2
}
; Intrinsic used above: two <8 x i32> operands, <8 x i32> result.
declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone

; Stack-fold test for vpmaxsw (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags; the FileCheck pattern requires
; the 32-byte stack reload to be folded into vpmaxsw.
define <16 x i16> @stack_fold_pmaxsw(<16 x i16> %a0, <16 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_pmaxsw
  ;CHECK:       vpmaxsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %2
}
; Intrinsic used above: two <16 x i16> operands, <16 x i16> result.
declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone

; Stack-fold test for vpmaxub (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags; the FileCheck pattern requires
; the 32-byte stack reload to be folded into vpmaxub.
define <32 x i8> @stack_fold_pmaxub(<32 x i8> %a0, <32 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_pmaxub
  ;CHECK:       vpmaxub {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %2
}
; Intrinsic used above: two <32 x i8> operands, <32 x i8> result.
declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone

; Stack-fold test for vpmaxud (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags; the FileCheck pattern requires
; the 32-byte stack reload to be folded into vpmaxud.
define <8 x i32> @stack_fold_pmaxud(<8 x i32> %a0, <8 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_pmaxud
  ;CHECK:       vpmaxud {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %2
}
; Intrinsic used above: two <8 x i32> operands, <8 x i32> result.
declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone

; Stack-fold test for vpmaxuw (ymm).
; The "nop" asm clobbers xmm2-xmm15 and flags; the FileCheck pattern requires
; the 32-byte stack reload to be folded into vpmaxuw.
define <16 x i16> @stack_fold_pmaxuw(<16 x i16> %a0, <16 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_pmaxuw
  ;CHECK:       vpmaxuw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %2
}
; Intrinsic used above: two <16 x i16> operands, <16 x i16> result.
declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone

; Stack-fold test: @llvm.x86.avx2.pmins.b on two <32 x i8> arguments.
; The sideeffect "nop" asm ("=x" result, clobbers xmm2-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpminsb to take one operand from an (%rsp) slot as a 32-byte folded reload.
607*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @stack_fold_pminsb(<32 x i8> %a0, <32 x i8> %a1) {
608*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pminsb
609*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpminsb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
610*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
611*9880d681SAndroid Build Coastguard Worker  %2 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1)
612*9880d681SAndroid Build Coastguard Worker  ret <32 x i8> %2
613*9880d681SAndroid Build Coastguard Worker}
614*9880d681SAndroid Build Coastguard Workerdeclare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone
615*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: @llvm.x86.avx2.pmins.d on two <8 x i32> arguments.
; The sideeffect "nop" asm ("=x" result, clobbers xmm2-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpminsd to take one operand from an (%rsp) slot as a 32-byte folded reload.
616*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @stack_fold_pminsd(<8 x i32> %a0, <8 x i32> %a1) {
617*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pminsd
618*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpminsd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
619*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
620*9880d681SAndroid Build Coastguard Worker  %2 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1)
621*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %2
622*9880d681SAndroid Build Coastguard Worker}
623*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone
624*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: @llvm.x86.avx2.pmins.w on two <16 x i16> arguments.
; The sideeffect "nop" asm ("=x" result, clobbers xmm2-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpminsw to take one operand from an (%rsp) slot as a 32-byte folded reload.
625*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @stack_fold_pminsw(<16 x i16> %a0, <16 x i16> %a1) {
626*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pminsw
627*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpminsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
628*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
629*9880d681SAndroid Build Coastguard Worker  %2 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1)
630*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %2
631*9880d681SAndroid Build Coastguard Worker}
632*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone
633*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: @llvm.x86.avx2.pminu.b on two <32 x i8> arguments.
; The sideeffect "nop" asm ("=x" result, clobbers xmm2-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpminub to take one operand from an (%rsp) slot as a 32-byte folded reload.
634*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @stack_fold_pminub(<32 x i8> %a0, <32 x i8> %a1) {
635*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pminub
636*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpminub {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
637*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
638*9880d681SAndroid Build Coastguard Worker  %2 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1)
639*9880d681SAndroid Build Coastguard Worker  ret <32 x i8> %2
640*9880d681SAndroid Build Coastguard Worker}
641*9880d681SAndroid Build Coastguard Workerdeclare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone
642*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: @llvm.x86.avx2.pminu.d on two <8 x i32> arguments.
; The sideeffect "nop" asm ("=x" result, clobbers xmm2-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpminud to take one operand from an (%rsp) slot as a 32-byte folded reload.
643*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @stack_fold_pminud(<8 x i32> %a0, <8 x i32> %a1) {
644*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pminud
645*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpminud {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
646*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
647*9880d681SAndroid Build Coastguard Worker  %2 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1)
648*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %2
649*9880d681SAndroid Build Coastguard Worker}
650*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone
651*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: @llvm.x86.avx2.pminu.w on two <16 x i16> arguments.
; The sideeffect "nop" asm ("=x" result, clobbers xmm2-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpminuw to take one operand from an (%rsp) slot as a 32-byte folded reload.
652*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @stack_fold_pminuw(<16 x i16> %a0, <16 x i16> %a1) {
653*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pminuw
654*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpminuw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
655*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
656*9880d681SAndroid Build Coastguard Worker  %2 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1)
657*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %2
658*9880d681SAndroid Build Coastguard Worker}
659*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone
660*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: sign-extend the low 8 bytes of a <16 x i8> to <8 x i32>
; (shufflevector picks elements 0-7, then sext).
; The sideeffect "nop" asm ("=x" result, clobbers xmm1-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpmovsxbd to read its source from an (%rsp) slot as a 16-byte folded reload.
661*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @stack_fold_pmovsxbd(<16 x i8> %a0) {
662*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pmovsxbd
663*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpmovsxbd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
664*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
665*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
666*9880d681SAndroid Build Coastguard Worker  %3 = sext <8 x i8> %2 to <8 x i32>
667*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %3
668*9880d681SAndroid Build Coastguard Worker}
669*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: sign-extend the low 4 bytes of a <16 x i8> to <4 x i64>
; (shufflevector picks elements 0-3, then sext).
; The sideeffect "nop" asm ("=x" result, clobbers xmm1-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpmovsxbq to read its source from an (%rsp) slot as a 16-byte folded reload.
; The expected mnemonic carries the "v" prefix, matching the sibling
; vpmovsx*/vpmovzx* tests, so the pattern pins the full VEX mnemonic rather
; than matching it only as a substring.
670*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @stack_fold_pmovsxbq(<16 x i8> %a0) {
671*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pmovsxbq
672*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpmovsxbq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
673*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
674*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
675*9880d681SAndroid Build Coastguard Worker  %3 = sext <4 x i8> %2 to <4 x i64>
676*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %3
677*9880d681SAndroid Build Coastguard Worker}
678*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: sign-extend a <16 x i8> to <16 x i16>.
; The sideeffect "nop" asm ("=x" result, clobbers xmm1-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpmovsxbw to read its source from an (%rsp) slot as a 16-byte folded reload.
679*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @stack_fold_pmovsxbw(<16 x i8> %a0) {
680*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pmovsxbw
681*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpmovsxbw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
682*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
683*9880d681SAndroid Build Coastguard Worker  %2 = sext <16 x i8> %a0 to <16 x i16>
684*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %2
685*9880d681SAndroid Build Coastguard Worker}
686*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: sign-extend a <4 x i32> to <4 x i64>.
; The sideeffect "nop" asm ("=x" result, clobbers xmm1-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpmovsxdq to read its source from an (%rsp) slot as a 16-byte folded reload.
687*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @stack_fold_pmovsxdq(<4 x i32> %a0) {
688*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pmovsxdq
689*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpmovsxdq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
690*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
691*9880d681SAndroid Build Coastguard Worker  %2 = sext <4 x i32> %a0 to <4 x i64>
692*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %2
693*9880d681SAndroid Build Coastguard Worker}
694*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: sign-extend an <8 x i16> to <8 x i32>.
; The sideeffect "nop" asm ("=x" result, clobbers xmm1-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpmovsxwd to read its source from an (%rsp) slot as a 16-byte folded reload.
695*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @stack_fold_pmovsxwd(<8 x i16> %a0) {
696*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pmovsxwd
697*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpmovsxwd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
698*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
699*9880d681SAndroid Build Coastguard Worker  %2 = sext <8 x i16> %a0 to <8 x i32>
700*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %2
701*9880d681SAndroid Build Coastguard Worker}
702*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: sign-extend the low 4 words of an <8 x i16> to <4 x i64>
; (shufflevector picks elements 0-3, then sext).
; The sideeffect "nop" asm ("=x" result, clobbers xmm1-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpmovsxwq to read its source from an (%rsp) slot as a 16-byte folded reload.
703*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @stack_fold_pmovsxwq(<8 x i16> %a0) {
704*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pmovsxwq
705*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpmovsxwq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
706*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
707*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
708*9880d681SAndroid Build Coastguard Worker  %3 = sext <4 x i16> %2 to <4 x i64>
709*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %3
710*9880d681SAndroid Build Coastguard Worker}
711*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: zero-extend the low 8 bytes of a <16 x i8> to <8 x i32>
; (shufflevector picks elements 0-7, then zext).
; The sideeffect "nop" asm ("=x" result, clobbers xmm1-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpmovzxbd to read its source from an (%rsp) slot as a 16-byte folded reload.
712*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @stack_fold_pmovzxbd(<16 x i8> %a0) {
713*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pmovzxbd
714*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpmovzxbd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
715*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
716*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
717*9880d681SAndroid Build Coastguard Worker  %3 = zext <8 x i8> %2 to <8 x i32>
718*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %3
719*9880d681SAndroid Build Coastguard Worker}
720*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: zero-extend the low 4 bytes of a <16 x i8> to <4 x i64>
; (shufflevector picks elements 0-3, then zext).
; The sideeffect "nop" asm ("=x" result, clobbers xmm1-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpmovzxbq to read its source from an (%rsp) slot as a 16-byte folded reload.
721*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @stack_fold_pmovzxbq(<16 x i8> %a0) {
722*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pmovzxbq
723*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpmovzxbq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
724*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
725*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
726*9880d681SAndroid Build Coastguard Worker  %3 = zext <4 x i8> %2 to <4 x i64>
727*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %3
728*9880d681SAndroid Build Coastguard Worker}
729*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: zero-extend a <16 x i8> to <16 x i16>.
; The sideeffect "nop" asm ("=x" result, clobbers xmm1-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpmovzxbw to read its source from an (%rsp) slot as a 16-byte folded reload.
730*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @stack_fold_pmovzxbw(<16 x i8> %a0) {
731*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pmovzxbw
732*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpmovzxbw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
733*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
734*9880d681SAndroid Build Coastguard Worker  %2 = zext <16 x i8> %a0 to <16 x i16>
735*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %2
736*9880d681SAndroid Build Coastguard Worker}
737*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: zero-extend a <4 x i32> to <4 x i64>.
; The sideeffect "nop" asm ("=x" result, clobbers xmm1-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpmovzxdq to read its source from an (%rsp) slot as a 16-byte folded reload.
738*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @stack_fold_pmovzxdq(<4 x i32> %a0) {
739*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pmovzxdq
740*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpmovzxdq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
741*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
742*9880d681SAndroid Build Coastguard Worker  %2 = zext <4 x i32> %a0 to <4 x i64>
743*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %2
744*9880d681SAndroid Build Coastguard Worker}
745*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: zero-extend an <8 x i16> to <8 x i32>.
; The sideeffect "nop" asm ("=x" result, clobbers xmm1-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpmovzxwd to read its source from an (%rsp) slot as a 16-byte folded reload.
746*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @stack_fold_pmovzxwd(<8 x i16> %a0) {
747*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pmovzxwd
748*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpmovzxwd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
749*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
750*9880d681SAndroid Build Coastguard Worker  %2 = zext <8 x i16> %a0 to <8 x i32>
751*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %2
752*9880d681SAndroid Build Coastguard Worker}
753*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: zero-extend the low 4 words of an <8 x i16> to <4 x i64>
; (shufflevector picks elements 0-3, then zext).
; The sideeffect "nop" asm ("=x" result, clobbers xmm1-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpmovzxwq to read its source from an (%rsp) slot as a 16-byte folded reload.
754*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @stack_fold_pmovzxwq(<8 x i16> %a0) {
755*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pmovzxwq
756*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpmovzxwq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
757*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
758*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
759*9880d681SAndroid Build Coastguard Worker  %3 = zext <4 x i16> %2 to <4 x i64>
760*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %3
761*9880d681SAndroid Build Coastguard Worker}
762*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: @llvm.x86.avx2.pmul.dq, <8 x i32> inputs, <4 x i64> result.
; The sideeffect "nop" asm ("=x" result, clobbers xmm2-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpmuldq to take one operand from an (%rsp) slot as a 32-byte folded reload.
763*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @stack_fold_pmuldq(<8 x i32> %a0, <8 x i32> %a1) {
764*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pmuldq
765*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpmuldq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
766*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
767*9880d681SAndroid Build Coastguard Worker  %2 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1)
768*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %2
769*9880d681SAndroid Build Coastguard Worker}
770*9880d681SAndroid Build Coastguard Workerdeclare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
771*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: @llvm.x86.avx2.pmul.hr.sw on two <16 x i16> arguments.
; The sideeffect "nop" asm ("=x" result, clobbers xmm2-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpmulhrsw to take one operand from an (%rsp) slot as a 32-byte folded reload.
772*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @stack_fold_pmulhrsw(<16 x i16> %a0, <16 x i16> %a1) {
773*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pmulhrsw
774*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpmulhrsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
775*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
776*9880d681SAndroid Build Coastguard Worker  %2 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1)
777*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %2
778*9880d681SAndroid Build Coastguard Worker}
779*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone
780*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: @llvm.x86.avx2.pmulhu.w on two <16 x i16> arguments.
; The sideeffect "nop" asm ("=x" result, clobbers xmm2-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpmulhuw to take one operand from an (%rsp) slot as a 32-byte folded reload.
781*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @stack_fold_pmulhuw(<16 x i16> %a0, <16 x i16> %a1) {
782*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pmulhuw
783*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpmulhuw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
784*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
785*9880d681SAndroid Build Coastguard Worker  %2 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1)
786*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %2
787*9880d681SAndroid Build Coastguard Worker}
788*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone
789*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: @llvm.x86.avx2.pmulh.w on two <16 x i16> arguments.
; The sideeffect "nop" asm ("=x" result, clobbers xmm2-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpmulhw to take one operand from an (%rsp) slot as a 32-byte folded reload.
790*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @stack_fold_pmulhw(<16 x i16> %a0, <16 x i16> %a1) {
791*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pmulhw
792*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpmulhw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
793*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
794*9880d681SAndroid Build Coastguard Worker  %2 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1)
795*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %2
796*9880d681SAndroid Build Coastguard Worker}
797*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone
798*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: plain IR "mul" of two <8 x i32> values (no intrinsic).
; The sideeffect "nop" asm ("=x" result, clobbers xmm2-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpmulld to take one operand from an (%rsp) slot as a 32-byte folded reload.
799*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @stack_fold_pmulld(<8 x i32> %a0, <8 x i32> %a1) {
800*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pmulld
801*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpmulld {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
802*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
803*9880d681SAndroid Build Coastguard Worker  %2 = mul <8 x i32> %a0, %a1
804*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %2
805*9880d681SAndroid Build Coastguard Worker}
806*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: plain IR "mul" of two <16 x i16> values (no intrinsic).
; The sideeffect "nop" asm ("=x" result, clobbers xmm2-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpmullw to take one operand from an (%rsp) slot as a 32-byte folded reload.
807*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @stack_fold_pmullw(<16 x i16> %a0, <16 x i16> %a1) {
808*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pmullw
809*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpmullw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
810*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
811*9880d681SAndroid Build Coastguard Worker  %2 = mul <16 x i16> %a0, %a1
812*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %2
813*9880d681SAndroid Build Coastguard Worker}
814*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: @llvm.x86.avx2.pmulu.dq, <8 x i32> inputs, <4 x i64> result.
; The sideeffect "nop" asm ("=x" result, clobbers xmm2-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpmuludq to take one operand from an (%rsp) slot as a 32-byte folded reload.
815*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @stack_fold_pmuludq(<8 x i32> %a0, <8 x i32> %a1) {
816*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pmuludq
817*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpmuludq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
818*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
819*9880d681SAndroid Build Coastguard Worker  %2 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1)
820*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %2
821*9880d681SAndroid Build Coastguard Worker}
822*9880d681SAndroid Build Coastguard Workerdeclare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone
823*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: plain IR "or" of two <32 x i8> values, followed by an add
; of a splat of 1 which, per the inline comment below, forces the execution
; domain so that the integer vpor form is the one selected.
; The sideeffect "nop" asm ("=x" result, clobbers xmm2-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpor to take one operand from an (%rsp) slot as a 32-byte folded reload.
824*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @stack_fold_por(<32 x i8> %a0, <32 x i8> %a1) {
825*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_por
826*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpor {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
827*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
828*9880d681SAndroid Build Coastguard Worker  %2 = or <32 x i8> %a0, %a1
829*9880d681SAndroid Build Coastguard Worker  ; add forces execution domain
830*9880d681SAndroid Build Coastguard Worker  %3 = add <32 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
831*9880d681SAndroid Build Coastguard Worker  ret <32 x i8> %3
832*9880d681SAndroid Build Coastguard Worker}
833*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: @llvm.x86.avx2.psad.bw, <32 x i8> inputs, <4 x i64> result.
; The sideeffect "nop" asm ("=x" result, clobbers xmm2-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpsadbw to take one operand from an (%rsp) slot as a 32-byte folded reload.
834*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @stack_fold_psadbw(<32 x i8> %a0, <32 x i8> %a1) {
835*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psadbw
836*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsadbw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
837*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
838*9880d681SAndroid Build Coastguard Worker  %2 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1)
839*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %2
840*9880d681SAndroid Build Coastguard Worker}
841*9880d681SAndroid Build Coastguard Workerdeclare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone
842*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: @llvm.x86.avx2.pshuf.b on two <32 x i8> arguments.
; The sideeffect "nop" asm ("=x" result, clobbers xmm2-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpshufb to take one operand from an (%rsp) slot as a 32-byte folded reload.
843*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @stack_fold_pshufb(<32 x i8> %a0, <32 x i8> %a1) {
844*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pshufb
845*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpshufb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
846*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
847*9880d681SAndroid Build Coastguard Worker  %2 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1)
848*9880d681SAndroid Build Coastguard Worker  ret <32 x i8> %2
849*9880d681SAndroid Build Coastguard Worker}
850*9880d681SAndroid Build Coastguard Workerdeclare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone
851*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: shufflevector on an <8 x i32> that reverses the four
; elements inside each 128-bit half (mask 3,2,1,0 / 7,6,5,4); the expected
; immediate is $27.
; The sideeffect "nop" asm ("=x" result, clobbers xmm1-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpshufd to read its source from an (%rsp) slot as a 32-byte folded reload.
852*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @stack_fold_pshufd(<8 x i32> %a0) {
853*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pshufd
854*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpshufd $27, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
855*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
856*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
857*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %2
858*9880d681SAndroid Build Coastguard Worker}
859*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: shufflevector on a <16 x i16> that keeps words 0-3 and 8-11
; in place and reverses words 4-7 and 12-15 (the upper four words of each
; 128-bit half); the expected immediate is $27.
; The sideeffect "nop" asm ("=x" result, clobbers xmm1-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpshufhw to read its source from an (%rsp) slot as a 32-byte folded reload.
860*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @stack_fold_vpshufhw(<16 x i16> %a0) {
861*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_vpshufhw
862*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpshufhw $27, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
863*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
864*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 13, i32 12>
865*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %2
866*9880d681SAndroid Build Coastguard Worker}
867*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: shufflevector on a <16 x i16> that reverses words 0-3 and
; 8-11 (the lower four words of each 128-bit half) and keeps words 4-7 and
; 12-15 in place; the expected immediate is $27.
; The sideeffect "nop" asm ("=x" result, clobbers xmm1-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpshuflw to read its source from an (%rsp) slot as a 32-byte folded reload.
868*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @stack_fold_vpshuflw(<16 x i16> %a0) {
869*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_vpshuflw
870*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpshuflw $27, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
871*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
872*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15>
873*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %2
874*9880d681SAndroid Build Coastguard Worker}
875*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: @llvm.x86.avx2.psign.b on two <32 x i8> arguments.
; The sideeffect "nop" asm ("=x" result, clobbers xmm2-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpsignb to take one operand from an (%rsp) slot as a 32-byte folded reload.
876*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @stack_fold_psignb(<32 x i8> %a0, <32 x i8> %a1) {
877*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psignb
878*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsignb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
879*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
880*9880d681SAndroid Build Coastguard Worker  %2 = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1)
881*9880d681SAndroid Build Coastguard Worker  ret <32 x i8> %2
882*9880d681SAndroid Build Coastguard Worker}
883*9880d681SAndroid Build Coastguard Workerdeclare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone
884*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: @llvm.x86.avx2.psign.d on two <8 x i32> arguments.
; The sideeffect "nop" asm ("=x" result, clobbers xmm2-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpsignd to take one operand from an (%rsp) slot as a 32-byte folded reload.
885*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @stack_fold_psignd(<8 x i32> %a0, <8 x i32> %a1) {
886*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psignd
887*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsignd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
888*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
889*9880d681SAndroid Build Coastguard Worker  %2 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1)
890*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %2
891*9880d681SAndroid Build Coastguard Worker}
892*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone
893*9880d681SAndroid Build Coastguard Worker
; Stack-fold test: @llvm.x86.avx2.psign.w on two <16 x i16> arguments.
; The sideeffect "nop" asm ("=x" result, clobbers xmm2-xmm15 and flags) is the
; file's device for forcing a register spill; the FileCheck pattern expects
; vpsignw to take one operand from an (%rsp) slot as a 32-byte folded reload.
894*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @stack_fold_psignw(<16 x i16> %a0, <16 x i16> %a1) {
895*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psignw
896*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsignw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
897*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
898*9880d681SAndroid Build Coastguard Worker  %2 = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1)
899*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %2
900*9880d681SAndroid Build Coastguard Worker}
901*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone
902*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpslld (ymm): the FileCheck pattern expects a (%rsp) memory
; operand tagged "16-byte Folded Reload", the size of the <4 x i32> %a1 argument.
903*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @stack_fold_pslld(<8 x i32> %a0, <4 x i32> %a1) {
904*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pslld
905*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpslld {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
906*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
907*9880d681SAndroid Build Coastguard Worker  %2 = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1)
908*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %2
909*9880d681SAndroid Build Coastguard Worker}
; AVX2 intrinsic called by stack_fold_pslld above.
910*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
911*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsllq (ymm): the FileCheck pattern expects a (%rsp) memory
; operand tagged "16-byte Folded Reload", the size of the <2 x i64> %a1 argument.
912*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @stack_fold_psllq(<4 x i64> %a0, <2 x i64> %a1) {
913*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psllq
914*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsllq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
915*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
916*9880d681SAndroid Build Coastguard Worker  %2 = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1)
917*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %2
918*9880d681SAndroid Build Coastguard Worker}
; AVX2 intrinsic called by stack_fold_psllq above.
919*9880d681SAndroid Build Coastguard Workerdeclare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone
920*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsllvd (xmm): the FileCheck pattern expects the instruction
; to read one operand directly from a (%rsp) slot, tagged "16-byte Folded Reload".
921*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @stack_fold_psllvd(<4 x i32> %a0, <4 x i32> %a1) {
922*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psllvd
923*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsllvd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
924*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
925*9880d681SAndroid Build Coastguard Worker  %2 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1)
926*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %2
927*9880d681SAndroid Build Coastguard Worker}
; AVX2 intrinsic called by stack_fold_psllvd above.
928*9880d681SAndroid Build Coastguard Workerdeclare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone
929*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsllvd (ymm): the FileCheck pattern expects the instruction
; to read one operand directly from a (%rsp) slot, tagged "32-byte Folded Reload".
930*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @stack_fold_psllvd_ymm(<8 x i32> %a0, <8 x i32> %a1) {
931*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psllvd_ymm
932*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsllvd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
933*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
934*9880d681SAndroid Build Coastguard Worker  %2 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1)
935*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %2
936*9880d681SAndroid Build Coastguard Worker}
; AVX2 intrinsic called by stack_fold_psllvd_ymm above.
937*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
938*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsllvq (xmm): the FileCheck pattern expects the instruction
; to read one operand directly from a (%rsp) slot, tagged "16-byte Folded Reload".
939*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @stack_fold_psllvq(<2 x i64> %a0, <2 x i64> %a1) {
940*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psllvq
941*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsllvq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
942*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
943*9880d681SAndroid Build Coastguard Worker  %2 = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1)
944*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %2
945*9880d681SAndroid Build Coastguard Worker}
; AVX2 intrinsic called by stack_fold_psllvq above.
946*9880d681SAndroid Build Coastguard Workerdeclare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone
947*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsllvq (ymm): the FileCheck pattern expects the instruction
; to read one operand directly from a (%rsp) slot, tagged "32-byte Folded Reload".
948*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @stack_fold_psllvq_ymm(<4 x i64> %a0, <4 x i64> %a1) {
949*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psllvq_ymm
950*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsllvq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
951*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
952*9880d681SAndroid Build Coastguard Worker  %2 = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1)
953*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %2
954*9880d681SAndroid Build Coastguard Worker}
; AVX2 intrinsic called by stack_fold_psllvq_ymm above.
955*9880d681SAndroid Build Coastguard Workerdeclare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
956*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsllw (ymm): the FileCheck pattern expects a (%rsp) memory
; operand tagged "16-byte Folded Reload", the size of the <8 x i16> %a1 argument.
957*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @stack_fold_psllw(<16 x i16> %a0, <8 x i16> %a1) {
958*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psllw
959*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsllw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
960*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
961*9880d681SAndroid Build Coastguard Worker  %2 = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1)
962*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %2
963*9880d681SAndroid Build Coastguard Worker}
; AVX2 intrinsic called by stack_fold_psllw above.
964*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone
965*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsrad (ymm): the FileCheck pattern expects a (%rsp) memory
; operand tagged "16-byte Folded Reload", the size of the <4 x i32> %a1 argument.
966*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @stack_fold_psrad(<8 x i32> %a0, <4 x i32> %a1) {
967*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psrad
968*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsrad {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
969*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
970*9880d681SAndroid Build Coastguard Worker  %2 = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1)
971*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %2
972*9880d681SAndroid Build Coastguard Worker}
; AVX2 intrinsic called by stack_fold_psrad above.
973*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone
974*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsravd (xmm): the FileCheck pattern expects the instruction
; to read one operand directly from a (%rsp) slot, tagged "16-byte Folded Reload".
975*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @stack_fold_psravd(<4 x i32> %a0, <4 x i32> %a1) {
976*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psravd
977*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsravd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
978*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
979*9880d681SAndroid Build Coastguard Worker  %2 = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1)
980*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %2
981*9880d681SAndroid Build Coastguard Worker}
; AVX2 intrinsic called by stack_fold_psravd above.
982*9880d681SAndroid Build Coastguard Workerdeclare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone
983*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsravd (ymm): the FileCheck pattern expects the instruction
; to read one operand directly from a (%rsp) slot, tagged "32-byte Folded Reload".
984*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @stack_fold_psravd_ymm(<8 x i32> %a0, <8 x i32> %a1) {
985*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psravd_ymm
986*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsravd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
987*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
988*9880d681SAndroid Build Coastguard Worker  %2 = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1)
989*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %2
990*9880d681SAndroid Build Coastguard Worker}
; AVX2 intrinsic called by stack_fold_psravd_ymm above.
991*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
992*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsraw (ymm): the FileCheck pattern expects a (%rsp) memory
; operand tagged "16-byte Folded Reload", the size of the <8 x i16> %a1 argument.
993*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @stack_fold_psraw(<16 x i16> %a0, <8 x i16> %a1) {
994*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psraw
995*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsraw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
996*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
997*9880d681SAndroid Build Coastguard Worker  %2 = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1)
998*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %2
999*9880d681SAndroid Build Coastguard Worker}
; AVX2 intrinsic called by stack_fold_psraw above.
1000*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone
1001*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsrld (ymm): the FileCheck pattern expects a (%rsp) memory
; operand tagged "16-byte Folded Reload", the size of the <4 x i32> %a1 argument.
1002*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @stack_fold_psrld(<8 x i32> %a0, <4 x i32> %a1) {
1003*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psrld
1004*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsrld {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
1005*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1006*9880d681SAndroid Build Coastguard Worker  %2 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1)
1007*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %2
1008*9880d681SAndroid Build Coastguard Worker}
; AVX2 intrinsic called by stack_fold_psrld above.
1009*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
1010*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsrlq (ymm): the FileCheck pattern expects a (%rsp) memory
; operand tagged "16-byte Folded Reload", the size of the <2 x i64> %a1 argument.
1011*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @stack_fold_psrlq(<4 x i64> %a0, <2 x i64> %a1) {
1012*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psrlq
1013*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsrlq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
1014*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1015*9880d681SAndroid Build Coastguard Worker  %2 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1)
1016*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %2
1017*9880d681SAndroid Build Coastguard Worker}
; AVX2 intrinsic called by stack_fold_psrlq above.
1018*9880d681SAndroid Build Coastguard Workerdeclare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone
1019*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsrlvd (xmm): the FileCheck pattern expects the instruction
; to read one operand directly from a (%rsp) slot, tagged "16-byte Folded Reload".
1020*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @stack_fold_psrlvd(<4 x i32> %a0, <4 x i32> %a1) {
1021*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psrlvd
1022*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsrlvd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
1023*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1024*9880d681SAndroid Build Coastguard Worker  %2 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1)
1025*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %2
1026*9880d681SAndroid Build Coastguard Worker}
; AVX2 intrinsic called by stack_fold_psrlvd above.
1027*9880d681SAndroid Build Coastguard Workerdeclare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone
1028*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsrlvd (ymm): the FileCheck pattern expects the instruction
; to read one operand directly from a (%rsp) slot, tagged "32-byte Folded Reload".
1029*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @stack_fold_psrlvd_ymm(<8 x i32> %a0, <8 x i32> %a1) {
1030*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psrlvd_ymm
1031*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsrlvd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
1032*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1033*9880d681SAndroid Build Coastguard Worker  %2 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1)
1034*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %2
1035*9880d681SAndroid Build Coastguard Worker}
; AVX2 intrinsic called by stack_fold_psrlvd_ymm above.
1036*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
1037*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsrlvq (xmm): the FileCheck pattern expects the instruction
; to read one operand directly from a (%rsp) slot, tagged "16-byte Folded Reload".
1038*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @stack_fold_psrlvq(<2 x i64> %a0, <2 x i64> %a1) {
1039*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psrlvq
1040*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsrlvq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
1041*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1042*9880d681SAndroid Build Coastguard Worker  %2 = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1)
1043*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %2
1044*9880d681SAndroid Build Coastguard Worker}
; AVX2 intrinsic called by stack_fold_psrlvq above.
1045*9880d681SAndroid Build Coastguard Workerdeclare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone
1046*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsrlvq (ymm): the FileCheck pattern expects the instruction
; to read one operand directly from a (%rsp) slot, tagged "32-byte Folded Reload".
1047*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @stack_fold_psrlvq_ymm(<4 x i64> %a0, <4 x i64> %a1) {
1048*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psrlvq_ymm
1049*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsrlvq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
1050*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1051*9880d681SAndroid Build Coastguard Worker  %2 = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1)
1052*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %2
1053*9880d681SAndroid Build Coastguard Worker}
; AVX2 intrinsic called by stack_fold_psrlvq_ymm above.
1054*9880d681SAndroid Build Coastguard Workerdeclare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
1055*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsrlw (ymm): the FileCheck pattern expects a (%rsp) memory
; operand tagged "16-byte Folded Reload", the size of the <8 x i16> %a1 argument.
1056*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @stack_fold_psrlw(<16 x i16> %a0, <8 x i16> %a1) {
1057*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psrlw
1058*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsrlw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
1059*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1060*9880d681SAndroid Build Coastguard Worker  %2 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1)
1061*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %2
1062*9880d681SAndroid Build Coastguard Worker}
; AVX2 intrinsic called by stack_fold_psrlw above.
1063*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone
1064*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsubb (ymm), written as a plain IR `sub` on <32 x i8>: the
; FileCheck pattern expects a (%rsp) operand tagged "32-byte Folded Reload".
1065*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @stack_fold_psubb(<32 x i8> %a0, <32 x i8> %a1) {
1066*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psubb
1067*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsubb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
1068*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1069*9880d681SAndroid Build Coastguard Worker  %2 = sub <32 x i8> %a0, %a1
1070*9880d681SAndroid Build Coastguard Worker  ret <32 x i8> %2
1071*9880d681SAndroid Build Coastguard Worker}
1072*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsubd (ymm), written as a plain IR `sub` on <8 x i32>: the
; FileCheck pattern expects a (%rsp) operand tagged "32-byte Folded Reload".
1073*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @stack_fold_psubd(<8 x i32> %a0, <8 x i32> %a1) {
1074*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psubd
1075*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsubd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
1076*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1077*9880d681SAndroid Build Coastguard Worker  %2 = sub <8 x i32> %a0, %a1
1078*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %2
1079*9880d681SAndroid Build Coastguard Worker}
1080*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsubq (ymm), written as a plain IR `sub` on <4 x i64>: the
; FileCheck pattern expects a (%rsp) operand tagged "32-byte Folded Reload".
1081*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @stack_fold_psubq(<4 x i64> %a0, <4 x i64> %a1) {
1082*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psubq
1083*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsubq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
1084*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1085*9880d681SAndroid Build Coastguard Worker  %2 = sub <4 x i64> %a0, %a1
1086*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %2
1087*9880d681SAndroid Build Coastguard Worker}
1088*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsubsb (ymm): the FileCheck pattern expects the instruction
; to read one operand directly from a (%rsp) slot, tagged "32-byte Folded Reload".
1089*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @stack_fold_psubsb(<32 x i8> %a0, <32 x i8> %a1) {
1090*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psubsb
1091*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsubsb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
1092*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1093*9880d681SAndroid Build Coastguard Worker  %2 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a0, <32 x i8> %a1)
1094*9880d681SAndroid Build Coastguard Worker  ret <32 x i8> %2
1095*9880d681SAndroid Build Coastguard Worker}
; AVX2 intrinsic called by stack_fold_psubsb above.
1096*9880d681SAndroid Build Coastguard Workerdeclare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone
1097*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsubsw (ymm): the FileCheck pattern expects the instruction
; to read one operand directly from a (%rsp) slot, tagged "32-byte Folded Reload".
1098*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @stack_fold_psubsw(<16 x i16> %a0, <16 x i16> %a1) {
1099*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psubsw
1100*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsubsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
1101*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1102*9880d681SAndroid Build Coastguard Worker  %2 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a0, <16 x i16> %a1)
1103*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %2
1104*9880d681SAndroid Build Coastguard Worker}
; AVX2 intrinsic called by stack_fold_psubsw above.
1105*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone
1106*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsubusb (ymm): the FileCheck pattern expects the instruction
; to read one operand directly from a (%rsp) slot, tagged "32-byte Folded Reload".
1107*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @stack_fold_psubusb(<32 x i8> %a0, <32 x i8> %a1) {
1108*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psubusb
1109*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsubusb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
1110*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1111*9880d681SAndroid Build Coastguard Worker  %2 = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %a0, <32 x i8> %a1)
1112*9880d681SAndroid Build Coastguard Worker  ret <32 x i8> %2
1113*9880d681SAndroid Build Coastguard Worker}
; AVX2 intrinsic called by stack_fold_psubusb above.
1114*9880d681SAndroid Build Coastguard Workerdeclare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone
1115*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsubusw (ymm): the FileCheck pattern expects the instruction
; to read one operand directly from a (%rsp) slot, tagged "32-byte Folded Reload".
1116*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @stack_fold_psubusw(<16 x i16> %a0, <16 x i16> %a1) {
1117*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psubusw
1118*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsubusw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
1119*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1120*9880d681SAndroid Build Coastguard Worker  %2 = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %a0, <16 x i16> %a1)
1121*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %2
1122*9880d681SAndroid Build Coastguard Worker}
; AVX2 intrinsic called by stack_fold_psubusw above.
1123*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone
1124*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpsubw (ymm), written as a plain IR `sub` on <16 x i16>: the
; FileCheck pattern expects a (%rsp) operand tagged "32-byte Folded Reload".
1125*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @stack_fold_psubw(<16 x i16> %a0, <16 x i16> %a1) {
1126*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_psubw
1127*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpsubw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
1128*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1129*9880d681SAndroid Build Coastguard Worker  %2 = sub <16 x i16> %a0, %a1
1130*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %2
1131*9880d681SAndroid Build Coastguard Worker}
1132*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpunpckhbw (ymm), expressed as a shufflevector: the FileCheck
; pattern expects a (%rsp) operand tagged "32-byte Folded Reload".
1133*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @stack_fold_punpckhbw(<32 x i8> %a0, <32 x i8> %a1) {
1134*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_punpckhbw
1135*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpunpckhbw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
1136*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Mask interleaves elements 8-15 and 24-31 of %a0 with the same positions of
  ; %a1 (mask indices 40-47 and 56-63).
1137*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
1138*9880d681SAndroid Build Coastguard Worker  ret <32 x i8> %2
1139*9880d681SAndroid Build Coastguard Worker}
1140*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpunpckhdq (ymm), expressed as a shufflevector followed by
; an integer add: the FileCheck pattern expects a (%rsp) operand tagged
; "32-byte Folded Reload".
1141*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @stack_fold_punpckhdq(<8 x i32> %a0, <8 x i32> %a1) {
1142*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_punpckhdq
1143*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpunpckhdq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
1144*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Mask interleaves elements 2-3 and 6-7 of %a0 with the same positions of %a1
  ; (mask indices 10-11 and 14-15).
1145*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1146*9880d681SAndroid Build Coastguard Worker  ; add forces execution domain
1147*9880d681SAndroid Build Coastguard Worker  %3 = add <8 x i32> %2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1148*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %3
1149*9880d681SAndroid Build Coastguard Worker}
1150*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpunpckhqdq (ymm), expressed as a shufflevector followed by
; an integer add: the FileCheck pattern expects a (%rsp) operand tagged
; "32-byte Folded Reload".
1151*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @stack_fold_punpckhqdq(<4 x i64> %a0, <4 x i64> %a1) {
1152*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_punpckhqdq
1153*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpunpckhqdq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
1154*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Mask interleaves elements 1 and 3 of %a0 with the same positions of %a1
  ; (mask indices 5 and 7).
1155*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1156*9880d681SAndroid Build Coastguard Worker  ; add forces execution domain
1157*9880d681SAndroid Build Coastguard Worker  %3 = add <4 x i64> %2, <i64 1, i64 1, i64 1, i64 1>
1158*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %3
1159*9880d681SAndroid Build Coastguard Worker}
1160*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpunpckhwd (ymm), expressed as a shufflevector: the FileCheck
; pattern expects a (%rsp) operand tagged "32-byte Folded Reload".
1161*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @stack_fold_punpckhwd(<16 x i16> %a0, <16 x i16> %a1) {
1162*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_punpckhwd
1163*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpunpckhwd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
1164*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Mask interleaves elements 4-7 and 12-15 of %a0 with the same positions of
  ; %a1 (mask indices 20-23 and 28-31).
1165*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1166*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %2
1167*9880d681SAndroid Build Coastguard Worker}
1168*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpunpcklbw (ymm), expressed as a shufflevector: the FileCheck
; pattern expects a (%rsp) operand tagged "32-byte Folded Reload".
1169*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @stack_fold_punpcklbw(<32 x i8> %a0, <32 x i8> %a1) {
1170*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_punpcklbw
1171*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpunpcklbw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
1172*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Mask interleaves elements 0-7 and 16-23 of %a0 with the same positions of
  ; %a1 (mask indices 32-39 and 48-55).
1173*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
1174*9880d681SAndroid Build Coastguard Worker  ret <32 x i8> %2
1175*9880d681SAndroid Build Coastguard Worker}
1176*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpunpckldq (ymm), expressed as a shufflevector followed by
; an integer add: the FileCheck pattern expects a (%rsp) operand tagged
; "32-byte Folded Reload".
1177*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @stack_fold_punpckldq(<8 x i32> %a0, <8 x i32> %a1) {
1178*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_punpckldq
1179*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpunpckldq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
1180*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Mask interleaves elements 0-1 and 4-5 of %a0 with the same positions of %a1
  ; (mask indices 8-9 and 12-13).
1181*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
1182*9880d681SAndroid Build Coastguard Worker  ; add forces execution domain
1183*9880d681SAndroid Build Coastguard Worker  %3 = add <8 x i32> %2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1184*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %3
1185*9880d681SAndroid Build Coastguard Worker}
1186*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for vpunpcklqdq (ymm), expressed as a shufflevector followed by
; an integer add: the FileCheck pattern expects a (%rsp) operand tagged
; "32-byte Folded Reload".
1187*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @stack_fold_punpcklqdq(<4 x i64> %a0, <4 x i64> %a1) {
1188*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_punpcklqdq
1189*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpunpcklqdq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  ; The side-effecting "nop" asm has one "=x" output and clobbers xmm2-xmm15 and
  ; flags; per the file header this forces a spill so a reload can be folded.
1190*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Mask interleaves elements 0 and 2 of %a0 with the same positions of %a1
  ; (mask indices 4 and 6).
1191*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1192*9880d681SAndroid Build Coastguard Worker  ; add forces execution domain
1193*9880d681SAndroid Build Coastguard Worker  %3 = add <4 x i64> %2, <i64 1, i64 1, i64 1, i64 1>
1194*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %3
1195*9880d681SAndroid Build Coastguard Worker}
1196*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for the 256-bit vpunpcklwd instruction.
; The FileCheck directives below expect a vpunpcklwd whose first operand is an
; (%rsp)-relative memory operand tagged as a 32-byte folded reload.
1197*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @stack_fold_punpcklwd(<16 x i16> %a0, <16 x i16> %a1) {
1198*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_punpcklwd
1199*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpunpcklwd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  ; Side-effecting inline-asm "nop" with one "=x" output (%1, never used) that
  ; clobbers xmm2-xmm15 and the flags. Per the note at the top of the file,
  ; this is what forces the partial register spill that the test relies on.
1200*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Interleave elements 0-3 of %a0 with elements 0-3 of %a1, then elements
  ; 8-11 of %a0 with elements 8-11 of %a1 (mask indices 16 and above select
  ; from %a1).
  ; NOTE(review): unlike the dq/qdq tests in this file there is no trailing
  ; add; presumably no floating-point-domain form competes for a word
  ; interleave, so none is needed. Confirm.
1201*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
1202*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %2
1203*9880d681SAndroid Build Coastguard Worker}
1204*9880d681SAndroid Build Coastguard Worker
; Stack-fold test for the 256-bit vpxor instruction.
; The FileCheck directives below expect a vpxor whose first operand is an
; (%rsp)-relative memory operand tagged as a 32-byte folded reload.
1205*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @stack_fold_pxor(<32 x i8> %a0, <32 x i8> %a1) {
1206*9880d681SAndroid Build Coastguard Worker  ;CHECK-LABEL: stack_fold_pxor
1207*9880d681SAndroid Build Coastguard Worker  ;CHECK:       vpxor {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  ; Side-effecting inline-asm "nop" with one "=x" output (%1, never used) that
  ; clobbers xmm2-xmm15 and the flags. Per the note at the top of the file,
  ; this is what forces the partial register spill that the test relies on.
1208*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Element-wise bitwise XOR of the two 32 x i8 inputs; this is the operation
  ; the folded reload is expected to feed.
1209*9880d681SAndroid Build Coastguard Worker  %2 = xor <32 x i8> %a0, %a1
  ; NOTE(review): presumably the integer add below keeps the xor from being
  ; selected as a floating-point-domain equivalent; confirm against the X86
  ; execution-domain handling.
1210*9880d681SAndroid Build Coastguard Worker  ; add forces execution domain
1211*9880d681SAndroid Build Coastguard Worker  %3 = add <32 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1212*9880d681SAndroid Build Coastguard Worker  ret <32 x i8> %3
1213*9880d681SAndroid Build Coastguard Worker}
1214