xref: /aosp_15_r20/external/vixl/test/aarch64/test-trace-aarch64.cc (revision f5c631da2f1efdd72b5fd1e20510e4042af13d77)
1*f5c631daSSadaf Ebrahimi // Copyright 2016, VIXL authors
2*f5c631daSSadaf Ebrahimi // All rights reserved.
3*f5c631daSSadaf Ebrahimi //
4*f5c631daSSadaf Ebrahimi // Redistribution and use in source and binary forms, with or without
5*f5c631daSSadaf Ebrahimi // modification, are permitted provided that the following conditions are met:
6*f5c631daSSadaf Ebrahimi //
7*f5c631daSSadaf Ebrahimi //   * Redistributions of source code must retain the above copyright notice,
8*f5c631daSSadaf Ebrahimi //     this list of conditions and the following disclaimer.
9*f5c631daSSadaf Ebrahimi //   * Redistributions in binary form must reproduce the above copyright notice,
10*f5c631daSSadaf Ebrahimi //     this list of conditions and the following disclaimer in the documentation
11*f5c631daSSadaf Ebrahimi //     and/or other materials provided with the distribution.
12*f5c631daSSadaf Ebrahimi //   * Neither the name of ARM Limited nor the names of its contributors may be
13*f5c631daSSadaf Ebrahimi //     used to endorse or promote products derived from this software without
14*f5c631daSSadaf Ebrahimi //     specific prior written permission.
15*f5c631daSSadaf Ebrahimi //
16*f5c631daSSadaf Ebrahimi // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17*f5c631daSSadaf Ebrahimi // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18*f5c631daSSadaf Ebrahimi // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19*f5c631daSSadaf Ebrahimi // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20*f5c631daSSadaf Ebrahimi // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21*f5c631daSSadaf Ebrahimi // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22*f5c631daSSadaf Ebrahimi // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23*f5c631daSSadaf Ebrahimi // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24*f5c631daSSadaf Ebrahimi // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25*f5c631daSSadaf Ebrahimi // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*f5c631daSSadaf Ebrahimi 
27*f5c631daSSadaf Ebrahimi #include <cfloat>
28*f5c631daSSadaf Ebrahimi #include <cmath>
29*f5c631daSSadaf Ebrahimi #include <cstdio>
30*f5c631daSSadaf Ebrahimi #include <cstdlib>
31*f5c631daSSadaf Ebrahimi #include <cstring>
32*f5c631daSSadaf Ebrahimi 
33*f5c631daSSadaf Ebrahimi #include <fstream>
34*f5c631daSSadaf Ebrahimi #include <regex>
35*f5c631daSSadaf Ebrahimi 
36*f5c631daSSadaf Ebrahimi #include "test-runner.h"
37*f5c631daSSadaf Ebrahimi #include "test-utils-aarch64.h"
38*f5c631daSSadaf Ebrahimi 
39*f5c631daSSadaf Ebrahimi #include "aarch64/cpu-aarch64.h"
40*f5c631daSSadaf Ebrahimi #include "aarch64/disasm-aarch64.h"
41*f5c631daSSadaf Ebrahimi #include "aarch64/macro-assembler-aarch64.h"
42*f5c631daSSadaf Ebrahimi #include "aarch64/simulator-aarch64.h"
43*f5c631daSSadaf Ebrahimi 
44*f5c631daSSadaf Ebrahimi namespace vixl {
45*f5c631daSSadaf Ebrahimi namespace aarch64 {
46*f5c631daSSadaf Ebrahimi 
47*f5c631daSSadaf Ebrahimi #define __ masm->
48*f5c631daSSadaf Ebrahimi #define TEST(name) TEST_(TRACE_##name)
49*f5c631daSSadaf Ebrahimi 
50*f5c631daSSadaf Ebrahimi #define REF(name) "test/test-trace-reference/" name
51*f5c631daSSadaf Ebrahimi 
// Issues the base instruction sequence (integer data-processing, conditional,
// system/barrier, and integer load/store instructions) on `masm`, followed by
// a few branch patterns.
//
// Every `__ insn(...)` line expands to `masm->insn(...)` (see the `__` macro
// above), so there is one assembler call per line, made in the order written.
// The straight-line part of the list is kept in alphabetical order of
// mnemonic (adc ... uxtw); the branch tests come last.
//
// Register usage visible in this function: x0 is only ever used as an address
// operand (plain/offset MemOperands, `dc`, `ic`), and x1 only as the base of
// the pre-/post-index MemOperands; neither is named as a destination.
// NOTE(review): presumably the caller points both at valid scratch memory
// before the generated code runs -- confirm at the call site.
GenerateTestSequenceBase(MacroAssembler * masm)52*f5c631daSSadaf Ebrahimi static void GenerateTestSequenceBase(MacroAssembler* masm) {
  // Scope object that lives for the rest of the function. It is constructed
  // with the buffer's remaining byte count and the kMaximumSize policy.
  // NOTE(review): presumably this makes the size an upper bound rather than
  // an exact requirement -- confirm against the ExactAssemblyScope docs.
53*f5c631daSSadaf Ebrahimi   ExactAssemblyScope guard(masm,
54*f5c631daSSadaf Ebrahimi                            masm->GetBuffer()->GetRemainingBytes(),
55*f5c631daSSadaf Ebrahimi                            ExactAssemblyScope::kMaximumSize);
56*f5c631daSSadaf Ebrahimi 
57*f5c631daSSadaf Ebrahimi   __ adc(w3, w4, w5);
58*f5c631daSSadaf Ebrahimi   __ adc(x6, x7, x8);
59*f5c631daSSadaf Ebrahimi   __ adcs(w9, w10, w11);
60*f5c631daSSadaf Ebrahimi   __ adcs(x12, x13, x14);
61*f5c631daSSadaf Ebrahimi   __ add(w15, w16, w17);
62*f5c631daSSadaf Ebrahimi   __ add(x18, x19, x20);
63*f5c631daSSadaf Ebrahimi   __ adds(w21, w22, w23);
64*f5c631daSSadaf Ebrahimi   __ adds(x24, x25, x26);
65*f5c631daSSadaf Ebrahimi   __ and_(w27, w28, w29);
66*f5c631daSSadaf Ebrahimi   __ and_(x2, x3, x4);
67*f5c631daSSadaf Ebrahimi   __ ands(w5, w6, w7);
68*f5c631daSSadaf Ebrahimi   __ ands(x8, x9, x10);
69*f5c631daSSadaf Ebrahimi   __ asr(w11, w12, 0);
70*f5c631daSSadaf Ebrahimi   __ asr(x13, x14, 1);
71*f5c631daSSadaf Ebrahimi   __ asrv(w15, w16, w17);
72*f5c631daSSadaf Ebrahimi   __ asrv(x18, x19, x20);
73*f5c631daSSadaf Ebrahimi   __ bfm(w21, w22, 5, 6);
74*f5c631daSSadaf Ebrahimi   __ bfm(x23, x24, 7, 8);
75*f5c631daSSadaf Ebrahimi   __ bic(w25, w26, w27);
76*f5c631daSSadaf Ebrahimi   __ bic(x28, x29, x2);
77*f5c631daSSadaf Ebrahimi   __ bics(w3, w4, w5);
78*f5c631daSSadaf Ebrahimi   __ bics(x6, x7, x8);
79*f5c631daSSadaf Ebrahimi   __ ccmn(w9, w10, NoFlag, al);
80*f5c631daSSadaf Ebrahimi   __ ccmn(w9, w10, NoFlag, eq);
81*f5c631daSSadaf Ebrahimi   __ ccmn(w9, w10, NoFlag, ne);
82*f5c631daSSadaf Ebrahimi   __ ccmn(x11, x12, CFlag, al);
83*f5c631daSSadaf Ebrahimi   __ ccmn(x11, x12, CFlag, cc);
84*f5c631daSSadaf Ebrahimi   __ ccmn(x11, x12, CFlag, cs);
85*f5c631daSSadaf Ebrahimi   __ ccmp(w13, w14, VFlag, al);
86*f5c631daSSadaf Ebrahimi   __ ccmp(w13, w14, VFlag, hi);
87*f5c631daSSadaf Ebrahimi   __ ccmp(w13, w14, VFlag, ls);
88*f5c631daSSadaf Ebrahimi   __ ccmp(x15, x16, CVFlag, al);
89*f5c631daSSadaf Ebrahimi   __ ccmp(x15, x16, CVFlag, eq);
90*f5c631daSSadaf Ebrahimi   __ ccmp(x15, x16, CVFlag, ne);
91*f5c631daSSadaf Ebrahimi   __ cinc(w17, w18, cc);
92*f5c631daSSadaf Ebrahimi   __ cinc(w17, w18, cs);
93*f5c631daSSadaf Ebrahimi   __ cinc(x19, x20, hi);
94*f5c631daSSadaf Ebrahimi   __ cinc(x19, x20, ls);
95*f5c631daSSadaf Ebrahimi   __ cinv(w21, w22, eq);
96*f5c631daSSadaf Ebrahimi   __ cinv(w21, w22, ne);
97*f5c631daSSadaf Ebrahimi   __ cinv(x23, x24, cc);
98*f5c631daSSadaf Ebrahimi   __ cinv(x23, x24, cs);
99*f5c631daSSadaf Ebrahimi   __ clrex();
100*f5c631daSSadaf Ebrahimi   __ cls(w25, w26);
101*f5c631daSSadaf Ebrahimi   __ cls(x27, x28);
102*f5c631daSSadaf Ebrahimi   __ clz(w29, w2);
103*f5c631daSSadaf Ebrahimi   __ clz(x3, x4);
104*f5c631daSSadaf Ebrahimi   __ cmn(w5, w6);
105*f5c631daSSadaf Ebrahimi   __ cmn(x7, x8);
106*f5c631daSSadaf Ebrahimi   __ cmp(w9, w10);
107*f5c631daSSadaf Ebrahimi   __ cmp(x11, x12);
108*f5c631daSSadaf Ebrahimi   __ cneg(w13, w14, hi);
109*f5c631daSSadaf Ebrahimi   __ cneg(w13, w14, ls);
110*f5c631daSSadaf Ebrahimi   __ cneg(x15, x16, eq);
111*f5c631daSSadaf Ebrahimi   __ cneg(x15, x16, ne);
112*f5c631daSSadaf Ebrahimi   __ crc32b(w17, w18, w19);
113*f5c631daSSadaf Ebrahimi   __ crc32cb(w20, w21, w22);
114*f5c631daSSadaf Ebrahimi   __ crc32ch(w23, w24, w25);
115*f5c631daSSadaf Ebrahimi   __ crc32cw(w26, w27, w28);
116*f5c631daSSadaf Ebrahimi   __ crc32h(w4, w5, w6);
117*f5c631daSSadaf Ebrahimi   __ crc32w(w7, w8, w9);
118*f5c631daSSadaf Ebrahimi   __ csel(w13, w14, w15, cc);
119*f5c631daSSadaf Ebrahimi   __ csel(w13, w14, w15, cs);
120*f5c631daSSadaf Ebrahimi   __ csel(x16, x17, x18, hi);
121*f5c631daSSadaf Ebrahimi   __ csel(x16, x17, x18, ls);
122*f5c631daSSadaf Ebrahimi   __ cset(w19, eq);
123*f5c631daSSadaf Ebrahimi   __ cset(w19, ne);
124*f5c631daSSadaf Ebrahimi   __ cset(x20, cc);
125*f5c631daSSadaf Ebrahimi   __ cset(x20, cs);
126*f5c631daSSadaf Ebrahimi   __ csetm(w21, hi);
127*f5c631daSSadaf Ebrahimi   __ csetm(w21, ls);
128*f5c631daSSadaf Ebrahimi   __ csetm(x22, eq);
129*f5c631daSSadaf Ebrahimi   __ csetm(x22, ne);
130*f5c631daSSadaf Ebrahimi   __ csinc(w23, w24, w25, cc);
131*f5c631daSSadaf Ebrahimi   __ csinc(w23, w24, w25, cs);
132*f5c631daSSadaf Ebrahimi   __ csinc(x26, x27, x28, hi);
133*f5c631daSSadaf Ebrahimi   __ csinc(x26, x27, x28, ls);
134*f5c631daSSadaf Ebrahimi   __ csinv(w29, w2, w3, eq);
135*f5c631daSSadaf Ebrahimi   __ csinv(w29, w2, w3, ne);
136*f5c631daSSadaf Ebrahimi   __ csinv(x4, x5, x6, cc);
137*f5c631daSSadaf Ebrahimi   __ csinv(x4, x5, x6, cs);
138*f5c631daSSadaf Ebrahimi   __ csneg(w7, w8, w9, hi);
139*f5c631daSSadaf Ebrahimi   __ csneg(w7, w8, w9, ls);
140*f5c631daSSadaf Ebrahimi   __ csneg(x10, x11, x12, eq);
141*f5c631daSSadaf Ebrahimi   __ csneg(x10, x11, x12, ne);
142*f5c631daSSadaf Ebrahimi   __ dc(CVAC, x0);
143*f5c631daSSadaf Ebrahimi   __ dmb(InnerShareable, BarrierAll);
144*f5c631daSSadaf Ebrahimi   __ dsb(InnerShareable, BarrierAll);
145*f5c631daSSadaf Ebrahimi   __ eon(w13, w14, w15);
146*f5c631daSSadaf Ebrahimi   __ eon(x16, x17, x18);
147*f5c631daSSadaf Ebrahimi   __ eor(w19, w20, w21);
148*f5c631daSSadaf Ebrahimi   __ eor(x22, x23, x24);
149*f5c631daSSadaf Ebrahimi   __ extr(w25, w26, w27, 9);
150*f5c631daSSadaf Ebrahimi   __ extr(x28, x29, x2, 10);
151*f5c631daSSadaf Ebrahimi   __ hint(NOP);
152*f5c631daSSadaf Ebrahimi   __ ic(IVAU, x0);
153*f5c631daSSadaf Ebrahimi   __ isb();
154*f5c631daSSadaf Ebrahimi   __ ldar(w3, MemOperand(x0));
155*f5c631daSSadaf Ebrahimi   __ ldar(x4, MemOperand(x0));
156*f5c631daSSadaf Ebrahimi   __ ldarb(w5, MemOperand(x0));
157*f5c631daSSadaf Ebrahimi   __ ldarb(x6, MemOperand(x0));
158*f5c631daSSadaf Ebrahimi   __ ldarh(w7, MemOperand(x0));
159*f5c631daSSadaf Ebrahimi   __ ldarh(x8, MemOperand(x0));
160*f5c631daSSadaf Ebrahimi   __ ldaxp(w9, w10, MemOperand(x0));
161*f5c631daSSadaf Ebrahimi   __ ldaxp(x11, x12, MemOperand(x0));
162*f5c631daSSadaf Ebrahimi   __ ldaxr(w13, MemOperand(x0));
163*f5c631daSSadaf Ebrahimi   __ ldaxr(x14, MemOperand(x0));
164*f5c631daSSadaf Ebrahimi   __ ldaxrb(w15, MemOperand(x0));
165*f5c631daSSadaf Ebrahimi   __ ldaxrb(x16, MemOperand(x0));
166*f5c631daSSadaf Ebrahimi   __ ldaxrh(w17, MemOperand(x0));
167*f5c631daSSadaf Ebrahimi   __ ldaxrh(x18, MemOperand(x0));
168*f5c631daSSadaf Ebrahimi   __ ldnp(w19, w20, MemOperand(x0));
169*f5c631daSSadaf Ebrahimi   __ ldnp(x21, x22, MemOperand(x0));
170*f5c631daSSadaf Ebrahimi   __ ldp(w23, w24, MemOperand(x0));
171*f5c631daSSadaf Ebrahimi   __ ldp(w23, w24, MemOperand(x1, 8, PostIndex));
172*f5c631daSSadaf Ebrahimi   __ ldp(w23, w24, MemOperand(x1, 8, PreIndex));
173*f5c631daSSadaf Ebrahimi   __ ldp(x25, x26, MemOperand(x0));
174*f5c631daSSadaf Ebrahimi   __ ldp(x25, x26, MemOperand(x1, 16, PostIndex));
175*f5c631daSSadaf Ebrahimi   __ ldp(x25, x26, MemOperand(x1, 16, PreIndex));
176*f5c631daSSadaf Ebrahimi   __ ldpsw(x27, x28, MemOperand(x0));
177*f5c631daSSadaf Ebrahimi   __ ldpsw(x27, x28, MemOperand(x1, 8, PostIndex));
178*f5c631daSSadaf Ebrahimi   __ ldpsw(x27, x28, MemOperand(x1, 8, PreIndex));
179*f5c631daSSadaf Ebrahimi   __ ldr(w29, MemOperand(x0));
180*f5c631daSSadaf Ebrahimi   __ ldr(w29, MemOperand(x1, 4, PostIndex));
181*f5c631daSSadaf Ebrahimi   __ ldr(w29, MemOperand(x1, 4, PreIndex));
182*f5c631daSSadaf Ebrahimi   __ ldr(x2, MemOperand(x0));
183*f5c631daSSadaf Ebrahimi   __ ldr(x2, MemOperand(x1, 8, PostIndex));
184*f5c631daSSadaf Ebrahimi   __ ldr(x2, MemOperand(x1, 8, PreIndex));
185*f5c631daSSadaf Ebrahimi   __ ldrb(w3, MemOperand(x0));
186*f5c631daSSadaf Ebrahimi   __ ldrb(w3, MemOperand(x1, 1, PostIndex));
187*f5c631daSSadaf Ebrahimi   __ ldrb(w3, MemOperand(x1, 1, PreIndex));
188*f5c631daSSadaf Ebrahimi   __ ldrb(x4, MemOperand(x0));
189*f5c631daSSadaf Ebrahimi   __ ldrb(x4, MemOperand(x1, 1, PostIndex));
190*f5c631daSSadaf Ebrahimi   __ ldrb(x4, MemOperand(x1, 1, PreIndex));
191*f5c631daSSadaf Ebrahimi   __ ldrh(w5, MemOperand(x0));
192*f5c631daSSadaf Ebrahimi   __ ldrh(w5, MemOperand(x1, 2, PostIndex));
193*f5c631daSSadaf Ebrahimi   __ ldrh(w5, MemOperand(x1, 2, PreIndex));
194*f5c631daSSadaf Ebrahimi   __ ldrh(x6, MemOperand(x0));
195*f5c631daSSadaf Ebrahimi   __ ldrh(x6, MemOperand(x1, 2, PostIndex));
196*f5c631daSSadaf Ebrahimi   __ ldrh(x6, MemOperand(x1, 2, PreIndex));
197*f5c631daSSadaf Ebrahimi   __ ldrsb(w7, MemOperand(x0));
198*f5c631daSSadaf Ebrahimi   __ ldrsb(w7, MemOperand(x1, 1, PostIndex));
199*f5c631daSSadaf Ebrahimi   __ ldrsb(w7, MemOperand(x1, 1, PreIndex));
200*f5c631daSSadaf Ebrahimi   __ ldrsb(x8, MemOperand(x0));
201*f5c631daSSadaf Ebrahimi   __ ldrsb(x8, MemOperand(x1, 1, PostIndex));
202*f5c631daSSadaf Ebrahimi   __ ldrsb(x8, MemOperand(x1, 1, PreIndex));
203*f5c631daSSadaf Ebrahimi   __ ldrsh(w9, MemOperand(x0));
204*f5c631daSSadaf Ebrahimi   __ ldrsh(w9, MemOperand(x1, 2, PostIndex));
205*f5c631daSSadaf Ebrahimi   __ ldrsh(w9, MemOperand(x1, 2, PreIndex));
206*f5c631daSSadaf Ebrahimi   __ ldrsh(x10, MemOperand(x0));
207*f5c631daSSadaf Ebrahimi   __ ldrsh(x10, MemOperand(x1, 2, PostIndex));
208*f5c631daSSadaf Ebrahimi   __ ldrsh(x10, MemOperand(x1, 2, PreIndex));
209*f5c631daSSadaf Ebrahimi   __ ldrsw(x11, MemOperand(x0));
210*f5c631daSSadaf Ebrahimi   __ ldrsw(x11, MemOperand(x1, 4, PostIndex));
211*f5c631daSSadaf Ebrahimi   __ ldrsw(x11, MemOperand(x1, 4, PreIndex));
212*f5c631daSSadaf Ebrahimi   __ ldur(w12, MemOperand(x0, 7));
213*f5c631daSSadaf Ebrahimi   __ ldur(x13, MemOperand(x0, 15));
214*f5c631daSSadaf Ebrahimi   __ ldurb(w14, MemOperand(x0, 1));
215*f5c631daSSadaf Ebrahimi   __ ldurb(x15, MemOperand(x0, 1));
216*f5c631daSSadaf Ebrahimi   __ ldurh(w16, MemOperand(x0, 3));
217*f5c631daSSadaf Ebrahimi   __ ldurh(x17, MemOperand(x0, 3));
218*f5c631daSSadaf Ebrahimi   __ ldursb(w18, MemOperand(x0, 1));
219*f5c631daSSadaf Ebrahimi   __ ldursb(x19, MemOperand(x0, 1));
220*f5c631daSSadaf Ebrahimi   __ ldursh(w20, MemOperand(x0, 3));
221*f5c631daSSadaf Ebrahimi   __ ldursh(x21, MemOperand(x0, 3));
222*f5c631daSSadaf Ebrahimi   __ ldursw(x22, MemOperand(x0, 7));
223*f5c631daSSadaf Ebrahimi   __ ldxp(w23, w24, MemOperand(x0));
224*f5c631daSSadaf Ebrahimi   __ ldxp(x25, x26, MemOperand(x0));
225*f5c631daSSadaf Ebrahimi   __ ldxr(w27, MemOperand(x0));
226*f5c631daSSadaf Ebrahimi   __ ldxr(x28, MemOperand(x0));
227*f5c631daSSadaf Ebrahimi   __ ldxrb(w29, MemOperand(x0));
228*f5c631daSSadaf Ebrahimi   __ ldxrb(x2, MemOperand(x0));
229*f5c631daSSadaf Ebrahimi   __ ldxrh(w3, MemOperand(x0));
230*f5c631daSSadaf Ebrahimi   __ ldxrh(x4, MemOperand(x0));
231*f5c631daSSadaf Ebrahimi   __ lsl(w5, w6, 2);
232*f5c631daSSadaf Ebrahimi   __ lsl(x7, x8, 3);
233*f5c631daSSadaf Ebrahimi   __ lslv(w9, w10, w11);
234*f5c631daSSadaf Ebrahimi   __ lslv(x12, x13, x14);
235*f5c631daSSadaf Ebrahimi   __ lsr(w15, w16, 4);
236*f5c631daSSadaf Ebrahimi   __ lsr(x17, x18, 5);
237*f5c631daSSadaf Ebrahimi   __ lsrv(w19, w20, w21);
238*f5c631daSSadaf Ebrahimi   __ lsrv(x22, x23, x24);
239*f5c631daSSadaf Ebrahimi   __ madd(w25, w26, w27, w28);
240*f5c631daSSadaf Ebrahimi   __ madd(x29, x2, x3, x4);
241*f5c631daSSadaf Ebrahimi   __ mneg(w5, w6, w7);
242*f5c631daSSadaf Ebrahimi   __ mneg(x8, x9, x10);
243*f5c631daSSadaf Ebrahimi   __ mov(w11, w12);
244*f5c631daSSadaf Ebrahimi   __ mov(x13, x14);
245*f5c631daSSadaf Ebrahimi   __ movk(w15, 130);
246*f5c631daSSadaf Ebrahimi   __ movk(x16, 131);
247*f5c631daSSadaf Ebrahimi   __ movn(w17, 132);
248*f5c631daSSadaf Ebrahimi   __ movn(x18, 133);
249*f5c631daSSadaf Ebrahimi   __ movz(w19, 134);
250*f5c631daSSadaf Ebrahimi   __ movz(x20, 135);
251*f5c631daSSadaf Ebrahimi   __ msub(w22, w23, w24, w25);
252*f5c631daSSadaf Ebrahimi   __ msub(x26, x27, x28, x29);
253*f5c631daSSadaf Ebrahimi   __ mul(w2, w3, w4);
254*f5c631daSSadaf Ebrahimi   __ mul(x5, x6, x7);
255*f5c631daSSadaf Ebrahimi   __ mvn(w8, w9);
256*f5c631daSSadaf Ebrahimi   __ mvn(x10, x11);
257*f5c631daSSadaf Ebrahimi   __ neg(w12, w13);
258*f5c631daSSadaf Ebrahimi   __ neg(x14, x15);
259*f5c631daSSadaf Ebrahimi   __ negs(w16, w17);
260*f5c631daSSadaf Ebrahimi   __ negs(x18, x19);
261*f5c631daSSadaf Ebrahimi   __ ngc(w20, w21);
262*f5c631daSSadaf Ebrahimi   __ ngc(x22, x23);
263*f5c631daSSadaf Ebrahimi   __ ngcs(w24, w25);
264*f5c631daSSadaf Ebrahimi   __ ngcs(x26, x27);
265*f5c631daSSadaf Ebrahimi   __ nop();
266*f5c631daSSadaf Ebrahimi   __ orn(w28, w29, w2);
267*f5c631daSSadaf Ebrahimi   __ orn(x3, x4, x5);
268*f5c631daSSadaf Ebrahimi   __ orr(w6, w7, w8);
269*f5c631daSSadaf Ebrahimi   __ orr(x9, x10, x11);
270*f5c631daSSadaf Ebrahimi   __ prfm(PLDL1KEEP, MemOperand(x0, 4));
271*f5c631daSSadaf Ebrahimi   __ prfum(PLDL1KEEP, MemOperand(x0, 1));
272*f5c631daSSadaf Ebrahimi   __ rbit(w12, w13);
273*f5c631daSSadaf Ebrahimi   __ rbit(x14, x15);
274*f5c631daSSadaf Ebrahimi   __ rev(w16, w17);
275*f5c631daSSadaf Ebrahimi   __ rev(x18, x19);
276*f5c631daSSadaf Ebrahimi   __ rev16(w20, w21);
277*f5c631daSSadaf Ebrahimi   __ rev16(x22, x23);
278*f5c631daSSadaf Ebrahimi   __ rev32(x24, x25);
279*f5c631daSSadaf Ebrahimi   __ rorv(w26, w27, w28);
280*f5c631daSSadaf Ebrahimi   __ rorv(x29, x2, x3);
281*f5c631daSSadaf Ebrahimi   __ sbc(w4, w5, w6);
282*f5c631daSSadaf Ebrahimi   __ sbc(x7, x8, x9);
283*f5c631daSSadaf Ebrahimi   __ sbcs(w10, w11, w12);
284*f5c631daSSadaf Ebrahimi   __ sbcs(x13, x14, x15);
285*f5c631daSSadaf Ebrahimi   __ sbfiz(w16, w17, 2, 3);
286*f5c631daSSadaf Ebrahimi   __ sbfiz(x18, x19, 4, 5);
287*f5c631daSSadaf Ebrahimi   __ sbfx(w22, w23, 6, 7);
288*f5c631daSSadaf Ebrahimi   __ sbfx(x24, x25, 8, 9);
289*f5c631daSSadaf Ebrahimi   __ sdiv(w26, w27, w28);
290*f5c631daSSadaf Ebrahimi   __ sdiv(x29, x2, x3);
291*f5c631daSSadaf Ebrahimi   __ smulh(x12, x13, x14);
292*f5c631daSSadaf Ebrahimi   __ stlr(w18, MemOperand(x0));
293*f5c631daSSadaf Ebrahimi   __ stlr(x19, MemOperand(x0));
294*f5c631daSSadaf Ebrahimi   __ stlrb(w20, MemOperand(x0));
295*f5c631daSSadaf Ebrahimi   __ stlrb(x21, MemOperand(x0));
296*f5c631daSSadaf Ebrahimi   __ stlrh(w22, MemOperand(x0));
297*f5c631daSSadaf Ebrahimi   __ stlrh(x23, MemOperand(x0));
298*f5c631daSSadaf Ebrahimi   __ stlxp(w24, w25, w26, MemOperand(x0));
299*f5c631daSSadaf Ebrahimi   __ stlxp(x27, x28, x29, MemOperand(x0));
300*f5c631daSSadaf Ebrahimi   __ stlxr(w2, w3, MemOperand(x0));
301*f5c631daSSadaf Ebrahimi   __ stlxr(x4, x5, MemOperand(x0));
302*f5c631daSSadaf Ebrahimi   __ stlxrb(w6, w7, MemOperand(x0));
303*f5c631daSSadaf Ebrahimi   __ stlxrb(x8, x9, MemOperand(x0));
304*f5c631daSSadaf Ebrahimi   __ stlxrh(w10, w11, MemOperand(x0));
305*f5c631daSSadaf Ebrahimi   __ stlxrh(x12, x13, MemOperand(x0));
306*f5c631daSSadaf Ebrahimi   __ stnp(w14, w15, MemOperand(x0));
307*f5c631daSSadaf Ebrahimi   __ stnp(x16, x17, MemOperand(x0));
308*f5c631daSSadaf Ebrahimi   __ stp(w18, w19, MemOperand(x0));
309*f5c631daSSadaf Ebrahimi   __ stp(w18, w19, MemOperand(x1, 8, PostIndex));
310*f5c631daSSadaf Ebrahimi   __ stp(w18, w19, MemOperand(x1, 8, PreIndex));
311*f5c631daSSadaf Ebrahimi   __ stp(x20, x21, MemOperand(x0));
312*f5c631daSSadaf Ebrahimi   __ stp(x20, x21, MemOperand(x1, 16, PostIndex));
313*f5c631daSSadaf Ebrahimi   __ stp(x20, x21, MemOperand(x1, 16, PreIndex));
314*f5c631daSSadaf Ebrahimi   __ str(w22, MemOperand(x0));
315*f5c631daSSadaf Ebrahimi   __ str(w22, MemOperand(x1, 4, PostIndex));
316*f5c631daSSadaf Ebrahimi   __ str(w22, MemOperand(x1, 4, PreIndex));
317*f5c631daSSadaf Ebrahimi   __ str(x23, MemOperand(x0));
318*f5c631daSSadaf Ebrahimi   __ str(x23, MemOperand(x1, 8, PostIndex));
319*f5c631daSSadaf Ebrahimi   __ str(x23, MemOperand(x1, 8, PreIndex));
320*f5c631daSSadaf Ebrahimi   __ strb(w24, MemOperand(x0));
321*f5c631daSSadaf Ebrahimi   __ strb(w24, MemOperand(x1, 1, PostIndex));
322*f5c631daSSadaf Ebrahimi   __ strb(w24, MemOperand(x1, 1, PreIndex));
323*f5c631daSSadaf Ebrahimi   __ strb(x25, MemOperand(x0));
324*f5c631daSSadaf Ebrahimi   __ strb(x25, MemOperand(x1, 1, PostIndex));
325*f5c631daSSadaf Ebrahimi   __ strb(x25, MemOperand(x1, 1, PreIndex));
326*f5c631daSSadaf Ebrahimi   __ strh(w26, MemOperand(x0));
327*f5c631daSSadaf Ebrahimi   __ strh(w26, MemOperand(x1, 2, PostIndex));
328*f5c631daSSadaf Ebrahimi   __ strh(w26, MemOperand(x1, 2, PreIndex));
329*f5c631daSSadaf Ebrahimi   __ strh(x27, MemOperand(x0));
330*f5c631daSSadaf Ebrahimi   __ strh(x27, MemOperand(x1, 2, PostIndex));
331*f5c631daSSadaf Ebrahimi   __ strh(x27, MemOperand(x1, 2, PreIndex));
332*f5c631daSSadaf Ebrahimi   __ stur(w28, MemOperand(x0, 7));
333*f5c631daSSadaf Ebrahimi   __ stur(x29, MemOperand(x0, 15));
334*f5c631daSSadaf Ebrahimi   __ sturb(w2, MemOperand(x0, 1));
335*f5c631daSSadaf Ebrahimi   __ sturb(x3, MemOperand(x0, 1));
336*f5c631daSSadaf Ebrahimi   __ sturh(w4, MemOperand(x0, 3));
337*f5c631daSSadaf Ebrahimi   __ sturh(x5, MemOperand(x0, 3));
338*f5c631daSSadaf Ebrahimi   __ stxp(w6, w7, w8, MemOperand(x0));
339*f5c631daSSadaf Ebrahimi   __ stxp(x9, x10, x11, MemOperand(x0));
340*f5c631daSSadaf Ebrahimi   __ stxr(w12, w13, MemOperand(x0));
341*f5c631daSSadaf Ebrahimi   __ stxr(x14, x15, MemOperand(x0));
342*f5c631daSSadaf Ebrahimi   __ stxrb(w16, w17, MemOperand(x0));
343*f5c631daSSadaf Ebrahimi   __ stxrb(x18, x19, MemOperand(x0));
344*f5c631daSSadaf Ebrahimi   __ stxrh(w20, w21, MemOperand(x0));
345*f5c631daSSadaf Ebrahimi   __ stxrh(x22, x23, MemOperand(x0));
346*f5c631daSSadaf Ebrahimi   __ sub(w24, w25, w26);
347*f5c631daSSadaf Ebrahimi   __ sub(x27, x28, x29);
348*f5c631daSSadaf Ebrahimi   __ subs(w2, w3, w4);
349*f5c631daSSadaf Ebrahimi   __ subs(x5, x6, x7);
350*f5c631daSSadaf Ebrahimi   __ sxtb(w8, w9);
351*f5c631daSSadaf Ebrahimi   __ sxtb(x10, x11);
352*f5c631daSSadaf Ebrahimi   __ sxth(w12, w13);
353*f5c631daSSadaf Ebrahimi   __ sxth(x14, x15);
354*f5c631daSSadaf Ebrahimi   __ sxtw(w16, w17);
355*f5c631daSSadaf Ebrahimi   __ sxtw(x18, x19);
356*f5c631daSSadaf Ebrahimi   __ tst(w20, w21);
357*f5c631daSSadaf Ebrahimi   __ tst(x22, x23);
358*f5c631daSSadaf Ebrahimi   __ ubfiz(w24, w25, 10, 11);
359*f5c631daSSadaf Ebrahimi   __ ubfiz(x26, x27, 12, 13);
360*f5c631daSSadaf Ebrahimi   __ ubfm(w28, w29, 14, 15);
361*f5c631daSSadaf Ebrahimi   __ ubfm(x2, x3, 1, 2);
362*f5c631daSSadaf Ebrahimi   __ ubfx(w4, w5, 3, 4);
363*f5c631daSSadaf Ebrahimi   __ ubfx(x6, x7, 5, 6);
364*f5c631daSSadaf Ebrahimi   __ udiv(w8, w9, w10);
365*f5c631daSSadaf Ebrahimi   __ udiv(x11, x12, x13);
366*f5c631daSSadaf Ebrahimi   __ umulh(x22, x23, x24);
367*f5c631daSSadaf Ebrahimi   __ uxtb(w28, w29);
368*f5c631daSSadaf Ebrahimi   __ uxtb(x2, x3);
369*f5c631daSSadaf Ebrahimi   __ uxth(w4, w5);
370*f5c631daSSadaf Ebrahimi   __ uxth(x6, x7);
371*f5c631daSSadaf Ebrahimi   __ uxtw(w8, w9);
372*f5c631daSSadaf Ebrahimi   __ uxtw(x10, x11);
373*f5c631daSSadaf Ebrahimi 
374*f5c631daSSadaf Ebrahimi   // Branch tests.
375*f5c631daSSadaf Ebrahimi   {
376*f5c631daSSadaf Ebrahimi     Label end;
377*f5c631daSSadaf Ebrahimi     // Branch to the next instruction.
378*f5c631daSSadaf Ebrahimi     __ b(&end);
379*f5c631daSSadaf Ebrahimi     __ bind(&end);
380*f5c631daSSadaf Ebrahimi   }
381*f5c631daSSadaf Ebrahimi   {
382*f5c631daSSadaf Ebrahimi     Label loop, end;
    // `subs x3, x3, x3` leaves x3 == 0 with the Z flag set, so the `ne`
    // branch below falls through on the first pass. `cmp x3, 1` then clears
    // Z, so after the backwards branch the same `ne` branch is taken and the
    // sequence exits through `end`.
383*f5c631daSSadaf Ebrahimi     __ subs(x3, x3, x3);
384*f5c631daSSadaf Ebrahimi     __ bind(&loop);
385*f5c631daSSadaf Ebrahimi     // Not-taken branch (the first time).
386*f5c631daSSadaf Ebrahimi     // Taken branch (the second time).
387*f5c631daSSadaf Ebrahimi     __ b(&end, ne);
388*f5c631daSSadaf Ebrahimi     __ cmp(x3, 1);
389*f5c631daSSadaf Ebrahimi     // Backwards branch.
390*f5c631daSSadaf Ebrahimi     __ b(&loop);
391*f5c631daSSadaf Ebrahimi     __ bind(&end);
392*f5c631daSSadaf Ebrahimi   }
393*f5c631daSSadaf Ebrahimi }
394*f5c631daSSadaf Ebrahimi 
395*f5c631daSSadaf Ebrahimi 
GenerateTestSequenceFP(MacroAssembler * masm)396*f5c631daSSadaf Ebrahimi static void GenerateTestSequenceFP(MacroAssembler* masm) {
397*f5c631daSSadaf Ebrahimi   ExactAssemblyScope guard(masm,
398*f5c631daSSadaf Ebrahimi                            masm->GetBuffer()->GetRemainingBytes(),
399*f5c631daSSadaf Ebrahimi                            ExactAssemblyScope::kMaximumSize);
400*f5c631daSSadaf Ebrahimi 
401*f5c631daSSadaf Ebrahimi   // Scalar floating point instructions.
402*f5c631daSSadaf Ebrahimi   __ fabd(d13, d2, d19);
403*f5c631daSSadaf Ebrahimi   __ fabd(s8, s10, s30);
404*f5c631daSSadaf Ebrahimi   __ fabs(d1, d1);
405*f5c631daSSadaf Ebrahimi   __ fabs(s25, s7);
406*f5c631daSSadaf Ebrahimi   __ facge(d1, d23, d16);
407*f5c631daSSadaf Ebrahimi   __ facge(s4, s17, s1);
408*f5c631daSSadaf Ebrahimi   __ facgt(d2, d21, d24);
409*f5c631daSSadaf Ebrahimi   __ facgt(s12, s26, s12);
410*f5c631daSSadaf Ebrahimi   __ fadd(d13, d11, d22);
411*f5c631daSSadaf Ebrahimi   __ fadd(s27, s19, s8);
412*f5c631daSSadaf Ebrahimi   __ fccmp(d6, d10, NoFlag, hs);
413*f5c631daSSadaf Ebrahimi   __ fccmp(s29, s20, NZVFlag, ne);
414*f5c631daSSadaf Ebrahimi   __ fccmpe(d10, d2, NZCFlag, al);
415*f5c631daSSadaf Ebrahimi   __ fccmpe(s3, s3, NZVFlag, pl);
416*f5c631daSSadaf Ebrahimi   __ fcmeq(d19, d8, d10);
417*f5c631daSSadaf Ebrahimi   __ fcmeq(d0, d18, 0.0);
418*f5c631daSSadaf Ebrahimi   __ fcmeq(s1, s4, s30);
419*f5c631daSSadaf Ebrahimi   __ fcmeq(s22, s29, 0.0);
420*f5c631daSSadaf Ebrahimi   __ fcmge(d27, d18, d1);
421*f5c631daSSadaf Ebrahimi   __ fcmge(d31, d28, 0.0);
422*f5c631daSSadaf Ebrahimi   __ fcmge(s31, s19, s9);
423*f5c631daSSadaf Ebrahimi   __ fcmge(s1, s25, 0.0);
424*f5c631daSSadaf Ebrahimi   __ fcmgt(d18, d1, d15);
425*f5c631daSSadaf Ebrahimi   __ fcmgt(d3, d31, 0.0);
426*f5c631daSSadaf Ebrahimi   __ fcmgt(s11, s25, s2);
427*f5c631daSSadaf Ebrahimi   __ fcmgt(s17, s16, 0.0);
428*f5c631daSSadaf Ebrahimi   __ fcmle(d24, d17, 0.0);
429*f5c631daSSadaf Ebrahimi   __ fcmle(s11, s8, 0.0);
430*f5c631daSSadaf Ebrahimi   __ fcmlt(d5, d31, 0.0);
431*f5c631daSSadaf Ebrahimi   __ fcmlt(s18, s23, 0.0);
432*f5c631daSSadaf Ebrahimi   __ fcmp(d10, d24);
433*f5c631daSSadaf Ebrahimi   __ fcmp(d13, 0.0);
434*f5c631daSSadaf Ebrahimi   __ fcmp(s18, s6);
435*f5c631daSSadaf Ebrahimi   __ fcmp(s16, 0.0);
436*f5c631daSSadaf Ebrahimi   __ fcmpe(d9, d17);
437*f5c631daSSadaf Ebrahimi   __ fcmpe(d29, 0.0);
438*f5c631daSSadaf Ebrahimi   __ fcmpe(s16, s17);
439*f5c631daSSadaf Ebrahimi   __ fcmpe(s22, 0.0);
440*f5c631daSSadaf Ebrahimi   __ fcsel(d10, d14, d19, gt);
441*f5c631daSSadaf Ebrahimi   __ fcsel(s22, s18, s2, ge);
442*f5c631daSSadaf Ebrahimi   __ fcvt(d4, h24);
443*f5c631daSSadaf Ebrahimi   __ fcvt(d11, s2);
444*f5c631daSSadaf Ebrahimi   __ fcvt(h8, d9);
445*f5c631daSSadaf Ebrahimi   __ fcvt(h12, s1);
446*f5c631daSSadaf Ebrahimi   __ fcvt(s12, d31);
447*f5c631daSSadaf Ebrahimi   __ fcvt(s27, h25);
448*f5c631daSSadaf Ebrahimi   __ fcvtas(d28, d16);
449*f5c631daSSadaf Ebrahimi   __ fcvtas(s3, s5);
450*f5c631daSSadaf Ebrahimi   __ fcvtas(w18, d31);
451*f5c631daSSadaf Ebrahimi   __ fcvtas(w29, s24);
452*f5c631daSSadaf Ebrahimi   __ fcvtas(x9, d1);
453*f5c631daSSadaf Ebrahimi   __ fcvtas(x30, s2);
454*f5c631daSSadaf Ebrahimi   __ fcvtau(d14, d0);
455*f5c631daSSadaf Ebrahimi   __ fcvtau(s31, s14);
456*f5c631daSSadaf Ebrahimi   __ fcvtau(w16, d2);
457*f5c631daSSadaf Ebrahimi   __ fcvtau(w18, s0);
458*f5c631daSSadaf Ebrahimi   __ fcvtau(x26, d7);
459*f5c631daSSadaf Ebrahimi   __ fcvtau(x25, s19);
460*f5c631daSSadaf Ebrahimi   __ fcvtms(d30, d25);
461*f5c631daSSadaf Ebrahimi   __ fcvtms(s12, s15);
462*f5c631daSSadaf Ebrahimi   __ fcvtms(w9, d7);
463*f5c631daSSadaf Ebrahimi   __ fcvtms(w19, s6);
464*f5c631daSSadaf Ebrahimi   __ fcvtms(x6, d6);
465*f5c631daSSadaf Ebrahimi   __ fcvtms(x22, s7);
466*f5c631daSSadaf Ebrahimi   __ fcvtmu(d27, d0);
467*f5c631daSSadaf Ebrahimi   __ fcvtmu(s8, s22);
468*f5c631daSSadaf Ebrahimi   __ fcvtmu(w29, d19);
469*f5c631daSSadaf Ebrahimi   __ fcvtmu(w26, s0);
470*f5c631daSSadaf Ebrahimi   __ fcvtmu(x13, d5);
471*f5c631daSSadaf Ebrahimi   __ fcvtmu(x5, s18);
472*f5c631daSSadaf Ebrahimi   __ fcvtns(d30, d15);
473*f5c631daSSadaf Ebrahimi   __ fcvtns(s10, s11);
474*f5c631daSSadaf Ebrahimi   __ fcvtns(w21, d15);
475*f5c631daSSadaf Ebrahimi   __ fcvtns(w18, s10);
476*f5c631daSSadaf Ebrahimi   __ fcvtns(x8, d17);
477*f5c631daSSadaf Ebrahimi   __ fcvtns(x17, s12);
478*f5c631daSSadaf Ebrahimi   __ fcvtnu(d0, d21);
479*f5c631daSSadaf Ebrahimi   __ fcvtnu(s6, s25);
480*f5c631daSSadaf Ebrahimi   __ fcvtnu(w29, d11);
481*f5c631daSSadaf Ebrahimi   __ fcvtnu(w25, s31);
482*f5c631daSSadaf Ebrahimi   __ fcvtnu(x30, d11);
483*f5c631daSSadaf Ebrahimi   __ fcvtnu(x27, s18);
484*f5c631daSSadaf Ebrahimi   __ fcvtps(d11, d22);
485*f5c631daSSadaf Ebrahimi   __ fcvtps(s29, s20);
486*f5c631daSSadaf Ebrahimi   __ fcvtps(w15, d25);
487*f5c631daSSadaf Ebrahimi   __ fcvtps(w16, s7);
488*f5c631daSSadaf Ebrahimi   __ fcvtps(x13, d20);
489*f5c631daSSadaf Ebrahimi   __ fcvtps(x3, s23);
490*f5c631daSSadaf Ebrahimi   __ fcvtpu(d24, d1);
491*f5c631daSSadaf Ebrahimi   __ fcvtpu(s14, s24);
492*f5c631daSSadaf Ebrahimi   __ fcvtpu(w26, d29);
493*f5c631daSSadaf Ebrahimi   __ fcvtpu(wzr, s26);
494*f5c631daSSadaf Ebrahimi   __ fcvtpu(x27, d6);
495*f5c631daSSadaf Ebrahimi   __ fcvtpu(x29, s14);
496*f5c631daSSadaf Ebrahimi   __ fcvtxn(s12, d12);
497*f5c631daSSadaf Ebrahimi   __ fcvtzs(d15, d0);
498*f5c631daSSadaf Ebrahimi   __ fcvtzs(d13, d4, 42);
499*f5c631daSSadaf Ebrahimi   __ fcvtzs(s8, s11);
500*f5c631daSSadaf Ebrahimi   __ fcvtzs(s31, s6, 25);
501*f5c631daSSadaf Ebrahimi   __ fcvtzs(w6, d9);
502*f5c631daSSadaf Ebrahimi   __ fcvtzs(w25, d10, 20);
503*f5c631daSSadaf Ebrahimi   __ fcvtzs(w9, s1);
504*f5c631daSSadaf Ebrahimi   __ fcvtzs(w17, s29, 30);
505*f5c631daSSadaf Ebrahimi   __ fcvtzs(x19, d2);
506*f5c631daSSadaf Ebrahimi   __ fcvtzs(x22, d14, 1);
507*f5c631daSSadaf Ebrahimi   __ fcvtzs(x14, s20);
508*f5c631daSSadaf Ebrahimi   __ fcvtzs(x3, s30, 33);
509*f5c631daSSadaf Ebrahimi   __ fcvtzu(d28, d15);
510*f5c631daSSadaf Ebrahimi   __ fcvtzu(d0, d4, 3);
511*f5c631daSSadaf Ebrahimi   __ fcvtzu(s2, s5);
512*f5c631daSSadaf Ebrahimi   __ fcvtzu(s4, s0, 30);
513*f5c631daSSadaf Ebrahimi   __ fcvtzu(w11, d4);
514*f5c631daSSadaf Ebrahimi   __ fcvtzu(w7, d24, 32);
515*f5c631daSSadaf Ebrahimi   __ fcvtzu(w18, s24);
516*f5c631daSSadaf Ebrahimi   __ fcvtzu(w14, s27, 4);
517*f5c631daSSadaf Ebrahimi   __ fcvtzu(x22, d11);
518*f5c631daSSadaf Ebrahimi   __ fcvtzu(x8, d27, 52);
519*f5c631daSSadaf Ebrahimi   __ fcvtzu(x7, s20);
520*f5c631daSSadaf Ebrahimi   __ fcvtzu(x22, s7, 44);
521*f5c631daSSadaf Ebrahimi   __ fdiv(d6, d14, d15);
522*f5c631daSSadaf Ebrahimi   __ fdiv(s26, s5, s25);
523*f5c631daSSadaf Ebrahimi   __ fmadd(d18, d26, d12, d30);
524*f5c631daSSadaf Ebrahimi   __ fmadd(s13, s9, s28, s4);
525*f5c631daSSadaf Ebrahimi   __ fmax(d12, d5, d5);
526*f5c631daSSadaf Ebrahimi   __ fmax(s12, s28, s6);
527*f5c631daSSadaf Ebrahimi   __ fmaxnm(d28, d4, d2);
528*f5c631daSSadaf Ebrahimi   __ fmaxnm(s6, s10, s8);
529*f5c631daSSadaf Ebrahimi   __ fmin(d20, d20, d18);
530*f5c631daSSadaf Ebrahimi   __ fmin(s7, s13, s16);
531*f5c631daSSadaf Ebrahimi   __ fminnm(d19, d14, d30);
532*f5c631daSSadaf Ebrahimi   __ fminnm(s0, s1, s1);
533*f5c631daSSadaf Ebrahimi   __ fmov(d13, d6);
534*f5c631daSSadaf Ebrahimi   __ fmov(d2, x17);
535*f5c631daSSadaf Ebrahimi   __ fmov(d8, -2.5000);
536*f5c631daSSadaf Ebrahimi   __ fmov(s5, s3);
537*f5c631daSSadaf Ebrahimi   __ fmov(s25, w20);
538*f5c631daSSadaf Ebrahimi   __ fmov(s21, 2.8750f);
539*f5c631daSSadaf Ebrahimi   __ fmov(w18, s24);
540*f5c631daSSadaf Ebrahimi   __ fmov(x18, d2);
541*f5c631daSSadaf Ebrahimi   __ fmsub(d20, d30, d3, d19);
542*f5c631daSSadaf Ebrahimi   __ fmsub(s5, s19, s4, s12);
543*f5c631daSSadaf Ebrahimi   __ fmul(d30, d27, d23);
544*f5c631daSSadaf Ebrahimi   __ fmul(s25, s17, s15);
545*f5c631daSSadaf Ebrahimi   __ fmulx(d4, d17, d1);
546*f5c631daSSadaf Ebrahimi   __ fmulx(s14, s25, s4);
547*f5c631daSSadaf Ebrahimi   __ fneg(d15, d0);
548*f5c631daSSadaf Ebrahimi   __ fneg(s14, s15);
549*f5c631daSSadaf Ebrahimi   __ fnmadd(d0, d16, d22, d31);
550*f5c631daSSadaf Ebrahimi   __ fnmadd(s0, s18, s26, s18);
551*f5c631daSSadaf Ebrahimi   __ fnmsub(d19, d12, d15, d21);
552*f5c631daSSadaf Ebrahimi   __ fnmsub(s29, s0, s11, s26);
553*f5c631daSSadaf Ebrahimi   __ fnmul(d31, d19, d1);
554*f5c631daSSadaf Ebrahimi   __ fnmul(s18, s3, s17);
555*f5c631daSSadaf Ebrahimi   __ frecpe(d7, d21);
556*f5c631daSSadaf Ebrahimi   __ frecpe(s29, s17);
557*f5c631daSSadaf Ebrahimi   __ frecps(d11, d26, d17);
558*f5c631daSSadaf Ebrahimi   __ frecps(s18, s27, s1);
559*f5c631daSSadaf Ebrahimi   __ frecpx(d15, d18);
560*f5c631daSSadaf Ebrahimi   __ frecpx(s5, s10);
561*f5c631daSSadaf Ebrahimi   __ frinta(d16, d30);
562*f5c631daSSadaf Ebrahimi   __ frinta(s1, s22);
563*f5c631daSSadaf Ebrahimi   __ frinti(d19, d29);
564*f5c631daSSadaf Ebrahimi   __ frinti(s14, s21);
565*f5c631daSSadaf Ebrahimi   __ frintm(d20, d30);
566*f5c631daSSadaf Ebrahimi   __ frintm(s1, s16);
567*f5c631daSSadaf Ebrahimi   __ frintn(d30, d1);
568*f5c631daSSadaf Ebrahimi   __ frintn(s24, s10);
569*f5c631daSSadaf Ebrahimi   __ frintp(d4, d20);
570*f5c631daSSadaf Ebrahimi   __ frintp(s13, s3);
571*f5c631daSSadaf Ebrahimi   __ frintx(d13, d20);
572*f5c631daSSadaf Ebrahimi   __ frintx(s17, s7);
573*f5c631daSSadaf Ebrahimi   __ frintz(d0, d8);
574*f5c631daSSadaf Ebrahimi   __ frintz(s15, s29);
575*f5c631daSSadaf Ebrahimi   __ frsqrte(d21, d10);
576*f5c631daSSadaf Ebrahimi   __ frsqrte(s17, s25);
577*f5c631daSSadaf Ebrahimi   __ frsqrts(d4, d29, d17);
578*f5c631daSSadaf Ebrahimi   __ frsqrts(s14, s3, s24);
579*f5c631daSSadaf Ebrahimi   __ fsqrt(d14, d17);
580*f5c631daSSadaf Ebrahimi   __ fsqrt(s4, s14);
581*f5c631daSSadaf Ebrahimi   __ fsub(d13, d19, d7);
582*f5c631daSSadaf Ebrahimi   __ fsub(s3, s21, s27);
583*f5c631daSSadaf Ebrahimi   __ scvtf(d31, d16);
584*f5c631daSSadaf Ebrahimi   __ scvtf(d26, d31, 24);
585*f5c631daSSadaf Ebrahimi   __ scvtf(d6, w16);
586*f5c631daSSadaf Ebrahimi   __ scvtf(d5, w20, 6);
587*f5c631daSSadaf Ebrahimi   __ scvtf(d16, x8);
588*f5c631daSSadaf Ebrahimi   __ scvtf(d15, x8, 10);
589*f5c631daSSadaf Ebrahimi   __ scvtf(s7, s4);
590*f5c631daSSadaf Ebrahimi   __ scvtf(s8, s15, 14);
591*f5c631daSSadaf Ebrahimi   __ scvtf(s29, w10);
592*f5c631daSSadaf Ebrahimi   __ scvtf(s15, w21, 11);
593*f5c631daSSadaf Ebrahimi   __ scvtf(s27, x26);
594*f5c631daSSadaf Ebrahimi   __ scvtf(s26, x12, 38);
595*f5c631daSSadaf Ebrahimi   __ ucvtf(d0, d9);
596*f5c631daSSadaf Ebrahimi   __ ucvtf(d5, d22, 47);
597*f5c631daSSadaf Ebrahimi   __ ucvtf(d30, w27);
598*f5c631daSSadaf Ebrahimi   __ ucvtf(d3, w19, 1);
599*f5c631daSSadaf Ebrahimi   __ ucvtf(d28, x21);
600*f5c631daSSadaf Ebrahimi   __ ucvtf(d27, x30, 35);
601*f5c631daSSadaf Ebrahimi   __ ucvtf(s11, s5);
602*f5c631daSSadaf Ebrahimi   __ ucvtf(s0, s23, 14);
603*f5c631daSSadaf Ebrahimi   __ ucvtf(s20, w19);
604*f5c631daSSadaf Ebrahimi   __ ucvtf(s21, w22, 18);
605*f5c631daSSadaf Ebrahimi   __ ucvtf(s6, x13);
606*f5c631daSSadaf Ebrahimi   __ ucvtf(s7, x2, 21);
607*f5c631daSSadaf Ebrahimi }
608*f5c631daSSadaf Ebrahimi 
609*f5c631daSSadaf Ebrahimi 
610*f5c631daSSadaf Ebrahimi static void GenerateTestSequenceNEON(MacroAssembler* masm) {
611*f5c631daSSadaf Ebrahimi   ExactAssemblyScope guard(masm,
612*f5c631daSSadaf Ebrahimi                            masm->GetBuffer()->GetRemainingBytes(),
613*f5c631daSSadaf Ebrahimi                            ExactAssemblyScope::kMaximumSize);
614*f5c631daSSadaf Ebrahimi 
615*f5c631daSSadaf Ebrahimi   // NEON integer instructions.
616*f5c631daSSadaf Ebrahimi   __ abs(d19, d0);
617*f5c631daSSadaf Ebrahimi   __ abs(v16.V16B(), v11.V16B());
618*f5c631daSSadaf Ebrahimi   __ abs(v0.V2D(), v31.V2D());
619*f5c631daSSadaf Ebrahimi   __ abs(v27.V2S(), v25.V2S());
620*f5c631daSSadaf Ebrahimi   __ abs(v21.V4H(), v27.V4H());
621*f5c631daSSadaf Ebrahimi   __ abs(v16.V4S(), v1.V4S());
622*f5c631daSSadaf Ebrahimi   __ abs(v31.V8B(), v5.V8B());
623*f5c631daSSadaf Ebrahimi   __ abs(v29.V8H(), v13.V8H());
624*f5c631daSSadaf Ebrahimi   __ add(d10, d5, d17);
625*f5c631daSSadaf Ebrahimi   __ add(v31.V16B(), v15.V16B(), v23.V16B());
626*f5c631daSSadaf Ebrahimi   __ add(v10.V2D(), v31.V2D(), v14.V2D());
627*f5c631daSSadaf Ebrahimi   __ add(v15.V2S(), v14.V2S(), v19.V2S());
628*f5c631daSSadaf Ebrahimi   __ add(v27.V4H(), v23.V4H(), v17.V4H());
629*f5c631daSSadaf Ebrahimi   __ add(v25.V4S(), v28.V4S(), v29.V4S());
630*f5c631daSSadaf Ebrahimi   __ add(v13.V8B(), v7.V8B(), v18.V8B());
631*f5c631daSSadaf Ebrahimi   __ add(v4.V8H(), v2.V8H(), v1.V8H());
632*f5c631daSSadaf Ebrahimi   __ addhn(v10.V2S(), v14.V2D(), v15.V2D());
633*f5c631daSSadaf Ebrahimi   __ addhn(v10.V4H(), v30.V4S(), v26.V4S());
634*f5c631daSSadaf Ebrahimi   __ addhn(v31.V8B(), v12.V8H(), v22.V8H());
635*f5c631daSSadaf Ebrahimi   __ addhn2(v16.V16B(), v21.V8H(), v20.V8H());
636*f5c631daSSadaf Ebrahimi   __ addhn2(v0.V4S(), v2.V2D(), v17.V2D());
637*f5c631daSSadaf Ebrahimi   __ addhn2(v31.V8H(), v7.V4S(), v17.V4S());
638*f5c631daSSadaf Ebrahimi   __ addp(d14, v19.V2D());
639*f5c631daSSadaf Ebrahimi   __ addp(v3.V16B(), v8.V16B(), v28.V16B());
640*f5c631daSSadaf Ebrahimi   __ addp(v8.V2D(), v5.V2D(), v17.V2D());
641*f5c631daSSadaf Ebrahimi   __ addp(v22.V2S(), v30.V2S(), v26.V2S());
642*f5c631daSSadaf Ebrahimi   __ addp(v29.V4H(), v24.V4H(), v14.V4H());
643*f5c631daSSadaf Ebrahimi   __ addp(v30.V4S(), v26.V4S(), v24.V4S());
644*f5c631daSSadaf Ebrahimi   __ addp(v12.V8B(), v26.V8B(), v7.V8B());
645*f5c631daSSadaf Ebrahimi   __ addp(v17.V8H(), v8.V8H(), v12.V8H());
646*f5c631daSSadaf Ebrahimi   __ addv(b27, v23.V16B());
647*f5c631daSSadaf Ebrahimi   __ addv(b12, v20.V8B());
648*f5c631daSSadaf Ebrahimi   __ addv(h27, v30.V4H());
649*f5c631daSSadaf Ebrahimi   __ addv(h19, v14.V8H());
650*f5c631daSSadaf Ebrahimi   __ addv(s14, v27.V4S());
651*f5c631daSSadaf Ebrahimi   __ and_(v10.V16B(), v8.V16B(), v27.V16B());
652*f5c631daSSadaf Ebrahimi   __ and_(v5.V8B(), v1.V8B(), v16.V8B());
653*f5c631daSSadaf Ebrahimi   __ bic(v26.V16B(), v3.V16B(), v24.V16B());
654*f5c631daSSadaf Ebrahimi   __ bic(v7.V2S(), 0xe4, 16);
655*f5c631daSSadaf Ebrahimi   __ bic(v28.V4H(), 0x23, 8);
656*f5c631daSSadaf Ebrahimi   __ bic(v29.V4S(), 0xac);
657*f5c631daSSadaf Ebrahimi   __ bic(v12.V8B(), v31.V8B(), v21.V8B());
658*f5c631daSSadaf Ebrahimi   __ bic(v18.V8H(), 0x98);
659*f5c631daSSadaf Ebrahimi   __ bif(v12.V16B(), v26.V16B(), v8.V16B());
660*f5c631daSSadaf Ebrahimi   __ bif(v2.V8B(), v23.V8B(), v27.V8B());
661*f5c631daSSadaf Ebrahimi   __ bit(v8.V16B(), v3.V16B(), v13.V16B());
662*f5c631daSSadaf Ebrahimi   __ bit(v5.V8B(), v5.V8B(), v23.V8B());
663*f5c631daSSadaf Ebrahimi   __ bsl(v9.V16B(), v31.V16B(), v23.V16B());
664*f5c631daSSadaf Ebrahimi   __ bsl(v14.V8B(), v7.V8B(), v3.V8B());
665*f5c631daSSadaf Ebrahimi   __ cls(v29.V16B(), v5.V16B());
666*f5c631daSSadaf Ebrahimi   __ cls(v21.V2S(), v0.V2S());
667*f5c631daSSadaf Ebrahimi   __ cls(v1.V4H(), v12.V4H());
668*f5c631daSSadaf Ebrahimi   __ cls(v27.V4S(), v10.V4S());
669*f5c631daSSadaf Ebrahimi   __ cls(v19.V8B(), v4.V8B());
670*f5c631daSSadaf Ebrahimi   __ cls(v15.V8H(), v14.V8H());
671*f5c631daSSadaf Ebrahimi   __ clz(v1.V16B(), v4.V16B());
672*f5c631daSSadaf Ebrahimi   __ clz(v27.V2S(), v17.V2S());
673*f5c631daSSadaf Ebrahimi   __ clz(v9.V4H(), v9.V4H());
674*f5c631daSSadaf Ebrahimi   __ clz(v31.V4S(), v15.V4S());
675*f5c631daSSadaf Ebrahimi   __ clz(v14.V8B(), v19.V8B());
676*f5c631daSSadaf Ebrahimi   __ clz(v6.V8H(), v11.V8H());
677*f5c631daSSadaf Ebrahimi   __ cmeq(d18, d5, d29);
678*f5c631daSSadaf Ebrahimi   __ cmeq(d14, d31, 0);
679*f5c631daSSadaf Ebrahimi   __ cmeq(v19.V16B(), v3.V16B(), v22.V16B());
680*f5c631daSSadaf Ebrahimi   __ cmeq(v15.V16B(), v9.V16B(), 0);
681*f5c631daSSadaf Ebrahimi   __ cmeq(v12.V2D(), v16.V2D(), v10.V2D());
682*f5c631daSSadaf Ebrahimi   __ cmeq(v8.V2D(), v22.V2D(), 0);
683*f5c631daSSadaf Ebrahimi   __ cmeq(v2.V2S(), v3.V2S(), v9.V2S());
684*f5c631daSSadaf Ebrahimi   __ cmeq(v16.V2S(), v25.V2S(), 0);
685*f5c631daSSadaf Ebrahimi   __ cmeq(v6.V4H(), v23.V4H(), v20.V4H());
686*f5c631daSSadaf Ebrahimi   __ cmeq(v16.V4H(), v13.V4H(), 0);
687*f5c631daSSadaf Ebrahimi   __ cmeq(v21.V4S(), v17.V4S(), v2.V4S());
688*f5c631daSSadaf Ebrahimi   __ cmeq(v6.V4S(), v25.V4S(), 0);
689*f5c631daSSadaf Ebrahimi   __ cmeq(v16.V8B(), v13.V8B(), v2.V8B());
690*f5c631daSSadaf Ebrahimi   __ cmeq(v21.V8B(), v16.V8B(), 0);
691*f5c631daSSadaf Ebrahimi   __ cmeq(v20.V8H(), v7.V8H(), v25.V8H());
692*f5c631daSSadaf Ebrahimi   __ cmeq(v26.V8H(), v8.V8H(), 0);
693*f5c631daSSadaf Ebrahimi   __ cmge(d16, d13, d31);
694*f5c631daSSadaf Ebrahimi   __ cmge(d25, d24, 0);
695*f5c631daSSadaf Ebrahimi   __ cmge(v17.V16B(), v19.V16B(), v17.V16B());
696*f5c631daSSadaf Ebrahimi   __ cmge(v22.V16B(), v30.V16B(), 0);
697*f5c631daSSadaf Ebrahimi   __ cmge(v28.V2D(), v20.V2D(), v26.V2D());
698*f5c631daSSadaf Ebrahimi   __ cmge(v6.V2D(), v23.V2D(), 0);
699*f5c631daSSadaf Ebrahimi   __ cmge(v25.V2S(), v22.V2S(), v3.V2S());
700*f5c631daSSadaf Ebrahimi   __ cmge(v21.V2S(), v11.V2S(), 0);
701*f5c631daSSadaf Ebrahimi   __ cmge(v16.V4H(), v3.V4H(), v12.V4H());
702*f5c631daSSadaf Ebrahimi   __ cmge(v23.V4H(), v9.V4H(), 0);
703*f5c631daSSadaf Ebrahimi   __ cmge(v7.V4S(), v2.V4S(), v11.V4S());
704*f5c631daSSadaf Ebrahimi   __ cmge(v0.V4S(), v22.V4S(), 0);
705*f5c631daSSadaf Ebrahimi   __ cmge(v10.V8B(), v30.V8B(), v9.V8B());
706*f5c631daSSadaf Ebrahimi   __ cmge(v21.V8B(), v8.V8B(), 0);
707*f5c631daSSadaf Ebrahimi   __ cmge(v2.V8H(), v7.V8H(), v26.V8H());
708*f5c631daSSadaf Ebrahimi   __ cmge(v19.V8H(), v10.V8H(), 0);
709*f5c631daSSadaf Ebrahimi   __ cmgt(d6, d13, d1);
710*f5c631daSSadaf Ebrahimi   __ cmgt(d30, d24, 0);
711*f5c631daSSadaf Ebrahimi   __ cmgt(v20.V16B(), v25.V16B(), v27.V16B());
712*f5c631daSSadaf Ebrahimi   __ cmgt(v0.V16B(), v25.V16B(), 0);
713*f5c631daSSadaf Ebrahimi   __ cmgt(v22.V2D(), v25.V2D(), v1.V2D());
714*f5c631daSSadaf Ebrahimi   __ cmgt(v16.V2D(), v16.V2D(), 0);
715*f5c631daSSadaf Ebrahimi   __ cmgt(v5.V2S(), v9.V2S(), v15.V2S());
716*f5c631daSSadaf Ebrahimi   __ cmgt(v12.V2S(), v18.V2S(), 0);
717*f5c631daSSadaf Ebrahimi   __ cmgt(v28.V4H(), v18.V4H(), v11.V4H());
718*f5c631daSSadaf Ebrahimi   __ cmgt(v22.V4H(), v3.V4H(), 0);
719*f5c631daSSadaf Ebrahimi   __ cmgt(v5.V4S(), v11.V4S(), v27.V4S());
720*f5c631daSSadaf Ebrahimi   __ cmgt(v13.V4S(), v20.V4S(), 0);
721*f5c631daSSadaf Ebrahimi   __ cmgt(v27.V8B(), v31.V8B(), v7.V8B());
722*f5c631daSSadaf Ebrahimi   __ cmgt(v5.V8B(), v0.V8B(), 0);
723*f5c631daSSadaf Ebrahimi   __ cmgt(v22.V8H(), v28.V8H(), v13.V8H());
724*f5c631daSSadaf Ebrahimi   __ cmgt(v6.V8H(), v2.V8H(), 0);
725*f5c631daSSadaf Ebrahimi   __ cmhi(d21, d8, d22);
726*f5c631daSSadaf Ebrahimi   __ cmhi(v18.V16B(), v19.V16B(), v19.V16B());
727*f5c631daSSadaf Ebrahimi   __ cmhi(v7.V2D(), v0.V2D(), v21.V2D());
728*f5c631daSSadaf Ebrahimi   __ cmhi(v15.V2S(), v19.V2S(), v0.V2S());
729*f5c631daSSadaf Ebrahimi   __ cmhi(v31.V4H(), v7.V4H(), v12.V4H());
730*f5c631daSSadaf Ebrahimi   __ cmhi(v9.V4S(), v16.V4S(), v22.V4S());
731*f5c631daSSadaf Ebrahimi   __ cmhi(v7.V8B(), v24.V8B(), v28.V8B());
732*f5c631daSSadaf Ebrahimi   __ cmhi(v11.V8H(), v10.V8H(), v25.V8H());
733*f5c631daSSadaf Ebrahimi   __ cmhs(d1, d12, d17);
734*f5c631daSSadaf Ebrahimi   __ cmhs(v21.V16B(), v25.V16B(), v30.V16B());
735*f5c631daSSadaf Ebrahimi   __ cmhs(v8.V2D(), v2.V2D(), v26.V2D());
736*f5c631daSSadaf Ebrahimi   __ cmhs(v1.V2S(), v22.V2S(), v29.V2S());
737*f5c631daSSadaf Ebrahimi   __ cmhs(v26.V4H(), v30.V4H(), v30.V4H());
738*f5c631daSSadaf Ebrahimi   __ cmhs(v19.V4S(), v20.V4S(), v16.V4S());
739*f5c631daSSadaf Ebrahimi   __ cmhs(v1.V8B(), v3.V8B(), v26.V8B());
740*f5c631daSSadaf Ebrahimi   __ cmhs(v20.V8H(), v28.V8H(), v8.V8H());
741*f5c631daSSadaf Ebrahimi   __ cmle(d30, d24, 0);
742*f5c631daSSadaf Ebrahimi   __ cmle(v0.V16B(), v3.V16B(), 0);
743*f5c631daSSadaf Ebrahimi   __ cmle(v2.V2D(), v30.V2D(), 0);
744*f5c631daSSadaf Ebrahimi   __ cmle(v7.V2S(), v10.V2S(), 0);
745*f5c631daSSadaf Ebrahimi   __ cmle(v9.V4H(), v31.V4H(), 0);
746*f5c631daSSadaf Ebrahimi   __ cmle(v9.V4S(), v18.V4S(), 0);
747*f5c631daSSadaf Ebrahimi   __ cmle(v21.V8B(), v31.V8B(), 0);
748*f5c631daSSadaf Ebrahimi   __ cmle(v29.V8H(), v21.V8H(), 0);
749*f5c631daSSadaf Ebrahimi   __ cmlt(d25, d23, 0);
750*f5c631daSSadaf Ebrahimi   __ cmlt(v7.V16B(), v21.V16B(), 0);
751*f5c631daSSadaf Ebrahimi   __ cmlt(v7.V2D(), v30.V2D(), 0);
752*f5c631daSSadaf Ebrahimi   __ cmlt(v25.V2S(), v28.V2S(), 0);
753*f5c631daSSadaf Ebrahimi   __ cmlt(v0.V4H(), v11.V4H(), 0);
754*f5c631daSSadaf Ebrahimi   __ cmlt(v24.V4S(), v5.V4S(), 0);
755*f5c631daSSadaf Ebrahimi   __ cmlt(v26.V8B(), v11.V8B(), 0);
756*f5c631daSSadaf Ebrahimi   __ cmlt(v1.V8H(), v21.V8H(), 0);
757*f5c631daSSadaf Ebrahimi   __ cmtst(d28, d23, d30);
758*f5c631daSSadaf Ebrahimi   __ cmtst(v26.V16B(), v6.V16B(), v31.V16B());
759*f5c631daSSadaf Ebrahimi   __ cmtst(v1.V2D(), v21.V2D(), v4.V2D());
760*f5c631daSSadaf Ebrahimi   __ cmtst(v27.V2S(), v26.V2S(), v20.V2S());
761*f5c631daSSadaf Ebrahimi   __ cmtst(v26.V4H(), v0.V4H(), v18.V4H());
762*f5c631daSSadaf Ebrahimi   __ cmtst(v25.V4S(), v16.V4S(), v4.V4S());
763*f5c631daSSadaf Ebrahimi   __ cmtst(v11.V8B(), v10.V8B(), v9.V8B());
764*f5c631daSSadaf Ebrahimi   __ cmtst(v0.V8H(), v2.V8H(), v1.V8H());
765*f5c631daSSadaf Ebrahimi   __ cnt(v25.V16B(), v15.V16B());
766*f5c631daSSadaf Ebrahimi   __ cnt(v28.V8B(), v6.V8B());
767*f5c631daSSadaf Ebrahimi   __ dup(v6.V16B(), v7.B(), 7);
768*f5c631daSSadaf Ebrahimi   __ dup(v9.V16B(), w20);
769*f5c631daSSadaf Ebrahimi   __ dup(v12.V2D(), v13.D(), 1);
770*f5c631daSSadaf Ebrahimi   __ dup(v9.V2D(), xzr);
771*f5c631daSSadaf Ebrahimi   __ dup(v4.V2S(), v26.S(), 2);
772*f5c631daSSadaf Ebrahimi   __ dup(v3.V2S(), w12);
773*f5c631daSSadaf Ebrahimi   __ dup(v22.V4H(), v5.H(), 7);
774*f5c631daSSadaf Ebrahimi   __ dup(v16.V4H(), w25);
775*f5c631daSSadaf Ebrahimi   __ dup(v20.V4S(), v10.S(), 2);
776*f5c631daSSadaf Ebrahimi   __ dup(v10.V4S(), w7);
777*f5c631daSSadaf Ebrahimi   __ dup(v30.V8B(), v30.B(), 2);
778*f5c631daSSadaf Ebrahimi   __ dup(v31.V8B(), w15);
779*f5c631daSSadaf Ebrahimi   __ dup(v28.V8H(), v17.H(), 4);
780*f5c631daSSadaf Ebrahimi   __ dup(v2.V8H(), w3);
781*f5c631daSSadaf Ebrahimi   __ eor(v29.V16B(), v25.V16B(), v3.V16B());
782*f5c631daSSadaf Ebrahimi   __ eor(v3.V8B(), v16.V8B(), v28.V8B());
783*f5c631daSSadaf Ebrahimi   __ ext(v1.V16B(), v26.V16B(), v6.V16B(), 1);
784*f5c631daSSadaf Ebrahimi   __ ext(v2.V8B(), v30.V8B(), v1.V8B(), 1);
785*f5c631daSSadaf Ebrahimi   __ ld1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), MemOperand(x0));
786*f5c631daSSadaf Ebrahimi   __ ld1(v23.V16B(),
787*f5c631daSSadaf Ebrahimi          v24.V16B(),
788*f5c631daSSadaf Ebrahimi          v25.V16B(),
789*f5c631daSSadaf Ebrahimi          v26.V16B(),
790*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
791*f5c631daSSadaf Ebrahimi   __ ld1(v5.V16B(),
792*f5c631daSSadaf Ebrahimi          v6.V16B(),
793*f5c631daSSadaf Ebrahimi          v7.V16B(),
794*f5c631daSSadaf Ebrahimi          v8.V16B(),
795*f5c631daSSadaf Ebrahimi          MemOperand(x1, 64, PostIndex));
796*f5c631daSSadaf Ebrahimi   __ ld1(v18.V16B(), v19.V16B(), v20.V16B(), MemOperand(x0));
797*f5c631daSSadaf Ebrahimi   __ ld1(v13.V16B(), v14.V16B(), v15.V16B(), MemOperand(x1, x2, PostIndex));
798*f5c631daSSadaf Ebrahimi   __ ld1(v19.V16B(), v20.V16B(), v21.V16B(), MemOperand(x1, 48, PostIndex));
799*f5c631daSSadaf Ebrahimi   __ ld1(v17.V16B(), v18.V16B(), MemOperand(x0));
800*f5c631daSSadaf Ebrahimi   __ ld1(v20.V16B(), v21.V16B(), MemOperand(x1, x2, PostIndex));
801*f5c631daSSadaf Ebrahimi   __ ld1(v28.V16B(), v29.V16B(), MemOperand(x1, 32, PostIndex));
802*f5c631daSSadaf Ebrahimi   __ ld1(v29.V16B(), MemOperand(x0));
803*f5c631daSSadaf Ebrahimi   __ ld1(v21.V16B(), MemOperand(x1, x2, PostIndex));
804*f5c631daSSadaf Ebrahimi   __ ld1(v4.V16B(), MemOperand(x1, 16, PostIndex));
805*f5c631daSSadaf Ebrahimi   __ ld1(v4.V1D(), v5.V1D(), v6.V1D(), v7.V1D(), MemOperand(x0));
806*f5c631daSSadaf Ebrahimi   __ ld1(v17.V1D(),
807*f5c631daSSadaf Ebrahimi          v18.V1D(),
808*f5c631daSSadaf Ebrahimi          v19.V1D(),
809*f5c631daSSadaf Ebrahimi          v20.V1D(),
810*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
811*f5c631daSSadaf Ebrahimi   __ ld1(v28.V1D(),
812*f5c631daSSadaf Ebrahimi          v29.V1D(),
813*f5c631daSSadaf Ebrahimi          v30.V1D(),
814*f5c631daSSadaf Ebrahimi          v31.V1D(),
815*f5c631daSSadaf Ebrahimi          MemOperand(x1, 32, PostIndex));
816*f5c631daSSadaf Ebrahimi   __ ld1(v20.V1D(), v21.V1D(), v22.V1D(), MemOperand(x0));
817*f5c631daSSadaf Ebrahimi   __ ld1(v19.V1D(), v20.V1D(), v21.V1D(), MemOperand(x1, x2, PostIndex));
818*f5c631daSSadaf Ebrahimi   __ ld1(v12.V1D(), v13.V1D(), v14.V1D(), MemOperand(x1, 24, PostIndex));
819*f5c631daSSadaf Ebrahimi   __ ld1(v29.V1D(), v30.V1D(), MemOperand(x0));
820*f5c631daSSadaf Ebrahimi   __ ld1(v31.V1D(), v0.V1D(), MemOperand(x1, x2, PostIndex));
821*f5c631daSSadaf Ebrahimi   __ ld1(v3.V1D(), v4.V1D(), MemOperand(x1, 16, PostIndex));
822*f5c631daSSadaf Ebrahimi   __ ld1(v28.V1D(), MemOperand(x0));
823*f5c631daSSadaf Ebrahimi   __ ld1(v11.V1D(), MemOperand(x1, x2, PostIndex));
824*f5c631daSSadaf Ebrahimi   __ ld1(v29.V1D(), MemOperand(x1, 8, PostIndex));
825*f5c631daSSadaf Ebrahimi   __ ld1(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), MemOperand(x0));
826*f5c631daSSadaf Ebrahimi   __ ld1(v8.V2D(),
827*f5c631daSSadaf Ebrahimi          v9.V2D(),
828*f5c631daSSadaf Ebrahimi          v10.V2D(),
829*f5c631daSSadaf Ebrahimi          v11.V2D(),
830*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
831*f5c631daSSadaf Ebrahimi   __ ld1(v14.V2D(),
832*f5c631daSSadaf Ebrahimi          v15.V2D(),
833*f5c631daSSadaf Ebrahimi          v16.V2D(),
834*f5c631daSSadaf Ebrahimi          v17.V2D(),
835*f5c631daSSadaf Ebrahimi          MemOperand(x1, 64, PostIndex));
836*f5c631daSSadaf Ebrahimi   __ ld1(v26.V2D(), v27.V2D(), v28.V2D(), MemOperand(x0));
837*f5c631daSSadaf Ebrahimi   __ ld1(v5.V2D(), v6.V2D(), v7.V2D(), MemOperand(x1, x2, PostIndex));
838*f5c631daSSadaf Ebrahimi   __ ld1(v26.V2D(), v27.V2D(), v28.V2D(), MemOperand(x1, 48, PostIndex));
839*f5c631daSSadaf Ebrahimi   __ ld1(v18.V2D(), v19.V2D(), MemOperand(x0));
840*f5c631daSSadaf Ebrahimi   __ ld1(v21.V2D(), v22.V2D(), MemOperand(x1, x2, PostIndex));
841*f5c631daSSadaf Ebrahimi   __ ld1(v17.V2D(), v18.V2D(), MemOperand(x1, 32, PostIndex));
842*f5c631daSSadaf Ebrahimi   __ ld1(v5.V2D(), MemOperand(x0));
843*f5c631daSSadaf Ebrahimi   __ ld1(v6.V2D(), MemOperand(x1, x2, PostIndex));
844*f5c631daSSadaf Ebrahimi   __ ld1(v15.V2D(), MemOperand(x1, 16, PostIndex));
845*f5c631daSSadaf Ebrahimi   __ ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x0));
846*f5c631daSSadaf Ebrahimi   __ ld1(v24.V2S(),
847*f5c631daSSadaf Ebrahimi          v25.V2S(),
848*f5c631daSSadaf Ebrahimi          v26.V2S(),
849*f5c631daSSadaf Ebrahimi          v27.V2S(),
850*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
851*f5c631daSSadaf Ebrahimi   __ ld1(v27.V2S(),
852*f5c631daSSadaf Ebrahimi          v28.V2S(),
853*f5c631daSSadaf Ebrahimi          v29.V2S(),
854*f5c631daSSadaf Ebrahimi          v30.V2S(),
855*f5c631daSSadaf Ebrahimi          MemOperand(x1, 32, PostIndex));
856*f5c631daSSadaf Ebrahimi   __ ld1(v11.V2S(), v12.V2S(), v13.V2S(), MemOperand(x0));
857*f5c631daSSadaf Ebrahimi   __ ld1(v8.V2S(), v9.V2S(), v10.V2S(), MemOperand(x1, x2, PostIndex));
858*f5c631daSSadaf Ebrahimi   __ ld1(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x1, 24, PostIndex));
859*f5c631daSSadaf Ebrahimi   __ ld1(v0.V2S(), v1.V2S(), MemOperand(x0));
860*f5c631daSSadaf Ebrahimi   __ ld1(v13.V2S(), v14.V2S(), MemOperand(x1, x2, PostIndex));
861*f5c631daSSadaf Ebrahimi   __ ld1(v3.V2S(), v4.V2S(), MemOperand(x1, 16, PostIndex));
862*f5c631daSSadaf Ebrahimi   __ ld1(v26.V2S(), MemOperand(x0));
863*f5c631daSSadaf Ebrahimi   __ ld1(v0.V2S(), MemOperand(x1, x2, PostIndex));
864*f5c631daSSadaf Ebrahimi   __ ld1(v11.V2S(), MemOperand(x1, 8, PostIndex));
865*f5c631daSSadaf Ebrahimi   __ ld1(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), MemOperand(x0));
866*f5c631daSSadaf Ebrahimi   __ ld1(v24.V4H(),
867*f5c631daSSadaf Ebrahimi          v25.V4H(),
868*f5c631daSSadaf Ebrahimi          v26.V4H(),
869*f5c631daSSadaf Ebrahimi          v27.V4H(),
870*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
871*f5c631daSSadaf Ebrahimi   __ ld1(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), MemOperand(x1, 32, PostIndex));
872*f5c631daSSadaf Ebrahimi   __ ld1(v30.V4H(), v31.V4H(), v0.V4H(), MemOperand(x0));
873*f5c631daSSadaf Ebrahimi   __ ld1(v25.V4H(), v26.V4H(), v27.V4H(), MemOperand(x1, x2, PostIndex));
874*f5c631daSSadaf Ebrahimi   __ ld1(v3.V4H(), v4.V4H(), v5.V4H(), MemOperand(x1, 24, PostIndex));
875*f5c631daSSadaf Ebrahimi   __ ld1(v3.V4H(), v4.V4H(), MemOperand(x0));
876*f5c631daSSadaf Ebrahimi   __ ld1(v3.V4H(), v4.V4H(), MemOperand(x1, x2, PostIndex));
877*f5c631daSSadaf Ebrahimi   __ ld1(v23.V4H(), v24.V4H(), MemOperand(x1, 16, PostIndex));
878*f5c631daSSadaf Ebrahimi   __ ld1(v26.V4H(), MemOperand(x0));
879*f5c631daSSadaf Ebrahimi   __ ld1(v1.V4H(), MemOperand(x1, x2, PostIndex));
880*f5c631daSSadaf Ebrahimi   __ ld1(v14.V4H(), MemOperand(x1, 8, PostIndex));
881*f5c631daSSadaf Ebrahimi   __ ld1(v26.V4S(), v27.V4S(), v28.V4S(), v29.V4S(), MemOperand(x0));
882*f5c631daSSadaf Ebrahimi   __ ld1(v28.V4S(),
883*f5c631daSSadaf Ebrahimi          v29.V4S(),
884*f5c631daSSadaf Ebrahimi          v30.V4S(),
885*f5c631daSSadaf Ebrahimi          v31.V4S(),
886*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
887*f5c631daSSadaf Ebrahimi   __ ld1(v4.V4S(), v5.V4S(), v6.V4S(), v7.V4S(), MemOperand(x1, 64, PostIndex));
888*f5c631daSSadaf Ebrahimi   __ ld1(v2.V4S(), v3.V4S(), v4.V4S(), MemOperand(x0));
889*f5c631daSSadaf Ebrahimi   __ ld1(v22.V4S(), v23.V4S(), v24.V4S(), MemOperand(x1, x2, PostIndex));
890*f5c631daSSadaf Ebrahimi   __ ld1(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x1, 48, PostIndex));
891*f5c631daSSadaf Ebrahimi   __ ld1(v20.V4S(), v21.V4S(), MemOperand(x0));
892*f5c631daSSadaf Ebrahimi   __ ld1(v30.V4S(), v31.V4S(), MemOperand(x1, x2, PostIndex));
893*f5c631daSSadaf Ebrahimi   __ ld1(v11.V4S(), v12.V4S(), MemOperand(x1, 32, PostIndex));
894*f5c631daSSadaf Ebrahimi   __ ld1(v15.V4S(), MemOperand(x0));
895*f5c631daSSadaf Ebrahimi   __ ld1(v12.V4S(), MemOperand(x1, x2, PostIndex));
896*f5c631daSSadaf Ebrahimi   __ ld1(v0.V4S(), MemOperand(x1, 16, PostIndex));
897*f5c631daSSadaf Ebrahimi   __ ld1(v17.V8B(), v18.V8B(), v19.V8B(), v20.V8B(), MemOperand(x0));
898*f5c631daSSadaf Ebrahimi   __ ld1(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), MemOperand(x1, x2, PostIndex));
899*f5c631daSSadaf Ebrahimi   __ ld1(v9.V8B(),
900*f5c631daSSadaf Ebrahimi          v10.V8B(),
901*f5c631daSSadaf Ebrahimi          v11.V8B(),
902*f5c631daSSadaf Ebrahimi          v12.V8B(),
903*f5c631daSSadaf Ebrahimi          MemOperand(x1, 32, PostIndex));
904*f5c631daSSadaf Ebrahimi   __ ld1(v4.V8B(), v5.V8B(), v6.V8B(), MemOperand(x0));
905*f5c631daSSadaf Ebrahimi   __ ld1(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x1, x2, PostIndex));
906*f5c631daSSadaf Ebrahimi   __ ld1(v12.V8B(), v13.V8B(), v14.V8B(), MemOperand(x1, 24, PostIndex));
907*f5c631daSSadaf Ebrahimi   __ ld1(v10.V8B(), v11.V8B(), MemOperand(x0));
908*f5c631daSSadaf Ebrahimi   __ ld1(v11.V8B(), v12.V8B(), MemOperand(x1, x2, PostIndex));
909*f5c631daSSadaf Ebrahimi   __ ld1(v27.V8B(), v28.V8B(), MemOperand(x1, 16, PostIndex));
910*f5c631daSSadaf Ebrahimi   __ ld1(v31.V8B(), MemOperand(x0));
911*f5c631daSSadaf Ebrahimi   __ ld1(v10.V8B(), MemOperand(x1, x2, PostIndex));
912*f5c631daSSadaf Ebrahimi   __ ld1(v28.V8B(), MemOperand(x1, 8, PostIndex));
913*f5c631daSSadaf Ebrahimi   __ ld1(v5.V8H(), v6.V8H(), v7.V8H(), v8.V8H(), MemOperand(x0));
914*f5c631daSSadaf Ebrahimi   __ ld1(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex));
915*f5c631daSSadaf Ebrahimi   __ ld1(v10.V8H(),
916*f5c631daSSadaf Ebrahimi          v11.V8H(),
917*f5c631daSSadaf Ebrahimi          v12.V8H(),
918*f5c631daSSadaf Ebrahimi          v13.V8H(),
919*f5c631daSSadaf Ebrahimi          MemOperand(x1, 64, PostIndex));
920*f5c631daSSadaf Ebrahimi   __ ld1(v26.V8H(), v27.V8H(), v28.V8H(), MemOperand(x0));
921*f5c631daSSadaf Ebrahimi   __ ld1(v3.V8H(), v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex));
922*f5c631daSSadaf Ebrahimi   __ ld1(v17.V8H(), v18.V8H(), v19.V8H(), MemOperand(x1, 48, PostIndex));
923*f5c631daSSadaf Ebrahimi   __ ld1(v4.V8H(), v5.V8H(), MemOperand(x0));
924*f5c631daSSadaf Ebrahimi   __ ld1(v21.V8H(), v22.V8H(), MemOperand(x1, x2, PostIndex));
925*f5c631daSSadaf Ebrahimi   __ ld1(v4.V8H(), v5.V8H(), MemOperand(x1, 32, PostIndex));
926*f5c631daSSadaf Ebrahimi   __ ld1(v9.V8H(), MemOperand(x0));
927*f5c631daSSadaf Ebrahimi   __ ld1(v27.V8H(), MemOperand(x1, x2, PostIndex));
928*f5c631daSSadaf Ebrahimi   __ ld1(v26.V8H(), MemOperand(x1, 16, PostIndex));
929*f5c631daSSadaf Ebrahimi   __ ld1(v19.B(), 1, MemOperand(x0));
930*f5c631daSSadaf Ebrahimi   __ ld1(v12.B(), 3, MemOperand(x1, x2, PostIndex));
931*f5c631daSSadaf Ebrahimi   __ ld1(v27.B(), 12, MemOperand(x1, 1, PostIndex));
932*f5c631daSSadaf Ebrahimi   __ ld1(v10.D(), 1, MemOperand(x0));
933*f5c631daSSadaf Ebrahimi   __ ld1(v26.D(), 1, MemOperand(x1, x2, PostIndex));
934*f5c631daSSadaf Ebrahimi   __ ld1(v7.D(), 1, MemOperand(x1, 8, PostIndex));
935*f5c631daSSadaf Ebrahimi   __ ld1(v19.H(), 5, MemOperand(x0));
936*f5c631daSSadaf Ebrahimi   __ ld1(v10.H(), 1, MemOperand(x1, x2, PostIndex));
937*f5c631daSSadaf Ebrahimi   __ ld1(v5.H(), 4, MemOperand(x1, 2, PostIndex));
938*f5c631daSSadaf Ebrahimi   __ ld1(v21.S(), 2, MemOperand(x0));
939*f5c631daSSadaf Ebrahimi   __ ld1(v13.S(), 2, MemOperand(x1, x2, PostIndex));
940*f5c631daSSadaf Ebrahimi   __ ld1(v1.S(), 2, MemOperand(x1, 4, PostIndex));
941*f5c631daSSadaf Ebrahimi   __ ld1r(v2.V16B(), MemOperand(x0));
942*f5c631daSSadaf Ebrahimi   __ ld1r(v2.V16B(), MemOperand(x1, x2, PostIndex));
943*f5c631daSSadaf Ebrahimi   __ ld1r(v22.V16B(), MemOperand(x1, 1, PostIndex));
944*f5c631daSSadaf Ebrahimi   __ ld1r(v25.V1D(), MemOperand(x0));
945*f5c631daSSadaf Ebrahimi   __ ld1r(v9.V1D(), MemOperand(x1, x2, PostIndex));
946*f5c631daSSadaf Ebrahimi   __ ld1r(v23.V1D(), MemOperand(x1, 8, PostIndex));
947*f5c631daSSadaf Ebrahimi   __ ld1r(v19.V2D(), MemOperand(x0));
948*f5c631daSSadaf Ebrahimi   __ ld1r(v21.V2D(), MemOperand(x1, x2, PostIndex));
949*f5c631daSSadaf Ebrahimi   __ ld1r(v30.V2D(), MemOperand(x1, 8, PostIndex));
950*f5c631daSSadaf Ebrahimi   __ ld1r(v24.V2S(), MemOperand(x0));
951*f5c631daSSadaf Ebrahimi   __ ld1r(v26.V2S(), MemOperand(x1, x2, PostIndex));
952*f5c631daSSadaf Ebrahimi   __ ld1r(v28.V2S(), MemOperand(x1, 4, PostIndex));
953*f5c631daSSadaf Ebrahimi   __ ld1r(v19.V4H(), MemOperand(x0));
954*f5c631daSSadaf Ebrahimi   __ ld1r(v1.V4H(), MemOperand(x1, x2, PostIndex));
955*f5c631daSSadaf Ebrahimi   __ ld1r(v21.V4H(), MemOperand(x1, 2, PostIndex));
956*f5c631daSSadaf Ebrahimi   __ ld1r(v15.V4S(), MemOperand(x0));
957*f5c631daSSadaf Ebrahimi   __ ld1r(v21.V4S(), MemOperand(x1, x2, PostIndex));
958*f5c631daSSadaf Ebrahimi   __ ld1r(v23.V4S(), MemOperand(x1, 4, PostIndex));
959*f5c631daSSadaf Ebrahimi   __ ld1r(v26.V8B(), MemOperand(x0));
960*f5c631daSSadaf Ebrahimi   __ ld1r(v14.V8B(), MemOperand(x1, x2, PostIndex));
961*f5c631daSSadaf Ebrahimi   __ ld1r(v19.V8B(), MemOperand(x1, 1, PostIndex));
962*f5c631daSSadaf Ebrahimi   __ ld1r(v13.V8H(), MemOperand(x0));
963*f5c631daSSadaf Ebrahimi   __ ld1r(v30.V8H(), MemOperand(x1, x2, PostIndex));
964*f5c631daSSadaf Ebrahimi   __ ld1r(v27.V8H(), MemOperand(x1, 2, PostIndex));
965*f5c631daSSadaf Ebrahimi   __ ld2(v21.V16B(), v22.V16B(), MemOperand(x0));
966*f5c631daSSadaf Ebrahimi   __ ld2(v21.V16B(), v22.V16B(), MemOperand(x1, x2, PostIndex));
967*f5c631daSSadaf Ebrahimi   __ ld2(v12.V16B(), v13.V16B(), MemOperand(x1, 32, PostIndex));
968*f5c631daSSadaf Ebrahimi   __ ld2(v14.V2D(), v15.V2D(), MemOperand(x0));
969*f5c631daSSadaf Ebrahimi   __ ld2(v0.V2D(), v1.V2D(), MemOperand(x1, x2, PostIndex));
970*f5c631daSSadaf Ebrahimi   __ ld2(v12.V2D(), v13.V2D(), MemOperand(x1, 32, PostIndex));
971*f5c631daSSadaf Ebrahimi   __ ld2(v27.V2S(), v28.V2S(), MemOperand(x0));
972*f5c631daSSadaf Ebrahimi   __ ld2(v2.V2S(), v3.V2S(), MemOperand(x1, x2, PostIndex));
973*f5c631daSSadaf Ebrahimi   __ ld2(v12.V2S(), v13.V2S(), MemOperand(x1, 16, PostIndex));
974*f5c631daSSadaf Ebrahimi   __ ld2(v9.V4H(), v10.V4H(), MemOperand(x0));
975*f5c631daSSadaf Ebrahimi   __ ld2(v23.V4H(), v24.V4H(), MemOperand(x1, x2, PostIndex));
976*f5c631daSSadaf Ebrahimi   __ ld2(v1.V4H(), v2.V4H(), MemOperand(x1, 16, PostIndex));
977*f5c631daSSadaf Ebrahimi   __ ld2(v20.V4S(), v21.V4S(), MemOperand(x0));
978*f5c631daSSadaf Ebrahimi   __ ld2(v10.V4S(), v11.V4S(), MemOperand(x1, x2, PostIndex));
979*f5c631daSSadaf Ebrahimi   __ ld2(v24.V4S(), v25.V4S(), MemOperand(x1, 32, PostIndex));
980*f5c631daSSadaf Ebrahimi   __ ld2(v17.V8B(), v18.V8B(), MemOperand(x0));
981*f5c631daSSadaf Ebrahimi   __ ld2(v13.V8B(), v14.V8B(), MemOperand(x1, x2, PostIndex));
982*f5c631daSSadaf Ebrahimi   __ ld2(v7.V8B(), v8.V8B(), MemOperand(x1, 16, PostIndex));
983*f5c631daSSadaf Ebrahimi   __ ld2(v30.V8H(), v31.V8H(), MemOperand(x0));
984*f5c631daSSadaf Ebrahimi   __ ld2(v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex));
985*f5c631daSSadaf Ebrahimi   __ ld2(v13.V8H(), v14.V8H(), MemOperand(x1, 32, PostIndex));
986*f5c631daSSadaf Ebrahimi   __ ld2(v5.B(), v6.B(), 12, MemOperand(x0));
987*f5c631daSSadaf Ebrahimi   __ ld2(v16.B(), v17.B(), 7, MemOperand(x1, x2, PostIndex));
988*f5c631daSSadaf Ebrahimi   __ ld2(v29.B(), v30.B(), 2, MemOperand(x1, 2, PostIndex));
989*f5c631daSSadaf Ebrahimi   __ ld2(v11.D(), v12.D(), 1, MemOperand(x0));
990*f5c631daSSadaf Ebrahimi   __ ld2(v26.D(), v27.D(), 0, MemOperand(x1, x2, PostIndex));
991*f5c631daSSadaf Ebrahimi   __ ld2(v25.D(), v26.D(), 0, MemOperand(x1, 16, PostIndex));
992*f5c631daSSadaf Ebrahimi   __ ld2(v18.H(), v19.H(), 7, MemOperand(x0));
993*f5c631daSSadaf Ebrahimi   __ ld2(v17.H(), v18.H(), 5, MemOperand(x1, x2, PostIndex));
994*f5c631daSSadaf Ebrahimi   __ ld2(v30.H(), v31.H(), 2, MemOperand(x1, 4, PostIndex));
995*f5c631daSSadaf Ebrahimi   __ ld2(v29.S(), v30.S(), 3, MemOperand(x0));
996*f5c631daSSadaf Ebrahimi   __ ld2(v28.S(), v29.S(), 0, MemOperand(x1, x2, PostIndex));
997*f5c631daSSadaf Ebrahimi   __ ld2(v6.S(), v7.S(), 1, MemOperand(x1, 8, PostIndex));
998*f5c631daSSadaf Ebrahimi   __ ld2r(v26.V16B(), v27.V16B(), MemOperand(x0));
999*f5c631daSSadaf Ebrahimi   __ ld2r(v21.V16B(), v22.V16B(), MemOperand(x1, x2, PostIndex));
1000*f5c631daSSadaf Ebrahimi   __ ld2r(v5.V16B(), v6.V16B(), MemOperand(x1, 2, PostIndex));
1001*f5c631daSSadaf Ebrahimi   __ ld2r(v26.V1D(), v27.V1D(), MemOperand(x0));
1002*f5c631daSSadaf Ebrahimi   __ ld2r(v14.V1D(), v15.V1D(), MemOperand(x1, x2, PostIndex));
1003*f5c631daSSadaf Ebrahimi   __ ld2r(v23.V1D(), v24.V1D(), MemOperand(x1, 16, PostIndex));
1004*f5c631daSSadaf Ebrahimi   __ ld2r(v11.V2D(), v12.V2D(), MemOperand(x0));
1005*f5c631daSSadaf Ebrahimi   __ ld2r(v29.V2D(), v30.V2D(), MemOperand(x1, x2, PostIndex));
1006*f5c631daSSadaf Ebrahimi   __ ld2r(v15.V2D(), v16.V2D(), MemOperand(x1, 16, PostIndex));
1007*f5c631daSSadaf Ebrahimi   __ ld2r(v26.V2S(), v27.V2S(), MemOperand(x0));
1008*f5c631daSSadaf Ebrahimi   __ ld2r(v22.V2S(), v23.V2S(), MemOperand(x1, x2, PostIndex));
1009*f5c631daSSadaf Ebrahimi   __ ld2r(v2.V2S(), v3.V2S(), MemOperand(x1, 8, PostIndex));
1010*f5c631daSSadaf Ebrahimi   __ ld2r(v2.V4H(), v3.V4H(), MemOperand(x0));
1011*f5c631daSSadaf Ebrahimi   __ ld2r(v9.V4H(), v10.V4H(), MemOperand(x1, x2, PostIndex));
1012*f5c631daSSadaf Ebrahimi   __ ld2r(v6.V4H(), v7.V4H(), MemOperand(x1, 4, PostIndex));
1013*f5c631daSSadaf Ebrahimi   __ ld2r(v7.V4S(), v8.V4S(), MemOperand(x0));
1014*f5c631daSSadaf Ebrahimi   __ ld2r(v19.V4S(), v20.V4S(), MemOperand(x1, x2, PostIndex));
1015*f5c631daSSadaf Ebrahimi   __ ld2r(v21.V4S(), v22.V4S(), MemOperand(x1, 8, PostIndex));
1016*f5c631daSSadaf Ebrahimi   __ ld2r(v26.V8B(), v27.V8B(), MemOperand(x0));
1017*f5c631daSSadaf Ebrahimi   __ ld2r(v20.V8B(), v21.V8B(), MemOperand(x1, x2, PostIndex));
1018*f5c631daSSadaf Ebrahimi   __ ld2r(v11.V8B(), v12.V8B(), MemOperand(x1, 2, PostIndex));
1019*f5c631daSSadaf Ebrahimi   __ ld2r(v12.V8H(), v13.V8H(), MemOperand(x0));
1020*f5c631daSSadaf Ebrahimi   __ ld2r(v6.V8H(), v7.V8H(), MemOperand(x1, x2, PostIndex));
1021*f5c631daSSadaf Ebrahimi   __ ld2r(v25.V8H(), v26.V8H(), MemOperand(x1, 4, PostIndex));
1022*f5c631daSSadaf Ebrahimi   __ ld3(v20.V16B(), v21.V16B(), v22.V16B(), MemOperand(x0));
1023*f5c631daSSadaf Ebrahimi   __ ld3(v28.V16B(), v29.V16B(), v30.V16B(), MemOperand(x1, x2, PostIndex));
1024*f5c631daSSadaf Ebrahimi   __ ld3(v20.V16B(), v21.V16B(), v22.V16B(), MemOperand(x1, 48, PostIndex));
1025*f5c631daSSadaf Ebrahimi   __ ld3(v21.V2D(), v22.V2D(), v23.V2D(), MemOperand(x0));
1026*f5c631daSSadaf Ebrahimi   __ ld3(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x1, x2, PostIndex));
1027*f5c631daSSadaf Ebrahimi   __ ld3(v27.V2D(), v28.V2D(), v29.V2D(), MemOperand(x1, 48, PostIndex));
1028*f5c631daSSadaf Ebrahimi   __ ld3(v7.V2S(), v8.V2S(), v9.V2S(), MemOperand(x0));
1029*f5c631daSSadaf Ebrahimi   __ ld3(v20.V2S(), v21.V2S(), v22.V2S(), MemOperand(x1, x2, PostIndex));
1030*f5c631daSSadaf Ebrahimi   __ ld3(v26.V2S(), v27.V2S(), v28.V2S(), MemOperand(x1, 24, PostIndex));
1031*f5c631daSSadaf Ebrahimi   __ ld3(v27.V4H(), v28.V4H(), v29.V4H(), MemOperand(x0));
1032*f5c631daSSadaf Ebrahimi   __ ld3(v28.V4H(), v29.V4H(), v30.V4H(), MemOperand(x1, x2, PostIndex));
1033*f5c631daSSadaf Ebrahimi   __ ld3(v7.V4H(), v8.V4H(), v9.V4H(), MemOperand(x1, 24, PostIndex));
1034*f5c631daSSadaf Ebrahimi   __ ld3(v2.V4S(), v3.V4S(), v4.V4S(), MemOperand(x0));
1035*f5c631daSSadaf Ebrahimi   __ ld3(v24.V4S(), v25.V4S(), v26.V4S(), MemOperand(x1, x2, PostIndex));
1036*f5c631daSSadaf Ebrahimi   __ ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x1, 48, PostIndex));
1037*f5c631daSSadaf Ebrahimi   __ ld3(v29.V8B(), v30.V8B(), v31.V8B(), MemOperand(x0));
1038*f5c631daSSadaf Ebrahimi   __ ld3(v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x1, x2, PostIndex));
1039*f5c631daSSadaf Ebrahimi   __ ld3(v12.V8B(), v13.V8B(), v14.V8B(), MemOperand(x1, 24, PostIndex));
1040*f5c631daSSadaf Ebrahimi   __ ld3(v22.V8H(), v23.V8H(), v24.V8H(), MemOperand(x0));
1041*f5c631daSSadaf Ebrahimi   __ ld3(v13.V8H(), v14.V8H(), v15.V8H(), MemOperand(x1, x2, PostIndex));
1042*f5c631daSSadaf Ebrahimi   __ ld3(v28.V8H(), v29.V8H(), v30.V8H(), MemOperand(x1, 48, PostIndex));
1043*f5c631daSSadaf Ebrahimi   __ ld3(v21.B(), v22.B(), v23.B(), 11, MemOperand(x0));
1044*f5c631daSSadaf Ebrahimi   __ ld3(v5.B(), v6.B(), v7.B(), 9, MemOperand(x1, x2, PostIndex));
1045*f5c631daSSadaf Ebrahimi   __ ld3(v23.B(), v24.B(), v25.B(), 0, MemOperand(x1, 3, PostIndex));
1046*f5c631daSSadaf Ebrahimi   __ ld3(v16.D(), v17.D(), v18.D(), 0, MemOperand(x0));
1047*f5c631daSSadaf Ebrahimi   __ ld3(v30.D(), v31.D(), v0.D(), 0, MemOperand(x1, x2, PostIndex));
1048*f5c631daSSadaf Ebrahimi   __ ld3(v28.D(), v29.D(), v30.D(), 1, MemOperand(x1, 24, PostIndex));
1049*f5c631daSSadaf Ebrahimi   __ ld3(v13.H(), v14.H(), v15.H(), 2, MemOperand(x0));
1050*f5c631daSSadaf Ebrahimi   __ ld3(v22.H(), v23.H(), v24.H(), 7, MemOperand(x1, x2, PostIndex));
1051*f5c631daSSadaf Ebrahimi   __ ld3(v14.H(), v15.H(), v16.H(), 3, MemOperand(x1, 6, PostIndex));
1052*f5c631daSSadaf Ebrahimi   __ ld3(v22.S(), v23.S(), v24.S(), 3, MemOperand(x0));
1053*f5c631daSSadaf Ebrahimi   __ ld3(v30.S(), v31.S(), v0.S(), 2, MemOperand(x1, x2, PostIndex));
1054*f5c631daSSadaf Ebrahimi   __ ld3(v12.S(), v13.S(), v14.S(), 1, MemOperand(x1, 12, PostIndex));
1055*f5c631daSSadaf Ebrahimi   __ ld3r(v24.V16B(), v25.V16B(), v26.V16B(), MemOperand(x0));
1056*f5c631daSSadaf Ebrahimi   __ ld3r(v24.V16B(), v25.V16B(), v26.V16B(), MemOperand(x1, x2, PostIndex));
1057*f5c631daSSadaf Ebrahimi   __ ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x1, 3, PostIndex));
1058*f5c631daSSadaf Ebrahimi   __ ld3r(v4.V1D(), v5.V1D(), v6.V1D(), MemOperand(x0));
1059*f5c631daSSadaf Ebrahimi   __ ld3r(v7.V1D(), v8.V1D(), v9.V1D(), MemOperand(x1, x2, PostIndex));
1060*f5c631daSSadaf Ebrahimi   __ ld3r(v17.V1D(), v18.V1D(), v19.V1D(), MemOperand(x1, 24, PostIndex));
1061*f5c631daSSadaf Ebrahimi   __ ld3r(v16.V2D(), v17.V2D(), v18.V2D(), MemOperand(x0));
1062*f5c631daSSadaf Ebrahimi   __ ld3r(v20.V2D(), v21.V2D(), v22.V2D(), MemOperand(x1, x2, PostIndex));
1063*f5c631daSSadaf Ebrahimi   __ ld3r(v14.V2D(), v15.V2D(), v16.V2D(), MemOperand(x1, 24, PostIndex));
1064*f5c631daSSadaf Ebrahimi   __ ld3r(v10.V2S(), v11.V2S(), v12.V2S(), MemOperand(x0));
1065*f5c631daSSadaf Ebrahimi   __ ld3r(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x1, x2, PostIndex));
1066*f5c631daSSadaf Ebrahimi   __ ld3r(v23.V2S(), v24.V2S(), v25.V2S(), MemOperand(x1, 12, PostIndex));
1067*f5c631daSSadaf Ebrahimi   __ ld3r(v22.V4H(), v23.V4H(), v24.V4H(), MemOperand(x0));
1068*f5c631daSSadaf Ebrahimi   __ ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x1, x2, PostIndex));
1069*f5c631daSSadaf Ebrahimi   __ ld3r(v7.V4H(), v8.V4H(), v9.V4H(), MemOperand(x1, 6, PostIndex));
1070*f5c631daSSadaf Ebrahimi   __ ld3r(v26.V4S(), v27.V4S(), v28.V4S(), MemOperand(x0));
1071*f5c631daSSadaf Ebrahimi   __ ld3r(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x1, x2, PostIndex));
1072*f5c631daSSadaf Ebrahimi   __ ld3r(v30.V4S(), v31.V4S(), v0.V4S(), MemOperand(x1, 12, PostIndex));
1073*f5c631daSSadaf Ebrahimi   __ ld3r(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x0));
1074*f5c631daSSadaf Ebrahimi   __ ld3r(v10.V8B(), v11.V8B(), v12.V8B(), MemOperand(x1, x2, PostIndex));
1075*f5c631daSSadaf Ebrahimi   __ ld3r(v28.V8B(), v29.V8B(), v30.V8B(), MemOperand(x1, 3, PostIndex));
1076*f5c631daSSadaf Ebrahimi   __ ld3r(v6.V8H(), v7.V8H(), v8.V8H(), MemOperand(x0));
1077*f5c631daSSadaf Ebrahimi   __ ld3r(v29.V8H(), v30.V8H(), v31.V8H(), MemOperand(x1, x2, PostIndex));
1078*f5c631daSSadaf Ebrahimi   __ ld3r(v7.V8H(), v8.V8H(), v9.V8H(), MemOperand(x1, 6, PostIndex));
1079*f5c631daSSadaf Ebrahimi   __ ld4(v3.V16B(), v4.V16B(), v5.V16B(), v6.V16B(), MemOperand(x0));
1080*f5c631daSSadaf Ebrahimi   __ ld4(v2.V16B(),
1081*f5c631daSSadaf Ebrahimi          v3.V16B(),
1082*f5c631daSSadaf Ebrahimi          v4.V16B(),
1083*f5c631daSSadaf Ebrahimi          v5.V16B(),
1084*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
1085*f5c631daSSadaf Ebrahimi   __ ld4(v5.V16B(),
1086*f5c631daSSadaf Ebrahimi          v6.V16B(),
1087*f5c631daSSadaf Ebrahimi          v7.V16B(),
1088*f5c631daSSadaf Ebrahimi          v8.V16B(),
1089*f5c631daSSadaf Ebrahimi          MemOperand(x1, 64, PostIndex));
1090*f5c631daSSadaf Ebrahimi   __ ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), MemOperand(x0));
1091*f5c631daSSadaf Ebrahimi   __ ld4(v4.V2D(), v5.V2D(), v6.V2D(), v7.V2D(), MemOperand(x1, x2, PostIndex));
1092*f5c631daSSadaf Ebrahimi   __ ld4(v29.V2D(),
1093*f5c631daSSadaf Ebrahimi          v30.V2D(),
1094*f5c631daSSadaf Ebrahimi          v31.V2D(),
1095*f5c631daSSadaf Ebrahimi          v0.V2D(),
1096*f5c631daSSadaf Ebrahimi          MemOperand(x1, 64, PostIndex));
1097*f5c631daSSadaf Ebrahimi   __ ld4(v27.V2S(), v28.V2S(), v29.V2S(), v30.V2S(), MemOperand(x0));
1098*f5c631daSSadaf Ebrahimi   __ ld4(v24.V2S(),
1099*f5c631daSSadaf Ebrahimi          v25.V2S(),
1100*f5c631daSSadaf Ebrahimi          v26.V2S(),
1101*f5c631daSSadaf Ebrahimi          v27.V2S(),
1102*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
1103*f5c631daSSadaf Ebrahimi   __ ld4(v4.V2S(), v5.V2S(), v6.V2S(), v7.V2S(), MemOperand(x1, 32, PostIndex));
1104*f5c631daSSadaf Ebrahimi   __ ld4(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), MemOperand(x0));
1105*f5c631daSSadaf Ebrahimi   __ ld4(v23.V4H(),
1106*f5c631daSSadaf Ebrahimi          v24.V4H(),
1107*f5c631daSSadaf Ebrahimi          v25.V4H(),
1108*f5c631daSSadaf Ebrahimi          v26.V4H(),
1109*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
1110*f5c631daSSadaf Ebrahimi   __ ld4(v2.V4H(), v3.V4H(), v4.V4H(), v5.V4H(), MemOperand(x1, 32, PostIndex));
1111*f5c631daSSadaf Ebrahimi   __ ld4(v7.V4S(), v8.V4S(), v9.V4S(), v10.V4S(), MemOperand(x0));
1112*f5c631daSSadaf Ebrahimi   __ ld4(v28.V4S(),
1113*f5c631daSSadaf Ebrahimi          v29.V4S(),
1114*f5c631daSSadaf Ebrahimi          v30.V4S(),
1115*f5c631daSSadaf Ebrahimi          v31.V4S(),
1116*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
1117*f5c631daSSadaf Ebrahimi   __ ld4(v29.V4S(),
1118*f5c631daSSadaf Ebrahimi          v30.V4S(),
1119*f5c631daSSadaf Ebrahimi          v31.V4S(),
1120*f5c631daSSadaf Ebrahimi          v0.V4S(),
1121*f5c631daSSadaf Ebrahimi          MemOperand(x1, 64, PostIndex));
1122*f5c631daSSadaf Ebrahimi   __ ld4(v15.V8B(), v16.V8B(), v17.V8B(), v18.V8B(), MemOperand(x0));
1123*f5c631daSSadaf Ebrahimi   __ ld4(v27.V8B(),
1124*f5c631daSSadaf Ebrahimi          v28.V8B(),
1125*f5c631daSSadaf Ebrahimi          v29.V8B(),
1126*f5c631daSSadaf Ebrahimi          v30.V8B(),
1127*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
1128*f5c631daSSadaf Ebrahimi   __ ld4(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), MemOperand(x1, 32, PostIndex));
1129*f5c631daSSadaf Ebrahimi   __ ld4(v25.V8H(), v26.V8H(), v27.V8H(), v28.V8H(), MemOperand(x0));
1130*f5c631daSSadaf Ebrahimi   __ ld4(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex));
1131*f5c631daSSadaf Ebrahimi   __ ld4(v20.V8H(),
1132*f5c631daSSadaf Ebrahimi          v21.V8H(),
1133*f5c631daSSadaf Ebrahimi          v22.V8H(),
1134*f5c631daSSadaf Ebrahimi          v23.V8H(),
1135*f5c631daSSadaf Ebrahimi          MemOperand(x1, 64, PostIndex));
1136*f5c631daSSadaf Ebrahimi   __ ld4(v20.B(), v21.B(), v22.B(), v23.B(), 3, MemOperand(x0));
1137*f5c631daSSadaf Ebrahimi   __ ld4(v12.B(), v13.B(), v14.B(), v15.B(), 3, MemOperand(x1, x2, PostIndex));
1138*f5c631daSSadaf Ebrahimi   __ ld4(v27.B(), v28.B(), v29.B(), v30.B(), 6, MemOperand(x1, 4, PostIndex));
1139*f5c631daSSadaf Ebrahimi   __ ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, MemOperand(x0));
1140*f5c631daSSadaf Ebrahimi   __ ld4(v15.D(), v16.D(), v17.D(), v18.D(), 1, MemOperand(x1, x2, PostIndex));
1141*f5c631daSSadaf Ebrahimi   __ ld4(v16.D(), v17.D(), v18.D(), v19.D(), 1, MemOperand(x1, 32, PostIndex));
1142*f5c631daSSadaf Ebrahimi   __ ld4(v2.H(), v3.H(), v4.H(), v5.H(), 6, MemOperand(x0));
1143*f5c631daSSadaf Ebrahimi   __ ld4(v5.H(), v6.H(), v7.H(), v8.H(), 3, MemOperand(x1, x2, PostIndex));
1144*f5c631daSSadaf Ebrahimi   __ ld4(v7.H(), v8.H(), v9.H(), v10.H(), 6, MemOperand(x1, 8, PostIndex));
1145*f5c631daSSadaf Ebrahimi   __ ld4(v6.S(), v7.S(), v8.S(), v9.S(), 1, MemOperand(x0));
1146*f5c631daSSadaf Ebrahimi   __ ld4(v25.S(), v26.S(), v27.S(), v28.S(), 2, MemOperand(x1, x2, PostIndex));
1147*f5c631daSSadaf Ebrahimi   __ ld4(v8.S(), v9.S(), v10.S(), v11.S(), 3, MemOperand(x1, 16, PostIndex));
1148*f5c631daSSadaf Ebrahimi   __ ld4r(v14.V16B(), v15.V16B(), v16.V16B(), v17.V16B(), MemOperand(x0));
1149*f5c631daSSadaf Ebrahimi   __ ld4r(v13.V16B(),
1150*f5c631daSSadaf Ebrahimi           v14.V16B(),
1151*f5c631daSSadaf Ebrahimi           v15.V16B(),
1152*f5c631daSSadaf Ebrahimi           v16.V16B(),
1153*f5c631daSSadaf Ebrahimi           MemOperand(x1, x2, PostIndex));
1154*f5c631daSSadaf Ebrahimi   __ ld4r(v9.V16B(),
1155*f5c631daSSadaf Ebrahimi           v10.V16B(),
1156*f5c631daSSadaf Ebrahimi           v11.V16B(),
1157*f5c631daSSadaf Ebrahimi           v12.V16B(),
1158*f5c631daSSadaf Ebrahimi           MemOperand(x1, 4, PostIndex));
1159*f5c631daSSadaf Ebrahimi   __ ld4r(v8.V1D(), v9.V1D(), v10.V1D(), v11.V1D(), MemOperand(x0));
1160*f5c631daSSadaf Ebrahimi   __ ld4r(v4.V1D(),
1161*f5c631daSSadaf Ebrahimi           v5.V1D(),
1162*f5c631daSSadaf Ebrahimi           v6.V1D(),
1163*f5c631daSSadaf Ebrahimi           v7.V1D(),
1164*f5c631daSSadaf Ebrahimi           MemOperand(x1, x2, PostIndex));
1165*f5c631daSSadaf Ebrahimi   __ ld4r(v26.V1D(),
1166*f5c631daSSadaf Ebrahimi           v27.V1D(),
1167*f5c631daSSadaf Ebrahimi           v28.V1D(),
1168*f5c631daSSadaf Ebrahimi           v29.V1D(),
1169*f5c631daSSadaf Ebrahimi           MemOperand(x1, 32, PostIndex));
1170*f5c631daSSadaf Ebrahimi   __ ld4r(v19.V2D(), v20.V2D(), v21.V2D(), v22.V2D(), MemOperand(x0));
1171*f5c631daSSadaf Ebrahimi   __ ld4r(v28.V2D(),
1172*f5c631daSSadaf Ebrahimi           v29.V2D(),
1173*f5c631daSSadaf Ebrahimi           v30.V2D(),
1174*f5c631daSSadaf Ebrahimi           v31.V2D(),
1175*f5c631daSSadaf Ebrahimi           MemOperand(x1, x2, PostIndex));
1176*f5c631daSSadaf Ebrahimi   __ ld4r(v15.V2D(),
1177*f5c631daSSadaf Ebrahimi           v16.V2D(),
1178*f5c631daSSadaf Ebrahimi           v17.V2D(),
1179*f5c631daSSadaf Ebrahimi           v18.V2D(),
1180*f5c631daSSadaf Ebrahimi           MemOperand(x1, 32, PostIndex));
1181*f5c631daSSadaf Ebrahimi   __ ld4r(v31.V2S(), v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x0));
1182*f5c631daSSadaf Ebrahimi   __ ld4r(v28.V2S(),
1183*f5c631daSSadaf Ebrahimi           v29.V2S(),
1184*f5c631daSSadaf Ebrahimi           v30.V2S(),
1185*f5c631daSSadaf Ebrahimi           v31.V2S(),
1186*f5c631daSSadaf Ebrahimi           MemOperand(x1, x2, PostIndex));
1187*f5c631daSSadaf Ebrahimi   __ ld4r(v11.V2S(),
1188*f5c631daSSadaf Ebrahimi           v12.V2S(),
1189*f5c631daSSadaf Ebrahimi           v13.V2S(),
1190*f5c631daSSadaf Ebrahimi           v14.V2S(),
1191*f5c631daSSadaf Ebrahimi           MemOperand(x1, 16, PostIndex));
1192*f5c631daSSadaf Ebrahimi   __ ld4r(v19.V4H(), v20.V4H(), v21.V4H(), v22.V4H(), MemOperand(x0));
1193*f5c631daSSadaf Ebrahimi   __ ld4r(v22.V4H(),
1194*f5c631daSSadaf Ebrahimi           v23.V4H(),
1195*f5c631daSSadaf Ebrahimi           v24.V4H(),
1196*f5c631daSSadaf Ebrahimi           v25.V4H(),
1197*f5c631daSSadaf Ebrahimi           MemOperand(x1, x2, PostIndex));
1198*f5c631daSSadaf Ebrahimi   __ ld4r(v20.V4H(),
1199*f5c631daSSadaf Ebrahimi           v21.V4H(),
1200*f5c631daSSadaf Ebrahimi           v22.V4H(),
1201*f5c631daSSadaf Ebrahimi           v23.V4H(),
1202*f5c631daSSadaf Ebrahimi           MemOperand(x1, 8, PostIndex));
1203*f5c631daSSadaf Ebrahimi   __ ld4r(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), MemOperand(x0));
1204*f5c631daSSadaf Ebrahimi   __ ld4r(v25.V4S(),
1205*f5c631daSSadaf Ebrahimi           v26.V4S(),
1206*f5c631daSSadaf Ebrahimi           v27.V4S(),
1207*f5c631daSSadaf Ebrahimi           v28.V4S(),
1208*f5c631daSSadaf Ebrahimi           MemOperand(x1, x2, PostIndex));
1209*f5c631daSSadaf Ebrahimi   __ ld4r(v23.V4S(),
1210*f5c631daSSadaf Ebrahimi           v24.V4S(),
1211*f5c631daSSadaf Ebrahimi           v25.V4S(),
1212*f5c631daSSadaf Ebrahimi           v26.V4S(),
1213*f5c631daSSadaf Ebrahimi           MemOperand(x1, 16, PostIndex));
1214*f5c631daSSadaf Ebrahimi   __ ld4r(v22.V8B(), v23.V8B(), v24.V8B(), v25.V8B(), MemOperand(x0));
1215*f5c631daSSadaf Ebrahimi   __ ld4r(v27.V8B(),
1216*f5c631daSSadaf Ebrahimi           v28.V8B(),
1217*f5c631daSSadaf Ebrahimi           v29.V8B(),
1218*f5c631daSSadaf Ebrahimi           v30.V8B(),
1219*f5c631daSSadaf Ebrahimi           MemOperand(x1, x2, PostIndex));
1220*f5c631daSSadaf Ebrahimi   __ ld4r(v29.V8B(),
1221*f5c631daSSadaf Ebrahimi           v30.V8B(),
1222*f5c631daSSadaf Ebrahimi           v31.V8B(),
1223*f5c631daSSadaf Ebrahimi           v0.V8B(),
1224*f5c631daSSadaf Ebrahimi           MemOperand(x1, 4, PostIndex));
1225*f5c631daSSadaf Ebrahimi   __ ld4r(v28.V8H(), v29.V8H(), v30.V8H(), v31.V8H(), MemOperand(x0));
1226*f5c631daSSadaf Ebrahimi   __ ld4r(v25.V8H(),
1227*f5c631daSSadaf Ebrahimi           v26.V8H(),
1228*f5c631daSSadaf Ebrahimi           v27.V8H(),
1229*f5c631daSSadaf Ebrahimi           v28.V8H(),
1230*f5c631daSSadaf Ebrahimi           MemOperand(x1, x2, PostIndex));
1231*f5c631daSSadaf Ebrahimi   __ ld4r(v22.V8H(),
1232*f5c631daSSadaf Ebrahimi           v23.V8H(),
1233*f5c631daSSadaf Ebrahimi           v24.V8H(),
1234*f5c631daSSadaf Ebrahimi           v25.V8H(),
1235*f5c631daSSadaf Ebrahimi           MemOperand(x1, 8, PostIndex));
1236*f5c631daSSadaf Ebrahimi   __ mla(v29.V16B(), v7.V16B(), v26.V16B());
1237*f5c631daSSadaf Ebrahimi   __ mla(v6.V2S(), v4.V2S(), v14.V2S());
1238*f5c631daSSadaf Ebrahimi   __ mla(v9.V2S(), v11.V2S(), v0.S(), 2);
1239*f5c631daSSadaf Ebrahimi   __ mla(v5.V4H(), v17.V4H(), v25.V4H());
1240*f5c631daSSadaf Ebrahimi   __ mla(v24.V4H(), v7.V4H(), v11.H(), 3);
1241*f5c631daSSadaf Ebrahimi   __ mla(v12.V4S(), v3.V4S(), v4.V4S());
1242*f5c631daSSadaf Ebrahimi   __ mla(v10.V4S(), v7.V4S(), v7.S(), 3);
1243*f5c631daSSadaf Ebrahimi   __ mla(v3.V8B(), v16.V8B(), v9.V8B());
1244*f5c631daSSadaf Ebrahimi   __ mla(v19.V8H(), v22.V8H(), v18.V8H());
1245*f5c631daSSadaf Ebrahimi   __ mla(v6.V8H(), v2.V8H(), v0.H(), 0);
1246*f5c631daSSadaf Ebrahimi   __ mls(v23.V16B(), v10.V16B(), v11.V16B());
1247*f5c631daSSadaf Ebrahimi   __ mls(v14.V2S(), v31.V2S(), v22.V2S());
1248*f5c631daSSadaf Ebrahimi   __ mls(v28.V2S(), v13.V2S(), v1.S(), 3);
1249*f5c631daSSadaf Ebrahimi   __ mls(v2.V4H(), v19.V4H(), v13.V4H());
1250*f5c631daSSadaf Ebrahimi   __ mls(v18.V4H(), v15.V4H(), v12.H(), 6);
1251*f5c631daSSadaf Ebrahimi   __ mls(v6.V4S(), v11.V4S(), v16.V4S());
1252*f5c631daSSadaf Ebrahimi   __ mls(v23.V4S(), v16.V4S(), v10.S(), 2);
1253*f5c631daSSadaf Ebrahimi   __ mls(v26.V8B(), v13.V8B(), v23.V8B());
1254*f5c631daSSadaf Ebrahimi   __ mls(v10.V8H(), v10.V8H(), v12.V8H());
1255*f5c631daSSadaf Ebrahimi   __ mls(v14.V8H(), v0.V8H(), v14.H(), 7);
1256*f5c631daSSadaf Ebrahimi   __ mov(b22, v1.B(), 3);
1257*f5c631daSSadaf Ebrahimi   __ mov(d7, v13.D(), 1);
1258*f5c631daSSadaf Ebrahimi   __ mov(h26, v21.H(), 2);
1259*f5c631daSSadaf Ebrahimi   __ mov(s26, v19.S(), 0);
1260*f5c631daSSadaf Ebrahimi   __ mov(v26.V16B(), v11.V16B());
1261*f5c631daSSadaf Ebrahimi   __ mov(v20.V8B(), v0.V8B());
1262*f5c631daSSadaf Ebrahimi   __ mov(v19.B(), 13, v6.B(), 4);
1263*f5c631daSSadaf Ebrahimi   __ mov(v4.B(), 13, w19);
1264*f5c631daSSadaf Ebrahimi   __ mov(v11.D(), 1, v8.D(), 0);
1265*f5c631daSSadaf Ebrahimi   __ mov(v3.D(), 0, x30);
1266*f5c631daSSadaf Ebrahimi   __ mov(v29.H(), 4, v11.H(), 7);
1267*f5c631daSSadaf Ebrahimi   __ mov(v2.H(), 6, w6);
1268*f5c631daSSadaf Ebrahimi   __ mov(v22.S(), 0, v5.S(), 2);
1269*f5c631daSSadaf Ebrahimi   __ mov(v24.S(), 3, w8);
1270*f5c631daSSadaf Ebrahimi   __ mov(w18, v1.S(), 3);
1271*f5c631daSSadaf Ebrahimi   __ mov(x28, v21.D(), 0);
1272*f5c631daSSadaf Ebrahimi   __ movi(d24, 0xffff0000ffffff);
1273*f5c631daSSadaf Ebrahimi   __ movi(v29.V16B(), 0x80);
1274*f5c631daSSadaf Ebrahimi   __ movi(v12.V2D(), 0xffff00ff00ffff00);
1275*f5c631daSSadaf Ebrahimi   __ movi(v12.V2S(), 0xec, LSL, 24);
1276*f5c631daSSadaf Ebrahimi   __ movi(v10.V2S(), 0x4c, MSL, 16);
1277*f5c631daSSadaf Ebrahimi   __ movi(v26.V4H(), 0xc0, LSL);
1278*f5c631daSSadaf Ebrahimi   __ movi(v24.V4S(), 0x98, LSL, 16);
1279*f5c631daSSadaf Ebrahimi   __ movi(v1.V4S(), 0xde, MSL, 16);
1280*f5c631daSSadaf Ebrahimi   __ movi(v21.V8B(), 0x4d);
1281*f5c631daSSadaf Ebrahimi   __ movi(v29.V8H(), 0x69, LSL);
1282*f5c631daSSadaf Ebrahimi   __ mul(v1.V16B(), v15.V16B(), v17.V16B());
1283*f5c631daSSadaf Ebrahimi   __ mul(v21.V2S(), v19.V2S(), v29.V2S());
1284*f5c631daSSadaf Ebrahimi   __ mul(v19.V2S(), v5.V2S(), v3.S(), 0);
1285*f5c631daSSadaf Ebrahimi   __ mul(v29.V4H(), v11.V4H(), v2.V4H());
1286*f5c631daSSadaf Ebrahimi   __ mul(v2.V4H(), v7.V4H(), v0.H(), 0);
1287*f5c631daSSadaf Ebrahimi   __ mul(v25.V4S(), v26.V4S(), v16.V4S());
1288*f5c631daSSadaf Ebrahimi   __ mul(v26.V4S(), v6.V4S(), v15.S(), 2);
1289*f5c631daSSadaf Ebrahimi   __ mul(v11.V8B(), v15.V8B(), v31.V8B());
1290*f5c631daSSadaf Ebrahimi   __ mul(v20.V8H(), v31.V8H(), v15.V8H());
1291*f5c631daSSadaf Ebrahimi   __ mul(v29.V8H(), v5.V8H(), v9.H(), 4);
1292*f5c631daSSadaf Ebrahimi   __ mvn(v13.V16B(), v21.V16B());
1293*f5c631daSSadaf Ebrahimi   __ mvn(v28.V8B(), v19.V8B());
1294*f5c631daSSadaf Ebrahimi   __ mvni(v25.V2S(), 0xb8, LSL, 8);
1295*f5c631daSSadaf Ebrahimi   __ mvni(v17.V2S(), 0x6c, MSL, 16);
1296*f5c631daSSadaf Ebrahimi   __ mvni(v29.V4H(), 0x48, LSL);
1297*f5c631daSSadaf Ebrahimi   __ mvni(v20.V4S(), 0x7a, LSL, 16);
1298*f5c631daSSadaf Ebrahimi   __ mvni(v0.V4S(), 0x1e, MSL, 8);
1299*f5c631daSSadaf Ebrahimi   __ mvni(v31.V8H(), 0x3e, LSL);
1300*f5c631daSSadaf Ebrahimi   __ neg(d25, d11);
1301*f5c631daSSadaf Ebrahimi   __ neg(v4.V16B(), v9.V16B());
1302*f5c631daSSadaf Ebrahimi   __ neg(v11.V2D(), v25.V2D());
1303*f5c631daSSadaf Ebrahimi   __ neg(v7.V2S(), v18.V2S());
1304*f5c631daSSadaf Ebrahimi   __ neg(v7.V4H(), v15.V4H());
1305*f5c631daSSadaf Ebrahimi   __ neg(v17.V4S(), v18.V4S());
1306*f5c631daSSadaf Ebrahimi   __ neg(v20.V8B(), v17.V8B());
1307*f5c631daSSadaf Ebrahimi   __ neg(v0.V8H(), v11.V8H());
1308*f5c631daSSadaf Ebrahimi   __ orn(v13.V16B(), v11.V16B(), v31.V16B());
1309*f5c631daSSadaf Ebrahimi   __ orn(v22.V8B(), v16.V8B(), v22.V8B());
1310*f5c631daSSadaf Ebrahimi   __ orr(v17.V16B(), v17.V16B(), v23.V16B());
1311*f5c631daSSadaf Ebrahimi   __ orr(v8.V2S(), 0xe3);
1312*f5c631daSSadaf Ebrahimi   __ orr(v11.V4H(), 0x97, 8);
1313*f5c631daSSadaf Ebrahimi   __ orr(v7.V4S(), 0xab);
1314*f5c631daSSadaf Ebrahimi   __ orr(v8.V8B(), v4.V8B(), v3.V8B());
1315*f5c631daSSadaf Ebrahimi   __ orr(v31.V8H(), 0xb0, 8);
1316*f5c631daSSadaf Ebrahimi   __ pmul(v11.V16B(), v18.V16B(), v23.V16B());
1317*f5c631daSSadaf Ebrahimi   __ pmul(v8.V8B(), v24.V8B(), v5.V8B());
1318*f5c631daSSadaf Ebrahimi   __ pmull(v24.V8H(), v18.V8B(), v22.V8B());
1319*f5c631daSSadaf Ebrahimi   __ pmull2(v13.V8H(), v3.V16B(), v21.V16B());
1320*f5c631daSSadaf Ebrahimi   __ raddhn(v22.V2S(), v10.V2D(), v21.V2D());
1321*f5c631daSSadaf Ebrahimi   __ raddhn(v5.V4H(), v13.V4S(), v13.V4S());
1322*f5c631daSSadaf Ebrahimi   __ raddhn(v10.V8B(), v17.V8H(), v26.V8H());
1323*f5c631daSSadaf Ebrahimi   __ raddhn2(v9.V16B(), v29.V8H(), v13.V8H());
1324*f5c631daSSadaf Ebrahimi   __ raddhn2(v27.V4S(), v23.V2D(), v26.V2D());
1325*f5c631daSSadaf Ebrahimi   __ raddhn2(v0.V8H(), v29.V4S(), v7.V4S());
1326*f5c631daSSadaf Ebrahimi   __ rbit(v22.V16B(), v15.V16B());
1327*f5c631daSSadaf Ebrahimi   __ rbit(v30.V8B(), v3.V8B());
1328*f5c631daSSadaf Ebrahimi   __ rev16(v31.V16B(), v27.V16B());
1329*f5c631daSSadaf Ebrahimi   __ rev16(v12.V8B(), v26.V8B());
1330*f5c631daSSadaf Ebrahimi   __ rev32(v5.V16B(), v4.V16B());
1331*f5c631daSSadaf Ebrahimi   __ rev32(v16.V4H(), v26.V4H());
1332*f5c631daSSadaf Ebrahimi   __ rev32(v20.V8B(), v3.V8B());
1333*f5c631daSSadaf Ebrahimi   __ rev32(v20.V8H(), v28.V8H());
1334*f5c631daSSadaf Ebrahimi   __ rev64(v9.V16B(), v19.V16B());
1335*f5c631daSSadaf Ebrahimi   __ rev64(v5.V2S(), v16.V2S());
1336*f5c631daSSadaf Ebrahimi   __ rev64(v7.V4H(), v31.V4H());
1337*f5c631daSSadaf Ebrahimi   __ rev64(v15.V4S(), v26.V4S());
1338*f5c631daSSadaf Ebrahimi   __ rev64(v25.V8B(), v9.V8B());
1339*f5c631daSSadaf Ebrahimi   __ rev64(v11.V8H(), v5.V8H());
1340*f5c631daSSadaf Ebrahimi   __ rshrn(v18.V2S(), v13.V2D(), 1);
1341*f5c631daSSadaf Ebrahimi   __ rshrn(v25.V4H(), v30.V4S(), 2);
1342*f5c631daSSadaf Ebrahimi   __ rshrn(v13.V8B(), v9.V8H(), 8);
1343*f5c631daSSadaf Ebrahimi   __ rshrn2(v3.V16B(), v6.V8H(), 8);
1344*f5c631daSSadaf Ebrahimi   __ rshrn2(v0.V4S(), v29.V2D(), 25);
1345*f5c631daSSadaf Ebrahimi   __ rshrn2(v27.V8H(), v26.V4S(), 15);
1346*f5c631daSSadaf Ebrahimi   __ rsubhn(v15.V2S(), v25.V2D(), v4.V2D());
1347*f5c631daSSadaf Ebrahimi   __ rsubhn(v23.V4H(), v9.V4S(), v3.V4S());
1348*f5c631daSSadaf Ebrahimi   __ rsubhn(v6.V8B(), v30.V8H(), v24.V8H());
1349*f5c631daSSadaf Ebrahimi   __ rsubhn2(v4.V16B(), v24.V8H(), v20.V8H());
1350*f5c631daSSadaf Ebrahimi   __ rsubhn2(v1.V4S(), v23.V2D(), v22.V2D());
1351*f5c631daSSadaf Ebrahimi   __ rsubhn2(v19.V8H(), v2.V4S(), v20.V4S());
1352*f5c631daSSadaf Ebrahimi   __ saba(v28.V16B(), v9.V16B(), v25.V16B());
1353*f5c631daSSadaf Ebrahimi   __ saba(v9.V2S(), v28.V2S(), v20.V2S());
1354*f5c631daSSadaf Ebrahimi   __ saba(v17.V4H(), v22.V4H(), v22.V4H());
1355*f5c631daSSadaf Ebrahimi   __ saba(v29.V4S(), v5.V4S(), v27.V4S());
1356*f5c631daSSadaf Ebrahimi   __ saba(v20.V8B(), v21.V8B(), v18.V8B());
1357*f5c631daSSadaf Ebrahimi   __ saba(v27.V8H(), v17.V8H(), v30.V8H());
1358*f5c631daSSadaf Ebrahimi   __ sabal(v20.V2D(), v13.V2S(), v7.V2S());
1359*f5c631daSSadaf Ebrahimi   __ sabal(v4.V4S(), v12.V4H(), v4.V4H());
1360*f5c631daSSadaf Ebrahimi   __ sabal(v23.V8H(), v24.V8B(), v20.V8B());
1361*f5c631daSSadaf Ebrahimi   __ sabal2(v26.V2D(), v21.V4S(), v18.V4S());
1362*f5c631daSSadaf Ebrahimi   __ sabal2(v27.V4S(), v28.V8H(), v8.V8H());
1363*f5c631daSSadaf Ebrahimi   __ sabal2(v12.V8H(), v16.V16B(), v21.V16B());
1364*f5c631daSSadaf Ebrahimi   __ sabd(v0.V16B(), v15.V16B(), v13.V16B());
1365*f5c631daSSadaf Ebrahimi   __ sabd(v15.V2S(), v7.V2S(), v30.V2S());
1366*f5c631daSSadaf Ebrahimi   __ sabd(v17.V4H(), v17.V4H(), v12.V4H());
1367*f5c631daSSadaf Ebrahimi   __ sabd(v7.V4S(), v4.V4S(), v22.V4S());
1368*f5c631daSSadaf Ebrahimi   __ sabd(v23.V8B(), v3.V8B(), v26.V8B());
1369*f5c631daSSadaf Ebrahimi   __ sabd(v20.V8H(), v28.V8H(), v5.V8H());
1370*f5c631daSSadaf Ebrahimi   __ sabdl(v27.V2D(), v22.V2S(), v20.V2S());
1371*f5c631daSSadaf Ebrahimi   __ sabdl(v31.V4S(), v20.V4H(), v23.V4H());
1372*f5c631daSSadaf Ebrahimi   __ sabdl(v0.V8H(), v20.V8B(), v27.V8B());
1373*f5c631daSSadaf Ebrahimi   __ sabdl2(v31.V2D(), v11.V4S(), v3.V4S());
1374*f5c631daSSadaf Ebrahimi   __ sabdl2(v26.V4S(), v11.V8H(), v27.V8H());
1375*f5c631daSSadaf Ebrahimi   __ sabdl2(v6.V8H(), v8.V16B(), v18.V16B());
1376*f5c631daSSadaf Ebrahimi   __ sadalp(v8.V1D(), v26.V2S());
1377*f5c631daSSadaf Ebrahimi   __ sadalp(v12.V2D(), v26.V4S());
1378*f5c631daSSadaf Ebrahimi   __ sadalp(v12.V2S(), v26.V4H());
1379*f5c631daSSadaf Ebrahimi   __ sadalp(v4.V4H(), v1.V8B());
1380*f5c631daSSadaf Ebrahimi   __ sadalp(v15.V4S(), v17.V8H());
1381*f5c631daSSadaf Ebrahimi   __ sadalp(v21.V8H(), v25.V16B());
1382*f5c631daSSadaf Ebrahimi   __ saddl(v5.V2D(), v10.V2S(), v14.V2S());
1383*f5c631daSSadaf Ebrahimi   __ saddl(v18.V4S(), v3.V4H(), v15.V4H());
1384*f5c631daSSadaf Ebrahimi   __ saddl(v15.V8H(), v2.V8B(), v23.V8B());
1385*f5c631daSSadaf Ebrahimi   __ saddl2(v16.V2D(), v16.V4S(), v27.V4S());
1386*f5c631daSSadaf Ebrahimi   __ saddl2(v6.V4S(), v24.V8H(), v0.V8H());
1387*f5c631daSSadaf Ebrahimi   __ saddl2(v7.V8H(), v20.V16B(), v28.V16B());
1388*f5c631daSSadaf Ebrahimi   __ saddlp(v10.V1D(), v25.V2S());
1389*f5c631daSSadaf Ebrahimi   __ saddlp(v15.V2D(), v16.V4S());
1390*f5c631daSSadaf Ebrahimi   __ saddlp(v18.V2S(), v10.V4H());
1391*f5c631daSSadaf Ebrahimi   __ saddlp(v29.V4H(), v26.V8B());
1392*f5c631daSSadaf Ebrahimi   __ saddlp(v10.V4S(), v1.V8H());
1393*f5c631daSSadaf Ebrahimi   __ saddlp(v0.V8H(), v21.V16B());
1394*f5c631daSSadaf Ebrahimi   __ saddlv(d12, v7.V4S());
1395*f5c631daSSadaf Ebrahimi   __ saddlv(h14, v28.V16B());
1396*f5c631daSSadaf Ebrahimi   __ saddlv(h30, v30.V8B());
1397*f5c631daSSadaf Ebrahimi   __ saddlv(s27, v3.V4H());
1398*f5c631daSSadaf Ebrahimi   __ saddlv(s16, v16.V8H());
1399*f5c631daSSadaf Ebrahimi   __ saddw(v24.V2D(), v11.V2D(), v18.V2S());
1400*f5c631daSSadaf Ebrahimi   __ saddw(v13.V4S(), v12.V4S(), v6.V4H());
1401*f5c631daSSadaf Ebrahimi   __ saddw(v19.V8H(), v19.V8H(), v7.V8B());
1402*f5c631daSSadaf Ebrahimi   __ saddw2(v27.V2D(), v9.V2D(), v26.V4S());
1403*f5c631daSSadaf Ebrahimi   __ saddw2(v19.V4S(), v23.V4S(), v21.V8H());
1404*f5c631daSSadaf Ebrahimi   __ saddw2(v15.V8H(), v25.V8H(), v30.V16B());
1405*f5c631daSSadaf Ebrahimi   __ shadd(v7.V16B(), v4.V16B(), v9.V16B());
1406*f5c631daSSadaf Ebrahimi   __ shadd(v29.V2S(), v25.V2S(), v24.V2S());
1407*f5c631daSSadaf Ebrahimi   __ shadd(v31.V4H(), v10.V4H(), v13.V4H());
1408*f5c631daSSadaf Ebrahimi   __ shadd(v21.V4S(), v16.V4S(), v8.V4S());
1409*f5c631daSSadaf Ebrahimi   __ shadd(v14.V8B(), v29.V8B(), v22.V8B());
1410*f5c631daSSadaf Ebrahimi   __ shadd(v19.V8H(), v24.V8H(), v20.V8H());
1411*f5c631daSSadaf Ebrahimi   __ shl(d22, d25, 23);
1412*f5c631daSSadaf Ebrahimi   __ shl(v5.V16B(), v17.V16B(), 7);
1413*f5c631daSSadaf Ebrahimi   __ shl(v2.V2D(), v4.V2D(), 21);
1414*f5c631daSSadaf Ebrahimi   __ shl(v4.V2S(), v3.V2S(), 26);
1415*f5c631daSSadaf Ebrahimi   __ shl(v3.V4H(), v28.V4H(), 8);
1416*f5c631daSSadaf Ebrahimi   __ shl(v4.V4S(), v31.V4S(), 24);
1417*f5c631daSSadaf Ebrahimi   __ shl(v18.V8B(), v16.V8B(), 2);
1418*f5c631daSSadaf Ebrahimi   __ shl(v0.V8H(), v11.V8H(), 3);
1419*f5c631daSSadaf Ebrahimi   __ shll(v5.V2D(), v24.V2S(), 32);
1420*f5c631daSSadaf Ebrahimi   __ shll(v26.V4S(), v20.V4H(), 16);
1421*f5c631daSSadaf Ebrahimi   __ shll(v5.V8H(), v9.V8B(), 8);
1422*f5c631daSSadaf Ebrahimi   __ shll2(v21.V2D(), v28.V4S(), 32);
1423*f5c631daSSadaf Ebrahimi   __ shll2(v22.V4S(), v1.V8H(), 16);
1424*f5c631daSSadaf Ebrahimi   __ shll2(v30.V8H(), v25.V16B(), 8);
1425*f5c631daSSadaf Ebrahimi   __ shrn(v5.V2S(), v1.V2D(), 28);
1426*f5c631daSSadaf Ebrahimi   __ shrn(v29.V4H(), v18.V4S(), 7);
1427*f5c631daSSadaf Ebrahimi   __ shrn(v17.V8B(), v29.V8H(), 2);
1428*f5c631daSSadaf Ebrahimi   __ shrn2(v5.V16B(), v30.V8H(), 3);
1429*f5c631daSSadaf Ebrahimi   __ shrn2(v24.V4S(), v1.V2D(), 1);
1430*f5c631daSSadaf Ebrahimi   __ shrn2(v5.V8H(), v14.V4S(), 16);
1431*f5c631daSSadaf Ebrahimi   __ shsub(v30.V16B(), v22.V16B(), v23.V16B());
1432*f5c631daSSadaf Ebrahimi   __ shsub(v22.V2S(), v27.V2S(), v25.V2S());
1433*f5c631daSSadaf Ebrahimi   __ shsub(v13.V4H(), v22.V4H(), v1.V4H());
1434*f5c631daSSadaf Ebrahimi   __ shsub(v10.V4S(), v8.V4S(), v23.V4S());
1435*f5c631daSSadaf Ebrahimi   __ shsub(v6.V8B(), v9.V8B(), v31.V8B());
1436*f5c631daSSadaf Ebrahimi   __ shsub(v8.V8H(), v31.V8H(), v8.V8H());
1437*f5c631daSSadaf Ebrahimi   __ sli(d19, d29, 20);
1438*f5c631daSSadaf Ebrahimi   __ sli(v9.V16B(), v24.V16B(), 0);
1439*f5c631daSSadaf Ebrahimi   __ sli(v22.V2D(), v9.V2D(), 10);
1440*f5c631daSSadaf Ebrahimi   __ sli(v11.V2S(), v27.V2S(), 20);
1441*f5c631daSSadaf Ebrahimi   __ sli(v16.V4H(), v15.V4H(), 5);
1442*f5c631daSSadaf Ebrahimi   __ sli(v8.V4S(), v8.V4S(), 25);
1443*f5c631daSSadaf Ebrahimi   __ sli(v10.V8B(), v30.V8B(), 0);
1444*f5c631daSSadaf Ebrahimi   __ sli(v7.V8H(), v28.V8H(), 6);
1445*f5c631daSSadaf Ebrahimi   __ smax(v18.V16B(), v8.V16B(), v1.V16B());
1446*f5c631daSSadaf Ebrahimi   __ smax(v30.V2S(), v5.V2S(), v1.V2S());
1447*f5c631daSSadaf Ebrahimi   __ smax(v17.V4H(), v25.V4H(), v19.V4H());
1448*f5c631daSSadaf Ebrahimi   __ smax(v1.V4S(), v24.V4S(), v31.V4S());
1449*f5c631daSSadaf Ebrahimi   __ smax(v17.V8B(), v24.V8B(), v24.V8B());
1450*f5c631daSSadaf Ebrahimi   __ smax(v11.V8H(), v26.V8H(), v10.V8H());
1451*f5c631daSSadaf Ebrahimi   __ smaxp(v12.V16B(), v14.V16B(), v7.V16B());
1452*f5c631daSSadaf Ebrahimi   __ smaxp(v31.V2S(), v24.V2S(), v6.V2S());
1453*f5c631daSSadaf Ebrahimi   __ smaxp(v10.V4H(), v29.V4H(), v10.V4H());
1454*f5c631daSSadaf Ebrahimi   __ smaxp(v18.V4S(), v11.V4S(), v7.V4S());
1455*f5c631daSSadaf Ebrahimi   __ smaxp(v21.V8B(), v0.V8B(), v18.V8B());
1456*f5c631daSSadaf Ebrahimi   __ smaxp(v26.V8H(), v8.V8H(), v15.V8H());
1457*f5c631daSSadaf Ebrahimi   __ smaxv(b4, v5.V16B());
1458*f5c631daSSadaf Ebrahimi   __ smaxv(b23, v0.V8B());
1459*f5c631daSSadaf Ebrahimi   __ smaxv(h6, v0.V4H());
1460*f5c631daSSadaf Ebrahimi   __ smaxv(h24, v8.V8H());
1461*f5c631daSSadaf Ebrahimi   __ smaxv(s3, v16.V4S());
1462*f5c631daSSadaf Ebrahimi   __ smin(v24.V16B(), v8.V16B(), v18.V16B());
1463*f5c631daSSadaf Ebrahimi   __ smin(v29.V2S(), v8.V2S(), v23.V2S());
1464*f5c631daSSadaf Ebrahimi   __ smin(v6.V4H(), v11.V4H(), v21.V4H());
1465*f5c631daSSadaf Ebrahimi   __ smin(v24.V4S(), v23.V4S(), v15.V4S());
1466*f5c631daSSadaf Ebrahimi   __ smin(v8.V8B(), v16.V8B(), v4.V8B());
1467*f5c631daSSadaf Ebrahimi   __ smin(v12.V8H(), v1.V8H(), v10.V8H());
1468*f5c631daSSadaf Ebrahimi   __ sminp(v13.V16B(), v18.V16B(), v28.V16B());
1469*f5c631daSSadaf Ebrahimi   __ sminp(v22.V2S(), v28.V2S(), v16.V2S());
1470*f5c631daSSadaf Ebrahimi   __ sminp(v15.V4H(), v12.V4H(), v5.V4H());
1471*f5c631daSSadaf Ebrahimi   __ sminp(v15.V4S(), v17.V4S(), v8.V4S());
1472*f5c631daSSadaf Ebrahimi   __ sminp(v21.V8B(), v2.V8B(), v6.V8B());
1473*f5c631daSSadaf Ebrahimi   __ sminp(v21.V8H(), v12.V8H(), v6.V8H());
1474*f5c631daSSadaf Ebrahimi   __ sminv(b8, v6.V16B());
1475*f5c631daSSadaf Ebrahimi   __ sminv(b6, v18.V8B());
1476*f5c631daSSadaf Ebrahimi   __ sminv(h20, v1.V4H());
1477*f5c631daSSadaf Ebrahimi   __ sminv(h7, v17.V8H());
1478*f5c631daSSadaf Ebrahimi   __ sminv(s21, v4.V4S());
1479*f5c631daSSadaf Ebrahimi   __ smlal(v24.V2D(), v14.V2S(), v21.V2S());
1480*f5c631daSSadaf Ebrahimi   __ smlal(v31.V2D(), v3.V2S(), v14.S(), 2);
1481*f5c631daSSadaf Ebrahimi   __ smlal(v7.V4S(), v20.V4H(), v21.V4H());
1482*f5c631daSSadaf Ebrahimi   __ smlal(v19.V4S(), v16.V4H(), v9.H(), 3);
1483*f5c631daSSadaf Ebrahimi   __ smlal(v29.V8H(), v14.V8B(), v1.V8B());
1484*f5c631daSSadaf Ebrahimi   __ smlal2(v30.V2D(), v26.V4S(), v16.V4S());
1485*f5c631daSSadaf Ebrahimi   __ smlal2(v31.V2D(), v30.V4S(), v1.S(), 0);
1486*f5c631daSSadaf Ebrahimi   __ smlal2(v17.V4S(), v6.V8H(), v3.V8H());
1487*f5c631daSSadaf Ebrahimi   __ smlal2(v11.V4S(), v31.V8H(), v5.H(), 7);
1488*f5c631daSSadaf Ebrahimi   __ smlal2(v30.V8H(), v16.V16B(), v29.V16B());
1489*f5c631daSSadaf Ebrahimi   __ smlsl(v1.V2D(), v20.V2S(), v17.V2S());
1490*f5c631daSSadaf Ebrahimi   __ smlsl(v29.V2D(), v12.V2S(), v5.S(), 3);
1491*f5c631daSSadaf Ebrahimi   __ smlsl(v0.V4S(), v26.V4H(), v1.V4H());
1492*f5c631daSSadaf Ebrahimi   __ smlsl(v3.V4S(), v5.V4H(), v6.H(), 5);
1493*f5c631daSSadaf Ebrahimi   __ smlsl(v4.V8H(), v0.V8B(), v26.V8B());
1494*f5c631daSSadaf Ebrahimi   __ smlsl2(v14.V2D(), v14.V4S(), v5.V4S());
1495*f5c631daSSadaf Ebrahimi   __ smlsl2(v15.V2D(), v5.V4S(), v0.S(), 1);
1496*f5c631daSSadaf Ebrahimi   __ smlsl2(v29.V4S(), v17.V8H(), v31.V8H());
1497*f5c631daSSadaf Ebrahimi   __ smlsl2(v6.V4S(), v15.V8H(), v9.H(), 6);
1498*f5c631daSSadaf Ebrahimi   __ smlsl2(v30.V8H(), v15.V16B(), v15.V16B());
1499*f5c631daSSadaf Ebrahimi   __ smov(w21, v6.B(), 3);
1500*f5c631daSSadaf Ebrahimi   __ smov(w13, v26.H(), 7);
1501*f5c631daSSadaf Ebrahimi   __ smov(x24, v16.B(), 7);
1502*f5c631daSSadaf Ebrahimi   __ smov(x7, v4.H(), 3);
1503*f5c631daSSadaf Ebrahimi   __ smov(x29, v7.S(), 1);
1504*f5c631daSSadaf Ebrahimi   __ smull(v4.V2D(), v29.V2S(), v17.V2S());
1505*f5c631daSSadaf Ebrahimi   __ smull(v30.V2D(), v21.V2S(), v6.S(), 2);
1506*f5c631daSSadaf Ebrahimi   __ smull(v23.V4S(), v5.V4H(), v23.V4H());
1507*f5c631daSSadaf Ebrahimi   __ smull(v8.V4S(), v9.V4H(), v2.H(), 1);
1508*f5c631daSSadaf Ebrahimi   __ smull(v31.V8H(), v17.V8B(), v1.V8B());
1509*f5c631daSSadaf Ebrahimi   __ smull2(v3.V2D(), v3.V4S(), v23.V4S());
1510*f5c631daSSadaf Ebrahimi   __ smull2(v15.V2D(), v29.V4S(), v6.S(), 1);
1511*f5c631daSSadaf Ebrahimi   __ smull2(v19.V4S(), v20.V8H(), v30.V8H());
1512*f5c631daSSadaf Ebrahimi   __ smull2(v6.V4S(), v10.V8H(), v7.H(), 4);
1513*f5c631daSSadaf Ebrahimi   __ smull2(v25.V8H(), v8.V16B(), v27.V16B());
1514*f5c631daSSadaf Ebrahimi   __ sqabs(b3, b15);
1515*f5c631daSSadaf Ebrahimi   __ sqabs(d14, d9);
1516*f5c631daSSadaf Ebrahimi   __ sqabs(h31, h28);
1517*f5c631daSSadaf Ebrahimi   __ sqabs(s8, s0);
1518*f5c631daSSadaf Ebrahimi   __ sqabs(v14.V16B(), v7.V16B());
1519*f5c631daSSadaf Ebrahimi   __ sqabs(v23.V2D(), v19.V2D());
1520*f5c631daSSadaf Ebrahimi   __ sqabs(v10.V2S(), v24.V2S());
1521*f5c631daSSadaf Ebrahimi   __ sqabs(v31.V4H(), v19.V4H());
1522*f5c631daSSadaf Ebrahimi   __ sqabs(v23.V4S(), v0.V4S());
1523*f5c631daSSadaf Ebrahimi   __ sqabs(v29.V8B(), v23.V8B());
1524*f5c631daSSadaf Ebrahimi   __ sqabs(v17.V8H(), v21.V8H());
1525*f5c631daSSadaf Ebrahimi   __ sqadd(b9, b23, b13);
1526*f5c631daSSadaf Ebrahimi   __ sqadd(d2, d25, d26);
1527*f5c631daSSadaf Ebrahimi   __ sqadd(h7, h29, h25);
1528*f5c631daSSadaf Ebrahimi   __ sqadd(s11, s7, s24);
1529*f5c631daSSadaf Ebrahimi   __ sqadd(v20.V16B(), v16.V16B(), v29.V16B());
1530*f5c631daSSadaf Ebrahimi   __ sqadd(v23.V2D(), v30.V2D(), v28.V2D());
1531*f5c631daSSadaf Ebrahimi   __ sqadd(v8.V2S(), v19.V2S(), v2.V2S());
1532*f5c631daSSadaf Ebrahimi   __ sqadd(v20.V4H(), v12.V4H(), v31.V4H());
1533*f5c631daSSadaf Ebrahimi   __ sqadd(v14.V4S(), v15.V4S(), v17.V4S());
1534*f5c631daSSadaf Ebrahimi   __ sqadd(v2.V8B(), v29.V8B(), v13.V8B());
1535*f5c631daSSadaf Ebrahimi   __ sqadd(v7.V8H(), v19.V8H(), v14.V8H());
1536*f5c631daSSadaf Ebrahimi   __ sqdmlal(d15, s5, s30);
1537*f5c631daSSadaf Ebrahimi   __ sqdmlal(d24, s10, v2.S(), 3);
1538*f5c631daSSadaf Ebrahimi   __ sqdmlal(s9, h19, h8);
1539*f5c631daSSadaf Ebrahimi   __ sqdmlal(s14, h1, v12.H(), 3);
1540*f5c631daSSadaf Ebrahimi   __ sqdmlal(v30.V2D(), v5.V2S(), v31.V2S());
1541*f5c631daSSadaf Ebrahimi   __ sqdmlal(v25.V2D(), v14.V2S(), v10.S(), 1);
1542*f5c631daSSadaf Ebrahimi   __ sqdmlal(v19.V4S(), v17.V4H(), v16.V4H());
1543*f5c631daSSadaf Ebrahimi   __ sqdmlal(v8.V4S(), v5.V4H(), v8.H(), 1);
1544*f5c631daSSadaf Ebrahimi   __ sqdmlal2(v1.V2D(), v23.V4S(), v3.V4S());
1545*f5c631daSSadaf Ebrahimi   __ sqdmlal2(v19.V2D(), v0.V4S(), v9.S(), 0);
1546*f5c631daSSadaf Ebrahimi   __ sqdmlal2(v26.V4S(), v22.V8H(), v11.V8H());
1547*f5c631daSSadaf Ebrahimi   __ sqdmlal2(v6.V4S(), v28.V8H(), v13.H(), 4);
1548*f5c631daSSadaf Ebrahimi   __ sqdmlsl(d10, s29, s20);
1549*f5c631daSSadaf Ebrahimi   __ sqdmlsl(d10, s9, v10.S(), 1);
1550*f5c631daSSadaf Ebrahimi   __ sqdmlsl(s30, h9, h24);
1551*f5c631daSSadaf Ebrahimi   __ sqdmlsl(s13, h24, v6.H(), 1);
1552*f5c631daSSadaf Ebrahimi   __ sqdmlsl(v27.V2D(), v10.V2S(), v20.V2S());
1553*f5c631daSSadaf Ebrahimi   __ sqdmlsl(v23.V2D(), v23.V2S(), v3.S(), 3);
1554*f5c631daSSadaf Ebrahimi   __ sqdmlsl(v7.V4S(), v17.V4H(), v29.V4H());
1555*f5c631daSSadaf Ebrahimi   __ sqdmlsl(v22.V4S(), v21.V4H(), v3.H(), 4);
1556*f5c631daSSadaf Ebrahimi   __ sqdmlsl2(v12.V2D(), v7.V4S(), v22.V4S());
1557*f5c631daSSadaf Ebrahimi   __ sqdmlsl2(v20.V2D(), v25.V4S(), v8.S(), 0);
1558*f5c631daSSadaf Ebrahimi   __ sqdmlsl2(v25.V4S(), v26.V8H(), v18.V8H());
1559*f5c631daSSadaf Ebrahimi   __ sqdmlsl2(v25.V4S(), v19.V8H(), v5.H(), 0);
1560*f5c631daSSadaf Ebrahimi   __ sqdmulh(h17, h27, h12);
1561*f5c631daSSadaf Ebrahimi   __ sqdmulh(h16, h5, v11.H(), 0);
1562*f5c631daSSadaf Ebrahimi   __ sqdmulh(s1, s19, s16);
1563*f5c631daSSadaf Ebrahimi   __ sqdmulh(s1, s16, v2.S(), 0);
1564*f5c631daSSadaf Ebrahimi   __ sqdmulh(v28.V2S(), v1.V2S(), v8.V2S());
1565*f5c631daSSadaf Ebrahimi   __ sqdmulh(v28.V2S(), v8.V2S(), v3.S(), 0);
1566*f5c631daSSadaf Ebrahimi   __ sqdmulh(v11.V4H(), v25.V4H(), v5.V4H());
1567*f5c631daSSadaf Ebrahimi   __ sqdmulh(v30.V4H(), v14.V4H(), v8.H(), 5);
1568*f5c631daSSadaf Ebrahimi   __ sqdmulh(v25.V4S(), v21.V4S(), v13.V4S());
1569*f5c631daSSadaf Ebrahimi   __ sqdmulh(v23.V4S(), v2.V4S(), v10.S(), 3);
1570*f5c631daSSadaf Ebrahimi   __ sqdmulh(v26.V8H(), v5.V8H(), v23.V8H());
1571*f5c631daSSadaf Ebrahimi   __ sqdmulh(v4.V8H(), v22.V8H(), v4.H(), 3);
1572*f5c631daSSadaf Ebrahimi   __ sqdmull(d25, s2, s26);
1573*f5c631daSSadaf Ebrahimi   __ sqdmull(d30, s14, v5.S(), 1);
1574*f5c631daSSadaf Ebrahimi   __ sqdmull(s29, h18, h11);
1575*f5c631daSSadaf Ebrahimi   __ sqdmull(s11, h13, v7.H(), 6);
1576*f5c631daSSadaf Ebrahimi   __ sqdmull(v23.V2D(), v9.V2S(), v8.V2S());
1577*f5c631daSSadaf Ebrahimi   __ sqdmull(v18.V2D(), v29.V2S(), v4.S(), 1);
1578*f5c631daSSadaf Ebrahimi   __ sqdmull(v17.V4S(), v24.V4H(), v7.V4H());
1579*f5c631daSSadaf Ebrahimi   __ sqdmull(v8.V4S(), v15.V4H(), v5.H(), 1);
1580*f5c631daSSadaf Ebrahimi   __ sqdmull2(v28.V2D(), v14.V4S(), v2.V4S());
1581*f5c631daSSadaf Ebrahimi   __ sqdmull2(v1.V2D(), v24.V4S(), v13.S(), 2);
1582*f5c631daSSadaf Ebrahimi   __ sqdmull2(v11.V4S(), v17.V8H(), v31.V8H());
1583*f5c631daSSadaf Ebrahimi   __ sqdmull2(v1.V4S(), v20.V8H(), v11.H(), 3);
1584*f5c631daSSadaf Ebrahimi   __ sqneg(b2, b0);
1585*f5c631daSSadaf Ebrahimi   __ sqneg(d24, d2);
1586*f5c631daSSadaf Ebrahimi   __ sqneg(h29, h3);
1587*f5c631daSSadaf Ebrahimi   __ sqneg(s4, s9);
1588*f5c631daSSadaf Ebrahimi   __ sqneg(v14.V16B(), v29.V16B());
1589*f5c631daSSadaf Ebrahimi   __ sqneg(v30.V2D(), v12.V2D());
1590*f5c631daSSadaf Ebrahimi   __ sqneg(v28.V2S(), v26.V2S());
1591*f5c631daSSadaf Ebrahimi   __ sqneg(v4.V4H(), v4.V4H());
1592*f5c631daSSadaf Ebrahimi   __ sqneg(v9.V4S(), v8.V4S());
1593*f5c631daSSadaf Ebrahimi   __ sqneg(v20.V8B(), v20.V8B());
1594*f5c631daSSadaf Ebrahimi   __ sqneg(v27.V8H(), v10.V8H());
1595*f5c631daSSadaf Ebrahimi   __ sqrdmulh(h7, h24, h0);
1596*f5c631daSSadaf Ebrahimi   __ sqrdmulh(h14, h3, v4.H(), 6);
1597*f5c631daSSadaf Ebrahimi   __ sqrdmulh(s27, s19, s24);
1598*f5c631daSSadaf Ebrahimi   __ sqrdmulh(s31, s21, v4.S(), 0);
1599*f5c631daSSadaf Ebrahimi   __ sqrdmulh(v18.V2S(), v25.V2S(), v1.V2S());
1600*f5c631daSSadaf Ebrahimi   __ sqrdmulh(v22.V2S(), v5.V2S(), v13.S(), 0);
1601*f5c631daSSadaf Ebrahimi   __ sqrdmulh(v22.V4H(), v24.V4H(), v9.V4H());
1602*f5c631daSSadaf Ebrahimi   __ sqrdmulh(v13.V4H(), v2.V4H(), v12.H(), 6);
1603*f5c631daSSadaf Ebrahimi   __ sqrdmulh(v9.V4S(), v27.V4S(), v2.V4S());
1604*f5c631daSSadaf Ebrahimi   __ sqrdmulh(v3.V4S(), v23.V4S(), v7.S(), 1);
1605*f5c631daSSadaf Ebrahimi   __ sqrdmulh(v2.V8H(), v0.V8H(), v7.V8H());
1606*f5c631daSSadaf Ebrahimi   __ sqrdmulh(v16.V8H(), v9.V8H(), v8.H(), 2);
1607*f5c631daSSadaf Ebrahimi   __ sqrshl(b8, b21, b13);
1608*f5c631daSSadaf Ebrahimi   __ sqrshl(d29, d7, d20);
1609*f5c631daSSadaf Ebrahimi   __ sqrshl(h28, h14, h10);
1610*f5c631daSSadaf Ebrahimi   __ sqrshl(s26, s18, s2);
1611*f5c631daSSadaf Ebrahimi   __ sqrshl(v18.V16B(), v31.V16B(), v26.V16B());
1612*f5c631daSSadaf Ebrahimi   __ sqrshl(v28.V2D(), v4.V2D(), v0.V2D());
1613*f5c631daSSadaf Ebrahimi   __ sqrshl(v3.V2S(), v6.V2S(), v0.V2S());
1614*f5c631daSSadaf Ebrahimi   __ sqrshl(v1.V4H(), v18.V4H(), v22.V4H());
1615*f5c631daSSadaf Ebrahimi   __ sqrshl(v16.V4S(), v25.V4S(), v7.V4S());
1616*f5c631daSSadaf Ebrahimi   __ sqrshl(v0.V8B(), v21.V8B(), v5.V8B());
1617*f5c631daSSadaf Ebrahimi   __ sqrshl(v30.V8H(), v19.V8H(), v8.V8H());
1618*f5c631daSSadaf Ebrahimi   __ sqrshrn(b6, h21, 4);
1619*f5c631daSSadaf Ebrahimi   __ sqrshrn(h14, s17, 11);
1620*f5c631daSSadaf Ebrahimi   __ sqrshrn(s25, d27, 10);
1621*f5c631daSSadaf Ebrahimi   __ sqrshrn(v6.V2S(), v13.V2D(), 18);
1622*f5c631daSSadaf Ebrahimi   __ sqrshrn(v5.V4H(), v9.V4S(), 15);
1623*f5c631daSSadaf Ebrahimi   __ sqrshrn(v19.V8B(), v12.V8H(), 1);
1624*f5c631daSSadaf Ebrahimi   __ sqrshrn2(v19.V16B(), v21.V8H(), 7);
1625*f5c631daSSadaf Ebrahimi   __ sqrshrn2(v29.V4S(), v24.V2D(), 13);
1626*f5c631daSSadaf Ebrahimi   __ sqrshrn2(v12.V8H(), v2.V4S(), 10);
1627*f5c631daSSadaf Ebrahimi   __ sqrshrun(b16, h9, 5);
1628*f5c631daSSadaf Ebrahimi   __ sqrshrun(h3, s24, 15);
1629*f5c631daSSadaf Ebrahimi   __ sqrshrun(s16, d18, 8);
1630*f5c631daSSadaf Ebrahimi   __ sqrshrun(v28.V2S(), v23.V2D(), 8);
1631*f5c631daSSadaf Ebrahimi   __ sqrshrun(v31.V4H(), v25.V4S(), 10);
1632*f5c631daSSadaf Ebrahimi   __ sqrshrun(v19.V8B(), v23.V8H(), 2);
1633*f5c631daSSadaf Ebrahimi   __ sqrshrun2(v24.V16B(), v0.V8H(), 8);
1634*f5c631daSSadaf Ebrahimi   __ sqrshrun2(v22.V4S(), v1.V2D(), 23);
1635*f5c631daSSadaf Ebrahimi   __ sqrshrun2(v28.V8H(), v21.V4S(), 13);
1636*f5c631daSSadaf Ebrahimi   __ sqshl(b6, b21, b8);
1637*f5c631daSSadaf Ebrahimi   __ sqshl(b11, b26, 2);
1638*f5c631daSSadaf Ebrahimi   __ sqshl(d29, d0, d4);
1639*f5c631daSSadaf Ebrahimi   __ sqshl(d21, d7, 35);
1640*f5c631daSSadaf Ebrahimi   __ sqshl(h20, h25, h17);
1641*f5c631daSSadaf Ebrahimi   __ sqshl(h20, h0, 8);
1642*f5c631daSSadaf Ebrahimi   __ sqshl(s29, s13, s4);
1643*f5c631daSSadaf Ebrahimi   __ sqshl(s10, s11, 20);
1644*f5c631daSSadaf Ebrahimi   __ sqshl(v8.V16B(), v18.V16B(), v28.V16B());
1645*f5c631daSSadaf Ebrahimi   __ sqshl(v29.V16B(), v29.V16B(), 2);
1646*f5c631daSSadaf Ebrahimi   __ sqshl(v8.V2D(), v31.V2D(), v16.V2D());
1647*f5c631daSSadaf Ebrahimi   __ sqshl(v7.V2D(), v14.V2D(), 37);
1648*f5c631daSSadaf Ebrahimi   __ sqshl(v0.V2S(), v26.V2S(), v7.V2S());
1649*f5c631daSSadaf Ebrahimi   __ sqshl(v5.V2S(), v11.V2S(), 19);
1650*f5c631daSSadaf Ebrahimi   __ sqshl(v11.V4H(), v30.V4H(), v0.V4H());
1651*f5c631daSSadaf Ebrahimi   __ sqshl(v1.V4H(), v18.V4H(), 7);
1652*f5c631daSSadaf Ebrahimi   __ sqshl(v22.V4S(), v3.V4S(), v30.V4S());
1653*f5c631daSSadaf Ebrahimi   __ sqshl(v16.V4S(), v15.V4S(), 28);
1654*f5c631daSSadaf Ebrahimi   __ sqshl(v6.V8B(), v28.V8B(), v25.V8B());
1655*f5c631daSSadaf Ebrahimi   __ sqshl(v0.V8B(), v15.V8B(), 0);
1656*f5c631daSSadaf Ebrahimi   __ sqshl(v6.V8H(), v16.V8H(), v30.V8H());
1657*f5c631daSSadaf Ebrahimi   __ sqshl(v3.V8H(), v20.V8H(), 14);
1658*f5c631daSSadaf Ebrahimi   __ sqshlu(b13, b14, 6);
1659*f5c631daSSadaf Ebrahimi   __ sqshlu(d0, d16, 44);
1660*f5c631daSSadaf Ebrahimi   __ sqshlu(h5, h29, 15);
1661*f5c631daSSadaf Ebrahimi   __ sqshlu(s29, s8, 13);
1662*f5c631daSSadaf Ebrahimi   __ sqshlu(v27.V16B(), v20.V16B(), 2);
1663*f5c631daSSadaf Ebrahimi   __ sqshlu(v24.V2D(), v12.V2D(), 11);
1664*f5c631daSSadaf Ebrahimi   __ sqshlu(v12.V2S(), v19.V2S(), 22);
1665*f5c631daSSadaf Ebrahimi   __ sqshlu(v8.V4H(), v12.V4H(), 11);
1666*f5c631daSSadaf Ebrahimi   __ sqshlu(v18.V4S(), v3.V4S(), 8);
1667*f5c631daSSadaf Ebrahimi   __ sqshlu(v3.V8B(), v10.V8B(), 1);
1668*f5c631daSSadaf Ebrahimi   __ sqshlu(v30.V8H(), v24.V8H(), 4);
1669*f5c631daSSadaf Ebrahimi   __ sqshrn(b1, h28, 1);
1670*f5c631daSSadaf Ebrahimi   __ sqshrn(h31, s7, 10);
1671*f5c631daSSadaf Ebrahimi   __ sqshrn(s4, d10, 24);
1672*f5c631daSSadaf Ebrahimi   __ sqshrn(v10.V2S(), v1.V2D(), 29);
1673*f5c631daSSadaf Ebrahimi   __ sqshrn(v3.V4H(), v13.V4S(), 14);
1674*f5c631daSSadaf Ebrahimi   __ sqshrn(v27.V8B(), v6.V8H(), 7);
1675*f5c631daSSadaf Ebrahimi   __ sqshrn2(v14.V16B(), v23.V8H(), 1);
1676*f5c631daSSadaf Ebrahimi   __ sqshrn2(v25.V4S(), v22.V2D(), 27);
1677*f5c631daSSadaf Ebrahimi   __ sqshrn2(v31.V8H(), v12.V4S(), 10);
1678*f5c631daSSadaf Ebrahimi   __ sqshrun(b9, h0, 1);
1679*f5c631daSSadaf Ebrahimi   __ sqshrun(h11, s6, 7);
1680*f5c631daSSadaf Ebrahimi   __ sqshrun(s13, d12, 13);
1681*f5c631daSSadaf Ebrahimi   __ sqshrun(v10.V2S(), v30.V2D(), 1);
1682*f5c631daSSadaf Ebrahimi   __ sqshrun(v31.V4H(), v3.V4S(), 11);
1683*f5c631daSSadaf Ebrahimi   __ sqshrun(v28.V8B(), v30.V8H(), 8);
1684*f5c631daSSadaf Ebrahimi   __ sqshrun2(v16.V16B(), v27.V8H(), 3);
1685*f5c631daSSadaf Ebrahimi   __ sqshrun2(v27.V4S(), v14.V2D(), 18);
1686*f5c631daSSadaf Ebrahimi   __ sqshrun2(v23.V8H(), v14.V4S(), 1);
1687*f5c631daSSadaf Ebrahimi   __ sqsub(b19, b29, b11);
1688*f5c631daSSadaf Ebrahimi   __ sqsub(d21, d31, d6);
1689*f5c631daSSadaf Ebrahimi   __ sqsub(h18, h10, h19);
1690*f5c631daSSadaf Ebrahimi   __ sqsub(s6, s5, s0);
1691*f5c631daSSadaf Ebrahimi   __ sqsub(v21.V16B(), v22.V16B(), v0.V16B());
1692*f5c631daSSadaf Ebrahimi   __ sqsub(v22.V2D(), v10.V2D(), v17.V2D());
1693*f5c631daSSadaf Ebrahimi   __ sqsub(v8.V2S(), v21.V2S(), v2.V2S());
1694*f5c631daSSadaf Ebrahimi   __ sqsub(v18.V4H(), v25.V4H(), v27.V4H());
1695*f5c631daSSadaf Ebrahimi   __ sqsub(v13.V4S(), v3.V4S(), v6.V4S());
1696*f5c631daSSadaf Ebrahimi   __ sqsub(v28.V8B(), v29.V8B(), v16.V8B());
1697*f5c631daSSadaf Ebrahimi   __ sqsub(v17.V8H(), v6.V8H(), v10.V8H());
1698*f5c631daSSadaf Ebrahimi   __ sqxtn(b27, h26);
1699*f5c631daSSadaf Ebrahimi   __ sqxtn(h17, s11);
1700*f5c631daSSadaf Ebrahimi   __ sqxtn(s22, d31);
1701*f5c631daSSadaf Ebrahimi   __ sqxtn(v26.V2S(), v5.V2D());
1702*f5c631daSSadaf Ebrahimi   __ sqxtn(v13.V4H(), v7.V4S());
1703*f5c631daSSadaf Ebrahimi   __ sqxtn(v19.V8B(), v19.V8H());
1704*f5c631daSSadaf Ebrahimi   __ sqxtn2(v19.V16B(), v3.V8H());
1705*f5c631daSSadaf Ebrahimi   __ sqxtn2(v23.V4S(), v1.V2D());
1706*f5c631daSSadaf Ebrahimi   __ sqxtn2(v13.V8H(), v3.V4S());
1707*f5c631daSSadaf Ebrahimi   __ sqxtun(b26, h9);
1708*f5c631daSSadaf Ebrahimi   __ sqxtun(h19, s12);
1709*f5c631daSSadaf Ebrahimi   __ sqxtun(s3, d6);
1710*f5c631daSSadaf Ebrahimi   __ sqxtun(v29.V2S(), v26.V2D());
1711*f5c631daSSadaf Ebrahimi   __ sqxtun(v26.V4H(), v10.V4S());
1712*f5c631daSSadaf Ebrahimi   __ sqxtun(v7.V8B(), v29.V8H());
1713*f5c631daSSadaf Ebrahimi   __ sqxtun2(v21.V16B(), v14.V8H());
1714*f5c631daSSadaf Ebrahimi   __ sqxtun2(v24.V4S(), v15.V2D());
1715*f5c631daSSadaf Ebrahimi   __ sqxtun2(v30.V8H(), v1.V4S());
1716*f5c631daSSadaf Ebrahimi   __ srhadd(v21.V16B(), v17.V16B(), v15.V16B());
1717*f5c631daSSadaf Ebrahimi   __ srhadd(v28.V2S(), v21.V2S(), v29.V2S());
1718*f5c631daSSadaf Ebrahimi   __ srhadd(v9.V4H(), v1.V4H(), v30.V4H());
1719*f5c631daSSadaf Ebrahimi   __ srhadd(v24.V4S(), v0.V4S(), v2.V4S());
1720*f5c631daSSadaf Ebrahimi   __ srhadd(v6.V8B(), v17.V8B(), v15.V8B());
1721*f5c631daSSadaf Ebrahimi   __ srhadd(v5.V8H(), v7.V8H(), v21.V8H());
1722*f5c631daSSadaf Ebrahimi   __ sri(d14, d14, 49);
1723*f5c631daSSadaf Ebrahimi   __ sri(v23.V16B(), v8.V16B(), 4);
1724*f5c631daSSadaf Ebrahimi   __ sri(v20.V2D(), v13.V2D(), 20);
1725*f5c631daSSadaf Ebrahimi   __ sri(v16.V2S(), v2.V2S(), 24);
1726*f5c631daSSadaf Ebrahimi   __ sri(v5.V4H(), v23.V4H(), 11);
1727*f5c631daSSadaf Ebrahimi   __ sri(v27.V4S(), v15.V4S(), 23);
1728*f5c631daSSadaf Ebrahimi   __ sri(v19.V8B(), v29.V8B(), 4);
1729*f5c631daSSadaf Ebrahimi   __ sri(v7.V8H(), v29.V8H(), 3);
1730*f5c631daSSadaf Ebrahimi   __ srshl(d2, d9, d26);
1731*f5c631daSSadaf Ebrahimi   __ srshl(v29.V16B(), v17.V16B(), v11.V16B());
1732*f5c631daSSadaf Ebrahimi   __ srshl(v8.V2D(), v15.V2D(), v4.V2D());
1733*f5c631daSSadaf Ebrahimi   __ srshl(v25.V2S(), v17.V2S(), v8.V2S());
1734*f5c631daSSadaf Ebrahimi   __ srshl(v19.V4H(), v7.V4H(), v7.V4H());
1735*f5c631daSSadaf Ebrahimi   __ srshl(v13.V4S(), v2.V4S(), v17.V4S());
1736*f5c631daSSadaf Ebrahimi   __ srshl(v22.V8B(), v6.V8B(), v21.V8B());
1737*f5c631daSSadaf Ebrahimi   __ srshl(v10.V8H(), v17.V8H(), v4.V8H());
1738*f5c631daSSadaf Ebrahimi   __ srshr(d21, d18, 45);
1739*f5c631daSSadaf Ebrahimi   __ srshr(v3.V16B(), v11.V16B(), 7);
1740*f5c631daSSadaf Ebrahimi   __ srshr(v21.V2D(), v26.V2D(), 53);
1741*f5c631daSSadaf Ebrahimi   __ srshr(v11.V2S(), v5.V2S(), 28);
1742*f5c631daSSadaf Ebrahimi   __ srshr(v7.V4H(), v18.V4H(), 12);
1743*f5c631daSSadaf Ebrahimi   __ srshr(v7.V4S(), v3.V4S(), 30);
1744*f5c631daSSadaf Ebrahimi   __ srshr(v14.V8B(), v2.V8B(), 6);
1745*f5c631daSSadaf Ebrahimi   __ srshr(v21.V8H(), v20.V8H(), 3);
1746*f5c631daSSadaf Ebrahimi   __ srsra(d21, d30, 63);
1747*f5c631daSSadaf Ebrahimi   __ srsra(v27.V16B(), v30.V16B(), 6);
1748*f5c631daSSadaf Ebrahimi   __ srsra(v20.V2D(), v12.V2D(), 27);
1749*f5c631daSSadaf Ebrahimi   __ srsra(v0.V2S(), v17.V2S(), 5);
1750*f5c631daSSadaf Ebrahimi   __ srsra(v14.V4H(), v16.V4H(), 15);
1751*f5c631daSSadaf Ebrahimi   __ srsra(v18.V4S(), v3.V4S(), 20);
1752*f5c631daSSadaf Ebrahimi   __ srsra(v21.V8B(), v1.V8B(), 1);
1753*f5c631daSSadaf Ebrahimi   __ srsra(v31.V8H(), v25.V8H(), 2);
1754*f5c631daSSadaf Ebrahimi   __ sshl(d1, d13, d9);
1755*f5c631daSSadaf Ebrahimi   __ sshl(v17.V16B(), v31.V16B(), v15.V16B());
1756*f5c631daSSadaf Ebrahimi   __ sshl(v13.V2D(), v16.V2D(), v0.V2D());
1757*f5c631daSSadaf Ebrahimi   __ sshl(v0.V2S(), v7.V2S(), v22.V2S());
1758*f5c631daSSadaf Ebrahimi   __ sshl(v23.V4H(), v19.V4H(), v4.V4H());
1759*f5c631daSSadaf Ebrahimi   __ sshl(v5.V4S(), v5.V4S(), v11.V4S());
1760*f5c631daSSadaf Ebrahimi   __ sshl(v23.V8B(), v27.V8B(), v7.V8B());
1761*f5c631daSSadaf Ebrahimi   __ sshl(v29.V8H(), v10.V8H(), v5.V8H());
1762*f5c631daSSadaf Ebrahimi   __ sshll(v0.V2D(), v2.V2S(), 23);
1763*f5c631daSSadaf Ebrahimi   __ sshll(v11.V4S(), v8.V4H(), 8);
1764*f5c631daSSadaf Ebrahimi   __ sshll(v4.V8H(), v29.V8B(), 1);
1765*f5c631daSSadaf Ebrahimi   __ sshll2(v10.V2D(), v4.V4S(), 14);
1766*f5c631daSSadaf Ebrahimi   __ sshll2(v26.V4S(), v31.V8H(), 6);
1767*f5c631daSSadaf Ebrahimi   __ sshll2(v3.V8H(), v26.V16B(), 4);
1768*f5c631daSSadaf Ebrahimi   __ sshr(d19, d21, 20);
1769*f5c631daSSadaf Ebrahimi   __ sshr(v15.V16B(), v23.V16B(), 5);
1770*f5c631daSSadaf Ebrahimi   __ sshr(v17.V2D(), v14.V2D(), 38);
1771*f5c631daSSadaf Ebrahimi   __ sshr(v3.V2S(), v29.V2S(), 23);
1772*f5c631daSSadaf Ebrahimi   __ sshr(v23.V4H(), v27.V4H(), 4);
1773*f5c631daSSadaf Ebrahimi   __ sshr(v28.V4S(), v3.V4S(), 4);
1774*f5c631daSSadaf Ebrahimi   __ sshr(v14.V8B(), v2.V8B(), 6);
1775*f5c631daSSadaf Ebrahimi   __ sshr(v3.V8H(), v8.V8H(), 6);
1776*f5c631daSSadaf Ebrahimi   __ ssra(d12, d28, 44);
1777*f5c631daSSadaf Ebrahimi   __ ssra(v29.V16B(), v31.V16B(), 4);
1778*f5c631daSSadaf Ebrahimi   __ ssra(v3.V2D(), v0.V2D(), 24);
1779*f5c631daSSadaf Ebrahimi   __ ssra(v14.V2S(), v28.V2S(), 6);
1780*f5c631daSSadaf Ebrahimi   __ ssra(v18.V4H(), v8.V4H(), 7);
1781*f5c631daSSadaf Ebrahimi   __ ssra(v31.V4S(), v14.V4S(), 24);
1782*f5c631daSSadaf Ebrahimi   __ ssra(v28.V8B(), v26.V8B(), 5);
1783*f5c631daSSadaf Ebrahimi   __ ssra(v9.V8H(), v9.V8H(), 14);
1784*f5c631daSSadaf Ebrahimi   __ ssubl(v13.V2D(), v14.V2S(), v3.V2S());
1785*f5c631daSSadaf Ebrahimi   __ ssubl(v5.V4S(), v16.V4H(), v8.V4H());
1786*f5c631daSSadaf Ebrahimi   __ ssubl(v0.V8H(), v28.V8B(), v6.V8B());
1787*f5c631daSSadaf Ebrahimi   __ ssubl2(v5.V2D(), v13.V4S(), v25.V4S());
1788*f5c631daSSadaf Ebrahimi   __ ssubl2(v3.V4S(), v15.V8H(), v17.V8H());
1789*f5c631daSSadaf Ebrahimi   __ ssubl2(v15.V8H(), v15.V16B(), v14.V16B());
1790*f5c631daSSadaf Ebrahimi   __ ssubw(v25.V2D(), v23.V2D(), v26.V2S());
1791*f5c631daSSadaf Ebrahimi   __ ssubw(v21.V4S(), v18.V4S(), v24.V4H());
1792*f5c631daSSadaf Ebrahimi   __ ssubw(v30.V8H(), v22.V8H(), v3.V8B());
1793*f5c631daSSadaf Ebrahimi   __ ssubw2(v16.V2D(), v24.V2D(), v28.V4S());
1794*f5c631daSSadaf Ebrahimi   __ ssubw2(v31.V4S(), v11.V4S(), v15.V8H());
1795*f5c631daSSadaf Ebrahimi   __ ssubw2(v4.V8H(), v8.V8H(), v16.V16B());
1796*f5c631daSSadaf Ebrahimi   __ st1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), MemOperand(x0));
1797*f5c631daSSadaf Ebrahimi   __ st1(v10.V16B(),
1798*f5c631daSSadaf Ebrahimi          v11.V16B(),
1799*f5c631daSSadaf Ebrahimi          v12.V16B(),
1800*f5c631daSSadaf Ebrahimi          v13.V16B(),
1801*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
1802*f5c631daSSadaf Ebrahimi   __ st1(v27.V16B(),
1803*f5c631daSSadaf Ebrahimi          v28.V16B(),
1804*f5c631daSSadaf Ebrahimi          v29.V16B(),
1805*f5c631daSSadaf Ebrahimi          v30.V16B(),
1806*f5c631daSSadaf Ebrahimi          MemOperand(x1, 64, PostIndex));
1807*f5c631daSSadaf Ebrahimi   __ st1(v16.V16B(), v17.V16B(), v18.V16B(), MemOperand(x0));
1808*f5c631daSSadaf Ebrahimi   __ st1(v21.V16B(), v22.V16B(), v23.V16B(), MemOperand(x1, x2, PostIndex));
1809*f5c631daSSadaf Ebrahimi   __ st1(v9.V16B(), v10.V16B(), v11.V16B(), MemOperand(x1, 48, PostIndex));
1810*f5c631daSSadaf Ebrahimi   __ st1(v7.V16B(), v8.V16B(), MemOperand(x0));
1811*f5c631daSSadaf Ebrahimi   __ st1(v26.V16B(), v27.V16B(), MemOperand(x1, x2, PostIndex));
1812*f5c631daSSadaf Ebrahimi   __ st1(v22.V16B(), v23.V16B(), MemOperand(x1, 32, PostIndex));
1813*f5c631daSSadaf Ebrahimi   __ st1(v23.V16B(), MemOperand(x0));
1814*f5c631daSSadaf Ebrahimi   __ st1(v28.V16B(), MemOperand(x1, x2, PostIndex));
1815*f5c631daSSadaf Ebrahimi   __ st1(v2.V16B(), MemOperand(x1, 16, PostIndex));
1816*f5c631daSSadaf Ebrahimi   __ st1(v29.V1D(), v30.V1D(), v31.V1D(), v0.V1D(), MemOperand(x0));
1817*f5c631daSSadaf Ebrahimi   __ st1(v12.V1D(),
1818*f5c631daSSadaf Ebrahimi          v13.V1D(),
1819*f5c631daSSadaf Ebrahimi          v14.V1D(),
1820*f5c631daSSadaf Ebrahimi          v15.V1D(),
1821*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
1822*f5c631daSSadaf Ebrahimi   __ st1(v30.V1D(),
1823*f5c631daSSadaf Ebrahimi          v31.V1D(),
1824*f5c631daSSadaf Ebrahimi          v0.V1D(),
1825*f5c631daSSadaf Ebrahimi          v1.V1D(),
1826*f5c631daSSadaf Ebrahimi          MemOperand(x1, 32, PostIndex));
1827*f5c631daSSadaf Ebrahimi   __ st1(v16.V1D(), v17.V1D(), v18.V1D(), MemOperand(x0));
1828*f5c631daSSadaf Ebrahimi   __ st1(v3.V1D(), v4.V1D(), v5.V1D(), MemOperand(x1, x2, PostIndex));
1829*f5c631daSSadaf Ebrahimi   __ st1(v14.V1D(), v15.V1D(), v16.V1D(), MemOperand(x1, 24, PostIndex));
1830*f5c631daSSadaf Ebrahimi   __ st1(v18.V1D(), v19.V1D(), MemOperand(x0));
1831*f5c631daSSadaf Ebrahimi   __ st1(v5.V1D(), v6.V1D(), MemOperand(x1, x2, PostIndex));
1832*f5c631daSSadaf Ebrahimi   __ st1(v2.V1D(), v3.V1D(), MemOperand(x1, 16, PostIndex));
1833*f5c631daSSadaf Ebrahimi   __ st1(v4.V1D(), MemOperand(x0));
1834*f5c631daSSadaf Ebrahimi   __ st1(v27.V1D(), MemOperand(x1, x2, PostIndex));
1835*f5c631daSSadaf Ebrahimi   __ st1(v23.V1D(), MemOperand(x1, 8, PostIndex));
1836*f5c631daSSadaf Ebrahimi   __ st1(v2.V2D(), v3.V2D(), v4.V2D(), v5.V2D(), MemOperand(x0));
1837*f5c631daSSadaf Ebrahimi   __ st1(v22.V2D(),
1838*f5c631daSSadaf Ebrahimi          v23.V2D(),
1839*f5c631daSSadaf Ebrahimi          v24.V2D(),
1840*f5c631daSSadaf Ebrahimi          v25.V2D(),
1841*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
1842*f5c631daSSadaf Ebrahimi   __ st1(v28.V2D(),
1843*f5c631daSSadaf Ebrahimi          v29.V2D(),
1844*f5c631daSSadaf Ebrahimi          v30.V2D(),
1845*f5c631daSSadaf Ebrahimi          v31.V2D(),
1846*f5c631daSSadaf Ebrahimi          MemOperand(x1, 64, PostIndex));
1847*f5c631daSSadaf Ebrahimi   __ st1(v17.V2D(), v18.V2D(), v19.V2D(), MemOperand(x0));
1848*f5c631daSSadaf Ebrahimi   __ st1(v16.V2D(), v17.V2D(), v18.V2D(), MemOperand(x1, x2, PostIndex));
1849*f5c631daSSadaf Ebrahimi   __ st1(v22.V2D(), v23.V2D(), v24.V2D(), MemOperand(x1, 48, PostIndex));
1850*f5c631daSSadaf Ebrahimi   __ st1(v21.V2D(), v22.V2D(), MemOperand(x0));
1851*f5c631daSSadaf Ebrahimi   __ st1(v6.V2D(), v7.V2D(), MemOperand(x1, x2, PostIndex));
1852*f5c631daSSadaf Ebrahimi   __ st1(v27.V2D(), v28.V2D(), MemOperand(x1, 32, PostIndex));
1853*f5c631daSSadaf Ebrahimi   __ st1(v21.V2D(), MemOperand(x0));
1854*f5c631daSSadaf Ebrahimi   __ st1(v29.V2D(), MemOperand(x1, x2, PostIndex));
1855*f5c631daSSadaf Ebrahimi   __ st1(v20.V2D(), MemOperand(x1, 16, PostIndex));
1856*f5c631daSSadaf Ebrahimi   __ st1(v22.V2S(), v23.V2S(), v24.V2S(), v25.V2S(), MemOperand(x0));
1857*f5c631daSSadaf Ebrahimi   __ st1(v8.V2S(),
1858*f5c631daSSadaf Ebrahimi          v9.V2S(),
1859*f5c631daSSadaf Ebrahimi          v10.V2S(),
1860*f5c631daSSadaf Ebrahimi          v11.V2S(),
1861*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
1862*f5c631daSSadaf Ebrahimi   __ st1(v15.V2S(),
1863*f5c631daSSadaf Ebrahimi          v16.V2S(),
1864*f5c631daSSadaf Ebrahimi          v17.V2S(),
1865*f5c631daSSadaf Ebrahimi          v18.V2S(),
1866*f5c631daSSadaf Ebrahimi          MemOperand(x1, 32, PostIndex));
1867*f5c631daSSadaf Ebrahimi   __ st1(v2.V2S(), v3.V2S(), v4.V2S(), MemOperand(x0));
1868*f5c631daSSadaf Ebrahimi   __ st1(v23.V2S(), v24.V2S(), v25.V2S(), MemOperand(x1, x2, PostIndex));
1869*f5c631daSSadaf Ebrahimi   __ st1(v7.V2S(), v8.V2S(), v9.V2S(), MemOperand(x1, 24, PostIndex));
1870*f5c631daSSadaf Ebrahimi   __ st1(v28.V2S(), v29.V2S(), MemOperand(x0));
1871*f5c631daSSadaf Ebrahimi   __ st1(v29.V2S(), v30.V2S(), MemOperand(x1, x2, PostIndex));
1872*f5c631daSSadaf Ebrahimi   __ st1(v23.V2S(), v24.V2S(), MemOperand(x1, 16, PostIndex));
1873*f5c631daSSadaf Ebrahimi   __ st1(v6.V2S(), MemOperand(x0));
1874*f5c631daSSadaf Ebrahimi   __ st1(v11.V2S(), MemOperand(x1, x2, PostIndex));
1875*f5c631daSSadaf Ebrahimi   __ st1(v17.V2S(), MemOperand(x1, 8, PostIndex));
1876*f5c631daSSadaf Ebrahimi   __ st1(v6.V4H(), v7.V4H(), v8.V4H(), v9.V4H(), MemOperand(x0));
1877*f5c631daSSadaf Ebrahimi   __ st1(v9.V4H(),
1878*f5c631daSSadaf Ebrahimi          v10.V4H(),
1879*f5c631daSSadaf Ebrahimi          v11.V4H(),
1880*f5c631daSSadaf Ebrahimi          v12.V4H(),
1881*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
1882*f5c631daSSadaf Ebrahimi   __ st1(v25.V4H(),
1883*f5c631daSSadaf Ebrahimi          v26.V4H(),
1884*f5c631daSSadaf Ebrahimi          v27.V4H(),
1885*f5c631daSSadaf Ebrahimi          v28.V4H(),
1886*f5c631daSSadaf Ebrahimi          MemOperand(x1, 32, PostIndex));
1887*f5c631daSSadaf Ebrahimi   __ st1(v11.V4H(), v12.V4H(), v13.V4H(), MemOperand(x0));
1888*f5c631daSSadaf Ebrahimi   __ st1(v10.V4H(), v11.V4H(), v12.V4H(), MemOperand(x1, x2, PostIndex));
1889*f5c631daSSadaf Ebrahimi   __ st1(v12.V4H(), v13.V4H(), v14.V4H(), MemOperand(x1, 24, PostIndex));
1890*f5c631daSSadaf Ebrahimi   __ st1(v13.V4H(), v14.V4H(), MemOperand(x0));
1891*f5c631daSSadaf Ebrahimi   __ st1(v15.V4H(), v16.V4H(), MemOperand(x1, x2, PostIndex));
1892*f5c631daSSadaf Ebrahimi   __ st1(v21.V4H(), v22.V4H(), MemOperand(x1, 16, PostIndex));
1893*f5c631daSSadaf Ebrahimi   __ st1(v16.V4H(), MemOperand(x0));
1894*f5c631daSSadaf Ebrahimi   __ st1(v8.V4H(), MemOperand(x1, x2, PostIndex));
1895*f5c631daSSadaf Ebrahimi   __ st1(v30.V4H(), MemOperand(x1, 8, PostIndex));
1896*f5c631daSSadaf Ebrahimi   __ st1(v3.V4S(), v4.V4S(), v5.V4S(), v6.V4S(), MemOperand(x0));
1897*f5c631daSSadaf Ebrahimi   __ st1(v25.V4S(),
1898*f5c631daSSadaf Ebrahimi          v26.V4S(),
1899*f5c631daSSadaf Ebrahimi          v27.V4S(),
1900*f5c631daSSadaf Ebrahimi          v28.V4S(),
1901*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
1902*f5c631daSSadaf Ebrahimi   __ st1(v5.V4S(), v6.V4S(), v7.V4S(), v8.V4S(), MemOperand(x1, 64, PostIndex));
1903*f5c631daSSadaf Ebrahimi   __ st1(v31.V4S(), v0.V4S(), v1.V4S(), MemOperand(x0));
1904*f5c631daSSadaf Ebrahimi   __ st1(v30.V4S(), v31.V4S(), v0.V4S(), MemOperand(x1, x2, PostIndex));
1905*f5c631daSSadaf Ebrahimi   __ st1(v6.V4S(), v7.V4S(), v8.V4S(), MemOperand(x1, 48, PostIndex));
1906*f5c631daSSadaf Ebrahimi   __ st1(v17.V4S(), v18.V4S(), MemOperand(x0));
1907*f5c631daSSadaf Ebrahimi   __ st1(v31.V4S(), v0.V4S(), MemOperand(x1, x2, PostIndex));
1908*f5c631daSSadaf Ebrahimi   __ st1(v1.V4S(), v2.V4S(), MemOperand(x1, 32, PostIndex));
1909*f5c631daSSadaf Ebrahimi   __ st1(v26.V4S(), MemOperand(x0));
1910*f5c631daSSadaf Ebrahimi   __ st1(v15.V4S(), MemOperand(x1, x2, PostIndex));
1911*f5c631daSSadaf Ebrahimi   __ st1(v13.V4S(), MemOperand(x1, 16, PostIndex));
1912*f5c631daSSadaf Ebrahimi   __ st1(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), MemOperand(x0));
1913*f5c631daSSadaf Ebrahimi   __ st1(v10.V8B(),
1914*f5c631daSSadaf Ebrahimi          v11.V8B(),
1915*f5c631daSSadaf Ebrahimi          v12.V8B(),
1916*f5c631daSSadaf Ebrahimi          v13.V8B(),
1917*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
1918*f5c631daSSadaf Ebrahimi   __ st1(v15.V8B(),
1919*f5c631daSSadaf Ebrahimi          v16.V8B(),
1920*f5c631daSSadaf Ebrahimi          v17.V8B(),
1921*f5c631daSSadaf Ebrahimi          v18.V8B(),
1922*f5c631daSSadaf Ebrahimi          MemOperand(x1, 32, PostIndex));
1923*f5c631daSSadaf Ebrahimi   __ st1(v19.V8B(), v20.V8B(), v21.V8B(), MemOperand(x0));
1924*f5c631daSSadaf Ebrahimi   __ st1(v31.V8B(), v0.V8B(), v1.V8B(), MemOperand(x1, x2, PostIndex));
1925*f5c631daSSadaf Ebrahimi   __ st1(v9.V8B(), v10.V8B(), v11.V8B(), MemOperand(x1, 24, PostIndex));
1926*f5c631daSSadaf Ebrahimi   __ st1(v12.V8B(), v13.V8B(), MemOperand(x0));
1927*f5c631daSSadaf Ebrahimi   __ st1(v2.V8B(), v3.V8B(), MemOperand(x1, x2, PostIndex));
1928*f5c631daSSadaf Ebrahimi   __ st1(v0.V8B(), v1.V8B(), MemOperand(x1, 16, PostIndex));
1929*f5c631daSSadaf Ebrahimi   __ st1(v16.V8B(), MemOperand(x0));
1930*f5c631daSSadaf Ebrahimi   __ st1(v25.V8B(), MemOperand(x1, x2, PostIndex));
1931*f5c631daSSadaf Ebrahimi   __ st1(v31.V8B(), MemOperand(x1, 8, PostIndex));
1932*f5c631daSSadaf Ebrahimi   __ st1(v4.V8H(), v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x0));
1933*f5c631daSSadaf Ebrahimi   __ st1(v3.V8H(), v4.V8H(), v5.V8H(), v6.V8H(), MemOperand(x1, x2, PostIndex));
1934*f5c631daSSadaf Ebrahimi   __ st1(v26.V8H(),
1935*f5c631daSSadaf Ebrahimi          v27.V8H(),
1936*f5c631daSSadaf Ebrahimi          v28.V8H(),
1937*f5c631daSSadaf Ebrahimi          v29.V8H(),
1938*f5c631daSSadaf Ebrahimi          MemOperand(x1, 64, PostIndex));
1939*f5c631daSSadaf Ebrahimi   __ st1(v10.V8H(), v11.V8H(), v12.V8H(), MemOperand(x0));
1940*f5c631daSSadaf Ebrahimi   __ st1(v21.V8H(), v22.V8H(), v23.V8H(), MemOperand(x1, x2, PostIndex));
1941*f5c631daSSadaf Ebrahimi   __ st1(v18.V8H(), v19.V8H(), v20.V8H(), MemOperand(x1, 48, PostIndex));
1942*f5c631daSSadaf Ebrahimi   __ st1(v26.V8H(), v27.V8H(), MemOperand(x0));
1943*f5c631daSSadaf Ebrahimi   __ st1(v24.V8H(), v25.V8H(), MemOperand(x1, x2, PostIndex));
1944*f5c631daSSadaf Ebrahimi   __ st1(v17.V8H(), v18.V8H(), MemOperand(x1, 32, PostIndex));
1945*f5c631daSSadaf Ebrahimi   __ st1(v29.V8H(), MemOperand(x0));
1946*f5c631daSSadaf Ebrahimi   __ st1(v19.V8H(), MemOperand(x1, x2, PostIndex));
1947*f5c631daSSadaf Ebrahimi   __ st1(v23.V8H(), MemOperand(x1, 16, PostIndex));
1948*f5c631daSSadaf Ebrahimi   __ st1(v19.B(), 15, MemOperand(x0));
1949*f5c631daSSadaf Ebrahimi   __ st1(v25.B(), 9, MemOperand(x1, x2, PostIndex));
1950*f5c631daSSadaf Ebrahimi   __ st1(v4.B(), 8, MemOperand(x1, 1, PostIndex));
1951*f5c631daSSadaf Ebrahimi   __ st1(v13.D(), 0, MemOperand(x0));
1952*f5c631daSSadaf Ebrahimi   __ st1(v30.D(), 0, MemOperand(x1, x2, PostIndex));
1953*f5c631daSSadaf Ebrahimi   __ st1(v3.D(), 0, MemOperand(x1, 8, PostIndex));
1954*f5c631daSSadaf Ebrahimi   __ st1(v22.H(), 0, MemOperand(x0));
1955*f5c631daSSadaf Ebrahimi   __ st1(v31.H(), 7, MemOperand(x1, x2, PostIndex));
1956*f5c631daSSadaf Ebrahimi   __ st1(v23.H(), 3, MemOperand(x1, 2, PostIndex));
1957*f5c631daSSadaf Ebrahimi   __ st1(v0.S(), 0, MemOperand(x0));
1958*f5c631daSSadaf Ebrahimi   __ st1(v11.S(), 3, MemOperand(x1, x2, PostIndex));
1959*f5c631daSSadaf Ebrahimi   __ st1(v24.S(), 3, MemOperand(x1, 4, PostIndex));
1960*f5c631daSSadaf Ebrahimi   __ st2(v7.V16B(), v8.V16B(), MemOperand(x0));
1961*f5c631daSSadaf Ebrahimi   __ st2(v5.V16B(), v6.V16B(), MemOperand(x1, x2, PostIndex));
1962*f5c631daSSadaf Ebrahimi   __ st2(v18.V16B(), v19.V16B(), MemOperand(x1, 32, PostIndex));
1963*f5c631daSSadaf Ebrahimi   __ st2(v14.V2D(), v15.V2D(), MemOperand(x0));
1964*f5c631daSSadaf Ebrahimi   __ st2(v7.V2D(), v8.V2D(), MemOperand(x1, x2, PostIndex));
1965*f5c631daSSadaf Ebrahimi   __ st2(v24.V2D(), v25.V2D(), MemOperand(x1, 32, PostIndex));
1966*f5c631daSSadaf Ebrahimi   __ st2(v22.V2S(), v23.V2S(), MemOperand(x0));
1967*f5c631daSSadaf Ebrahimi   __ st2(v4.V2S(), v5.V2S(), MemOperand(x1, x2, PostIndex));
1968*f5c631daSSadaf Ebrahimi   __ st2(v2.V2S(), v3.V2S(), MemOperand(x1, 16, PostIndex));
1969*f5c631daSSadaf Ebrahimi   __ st2(v23.V4H(), v24.V4H(), MemOperand(x0));
1970*f5c631daSSadaf Ebrahimi   __ st2(v8.V4H(), v9.V4H(), MemOperand(x1, x2, PostIndex));
1971*f5c631daSSadaf Ebrahimi   __ st2(v7.V4H(), v8.V4H(), MemOperand(x1, 16, PostIndex));
1972*f5c631daSSadaf Ebrahimi   __ st2(v17.V4S(), v18.V4S(), MemOperand(x0));
1973*f5c631daSSadaf Ebrahimi   __ st2(v6.V4S(), v7.V4S(), MemOperand(x1, x2, PostIndex));
1974*f5c631daSSadaf Ebrahimi   __ st2(v26.V4S(), v27.V4S(), MemOperand(x1, 32, PostIndex));
1975*f5c631daSSadaf Ebrahimi   __ st2(v31.V8B(), v0.V8B(), MemOperand(x0));
1976*f5c631daSSadaf Ebrahimi   __ st2(v0.V8B(), v1.V8B(), MemOperand(x1, x2, PostIndex));
1977*f5c631daSSadaf Ebrahimi   __ st2(v21.V8B(), v22.V8B(), MemOperand(x1, 16, PostIndex));
1978*f5c631daSSadaf Ebrahimi   __ st2(v7.V8H(), v8.V8H(), MemOperand(x0));
1979*f5c631daSSadaf Ebrahimi   __ st2(v22.V8H(), v23.V8H(), MemOperand(x1, x2, PostIndex));
1980*f5c631daSSadaf Ebrahimi   __ st2(v4.V8H(), v5.V8H(), MemOperand(x1, 32, PostIndex));
1981*f5c631daSSadaf Ebrahimi   __ st2(v8.B(), v9.B(), 15, MemOperand(x0));
1982*f5c631daSSadaf Ebrahimi   __ st2(v8.B(), v9.B(), 15, MemOperand(x1, x2, PostIndex));
1983*f5c631daSSadaf Ebrahimi   __ st2(v7.B(), v8.B(), 4, MemOperand(x1, 2, PostIndex));
1984*f5c631daSSadaf Ebrahimi   __ st2(v25.D(), v26.D(), 0, MemOperand(x0));
1985*f5c631daSSadaf Ebrahimi   __ st2(v17.D(), v18.D(), 1, MemOperand(x1, x2, PostIndex));
1986*f5c631daSSadaf Ebrahimi   __ st2(v3.D(), v4.D(), 1, MemOperand(x1, 16, PostIndex));
1987*f5c631daSSadaf Ebrahimi   __ st2(v4.H(), v5.H(), 3, MemOperand(x0));
1988*f5c631daSSadaf Ebrahimi   __ st2(v0.H(), v1.H(), 5, MemOperand(x1, x2, PostIndex));
1989*f5c631daSSadaf Ebrahimi   __ st2(v22.H(), v23.H(), 2, MemOperand(x1, 4, PostIndex));
1990*f5c631daSSadaf Ebrahimi   __ st2(v14.S(), v15.S(), 3, MemOperand(x0));
1991*f5c631daSSadaf Ebrahimi   __ st2(v23.S(), v24.S(), 3, MemOperand(x1, x2, PostIndex));
1992*f5c631daSSadaf Ebrahimi   __ st2(v0.S(), v1.S(), 2, MemOperand(x1, 8, PostIndex));
1993*f5c631daSSadaf Ebrahimi   __ st3(v26.V16B(), v27.V16B(), v28.V16B(), MemOperand(x0));
1994*f5c631daSSadaf Ebrahimi   __ st3(v21.V16B(), v22.V16B(), v23.V16B(), MemOperand(x1, x2, PostIndex));
1995*f5c631daSSadaf Ebrahimi   __ st3(v24.V16B(), v25.V16B(), v26.V16B(), MemOperand(x1, 48, PostIndex));
1996*f5c631daSSadaf Ebrahimi   __ st3(v17.V2D(), v18.V2D(), v19.V2D(), MemOperand(x0));
1997*f5c631daSSadaf Ebrahimi   __ st3(v23.V2D(), v24.V2D(), v25.V2D(), MemOperand(x1, x2, PostIndex));
1998*f5c631daSSadaf Ebrahimi   __ st3(v10.V2D(), v11.V2D(), v12.V2D(), MemOperand(x1, 48, PostIndex));
1999*f5c631daSSadaf Ebrahimi   __ st3(v9.V2S(), v10.V2S(), v11.V2S(), MemOperand(x0));
2000*f5c631daSSadaf Ebrahimi   __ st3(v13.V2S(), v14.V2S(), v15.V2S(), MemOperand(x1, x2, PostIndex));
2001*f5c631daSSadaf Ebrahimi   __ st3(v22.V2S(), v23.V2S(), v24.V2S(), MemOperand(x1, 24, PostIndex));
2002*f5c631daSSadaf Ebrahimi   __ st3(v31.V4H(), v0.V4H(), v1.V4H(), MemOperand(x0));
2003*f5c631daSSadaf Ebrahimi   __ st3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x1, x2, PostIndex));
2004*f5c631daSSadaf Ebrahimi   __ st3(v19.V4H(), v20.V4H(), v21.V4H(), MemOperand(x1, 24, PostIndex));
2005*f5c631daSSadaf Ebrahimi   __ st3(v18.V4S(), v19.V4S(), v20.V4S(), MemOperand(x0));
2006*f5c631daSSadaf Ebrahimi   __ st3(v25.V4S(), v26.V4S(), v27.V4S(), MemOperand(x1, x2, PostIndex));
2007*f5c631daSSadaf Ebrahimi   __ st3(v16.V4S(), v17.V4S(), v18.V4S(), MemOperand(x1, 48, PostIndex));
2008*f5c631daSSadaf Ebrahimi   __ st3(v27.V8B(), v28.V8B(), v29.V8B(), MemOperand(x0));
2009*f5c631daSSadaf Ebrahimi   __ st3(v29.V8B(), v30.V8B(), v31.V8B(), MemOperand(x1, x2, PostIndex));
2010*f5c631daSSadaf Ebrahimi   __ st3(v30.V8B(), v31.V8B(), v0.V8B(), MemOperand(x1, 24, PostIndex));
2011*f5c631daSSadaf Ebrahimi   __ st3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x0));
2012*f5c631daSSadaf Ebrahimi   __ st3(v18.V8H(), v19.V8H(), v20.V8H(), MemOperand(x1, x2, PostIndex));
2013*f5c631daSSadaf Ebrahimi   __ st3(v18.V8H(), v19.V8H(), v20.V8H(), MemOperand(x1, 48, PostIndex));
2014*f5c631daSSadaf Ebrahimi   __ st3(v31.B(), v0.B(), v1.B(), 10, MemOperand(x0));
2015*f5c631daSSadaf Ebrahimi   __ st3(v4.B(), v5.B(), v6.B(), 5, MemOperand(x1, x2, PostIndex));
2016*f5c631daSSadaf Ebrahimi   __ st3(v5.B(), v6.B(), v7.B(), 1, MemOperand(x1, 3, PostIndex));
2017*f5c631daSSadaf Ebrahimi   __ st3(v5.D(), v6.D(), v7.D(), 0, MemOperand(x0));
2018*f5c631daSSadaf Ebrahimi   __ st3(v6.D(), v7.D(), v8.D(), 0, MemOperand(x1, x2, PostIndex));
2019*f5c631daSSadaf Ebrahimi   __ st3(v0.D(), v1.D(), v2.D(), 0, MemOperand(x1, 24, PostIndex));
2020*f5c631daSSadaf Ebrahimi   __ st3(v31.H(), v0.H(), v1.H(), 2, MemOperand(x0));
2021*f5c631daSSadaf Ebrahimi   __ st3(v14.H(), v15.H(), v16.H(), 5, MemOperand(x1, x2, PostIndex));
2022*f5c631daSSadaf Ebrahimi   __ st3(v21.H(), v22.H(), v23.H(), 6, MemOperand(x1, 6, PostIndex));
2023*f5c631daSSadaf Ebrahimi   __ st3(v21.S(), v22.S(), v23.S(), 0, MemOperand(x0));
2024*f5c631daSSadaf Ebrahimi   __ st3(v11.S(), v12.S(), v13.S(), 1, MemOperand(x1, x2, PostIndex));
2025*f5c631daSSadaf Ebrahimi   __ st3(v15.S(), v16.S(), v17.S(), 0, MemOperand(x1, 12, PostIndex));
2026*f5c631daSSadaf Ebrahimi   __ st4(v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), MemOperand(x0));
2027*f5c631daSSadaf Ebrahimi   __ st4(v24.V16B(),
2028*f5c631daSSadaf Ebrahimi          v25.V16B(),
2029*f5c631daSSadaf Ebrahimi          v26.V16B(),
2030*f5c631daSSadaf Ebrahimi          v27.V16B(),
2031*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
2032*f5c631daSSadaf Ebrahimi   __ st4(v15.V16B(),
2033*f5c631daSSadaf Ebrahimi          v16.V16B(),
2034*f5c631daSSadaf Ebrahimi          v17.V16B(),
2035*f5c631daSSadaf Ebrahimi          v18.V16B(),
2036*f5c631daSSadaf Ebrahimi          MemOperand(x1, 64, PostIndex));
2037*f5c631daSSadaf Ebrahimi   __ st4(v16.V2D(), v17.V2D(), v18.V2D(), v19.V2D(), MemOperand(x0));
2038*f5c631daSSadaf Ebrahimi   __ st4(v17.V2D(),
2039*f5c631daSSadaf Ebrahimi          v18.V2D(),
2040*f5c631daSSadaf Ebrahimi          v19.V2D(),
2041*f5c631daSSadaf Ebrahimi          v20.V2D(),
2042*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
2043*f5c631daSSadaf Ebrahimi   __ st4(v9.V2D(),
2044*f5c631daSSadaf Ebrahimi          v10.V2D(),
2045*f5c631daSSadaf Ebrahimi          v11.V2D(),
2046*f5c631daSSadaf Ebrahimi          v12.V2D(),
2047*f5c631daSSadaf Ebrahimi          MemOperand(x1, 64, PostIndex));
2048*f5c631daSSadaf Ebrahimi   __ st4(v23.V2S(), v24.V2S(), v25.V2S(), v26.V2S(), MemOperand(x0));
2049*f5c631daSSadaf Ebrahimi   __ st4(v15.V2S(),
2050*f5c631daSSadaf Ebrahimi          v16.V2S(),
2051*f5c631daSSadaf Ebrahimi          v17.V2S(),
2052*f5c631daSSadaf Ebrahimi          v18.V2S(),
2053*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
2054*f5c631daSSadaf Ebrahimi   __ st4(v24.V2S(),
2055*f5c631daSSadaf Ebrahimi          v25.V2S(),
2056*f5c631daSSadaf Ebrahimi          v26.V2S(),
2057*f5c631daSSadaf Ebrahimi          v27.V2S(),
2058*f5c631daSSadaf Ebrahimi          MemOperand(x1, 32, PostIndex));
2059*f5c631daSSadaf Ebrahimi   __ st4(v14.V4H(), v15.V4H(), v16.V4H(), v17.V4H(), MemOperand(x0));
2060*f5c631daSSadaf Ebrahimi   __ st4(v18.V4H(),
2061*f5c631daSSadaf Ebrahimi          v19.V4H(),
2062*f5c631daSSadaf Ebrahimi          v20.V4H(),
2063*f5c631daSSadaf Ebrahimi          v21.V4H(),
2064*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
2065*f5c631daSSadaf Ebrahimi   __ st4(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), MemOperand(x1, 32, PostIndex));
2066*f5c631daSSadaf Ebrahimi   __ st4(v13.V4S(), v14.V4S(), v15.V4S(), v16.V4S(), MemOperand(x0));
2067*f5c631daSSadaf Ebrahimi   __ st4(v6.V4S(), v7.V4S(), v8.V4S(), v9.V4S(), MemOperand(x1, x2, PostIndex));
2068*f5c631daSSadaf Ebrahimi   __ st4(v15.V4S(),
2069*f5c631daSSadaf Ebrahimi          v16.V4S(),
2070*f5c631daSSadaf Ebrahimi          v17.V4S(),
2071*f5c631daSSadaf Ebrahimi          v18.V4S(),
2072*f5c631daSSadaf Ebrahimi          MemOperand(x1, 64, PostIndex));
2073*f5c631daSSadaf Ebrahimi   __ st4(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), MemOperand(x0));
2074*f5c631daSSadaf Ebrahimi   __ st4(v25.V8B(),
2075*f5c631daSSadaf Ebrahimi          v26.V8B(),
2076*f5c631daSSadaf Ebrahimi          v27.V8B(),
2077*f5c631daSSadaf Ebrahimi          v28.V8B(),
2078*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
2079*f5c631daSSadaf Ebrahimi   __ st4(v19.V8B(),
2080*f5c631daSSadaf Ebrahimi          v20.V8B(),
2081*f5c631daSSadaf Ebrahimi          v21.V8B(),
2082*f5c631daSSadaf Ebrahimi          v22.V8B(),
2083*f5c631daSSadaf Ebrahimi          MemOperand(x1, 32, PostIndex));
2084*f5c631daSSadaf Ebrahimi   __ st4(v19.V8H(), v20.V8H(), v21.V8H(), v22.V8H(), MemOperand(x0));
2085*f5c631daSSadaf Ebrahimi   __ st4(v15.V8H(),
2086*f5c631daSSadaf Ebrahimi          v16.V8H(),
2087*f5c631daSSadaf Ebrahimi          v17.V8H(),
2088*f5c631daSSadaf Ebrahimi          v18.V8H(),
2089*f5c631daSSadaf Ebrahimi          MemOperand(x1, x2, PostIndex));
2090*f5c631daSSadaf Ebrahimi   __ st4(v31.V8H(),
2091*f5c631daSSadaf Ebrahimi          v0.V8H(),
2092*f5c631daSSadaf Ebrahimi          v1.V8H(),
2093*f5c631daSSadaf Ebrahimi          v2.V8H(),
2094*f5c631daSSadaf Ebrahimi          MemOperand(x1, 64, PostIndex));
2095*f5c631daSSadaf Ebrahimi   __ st4(v0.B(), v1.B(), v2.B(), v3.B(), 13, MemOperand(x0));
2096*f5c631daSSadaf Ebrahimi   __ st4(v4.B(), v5.B(), v6.B(), v7.B(), 10, MemOperand(x1, x2, PostIndex));
2097*f5c631daSSadaf Ebrahimi   __ st4(v9.B(), v10.B(), v11.B(), v12.B(), 9, MemOperand(x1, 4, PostIndex));
2098*f5c631daSSadaf Ebrahimi   __ st4(v2.D(), v3.D(), v4.D(), v5.D(), 1, MemOperand(x0));
2099*f5c631daSSadaf Ebrahimi   __ st4(v7.D(), v8.D(), v9.D(), v10.D(), 0, MemOperand(x1, x2, PostIndex));
2100*f5c631daSSadaf Ebrahimi   __ st4(v31.D(), v0.D(), v1.D(), v2.D(), 1, MemOperand(x1, 32, PostIndex));
2101*f5c631daSSadaf Ebrahimi   __ st4(v2.H(), v3.H(), v4.H(), v5.H(), 1, MemOperand(x0));
2102*f5c631daSSadaf Ebrahimi   __ st4(v27.H(), v28.H(), v29.H(), v30.H(), 3, MemOperand(x1, x2, PostIndex));
2103*f5c631daSSadaf Ebrahimi   __ st4(v24.H(), v25.H(), v26.H(), v27.H(), 4, MemOperand(x1, 8, PostIndex));
2104*f5c631daSSadaf Ebrahimi   __ st4(v18.S(), v19.S(), v20.S(), v21.S(), 2, MemOperand(x0));
2105*f5c631daSSadaf Ebrahimi   __ st4(v6.S(), v7.S(), v8.S(), v9.S(), 2, MemOperand(x1, x2, PostIndex));
2106*f5c631daSSadaf Ebrahimi   __ st4(v25.S(), v26.S(), v27.S(), v28.S(), 1, MemOperand(x1, 16, PostIndex));
2107*f5c631daSSadaf Ebrahimi   __ sub(d12, d17, d2);
2108*f5c631daSSadaf Ebrahimi   __ sub(v20.V16B(), v24.V16B(), v8.V16B());
2109*f5c631daSSadaf Ebrahimi   __ sub(v8.V2D(), v29.V2D(), v5.V2D());
2110*f5c631daSSadaf Ebrahimi   __ sub(v2.V2S(), v28.V2S(), v24.V2S());
2111*f5c631daSSadaf Ebrahimi   __ sub(v24.V4H(), v10.V4H(), v4.V4H());
2112*f5c631daSSadaf Ebrahimi   __ sub(v28.V4S(), v4.V4S(), v17.V4S());
2113*f5c631daSSadaf Ebrahimi   __ sub(v16.V8B(), v27.V8B(), v2.V8B());
2114*f5c631daSSadaf Ebrahimi   __ sub(v20.V8H(), v10.V8H(), v13.V8H());
2115*f5c631daSSadaf Ebrahimi   __ subhn(v5.V2S(), v14.V2D(), v13.V2D());
2116*f5c631daSSadaf Ebrahimi   __ subhn(v10.V4H(), v5.V4S(), v8.V4S());
2117*f5c631daSSadaf Ebrahimi   __ subhn(v6.V8B(), v10.V8H(), v22.V8H());
2118*f5c631daSSadaf Ebrahimi   __ subhn2(v11.V16B(), v6.V8H(), v9.V8H());
2119*f5c631daSSadaf Ebrahimi   __ subhn2(v25.V4S(), v18.V2D(), v24.V2D());
2120*f5c631daSSadaf Ebrahimi   __ subhn2(v20.V8H(), v21.V4S(), v1.V4S());
2121*f5c631daSSadaf Ebrahimi   __ suqadd(b25, b11);
2122*f5c631daSSadaf Ebrahimi   __ suqadd(d13, d1);
2123*f5c631daSSadaf Ebrahimi   __ suqadd(h0, h9);
2124*f5c631daSSadaf Ebrahimi   __ suqadd(s22, s8);
2125*f5c631daSSadaf Ebrahimi   __ suqadd(v24.V16B(), v27.V16B());
2126*f5c631daSSadaf Ebrahimi   __ suqadd(v26.V2D(), v14.V2D());
2127*f5c631daSSadaf Ebrahimi   __ suqadd(v7.V2S(), v10.V2S());
2128*f5c631daSSadaf Ebrahimi   __ suqadd(v25.V4H(), v12.V4H());
2129*f5c631daSSadaf Ebrahimi   __ suqadd(v4.V4S(), v3.V4S());
2130*f5c631daSSadaf Ebrahimi   __ suqadd(v14.V8B(), v18.V8B());
2131*f5c631daSSadaf Ebrahimi   __ suqadd(v31.V8H(), v8.V8H());
2132*f5c631daSSadaf Ebrahimi   __ sxtl(v16.V2D(), v20.V2S());
2133*f5c631daSSadaf Ebrahimi   __ sxtl(v27.V4S(), v28.V4H());
2134*f5c631daSSadaf Ebrahimi   __ sxtl(v0.V8H(), v22.V8B());
2135*f5c631daSSadaf Ebrahimi   __ sxtl2(v6.V2D(), v7.V4S());
2136*f5c631daSSadaf Ebrahimi   __ sxtl2(v9.V4S(), v27.V8H());
2137*f5c631daSSadaf Ebrahimi   __ sxtl2(v16.V8H(), v16.V16B());
2138*f5c631daSSadaf Ebrahimi   __ tbl(v25.V16B(),
2139*f5c631daSSadaf Ebrahimi          v17.V16B(),
2140*f5c631daSSadaf Ebrahimi          v18.V16B(),
2141*f5c631daSSadaf Ebrahimi          v19.V16B(),
2142*f5c631daSSadaf Ebrahimi          v20.V16B(),
2143*f5c631daSSadaf Ebrahimi          v22.V16B());
2144*f5c631daSSadaf Ebrahimi   __ tbl(v28.V16B(), v13.V16B(), v14.V16B(), v15.V16B(), v4.V16B());
2145*f5c631daSSadaf Ebrahimi   __ tbl(v3.V16B(), v0.V16B(), v1.V16B(), v2.V16B());
2146*f5c631daSSadaf Ebrahimi   __ tbl(v20.V16B(), v15.V16B(), v4.V16B());
2147*f5c631daSSadaf Ebrahimi   __ tbl(v7.V8B(), v23.V16B(), v24.V16B(), v25.V16B(), v26.V16B(), v20.V8B());
2148*f5c631daSSadaf Ebrahimi   __ tbl(v8.V8B(), v1.V16B(), v2.V16B(), v3.V16B(), v31.V8B());
2149*f5c631daSSadaf Ebrahimi   __ tbl(v8.V8B(), v25.V16B(), v26.V16B(), v16.V8B());
2150*f5c631daSSadaf Ebrahimi   __ tbl(v11.V8B(), v19.V16B(), v30.V8B());
2151*f5c631daSSadaf Ebrahimi   __ tbx(v25.V16B(), v25.V16B(), v26.V16B(), v27.V16B(), v28.V16B(), v5.V16B());
2152*f5c631daSSadaf Ebrahimi   __ tbx(v21.V16B(), v29.V16B(), v30.V16B(), v31.V16B(), v24.V16B());
2153*f5c631daSSadaf Ebrahimi   __ tbx(v6.V16B(), v16.V16B(), v17.V16B(), v1.V16B());
2154*f5c631daSSadaf Ebrahimi   __ tbx(v13.V16B(), v3.V16B(), v20.V16B());
2155*f5c631daSSadaf Ebrahimi   __ tbx(v24.V8B(), v29.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v9.V8B());
2156*f5c631daSSadaf Ebrahimi   __ tbx(v17.V8B(), v9.V16B(), v10.V16B(), v11.V16B(), v26.V8B());
2157*f5c631daSSadaf Ebrahimi   __ tbx(v5.V8B(), v3.V16B(), v4.V16B(), v21.V8B());
2158*f5c631daSSadaf Ebrahimi   __ tbx(v16.V8B(), v11.V16B(), v29.V8B());
2159*f5c631daSSadaf Ebrahimi   __ trn1(v19.V16B(), v24.V16B(), v12.V16B());
2160*f5c631daSSadaf Ebrahimi   __ trn1(v2.V2D(), v7.V2D(), v10.V2D());
2161*f5c631daSSadaf Ebrahimi   __ trn1(v22.V2S(), v0.V2S(), v21.V2S());
2162*f5c631daSSadaf Ebrahimi   __ trn1(v12.V4H(), v15.V4H(), v20.V4H());
2163*f5c631daSSadaf Ebrahimi   __ trn1(v30.V4S(), v17.V4S(), v9.V4S());
2164*f5c631daSSadaf Ebrahimi   __ trn1(v12.V8B(), v19.V8B(), v29.V8B());
2165*f5c631daSSadaf Ebrahimi   __ trn1(v23.V8H(), v8.V8H(), v9.V8H());
2166*f5c631daSSadaf Ebrahimi   __ trn2(v28.V16B(), v30.V16B(), v25.V16B());
2167*f5c631daSSadaf Ebrahimi   __ trn2(v7.V2D(), v27.V2D(), v7.V2D());
2168*f5c631daSSadaf Ebrahimi   __ trn2(v30.V2S(), v16.V2S(), v19.V2S());
2169*f5c631daSSadaf Ebrahimi   __ trn2(v24.V4H(), v6.V4H(), v25.V4H());
2170*f5c631daSSadaf Ebrahimi   __ trn2(v2.V4S(), v19.V4S(), v11.V4S());
2171*f5c631daSSadaf Ebrahimi   __ trn2(v25.V8B(), v27.V8B(), v18.V8B());
2172*f5c631daSSadaf Ebrahimi   __ trn2(v12.V8H(), v4.V8H(), v15.V8H());
2173*f5c631daSSadaf Ebrahimi   __ uaba(v31.V16B(), v12.V16B(), v28.V16B());
2174*f5c631daSSadaf Ebrahimi   __ uaba(v18.V2S(), v5.V2S(), v14.V2S());
2175*f5c631daSSadaf Ebrahimi   __ uaba(v9.V4H(), v20.V4H(), v21.V4H());
2176*f5c631daSSadaf Ebrahimi   __ uaba(v6.V4S(), v20.V4S(), v2.V4S());
2177*f5c631daSSadaf Ebrahimi   __ uaba(v16.V8B(), v12.V8B(), v5.V8B());
2178*f5c631daSSadaf Ebrahimi   __ uaba(v15.V8H(), v26.V8H(), v30.V8H());
2179*f5c631daSSadaf Ebrahimi   __ uabal(v10.V2D(), v18.V2S(), v15.V2S());
2180*f5c631daSSadaf Ebrahimi   __ uabal(v30.V4S(), v19.V4H(), v7.V4H());
2181*f5c631daSSadaf Ebrahimi   __ uabal(v4.V8H(), v27.V8B(), v0.V8B());
2182*f5c631daSSadaf Ebrahimi   __ uabal2(v19.V2D(), v12.V4S(), v2.V4S());
2183*f5c631daSSadaf Ebrahimi   __ uabal2(v26.V4S(), v5.V8H(), v12.V8H());
2184*f5c631daSSadaf Ebrahimi   __ uabal2(v19.V8H(), v20.V16B(), v28.V16B());
2185*f5c631daSSadaf Ebrahimi   __ uabd(v18.V16B(), v4.V16B(), v21.V16B());
2186*f5c631daSSadaf Ebrahimi   __ uabd(v30.V2S(), v21.V2S(), v16.V2S());
2187*f5c631daSSadaf Ebrahimi   __ uabd(v8.V4H(), v28.V4H(), v25.V4H());
2188*f5c631daSSadaf Ebrahimi   __ uabd(v28.V4S(), v12.V4S(), v21.V4S());
2189*f5c631daSSadaf Ebrahimi   __ uabd(v19.V8B(), v16.V8B(), v28.V8B());
2190*f5c631daSSadaf Ebrahimi   __ uabd(v9.V8H(), v12.V8H(), v29.V8H());
2191*f5c631daSSadaf Ebrahimi   __ uabdl(v26.V2D(), v0.V2S(), v8.V2S());
2192*f5c631daSSadaf Ebrahimi   __ uabdl(v29.V4S(), v31.V4H(), v25.V4H());
2193*f5c631daSSadaf Ebrahimi   __ uabdl(v27.V8H(), v29.V8B(), v14.V8B());
2194*f5c631daSSadaf Ebrahimi   __ uabdl2(v20.V2D(), v20.V4S(), v8.V4S());
2195*f5c631daSSadaf Ebrahimi   __ uabdl2(v22.V4S(), v15.V8H(), v18.V8H());
2196*f5c631daSSadaf Ebrahimi   __ uabdl2(v9.V8H(), v18.V16B(), v23.V16B());
2197*f5c631daSSadaf Ebrahimi   __ uadalp(v9.V1D(), v15.V2S());
2198*f5c631daSSadaf Ebrahimi   __ uadalp(v14.V2D(), v12.V4S());
2199*f5c631daSSadaf Ebrahimi   __ uadalp(v28.V2S(), v12.V4H());
2200*f5c631daSSadaf Ebrahimi   __ uadalp(v0.V4H(), v17.V8B());
2201*f5c631daSSadaf Ebrahimi   __ uadalp(v1.V4S(), v29.V8H());
2202*f5c631daSSadaf Ebrahimi   __ uadalp(v15.V8H(), v22.V16B());
2203*f5c631daSSadaf Ebrahimi   __ uaddl(v1.V2D(), v20.V2S(), v27.V2S());
2204*f5c631daSSadaf Ebrahimi   __ uaddl(v31.V4S(), v25.V4H(), v5.V4H());
2205*f5c631daSSadaf Ebrahimi   __ uaddl(v12.V8H(), v3.V8B(), v3.V8B());
2206*f5c631daSSadaf Ebrahimi   __ uaddl2(v5.V2D(), v23.V4S(), v6.V4S());
2207*f5c631daSSadaf Ebrahimi   __ uaddl2(v1.V4S(), v5.V8H(), v25.V8H());
2208*f5c631daSSadaf Ebrahimi   __ uaddl2(v22.V8H(), v30.V16B(), v28.V16B());
2209*f5c631daSSadaf Ebrahimi   __ uaddlp(v7.V1D(), v9.V2S());
2210*f5c631daSSadaf Ebrahimi   __ uaddlp(v26.V2D(), v4.V4S());
2211*f5c631daSSadaf Ebrahimi   __ uaddlp(v28.V2S(), v1.V4H());
2212*f5c631daSSadaf Ebrahimi   __ uaddlp(v20.V4H(), v31.V8B());
2213*f5c631daSSadaf Ebrahimi   __ uaddlp(v16.V4S(), v17.V8H());
2214*f5c631daSSadaf Ebrahimi   __ uaddlp(v6.V8H(), v2.V16B());
2215*f5c631daSSadaf Ebrahimi   __ uaddlv(d28, v22.V4S());
2216*f5c631daSSadaf Ebrahimi   __ uaddlv(h0, v19.V16B());
2217*f5c631daSSadaf Ebrahimi   __ uaddlv(h30, v30.V8B());
2218*f5c631daSSadaf Ebrahimi   __ uaddlv(s24, v18.V4H());
2219*f5c631daSSadaf Ebrahimi   __ uaddlv(s10, v0.V8H());
2220*f5c631daSSadaf Ebrahimi   __ uaddw(v9.V2D(), v17.V2D(), v14.V2S());
2221*f5c631daSSadaf Ebrahimi   __ uaddw(v9.V4S(), v25.V4S(), v3.V4H());
2222*f5c631daSSadaf Ebrahimi   __ uaddw(v18.V8H(), v1.V8H(), v0.V8B());
2223*f5c631daSSadaf Ebrahimi   __ uaddw2(v18.V2D(), v5.V2D(), v6.V4S());
2224*f5c631daSSadaf Ebrahimi   __ uaddw2(v17.V4S(), v15.V4S(), v11.V8H());
2225*f5c631daSSadaf Ebrahimi   __ uaddw2(v29.V8H(), v11.V8H(), v7.V16B());
2226*f5c631daSSadaf Ebrahimi   __ uhadd(v13.V16B(), v9.V16B(), v3.V16B());
2227*f5c631daSSadaf Ebrahimi   __ uhadd(v17.V2S(), v25.V2S(), v24.V2S());
2228*f5c631daSSadaf Ebrahimi   __ uhadd(v25.V4H(), v23.V4H(), v13.V4H());
2229*f5c631daSSadaf Ebrahimi   __ uhadd(v0.V4S(), v20.V4S(), v16.V4S());
2230*f5c631daSSadaf Ebrahimi   __ uhadd(v5.V8B(), v5.V8B(), v25.V8B());
2231*f5c631daSSadaf Ebrahimi   __ uhadd(v3.V8H(), v29.V8H(), v18.V8H());
2232*f5c631daSSadaf Ebrahimi   __ uhsub(v1.V16B(), v22.V16B(), v13.V16B());
2233*f5c631daSSadaf Ebrahimi   __ uhsub(v14.V2S(), v30.V2S(), v30.V2S());
2234*f5c631daSSadaf Ebrahimi   __ uhsub(v29.V4H(), v14.V4H(), v17.V4H());
2235*f5c631daSSadaf Ebrahimi   __ uhsub(v26.V4S(), v5.V4S(), v18.V4S());
2236*f5c631daSSadaf Ebrahimi   __ uhsub(v3.V8B(), v7.V8B(), v12.V8B());
2237*f5c631daSSadaf Ebrahimi   __ uhsub(v25.V8H(), v21.V8H(), v5.V8H());
2238*f5c631daSSadaf Ebrahimi   __ umax(v28.V16B(), v12.V16B(), v6.V16B());
2239*f5c631daSSadaf Ebrahimi   __ umax(v20.V2S(), v19.V2S(), v26.V2S());
2240*f5c631daSSadaf Ebrahimi   __ umax(v0.V4H(), v31.V4H(), v18.V4H());
2241*f5c631daSSadaf Ebrahimi   __ umax(v6.V4S(), v21.V4S(), v28.V4S());
2242*f5c631daSSadaf Ebrahimi   __ umax(v0.V8B(), v2.V8B(), v20.V8B());
2243*f5c631daSSadaf Ebrahimi   __ umax(v4.V8H(), v11.V8H(), v22.V8H());
2244*f5c631daSSadaf Ebrahimi   __ umaxp(v1.V16B(), v6.V16B(), v29.V16B());
2245*f5c631daSSadaf Ebrahimi   __ umaxp(v19.V2S(), v17.V2S(), v27.V2S());
2246*f5c631daSSadaf Ebrahimi   __ umaxp(v21.V4H(), v16.V4H(), v7.V4H());
2247*f5c631daSSadaf Ebrahimi   __ umaxp(v9.V4S(), v20.V4S(), v29.V4S());
2248*f5c631daSSadaf Ebrahimi   __ umaxp(v13.V8B(), v1.V8B(), v16.V8B());
2249*f5c631daSSadaf Ebrahimi   __ umaxp(v19.V8H(), v23.V8H(), v26.V8H());
2250*f5c631daSSadaf Ebrahimi   __ umaxv(b17, v30.V16B());
2251*f5c631daSSadaf Ebrahimi   __ umaxv(b23, v12.V8B());
2252*f5c631daSSadaf Ebrahimi   __ umaxv(h31, v15.V4H());
2253*f5c631daSSadaf Ebrahimi   __ umaxv(h15, v25.V8H());
2254*f5c631daSSadaf Ebrahimi   __ umaxv(s18, v21.V4S());
2255*f5c631daSSadaf Ebrahimi   __ umin(v22.V16B(), v0.V16B(), v18.V16B());
2256*f5c631daSSadaf Ebrahimi   __ umin(v1.V2S(), v21.V2S(), v16.V2S());
2257*f5c631daSSadaf Ebrahimi   __ umin(v17.V4H(), v4.V4H(), v25.V4H());
2258*f5c631daSSadaf Ebrahimi   __ umin(v24.V4S(), v26.V4S(), v13.V4S());
2259*f5c631daSSadaf Ebrahimi   __ umin(v20.V8B(), v1.V8B(), v5.V8B());
2260*f5c631daSSadaf Ebrahimi   __ umin(v26.V8H(), v25.V8H(), v23.V8H());
2261*f5c631daSSadaf Ebrahimi   __ uminp(v5.V16B(), v1.V16B(), v23.V16B());
2262*f5c631daSSadaf Ebrahimi   __ uminp(v7.V2S(), v26.V2S(), v30.V2S());
2263*f5c631daSSadaf Ebrahimi   __ uminp(v9.V4H(), v5.V4H(), v25.V4H());
2264*f5c631daSSadaf Ebrahimi   __ uminp(v23.V4S(), v10.V4S(), v1.V4S());
2265*f5c631daSSadaf Ebrahimi   __ uminp(v4.V8B(), v29.V8B(), v14.V8B());
2266*f5c631daSSadaf Ebrahimi   __ uminp(v21.V8H(), v0.V8H(), v14.V8H());
2267*f5c631daSSadaf Ebrahimi   __ uminv(b0, v17.V16B());
2268*f5c631daSSadaf Ebrahimi   __ uminv(b0, v31.V8B());
2269*f5c631daSSadaf Ebrahimi   __ uminv(h24, v0.V4H());
2270*f5c631daSSadaf Ebrahimi   __ uminv(h29, v14.V8H());
2271*f5c631daSSadaf Ebrahimi   __ uminv(s30, v3.V4S());
2272*f5c631daSSadaf Ebrahimi   __ umlal(v11.V2D(), v11.V2S(), v24.V2S());
2273*f5c631daSSadaf Ebrahimi   __ umlal(v30.V2D(), v16.V2S(), v11.S(), 3);
2274*f5c631daSSadaf Ebrahimi   __ umlal(v0.V4S(), v9.V4H(), v26.V4H());
2275*f5c631daSSadaf Ebrahimi   __ umlal(v20.V4S(), v24.V4H(), v12.H(), 4);
2276*f5c631daSSadaf Ebrahimi   __ umlal(v16.V8H(), v21.V8B(), v6.V8B());
2277*f5c631daSSadaf Ebrahimi   __ umlal2(v17.V2D(), v19.V4S(), v23.V4S());
2278*f5c631daSSadaf Ebrahimi   __ umlal2(v5.V2D(), v30.V4S(), v8.S(), 0);
2279*f5c631daSSadaf Ebrahimi   __ umlal2(v16.V4S(), v8.V8H(), v15.V8H());
2280*f5c631daSSadaf Ebrahimi   __ umlal2(v15.V4S(), v26.V8H(), v1.H(), 5);
2281*f5c631daSSadaf Ebrahimi   __ umlal2(v30.V8H(), v1.V16B(), v17.V16B());
2282*f5c631daSSadaf Ebrahimi   __ umlsl(v18.V2D(), v19.V2S(), v28.V2S());
2283*f5c631daSSadaf Ebrahimi   __ umlsl(v7.V2D(), v7.V2S(), v8.S(), 0);
2284*f5c631daSSadaf Ebrahimi   __ umlsl(v24.V4S(), v8.V4H(), v4.V4H());
2285*f5c631daSSadaf Ebrahimi   __ umlsl(v18.V4S(), v22.V4H(), v12.H(), 4);
2286*f5c631daSSadaf Ebrahimi   __ umlsl(v28.V8H(), v14.V8B(), v20.V8B());
2287*f5c631daSSadaf Ebrahimi   __ umlsl2(v11.V2D(), v0.V4S(), v9.V4S());
2288*f5c631daSSadaf Ebrahimi   __ umlsl2(v26.V2D(), v16.V4S(), v9.S(), 2);
2289*f5c631daSSadaf Ebrahimi   __ umlsl2(v3.V4S(), v11.V8H(), v9.V8H());
2290*f5c631daSSadaf Ebrahimi   __ umlsl2(v10.V4S(), v25.V8H(), v9.H(), 4);
2291*f5c631daSSadaf Ebrahimi   __ umlsl2(v24.V8H(), v16.V16B(), v28.V16B());
2292*f5c631daSSadaf Ebrahimi   __ umov(x30, v25.D(), 1);
2293*f5c631daSSadaf Ebrahimi   __ umull(v12.V2D(), v10.V2S(), v29.V2S());
2294*f5c631daSSadaf Ebrahimi   __ umull(v22.V2D(), v30.V2S(), v5.S(), 3);
2295*f5c631daSSadaf Ebrahimi   __ umull(v7.V4S(), v0.V4H(), v25.V4H());
2296*f5c631daSSadaf Ebrahimi   __ umull(v11.V4S(), v13.V4H(), v3.H(), 2);
2297*f5c631daSSadaf Ebrahimi   __ umull(v25.V8H(), v16.V8B(), v10.V8B());
2298*f5c631daSSadaf Ebrahimi   __ umull2(v17.V2D(), v3.V4S(), v26.V4S());
2299*f5c631daSSadaf Ebrahimi   __ umull2(v26.V2D(), v11.V4S(), v2.S(), 3);
2300*f5c631daSSadaf Ebrahimi   __ umull2(v12.V4S(), v17.V8H(), v23.V8H());
2301*f5c631daSSadaf Ebrahimi   __ umull2(v4.V4S(), v31.V8H(), v1.H(), 2);
2302*f5c631daSSadaf Ebrahimi   __ umull2(v5.V8H(), v12.V16B(), v17.V16B());
2303*f5c631daSSadaf Ebrahimi   __ uqadd(b30, b4, b28);
2304*f5c631daSSadaf Ebrahimi   __ uqadd(d27, d20, d16);
2305*f5c631daSSadaf Ebrahimi   __ uqadd(h7, h14, h28);
2306*f5c631daSSadaf Ebrahimi   __ uqadd(s28, s17, s4);
2307*f5c631daSSadaf Ebrahimi   __ uqadd(v19.V16B(), v22.V16B(), v21.V16B());
2308*f5c631daSSadaf Ebrahimi   __ uqadd(v16.V2D(), v4.V2D(), v11.V2D());
2309*f5c631daSSadaf Ebrahimi   __ uqadd(v20.V2S(), v14.V2S(), v4.V2S());
2310*f5c631daSSadaf Ebrahimi   __ uqadd(v5.V4H(), v0.V4H(), v16.V4H());
2311*f5c631daSSadaf Ebrahimi   __ uqadd(v21.V4S(), v31.V4S(), v9.V4S());
2312*f5c631daSSadaf Ebrahimi   __ uqadd(v23.V8B(), v24.V8B(), v3.V8B());
2313*f5c631daSSadaf Ebrahimi   __ uqadd(v17.V8H(), v27.V8H(), v11.V8H());
2314*f5c631daSSadaf Ebrahimi   __ uqrshl(b10, b22, b10);
2315*f5c631daSSadaf Ebrahimi   __ uqrshl(d29, d5, d11);
2316*f5c631daSSadaf Ebrahimi   __ uqrshl(h27, h24, h30);
2317*f5c631daSSadaf Ebrahimi   __ uqrshl(s10, s13, s8);
2318*f5c631daSSadaf Ebrahimi   __ uqrshl(v9.V16B(), v18.V16B(), v14.V16B());
2319*f5c631daSSadaf Ebrahimi   __ uqrshl(v24.V2D(), v15.V2D(), v17.V2D());
2320*f5c631daSSadaf Ebrahimi   __ uqrshl(v4.V2S(), v14.V2S(), v27.V2S());
2321*f5c631daSSadaf Ebrahimi   __ uqrshl(v15.V4H(), v5.V4H(), v8.V4H());
2322*f5c631daSSadaf Ebrahimi   __ uqrshl(v21.V4S(), v29.V4S(), v0.V4S());
2323*f5c631daSSadaf Ebrahimi   __ uqrshl(v16.V8B(), v24.V8B(), v9.V8B());
2324*f5c631daSSadaf Ebrahimi   __ uqrshl(v2.V8H(), v0.V8H(), v15.V8H());
2325*f5c631daSSadaf Ebrahimi   __ uqrshrn(b11, h26, 4);
2326*f5c631daSSadaf Ebrahimi   __ uqrshrn(h7, s30, 5);
2327*f5c631daSSadaf Ebrahimi   __ uqrshrn(s10, d8, 21);
2328*f5c631daSSadaf Ebrahimi   __ uqrshrn(v15.V2S(), v6.V2D(), 11);
2329*f5c631daSSadaf Ebrahimi   __ uqrshrn(v5.V4H(), v26.V4S(), 12);
2330*f5c631daSSadaf Ebrahimi   __ uqrshrn(v28.V8B(), v25.V8H(), 5);
2331*f5c631daSSadaf Ebrahimi   __ uqrshrn2(v25.V16B(), v30.V8H(), 2);
2332*f5c631daSSadaf Ebrahimi   __ uqrshrn2(v21.V4S(), v14.V2D(), 32);
2333*f5c631daSSadaf Ebrahimi   __ uqrshrn2(v13.V8H(), v7.V4S(), 2);
2334*f5c631daSSadaf Ebrahimi   __ uqshl(b13, b0, b23);
2335*f5c631daSSadaf Ebrahimi   __ uqshl(b9, b17, 4);
2336*f5c631daSSadaf Ebrahimi   __ uqshl(d23, d6, d4);
2337*f5c631daSSadaf Ebrahimi   __ uqshl(d8, d11, 44);
2338*f5c631daSSadaf Ebrahimi   __ uqshl(h19, h13, h15);
2339*f5c631daSSadaf Ebrahimi   __ uqshl(h25, h26, 6);
2340*f5c631daSSadaf Ebrahimi   __ uqshl(s4, s24, s10);
2341*f5c631daSSadaf Ebrahimi   __ uqshl(s19, s14, 1);
2342*f5c631daSSadaf Ebrahimi   __ uqshl(v14.V16B(), v30.V16B(), v25.V16B());
2343*f5c631daSSadaf Ebrahimi   __ uqshl(v6.V16B(), v10.V16B(), 5);
2344*f5c631daSSadaf Ebrahimi   __ uqshl(v18.V2D(), v8.V2D(), v7.V2D());
2345*f5c631daSSadaf Ebrahimi   __ uqshl(v25.V2D(), v14.V2D(), 18);
2346*f5c631daSSadaf Ebrahimi   __ uqshl(v25.V2S(), v16.V2S(), v23.V2S());
2347*f5c631daSSadaf Ebrahimi   __ uqshl(v13.V2S(), v15.V2S(), 31);
2348*f5c631daSSadaf Ebrahimi   __ uqshl(v28.V4H(), v24.V4H(), v15.V4H());
2349*f5c631daSSadaf Ebrahimi   __ uqshl(v4.V4H(), v17.V4H(), 1);
2350*f5c631daSSadaf Ebrahimi   __ uqshl(v9.V4S(), v31.V4S(), v23.V4S());
2351*f5c631daSSadaf Ebrahimi   __ uqshl(v18.V4S(), v28.V4S(), 31);
2352*f5c631daSSadaf Ebrahimi   __ uqshl(v31.V8B(), v21.V8B(), v15.V8B());
2353*f5c631daSSadaf Ebrahimi   __ uqshl(v6.V8B(), v21.V8B(), 1);
2354*f5c631daSSadaf Ebrahimi   __ uqshl(v28.V8H(), v2.V8H(), v17.V8H());
2355*f5c631daSSadaf Ebrahimi   __ uqshl(v24.V8H(), v8.V8H(), 14);
2356*f5c631daSSadaf Ebrahimi   __ uqshrn(b21, h27, 7);
2357*f5c631daSSadaf Ebrahimi   __ uqshrn(h28, s26, 11);
2358*f5c631daSSadaf Ebrahimi   __ uqshrn(s13, d31, 17);
2359*f5c631daSSadaf Ebrahimi   __ uqshrn(v21.V2S(), v16.V2D(), 8);
2360*f5c631daSSadaf Ebrahimi   __ uqshrn(v24.V4H(), v24.V4S(), 2);
2361*f5c631daSSadaf Ebrahimi   __ uqshrn(v5.V8B(), v1.V8H(), 8);
2362*f5c631daSSadaf Ebrahimi   __ uqshrn2(v16.V16B(), v29.V8H(), 6);
2363*f5c631daSSadaf Ebrahimi   __ uqshrn2(v2.V4S(), v6.V2D(), 1);
2364*f5c631daSSadaf Ebrahimi   __ uqshrn2(v16.V8H(), v10.V4S(), 14);
2365*f5c631daSSadaf Ebrahimi   __ uqsub(b28, b20, b26);
2366*f5c631daSSadaf Ebrahimi   __ uqsub(d0, d7, d10);
2367*f5c631daSSadaf Ebrahimi   __ uqsub(h26, h24, h7);
2368*f5c631daSSadaf Ebrahimi   __ uqsub(s23, s23, s16);
2369*f5c631daSSadaf Ebrahimi   __ uqsub(v14.V16B(), v16.V16B(), v24.V16B());
2370*f5c631daSSadaf Ebrahimi   __ uqsub(v11.V2D(), v17.V2D(), v6.V2D());
2371*f5c631daSSadaf Ebrahimi   __ uqsub(v10.V2S(), v10.V2S(), v8.V2S());
2372*f5c631daSSadaf Ebrahimi   __ uqsub(v9.V4H(), v15.V4H(), v12.V4H());
2373*f5c631daSSadaf Ebrahimi   __ uqsub(v23.V4S(), v18.V4S(), v7.V4S());
2374*f5c631daSSadaf Ebrahimi   __ uqsub(v9.V8B(), v19.V8B(), v17.V8B());
2375*f5c631daSSadaf Ebrahimi   __ uqsub(v20.V8H(), v2.V8H(), v6.V8H());
2376*f5c631daSSadaf Ebrahimi   __ uqxtn(b29, h19);
2377*f5c631daSSadaf Ebrahimi   __ uqxtn(h0, s13);
2378*f5c631daSSadaf Ebrahimi   __ uqxtn(s26, d22);
2379*f5c631daSSadaf Ebrahimi   __ uqxtn(v5.V2S(), v31.V2D());
2380*f5c631daSSadaf Ebrahimi   __ uqxtn(v30.V4H(), v19.V4S());
2381*f5c631daSSadaf Ebrahimi   __ uqxtn(v15.V8B(), v2.V8H());
2382*f5c631daSSadaf Ebrahimi   __ uqxtn2(v29.V16B(), v3.V8H());
2383*f5c631daSSadaf Ebrahimi   __ uqxtn2(v13.V4S(), v17.V2D());
2384*f5c631daSSadaf Ebrahimi   __ uqxtn2(v28.V8H(), v11.V4S());
2385*f5c631daSSadaf Ebrahimi   __ urecpe(v23.V2S(), v15.V2S());
2386*f5c631daSSadaf Ebrahimi   __ urecpe(v27.V4S(), v7.V4S());
2387*f5c631daSSadaf Ebrahimi   __ urhadd(v2.V16B(), v15.V16B(), v27.V16B());
2388*f5c631daSSadaf Ebrahimi   __ urhadd(v15.V2S(), v1.V2S(), v18.V2S());
2389*f5c631daSSadaf Ebrahimi   __ urhadd(v17.V4H(), v4.V4H(), v26.V4H());
2390*f5c631daSSadaf Ebrahimi   __ urhadd(v2.V4S(), v27.V4S(), v14.V4S());
2391*f5c631daSSadaf Ebrahimi   __ urhadd(v5.V8B(), v17.V8B(), v14.V8B());
2392*f5c631daSSadaf Ebrahimi   __ urhadd(v30.V8H(), v2.V8H(), v25.V8H());
2393*f5c631daSSadaf Ebrahimi   __ urshl(d4, d28, d30);
2394*f5c631daSSadaf Ebrahimi   __ urshl(v13.V16B(), v31.V16B(), v19.V16B());
2395*f5c631daSSadaf Ebrahimi   __ urshl(v14.V2D(), v23.V2D(), v21.V2D());
2396*f5c631daSSadaf Ebrahimi   __ urshl(v10.V2S(), v7.V2S(), v8.V2S());
2397*f5c631daSSadaf Ebrahimi   __ urshl(v15.V4H(), v21.V4H(), v28.V4H());
2398*f5c631daSSadaf Ebrahimi   __ urshl(v30.V4S(), v8.V4S(), v23.V4S());
2399*f5c631daSSadaf Ebrahimi   __ urshl(v31.V8B(), v20.V8B(), v5.V8B());
2400*f5c631daSSadaf Ebrahimi   __ urshl(v30.V8H(), v27.V8H(), v30.V8H());
2401*f5c631daSSadaf Ebrahimi   __ urshr(d4, d13, 49);
2402*f5c631daSSadaf Ebrahimi   __ urshr(v2.V16B(), v20.V16B(), 1);
2403*f5c631daSSadaf Ebrahimi   __ urshr(v13.V2D(), v11.V2D(), 51);
2404*f5c631daSSadaf Ebrahimi   __ urshr(v21.V2S(), v31.V2S(), 10);
2405*f5c631daSSadaf Ebrahimi   __ urshr(v21.V4H(), v17.V4H(), 11);
2406*f5c631daSSadaf Ebrahimi   __ urshr(v4.V4S(), v22.V4S(), 1);
2407*f5c631daSSadaf Ebrahimi   __ urshr(v0.V8B(), v1.V8B(), 7);
2408*f5c631daSSadaf Ebrahimi   __ urshr(v13.V8H(), v20.V8H(), 1);
2409*f5c631daSSadaf Ebrahimi   __ ursqrte(v20.V2S(), v16.V2S());
2410*f5c631daSSadaf Ebrahimi   __ ursqrte(v28.V4S(), v8.V4S());
2411*f5c631daSSadaf Ebrahimi   __ ursra(d27, d16, 45);
2412*f5c631daSSadaf Ebrahimi   __ ursra(v18.V16B(), v17.V16B(), 3);
2413*f5c631daSSadaf Ebrahimi   __ ursra(v26.V2D(), v28.V2D(), 58);
2414*f5c631daSSadaf Ebrahimi   __ ursra(v8.V2S(), v22.V2S(), 31);
2415*f5c631daSSadaf Ebrahimi   __ ursra(v31.V4H(), v4.V4H(), 7);
2416*f5c631daSSadaf Ebrahimi   __ ursra(v31.V4S(), v15.V4S(), 2);
2417*f5c631daSSadaf Ebrahimi   __ ursra(v3.V8B(), v1.V8B(), 5);
2418*f5c631daSSadaf Ebrahimi   __ ursra(v18.V8H(), v14.V8H(), 13);
2419*f5c631daSSadaf Ebrahimi   __ ushl(d31, d0, d16);
2420*f5c631daSSadaf Ebrahimi   __ ushl(v0.V16B(), v6.V16B(), v2.V16B());
2421*f5c631daSSadaf Ebrahimi   __ ushl(v18.V2D(), v1.V2D(), v18.V2D());
2422*f5c631daSSadaf Ebrahimi   __ ushl(v27.V2S(), v7.V2S(), v29.V2S());
2423*f5c631daSSadaf Ebrahimi   __ ushl(v14.V4H(), v14.V4H(), v13.V4H());
2424*f5c631daSSadaf Ebrahimi   __ ushl(v22.V4S(), v4.V4S(), v9.V4S());
2425*f5c631daSSadaf Ebrahimi   __ ushl(v23.V8B(), v22.V8B(), v27.V8B());
2426*f5c631daSSadaf Ebrahimi   __ ushl(v21.V8H(), v25.V8H(), v8.V8H());
2427*f5c631daSSadaf Ebrahimi   __ ushll(v11.V2D(), v0.V2S(), 21);
2428*f5c631daSSadaf Ebrahimi   __ ushll(v2.V4S(), v17.V4H(), 8);
2429*f5c631daSSadaf Ebrahimi   __ ushll(v11.V8H(), v14.V8B(), 1);
2430*f5c631daSSadaf Ebrahimi   __ ushll2(v8.V2D(), v29.V4S(), 7);
2431*f5c631daSSadaf Ebrahimi   __ ushll2(v29.V4S(), v9.V8H(), 2);
2432*f5c631daSSadaf Ebrahimi   __ ushll2(v5.V8H(), v24.V16B(), 6);
2433*f5c631daSSadaf Ebrahimi   __ ushr(d28, d27, 53);
2434*f5c631daSSadaf Ebrahimi   __ ushr(v1.V16B(), v9.V16B(), 7);
2435*f5c631daSSadaf Ebrahimi   __ ushr(v2.V2D(), v24.V2D(), 43);
2436*f5c631daSSadaf Ebrahimi   __ ushr(v30.V2S(), v25.V2S(), 11);
2437*f5c631daSSadaf Ebrahimi   __ ushr(v10.V4H(), v26.V4H(), 12);
2438*f5c631daSSadaf Ebrahimi   __ ushr(v4.V4S(), v5.V4S(), 30);
2439*f5c631daSSadaf Ebrahimi   __ ushr(v30.V8B(), v2.V8B(), 1);
2440*f5c631daSSadaf Ebrahimi   __ ushr(v6.V8H(), v12.V8H(), 2);
2441*f5c631daSSadaf Ebrahimi   __ usqadd(b19, b5);
2442*f5c631daSSadaf Ebrahimi   __ usqadd(d9, d2);
2443*f5c631daSSadaf Ebrahimi   __ usqadd(h2, h16);
2444*f5c631daSSadaf Ebrahimi   __ usqadd(s16, s3);
2445*f5c631daSSadaf Ebrahimi   __ usqadd(v31.V16B(), v29.V16B());
2446*f5c631daSSadaf Ebrahimi   __ usqadd(v8.V2D(), v10.V2D());
2447*f5c631daSSadaf Ebrahimi   __ usqadd(v18.V2S(), v9.V2S());
2448*f5c631daSSadaf Ebrahimi   __ usqadd(v24.V4H(), v14.V4H());
2449*f5c631daSSadaf Ebrahimi   __ usqadd(v10.V4S(), v30.V4S());
2450*f5c631daSSadaf Ebrahimi   __ usqadd(v16.V8B(), v20.V8B());
2451*f5c631daSSadaf Ebrahimi   __ usqadd(v12.V8H(), v16.V8H());
2452*f5c631daSSadaf Ebrahimi   __ usra(d28, d27, 37);
2453*f5c631daSSadaf Ebrahimi   __ usra(v5.V16B(), v22.V16B(), 5);
2454*f5c631daSSadaf Ebrahimi   __ usra(v2.V2D(), v19.V2D(), 33);
2455*f5c631daSSadaf Ebrahimi   __ usra(v0.V2S(), v0.V2S(), 21);
2456*f5c631daSSadaf Ebrahimi   __ usra(v7.V4H(), v6.V4H(), 12);
2457*f5c631daSSadaf Ebrahimi   __ usra(v4.V4S(), v17.V4S(), 9);
2458*f5c631daSSadaf Ebrahimi   __ usra(v9.V8B(), v12.V8B(), 7);
2459*f5c631daSSadaf Ebrahimi   __ usra(v3.V8H(), v27.V8H(), 14);
2460*f5c631daSSadaf Ebrahimi   __ usubl(v29.V2D(), v12.V2S(), v30.V2S());
2461*f5c631daSSadaf Ebrahimi   __ usubl(v29.V4S(), v28.V4H(), v6.V4H());
2462*f5c631daSSadaf Ebrahimi   __ usubl(v12.V8H(), v4.V8B(), v14.V8B());
2463*f5c631daSSadaf Ebrahimi   __ usubl2(v1.V2D(), v24.V4S(), v17.V4S());
2464*f5c631daSSadaf Ebrahimi   __ usubl2(v4.V4S(), v1.V8H(), v3.V8H());
2465*f5c631daSSadaf Ebrahimi   __ usubl2(v23.V8H(), v4.V16B(), v7.V16B());
2466*f5c631daSSadaf Ebrahimi   __ usubw(v9.V2D(), v20.V2D(), v30.V2S());
2467*f5c631daSSadaf Ebrahimi   __ usubw(v20.V4S(), v16.V4S(), v23.V4H());
2468*f5c631daSSadaf Ebrahimi   __ usubw(v25.V8H(), v8.V8H(), v29.V8B());
2469*f5c631daSSadaf Ebrahimi   __ usubw2(v18.V2D(), v29.V2D(), v6.V4S());
2470*f5c631daSSadaf Ebrahimi   __ usubw2(v6.V4S(), v6.V4S(), v20.V8H());
2471*f5c631daSSadaf Ebrahimi   __ usubw2(v18.V8H(), v4.V8H(), v16.V16B());
2472*f5c631daSSadaf Ebrahimi   __ uxtl(v27.V2D(), v21.V2S());
2473*f5c631daSSadaf Ebrahimi   __ uxtl(v0.V4S(), v31.V4H());
2474*f5c631daSSadaf Ebrahimi   __ uxtl(v27.V8H(), v10.V8B());
2475*f5c631daSSadaf Ebrahimi   __ uxtl2(v6.V2D(), v16.V4S());
2476*f5c631daSSadaf Ebrahimi   __ uxtl2(v22.V4S(), v20.V8H());
2477*f5c631daSSadaf Ebrahimi   __ uxtl2(v20.V8H(), v21.V16B());
2478*f5c631daSSadaf Ebrahimi   __ uzp1(v30.V16B(), v9.V16B(), v17.V16B());
2479*f5c631daSSadaf Ebrahimi   __ uzp1(v7.V2D(), v26.V2D(), v28.V2D());
2480*f5c631daSSadaf Ebrahimi   __ uzp1(v26.V2S(), v16.V2S(), v22.V2S());
2481*f5c631daSSadaf Ebrahimi   __ uzp1(v14.V4H(), v19.V4H(), v6.V4H());
2482*f5c631daSSadaf Ebrahimi   __ uzp1(v17.V4S(), v23.V4S(), v30.V4S());
2483*f5c631daSSadaf Ebrahimi   __ uzp1(v28.V8B(), v27.V8B(), v13.V8B());
2484*f5c631daSSadaf Ebrahimi   __ uzp1(v17.V8H(), v1.V8H(), v12.V8H());
2485*f5c631daSSadaf Ebrahimi   __ uzp2(v8.V16B(), v18.V16B(), v26.V16B());
2486*f5c631daSSadaf Ebrahimi   __ uzp2(v21.V2D(), v22.V2D(), v24.V2D());
2487*f5c631daSSadaf Ebrahimi   __ uzp2(v20.V2S(), v21.V2S(), v2.V2S());
2488*f5c631daSSadaf Ebrahimi   __ uzp2(v16.V4H(), v31.V4H(), v6.V4H());
2489*f5c631daSSadaf Ebrahimi   __ uzp2(v25.V4S(), v11.V4S(), v8.V4S());
2490*f5c631daSSadaf Ebrahimi   __ uzp2(v31.V8B(), v31.V8B(), v13.V8B());
2491*f5c631daSSadaf Ebrahimi   __ uzp2(v8.V8H(), v17.V8H(), v1.V8H());
2492*f5c631daSSadaf Ebrahimi   __ xtn(v17.V2S(), v26.V2D());
2493*f5c631daSSadaf Ebrahimi   __ xtn(v3.V4H(), v0.V4S());
2494*f5c631daSSadaf Ebrahimi   __ xtn(v18.V8B(), v8.V8H());
2495*f5c631daSSadaf Ebrahimi   __ xtn2(v0.V16B(), v0.V8H());
2496*f5c631daSSadaf Ebrahimi   __ xtn2(v15.V4S(), v4.V2D());
2497*f5c631daSSadaf Ebrahimi   __ xtn2(v31.V8H(), v18.V4S());
2498*f5c631daSSadaf Ebrahimi   __ zip1(v22.V16B(), v9.V16B(), v6.V16B());
2499*f5c631daSSadaf Ebrahimi   __ zip1(v23.V2D(), v11.V2D(), v2.V2D());
2500*f5c631daSSadaf Ebrahimi   __ zip1(v26.V2S(), v16.V2S(), v9.V2S());
2501*f5c631daSSadaf Ebrahimi   __ zip1(v1.V4H(), v9.V4H(), v7.V4H());
2502*f5c631daSSadaf Ebrahimi   __ zip1(v0.V4S(), v30.V4S(), v20.V4S());
2503*f5c631daSSadaf Ebrahimi   __ zip1(v30.V8B(), v17.V8B(), v15.V8B());
2504*f5c631daSSadaf Ebrahimi   __ zip1(v17.V8H(), v8.V8H(), v2.V8H());
2505*f5c631daSSadaf Ebrahimi   __ zip2(v23.V16B(), v10.V16B(), v11.V16B());
2506*f5c631daSSadaf Ebrahimi   __ zip2(v30.V2D(), v6.V2D(), v14.V2D());
2507*f5c631daSSadaf Ebrahimi   __ zip2(v9.V2S(), v10.V2S(), v21.V2S());
2508*f5c631daSSadaf Ebrahimi   __ zip2(v8.V4H(), v24.V4H(), v29.V4H());
2509*f5c631daSSadaf Ebrahimi   __ zip2(v0.V4S(), v21.V4S(), v23.V4S());
2510*f5c631daSSadaf Ebrahimi   __ zip2(v25.V8B(), v23.V8B(), v30.V8B());
2511*f5c631daSSadaf Ebrahimi   __ zip2(v7.V8H(), v10.V8H(), v30.V8H());
2512*f5c631daSSadaf Ebrahimi }  // NOLINT(readability/fn_size)
2513*f5c631daSSadaf Ebrahimi 
2514*f5c631daSSadaf Ebrahimi 
// Emits a fixed sequence of NEON floating-point instructions through `masm`.
//
// The whole sequence is emitted inside an ExactAssemblyScope that is sized to
// the bytes remaining in the assembler's buffer and uses the kMaximumSize
// policy. Every operand is hard-coded, and the statements are grouped
// alphabetically by mnemonic, from fabd through ucvtf.
//
// NOTE(review): `__` is a macro defined outside this view; presumably it
// expands to `masm->` - confirm.
// NOTE(review): the consumer of this sequence is outside this view. If it is
// compared against recorded reference output, the order and operands of these
// statements must not change - confirm before editing.
GenerateTestSequenceNEONFP(MacroAssembler * masm)2515*f5c631daSSadaf Ebrahimi static void GenerateTestSequenceNEONFP(MacroAssembler* masm) {
  // The scope's size argument is whatever space is left in the buffer.
2516*f5c631daSSadaf Ebrahimi   ExactAssemblyScope guard(masm,
2517*f5c631daSSadaf Ebrahimi                            masm->GetBuffer()->GetRemainingBytes(),
2518*f5c631daSSadaf Ebrahimi                            ExactAssemblyScope::kMaximumSize);
2519*f5c631daSSadaf Ebrahimi 
2520*f5c631daSSadaf Ebrahimi   // NEON floating point instructions.
2521*f5c631daSSadaf Ebrahimi   __ fabd(v3.V2D(), v25.V2D(), v8.V2D());
2522*f5c631daSSadaf Ebrahimi   __ fabd(v14.V2S(), v27.V2S(), v11.V2S());
2523*f5c631daSSadaf Ebrahimi   __ fabd(v9.V4S(), v22.V4S(), v18.V4S());
2524*f5c631daSSadaf Ebrahimi   __ fabs(v1.V2D(), v29.V2D());
2525*f5c631daSSadaf Ebrahimi   __ fabs(v6.V2S(), v21.V2S());
2526*f5c631daSSadaf Ebrahimi   __ fabs(v12.V4S(), v25.V4S());
2527*f5c631daSSadaf Ebrahimi   __ facge(v18.V2D(), v5.V2D(), v0.V2D());
2528*f5c631daSSadaf Ebrahimi   __ facge(v15.V2S(), v11.V2S(), v6.V2S());
2529*f5c631daSSadaf Ebrahimi   __ facge(v30.V4S(), v10.V4S(), v25.V4S());
2530*f5c631daSSadaf Ebrahimi   __ facgt(v28.V2D(), v16.V2D(), v31.V2D());
2531*f5c631daSSadaf Ebrahimi   __ facgt(v15.V2S(), v1.V2S(), v4.V2S());
2532*f5c631daSSadaf Ebrahimi   __ facgt(v22.V4S(), v3.V4S(), v10.V4S());
2533*f5c631daSSadaf Ebrahimi   __ fadd(v7.V2D(), v10.V2D(), v24.V2D());
2534*f5c631daSSadaf Ebrahimi   __ fadd(v10.V2S(), v23.V2S(), v7.V2S());
2535*f5c631daSSadaf Ebrahimi   __ fadd(v16.V4S(), v22.V4S(), v11.V4S());
  // faddp appears both with a scalar (d/s) destination and one vector source,
  // and with three vector operands.
2536*f5c631daSSadaf Ebrahimi   __ faddp(d27, v28.V2D());
2537*f5c631daSSadaf Ebrahimi   __ faddp(s20, v23.V2S());
2538*f5c631daSSadaf Ebrahimi   __ faddp(v21.V2D(), v4.V2D(), v11.V2D());
2539*f5c631daSSadaf Ebrahimi   __ faddp(v31.V2S(), v26.V2S(), v1.V2S());
2540*f5c631daSSadaf Ebrahimi   __ faddp(v13.V4S(), v27.V4S(), v28.V4S());
  // fcmeq/fcmge/fcmgt alternate between a register third operand and a literal
  // 0.0 for each arrangement.
2541*f5c631daSSadaf Ebrahimi   __ fcmeq(v17.V2D(), v13.V2D(), v20.V2D());
2542*f5c631daSSadaf Ebrahimi   __ fcmeq(v24.V2D(), v16.V2D(), 0.0);
2543*f5c631daSSadaf Ebrahimi   __ fcmeq(v26.V2S(), v17.V2S(), v10.V2S());
2544*f5c631daSSadaf Ebrahimi   __ fcmeq(v24.V2S(), v4.V2S(), 0.0);
2545*f5c631daSSadaf Ebrahimi   __ fcmeq(v8.V4S(), v4.V4S(), v14.V4S());
2546*f5c631daSSadaf Ebrahimi   __ fcmeq(v26.V4S(), v25.V4S(), 0.0);
2547*f5c631daSSadaf Ebrahimi   __ fcmge(v27.V2D(), v0.V2D(), v0.V2D());
2548*f5c631daSSadaf Ebrahimi   __ fcmge(v22.V2D(), v30.V2D(), 0.0);
2549*f5c631daSSadaf Ebrahimi   __ fcmge(v7.V2S(), v21.V2S(), v25.V2S());
2550*f5c631daSSadaf Ebrahimi   __ fcmge(v15.V2S(), v15.V2S(), 0.0);
2551*f5c631daSSadaf Ebrahimi   __ fcmge(v29.V4S(), v4.V4S(), v27.V4S());
2552*f5c631daSSadaf Ebrahimi   __ fcmge(v22.V4S(), v21.V4S(), 0.0);
2553*f5c631daSSadaf Ebrahimi   __ fcmgt(v1.V2D(), v26.V2D(), v15.V2D());
2554*f5c631daSSadaf Ebrahimi   __ fcmgt(v15.V2D(), v23.V2D(), 0.0);
2555*f5c631daSSadaf Ebrahimi   __ fcmgt(v21.V2S(), v16.V2S(), v6.V2S());
2556*f5c631daSSadaf Ebrahimi   __ fcmgt(v1.V2S(), v13.V2S(), 0.0);
2557*f5c631daSSadaf Ebrahimi   __ fcmgt(v14.V4S(), v0.V4S(), v25.V4S());
2558*f5c631daSSadaf Ebrahimi   __ fcmgt(v13.V4S(), v8.V4S(), 0.0);
  // Only the literal-0.0 form is emitted for fcmle and fcmlt.
2559*f5c631daSSadaf Ebrahimi   __ fcmle(v4.V2D(), v6.V2D(), 0.0);
2560*f5c631daSSadaf Ebrahimi   __ fcmle(v24.V2S(), v31.V2S(), 0.0);
2561*f5c631daSSadaf Ebrahimi   __ fcmle(v8.V4S(), v23.V4S(), 0.0);
2562*f5c631daSSadaf Ebrahimi   __ fcmlt(v7.V2D(), v3.V2D(), 0.0);
2563*f5c631daSSadaf Ebrahimi   __ fcmlt(v15.V2S(), v21.V2S(), 0.0);
2564*f5c631daSSadaf Ebrahimi   __ fcmlt(v1.V4S(), v2.V4S(), 0.0);
2565*f5c631daSSadaf Ebrahimi   __ fcvtas(v6.V2D(), v8.V2D());
2566*f5c631daSSadaf Ebrahimi   __ fcvtas(v1.V2S(), v9.V2S());
2567*f5c631daSSadaf Ebrahimi   __ fcvtas(v8.V4S(), v19.V4S());
2568*f5c631daSSadaf Ebrahimi   __ fcvtau(v5.V2D(), v31.V2D());
2569*f5c631daSSadaf Ebrahimi   __ fcvtau(v28.V2S(), v29.V2S());
2570*f5c631daSSadaf Ebrahimi   __ fcvtau(v11.V4S(), v26.V4S());
  // fcvtl/fcvtl2 and fcvtn/fcvtn2 use different arrangements for destination
  // and source (e.g. V2D <- V2S, V4H <- V4S).
2571*f5c631daSSadaf Ebrahimi   __ fcvtl(v8.V2D(), v25.V2S());
2572*f5c631daSSadaf Ebrahimi   __ fcvtl(v27.V4S(), v14.V4H());
2573*f5c631daSSadaf Ebrahimi   __ fcvtl2(v1.V2D(), v6.V4S());
2574*f5c631daSSadaf Ebrahimi   __ fcvtl2(v24.V4S(), v9.V8H());
2575*f5c631daSSadaf Ebrahimi   __ fcvtms(v9.V2D(), v24.V2D());
2576*f5c631daSSadaf Ebrahimi   __ fcvtms(v7.V2S(), v11.V2S());
2577*f5c631daSSadaf Ebrahimi   __ fcvtms(v23.V4S(), v21.V4S());
2578*f5c631daSSadaf Ebrahimi   __ fcvtmu(v13.V2D(), v1.V2D());
2579*f5c631daSSadaf Ebrahimi   __ fcvtmu(v26.V2S(), v12.V2S());
2580*f5c631daSSadaf Ebrahimi   __ fcvtmu(v21.V4S(), v21.V4S());
2581*f5c631daSSadaf Ebrahimi   __ fcvtn(v11.V2S(), v1.V2D());
2582*f5c631daSSadaf Ebrahimi   __ fcvtn(v8.V4H(), v2.V4S());
2583*f5c631daSSadaf Ebrahimi   __ fcvtn2(v24.V4S(), v29.V2D());
2584*f5c631daSSadaf Ebrahimi   __ fcvtn2(v4.V8H(), v10.V4S());
2585*f5c631daSSadaf Ebrahimi   __ fcvtns(v25.V2D(), v10.V2D());
2586*f5c631daSSadaf Ebrahimi   __ fcvtns(v4.V2S(), v8.V2S());
2587*f5c631daSSadaf Ebrahimi   __ fcvtns(v29.V4S(), v27.V4S());
2588*f5c631daSSadaf Ebrahimi   __ fcvtnu(v18.V2D(), v27.V2D());
2589*f5c631daSSadaf Ebrahimi   __ fcvtnu(v11.V2S(), v14.V2S());
2590*f5c631daSSadaf Ebrahimi   __ fcvtnu(v27.V4S(), v21.V4S());
2591*f5c631daSSadaf Ebrahimi   __ fcvtps(v23.V2D(), v5.V2D());
2592*f5c631daSSadaf Ebrahimi   __ fcvtps(v24.V2S(), v15.V2S());
2593*f5c631daSSadaf Ebrahimi   __ fcvtps(v5.V4S(), v19.V4S());
2594*f5c631daSSadaf Ebrahimi   __ fcvtpu(v3.V2D(), v21.V2D());
2595*f5c631daSSadaf Ebrahimi   __ fcvtpu(v3.V2S(), v21.V2S());
2596*f5c631daSSadaf Ebrahimi   __ fcvtpu(v0.V4S(), v7.V4S());
2597*f5c631daSSadaf Ebrahimi   __ fcvtxn(v29.V2S(), v11.V2D());
2598*f5c631daSSadaf Ebrahimi   __ fcvtxn2(v31.V4S(), v25.V2D());
  // fcvtzs/fcvtzu are emitted for each arrangement both without and with a
  // trailing integer immediate.
  // NOTE(review): the immediate is presumably the number of fractional bits -
  // confirm against the assembler API.
2599*f5c631daSSadaf Ebrahimi   __ fcvtzs(v19.V2D(), v17.V2D());
2600*f5c631daSSadaf Ebrahimi   __ fcvtzs(v12.V2D(), v24.V2D(), 64);
2601*f5c631daSSadaf Ebrahimi   __ fcvtzs(v9.V2S(), v2.V2S());
2602*f5c631daSSadaf Ebrahimi   __ fcvtzs(v5.V2S(), v20.V2S(), 29);
2603*f5c631daSSadaf Ebrahimi   __ fcvtzs(v21.V4S(), v25.V4S());
2604*f5c631daSSadaf Ebrahimi   __ fcvtzs(v26.V4S(), v1.V4S(), 6);
2605*f5c631daSSadaf Ebrahimi   __ fcvtzu(v13.V2D(), v25.V2D());
2606*f5c631daSSadaf Ebrahimi   __ fcvtzu(v28.V2D(), v13.V2D(), 32);
2607*f5c631daSSadaf Ebrahimi   __ fcvtzu(v26.V2S(), v6.V2S());
2608*f5c631daSSadaf Ebrahimi   __ fcvtzu(v9.V2S(), v10.V2S(), 15);
2609*f5c631daSSadaf Ebrahimi   __ fcvtzu(v30.V4S(), v6.V4S());
2610*f5c631daSSadaf Ebrahimi   __ fcvtzu(v19.V4S(), v22.V4S(), 18);
2611*f5c631daSSadaf Ebrahimi   __ fdiv(v15.V2D(), v8.V2D(), v15.V2D());
2612*f5c631daSSadaf Ebrahimi   __ fdiv(v12.V2S(), v9.V2S(), v26.V2S());
2613*f5c631daSSadaf Ebrahimi   __ fdiv(v19.V4S(), v22.V4S(), v19.V4S());
  // fmax*/fmin* families: three-operand vector forms, forms with a scalar
  // destination and a V2D/V2S source (*p), and forms with an s-register
  // destination and a V4S source (*v).
2614*f5c631daSSadaf Ebrahimi   __ fmax(v19.V2D(), v7.V2D(), v8.V2D());
2615*f5c631daSSadaf Ebrahimi   __ fmax(v25.V2S(), v12.V2S(), v29.V2S());
2616*f5c631daSSadaf Ebrahimi   __ fmax(v6.V4S(), v15.V4S(), v5.V4S());
2617*f5c631daSSadaf Ebrahimi   __ fmaxnm(v16.V2D(), v8.V2D(), v20.V2D());
2618*f5c631daSSadaf Ebrahimi   __ fmaxnm(v15.V2S(), v26.V2S(), v25.V2S());
2619*f5c631daSSadaf Ebrahimi   __ fmaxnm(v23.V4S(), v14.V4S(), v16.V4S());
2620*f5c631daSSadaf Ebrahimi   __ fmaxnmp(d6, v19.V2D());
2621*f5c631daSSadaf Ebrahimi   __ fmaxnmp(s27, v26.V2S());
2622*f5c631daSSadaf Ebrahimi   __ fmaxnmp(v8.V2D(), v12.V2D(), v23.V2D());
2623*f5c631daSSadaf Ebrahimi   __ fmaxnmp(v13.V2S(), v25.V2S(), v22.V2S());
2624*f5c631daSSadaf Ebrahimi   __ fmaxnmp(v15.V4S(), v11.V4S(), v17.V4S());
2625*f5c631daSSadaf Ebrahimi   __ fmaxnmv(s27, v19.V4S());
2626*f5c631daSSadaf Ebrahimi   __ fmaxp(d20, v14.V2D());
2627*f5c631daSSadaf Ebrahimi   __ fmaxp(s18, v2.V2S());
2628*f5c631daSSadaf Ebrahimi   __ fmaxp(v9.V2D(), v23.V2D(), v31.V2D());
2629*f5c631daSSadaf Ebrahimi   __ fmaxp(v7.V2S(), v22.V2S(), v31.V2S());
2630*f5c631daSSadaf Ebrahimi   __ fmaxp(v18.V4S(), v7.V4S(), v29.V4S());
2631*f5c631daSSadaf Ebrahimi   __ fmaxv(s31, v29.V4S());
2632*f5c631daSSadaf Ebrahimi   __ fmin(v2.V2D(), v5.V2D(), v2.V2D());
2633*f5c631daSSadaf Ebrahimi   __ fmin(v31.V2S(), v17.V2S(), v10.V2S());
2634*f5c631daSSadaf Ebrahimi   __ fmin(v10.V4S(), v4.V4S(), v16.V4S());
2635*f5c631daSSadaf Ebrahimi   __ fminnm(v21.V2D(), v6.V2D(), v5.V2D());
2636*f5c631daSSadaf Ebrahimi   __ fminnm(v22.V2S(), v18.V2S(), v14.V2S());
2637*f5c631daSSadaf Ebrahimi   __ fminnm(v25.V4S(), v31.V4S(), v3.V4S());
2638*f5c631daSSadaf Ebrahimi   __ fminnmp(d9, v1.V2D());
2639*f5c631daSSadaf Ebrahimi   __ fminnmp(s21, v20.V2S());
2640*f5c631daSSadaf Ebrahimi   __ fminnmp(v16.V2D(), v21.V2D(), v19.V2D());
2641*f5c631daSSadaf Ebrahimi   __ fminnmp(v16.V2S(), v31.V2S(), v25.V2S());
2642*f5c631daSSadaf Ebrahimi   __ fminnmp(v26.V4S(), v16.V4S(), v15.V4S());
2643*f5c631daSSadaf Ebrahimi   __ fminnmv(s3, v4.V4S());
2644*f5c631daSSadaf Ebrahimi   __ fminp(d24, v26.V2D());
2645*f5c631daSSadaf Ebrahimi   __ fminp(s7, v17.V2S());
2646*f5c631daSSadaf Ebrahimi   __ fminp(v23.V2D(), v19.V2D(), v3.V2D());
2647*f5c631daSSadaf Ebrahimi   __ fminp(v29.V2S(), v21.V2S(), v9.V2S());
2648*f5c631daSSadaf Ebrahimi   __ fminp(v0.V4S(), v24.V4S(), v21.V4S());
2649*f5c631daSSadaf Ebrahimi   __ fminv(s25, v8.V4S());
  // fmla/fmls: each is emitted with three plain register operands and with a
  // `v<n>.D()`/`v<n>.S()` third operand followed by an integer.
  // NOTE(review): the trailing integer is presumably the lane index of the
  // by-element form - confirm against the assembler API.
2650*f5c631daSSadaf Ebrahimi   __ fmla(d23, d0, v9.D(), 1);
2651*f5c631daSSadaf Ebrahimi   __ fmla(s23, s15, v7.S(), 0);
2652*f5c631daSSadaf Ebrahimi   __ fmla(v17.V2D(), v11.V2D(), v6.V2D());
2653*f5c631daSSadaf Ebrahimi   __ fmla(v30.V2D(), v30.V2D(), v11.D(), 0);
2654*f5c631daSSadaf Ebrahimi   __ fmla(v19.V2S(), v12.V2S(), v6.V2S());
2655*f5c631daSSadaf Ebrahimi   __ fmla(v24.V2S(), v17.V2S(), v9.S(), 0);
2656*f5c631daSSadaf Ebrahimi   __ fmla(v16.V4S(), v11.V4S(), v11.V4S());
2657*f5c631daSSadaf Ebrahimi   __ fmla(v27.V4S(), v23.V4S(), v9.S(), 2);
2658*f5c631daSSadaf Ebrahimi   __ fmls(d27, d30, v6.D(), 0);
2659*f5c631daSSadaf Ebrahimi   __ fmls(s21, s16, v2.S(), 0);
2660*f5c631daSSadaf Ebrahimi   __ fmls(v5.V2D(), v19.V2D(), v21.V2D());
2661*f5c631daSSadaf Ebrahimi   __ fmls(v18.V2D(), v30.V2D(), v12.D(), 0);
2662*f5c631daSSadaf Ebrahimi   __ fmls(v5.V2S(), v16.V2S(), v7.V2S());
2663*f5c631daSSadaf Ebrahimi   __ fmls(v3.V2S(), v18.V2S(), v11.S(), 1);
2664*f5c631daSSadaf Ebrahimi   __ fmls(v27.V4S(), v5.V4S(), v30.V4S());
2665*f5c631daSSadaf Ebrahimi   __ fmls(v26.V4S(), v20.V4S(), v4.S(), 3);
  // fmov: three floating-point literal forms, then two forms that pair an x
  // register with `v<n>.D()` and the integer 1.
2666*f5c631daSSadaf Ebrahimi   __ fmov(v14.V2D(), -0.34375);
2667*f5c631daSSadaf Ebrahimi   __ fmov(v26.V2S(), 0.90625f);
2668*f5c631daSSadaf Ebrahimi   __ fmov(v31.V4S(), -5.0000f);
2669*f5c631daSSadaf Ebrahimi   __ fmov(v28.D(), 1, x25);
2670*f5c631daSSadaf Ebrahimi   __ fmov(x18, v2.D(), 1);
  // fmul/fmulx follow the same operand pattern as fmla/fmls above.
2671*f5c631daSSadaf Ebrahimi   __ fmul(d12, d4, v1.D(), 1);
2672*f5c631daSSadaf Ebrahimi   __ fmul(s30, s1, v15.S(), 3);
2673*f5c631daSSadaf Ebrahimi   __ fmul(v25.V2D(), v0.V2D(), v21.V2D());
2674*f5c631daSSadaf Ebrahimi   __ fmul(v10.V2D(), v24.V2D(), v10.D(), 1);
2675*f5c631daSSadaf Ebrahimi   __ fmul(v7.V2S(), v24.V2S(), v16.V2S());
2676*f5c631daSSadaf Ebrahimi   __ fmul(v1.V2S(), v16.V2S(), v4.S(), 2);
2677*f5c631daSSadaf Ebrahimi   __ fmul(v5.V4S(), v28.V4S(), v25.V4S());
2678*f5c631daSSadaf Ebrahimi   __ fmul(v11.V4S(), v3.V4S(), v8.S(), 0);
2679*f5c631daSSadaf Ebrahimi   __ fmulx(d28, d9, v3.D(), 1);
2680*f5c631daSSadaf Ebrahimi   __ fmulx(s25, s21, v15.S(), 1);
2681*f5c631daSSadaf Ebrahimi   __ fmulx(v31.V2D(), v28.V2D(), v8.V2D());
2682*f5c631daSSadaf Ebrahimi   __ fmulx(v3.V2D(), v21.V2D(), v6.D(), 0);
2683*f5c631daSSadaf Ebrahimi   __ fmulx(v9.V2S(), v1.V2S(), v0.V2S());
2684*f5c631daSSadaf Ebrahimi   __ fmulx(v16.V2S(), v27.V2S(), v6.S(), 0);
2685*f5c631daSSadaf Ebrahimi   __ fmulx(v2.V4S(), v4.V4S(), v5.V4S());
2686*f5c631daSSadaf Ebrahimi   __ fmulx(v18.V4S(), v7.V4S(), v4.S(), 0);
2687*f5c631daSSadaf Ebrahimi   __ fneg(v1.V2D(), v25.V2D());
2688*f5c631daSSadaf Ebrahimi   __ fneg(v14.V2S(), v31.V2S());
2689*f5c631daSSadaf Ebrahimi   __ fneg(v5.V4S(), v4.V4S());
2690*f5c631daSSadaf Ebrahimi   __ frecpe(v18.V2D(), v12.V2D());
2691*f5c631daSSadaf Ebrahimi   __ frecpe(v10.V2S(), v22.V2S());
2692*f5c631daSSadaf Ebrahimi   __ frecpe(v5.V4S(), v6.V4S());
2693*f5c631daSSadaf Ebrahimi   __ frecps(v22.V2D(), v7.V2D(), v26.V2D());
2694*f5c631daSSadaf Ebrahimi   __ frecps(v31.V2S(), v27.V2S(), v2.V2S());
2695*f5c631daSSadaf Ebrahimi   __ frecps(v18.V4S(), v6.V4S(), v27.V4S());
  // frint{a,i,m,n,p,x,z}: each variant is emitted once per arrangement
  // (V2D, V2S, V4S).
2696*f5c631daSSadaf Ebrahimi   __ frinta(v26.V2D(), v13.V2D());
2697*f5c631daSSadaf Ebrahimi   __ frinta(v15.V2S(), v26.V2S());
2698*f5c631daSSadaf Ebrahimi   __ frinta(v13.V4S(), v16.V4S());
2699*f5c631daSSadaf Ebrahimi   __ frinti(v9.V2D(), v12.V2D());
2700*f5c631daSSadaf Ebrahimi   __ frinti(v5.V2S(), v19.V2S());
2701*f5c631daSSadaf Ebrahimi   __ frinti(v15.V4S(), v11.V4S());
2702*f5c631daSSadaf Ebrahimi   __ frintm(v17.V2D(), v29.V2D());
2703*f5c631daSSadaf Ebrahimi   __ frintm(v30.V2S(), v11.V2S());
2704*f5c631daSSadaf Ebrahimi   __ frintm(v1.V4S(), v20.V4S());
2705*f5c631daSSadaf Ebrahimi   __ frintn(v24.V2D(), v6.V2D());
2706*f5c631daSSadaf Ebrahimi   __ frintn(v12.V2S(), v17.V2S());
2707*f5c631daSSadaf Ebrahimi   __ frintn(v29.V4S(), v11.V4S());
2708*f5c631daSSadaf Ebrahimi   __ frintp(v10.V2D(), v7.V2D());
2709*f5c631daSSadaf Ebrahimi   __ frintp(v12.V2S(), v18.V2S());
2710*f5c631daSSadaf Ebrahimi   __ frintp(v26.V4S(), v31.V4S());
2711*f5c631daSSadaf Ebrahimi   __ frintx(v24.V2D(), v13.V2D());
2712*f5c631daSSadaf Ebrahimi   __ frintx(v7.V2S(), v9.V2S());
2713*f5c631daSSadaf Ebrahimi   __ frintx(v18.V4S(), v21.V4S());
2714*f5c631daSSadaf Ebrahimi   __ frintz(v19.V2D(), v25.V2D());
2715*f5c631daSSadaf Ebrahimi   __ frintz(v15.V2S(), v8.V2S());
2716*f5c631daSSadaf Ebrahimi   __ frintz(v20.V4S(), v3.V4S());
2717*f5c631daSSadaf Ebrahimi   __ frsqrte(v23.V2D(), v5.V2D());
2718*f5c631daSSadaf Ebrahimi   __ frsqrte(v9.V2S(), v7.V2S());
2719*f5c631daSSadaf Ebrahimi   __ frsqrte(v3.V4S(), v9.V4S());
2720*f5c631daSSadaf Ebrahimi   __ frsqrts(v25.V2D(), v28.V2D(), v15.V2D());
2721*f5c631daSSadaf Ebrahimi   __ frsqrts(v9.V2S(), v26.V2S(), v10.V2S());
2722*f5c631daSSadaf Ebrahimi   __ frsqrts(v5.V4S(), v1.V4S(), v10.V4S());
2723*f5c631daSSadaf Ebrahimi   __ fsqrt(v6.V2D(), v18.V2D());
2724*f5c631daSSadaf Ebrahimi   __ fsqrt(v6.V2S(), v18.V2S());
2725*f5c631daSSadaf Ebrahimi   __ fsqrt(v0.V4S(), v31.V4S());
2726*f5c631daSSadaf Ebrahimi   __ fsub(v31.V2D(), v30.V2D(), v31.V2D());
2727*f5c631daSSadaf Ebrahimi   __ fsub(v11.V2S(), v8.V2S(), v6.V2S());
2728*f5c631daSSadaf Ebrahimi   __ fsub(v16.V4S(), v0.V4S(), v31.V4S());
  // scvtf/ucvtf: like fcvtzs/fcvtzu above, each arrangement appears without
  // and with a trailing integer immediate.
2729*f5c631daSSadaf Ebrahimi   __ scvtf(v25.V2D(), v31.V2D());
2730*f5c631daSSadaf Ebrahimi   __ scvtf(v10.V2D(), v13.V2D(), 45);
2731*f5c631daSSadaf Ebrahimi   __ scvtf(v10.V2S(), v15.V2S());
2732*f5c631daSSadaf Ebrahimi   __ scvtf(v18.V2S(), v4.V2S(), 27);
2733*f5c631daSSadaf Ebrahimi   __ scvtf(v17.V4S(), v5.V4S());
2734*f5c631daSSadaf Ebrahimi   __ scvtf(v11.V4S(), v25.V4S(), 24);
2735*f5c631daSSadaf Ebrahimi   __ ucvtf(v9.V2D(), v3.V2D());
2736*f5c631daSSadaf Ebrahimi   __ ucvtf(v26.V2D(), v30.V2D(), 46);
2737*f5c631daSSadaf Ebrahimi   __ ucvtf(v11.V2S(), v4.V2S());
2738*f5c631daSSadaf Ebrahimi   __ ucvtf(v29.V2S(), v3.V2S(), 25);
2739*f5c631daSSadaf Ebrahimi   __ ucvtf(v22.V4S(), v23.V4S());
2740*f5c631daSSadaf Ebrahimi   __ ucvtf(v18.V4S(), v9.V4S(), 25);
2741*f5c631daSSadaf Ebrahimi }
2742*f5c631daSSadaf Ebrahimi 
2743*f5c631daSSadaf Ebrahimi 
// Emits a fixed sequence of SVE load and store instructions through `masm`.
//
// The sequence is emitted inside an ExactAssemblyScope sized to the bytes
// remaining in the assembler's buffer (kMaximumSize policy), with a
// CPUFeaturesScope constructed for CPUFeatures::kSVE alive for the whole
// function. Every memory operand uses x0 as its base, combined with either an
// immediate offset tagged SVE_MUL_VL or a register offset (x2 or x3), the
// latter sometimes with an LSL amount.
//
// NOTE(review): `__` is a macro defined outside this view; presumably it
// expands to `masm->` - confirm.
// NOTE(review): the values of x0, x2, x3 and the predicate registers are not
// set here; presumably the caller prepares them - confirm.
GenerateTestSequenceSVE(MacroAssembler * masm)2744*f5c631daSSadaf Ebrahimi static void GenerateTestSequenceSVE(MacroAssembler* masm) {
2745*f5c631daSSadaf Ebrahimi   ExactAssemblyScope guard(masm,
2746*f5c631daSSadaf Ebrahimi                            masm->GetBuffer()->GetRemainingBytes(),
2747*f5c631daSSadaf Ebrahimi                            ExactAssemblyScope::kMaximumSize);
  // NOTE(review): presumably this makes kSVE instructions acceptable to the
  // assembler for the lifetime of `feature_guard` - confirm.
2748*f5c631daSSadaf Ebrahimi   CPUFeaturesScope feature_guard(masm, CPUFeatures::kSVE);
2749*f5c631daSSadaf Ebrahimi 
2750*f5c631daSSadaf Ebrahimi   // Simple, unpredicated loads and stores.
  // Predicate registers first (p12-p15 stored, p8-p11 loaded), one per lane
  // size, all at the same address operand.
2751*f5c631daSSadaf Ebrahimi   __ str(p12.VnD(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2752*f5c631daSSadaf Ebrahimi   __ str(p13.VnS(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2753*f5c631daSSadaf Ebrahimi   __ str(p14.VnH(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2754*f5c631daSSadaf Ebrahimi   __ str(p15.VnB(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2755*f5c631daSSadaf Ebrahimi   __ ldr(p8.VnD(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2756*f5c631daSSadaf Ebrahimi   __ ldr(p9.VnS(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2757*f5c631daSSadaf Ebrahimi   __ ldr(p10.VnH(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2758*f5c631daSSadaf Ebrahimi   __ ldr(p11.VnB(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2759*f5c631daSSadaf Ebrahimi 
  // Then the same pattern for vector registers (z0-z3 stored, z20-z23 loaded).
2760*f5c631daSSadaf Ebrahimi   __ str(z0.VnD(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2761*f5c631daSSadaf Ebrahimi   __ str(z1.VnS(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2762*f5c631daSSadaf Ebrahimi   __ str(z2.VnH(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2763*f5c631daSSadaf Ebrahimi   __ str(z3.VnB(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2764*f5c631daSSadaf Ebrahimi   __ ldr(z20.VnD(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2765*f5c631daSSadaf Ebrahimi   __ ldr(z21.VnS(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2766*f5c631daSSadaf Ebrahimi   __ ldr(z22.VnH(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2767*f5c631daSSadaf Ebrahimi   __ ldr(z23.VnB(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2768*f5c631daSSadaf Ebrahimi 
2769*f5c631daSSadaf Ebrahimi   // Structured accesses.
  // In this group the register lane size matches the b/h/w/d suffix of the
  // mnemonic. Stores take a plain predicate; loads take `pN.Zeroing()`.
2770*f5c631daSSadaf Ebrahimi   __ st1b(z0.VnB(), p2, SVEMemOperand(x0, 3, SVE_MUL_VL));
2771*f5c631daSSadaf Ebrahimi   __ st1h(z1.VnH(), p1, SVEMemOperand(x0, 3, SVE_MUL_VL));
2772*f5c631daSSadaf Ebrahimi   __ st1w(z2.VnS(), p1, SVEMemOperand(x0, x3, LSL, 2));
2773*f5c631daSSadaf Ebrahimi   __ st1d(z3.VnD(), p2, SVEMemOperand(x0, 3, SVE_MUL_VL));
2774*f5c631daSSadaf Ebrahimi   __ ld1b(z20.VnB(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2775*f5c631daSSadaf Ebrahimi   __ ld1h(z21.VnH(), p2.Zeroing(), SVEMemOperand(x0, x2, LSL, 1));
2776*f5c631daSSadaf Ebrahimi   __ ld1w(z22.VnS(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2777*f5c631daSSadaf Ebrahimi   __ ld1d(z23.VnD(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2778*f5c631daSSadaf Ebrahimi 
2779*f5c631daSSadaf Ebrahimi   // Structured, packed accesses.
  // Here the register lane size (VnH/VnS/VnD) is wider than the element size
  // named by the mnemonic suffix (b/h/w). The ld1s* variants are included.
  // NOTE(review): ld1sb/ld1sh/ld1sw are presumably the sign-extending loads -
  // confirm.
2780*f5c631daSSadaf Ebrahimi   __ st1b(z2.VnH(), p1, SVEMemOperand(x0, 3, SVE_MUL_VL));
2781*f5c631daSSadaf Ebrahimi   __ st1b(z3.VnS(), p2, SVEMemOperand(x0, 3, SVE_MUL_VL));
2782*f5c631daSSadaf Ebrahimi   __ st1b(z4.VnD(), p2, SVEMemOperand(x0, 3, SVE_MUL_VL));
2783*f5c631daSSadaf Ebrahimi   __ st1h(z0.VnS(), p1, SVEMemOperand(x0, 3, SVE_MUL_VL));
2784*f5c631daSSadaf Ebrahimi   __ st1h(z1.VnD(), p1, SVEMemOperand(x0, x2, LSL, 1));
2785*f5c631daSSadaf Ebrahimi   __ st1w(z2.VnD(), p1, SVEMemOperand(x0, 3, SVE_MUL_VL));
2786*f5c631daSSadaf Ebrahimi   __ ld1b(z20.VnH(), p1.Zeroing(), SVEMemOperand(x0, x2));
2787*f5c631daSSadaf Ebrahimi   __ ld1b(z21.VnS(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2788*f5c631daSSadaf Ebrahimi   __ ld1b(z22.VnD(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2789*f5c631daSSadaf Ebrahimi   __ ld1h(z23.VnS(), p2.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2790*f5c631daSSadaf Ebrahimi   __ ld1h(z24.VnD(), p2.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2791*f5c631daSSadaf Ebrahimi   __ ld1w(z20.VnD(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2792*f5c631daSSadaf Ebrahimi   __ ld1sb(z21.VnH(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2793*f5c631daSSadaf Ebrahimi   __ ld1sb(z22.VnS(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2794*f5c631daSSadaf Ebrahimi   __ ld1sb(z23.VnD(), p2.Zeroing(), SVEMemOperand(x0, x2));
2795*f5c631daSSadaf Ebrahimi   __ ld1sh(z24.VnS(), p2.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2796*f5c631daSSadaf Ebrahimi   __ ld1sh(z20.VnD(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2797*f5c631daSSadaf Ebrahimi   __ ld1sw(z21.VnD(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2798*f5c631daSSadaf Ebrahimi 
2799*f5c631daSSadaf Ebrahimi   // Structured, interleaved accesses.
  // Two-register forms. The immediate offsets used (4) are a multiple of the
  // register count.
2800*f5c631daSSadaf Ebrahimi   __ st2b(z0.VnB(), z1.VnB(), p4, SVEMemOperand(x0, 4, SVE_MUL_VL));
2801*f5c631daSSadaf Ebrahimi   __ st2h(z1.VnH(), z2.VnH(), p4, SVEMemOperand(x0, 4, SVE_MUL_VL));
2802*f5c631daSSadaf Ebrahimi   __ st2w(z2.VnS(), z3.VnS(), p3, SVEMemOperand(x0, x2, LSL, 2));
2803*f5c631daSSadaf Ebrahimi   __ st2d(z3.VnD(), z4.VnD(), p4, SVEMemOperand(x0, 4, SVE_MUL_VL));
2804*f5c631daSSadaf Ebrahimi   __ ld2b(z20.VnB(), z21.VnB(), p5.Zeroing(), SVEMemOperand(x0, x2));
2805*f5c631daSSadaf Ebrahimi   __ ld2h(z21.VnH(), z22.VnH(), p6.Zeroing(), SVEMemOperand(x0, 4, SVE_MUL_VL));
2806*f5c631daSSadaf Ebrahimi   __ ld2w(z22.VnS(), z23.VnS(), p6.Zeroing(), SVEMemOperand(x0, 4, SVE_MUL_VL));
2807*f5c631daSSadaf Ebrahimi   __ ld2d(z23.VnD(), z24.VnD(), p5.Zeroing(), SVEMemOperand(x0, 4, SVE_MUL_VL));
2808*f5c631daSSadaf Ebrahimi 
  // Three-register forms, with immediate offset 3.
2809*f5c631daSSadaf Ebrahimi   __ st3b(z4.VnB(), z5.VnB(), z6.VnB(), p4, SVEMemOperand(x0, 3, SVE_MUL_VL));
2810*f5c631daSSadaf Ebrahimi   __ st3h(z5.VnH(), z6.VnH(), z7.VnH(), p4, SVEMemOperand(x0, 3, SVE_MUL_VL));
2811*f5c631daSSadaf Ebrahimi   __ st3w(z6.VnS(), z7.VnS(), z8.VnS(), p3, SVEMemOperand(x0, 3, SVE_MUL_VL));
2812*f5c631daSSadaf Ebrahimi   __ st3d(z7.VnD(), z8.VnD(), z9.VnD(), p4, SVEMemOperand(x0, x2, LSL, 3));
2813*f5c631daSSadaf Ebrahimi   __ ld3b(z24.VnB(),
2814*f5c631daSSadaf Ebrahimi           z25.VnB(),
2815*f5c631daSSadaf Ebrahimi           z26.VnB(),
2816*f5c631daSSadaf Ebrahimi           p5.Zeroing(),
2817*f5c631daSSadaf Ebrahimi           SVEMemOperand(x0, 3, SVE_MUL_VL));
2818*f5c631daSSadaf Ebrahimi   __ ld3h(z25.VnH(),
2819*f5c631daSSadaf Ebrahimi           z26.VnH(),
2820*f5c631daSSadaf Ebrahimi           z27.VnH(),
2821*f5c631daSSadaf Ebrahimi           p6.Zeroing(),
2822*f5c631daSSadaf Ebrahimi           SVEMemOperand(x0, x2, LSL, 1));
2823*f5c631daSSadaf Ebrahimi   __ ld3w(z26.VnS(),
2824*f5c631daSSadaf Ebrahimi           z27.VnS(),
2825*f5c631daSSadaf Ebrahimi           z28.VnS(),
2826*f5c631daSSadaf Ebrahimi           p6.Zeroing(),
2827*f5c631daSSadaf Ebrahimi           SVEMemOperand(x0, 3, SVE_MUL_VL));
2828*f5c631daSSadaf Ebrahimi   __ ld3d(z27.VnD(),
2829*f5c631daSSadaf Ebrahimi           z28.VnD(),
2830*f5c631daSSadaf Ebrahimi           z29.VnD(),
2831*f5c631daSSadaf Ebrahimi           p5.Zeroing(),
2832*f5c631daSSadaf Ebrahimi           SVEMemOperand(x0, 3, SVE_MUL_VL));
2833*f5c631daSSadaf Ebrahimi 
  // Four-register forms, with immediate offset 4. The first store's register
  // list runs z31, z0, z1, z2, i.e. it continues from z31 to z0.
2834*f5c631daSSadaf Ebrahimi   __ st4b(z31.VnB(),
2835*f5c631daSSadaf Ebrahimi           z0.VnB(),
2836*f5c631daSSadaf Ebrahimi           z1.VnB(),
2837*f5c631daSSadaf Ebrahimi           z2.VnB(),
2838*f5c631daSSadaf Ebrahimi           p4,
2839*f5c631daSSadaf Ebrahimi           SVEMemOperand(x0, 4, SVE_MUL_VL));
2840*f5c631daSSadaf Ebrahimi   __ st4h(z0.VnH(),
2841*f5c631daSSadaf Ebrahimi           z1.VnH(),
2842*f5c631daSSadaf Ebrahimi           z2.VnH(),
2843*f5c631daSSadaf Ebrahimi           z3.VnH(),
2844*f5c631daSSadaf Ebrahimi           p4,
2845*f5c631daSSadaf Ebrahimi           SVEMemOperand(x0, 4, SVE_MUL_VL));
2846*f5c631daSSadaf Ebrahimi   __ st4w(z1.VnS(),
2847*f5c631daSSadaf Ebrahimi           z2.VnS(),
2848*f5c631daSSadaf Ebrahimi           z3.VnS(),
2849*f5c631daSSadaf Ebrahimi           z4.VnS(),
2850*f5c631daSSadaf Ebrahimi           p3,
2851*f5c631daSSadaf Ebrahimi           SVEMemOperand(x0, 4, SVE_MUL_VL));
2852*f5c631daSSadaf Ebrahimi   __ st4d(z2.VnD(),
2853*f5c631daSSadaf Ebrahimi           z3.VnD(),
2854*f5c631daSSadaf Ebrahimi           z4.VnD(),
2855*f5c631daSSadaf Ebrahimi           z5.VnD(),
2856*f5c631daSSadaf Ebrahimi           p4,
2857*f5c631daSSadaf Ebrahimi           SVEMemOperand(x0, x2, LSL, 3));
2858*f5c631daSSadaf Ebrahimi   __ ld4b(z25.VnB(),
2859*f5c631daSSadaf Ebrahimi           z26.VnB(),
2860*f5c631daSSadaf Ebrahimi           z27.VnB(),
2861*f5c631daSSadaf Ebrahimi           z28.VnB(),
2862*f5c631daSSadaf Ebrahimi           p5.Zeroing(),
2863*f5c631daSSadaf Ebrahimi           SVEMemOperand(x0, 4, SVE_MUL_VL));
2864*f5c631daSSadaf Ebrahimi   __ ld4h(z26.VnH(),
2865*f5c631daSSadaf Ebrahimi           z27.VnH(),
2866*f5c631daSSadaf Ebrahimi           z28.VnH(),
2867*f5c631daSSadaf Ebrahimi           z29.VnH(),
2868*f5c631daSSadaf Ebrahimi           p6.Zeroing(),
2869*f5c631daSSadaf Ebrahimi           SVEMemOperand(x0, 4, SVE_MUL_VL));
2870*f5c631daSSadaf Ebrahimi   __ ld4w(z27.VnS(),
2871*f5c631daSSadaf Ebrahimi           z28.VnS(),
2872*f5c631daSSadaf Ebrahimi           z29.VnS(),
2873*f5c631daSSadaf Ebrahimi           z30.VnS(),
2874*f5c631daSSadaf Ebrahimi           p6.Zeroing(),
2875*f5c631daSSadaf Ebrahimi           SVEMemOperand(x0, x2, LSL, 2));
2876*f5c631daSSadaf Ebrahimi   __ ld4d(z28.VnD(),
2877*f5c631daSSadaf Ebrahimi           z29.VnD(),
2878*f5c631daSSadaf Ebrahimi           z30.VnD(),
2879*f5c631daSSadaf Ebrahimi           z31.VnD(),
2880*f5c631daSSadaf Ebrahimi           p5.Zeroing(),
2881*f5c631daSSadaf Ebrahimi           SVEMemOperand(x0, 4, SVE_MUL_VL));
2882*f5c631daSSadaf Ebrahimi }
2883*f5c631daSSadaf Ebrahimi 
GenerateTestSequenceAtomics(MacroAssembler * masm)2884*f5c631daSSadaf Ebrahimi static void GenerateTestSequenceAtomics(MacroAssembler* masm) {
2885*f5c631daSSadaf Ebrahimi   ExactAssemblyScope guard(masm,
2886*f5c631daSSadaf Ebrahimi                            masm->GetBuffer()->GetRemainingBytes(),
2887*f5c631daSSadaf Ebrahimi                            ExactAssemblyScope::kMaximumSize);
2888*f5c631daSSadaf Ebrahimi   CPUFeaturesScope feature_guard(masm, CPUFeatures::kAtomics);
2889*f5c631daSSadaf Ebrahimi   __ sub(sp, sp, 16);  // Claim some working space on the stack.
2890*f5c631daSSadaf Ebrahimi   __ mov(x0, 0x5555555555555555);
2891*f5c631daSSadaf Ebrahimi   __ str(x0, MemOperand(sp));  // Initialise working space.
2892*f5c631daSSadaf Ebrahimi 
2893*f5c631daSSadaf Ebrahimi #define INST_LIST(OP)                     \
2894*f5c631daSSadaf Ebrahimi   __ ld##OP##b(w0, w0, MemOperand(sp));   \
2895*f5c631daSSadaf Ebrahimi   __ ld##OP##ab(w0, w1, MemOperand(sp));  \
2896*f5c631daSSadaf Ebrahimi   __ ld##OP##lb(w0, w2, MemOperand(sp));  \
2897*f5c631daSSadaf Ebrahimi   __ ld##OP##alb(w0, w3, MemOperand(sp)); \
2898*f5c631daSSadaf Ebrahimi   __ ld##OP##h(w0, w0, MemOperand(sp));   \
2899*f5c631daSSadaf Ebrahimi   __ ld##OP##ah(w0, w1, MemOperand(sp));  \
2900*f5c631daSSadaf Ebrahimi   __ ld##OP##lh(w0, w2, MemOperand(sp));  \
2901*f5c631daSSadaf Ebrahimi   __ ld##OP##alh(w0, w3, MemOperand(sp)); \
2902*f5c631daSSadaf Ebrahimi   __ ld##OP(w0, w0, MemOperand(sp));      \
2903*f5c631daSSadaf Ebrahimi   __ ld##OP##a(w0, w1, MemOperand(sp));   \
2904*f5c631daSSadaf Ebrahimi   __ ld##OP##l(w0, w2, MemOperand(sp));   \
2905*f5c631daSSadaf Ebrahimi   __ ld##OP##al(w0, w3, MemOperand(sp));  \
2906*f5c631daSSadaf Ebrahimi   __ ld##OP(x0, x0, MemOperand(sp));      \
2907*f5c631daSSadaf Ebrahimi   __ ld##OP##a(x0, x1, MemOperand(sp));   \
2908*f5c631daSSadaf Ebrahimi   __ ld##OP##l(x0, x2, MemOperand(sp));   \
2909*f5c631daSSadaf Ebrahimi   __ ld##OP##al(x0, x3, MemOperand(sp));  \
2910*f5c631daSSadaf Ebrahimi   __ st##OP##b(w0, MemOperand(sp));       \
2911*f5c631daSSadaf Ebrahimi   __ st##OP##lb(w0, MemOperand(sp));      \
2912*f5c631daSSadaf Ebrahimi   __ st##OP##h(w0, MemOperand(sp));       \
2913*f5c631daSSadaf Ebrahimi   __ st##OP##lh(w0, MemOperand(sp));      \
2914*f5c631daSSadaf Ebrahimi   __ st##OP(w0, MemOperand(sp));          \
2915*f5c631daSSadaf Ebrahimi   __ st##OP##l(w0, MemOperand(sp));       \
2916*f5c631daSSadaf Ebrahimi   __ st##OP(x0, MemOperand(sp));          \
2917*f5c631daSSadaf Ebrahimi   __ st##OP##l(x0, MemOperand(sp));
2918*f5c631daSSadaf Ebrahimi 
2919*f5c631daSSadaf Ebrahimi   INST_LIST(add);
2920*f5c631daSSadaf Ebrahimi   INST_LIST(set);
2921*f5c631daSSadaf Ebrahimi   INST_LIST(eor);
2922*f5c631daSSadaf Ebrahimi   INST_LIST(smin);
2923*f5c631daSSadaf Ebrahimi   INST_LIST(smax);
2924*f5c631daSSadaf Ebrahimi   INST_LIST(umin);
2925*f5c631daSSadaf Ebrahimi   INST_LIST(umax);
2926*f5c631daSSadaf Ebrahimi   INST_LIST(clr);
2927*f5c631daSSadaf Ebrahimi 
2928*f5c631daSSadaf Ebrahimi #undef INST_LIST
2929*f5c631daSSadaf Ebrahimi 
2930*f5c631daSSadaf Ebrahimi   __ add(sp, sp, 16);  // Restore stack pointer.
2931*f5c631daSSadaf Ebrahimi }
2932*f5c631daSSadaf Ebrahimi 
// Rewrite the text file `trace` in place, replacing the hexadecimal digits of
// values that differ from run to run (address-holding registers, accessed
// memory addresses, instruction addresses, branch targets and "addr"
// annotations) with a run of '~' characters.
//
// Each line is processed independently. The rewritten file always ends with a
// newline, even if the input did not. If `trace` cannot be opened for reading,
// a diagnostic is printed to stderr and the file is left untouched.
static void MaskAddresses(const char* trace) {
#define VIXL_COLOUR "(\x1b\\[[01];([0-9][0-9])?m)?"
  // All patterns are replaced with "$1~~~~~~~~~~~~~~~~".
  const std::regex patterns[] =
      {// Mask registers that hold addresses that change from run to run.
       std::regex("((x0|x1|x2|sp): " VIXL_COLOUR "0x)[0-9a-f]{16}"),
       // Mask accessed memory addresses.
       std::regex("((<-|->) " VIXL_COLOUR "0x)[0-9a-f]{16}"),
       // Mask instruction addresses.
       std::regex("^(0x)[0-9a-f]{16}"),
       // Mask branch targets.
       std::regex("(Branch" VIXL_COLOUR " to 0x)[0-9a-f]{16}"),
       // Mask explicit address annotations.
       std::regex("(addr 0x)[0-9a-f]+")};
#undef VIXL_COLOUR

  std::ifstream in(trace);
  if (!in.is_open()) {
    // Looping on `!in.eof()` would never terminate here, because a stream that
    // failed to open never reaches end-of-file.
    fprintf(stderr, "MaskAddresses: cannot open '%s' for reading.\n", trace);
    return;
  }

  // Testing the result of `getline` (rather than `eof()` beforehand) stops as
  // soon as there is nothing left to read, so a terminal "\n" does not produce
  // a spurious empty line.
  std::vector<std::string> lines;
  std::string line;
  while (std::getline(in, line)) {
    for (const std::regex& pattern : patterns) {
      line = std::regex_replace(line, pattern, "$1~~~~~~~~~~~~~~~~");
    }
    lines.push_back(line);
  }
  in.close();

  std::ofstream out(trace, std::ofstream::trunc);
  for (const std::string& masked : lines) {
    out << masked << "\n";
  }
}
2969*f5c631daSSadaf Ebrahimi 
// Copy the contents of the file `name` to stdout.
//
// If the file cannot be opened, a diagnostic is printed to stderr and nothing
// is written to stdout.
static void PrintFile(const char* name) {
  FILE* file = fopen(name, "r");
  if (file == NULL) {
    // Passing a NULL stream to fgets/fclose is undefined behaviour.
    fprintf(stderr, "PrintFile: cannot open '%s' for reading.\n", name);
    return;
  }
  char buffer[1024];  // The buffer size is arbitrary.
  while (fgets(buffer, sizeof(buffer), file) != NULL) fputs(buffer, stdout);
  fclose(file);
}
2976*f5c631daSSadaf Ebrahimi 
CheckOrGenerateTrace(const char * filename,const char * ref_file)2977*f5c631daSSadaf Ebrahimi static bool CheckOrGenerateTrace(const char* filename, const char* ref_file) {
2978*f5c631daSSadaf Ebrahimi   bool trace_matched_reference;
2979*f5c631daSSadaf Ebrahimi   if (Test::generate_test_trace()) {
2980*f5c631daSSadaf Ebrahimi     // Copy trace_stream to stdout.
2981*f5c631daSSadaf Ebrahimi     FILE* trace_stream = fopen(filename, "r");
2982*f5c631daSSadaf Ebrahimi     VIXL_ASSERT(trace_stream != NULL);
2983*f5c631daSSadaf Ebrahimi     fseek(trace_stream, 0, SEEK_SET);
2984*f5c631daSSadaf Ebrahimi     int c;
2985*f5c631daSSadaf Ebrahimi     while (1) {
2986*f5c631daSSadaf Ebrahimi       c = getc(trace_stream);
2987*f5c631daSSadaf Ebrahimi       if (c == EOF) break;
2988*f5c631daSSadaf Ebrahimi       putc(c, stdout);
2989*f5c631daSSadaf Ebrahimi     }
2990*f5c631daSSadaf Ebrahimi     fclose(trace_stream);
2991*f5c631daSSadaf Ebrahimi     trace_matched_reference = true;
2992*f5c631daSSadaf Ebrahimi   } else {
2993*f5c631daSSadaf Ebrahimi     // Check trace_stream against ref_file.
2994*f5c631daSSadaf Ebrahimi     char command[1024];
2995*f5c631daSSadaf Ebrahimi     size_t length =
2996*f5c631daSSadaf Ebrahimi         snprintf(command, sizeof(command), "diff -u %s %s", ref_file, filename);
2997*f5c631daSSadaf Ebrahimi     VIXL_CHECK(length < sizeof(command));
2998*f5c631daSSadaf Ebrahimi     trace_matched_reference = (system(command) == 0);
2999*f5c631daSSadaf Ebrahimi   }
3000*f5c631daSSadaf Ebrahimi   return trace_matched_reference;
3001*f5c631daSSadaf Ebrahimi }
3002*f5c631daSSadaf Ebrahimi 
3003*f5c631daSSadaf Ebrahimi 
3004*f5c631daSSadaf Ebrahimi // Trace tests can only work with the simulator.
3005*f5c631daSSadaf Ebrahimi #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
3006*f5c631daSSadaf Ebrahimi 
TraceTestHelper(bool coloured_trace,TraceParameters trace_parameters,const char * ref_file)3007*f5c631daSSadaf Ebrahimi static void TraceTestHelper(bool coloured_trace,
3008*f5c631daSSadaf Ebrahimi                             TraceParameters trace_parameters,
3009*f5c631daSSadaf Ebrahimi                             const char* ref_file) {
3010*f5c631daSSadaf Ebrahimi   MacroAssembler masm(12 * KBytes);
3011*f5c631daSSadaf Ebrahimi 
3012*f5c631daSSadaf Ebrahimi   char trace_stream_filename[] = "/tmp/vixl-test-trace-XXXXXX";
3013*f5c631daSSadaf Ebrahimi   FILE* trace_stream = fdopen(mkstemp(trace_stream_filename), "w");
3014*f5c631daSSadaf Ebrahimi 
3015*f5c631daSSadaf Ebrahimi   Decoder decoder;
3016*f5c631daSSadaf Ebrahimi   Simulator simulator(&decoder, trace_stream);
3017*f5c631daSSadaf Ebrahimi   simulator.SetColouredTrace(coloured_trace);
3018*f5c631daSSadaf Ebrahimi   simulator.SetTraceParameters(trace_parameters);
3019*f5c631daSSadaf Ebrahimi   simulator.SilenceExclusiveAccessWarning();
3020*f5c631daSSadaf Ebrahimi 
3021*f5c631daSSadaf Ebrahimi   const int vl_in_bytes = 5 * kZRegMinSizeInBytes;
3022*f5c631daSSadaf Ebrahimi   const int vl_in_bits = vl_in_bytes * kBitsPerByte;
3023*f5c631daSSadaf Ebrahimi   const int pl_in_bits = vl_in_bits / kZRegBitsPerPRegBit;
3024*f5c631daSSadaf Ebrahimi   simulator.SetVectorLengthInBits(vl_in_bits);
3025*f5c631daSSadaf Ebrahimi 
3026*f5c631daSSadaf Ebrahimi   // Set up a scratch buffer so we can test loads and stores.
3027*f5c631daSSadaf Ebrahimi   const int kScratchSize = vl_in_bytes * 1024;
3028*f5c631daSSadaf Ebrahimi   const int kScratchGuardSize = vl_in_bytes;
3029*f5c631daSSadaf Ebrahimi   char scratch_buffer[kScratchSize + kScratchGuardSize];
3030*f5c631daSSadaf Ebrahimi   for (size_t i = 0; i < (sizeof(scratch_buffer) / sizeof(scratch_buffer[0]));
3031*f5c631daSSadaf Ebrahimi        i++) {
3032*f5c631daSSadaf Ebrahimi     scratch_buffer[i] = i & 0xff;
3033*f5c631daSSadaf Ebrahimi   }
3034*f5c631daSSadaf Ebrahimi   // Used for offset addressing.
3035*f5c631daSSadaf Ebrahimi   simulator.WriteXRegister(0, reinterpret_cast<uintptr_t>(scratch_buffer));
3036*f5c631daSSadaf Ebrahimi   // Used for pre-/post-index addressing.
3037*f5c631daSSadaf Ebrahimi   simulator.WriteXRegister(1, reinterpret_cast<uintptr_t>(scratch_buffer));
3038*f5c631daSSadaf Ebrahimi 
3039*f5c631daSSadaf Ebrahimi   const int kPostIndexRegisterStep = 13;  // Arbitrary interesting value.
3040*f5c631daSSadaf Ebrahimi   // Used for post-index offsets.
3041*f5c631daSSadaf Ebrahimi   simulator.WriteXRegister(2, kPostIndexRegisterStep);
3042*f5c631daSSadaf Ebrahimi 
3043*f5c631daSSadaf Ebrahimi   // Initialize the other registers with unique values.
3044*f5c631daSSadaf Ebrahimi   uint64_t initial_base_u64 = 0x0100001000100101;
3045*f5c631daSSadaf Ebrahimi   for (unsigned i = 3; i < kNumberOfRegisters; i++) {
3046*f5c631daSSadaf Ebrahimi     if (i == kLinkRegCode) continue;
3047*f5c631daSSadaf Ebrahimi     if (i == kZeroRegCode) continue;
3048*f5c631daSSadaf Ebrahimi     // NoRegLog suppresses the log now, but the registers will still be logged
3049*f5c631daSSadaf Ebrahimi     // before the first instruction is executed since they have been written but
3050*f5c631daSSadaf Ebrahimi     // not printed.
3051*f5c631daSSadaf Ebrahimi     simulator.WriteRegister(i, initial_base_u64 * i, Simulator::NoRegLog);
3052*f5c631daSSadaf Ebrahimi   }
3053*f5c631daSSadaf Ebrahimi   for (unsigned r = 0; r < kNumberOfVRegisters; r++) {
3054*f5c631daSSadaf Ebrahimi     LogicVRegister reg(simulator.ReadVRegister(r));
3055*f5c631daSSadaf Ebrahimi     // Try to initialise Z registers with reasonable FP values. We prioritise
3056*f5c631daSSadaf Ebrahimi     // setting double values, then floats and half-precision values. The lanes
3057*f5c631daSSadaf Ebrahimi     // overlap, so this is a compromise, but d0, s0 and h0 views all see similar
3058*f5c631daSSadaf Ebrahimi     // arithmetic values.
3059*f5c631daSSadaf Ebrahimi     //
3060*f5c631daSSadaf Ebrahimi     // The exponent of each value is set to the (biased) register number. We set
3061*f5c631daSSadaf Ebrahimi     // the double, float and half-precision exponents where we can.
3062*f5c631daSSadaf Ebrahimi     uint64_t base = 0x3ff000003f803c00 + (0x0010000000800400 * (0x7f + r));
3063*f5c631daSSadaf Ebrahimi     for (unsigned lane = 0; lane < (vl_in_bytes / kDRegSizeInBytes); lane++) {
3064*f5c631daSSadaf Ebrahimi       uint64_t mantissas = 0x0000000100010001 * (lane & 0x7f);
3065*f5c631daSSadaf Ebrahimi       reg.SetUint(kFormatVnD, lane, base | mantissas);
3066*f5c631daSSadaf Ebrahimi     }
3067*f5c631daSSadaf Ebrahimi   }
3068*f5c631daSSadaf Ebrahimi   for (unsigned r = 0; r < kNumberOfPRegisters; r++) {
3069*f5c631daSSadaf Ebrahimi     LogicPRegister reg(simulator.ReadPRegister(r));
3070*f5c631daSSadaf Ebrahimi     // Set `r` active lanes between each inactive lane.
3071*f5c631daSSadaf Ebrahimi     for (unsigned bit = 0; bit < pl_in_bits; bit++) {
3072*f5c631daSSadaf Ebrahimi       reg.SetActive(kFormatVnB, bit, ((bit + 1) % (r + 2)) != 0);
3073*f5c631daSSadaf Ebrahimi     }
3074*f5c631daSSadaf Ebrahimi     // Completely clear some Q-sized blocks. The trace will completely omit
3075*f5c631daSSadaf Ebrahimi     // these for stores.
3076*f5c631daSSadaf Ebrahimi     for (unsigned chunk = 0; chunk < (vl_in_bits / kQRegSize); chunk++) {
3077*f5c631daSSadaf Ebrahimi       if (((chunk + 1) % (r + 2)) == 0) {
3078*f5c631daSSadaf Ebrahimi         reg.SetActiveMask(chunk, static_cast<uint16_t>(0));
3079*f5c631daSSadaf Ebrahimi       }
3080*f5c631daSSadaf Ebrahimi     }
3081*f5c631daSSadaf Ebrahimi   }
3082*f5c631daSSadaf Ebrahimi 
3083*f5c631daSSadaf Ebrahimi   GenerateTestSequenceBase(&masm);
3084*f5c631daSSadaf Ebrahimi   GenerateTestSequenceFP(&masm);
3085*f5c631daSSadaf Ebrahimi   GenerateTestSequenceNEON(&masm);
3086*f5c631daSSadaf Ebrahimi   GenerateTestSequenceNEONFP(&masm);
3087*f5c631daSSadaf Ebrahimi   GenerateTestSequenceSVE(&masm);
3088*f5c631daSSadaf Ebrahimi   GenerateTestSequenceAtomics(&masm);
3089*f5c631daSSadaf Ebrahimi   masm.Ret();
3090*f5c631daSSadaf Ebrahimi   masm.FinalizeCode();
3091*f5c631daSSadaf Ebrahimi 
3092*f5c631daSSadaf Ebrahimi   if (Test::disassemble()) {
3093*f5c631daSSadaf Ebrahimi     PrintDisassembler disasm(stdout);
3094*f5c631daSSadaf Ebrahimi     Instruction* start = masm.GetBuffer()->GetStartAddress<Instruction*>();
3095*f5c631daSSadaf Ebrahimi     Instruction* end = masm.GetBuffer()->GetEndAddress<Instruction*>();
3096*f5c631daSSadaf Ebrahimi     disasm.DisassembleBuffer(start, end);
3097*f5c631daSSadaf Ebrahimi   }
3098*f5c631daSSadaf Ebrahimi 
3099*f5c631daSSadaf Ebrahimi   simulator.RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>());
3100*f5c631daSSadaf Ebrahimi 
3101*f5c631daSSadaf Ebrahimi   fclose(trace_stream);
3102*f5c631daSSadaf Ebrahimi 
3103*f5c631daSSadaf Ebrahimi   // We already traced into the temporary file, so just print the file.
3104*f5c631daSSadaf Ebrahimi   // Note that these tests need to control the trace flags, so we ignore all
3105*f5c631daSSadaf Ebrahimi   // --trace-* options here except for --trace-sim.
3106*f5c631daSSadaf Ebrahimi   if (Test::trace_sim()) PrintFile(trace_stream_filename);
3107*f5c631daSSadaf Ebrahimi 
3108*f5c631daSSadaf Ebrahimi   MaskAddresses(trace_stream_filename);
3109*f5c631daSSadaf Ebrahimi 
3110*f5c631daSSadaf Ebrahimi   bool trace_matched_reference =
3111*f5c631daSSadaf Ebrahimi       CheckOrGenerateTrace(trace_stream_filename, ref_file);
3112*f5c631daSSadaf Ebrahimi   remove(trace_stream_filename);  // Clean up before checking the result.
3113*f5c631daSSadaf Ebrahimi   VIXL_CHECK(trace_matched_reference);
3114*f5c631daSSadaf Ebrahimi 
3115*f5c631daSSadaf Ebrahimi   uint64_t offset_base = simulator.ReadRegister<uint64_t>(0);
3116*f5c631daSSadaf Ebrahimi   uint64_t index_base = simulator.ReadRegister<uint64_t>(1);
3117*f5c631daSSadaf Ebrahimi 
3118*f5c631daSSadaf Ebrahimi   VIXL_CHECK(index_base >= offset_base);
3119*f5c631daSSadaf Ebrahimi   VIXL_CHECK((index_base - offset_base) <= kScratchSize);
3120*f5c631daSSadaf Ebrahimi }
3121*f5c631daSSadaf Ebrahimi 
3122*f5c631daSSadaf Ebrahimi 
// Test individual options, one trace flag at a time, without colour.
TEST(disasm) {
  TraceTestHelper(/* coloured_trace = */ false,
                  LOG_DISASM,
                  REF("log-disasm"));
}
TEST(regs) {
  TraceTestHelper(/* coloured_trace = */ false, LOG_REGS, REF("log-regs"));
}
TEST(vregs) {
  TraceTestHelper(/* coloured_trace = */ false, LOG_VREGS, REF("log-vregs"));
}
TEST(sysregs) {
  TraceTestHelper(/* coloured_trace = */ false,
                  LOG_SYSREGS,
                  REF("log-sysregs"));
}
TEST(write) {
  TraceTestHelper(/* coloured_trace = */ false, LOG_WRITE, REF("log-write"));
}
// NOTE(review): this test is named `branch` and uses the "log-branch"
// reference, but it passes LOG_WRITE, exactly like TEST(write). Presumably a
// branch-specific trace flag was intended - confirm, and regenerate the
// reference trace if the flag is changed.
TEST(branch) {
  TraceTestHelper(/* coloured_trace = */ false, LOG_WRITE, REF("log-branch"));
}
3130*f5c631daSSadaf Ebrahimi 
// Test standard combinations of trace flags, without colour.
TEST(none) {
  TraceTestHelper(/* coloured_trace = */ false, LOG_NONE, REF("log-none"));
}
TEST(state) {
  TraceTestHelper(/* coloured_trace = */ false, LOG_STATE, REF("log-state"));
}
TEST(all) {
  TraceTestHelper(/* coloured_trace = */ false, LOG_ALL, REF("log-all"));
}
3135*f5c631daSSadaf Ebrahimi 
3136*f5c631daSSadaf Ebrahimi 
// Test individual options, one trace flag at a time, with colour.
TEST(disasm_colour) {
  TraceTestHelper(/* coloured_trace = */ true,
                  LOG_DISASM,
                  REF("log-disasm-colour"));
}
TEST(regs_colour) {
  TraceTestHelper(/* coloured_trace = */ true,
                  LOG_REGS,
                  REF("log-regs-colour"));
}
TEST(vregs_colour) {
  TraceTestHelper(/* coloured_trace = */ true,
                  LOG_VREGS,
                  REF("log-vregs-colour"));
}
TEST(sysregs_colour) {
  TraceTestHelper(/* coloured_trace = */ true,
                  LOG_SYSREGS,
                  REF("log-sysregs-colour"));
}
TEST(write_colour) {
  TraceTestHelper(/* coloured_trace = */ true,
                  LOG_WRITE,
                  REF("log-write-colour"));
}
// NOTE(review): this test is named `branch_colour` and uses the
// "log-branch-colour" reference, but it passes LOG_WRITE, exactly like
// TEST(write_colour). Presumably a branch-specific trace flag was intended -
// confirm, and regenerate the reference trace if the flag is changed.
TEST(branch_colour) {
  TraceTestHelper(/* coloured_trace = */ true,
                  LOG_WRITE,
                  REF("log-branch-colour"));
}
3154*f5c631daSSadaf Ebrahimi 
// Test standard combinations of trace flags, with colour.
TEST(none_colour) {
  TraceTestHelper(/* coloured_trace = */ true,
                  LOG_NONE,
                  REF("log-none-colour"));
}
TEST(state_colour) {
  TraceTestHelper(/* coloured_trace = */ true,
                  LOG_STATE,
                  REF("log-state-colour"));
}
TEST(all_colour) {
  TraceTestHelper(/* coloured_trace = */ true,
                  LOG_ALL,
                  REF("log-all-colour"));
}
3161*f5c631daSSadaf Ebrahimi 
3162*f5c631daSSadaf Ebrahimi #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
3163*f5c631daSSadaf Ebrahimi 
PrintDisassemblerTestHelper(const char * prefix,const char * suffix,const char * ref_file)3164*f5c631daSSadaf Ebrahimi static void PrintDisassemblerTestHelper(const char* prefix,
3165*f5c631daSSadaf Ebrahimi                                         const char* suffix,
3166*f5c631daSSadaf Ebrahimi                                         const char* ref_file) {
3167*f5c631daSSadaf Ebrahimi   MacroAssembler masm(12 * KBytes);
3168*f5c631daSSadaf Ebrahimi 
3169*f5c631daSSadaf Ebrahimi   char trace_stream_filename[] = "/tmp/vixl-test-trace-XXXXXX";
3170*f5c631daSSadaf Ebrahimi   FILE* trace_stream = fdopen(mkstemp(trace_stream_filename), "w");
3171*f5c631daSSadaf Ebrahimi 
3172*f5c631daSSadaf Ebrahimi   // We don't need to execute this code so there's no need for the execution
3173*f5c631daSSadaf Ebrahimi   // environment setup from TraceTestHelper.
3174*f5c631daSSadaf Ebrahimi 
3175*f5c631daSSadaf Ebrahimi   GenerateTestSequenceBase(&masm);
3176*f5c631daSSadaf Ebrahimi   GenerateTestSequenceFP(&masm);
3177*f5c631daSSadaf Ebrahimi   GenerateTestSequenceNEON(&masm);
3178*f5c631daSSadaf Ebrahimi   GenerateTestSequenceNEONFP(&masm);
3179*f5c631daSSadaf Ebrahimi   GenerateTestSequenceSVE(&masm);
3180*f5c631daSSadaf Ebrahimi   GenerateTestSequenceAtomics(&masm);
3181*f5c631daSSadaf Ebrahimi   masm.FinalizeCode();
3182*f5c631daSSadaf Ebrahimi 
3183*f5c631daSSadaf Ebrahimi   Decoder decoder;
3184*f5c631daSSadaf Ebrahimi   CPUFeaturesAuditor auditor(&decoder);
3185*f5c631daSSadaf Ebrahimi   PrintDisassembler disasm(trace_stream);
3186*f5c631daSSadaf Ebrahimi   if (prefix != NULL) disasm.SetCPUFeaturesPrefix(prefix);
3187*f5c631daSSadaf Ebrahimi   if (suffix != NULL) disasm.SetCPUFeaturesSuffix(suffix);
3188*f5c631daSSadaf Ebrahimi   disasm.RegisterCPUFeaturesAuditor(&auditor);
3189*f5c631daSSadaf Ebrahimi   decoder.AppendVisitor(&disasm);
3190*f5c631daSSadaf Ebrahimi 
3191*f5c631daSSadaf Ebrahimi   Instruction* instruction = masm.GetBuffer()->GetStartAddress<Instruction*>();
3192*f5c631daSSadaf Ebrahimi   Instruction* end = masm.GetCursorAddress<Instruction*>();
3193*f5c631daSSadaf Ebrahimi   while (instruction != end) {
3194*f5c631daSSadaf Ebrahimi     decoder.Decode(instruction);
3195*f5c631daSSadaf Ebrahimi     instruction += kInstructionSize;
3196*f5c631daSSadaf Ebrahimi   }
3197*f5c631daSSadaf Ebrahimi 
3198*f5c631daSSadaf Ebrahimi   fclose(trace_stream);
3199*f5c631daSSadaf Ebrahimi 
3200*f5c631daSSadaf Ebrahimi   // We already disassembled into the temporary file, so just print the file.
3201*f5c631daSSadaf Ebrahimi   if (Test::disassemble()) PrintFile(trace_stream_filename);
3202*f5c631daSSadaf Ebrahimi 
3203*f5c631daSSadaf Ebrahimi   MaskAddresses(trace_stream_filename);
3204*f5c631daSSadaf Ebrahimi 
3205*f5c631daSSadaf Ebrahimi   bool trace_matched_reference =
3206*f5c631daSSadaf Ebrahimi       CheckOrGenerateTrace(trace_stream_filename, ref_file);
3207*f5c631daSSadaf Ebrahimi   remove(trace_stream_filename);  // Clean up before checking the result.
3208*f5c631daSSadaf Ebrahimi   VIXL_CHECK(trace_matched_reference);
3209*f5c631daSSadaf Ebrahimi }
3210*f5c631daSSadaf Ebrahimi 
3211*f5c631daSSadaf Ebrahimi 
// Test CPUFeatures disassembly annotations: with the default prefix and suffix,
// with custom text, and with terminal colour escapes.
TEST(cpufeatures) {
  PrintDisassemblerTestHelper(/* prefix = */ NULL,
                              /* suffix = */ NULL,
                              REF("log-cpufeatures"));
}
TEST(cpufeatures_custom) {
  PrintDisassemblerTestHelper(/* prefix = */ "### {",
                              /* suffix = */ "} ###",
                              REF("log-cpufeatures-custom"));
}
TEST(cpufeatures_colour) {
  // The colour chosen is arbitrary.
  PrintDisassemblerTestHelper(/* prefix (bold magenta) = */ "\033[1;35m",
                              /* suffix (reset colour) = */ "\033[0;m",
                              REF("log-cpufeatures-colour"));
}
3225*f5c631daSSadaf Ebrahimi }  // namespace aarch64
3226*f5c631daSSadaf Ebrahimi }  // namespace vixl
3227