1*f5c631daSSadaf Ebrahimi // Copyright 2016, VIXL authors
2*f5c631daSSadaf Ebrahimi // All rights reserved.
3*f5c631daSSadaf Ebrahimi //
4*f5c631daSSadaf Ebrahimi // Redistribution and use in source and binary forms, with or without
5*f5c631daSSadaf Ebrahimi // modification, are permitted provided that the following conditions are met:
6*f5c631daSSadaf Ebrahimi //
7*f5c631daSSadaf Ebrahimi // * Redistributions of source code must retain the above copyright notice,
8*f5c631daSSadaf Ebrahimi // this list of conditions and the following disclaimer.
9*f5c631daSSadaf Ebrahimi // * Redistributions in binary form must reproduce the above copyright notice,
10*f5c631daSSadaf Ebrahimi // this list of conditions and the following disclaimer in the documentation
11*f5c631daSSadaf Ebrahimi // and/or other materials provided with the distribution.
12*f5c631daSSadaf Ebrahimi // * Neither the name of ARM Limited nor the names of its contributors may be
13*f5c631daSSadaf Ebrahimi // used to endorse or promote products derived from this software without
14*f5c631daSSadaf Ebrahimi // specific prior written permission.
15*f5c631daSSadaf Ebrahimi //
16*f5c631daSSadaf Ebrahimi // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17*f5c631daSSadaf Ebrahimi // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18*f5c631daSSadaf Ebrahimi // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19*f5c631daSSadaf Ebrahimi // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20*f5c631daSSadaf Ebrahimi // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21*f5c631daSSadaf Ebrahimi // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22*f5c631daSSadaf Ebrahimi // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23*f5c631daSSadaf Ebrahimi // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24*f5c631daSSadaf Ebrahimi // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25*f5c631daSSadaf Ebrahimi // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*f5c631daSSadaf Ebrahimi
27*f5c631daSSadaf Ebrahimi #include <cfloat>
28*f5c631daSSadaf Ebrahimi #include <cmath>
29*f5c631daSSadaf Ebrahimi #include <cstdio>
30*f5c631daSSadaf Ebrahimi #include <cstdlib>
31*f5c631daSSadaf Ebrahimi #include <cstring>
32*f5c631daSSadaf Ebrahimi
33*f5c631daSSadaf Ebrahimi #include <fstream>
34*f5c631daSSadaf Ebrahimi #include <regex>
35*f5c631daSSadaf Ebrahimi
36*f5c631daSSadaf Ebrahimi #include "test-runner.h"
37*f5c631daSSadaf Ebrahimi #include "test-utils-aarch64.h"
38*f5c631daSSadaf Ebrahimi
39*f5c631daSSadaf Ebrahimi #include "aarch64/cpu-aarch64.h"
40*f5c631daSSadaf Ebrahimi #include "aarch64/disasm-aarch64.h"
41*f5c631daSSadaf Ebrahimi #include "aarch64/macro-assembler-aarch64.h"
42*f5c631daSSadaf Ebrahimi #include "aarch64/simulator-aarch64.h"
43*f5c631daSSadaf Ebrahimi
44*f5c631daSSadaf Ebrahimi namespace vixl {
45*f5c631daSSadaf Ebrahimi namespace aarch64 {
46*f5c631daSSadaf Ebrahimi
// Shorthand used by the generators below: `__ insn(...)` expands to
// `masm->insn(...)`, so a variable named `masm` must be in scope at the use.
47*f5c631daSSadaf Ebrahimi #define __ masm->
// Forwards to TEST_ with `TRACE_` pasted in front of the given test name.
// NOTE(review): TEST_ is not defined in this part of the file; presumably it
// comes from "test-runner.h" - confirm.
48*f5c631daSSadaf Ebrahimi #define TEST(name) TEST_(TRACE_##name)
49*f5c631daSSadaf Ebrahimi
// Builds a path under the trace reference directory. This relies on adjacent
// string literal concatenation, so `name` has to be a string literal.
50*f5c631daSSadaf Ebrahimi #define REF(name) "test/test-trace-reference/" name
51*f5c631daSSadaf Ebrahimi
// Emits a fixed sequence of integer (general-purpose register) instructions
// through `masm`: data processing, conditional operations, system and barrier
// instructions, loads and stores, followed by a few branches.
//
// The instructions are listed roughly alphabetically by mnemonic. Within this
// function x0 is only ever used as a base address (without writeback) and x1
// only as the base register of the pre-/post-indexed accesses; data operands
// are taken from w2-w29 / x2-x29.
//
// NOTE(review): given the TRACE_ test prefix and the REF() reference path, the
// emitted stream is presumably compared against stored reference traces, so
// the order and operands of the calls below should not be changed casually -
// confirm against the tests that call this function.
GenerateTestSequenceBase(MacroAssembler * masm)52*f5c631daSSadaf Ebrahimi static void GenerateTestSequenceBase(MacroAssembler* masm) {
// The scope is sized from the bytes remaining in the assembler's buffer and
// uses the kMaximumSize policy.
// NOTE(review): presumably this is what permits the raw assembler calls below
// while treating the size as an upper bound - confirm in the scope's docs.
53*f5c631daSSadaf Ebrahimi ExactAssemblyScope guard(masm,
54*f5c631daSSadaf Ebrahimi masm->GetBuffer()->GetRemainingBytes(),
55*f5c631daSSadaf Ebrahimi ExactAssemblyScope::kMaximumSize);
56*f5c631daSSadaf Ebrahimi
// Most mnemonics are issued once with W registers and once with X registers.
57*f5c631daSSadaf Ebrahimi __ adc(w3, w4, w5);
58*f5c631daSSadaf Ebrahimi __ adc(x6, x7, x8);
59*f5c631daSSadaf Ebrahimi __ adcs(w9, w10, w11);
60*f5c631daSSadaf Ebrahimi __ adcs(x12, x13, x14);
61*f5c631daSSadaf Ebrahimi __ add(w15, w16, w17);
62*f5c631daSSadaf Ebrahimi __ add(x18, x19, x20);
63*f5c631daSSadaf Ebrahimi __ adds(w21, w22, w23);
64*f5c631daSSadaf Ebrahimi __ adds(x24, x25, x26);
65*f5c631daSSadaf Ebrahimi __ and_(w27, w28, w29);
66*f5c631daSSadaf Ebrahimi __ and_(x2, x3, x4);
67*f5c631daSSadaf Ebrahimi __ ands(w5, w6, w7);
68*f5c631daSSadaf Ebrahimi __ ands(x8, x9, x10);
69*f5c631daSSadaf Ebrahimi __ asr(w11, w12, 0);
70*f5c631daSSadaf Ebrahimi __ asr(x13, x14, 1);
71*f5c631daSSadaf Ebrahimi __ asrv(w15, w16, w17);
72*f5c631daSSadaf Ebrahimi __ asrv(x18, x19, x20);
73*f5c631daSSadaf Ebrahimi __ bfm(w21, w22, 5, 6);
74*f5c631daSSadaf Ebrahimi __ bfm(x23, x24, 7, 8);
75*f5c631daSSadaf Ebrahimi __ bic(w25, w26, w27);
76*f5c631daSSadaf Ebrahimi __ bic(x28, x29, x2);
77*f5c631daSSadaf Ebrahimi __ bics(w3, w4, w5);
78*f5c631daSSadaf Ebrahimi __ bics(x6, x7, x8);
// Conditional instructions are repeated with several condition codes, in most
// cases a condition and its inverse (eq/ne, cc/cs, hi/ls).
79*f5c631daSSadaf Ebrahimi __ ccmn(w9, w10, NoFlag, al);
80*f5c631daSSadaf Ebrahimi __ ccmn(w9, w10, NoFlag, eq);
81*f5c631daSSadaf Ebrahimi __ ccmn(w9, w10, NoFlag, ne);
82*f5c631daSSadaf Ebrahimi __ ccmn(x11, x12, CFlag, al);
83*f5c631daSSadaf Ebrahimi __ ccmn(x11, x12, CFlag, cc);
84*f5c631daSSadaf Ebrahimi __ ccmn(x11, x12, CFlag, cs);
85*f5c631daSSadaf Ebrahimi __ ccmp(w13, w14, VFlag, al);
86*f5c631daSSadaf Ebrahimi __ ccmp(w13, w14, VFlag, hi);
87*f5c631daSSadaf Ebrahimi __ ccmp(w13, w14, VFlag, ls);
88*f5c631daSSadaf Ebrahimi __ ccmp(x15, x16, CVFlag, al);
89*f5c631daSSadaf Ebrahimi __ ccmp(x15, x16, CVFlag, eq);
90*f5c631daSSadaf Ebrahimi __ ccmp(x15, x16, CVFlag, ne);
91*f5c631daSSadaf Ebrahimi __ cinc(w17, w18, cc);
92*f5c631daSSadaf Ebrahimi __ cinc(w17, w18, cs);
93*f5c631daSSadaf Ebrahimi __ cinc(x19, x20, hi);
94*f5c631daSSadaf Ebrahimi __ cinc(x19, x20, ls);
95*f5c631daSSadaf Ebrahimi __ cinv(w21, w22, eq);
96*f5c631daSSadaf Ebrahimi __ cinv(w21, w22, ne);
97*f5c631daSSadaf Ebrahimi __ cinv(x23, x24, cc);
98*f5c631daSSadaf Ebrahimi __ cinv(x23, x24, cs);
99*f5c631daSSadaf Ebrahimi __ clrex();
100*f5c631daSSadaf Ebrahimi __ cls(w25, w26);
101*f5c631daSSadaf Ebrahimi __ cls(x27, x28);
102*f5c631daSSadaf Ebrahimi __ clz(w29, w2);
103*f5c631daSSadaf Ebrahimi __ clz(x3, x4);
104*f5c631daSSadaf Ebrahimi __ cmn(w5, w6);
105*f5c631daSSadaf Ebrahimi __ cmn(x7, x8);
106*f5c631daSSadaf Ebrahimi __ cmp(w9, w10);
107*f5c631daSSadaf Ebrahimi __ cmp(x11, x12);
108*f5c631daSSadaf Ebrahimi __ cneg(w13, w14, hi);
109*f5c631daSSadaf Ebrahimi __ cneg(w13, w14, ls);
110*f5c631daSSadaf Ebrahimi __ cneg(x15, x16, eq);
111*f5c631daSSadaf Ebrahimi __ cneg(x15, x16, ne);
112*f5c631daSSadaf Ebrahimi __ crc32b(w17, w18, w19);
113*f5c631daSSadaf Ebrahimi __ crc32cb(w20, w21, w22);
114*f5c631daSSadaf Ebrahimi __ crc32ch(w23, w24, w25);
115*f5c631daSSadaf Ebrahimi __ crc32cw(w26, w27, w28);
116*f5c631daSSadaf Ebrahimi __ crc32h(w4, w5, w6);
117*f5c631daSSadaf Ebrahimi __ crc32w(w7, w8, w9);
118*f5c631daSSadaf Ebrahimi __ csel(w13, w14, w15, cc);
119*f5c631daSSadaf Ebrahimi __ csel(w13, w14, w15, cs);
120*f5c631daSSadaf Ebrahimi __ csel(x16, x17, x18, hi);
121*f5c631daSSadaf Ebrahimi __ csel(x16, x17, x18, ls);
122*f5c631daSSadaf Ebrahimi __ cset(w19, eq);
123*f5c631daSSadaf Ebrahimi __ cset(w19, ne);
124*f5c631daSSadaf Ebrahimi __ cset(x20, cc);
125*f5c631daSSadaf Ebrahimi __ cset(x20, cs);
126*f5c631daSSadaf Ebrahimi __ csetm(w21, hi);
127*f5c631daSSadaf Ebrahimi __ csetm(w21, ls);
128*f5c631daSSadaf Ebrahimi __ csetm(x22, eq);
129*f5c631daSSadaf Ebrahimi __ csetm(x22, ne);
130*f5c631daSSadaf Ebrahimi __ csinc(w23, w24, w25, cc);
131*f5c631daSSadaf Ebrahimi __ csinc(w23, w24, w25, cs);
132*f5c631daSSadaf Ebrahimi __ csinc(x26, x27, x28, hi);
133*f5c631daSSadaf Ebrahimi __ csinc(x26, x27, x28, ls);
134*f5c631daSSadaf Ebrahimi __ csinv(w29, w2, w3, eq);
135*f5c631daSSadaf Ebrahimi __ csinv(w29, w2, w3, ne);
136*f5c631daSSadaf Ebrahimi __ csinv(x4, x5, x6, cc);
137*f5c631daSSadaf Ebrahimi __ csinv(x4, x5, x6, cs);
138*f5c631daSSadaf Ebrahimi __ csneg(w7, w8, w9, hi);
139*f5c631daSSadaf Ebrahimi __ csneg(w7, w8, w9, ls);
140*f5c631daSSadaf Ebrahimi __ csneg(x10, x11, x12, eq);
141*f5c631daSSadaf Ebrahimi __ csneg(x10, x11, x12, ne);
142*f5c631daSSadaf Ebrahimi __ dc(CVAC, x0);
143*f5c631daSSadaf Ebrahimi __ dmb(InnerShareable, BarrierAll);
144*f5c631daSSadaf Ebrahimi __ dsb(InnerShareable, BarrierAll);
145*f5c631daSSadaf Ebrahimi __ eon(w13, w14, w15);
146*f5c631daSSadaf Ebrahimi __ eon(x16, x17, x18);
147*f5c631daSSadaf Ebrahimi __ eor(w19, w20, w21);
148*f5c631daSSadaf Ebrahimi __ eor(x22, x23, x24);
149*f5c631daSSadaf Ebrahimi __ extr(w25, w26, w27, 9);
150*f5c631daSSadaf Ebrahimi __ extr(x28, x29, x2, 10);
151*f5c631daSSadaf Ebrahimi __ hint(NOP);
152*f5c631daSSadaf Ebrahimi __ ic(IVAU, x0);
153*f5c631daSSadaf Ebrahimi __ isb();
// Loads. Plain and unscaled-offset forms address through x0; the PostIndex and
// PreIndex forms use x1 as their base.
154*f5c631daSSadaf Ebrahimi __ ldar(w3, MemOperand(x0));
155*f5c631daSSadaf Ebrahimi __ ldar(x4, MemOperand(x0));
156*f5c631daSSadaf Ebrahimi __ ldarb(w5, MemOperand(x0));
157*f5c631daSSadaf Ebrahimi __ ldarb(x6, MemOperand(x0));
158*f5c631daSSadaf Ebrahimi __ ldarh(w7, MemOperand(x0));
159*f5c631daSSadaf Ebrahimi __ ldarh(x8, MemOperand(x0));
160*f5c631daSSadaf Ebrahimi __ ldaxp(w9, w10, MemOperand(x0));
161*f5c631daSSadaf Ebrahimi __ ldaxp(x11, x12, MemOperand(x0));
162*f5c631daSSadaf Ebrahimi __ ldaxr(w13, MemOperand(x0));
163*f5c631daSSadaf Ebrahimi __ ldaxr(x14, MemOperand(x0));
164*f5c631daSSadaf Ebrahimi __ ldaxrb(w15, MemOperand(x0));
165*f5c631daSSadaf Ebrahimi __ ldaxrb(x16, MemOperand(x0));
166*f5c631daSSadaf Ebrahimi __ ldaxrh(w17, MemOperand(x0));
167*f5c631daSSadaf Ebrahimi __ ldaxrh(x18, MemOperand(x0));
168*f5c631daSSadaf Ebrahimi __ ldnp(w19, w20, MemOperand(x0));
169*f5c631daSSadaf Ebrahimi __ ldnp(x21, x22, MemOperand(x0));
170*f5c631daSSadaf Ebrahimi __ ldp(w23, w24, MemOperand(x0));
171*f5c631daSSadaf Ebrahimi __ ldp(w23, w24, MemOperand(x1, 8, PostIndex));
172*f5c631daSSadaf Ebrahimi __ ldp(w23, w24, MemOperand(x1, 8, PreIndex));
173*f5c631daSSadaf Ebrahimi __ ldp(x25, x26, MemOperand(x0));
174*f5c631daSSadaf Ebrahimi __ ldp(x25, x26, MemOperand(x1, 16, PostIndex));
175*f5c631daSSadaf Ebrahimi __ ldp(x25, x26, MemOperand(x1, 16, PreIndex));
176*f5c631daSSadaf Ebrahimi __ ldpsw(x27, x28, MemOperand(x0));
177*f5c631daSSadaf Ebrahimi __ ldpsw(x27, x28, MemOperand(x1, 8, PostIndex));
178*f5c631daSSadaf Ebrahimi __ ldpsw(x27, x28, MemOperand(x1, 8, PreIndex));
179*f5c631daSSadaf Ebrahimi __ ldr(w29, MemOperand(x0));
180*f5c631daSSadaf Ebrahimi __ ldr(w29, MemOperand(x1, 4, PostIndex));
181*f5c631daSSadaf Ebrahimi __ ldr(w29, MemOperand(x1, 4, PreIndex));
182*f5c631daSSadaf Ebrahimi __ ldr(x2, MemOperand(x0));
183*f5c631daSSadaf Ebrahimi __ ldr(x2, MemOperand(x1, 8, PostIndex));
184*f5c631daSSadaf Ebrahimi __ ldr(x2, MemOperand(x1, 8, PreIndex));
185*f5c631daSSadaf Ebrahimi __ ldrb(w3, MemOperand(x0));
186*f5c631daSSadaf Ebrahimi __ ldrb(w3, MemOperand(x1, 1, PostIndex));
187*f5c631daSSadaf Ebrahimi __ ldrb(w3, MemOperand(x1, 1, PreIndex));
188*f5c631daSSadaf Ebrahimi __ ldrb(x4, MemOperand(x0));
189*f5c631daSSadaf Ebrahimi __ ldrb(x4, MemOperand(x1, 1, PostIndex));
190*f5c631daSSadaf Ebrahimi __ ldrb(x4, MemOperand(x1, 1, PreIndex));
191*f5c631daSSadaf Ebrahimi __ ldrh(w5, MemOperand(x0));
192*f5c631daSSadaf Ebrahimi __ ldrh(w5, MemOperand(x1, 2, PostIndex));
193*f5c631daSSadaf Ebrahimi __ ldrh(w5, MemOperand(x1, 2, PreIndex));
194*f5c631daSSadaf Ebrahimi __ ldrh(x6, MemOperand(x0));
195*f5c631daSSadaf Ebrahimi __ ldrh(x6, MemOperand(x1, 2, PostIndex));
196*f5c631daSSadaf Ebrahimi __ ldrh(x6, MemOperand(x1, 2, PreIndex));
197*f5c631daSSadaf Ebrahimi __ ldrsb(w7, MemOperand(x0));
198*f5c631daSSadaf Ebrahimi __ ldrsb(w7, MemOperand(x1, 1, PostIndex));
199*f5c631daSSadaf Ebrahimi __ ldrsb(w7, MemOperand(x1, 1, PreIndex));
200*f5c631daSSadaf Ebrahimi __ ldrsb(x8, MemOperand(x0));
201*f5c631daSSadaf Ebrahimi __ ldrsb(x8, MemOperand(x1, 1, PostIndex));
202*f5c631daSSadaf Ebrahimi __ ldrsb(x8, MemOperand(x1, 1, PreIndex));
203*f5c631daSSadaf Ebrahimi __ ldrsh(w9, MemOperand(x0));
204*f5c631daSSadaf Ebrahimi __ ldrsh(w9, MemOperand(x1, 2, PostIndex));
205*f5c631daSSadaf Ebrahimi __ ldrsh(w9, MemOperand(x1, 2, PreIndex));
206*f5c631daSSadaf Ebrahimi __ ldrsh(x10, MemOperand(x0));
207*f5c631daSSadaf Ebrahimi __ ldrsh(x10, MemOperand(x1, 2, PostIndex));
208*f5c631daSSadaf Ebrahimi __ ldrsh(x10, MemOperand(x1, 2, PreIndex));
209*f5c631daSSadaf Ebrahimi __ ldrsw(x11, MemOperand(x0));
210*f5c631daSSadaf Ebrahimi __ ldrsw(x11, MemOperand(x1, 4, PostIndex));
211*f5c631daSSadaf Ebrahimi __ ldrsw(x11, MemOperand(x1, 4, PreIndex));
212*f5c631daSSadaf Ebrahimi __ ldur(w12, MemOperand(x0, 7));
213*f5c631daSSadaf Ebrahimi __ ldur(x13, MemOperand(x0, 15));
214*f5c631daSSadaf Ebrahimi __ ldurb(w14, MemOperand(x0, 1));
215*f5c631daSSadaf Ebrahimi __ ldurb(x15, MemOperand(x0, 1));
216*f5c631daSSadaf Ebrahimi __ ldurh(w16, MemOperand(x0, 3));
217*f5c631daSSadaf Ebrahimi __ ldurh(x17, MemOperand(x0, 3));
218*f5c631daSSadaf Ebrahimi __ ldursb(w18, MemOperand(x0, 1));
219*f5c631daSSadaf Ebrahimi __ ldursb(x19, MemOperand(x0, 1));
220*f5c631daSSadaf Ebrahimi __ ldursh(w20, MemOperand(x0, 3));
221*f5c631daSSadaf Ebrahimi __ ldursh(x21, MemOperand(x0, 3));
222*f5c631daSSadaf Ebrahimi __ ldursw(x22, MemOperand(x0, 7));
223*f5c631daSSadaf Ebrahimi __ ldxp(w23, w24, MemOperand(x0));
224*f5c631daSSadaf Ebrahimi __ ldxp(x25, x26, MemOperand(x0));
225*f5c631daSSadaf Ebrahimi __ ldxr(w27, MemOperand(x0));
226*f5c631daSSadaf Ebrahimi __ ldxr(x28, MemOperand(x0));
227*f5c631daSSadaf Ebrahimi __ ldxrb(w29, MemOperand(x0));
228*f5c631daSSadaf Ebrahimi __ ldxrb(x2, MemOperand(x0));
229*f5c631daSSadaf Ebrahimi __ ldxrh(w3, MemOperand(x0));
230*f5c631daSSadaf Ebrahimi __ ldxrh(x4, MemOperand(x0));
231*f5c631daSSadaf Ebrahimi __ lsl(w5, w6, 2);
232*f5c631daSSadaf Ebrahimi __ lsl(x7, x8, 3);
233*f5c631daSSadaf Ebrahimi __ lslv(w9, w10, w11);
234*f5c631daSSadaf Ebrahimi __ lslv(x12, x13, x14);
235*f5c631daSSadaf Ebrahimi __ lsr(w15, w16, 4);
236*f5c631daSSadaf Ebrahimi __ lsr(x17, x18, 5);
237*f5c631daSSadaf Ebrahimi __ lsrv(w19, w20, w21);
238*f5c631daSSadaf Ebrahimi __ lsrv(x22, x23, x24);
239*f5c631daSSadaf Ebrahimi __ madd(w25, w26, w27, w28);
240*f5c631daSSadaf Ebrahimi __ madd(x29, x2, x3, x4);
241*f5c631daSSadaf Ebrahimi __ mneg(w5, w6, w7);
242*f5c631daSSadaf Ebrahimi __ mneg(x8, x9, x10);
243*f5c631daSSadaf Ebrahimi __ mov(w11, w12);
244*f5c631daSSadaf Ebrahimi __ mov(x13, x14);
245*f5c631daSSadaf Ebrahimi __ movk(w15, 130);
246*f5c631daSSadaf Ebrahimi __ movk(x16, 131);
247*f5c631daSSadaf Ebrahimi __ movn(w17, 132);
248*f5c631daSSadaf Ebrahimi __ movn(x18, 133);
249*f5c631daSSadaf Ebrahimi __ movz(w19, 134);
250*f5c631daSSadaf Ebrahimi __ movz(x20, 135);
251*f5c631daSSadaf Ebrahimi __ msub(w22, w23, w24, w25);
252*f5c631daSSadaf Ebrahimi __ msub(x26, x27, x28, x29);
253*f5c631daSSadaf Ebrahimi __ mul(w2, w3, w4);
254*f5c631daSSadaf Ebrahimi __ mul(x5, x6, x7);
255*f5c631daSSadaf Ebrahimi __ mvn(w8, w9);
256*f5c631daSSadaf Ebrahimi __ mvn(x10, x11);
257*f5c631daSSadaf Ebrahimi __ neg(w12, w13);
258*f5c631daSSadaf Ebrahimi __ neg(x14, x15);
259*f5c631daSSadaf Ebrahimi __ negs(w16, w17);
260*f5c631daSSadaf Ebrahimi __ negs(x18, x19);
261*f5c631daSSadaf Ebrahimi __ ngc(w20, w21);
262*f5c631daSSadaf Ebrahimi __ ngc(x22, x23);
263*f5c631daSSadaf Ebrahimi __ ngcs(w24, w25);
264*f5c631daSSadaf Ebrahimi __ ngcs(x26, x27);
265*f5c631daSSadaf Ebrahimi __ nop();
266*f5c631daSSadaf Ebrahimi __ orn(w28, w29, w2);
267*f5c631daSSadaf Ebrahimi __ orn(x3, x4, x5);
268*f5c631daSSadaf Ebrahimi __ orr(w6, w7, w8);
269*f5c631daSSadaf Ebrahimi __ orr(x9, x10, x11);
270*f5c631daSSadaf Ebrahimi __ prfm(PLDL1KEEP, MemOperand(x0, 4));
271*f5c631daSSadaf Ebrahimi __ prfum(PLDL1KEEP, MemOperand(x0, 1));
272*f5c631daSSadaf Ebrahimi __ rbit(w12, w13);
273*f5c631daSSadaf Ebrahimi __ rbit(x14, x15);
274*f5c631daSSadaf Ebrahimi __ rev(w16, w17);
275*f5c631daSSadaf Ebrahimi __ rev(x18, x19);
276*f5c631daSSadaf Ebrahimi __ rev16(w20, w21);
277*f5c631daSSadaf Ebrahimi __ rev16(x22, x23);
278*f5c631daSSadaf Ebrahimi __ rev32(x24, x25);
279*f5c631daSSadaf Ebrahimi __ rorv(w26, w27, w28);
280*f5c631daSSadaf Ebrahimi __ rorv(x29, x2, x3);
281*f5c631daSSadaf Ebrahimi __ sbc(w4, w5, w6);
282*f5c631daSSadaf Ebrahimi __ sbc(x7, x8, x9);
283*f5c631daSSadaf Ebrahimi __ sbcs(w10, w11, w12);
284*f5c631daSSadaf Ebrahimi __ sbcs(x13, x14, x15);
285*f5c631daSSadaf Ebrahimi __ sbfiz(w16, w17, 2, 3);
286*f5c631daSSadaf Ebrahimi __ sbfiz(x18, x19, 4, 5);
287*f5c631daSSadaf Ebrahimi __ sbfx(w22, w23, 6, 7);
288*f5c631daSSadaf Ebrahimi __ sbfx(x24, x25, 8, 9);
289*f5c631daSSadaf Ebrahimi __ sdiv(w26, w27, w28);
290*f5c631daSSadaf Ebrahimi __ sdiv(x29, x2, x3);
291*f5c631daSSadaf Ebrahimi __ smulh(x12, x13, x14);
// Stores, using the same addressing convention as the loads: x0 for plain and
// unscaled-offset forms, x1 for the PostIndex and PreIndex forms.
292*f5c631daSSadaf Ebrahimi __ stlr(w18, MemOperand(x0));
293*f5c631daSSadaf Ebrahimi __ stlr(x19, MemOperand(x0));
294*f5c631daSSadaf Ebrahimi __ stlrb(w20, MemOperand(x0));
295*f5c631daSSadaf Ebrahimi __ stlrb(x21, MemOperand(x0));
296*f5c631daSSadaf Ebrahimi __ stlrh(w22, MemOperand(x0));
297*f5c631daSSadaf Ebrahimi __ stlrh(x23, MemOperand(x0));
298*f5c631daSSadaf Ebrahimi __ stlxp(w24, w25, w26, MemOperand(x0));
299*f5c631daSSadaf Ebrahimi __ stlxp(x27, x28, x29, MemOperand(x0));
300*f5c631daSSadaf Ebrahimi __ stlxr(w2, w3, MemOperand(x0));
301*f5c631daSSadaf Ebrahimi __ stlxr(x4, x5, MemOperand(x0));
302*f5c631daSSadaf Ebrahimi __ stlxrb(w6, w7, MemOperand(x0));
303*f5c631daSSadaf Ebrahimi __ stlxrb(x8, x9, MemOperand(x0));
304*f5c631daSSadaf Ebrahimi __ stlxrh(w10, w11, MemOperand(x0));
305*f5c631daSSadaf Ebrahimi __ stlxrh(x12, x13, MemOperand(x0));
306*f5c631daSSadaf Ebrahimi __ stnp(w14, w15, MemOperand(x0));
307*f5c631daSSadaf Ebrahimi __ stnp(x16, x17, MemOperand(x0));
308*f5c631daSSadaf Ebrahimi __ stp(w18, w19, MemOperand(x0));
309*f5c631daSSadaf Ebrahimi __ stp(w18, w19, MemOperand(x1, 8, PostIndex));
310*f5c631daSSadaf Ebrahimi __ stp(w18, w19, MemOperand(x1, 8, PreIndex));
311*f5c631daSSadaf Ebrahimi __ stp(x20, x21, MemOperand(x0));
312*f5c631daSSadaf Ebrahimi __ stp(x20, x21, MemOperand(x1, 16, PostIndex));
313*f5c631daSSadaf Ebrahimi __ stp(x20, x21, MemOperand(x1, 16, PreIndex));
314*f5c631daSSadaf Ebrahimi __ str(w22, MemOperand(x0));
315*f5c631daSSadaf Ebrahimi __ str(w22, MemOperand(x1, 4, PostIndex));
316*f5c631daSSadaf Ebrahimi __ str(w22, MemOperand(x1, 4, PreIndex));
317*f5c631daSSadaf Ebrahimi __ str(x23, MemOperand(x0));
318*f5c631daSSadaf Ebrahimi __ str(x23, MemOperand(x1, 8, PostIndex));
319*f5c631daSSadaf Ebrahimi __ str(x23, MemOperand(x1, 8, PreIndex));
320*f5c631daSSadaf Ebrahimi __ strb(w24, MemOperand(x0));
321*f5c631daSSadaf Ebrahimi __ strb(w24, MemOperand(x1, 1, PostIndex));
322*f5c631daSSadaf Ebrahimi __ strb(w24, MemOperand(x1, 1, PreIndex));
323*f5c631daSSadaf Ebrahimi __ strb(x25, MemOperand(x0));
324*f5c631daSSadaf Ebrahimi __ strb(x25, MemOperand(x1, 1, PostIndex));
325*f5c631daSSadaf Ebrahimi __ strb(x25, MemOperand(x1, 1, PreIndex));
326*f5c631daSSadaf Ebrahimi __ strh(w26, MemOperand(x0));
327*f5c631daSSadaf Ebrahimi __ strh(w26, MemOperand(x1, 2, PostIndex));
328*f5c631daSSadaf Ebrahimi __ strh(w26, MemOperand(x1, 2, PreIndex));
329*f5c631daSSadaf Ebrahimi __ strh(x27, MemOperand(x0));
330*f5c631daSSadaf Ebrahimi __ strh(x27, MemOperand(x1, 2, PostIndex));
331*f5c631daSSadaf Ebrahimi __ strh(x27, MemOperand(x1, 2, PreIndex));
332*f5c631daSSadaf Ebrahimi __ stur(w28, MemOperand(x0, 7));
333*f5c631daSSadaf Ebrahimi __ stur(x29, MemOperand(x0, 15));
334*f5c631daSSadaf Ebrahimi __ sturb(w2, MemOperand(x0, 1));
335*f5c631daSSadaf Ebrahimi __ sturb(x3, MemOperand(x0, 1));
336*f5c631daSSadaf Ebrahimi __ sturh(w4, MemOperand(x0, 3));
337*f5c631daSSadaf Ebrahimi __ sturh(x5, MemOperand(x0, 3));
338*f5c631daSSadaf Ebrahimi __ stxp(w6, w7, w8, MemOperand(x0));
339*f5c631daSSadaf Ebrahimi __ stxp(x9, x10, x11, MemOperand(x0));
340*f5c631daSSadaf Ebrahimi __ stxr(w12, w13, MemOperand(x0));
341*f5c631daSSadaf Ebrahimi __ stxr(x14, x15, MemOperand(x0));
342*f5c631daSSadaf Ebrahimi __ stxrb(w16, w17, MemOperand(x0));
343*f5c631daSSadaf Ebrahimi __ stxrb(x18, x19, MemOperand(x0));
344*f5c631daSSadaf Ebrahimi __ stxrh(w20, w21, MemOperand(x0));
345*f5c631daSSadaf Ebrahimi __ stxrh(x22, x23, MemOperand(x0));
346*f5c631daSSadaf Ebrahimi __ sub(w24, w25, w26);
347*f5c631daSSadaf Ebrahimi __ sub(x27, x28, x29);
348*f5c631daSSadaf Ebrahimi __ subs(w2, w3, w4);
349*f5c631daSSadaf Ebrahimi __ subs(x5, x6, x7);
350*f5c631daSSadaf Ebrahimi __ sxtb(w8, w9);
351*f5c631daSSadaf Ebrahimi __ sxtb(x10, x11);
352*f5c631daSSadaf Ebrahimi __ sxth(w12, w13);
353*f5c631daSSadaf Ebrahimi __ sxth(x14, x15);
354*f5c631daSSadaf Ebrahimi __ sxtw(w16, w17);
355*f5c631daSSadaf Ebrahimi __ sxtw(x18, x19);
356*f5c631daSSadaf Ebrahimi __ tst(w20, w21);
357*f5c631daSSadaf Ebrahimi __ tst(x22, x23);
358*f5c631daSSadaf Ebrahimi __ ubfiz(w24, w25, 10, 11);
359*f5c631daSSadaf Ebrahimi __ ubfiz(x26, x27, 12, 13);
360*f5c631daSSadaf Ebrahimi __ ubfm(w28, w29, 14, 15);
361*f5c631daSSadaf Ebrahimi __ ubfm(x2, x3, 1, 2);
362*f5c631daSSadaf Ebrahimi __ ubfx(w4, w5, 3, 4);
363*f5c631daSSadaf Ebrahimi __ ubfx(x6, x7, 5, 6);
364*f5c631daSSadaf Ebrahimi __ udiv(w8, w9, w10);
365*f5c631daSSadaf Ebrahimi __ udiv(x11, x12, x13);
366*f5c631daSSadaf Ebrahimi __ umulh(x22, x23, x24);
367*f5c631daSSadaf Ebrahimi __ uxtb(w28, w29);
368*f5c631daSSadaf Ebrahimi __ uxtb(x2, x3);
369*f5c631daSSadaf Ebrahimi __ uxth(w4, w5);
370*f5c631daSSadaf Ebrahimi __ uxth(x6, x7);
371*f5c631daSSadaf Ebrahimi __ uxtw(w8, w9);
372*f5c631daSSadaf Ebrahimi __ uxtw(x10, x11);
373*f5c631daSSadaf Ebrahimi
374*f5c631daSSadaf Ebrahimi // Branch tests.
375*f5c631daSSadaf Ebrahimi {
376*f5c631daSSadaf Ebrahimi Label end;
377*f5c631daSSadaf Ebrahimi // Branch to the next instruction.
378*f5c631daSSadaf Ebrahimi __ b(&end);
379*f5c631daSSadaf Ebrahimi __ bind(&end);
380*f5c631daSSadaf Ebrahimi }
381*f5c631daSSadaf Ebrahimi {
382*f5c631daSSadaf Ebrahimi Label loop, end;
// x3 - x3 is zero, so after this the `ne` condition is false on the first pass.
383*f5c631daSSadaf Ebrahimi __ subs(x3, x3, x3);
384*f5c631daSSadaf Ebrahimi __ bind(&loop);
385*f5c631daSSadaf Ebrahimi // Not-taken branch (the first time).
386*f5c631daSSadaf Ebrahimi // Taken branch (the second time).
387*f5c631daSSadaf Ebrahimi __ b(&end, ne);
// Comparing the zeroed x3 with 1 makes `ne` true, so the conditional branch
// above is taken once control returns to `loop`.
388*f5c631daSSadaf Ebrahimi __ cmp(x3, 1);
389*f5c631daSSadaf Ebrahimi // Backwards branch.
390*f5c631daSSadaf Ebrahimi __ b(&loop);
391*f5c631daSSadaf Ebrahimi __ bind(&end);
392*f5c631daSSadaf Ebrahimi }
393*f5c631daSSadaf Ebrahimi }
394*f5c631daSSadaf Ebrahimi
395*f5c631daSSadaf Ebrahimi
// Emits a fixed sequence of scalar floating-point instructions through `masm`:
// arithmetic, comparisons, conditional selects, conversions between the h, s
// and d register sizes, conversions between FP and integer (w/x) registers,
// moves, rounding and reciprocal/square-root estimate operations.
//
// The instructions are listed roughly alphabetically by mnemonic; no memory
// operands or labels are used in this function.
//
// NOTE(review): given the TRACE_ test prefix and the REF() reference path, the
// emitted stream is presumably compared against stored reference traces, so
// the order and operands of the calls below should not be changed casually -
// confirm against the tests that call this function.
GenerateTestSequenceFP(MacroAssembler * masm)396*f5c631daSSadaf Ebrahimi static void GenerateTestSequenceFP(MacroAssembler* masm) {
// The scope is sized from the bytes remaining in the assembler's buffer and
// uses the kMaximumSize policy.
// NOTE(review): presumably this is what permits the raw assembler calls below
// while treating the size as an upper bound - confirm in the scope's docs.
397*f5c631daSSadaf Ebrahimi ExactAssemblyScope guard(masm,
398*f5c631daSSadaf Ebrahimi masm->GetBuffer()->GetRemainingBytes(),
399*f5c631daSSadaf Ebrahimi ExactAssemblyScope::kMaximumSize);
400*f5c631daSSadaf Ebrahimi
401*f5c631daSSadaf Ebrahimi // Scalar floating point instructions.
402*f5c631daSSadaf Ebrahimi __ fabd(d13, d2, d19);
403*f5c631daSSadaf Ebrahimi __ fabd(s8, s10, s30);
404*f5c631daSSadaf Ebrahimi __ fabs(d1, d1);
405*f5c631daSSadaf Ebrahimi __ fabs(s25, s7);
406*f5c631daSSadaf Ebrahimi __ facge(d1, d23, d16);
407*f5c631daSSadaf Ebrahimi __ facge(s4, s17, s1);
408*f5c631daSSadaf Ebrahimi __ facgt(d2, d21, d24);
409*f5c631daSSadaf Ebrahimi __ facgt(s12, s26, s12);
410*f5c631daSSadaf Ebrahimi __ fadd(d13, d11, d22);
411*f5c631daSSadaf Ebrahimi __ fadd(s27, s19, s8);
412*f5c631daSSadaf Ebrahimi __ fccmp(d6, d10, NoFlag, hs);
413*f5c631daSSadaf Ebrahimi __ fccmp(s29, s20, NZVFlag, ne);
414*f5c631daSSadaf Ebrahimi __ fccmpe(d10, d2, NZCFlag, al);
415*f5c631daSSadaf Ebrahimi __ fccmpe(s3, s3, NZVFlag, pl);
// The compare instructions are issued in register-register form and, where
// given a literal 0.0 as last argument, in compare-with-zero form.
416*f5c631daSSadaf Ebrahimi __ fcmeq(d19, d8, d10);
417*f5c631daSSadaf Ebrahimi __ fcmeq(d0, d18, 0.0);
418*f5c631daSSadaf Ebrahimi __ fcmeq(s1, s4, s30);
419*f5c631daSSadaf Ebrahimi __ fcmeq(s22, s29, 0.0);
420*f5c631daSSadaf Ebrahimi __ fcmge(d27, d18, d1);
421*f5c631daSSadaf Ebrahimi __ fcmge(d31, d28, 0.0);
422*f5c631daSSadaf Ebrahimi __ fcmge(s31, s19, s9);
423*f5c631daSSadaf Ebrahimi __ fcmge(s1, s25, 0.0);
424*f5c631daSSadaf Ebrahimi __ fcmgt(d18, d1, d15);
425*f5c631daSSadaf Ebrahimi __ fcmgt(d3, d31, 0.0);
426*f5c631daSSadaf Ebrahimi __ fcmgt(s11, s25, s2);
427*f5c631daSSadaf Ebrahimi __ fcmgt(s17, s16, 0.0);
428*f5c631daSSadaf Ebrahimi __ fcmle(d24, d17, 0.0);
429*f5c631daSSadaf Ebrahimi __ fcmle(s11, s8, 0.0);
430*f5c631daSSadaf Ebrahimi __ fcmlt(d5, d31, 0.0);
431*f5c631daSSadaf Ebrahimi __ fcmlt(s18, s23, 0.0);
432*f5c631daSSadaf Ebrahimi __ fcmp(d10, d24);
433*f5c631daSSadaf Ebrahimi __ fcmp(d13, 0.0);
434*f5c631daSSadaf Ebrahimi __ fcmp(s18, s6);
435*f5c631daSSadaf Ebrahimi __ fcmp(s16, 0.0);
436*f5c631daSSadaf Ebrahimi __ fcmpe(d9, d17);
437*f5c631daSSadaf Ebrahimi __ fcmpe(d29, 0.0);
438*f5c631daSSadaf Ebrahimi __ fcmpe(s16, s17);
439*f5c631daSSadaf Ebrahimi __ fcmpe(s22, 0.0);
440*f5c631daSSadaf Ebrahimi __ fcsel(d10, d14, d19, gt);
441*f5c631daSSadaf Ebrahimi __ fcsel(s22, s18, s2, ge);
442*f5c631daSSadaf Ebrahimi __ fcvt(d4, h24);
443*f5c631daSSadaf Ebrahimi __ fcvt(d11, s2);
444*f5c631daSSadaf Ebrahimi __ fcvt(h8, d9);
445*f5c631daSSadaf Ebrahimi __ fcvt(h12, s1);
446*f5c631daSSadaf Ebrahimi __ fcvt(s12, d31);
447*f5c631daSSadaf Ebrahimi __ fcvt(s27, h25);
// FP-to-integer conversions. Each mnemonic is issued with an FP destination
// (d/s) and with w and x destinations, from both d and s sources.
448*f5c631daSSadaf Ebrahimi __ fcvtas(d28, d16);
449*f5c631daSSadaf Ebrahimi __ fcvtas(s3, s5);
450*f5c631daSSadaf Ebrahimi __ fcvtas(w18, d31);
451*f5c631daSSadaf Ebrahimi __ fcvtas(w29, s24);
452*f5c631daSSadaf Ebrahimi __ fcvtas(x9, d1);
453*f5c631daSSadaf Ebrahimi __ fcvtas(x30, s2);
454*f5c631daSSadaf Ebrahimi __ fcvtau(d14, d0);
455*f5c631daSSadaf Ebrahimi __ fcvtau(s31, s14);
456*f5c631daSSadaf Ebrahimi __ fcvtau(w16, d2);
457*f5c631daSSadaf Ebrahimi __ fcvtau(w18, s0);
458*f5c631daSSadaf Ebrahimi __ fcvtau(x26, d7);
459*f5c631daSSadaf Ebrahimi __ fcvtau(x25, s19);
460*f5c631daSSadaf Ebrahimi __ fcvtms(d30, d25);
461*f5c631daSSadaf Ebrahimi __ fcvtms(s12, s15);
462*f5c631daSSadaf Ebrahimi __ fcvtms(w9, d7);
463*f5c631daSSadaf Ebrahimi __ fcvtms(w19, s6);
464*f5c631daSSadaf Ebrahimi __ fcvtms(x6, d6);
465*f5c631daSSadaf Ebrahimi __ fcvtms(x22, s7);
466*f5c631daSSadaf Ebrahimi __ fcvtmu(d27, d0);
467*f5c631daSSadaf Ebrahimi __ fcvtmu(s8, s22);
468*f5c631daSSadaf Ebrahimi __ fcvtmu(w29, d19);
469*f5c631daSSadaf Ebrahimi __ fcvtmu(w26, s0);
470*f5c631daSSadaf Ebrahimi __ fcvtmu(x13, d5);
471*f5c631daSSadaf Ebrahimi __ fcvtmu(x5, s18);
472*f5c631daSSadaf Ebrahimi __ fcvtns(d30, d15);
473*f5c631daSSadaf Ebrahimi __ fcvtns(s10, s11);
474*f5c631daSSadaf Ebrahimi __ fcvtns(w21, d15);
475*f5c631daSSadaf Ebrahimi __ fcvtns(w18, s10);
476*f5c631daSSadaf Ebrahimi __ fcvtns(x8, d17);
477*f5c631daSSadaf Ebrahimi __ fcvtns(x17, s12);
478*f5c631daSSadaf Ebrahimi __ fcvtnu(d0, d21);
479*f5c631daSSadaf Ebrahimi __ fcvtnu(s6, s25);
480*f5c631daSSadaf Ebrahimi __ fcvtnu(w29, d11);
481*f5c631daSSadaf Ebrahimi __ fcvtnu(w25, s31);
482*f5c631daSSadaf Ebrahimi __ fcvtnu(x30, d11);
483*f5c631daSSadaf Ebrahimi __ fcvtnu(x27, s18);
484*f5c631daSSadaf Ebrahimi __ fcvtps(d11, d22);
485*f5c631daSSadaf Ebrahimi __ fcvtps(s29, s20);
486*f5c631daSSadaf Ebrahimi __ fcvtps(w15, d25);
487*f5c631daSSadaf Ebrahimi __ fcvtps(w16, s7);
488*f5c631daSSadaf Ebrahimi __ fcvtps(x13, d20);
489*f5c631daSSadaf Ebrahimi __ fcvtps(x3, s23);
490*f5c631daSSadaf Ebrahimi __ fcvtpu(d24, d1);
491*f5c631daSSadaf Ebrahimi __ fcvtpu(s14, s24);
492*f5c631daSSadaf Ebrahimi __ fcvtpu(w26, d29);
// This conversion uses the zero register as its destination.
493*f5c631daSSadaf Ebrahimi __ fcvtpu(wzr, s26);
494*f5c631daSSadaf Ebrahimi __ fcvtpu(x27, d6);
495*f5c631daSSadaf Ebrahimi __ fcvtpu(x29, s14);
496*f5c631daSSadaf Ebrahimi __ fcvtxn(s12, d12);
// fcvtzs/fcvtzu are issued both without and with a trailing integer argument.
// NOTE(review): that argument is presumably the number of fractional bits of
// the fixed-point form - confirm against the assembler declaration.
497*f5c631daSSadaf Ebrahimi __ fcvtzs(d15, d0);
498*f5c631daSSadaf Ebrahimi __ fcvtzs(d13, d4, 42);
499*f5c631daSSadaf Ebrahimi __ fcvtzs(s8, s11);
500*f5c631daSSadaf Ebrahimi __ fcvtzs(s31, s6, 25);
501*f5c631daSSadaf Ebrahimi __ fcvtzs(w6, d9);
502*f5c631daSSadaf Ebrahimi __ fcvtzs(w25, d10, 20);
503*f5c631daSSadaf Ebrahimi __ fcvtzs(w9, s1);
504*f5c631daSSadaf Ebrahimi __ fcvtzs(w17, s29, 30);
505*f5c631daSSadaf Ebrahimi __ fcvtzs(x19, d2);
506*f5c631daSSadaf Ebrahimi __ fcvtzs(x22, d14, 1);
507*f5c631daSSadaf Ebrahimi __ fcvtzs(x14, s20);
508*f5c631daSSadaf Ebrahimi __ fcvtzs(x3, s30, 33);
509*f5c631daSSadaf Ebrahimi __ fcvtzu(d28, d15);
510*f5c631daSSadaf Ebrahimi __ fcvtzu(d0, d4, 3);
511*f5c631daSSadaf Ebrahimi __ fcvtzu(s2, s5);
512*f5c631daSSadaf Ebrahimi __ fcvtzu(s4, s0, 30);
513*f5c631daSSadaf Ebrahimi __ fcvtzu(w11, d4);
514*f5c631daSSadaf Ebrahimi __ fcvtzu(w7, d24, 32);
515*f5c631daSSadaf Ebrahimi __ fcvtzu(w18, s24);
516*f5c631daSSadaf Ebrahimi __ fcvtzu(w14, s27, 4);
517*f5c631daSSadaf Ebrahimi __ fcvtzu(x22, d11);
518*f5c631daSSadaf Ebrahimi __ fcvtzu(x8, d27, 52);
519*f5c631daSSadaf Ebrahimi __ fcvtzu(x7, s20);
520*f5c631daSSadaf Ebrahimi __ fcvtzu(x22, s7, 44);
521*f5c631daSSadaf Ebrahimi __ fdiv(d6, d14, d15);
522*f5c631daSSadaf Ebrahimi __ fdiv(s26, s5, s25);
523*f5c631daSSadaf Ebrahimi __ fmadd(d18, d26, d12, d30);
524*f5c631daSSadaf Ebrahimi __ fmadd(s13, s9, s28, s4);
525*f5c631daSSadaf Ebrahimi __ fmax(d12, d5, d5);
526*f5c631daSSadaf Ebrahimi __ fmax(s12, s28, s6);
527*f5c631daSSadaf Ebrahimi __ fmaxnm(d28, d4, d2);
528*f5c631daSSadaf Ebrahimi __ fmaxnm(s6, s10, s8);
529*f5c631daSSadaf Ebrahimi __ fmin(d20, d20, d18);
530*f5c631daSSadaf Ebrahimi __ fmin(s7, s13, s16);
531*f5c631daSSadaf Ebrahimi __ fminnm(d19, d14, d30);
532*f5c631daSSadaf Ebrahimi __ fminnm(s0, s1, s1);
// fmov is issued between FP registers, between FP and integer registers in
// both directions, and with floating-point immediates.
533*f5c631daSSadaf Ebrahimi __ fmov(d13, d6);
534*f5c631daSSadaf Ebrahimi __ fmov(d2, x17);
535*f5c631daSSadaf Ebrahimi __ fmov(d8, -2.5000);
536*f5c631daSSadaf Ebrahimi __ fmov(s5, s3);
537*f5c631daSSadaf Ebrahimi __ fmov(s25, w20);
538*f5c631daSSadaf Ebrahimi __ fmov(s21, 2.8750f);
539*f5c631daSSadaf Ebrahimi __ fmov(w18, s24);
540*f5c631daSSadaf Ebrahimi __ fmov(x18, d2);
541*f5c631daSSadaf Ebrahimi __ fmsub(d20, d30, d3, d19);
542*f5c631daSSadaf Ebrahimi __ fmsub(s5, s19, s4, s12);
543*f5c631daSSadaf Ebrahimi __ fmul(d30, d27, d23);
544*f5c631daSSadaf Ebrahimi __ fmul(s25, s17, s15);
545*f5c631daSSadaf Ebrahimi __ fmulx(d4, d17, d1);
546*f5c631daSSadaf Ebrahimi __ fmulx(s14, s25, s4);
547*f5c631daSSadaf Ebrahimi __ fneg(d15, d0);
548*f5c631daSSadaf Ebrahimi __ fneg(s14, s15);
549*f5c631daSSadaf Ebrahimi __ fnmadd(d0, d16, d22, d31);
550*f5c631daSSadaf Ebrahimi __ fnmadd(s0, s18, s26, s18);
551*f5c631daSSadaf Ebrahimi __ fnmsub(d19, d12, d15, d21);
552*f5c631daSSadaf Ebrahimi __ fnmsub(s29, s0, s11, s26);
553*f5c631daSSadaf Ebrahimi __ fnmul(d31, d19, d1);
554*f5c631daSSadaf Ebrahimi __ fnmul(s18, s3, s17);
555*f5c631daSSadaf Ebrahimi __ frecpe(d7, d21);
556*f5c631daSSadaf Ebrahimi __ frecpe(s29, s17);
557*f5c631daSSadaf Ebrahimi __ frecps(d11, d26, d17);
558*f5c631daSSadaf Ebrahimi __ frecps(s18, s27, s1);
559*f5c631daSSadaf Ebrahimi __ frecpx(d15, d18);
560*f5c631daSSadaf Ebrahimi __ frecpx(s5, s10);
561*f5c631daSSadaf Ebrahimi __ frinta(d16, d30);
562*f5c631daSSadaf Ebrahimi __ frinta(s1, s22);
563*f5c631daSSadaf Ebrahimi __ frinti(d19, d29);
564*f5c631daSSadaf Ebrahimi __ frinti(s14, s21);
565*f5c631daSSadaf Ebrahimi __ frintm(d20, d30);
566*f5c631daSSadaf Ebrahimi __ frintm(s1, s16);
567*f5c631daSSadaf Ebrahimi __ frintn(d30, d1);
568*f5c631daSSadaf Ebrahimi __ frintn(s24, s10);
569*f5c631daSSadaf Ebrahimi __ frintp(d4, d20);
570*f5c631daSSadaf Ebrahimi __ frintp(s13, s3);
571*f5c631daSSadaf Ebrahimi __ frintx(d13, d20);
572*f5c631daSSadaf Ebrahimi __ frintx(s17, s7);
573*f5c631daSSadaf Ebrahimi __ frintz(d0, d8);
574*f5c631daSSadaf Ebrahimi __ frintz(s15, s29);
575*f5c631daSSadaf Ebrahimi __ frsqrte(d21, d10);
576*f5c631daSSadaf Ebrahimi __ frsqrte(s17, s25);
577*f5c631daSSadaf Ebrahimi __ frsqrts(d4, d29, d17);
578*f5c631daSSadaf Ebrahimi __ frsqrts(s14, s3, s24);
579*f5c631daSSadaf Ebrahimi __ fsqrt(d14, d17);
580*f5c631daSSadaf Ebrahimi __ fsqrt(s4, s14);
581*f5c631daSSadaf Ebrahimi __ fsub(d13, d19, d7);
582*f5c631daSSadaf Ebrahimi __ fsub(s3, s21, s27);
// Integer-to-FP conversions, from FP-register and w/x sources, again without
// and with the trailing integer argument.
583*f5c631daSSadaf Ebrahimi __ scvtf(d31, d16);
584*f5c631daSSadaf Ebrahimi __ scvtf(d26, d31, 24);
585*f5c631daSSadaf Ebrahimi __ scvtf(d6, w16);
586*f5c631daSSadaf Ebrahimi __ scvtf(d5, w20, 6);
587*f5c631daSSadaf Ebrahimi __ scvtf(d16, x8);
588*f5c631daSSadaf Ebrahimi __ scvtf(d15, x8, 10);
589*f5c631daSSadaf Ebrahimi __ scvtf(s7, s4);
590*f5c631daSSadaf Ebrahimi __ scvtf(s8, s15, 14);
591*f5c631daSSadaf Ebrahimi __ scvtf(s29, w10);
592*f5c631daSSadaf Ebrahimi __ scvtf(s15, w21, 11);
593*f5c631daSSadaf Ebrahimi __ scvtf(s27, x26);
594*f5c631daSSadaf Ebrahimi __ scvtf(s26, x12, 38);
595*f5c631daSSadaf Ebrahimi __ ucvtf(d0, d9);
596*f5c631daSSadaf Ebrahimi __ ucvtf(d5, d22, 47);
597*f5c631daSSadaf Ebrahimi __ ucvtf(d30, w27);
598*f5c631daSSadaf Ebrahimi __ ucvtf(d3, w19, 1);
599*f5c631daSSadaf Ebrahimi __ ucvtf(d28, x21);
600*f5c631daSSadaf Ebrahimi __ ucvtf(d27, x30, 35);
601*f5c631daSSadaf Ebrahimi __ ucvtf(s11, s5);
602*f5c631daSSadaf Ebrahimi __ ucvtf(s0, s23, 14);
603*f5c631daSSadaf Ebrahimi __ ucvtf(s20, w19);
604*f5c631daSSadaf Ebrahimi __ ucvtf(s21, w22, 18);
605*f5c631daSSadaf Ebrahimi __ ucvtf(s6, x13);
606*f5c631daSSadaf Ebrahimi __ ucvtf(s7, x2, 21);
607*f5c631daSSadaf Ebrahimi }
608*f5c631daSSadaf Ebrahimi
609*f5c631daSSadaf Ebrahimi
610*f5c631daSSadaf Ebrahimi static void GenerateTestSequenceNEON(MacroAssembler* masm) {
611*f5c631daSSadaf Ebrahimi ExactAssemblyScope guard(masm,
612*f5c631daSSadaf Ebrahimi masm->GetBuffer()->GetRemainingBytes(),
613*f5c631daSSadaf Ebrahimi ExactAssemblyScope::kMaximumSize);
614*f5c631daSSadaf Ebrahimi
615*f5c631daSSadaf Ebrahimi // NEON integer instructions.
616*f5c631daSSadaf Ebrahimi __ abs(d19, d0);
617*f5c631daSSadaf Ebrahimi __ abs(v16.V16B(), v11.V16B());
618*f5c631daSSadaf Ebrahimi __ abs(v0.V2D(), v31.V2D());
619*f5c631daSSadaf Ebrahimi __ abs(v27.V2S(), v25.V2S());
620*f5c631daSSadaf Ebrahimi __ abs(v21.V4H(), v27.V4H());
621*f5c631daSSadaf Ebrahimi __ abs(v16.V4S(), v1.V4S());
622*f5c631daSSadaf Ebrahimi __ abs(v31.V8B(), v5.V8B());
623*f5c631daSSadaf Ebrahimi __ abs(v29.V8H(), v13.V8H());
624*f5c631daSSadaf Ebrahimi __ add(d10, d5, d17);
625*f5c631daSSadaf Ebrahimi __ add(v31.V16B(), v15.V16B(), v23.V16B());
626*f5c631daSSadaf Ebrahimi __ add(v10.V2D(), v31.V2D(), v14.V2D());
627*f5c631daSSadaf Ebrahimi __ add(v15.V2S(), v14.V2S(), v19.V2S());
628*f5c631daSSadaf Ebrahimi __ add(v27.V4H(), v23.V4H(), v17.V4H());
629*f5c631daSSadaf Ebrahimi __ add(v25.V4S(), v28.V4S(), v29.V4S());
630*f5c631daSSadaf Ebrahimi __ add(v13.V8B(), v7.V8B(), v18.V8B());
631*f5c631daSSadaf Ebrahimi __ add(v4.V8H(), v2.V8H(), v1.V8H());
632*f5c631daSSadaf Ebrahimi __ addhn(v10.V2S(), v14.V2D(), v15.V2D());
633*f5c631daSSadaf Ebrahimi __ addhn(v10.V4H(), v30.V4S(), v26.V4S());
634*f5c631daSSadaf Ebrahimi __ addhn(v31.V8B(), v12.V8H(), v22.V8H());
635*f5c631daSSadaf Ebrahimi __ addhn2(v16.V16B(), v21.V8H(), v20.V8H());
636*f5c631daSSadaf Ebrahimi __ addhn2(v0.V4S(), v2.V2D(), v17.V2D());
637*f5c631daSSadaf Ebrahimi __ addhn2(v31.V8H(), v7.V4S(), v17.V4S());
638*f5c631daSSadaf Ebrahimi __ addp(d14, v19.V2D());
639*f5c631daSSadaf Ebrahimi __ addp(v3.V16B(), v8.V16B(), v28.V16B());
640*f5c631daSSadaf Ebrahimi __ addp(v8.V2D(), v5.V2D(), v17.V2D());
641*f5c631daSSadaf Ebrahimi __ addp(v22.V2S(), v30.V2S(), v26.V2S());
642*f5c631daSSadaf Ebrahimi __ addp(v29.V4H(), v24.V4H(), v14.V4H());
643*f5c631daSSadaf Ebrahimi __ addp(v30.V4S(), v26.V4S(), v24.V4S());
644*f5c631daSSadaf Ebrahimi __ addp(v12.V8B(), v26.V8B(), v7.V8B());
645*f5c631daSSadaf Ebrahimi __ addp(v17.V8H(), v8.V8H(), v12.V8H());
646*f5c631daSSadaf Ebrahimi __ addv(b27, v23.V16B());
647*f5c631daSSadaf Ebrahimi __ addv(b12, v20.V8B());
648*f5c631daSSadaf Ebrahimi __ addv(h27, v30.V4H());
649*f5c631daSSadaf Ebrahimi __ addv(h19, v14.V8H());
650*f5c631daSSadaf Ebrahimi __ addv(s14, v27.V4S());
651*f5c631daSSadaf Ebrahimi __ and_(v10.V16B(), v8.V16B(), v27.V16B());
652*f5c631daSSadaf Ebrahimi __ and_(v5.V8B(), v1.V8B(), v16.V8B());
653*f5c631daSSadaf Ebrahimi __ bic(v26.V16B(), v3.V16B(), v24.V16B());
654*f5c631daSSadaf Ebrahimi __ bic(v7.V2S(), 0xe4, 16);
655*f5c631daSSadaf Ebrahimi __ bic(v28.V4H(), 0x23, 8);
656*f5c631daSSadaf Ebrahimi __ bic(v29.V4S(), 0xac);
657*f5c631daSSadaf Ebrahimi __ bic(v12.V8B(), v31.V8B(), v21.V8B());
658*f5c631daSSadaf Ebrahimi __ bic(v18.V8H(), 0x98);
659*f5c631daSSadaf Ebrahimi __ bif(v12.V16B(), v26.V16B(), v8.V16B());
660*f5c631daSSadaf Ebrahimi __ bif(v2.V8B(), v23.V8B(), v27.V8B());
661*f5c631daSSadaf Ebrahimi __ bit(v8.V16B(), v3.V16B(), v13.V16B());
662*f5c631daSSadaf Ebrahimi __ bit(v5.V8B(), v5.V8B(), v23.V8B());
663*f5c631daSSadaf Ebrahimi __ bsl(v9.V16B(), v31.V16B(), v23.V16B());
664*f5c631daSSadaf Ebrahimi __ bsl(v14.V8B(), v7.V8B(), v3.V8B());
665*f5c631daSSadaf Ebrahimi __ cls(v29.V16B(), v5.V16B());
666*f5c631daSSadaf Ebrahimi __ cls(v21.V2S(), v0.V2S());
667*f5c631daSSadaf Ebrahimi __ cls(v1.V4H(), v12.V4H());
668*f5c631daSSadaf Ebrahimi __ cls(v27.V4S(), v10.V4S());
669*f5c631daSSadaf Ebrahimi __ cls(v19.V8B(), v4.V8B());
670*f5c631daSSadaf Ebrahimi __ cls(v15.V8H(), v14.V8H());
671*f5c631daSSadaf Ebrahimi __ clz(v1.V16B(), v4.V16B());
672*f5c631daSSadaf Ebrahimi __ clz(v27.V2S(), v17.V2S());
673*f5c631daSSadaf Ebrahimi __ clz(v9.V4H(), v9.V4H());
674*f5c631daSSadaf Ebrahimi __ clz(v31.V4S(), v15.V4S());
675*f5c631daSSadaf Ebrahimi __ clz(v14.V8B(), v19.V8B());
676*f5c631daSSadaf Ebrahimi __ clz(v6.V8H(), v11.V8H());
677*f5c631daSSadaf Ebrahimi __ cmeq(d18, d5, d29);
678*f5c631daSSadaf Ebrahimi __ cmeq(d14, d31, 0);
679*f5c631daSSadaf Ebrahimi __ cmeq(v19.V16B(), v3.V16B(), v22.V16B());
680*f5c631daSSadaf Ebrahimi __ cmeq(v15.V16B(), v9.V16B(), 0);
681*f5c631daSSadaf Ebrahimi __ cmeq(v12.V2D(), v16.V2D(), v10.V2D());
682*f5c631daSSadaf Ebrahimi __ cmeq(v8.V2D(), v22.V2D(), 0);
683*f5c631daSSadaf Ebrahimi __ cmeq(v2.V2S(), v3.V2S(), v9.V2S());
684*f5c631daSSadaf Ebrahimi __ cmeq(v16.V2S(), v25.V2S(), 0);
685*f5c631daSSadaf Ebrahimi __ cmeq(v6.V4H(), v23.V4H(), v20.V4H());
686*f5c631daSSadaf Ebrahimi __ cmeq(v16.V4H(), v13.V4H(), 0);
687*f5c631daSSadaf Ebrahimi __ cmeq(v21.V4S(), v17.V4S(), v2.V4S());
688*f5c631daSSadaf Ebrahimi __ cmeq(v6.V4S(), v25.V4S(), 0);
689*f5c631daSSadaf Ebrahimi __ cmeq(v16.V8B(), v13.V8B(), v2.V8B());
690*f5c631daSSadaf Ebrahimi __ cmeq(v21.V8B(), v16.V8B(), 0);
691*f5c631daSSadaf Ebrahimi __ cmeq(v20.V8H(), v7.V8H(), v25.V8H());
692*f5c631daSSadaf Ebrahimi __ cmeq(v26.V8H(), v8.V8H(), 0);
693*f5c631daSSadaf Ebrahimi __ cmge(d16, d13, d31);
694*f5c631daSSadaf Ebrahimi __ cmge(d25, d24, 0);
695*f5c631daSSadaf Ebrahimi __ cmge(v17.V16B(), v19.V16B(), v17.V16B());
696*f5c631daSSadaf Ebrahimi __ cmge(v22.V16B(), v30.V16B(), 0);
697*f5c631daSSadaf Ebrahimi __ cmge(v28.V2D(), v20.V2D(), v26.V2D());
698*f5c631daSSadaf Ebrahimi __ cmge(v6.V2D(), v23.V2D(), 0);
699*f5c631daSSadaf Ebrahimi __ cmge(v25.V2S(), v22.V2S(), v3.V2S());
700*f5c631daSSadaf Ebrahimi __ cmge(v21.V2S(), v11.V2S(), 0);
701*f5c631daSSadaf Ebrahimi __ cmge(v16.V4H(), v3.V4H(), v12.V4H());
702*f5c631daSSadaf Ebrahimi __ cmge(v23.V4H(), v9.V4H(), 0);
703*f5c631daSSadaf Ebrahimi __ cmge(v7.V4S(), v2.V4S(), v11.V4S());
704*f5c631daSSadaf Ebrahimi __ cmge(v0.V4S(), v22.V4S(), 0);
705*f5c631daSSadaf Ebrahimi __ cmge(v10.V8B(), v30.V8B(), v9.V8B());
706*f5c631daSSadaf Ebrahimi __ cmge(v21.V8B(), v8.V8B(), 0);
707*f5c631daSSadaf Ebrahimi __ cmge(v2.V8H(), v7.V8H(), v26.V8H());
708*f5c631daSSadaf Ebrahimi __ cmge(v19.V8H(), v10.V8H(), 0);
709*f5c631daSSadaf Ebrahimi __ cmgt(d6, d13, d1);
710*f5c631daSSadaf Ebrahimi __ cmgt(d30, d24, 0);
711*f5c631daSSadaf Ebrahimi __ cmgt(v20.V16B(), v25.V16B(), v27.V16B());
712*f5c631daSSadaf Ebrahimi __ cmgt(v0.V16B(), v25.V16B(), 0);
713*f5c631daSSadaf Ebrahimi __ cmgt(v22.V2D(), v25.V2D(), v1.V2D());
714*f5c631daSSadaf Ebrahimi __ cmgt(v16.V2D(), v16.V2D(), 0);
715*f5c631daSSadaf Ebrahimi __ cmgt(v5.V2S(), v9.V2S(), v15.V2S());
716*f5c631daSSadaf Ebrahimi __ cmgt(v12.V2S(), v18.V2S(), 0);
717*f5c631daSSadaf Ebrahimi __ cmgt(v28.V4H(), v18.V4H(), v11.V4H());
718*f5c631daSSadaf Ebrahimi __ cmgt(v22.V4H(), v3.V4H(), 0);
719*f5c631daSSadaf Ebrahimi __ cmgt(v5.V4S(), v11.V4S(), v27.V4S());
720*f5c631daSSadaf Ebrahimi __ cmgt(v13.V4S(), v20.V4S(), 0);
721*f5c631daSSadaf Ebrahimi __ cmgt(v27.V8B(), v31.V8B(), v7.V8B());
722*f5c631daSSadaf Ebrahimi __ cmgt(v5.V8B(), v0.V8B(), 0);
723*f5c631daSSadaf Ebrahimi __ cmgt(v22.V8H(), v28.V8H(), v13.V8H());
724*f5c631daSSadaf Ebrahimi __ cmgt(v6.V8H(), v2.V8H(), 0);
725*f5c631daSSadaf Ebrahimi __ cmhi(d21, d8, d22);
726*f5c631daSSadaf Ebrahimi __ cmhi(v18.V16B(), v19.V16B(), v19.V16B());
727*f5c631daSSadaf Ebrahimi __ cmhi(v7.V2D(), v0.V2D(), v21.V2D());
728*f5c631daSSadaf Ebrahimi __ cmhi(v15.V2S(), v19.V2S(), v0.V2S());
729*f5c631daSSadaf Ebrahimi __ cmhi(v31.V4H(), v7.V4H(), v12.V4H());
730*f5c631daSSadaf Ebrahimi __ cmhi(v9.V4S(), v16.V4S(), v22.V4S());
731*f5c631daSSadaf Ebrahimi __ cmhi(v7.V8B(), v24.V8B(), v28.V8B());
732*f5c631daSSadaf Ebrahimi __ cmhi(v11.V8H(), v10.V8H(), v25.V8H());
733*f5c631daSSadaf Ebrahimi __ cmhs(d1, d12, d17);
734*f5c631daSSadaf Ebrahimi __ cmhs(v21.V16B(), v25.V16B(), v30.V16B());
735*f5c631daSSadaf Ebrahimi __ cmhs(v8.V2D(), v2.V2D(), v26.V2D());
736*f5c631daSSadaf Ebrahimi __ cmhs(v1.V2S(), v22.V2S(), v29.V2S());
737*f5c631daSSadaf Ebrahimi __ cmhs(v26.V4H(), v30.V4H(), v30.V4H());
738*f5c631daSSadaf Ebrahimi __ cmhs(v19.V4S(), v20.V4S(), v16.V4S());
739*f5c631daSSadaf Ebrahimi __ cmhs(v1.V8B(), v3.V8B(), v26.V8B());
740*f5c631daSSadaf Ebrahimi __ cmhs(v20.V8H(), v28.V8H(), v8.V8H());
741*f5c631daSSadaf Ebrahimi __ cmle(d30, d24, 0);
742*f5c631daSSadaf Ebrahimi __ cmle(v0.V16B(), v3.V16B(), 0);
743*f5c631daSSadaf Ebrahimi __ cmle(v2.V2D(), v30.V2D(), 0);
744*f5c631daSSadaf Ebrahimi __ cmle(v7.V2S(), v10.V2S(), 0);
745*f5c631daSSadaf Ebrahimi __ cmle(v9.V4H(), v31.V4H(), 0);
746*f5c631daSSadaf Ebrahimi __ cmle(v9.V4S(), v18.V4S(), 0);
747*f5c631daSSadaf Ebrahimi __ cmle(v21.V8B(), v31.V8B(), 0);
748*f5c631daSSadaf Ebrahimi __ cmle(v29.V8H(), v21.V8H(), 0);
749*f5c631daSSadaf Ebrahimi __ cmlt(d25, d23, 0);
750*f5c631daSSadaf Ebrahimi __ cmlt(v7.V16B(), v21.V16B(), 0);
751*f5c631daSSadaf Ebrahimi __ cmlt(v7.V2D(), v30.V2D(), 0);
752*f5c631daSSadaf Ebrahimi __ cmlt(v25.V2S(), v28.V2S(), 0);
753*f5c631daSSadaf Ebrahimi __ cmlt(v0.V4H(), v11.V4H(), 0);
754*f5c631daSSadaf Ebrahimi __ cmlt(v24.V4S(), v5.V4S(), 0);
755*f5c631daSSadaf Ebrahimi __ cmlt(v26.V8B(), v11.V8B(), 0);
756*f5c631daSSadaf Ebrahimi __ cmlt(v1.V8H(), v21.V8H(), 0);
757*f5c631daSSadaf Ebrahimi __ cmtst(d28, d23, d30);
758*f5c631daSSadaf Ebrahimi __ cmtst(v26.V16B(), v6.V16B(), v31.V16B());
759*f5c631daSSadaf Ebrahimi __ cmtst(v1.V2D(), v21.V2D(), v4.V2D());
760*f5c631daSSadaf Ebrahimi __ cmtst(v27.V2S(), v26.V2S(), v20.V2S());
761*f5c631daSSadaf Ebrahimi __ cmtst(v26.V4H(), v0.V4H(), v18.V4H());
762*f5c631daSSadaf Ebrahimi __ cmtst(v25.V4S(), v16.V4S(), v4.V4S());
763*f5c631daSSadaf Ebrahimi __ cmtst(v11.V8B(), v10.V8B(), v9.V8B());
764*f5c631daSSadaf Ebrahimi __ cmtst(v0.V8H(), v2.V8H(), v1.V8H());
765*f5c631daSSadaf Ebrahimi __ cnt(v25.V16B(), v15.V16B());
766*f5c631daSSadaf Ebrahimi __ cnt(v28.V8B(), v6.V8B());
767*f5c631daSSadaf Ebrahimi __ dup(v6.V16B(), v7.B(), 7);
768*f5c631daSSadaf Ebrahimi __ dup(v9.V16B(), w20);
769*f5c631daSSadaf Ebrahimi __ dup(v12.V2D(), v13.D(), 1);
770*f5c631daSSadaf Ebrahimi __ dup(v9.V2D(), xzr);
771*f5c631daSSadaf Ebrahimi __ dup(v4.V2S(), v26.S(), 2);
772*f5c631daSSadaf Ebrahimi __ dup(v3.V2S(), w12);
773*f5c631daSSadaf Ebrahimi __ dup(v22.V4H(), v5.H(), 7);
774*f5c631daSSadaf Ebrahimi __ dup(v16.V4H(), w25);
775*f5c631daSSadaf Ebrahimi __ dup(v20.V4S(), v10.S(), 2);
776*f5c631daSSadaf Ebrahimi __ dup(v10.V4S(), w7);
777*f5c631daSSadaf Ebrahimi __ dup(v30.V8B(), v30.B(), 2);
778*f5c631daSSadaf Ebrahimi __ dup(v31.V8B(), w15);
779*f5c631daSSadaf Ebrahimi __ dup(v28.V8H(), v17.H(), 4);
780*f5c631daSSadaf Ebrahimi __ dup(v2.V8H(), w3);
781*f5c631daSSadaf Ebrahimi __ eor(v29.V16B(), v25.V16B(), v3.V16B());
782*f5c631daSSadaf Ebrahimi __ eor(v3.V8B(), v16.V8B(), v28.V8B());
783*f5c631daSSadaf Ebrahimi __ ext(v1.V16B(), v26.V16B(), v6.V16B(), 1);
784*f5c631daSSadaf Ebrahimi __ ext(v2.V8B(), v30.V8B(), v1.V8B(), 1);
785*f5c631daSSadaf Ebrahimi __ ld1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), MemOperand(x0));
786*f5c631daSSadaf Ebrahimi __ ld1(v23.V16B(),
787*f5c631daSSadaf Ebrahimi v24.V16B(),
788*f5c631daSSadaf Ebrahimi v25.V16B(),
789*f5c631daSSadaf Ebrahimi v26.V16B(),
790*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
791*f5c631daSSadaf Ebrahimi __ ld1(v5.V16B(),
792*f5c631daSSadaf Ebrahimi v6.V16B(),
793*f5c631daSSadaf Ebrahimi v7.V16B(),
794*f5c631daSSadaf Ebrahimi v8.V16B(),
795*f5c631daSSadaf Ebrahimi MemOperand(x1, 64, PostIndex));
796*f5c631daSSadaf Ebrahimi __ ld1(v18.V16B(), v19.V16B(), v20.V16B(), MemOperand(x0));
797*f5c631daSSadaf Ebrahimi __ ld1(v13.V16B(), v14.V16B(), v15.V16B(), MemOperand(x1, x2, PostIndex));
798*f5c631daSSadaf Ebrahimi __ ld1(v19.V16B(), v20.V16B(), v21.V16B(), MemOperand(x1, 48, PostIndex));
799*f5c631daSSadaf Ebrahimi __ ld1(v17.V16B(), v18.V16B(), MemOperand(x0));
800*f5c631daSSadaf Ebrahimi __ ld1(v20.V16B(), v21.V16B(), MemOperand(x1, x2, PostIndex));
801*f5c631daSSadaf Ebrahimi __ ld1(v28.V16B(), v29.V16B(), MemOperand(x1, 32, PostIndex));
802*f5c631daSSadaf Ebrahimi __ ld1(v29.V16B(), MemOperand(x0));
803*f5c631daSSadaf Ebrahimi __ ld1(v21.V16B(), MemOperand(x1, x2, PostIndex));
804*f5c631daSSadaf Ebrahimi __ ld1(v4.V16B(), MemOperand(x1, 16, PostIndex));
805*f5c631daSSadaf Ebrahimi __ ld1(v4.V1D(), v5.V1D(), v6.V1D(), v7.V1D(), MemOperand(x0));
806*f5c631daSSadaf Ebrahimi __ ld1(v17.V1D(),
807*f5c631daSSadaf Ebrahimi v18.V1D(),
808*f5c631daSSadaf Ebrahimi v19.V1D(),
809*f5c631daSSadaf Ebrahimi v20.V1D(),
810*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
811*f5c631daSSadaf Ebrahimi __ ld1(v28.V1D(),
812*f5c631daSSadaf Ebrahimi v29.V1D(),
813*f5c631daSSadaf Ebrahimi v30.V1D(),
814*f5c631daSSadaf Ebrahimi v31.V1D(),
815*f5c631daSSadaf Ebrahimi MemOperand(x1, 32, PostIndex));
816*f5c631daSSadaf Ebrahimi __ ld1(v20.V1D(), v21.V1D(), v22.V1D(), MemOperand(x0));
817*f5c631daSSadaf Ebrahimi __ ld1(v19.V1D(), v20.V1D(), v21.V1D(), MemOperand(x1, x2, PostIndex));
818*f5c631daSSadaf Ebrahimi __ ld1(v12.V1D(), v13.V1D(), v14.V1D(), MemOperand(x1, 24, PostIndex));
819*f5c631daSSadaf Ebrahimi __ ld1(v29.V1D(), v30.V1D(), MemOperand(x0));
820*f5c631daSSadaf Ebrahimi __ ld1(v31.V1D(), v0.V1D(), MemOperand(x1, x2, PostIndex));
821*f5c631daSSadaf Ebrahimi __ ld1(v3.V1D(), v4.V1D(), MemOperand(x1, 16, PostIndex));
822*f5c631daSSadaf Ebrahimi __ ld1(v28.V1D(), MemOperand(x0));
823*f5c631daSSadaf Ebrahimi __ ld1(v11.V1D(), MemOperand(x1, x2, PostIndex));
824*f5c631daSSadaf Ebrahimi __ ld1(v29.V1D(), MemOperand(x1, 8, PostIndex));
825*f5c631daSSadaf Ebrahimi __ ld1(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), MemOperand(x0));
826*f5c631daSSadaf Ebrahimi __ ld1(v8.V2D(),
827*f5c631daSSadaf Ebrahimi v9.V2D(),
828*f5c631daSSadaf Ebrahimi v10.V2D(),
829*f5c631daSSadaf Ebrahimi v11.V2D(),
830*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
831*f5c631daSSadaf Ebrahimi __ ld1(v14.V2D(),
832*f5c631daSSadaf Ebrahimi v15.V2D(),
833*f5c631daSSadaf Ebrahimi v16.V2D(),
834*f5c631daSSadaf Ebrahimi v17.V2D(),
835*f5c631daSSadaf Ebrahimi MemOperand(x1, 64, PostIndex));
836*f5c631daSSadaf Ebrahimi __ ld1(v26.V2D(), v27.V2D(), v28.V2D(), MemOperand(x0));
837*f5c631daSSadaf Ebrahimi __ ld1(v5.V2D(), v6.V2D(), v7.V2D(), MemOperand(x1, x2, PostIndex));
838*f5c631daSSadaf Ebrahimi __ ld1(v26.V2D(), v27.V2D(), v28.V2D(), MemOperand(x1, 48, PostIndex));
839*f5c631daSSadaf Ebrahimi __ ld1(v18.V2D(), v19.V2D(), MemOperand(x0));
840*f5c631daSSadaf Ebrahimi __ ld1(v21.V2D(), v22.V2D(), MemOperand(x1, x2, PostIndex));
841*f5c631daSSadaf Ebrahimi __ ld1(v17.V2D(), v18.V2D(), MemOperand(x1, 32, PostIndex));
842*f5c631daSSadaf Ebrahimi __ ld1(v5.V2D(), MemOperand(x0));
843*f5c631daSSadaf Ebrahimi __ ld1(v6.V2D(), MemOperand(x1, x2, PostIndex));
844*f5c631daSSadaf Ebrahimi __ ld1(v15.V2D(), MemOperand(x1, 16, PostIndex));
845*f5c631daSSadaf Ebrahimi __ ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x0));
846*f5c631daSSadaf Ebrahimi __ ld1(v24.V2S(),
847*f5c631daSSadaf Ebrahimi v25.V2S(),
848*f5c631daSSadaf Ebrahimi v26.V2S(),
849*f5c631daSSadaf Ebrahimi v27.V2S(),
850*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
851*f5c631daSSadaf Ebrahimi __ ld1(v27.V2S(),
852*f5c631daSSadaf Ebrahimi v28.V2S(),
853*f5c631daSSadaf Ebrahimi v29.V2S(),
854*f5c631daSSadaf Ebrahimi v30.V2S(),
855*f5c631daSSadaf Ebrahimi MemOperand(x1, 32, PostIndex));
856*f5c631daSSadaf Ebrahimi __ ld1(v11.V2S(), v12.V2S(), v13.V2S(), MemOperand(x0));
857*f5c631daSSadaf Ebrahimi __ ld1(v8.V2S(), v9.V2S(), v10.V2S(), MemOperand(x1, x2, PostIndex));
858*f5c631daSSadaf Ebrahimi __ ld1(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x1, 24, PostIndex));
859*f5c631daSSadaf Ebrahimi __ ld1(v0.V2S(), v1.V2S(), MemOperand(x0));
860*f5c631daSSadaf Ebrahimi __ ld1(v13.V2S(), v14.V2S(), MemOperand(x1, x2, PostIndex));
861*f5c631daSSadaf Ebrahimi __ ld1(v3.V2S(), v4.V2S(), MemOperand(x1, 16, PostIndex));
862*f5c631daSSadaf Ebrahimi __ ld1(v26.V2S(), MemOperand(x0));
863*f5c631daSSadaf Ebrahimi __ ld1(v0.V2S(), MemOperand(x1, x2, PostIndex));
864*f5c631daSSadaf Ebrahimi __ ld1(v11.V2S(), MemOperand(x1, 8, PostIndex));
865*f5c631daSSadaf Ebrahimi __ ld1(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), MemOperand(x0));
866*f5c631daSSadaf Ebrahimi __ ld1(v24.V4H(),
867*f5c631daSSadaf Ebrahimi v25.V4H(),
868*f5c631daSSadaf Ebrahimi v26.V4H(),
869*f5c631daSSadaf Ebrahimi v27.V4H(),
870*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
871*f5c631daSSadaf Ebrahimi __ ld1(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), MemOperand(x1, 32, PostIndex));
872*f5c631daSSadaf Ebrahimi __ ld1(v30.V4H(), v31.V4H(), v0.V4H(), MemOperand(x0));
873*f5c631daSSadaf Ebrahimi __ ld1(v25.V4H(), v26.V4H(), v27.V4H(), MemOperand(x1, x2, PostIndex));
874*f5c631daSSadaf Ebrahimi __ ld1(v3.V4H(), v4.V4H(), v5.V4H(), MemOperand(x1, 24, PostIndex));
875*f5c631daSSadaf Ebrahimi __ ld1(v3.V4H(), v4.V4H(), MemOperand(x0));
876*f5c631daSSadaf Ebrahimi __ ld1(v3.V4H(), v4.V4H(), MemOperand(x1, x2, PostIndex));
877*f5c631daSSadaf Ebrahimi __ ld1(v23.V4H(), v24.V4H(), MemOperand(x1, 16, PostIndex));
878*f5c631daSSadaf Ebrahimi __ ld1(v26.V4H(), MemOperand(x0));
879*f5c631daSSadaf Ebrahimi __ ld1(v1.V4H(), MemOperand(x1, x2, PostIndex));
880*f5c631daSSadaf Ebrahimi __ ld1(v14.V4H(), MemOperand(x1, 8, PostIndex));
881*f5c631daSSadaf Ebrahimi __ ld1(v26.V4S(), v27.V4S(), v28.V4S(), v29.V4S(), MemOperand(x0));
882*f5c631daSSadaf Ebrahimi __ ld1(v28.V4S(),
883*f5c631daSSadaf Ebrahimi v29.V4S(),
884*f5c631daSSadaf Ebrahimi v30.V4S(),
885*f5c631daSSadaf Ebrahimi v31.V4S(),
886*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
887*f5c631daSSadaf Ebrahimi __ ld1(v4.V4S(), v5.V4S(), v6.V4S(), v7.V4S(), MemOperand(x1, 64, PostIndex));
888*f5c631daSSadaf Ebrahimi __ ld1(v2.V4S(), v3.V4S(), v4.V4S(), MemOperand(x0));
889*f5c631daSSadaf Ebrahimi __ ld1(v22.V4S(), v23.V4S(), v24.V4S(), MemOperand(x1, x2, PostIndex));
890*f5c631daSSadaf Ebrahimi __ ld1(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x1, 48, PostIndex));
891*f5c631daSSadaf Ebrahimi __ ld1(v20.V4S(), v21.V4S(), MemOperand(x0));
892*f5c631daSSadaf Ebrahimi __ ld1(v30.V4S(), v31.V4S(), MemOperand(x1, x2, PostIndex));
893*f5c631daSSadaf Ebrahimi __ ld1(v11.V4S(), v12.V4S(), MemOperand(x1, 32, PostIndex));
894*f5c631daSSadaf Ebrahimi __ ld1(v15.V4S(), MemOperand(x0));
895*f5c631daSSadaf Ebrahimi __ ld1(v12.V4S(), MemOperand(x1, x2, PostIndex));
896*f5c631daSSadaf Ebrahimi __ ld1(v0.V4S(), MemOperand(x1, 16, PostIndex));
897*f5c631daSSadaf Ebrahimi __ ld1(v17.V8B(), v18.V8B(), v19.V8B(), v20.V8B(), MemOperand(x0));
898*f5c631daSSadaf Ebrahimi __ ld1(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), MemOperand(x1, x2, PostIndex));
899*f5c631daSSadaf Ebrahimi __ ld1(v9.V8B(),
900*f5c631daSSadaf Ebrahimi v10.V8B(),
901*f5c631daSSadaf Ebrahimi v11.V8B(),
902*f5c631daSSadaf Ebrahimi v12.V8B(),
903*f5c631daSSadaf Ebrahimi MemOperand(x1, 32, PostIndex));
904*f5c631daSSadaf Ebrahimi __ ld1(v4.V8B(), v5.V8B(), v6.V8B(), MemOperand(x0));
905*f5c631daSSadaf Ebrahimi __ ld1(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x1, x2, PostIndex));
906*f5c631daSSadaf Ebrahimi __ ld1(v12.V8B(), v13.V8B(), v14.V8B(), MemOperand(x1, 24, PostIndex));
907*f5c631daSSadaf Ebrahimi __ ld1(v10.V8B(), v11.V8B(), MemOperand(x0));
908*f5c631daSSadaf Ebrahimi __ ld1(v11.V8B(), v12.V8B(), MemOperand(x1, x2, PostIndex));
909*f5c631daSSadaf Ebrahimi __ ld1(v27.V8B(), v28.V8B(), MemOperand(x1, 16, PostIndex));
910*f5c631daSSadaf Ebrahimi __ ld1(v31.V8B(), MemOperand(x0));
911*f5c631daSSadaf Ebrahimi __ ld1(v10.V8B(), MemOperand(x1, x2, PostIndex));
912*f5c631daSSadaf Ebrahimi __ ld1(v28.V8B(), MemOperand(x1, 8, PostIndex));
913*f5c631daSSadaf Ebrahimi __ ld1(v5.V8H(), v6.V8H(), v7.V8H(), v8.V8H(), MemOperand(x0));
914*f5c631daSSadaf Ebrahimi __ ld1(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex));
915*f5c631daSSadaf Ebrahimi __ ld1(v10.V8H(),
916*f5c631daSSadaf Ebrahimi v11.V8H(),
917*f5c631daSSadaf Ebrahimi v12.V8H(),
918*f5c631daSSadaf Ebrahimi v13.V8H(),
919*f5c631daSSadaf Ebrahimi MemOperand(x1, 64, PostIndex));
920*f5c631daSSadaf Ebrahimi __ ld1(v26.V8H(), v27.V8H(), v28.V8H(), MemOperand(x0));
921*f5c631daSSadaf Ebrahimi __ ld1(v3.V8H(), v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex));
922*f5c631daSSadaf Ebrahimi __ ld1(v17.V8H(), v18.V8H(), v19.V8H(), MemOperand(x1, 48, PostIndex));
923*f5c631daSSadaf Ebrahimi __ ld1(v4.V8H(), v5.V8H(), MemOperand(x0));
924*f5c631daSSadaf Ebrahimi __ ld1(v21.V8H(), v22.V8H(), MemOperand(x1, x2, PostIndex));
925*f5c631daSSadaf Ebrahimi __ ld1(v4.V8H(), v5.V8H(), MemOperand(x1, 32, PostIndex));
926*f5c631daSSadaf Ebrahimi __ ld1(v9.V8H(), MemOperand(x0));
927*f5c631daSSadaf Ebrahimi __ ld1(v27.V8H(), MemOperand(x1, x2, PostIndex));
928*f5c631daSSadaf Ebrahimi __ ld1(v26.V8H(), MemOperand(x1, 16, PostIndex));
929*f5c631daSSadaf Ebrahimi __ ld1(v19.B(), 1, MemOperand(x0));
930*f5c631daSSadaf Ebrahimi __ ld1(v12.B(), 3, MemOperand(x1, x2, PostIndex));
931*f5c631daSSadaf Ebrahimi __ ld1(v27.B(), 12, MemOperand(x1, 1, PostIndex));
932*f5c631daSSadaf Ebrahimi __ ld1(v10.D(), 1, MemOperand(x0));
933*f5c631daSSadaf Ebrahimi __ ld1(v26.D(), 1, MemOperand(x1, x2, PostIndex));
934*f5c631daSSadaf Ebrahimi __ ld1(v7.D(), 1, MemOperand(x1, 8, PostIndex));
935*f5c631daSSadaf Ebrahimi __ ld1(v19.H(), 5, MemOperand(x0));
936*f5c631daSSadaf Ebrahimi __ ld1(v10.H(), 1, MemOperand(x1, x2, PostIndex));
937*f5c631daSSadaf Ebrahimi __ ld1(v5.H(), 4, MemOperand(x1, 2, PostIndex));
938*f5c631daSSadaf Ebrahimi __ ld1(v21.S(), 2, MemOperand(x0));
939*f5c631daSSadaf Ebrahimi __ ld1(v13.S(), 2, MemOperand(x1, x2, PostIndex));
940*f5c631daSSadaf Ebrahimi __ ld1(v1.S(), 2, MemOperand(x1, 4, PostIndex));
941*f5c631daSSadaf Ebrahimi __ ld1r(v2.V16B(), MemOperand(x0));
942*f5c631daSSadaf Ebrahimi __ ld1r(v2.V16B(), MemOperand(x1, x2, PostIndex));
943*f5c631daSSadaf Ebrahimi __ ld1r(v22.V16B(), MemOperand(x1, 1, PostIndex));
944*f5c631daSSadaf Ebrahimi __ ld1r(v25.V1D(), MemOperand(x0));
945*f5c631daSSadaf Ebrahimi __ ld1r(v9.V1D(), MemOperand(x1, x2, PostIndex));
946*f5c631daSSadaf Ebrahimi __ ld1r(v23.V1D(), MemOperand(x1, 8, PostIndex));
947*f5c631daSSadaf Ebrahimi __ ld1r(v19.V2D(), MemOperand(x0));
948*f5c631daSSadaf Ebrahimi __ ld1r(v21.V2D(), MemOperand(x1, x2, PostIndex));
949*f5c631daSSadaf Ebrahimi __ ld1r(v30.V2D(), MemOperand(x1, 8, PostIndex));
950*f5c631daSSadaf Ebrahimi __ ld1r(v24.V2S(), MemOperand(x0));
951*f5c631daSSadaf Ebrahimi __ ld1r(v26.V2S(), MemOperand(x1, x2, PostIndex));
952*f5c631daSSadaf Ebrahimi __ ld1r(v28.V2S(), MemOperand(x1, 4, PostIndex));
953*f5c631daSSadaf Ebrahimi __ ld1r(v19.V4H(), MemOperand(x0));
954*f5c631daSSadaf Ebrahimi __ ld1r(v1.V4H(), MemOperand(x1, x2, PostIndex));
955*f5c631daSSadaf Ebrahimi __ ld1r(v21.V4H(), MemOperand(x1, 2, PostIndex));
956*f5c631daSSadaf Ebrahimi __ ld1r(v15.V4S(), MemOperand(x0));
957*f5c631daSSadaf Ebrahimi __ ld1r(v21.V4S(), MemOperand(x1, x2, PostIndex));
958*f5c631daSSadaf Ebrahimi __ ld1r(v23.V4S(), MemOperand(x1, 4, PostIndex));
959*f5c631daSSadaf Ebrahimi __ ld1r(v26.V8B(), MemOperand(x0));
960*f5c631daSSadaf Ebrahimi __ ld1r(v14.V8B(), MemOperand(x1, x2, PostIndex));
961*f5c631daSSadaf Ebrahimi __ ld1r(v19.V8B(), MemOperand(x1, 1, PostIndex));
962*f5c631daSSadaf Ebrahimi __ ld1r(v13.V8H(), MemOperand(x0));
963*f5c631daSSadaf Ebrahimi __ ld1r(v30.V8H(), MemOperand(x1, x2, PostIndex));
964*f5c631daSSadaf Ebrahimi __ ld1r(v27.V8H(), MemOperand(x1, 2, PostIndex));
965*f5c631daSSadaf Ebrahimi __ ld2(v21.V16B(), v22.V16B(), MemOperand(x0));
966*f5c631daSSadaf Ebrahimi __ ld2(v21.V16B(), v22.V16B(), MemOperand(x1, x2, PostIndex));
967*f5c631daSSadaf Ebrahimi __ ld2(v12.V16B(), v13.V16B(), MemOperand(x1, 32, PostIndex));
968*f5c631daSSadaf Ebrahimi __ ld2(v14.V2D(), v15.V2D(), MemOperand(x0));
969*f5c631daSSadaf Ebrahimi __ ld2(v0.V2D(), v1.V2D(), MemOperand(x1, x2, PostIndex));
970*f5c631daSSadaf Ebrahimi __ ld2(v12.V2D(), v13.V2D(), MemOperand(x1, 32, PostIndex));
971*f5c631daSSadaf Ebrahimi __ ld2(v27.V2S(), v28.V2S(), MemOperand(x0));
972*f5c631daSSadaf Ebrahimi __ ld2(v2.V2S(), v3.V2S(), MemOperand(x1, x2, PostIndex));
973*f5c631daSSadaf Ebrahimi __ ld2(v12.V2S(), v13.V2S(), MemOperand(x1, 16, PostIndex));
974*f5c631daSSadaf Ebrahimi __ ld2(v9.V4H(), v10.V4H(), MemOperand(x0));
975*f5c631daSSadaf Ebrahimi __ ld2(v23.V4H(), v24.V4H(), MemOperand(x1, x2, PostIndex));
976*f5c631daSSadaf Ebrahimi __ ld2(v1.V4H(), v2.V4H(), MemOperand(x1, 16, PostIndex));
977*f5c631daSSadaf Ebrahimi __ ld2(v20.V4S(), v21.V4S(), MemOperand(x0));
978*f5c631daSSadaf Ebrahimi __ ld2(v10.V4S(), v11.V4S(), MemOperand(x1, x2, PostIndex));
979*f5c631daSSadaf Ebrahimi __ ld2(v24.V4S(), v25.V4S(), MemOperand(x1, 32, PostIndex));
980*f5c631daSSadaf Ebrahimi __ ld2(v17.V8B(), v18.V8B(), MemOperand(x0));
981*f5c631daSSadaf Ebrahimi __ ld2(v13.V8B(), v14.V8B(), MemOperand(x1, x2, PostIndex));
982*f5c631daSSadaf Ebrahimi __ ld2(v7.V8B(), v8.V8B(), MemOperand(x1, 16, PostIndex));
983*f5c631daSSadaf Ebrahimi __ ld2(v30.V8H(), v31.V8H(), MemOperand(x0));
984*f5c631daSSadaf Ebrahimi __ ld2(v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex));
985*f5c631daSSadaf Ebrahimi __ ld2(v13.V8H(), v14.V8H(), MemOperand(x1, 32, PostIndex));
986*f5c631daSSadaf Ebrahimi __ ld2(v5.B(), v6.B(), 12, MemOperand(x0));
987*f5c631daSSadaf Ebrahimi __ ld2(v16.B(), v17.B(), 7, MemOperand(x1, x2, PostIndex));
988*f5c631daSSadaf Ebrahimi __ ld2(v29.B(), v30.B(), 2, MemOperand(x1, 2, PostIndex));
989*f5c631daSSadaf Ebrahimi __ ld2(v11.D(), v12.D(), 1, MemOperand(x0));
990*f5c631daSSadaf Ebrahimi __ ld2(v26.D(), v27.D(), 0, MemOperand(x1, x2, PostIndex));
991*f5c631daSSadaf Ebrahimi __ ld2(v25.D(), v26.D(), 0, MemOperand(x1, 16, PostIndex));
992*f5c631daSSadaf Ebrahimi __ ld2(v18.H(), v19.H(), 7, MemOperand(x0));
993*f5c631daSSadaf Ebrahimi __ ld2(v17.H(), v18.H(), 5, MemOperand(x1, x2, PostIndex));
994*f5c631daSSadaf Ebrahimi __ ld2(v30.H(), v31.H(), 2, MemOperand(x1, 4, PostIndex));
995*f5c631daSSadaf Ebrahimi __ ld2(v29.S(), v30.S(), 3, MemOperand(x0));
996*f5c631daSSadaf Ebrahimi __ ld2(v28.S(), v29.S(), 0, MemOperand(x1, x2, PostIndex));
997*f5c631daSSadaf Ebrahimi __ ld2(v6.S(), v7.S(), 1, MemOperand(x1, 8, PostIndex));
998*f5c631daSSadaf Ebrahimi __ ld2r(v26.V16B(), v27.V16B(), MemOperand(x0));
999*f5c631daSSadaf Ebrahimi __ ld2r(v21.V16B(), v22.V16B(), MemOperand(x1, x2, PostIndex));
1000*f5c631daSSadaf Ebrahimi __ ld2r(v5.V16B(), v6.V16B(), MemOperand(x1, 2, PostIndex));
1001*f5c631daSSadaf Ebrahimi __ ld2r(v26.V1D(), v27.V1D(), MemOperand(x0));
1002*f5c631daSSadaf Ebrahimi __ ld2r(v14.V1D(), v15.V1D(), MemOperand(x1, x2, PostIndex));
1003*f5c631daSSadaf Ebrahimi __ ld2r(v23.V1D(), v24.V1D(), MemOperand(x1, 16, PostIndex));
1004*f5c631daSSadaf Ebrahimi __ ld2r(v11.V2D(), v12.V2D(), MemOperand(x0));
1005*f5c631daSSadaf Ebrahimi __ ld2r(v29.V2D(), v30.V2D(), MemOperand(x1, x2, PostIndex));
1006*f5c631daSSadaf Ebrahimi __ ld2r(v15.V2D(), v16.V2D(), MemOperand(x1, 16, PostIndex));
1007*f5c631daSSadaf Ebrahimi __ ld2r(v26.V2S(), v27.V2S(), MemOperand(x0));
1008*f5c631daSSadaf Ebrahimi __ ld2r(v22.V2S(), v23.V2S(), MemOperand(x1, x2, PostIndex));
1009*f5c631daSSadaf Ebrahimi __ ld2r(v2.V2S(), v3.V2S(), MemOperand(x1, 8, PostIndex));
1010*f5c631daSSadaf Ebrahimi __ ld2r(v2.V4H(), v3.V4H(), MemOperand(x0));
1011*f5c631daSSadaf Ebrahimi __ ld2r(v9.V4H(), v10.V4H(), MemOperand(x1, x2, PostIndex));
1012*f5c631daSSadaf Ebrahimi __ ld2r(v6.V4H(), v7.V4H(), MemOperand(x1, 4, PostIndex));
1013*f5c631daSSadaf Ebrahimi __ ld2r(v7.V4S(), v8.V4S(), MemOperand(x0));
1014*f5c631daSSadaf Ebrahimi __ ld2r(v19.V4S(), v20.V4S(), MemOperand(x1, x2, PostIndex));
1015*f5c631daSSadaf Ebrahimi __ ld2r(v21.V4S(), v22.V4S(), MemOperand(x1, 8, PostIndex));
1016*f5c631daSSadaf Ebrahimi __ ld2r(v26.V8B(), v27.V8B(), MemOperand(x0));
1017*f5c631daSSadaf Ebrahimi __ ld2r(v20.V8B(), v21.V8B(), MemOperand(x1, x2, PostIndex));
1018*f5c631daSSadaf Ebrahimi __ ld2r(v11.V8B(), v12.V8B(), MemOperand(x1, 2, PostIndex));
1019*f5c631daSSadaf Ebrahimi __ ld2r(v12.V8H(), v13.V8H(), MemOperand(x0));
1020*f5c631daSSadaf Ebrahimi __ ld2r(v6.V8H(), v7.V8H(), MemOperand(x1, x2, PostIndex));
1021*f5c631daSSadaf Ebrahimi __ ld2r(v25.V8H(), v26.V8H(), MemOperand(x1, 4, PostIndex));
1022*f5c631daSSadaf Ebrahimi __ ld3(v20.V16B(), v21.V16B(), v22.V16B(), MemOperand(x0));
1023*f5c631daSSadaf Ebrahimi __ ld3(v28.V16B(), v29.V16B(), v30.V16B(), MemOperand(x1, x2, PostIndex));
1024*f5c631daSSadaf Ebrahimi __ ld3(v20.V16B(), v21.V16B(), v22.V16B(), MemOperand(x1, 48, PostIndex));
1025*f5c631daSSadaf Ebrahimi __ ld3(v21.V2D(), v22.V2D(), v23.V2D(), MemOperand(x0));
1026*f5c631daSSadaf Ebrahimi __ ld3(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x1, x2, PostIndex));
1027*f5c631daSSadaf Ebrahimi __ ld3(v27.V2D(), v28.V2D(), v29.V2D(), MemOperand(x1, 48, PostIndex));
1028*f5c631daSSadaf Ebrahimi __ ld3(v7.V2S(), v8.V2S(), v9.V2S(), MemOperand(x0));
1029*f5c631daSSadaf Ebrahimi __ ld3(v20.V2S(), v21.V2S(), v22.V2S(), MemOperand(x1, x2, PostIndex));
1030*f5c631daSSadaf Ebrahimi __ ld3(v26.V2S(), v27.V2S(), v28.V2S(), MemOperand(x1, 24, PostIndex));
1031*f5c631daSSadaf Ebrahimi __ ld3(v27.V4H(), v28.V4H(), v29.V4H(), MemOperand(x0));
1032*f5c631daSSadaf Ebrahimi __ ld3(v28.V4H(), v29.V4H(), v30.V4H(), MemOperand(x1, x2, PostIndex));
1033*f5c631daSSadaf Ebrahimi __ ld3(v7.V4H(), v8.V4H(), v9.V4H(), MemOperand(x1, 24, PostIndex));
1034*f5c631daSSadaf Ebrahimi __ ld3(v2.V4S(), v3.V4S(), v4.V4S(), MemOperand(x0));
1035*f5c631daSSadaf Ebrahimi __ ld3(v24.V4S(), v25.V4S(), v26.V4S(), MemOperand(x1, x2, PostIndex));
1036*f5c631daSSadaf Ebrahimi __ ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x1, 48, PostIndex));
1037*f5c631daSSadaf Ebrahimi __ ld3(v29.V8B(), v30.V8B(), v31.V8B(), MemOperand(x0));
1038*f5c631daSSadaf Ebrahimi __ ld3(v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x1, x2, PostIndex));
1039*f5c631daSSadaf Ebrahimi __ ld3(v12.V8B(), v13.V8B(), v14.V8B(), MemOperand(x1, 24, PostIndex));
1040*f5c631daSSadaf Ebrahimi __ ld3(v22.V8H(), v23.V8H(), v24.V8H(), MemOperand(x0));
1041*f5c631daSSadaf Ebrahimi __ ld3(v13.V8H(), v14.V8H(), v15.V8H(), MemOperand(x1, x2, PostIndex));
1042*f5c631daSSadaf Ebrahimi __ ld3(v28.V8H(), v29.V8H(), v30.V8H(), MemOperand(x1, 48, PostIndex));
1043*f5c631daSSadaf Ebrahimi __ ld3(v21.B(), v22.B(), v23.B(), 11, MemOperand(x0));
1044*f5c631daSSadaf Ebrahimi __ ld3(v5.B(), v6.B(), v7.B(), 9, MemOperand(x1, x2, PostIndex));
1045*f5c631daSSadaf Ebrahimi __ ld3(v23.B(), v24.B(), v25.B(), 0, MemOperand(x1, 3, PostIndex));
1046*f5c631daSSadaf Ebrahimi __ ld3(v16.D(), v17.D(), v18.D(), 0, MemOperand(x0));
1047*f5c631daSSadaf Ebrahimi __ ld3(v30.D(), v31.D(), v0.D(), 0, MemOperand(x1, x2, PostIndex));
1048*f5c631daSSadaf Ebrahimi __ ld3(v28.D(), v29.D(), v30.D(), 1, MemOperand(x1, 24, PostIndex));
1049*f5c631daSSadaf Ebrahimi __ ld3(v13.H(), v14.H(), v15.H(), 2, MemOperand(x0));
1050*f5c631daSSadaf Ebrahimi __ ld3(v22.H(), v23.H(), v24.H(), 7, MemOperand(x1, x2, PostIndex));
1051*f5c631daSSadaf Ebrahimi __ ld3(v14.H(), v15.H(), v16.H(), 3, MemOperand(x1, 6, PostIndex));
1052*f5c631daSSadaf Ebrahimi __ ld3(v22.S(), v23.S(), v24.S(), 3, MemOperand(x0));
1053*f5c631daSSadaf Ebrahimi __ ld3(v30.S(), v31.S(), v0.S(), 2, MemOperand(x1, x2, PostIndex));
1054*f5c631daSSadaf Ebrahimi __ ld3(v12.S(), v13.S(), v14.S(), 1, MemOperand(x1, 12, PostIndex));
1055*f5c631daSSadaf Ebrahimi __ ld3r(v24.V16B(), v25.V16B(), v26.V16B(), MemOperand(x0));
1056*f5c631daSSadaf Ebrahimi __ ld3r(v24.V16B(), v25.V16B(), v26.V16B(), MemOperand(x1, x2, PostIndex));
1057*f5c631daSSadaf Ebrahimi __ ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x1, 3, PostIndex));
1058*f5c631daSSadaf Ebrahimi __ ld3r(v4.V1D(), v5.V1D(), v6.V1D(), MemOperand(x0));
1059*f5c631daSSadaf Ebrahimi __ ld3r(v7.V1D(), v8.V1D(), v9.V1D(), MemOperand(x1, x2, PostIndex));
1060*f5c631daSSadaf Ebrahimi __ ld3r(v17.V1D(), v18.V1D(), v19.V1D(), MemOperand(x1, 24, PostIndex));
1061*f5c631daSSadaf Ebrahimi __ ld3r(v16.V2D(), v17.V2D(), v18.V2D(), MemOperand(x0));
1062*f5c631daSSadaf Ebrahimi __ ld3r(v20.V2D(), v21.V2D(), v22.V2D(), MemOperand(x1, x2, PostIndex));
1063*f5c631daSSadaf Ebrahimi __ ld3r(v14.V2D(), v15.V2D(), v16.V2D(), MemOperand(x1, 24, PostIndex));
1064*f5c631daSSadaf Ebrahimi __ ld3r(v10.V2S(), v11.V2S(), v12.V2S(), MemOperand(x0));
1065*f5c631daSSadaf Ebrahimi __ ld3r(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x1, x2, PostIndex));
1066*f5c631daSSadaf Ebrahimi __ ld3r(v23.V2S(), v24.V2S(), v25.V2S(), MemOperand(x1, 12, PostIndex));
1067*f5c631daSSadaf Ebrahimi __ ld3r(v22.V4H(), v23.V4H(), v24.V4H(), MemOperand(x0));
1068*f5c631daSSadaf Ebrahimi __ ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x1, x2, PostIndex));
1069*f5c631daSSadaf Ebrahimi __ ld3r(v7.V4H(), v8.V4H(), v9.V4H(), MemOperand(x1, 6, PostIndex));
1070*f5c631daSSadaf Ebrahimi __ ld3r(v26.V4S(), v27.V4S(), v28.V4S(), MemOperand(x0));
1071*f5c631daSSadaf Ebrahimi __ ld3r(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x1, x2, PostIndex));
1072*f5c631daSSadaf Ebrahimi __ ld3r(v30.V4S(), v31.V4S(), v0.V4S(), MemOperand(x1, 12, PostIndex));
1073*f5c631daSSadaf Ebrahimi __ ld3r(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x0));
1074*f5c631daSSadaf Ebrahimi __ ld3r(v10.V8B(), v11.V8B(), v12.V8B(), MemOperand(x1, x2, PostIndex));
1075*f5c631daSSadaf Ebrahimi __ ld3r(v28.V8B(), v29.V8B(), v30.V8B(), MemOperand(x1, 3, PostIndex));
1076*f5c631daSSadaf Ebrahimi __ ld3r(v6.V8H(), v7.V8H(), v8.V8H(), MemOperand(x0));
1077*f5c631daSSadaf Ebrahimi __ ld3r(v29.V8H(), v30.V8H(), v31.V8H(), MemOperand(x1, x2, PostIndex));
1078*f5c631daSSadaf Ebrahimi __ ld3r(v7.V8H(), v8.V8H(), v9.V8H(), MemOperand(x1, 6, PostIndex));
1079*f5c631daSSadaf Ebrahimi __ ld4(v3.V16B(), v4.V16B(), v5.V16B(), v6.V16B(), MemOperand(x0));
1080*f5c631daSSadaf Ebrahimi __ ld4(v2.V16B(),
1081*f5c631daSSadaf Ebrahimi v3.V16B(),
1082*f5c631daSSadaf Ebrahimi v4.V16B(),
1083*f5c631daSSadaf Ebrahimi v5.V16B(),
1084*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
1085*f5c631daSSadaf Ebrahimi __ ld4(v5.V16B(),
1086*f5c631daSSadaf Ebrahimi v6.V16B(),
1087*f5c631daSSadaf Ebrahimi v7.V16B(),
1088*f5c631daSSadaf Ebrahimi v8.V16B(),
1089*f5c631daSSadaf Ebrahimi MemOperand(x1, 64, PostIndex));
1090*f5c631daSSadaf Ebrahimi __ ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), MemOperand(x0));
1091*f5c631daSSadaf Ebrahimi __ ld4(v4.V2D(), v5.V2D(), v6.V2D(), v7.V2D(), MemOperand(x1, x2, PostIndex));
1092*f5c631daSSadaf Ebrahimi __ ld4(v29.V2D(),
1093*f5c631daSSadaf Ebrahimi v30.V2D(),
1094*f5c631daSSadaf Ebrahimi v31.V2D(),
1095*f5c631daSSadaf Ebrahimi v0.V2D(),
1096*f5c631daSSadaf Ebrahimi MemOperand(x1, 64, PostIndex));
1097*f5c631daSSadaf Ebrahimi __ ld4(v27.V2S(), v28.V2S(), v29.V2S(), v30.V2S(), MemOperand(x0));
1098*f5c631daSSadaf Ebrahimi __ ld4(v24.V2S(),
1099*f5c631daSSadaf Ebrahimi v25.V2S(),
1100*f5c631daSSadaf Ebrahimi v26.V2S(),
1101*f5c631daSSadaf Ebrahimi v27.V2S(),
1102*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
1103*f5c631daSSadaf Ebrahimi __ ld4(v4.V2S(), v5.V2S(), v6.V2S(), v7.V2S(), MemOperand(x1, 32, PostIndex));
1104*f5c631daSSadaf Ebrahimi __ ld4(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), MemOperand(x0));
1105*f5c631daSSadaf Ebrahimi __ ld4(v23.V4H(),
1106*f5c631daSSadaf Ebrahimi v24.V4H(),
1107*f5c631daSSadaf Ebrahimi v25.V4H(),
1108*f5c631daSSadaf Ebrahimi v26.V4H(),
1109*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
1110*f5c631daSSadaf Ebrahimi __ ld4(v2.V4H(), v3.V4H(), v4.V4H(), v5.V4H(), MemOperand(x1, 32, PostIndex));
1111*f5c631daSSadaf Ebrahimi __ ld4(v7.V4S(), v8.V4S(), v9.V4S(), v10.V4S(), MemOperand(x0));
1112*f5c631daSSadaf Ebrahimi __ ld4(v28.V4S(),
1113*f5c631daSSadaf Ebrahimi v29.V4S(),
1114*f5c631daSSadaf Ebrahimi v30.V4S(),
1115*f5c631daSSadaf Ebrahimi v31.V4S(),
1116*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
1117*f5c631daSSadaf Ebrahimi __ ld4(v29.V4S(),
1118*f5c631daSSadaf Ebrahimi v30.V4S(),
1119*f5c631daSSadaf Ebrahimi v31.V4S(),
1120*f5c631daSSadaf Ebrahimi v0.V4S(),
1121*f5c631daSSadaf Ebrahimi MemOperand(x1, 64, PostIndex));
1122*f5c631daSSadaf Ebrahimi __ ld4(v15.V8B(), v16.V8B(), v17.V8B(), v18.V8B(), MemOperand(x0));
1123*f5c631daSSadaf Ebrahimi __ ld4(v27.V8B(),
1124*f5c631daSSadaf Ebrahimi v28.V8B(),
1125*f5c631daSSadaf Ebrahimi v29.V8B(),
1126*f5c631daSSadaf Ebrahimi v30.V8B(),
1127*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
1128*f5c631daSSadaf Ebrahimi __ ld4(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), MemOperand(x1, 32, PostIndex));
1129*f5c631daSSadaf Ebrahimi __ ld4(v25.V8H(), v26.V8H(), v27.V8H(), v28.V8H(), MemOperand(x0));
1130*f5c631daSSadaf Ebrahimi __ ld4(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex));
1131*f5c631daSSadaf Ebrahimi __ ld4(v20.V8H(),
1132*f5c631daSSadaf Ebrahimi v21.V8H(),
1133*f5c631daSSadaf Ebrahimi v22.V8H(),
1134*f5c631daSSadaf Ebrahimi v23.V8H(),
1135*f5c631daSSadaf Ebrahimi MemOperand(x1, 64, PostIndex));
1136*f5c631daSSadaf Ebrahimi __ ld4(v20.B(), v21.B(), v22.B(), v23.B(), 3, MemOperand(x0));
1137*f5c631daSSadaf Ebrahimi __ ld4(v12.B(), v13.B(), v14.B(), v15.B(), 3, MemOperand(x1, x2, PostIndex));
1138*f5c631daSSadaf Ebrahimi __ ld4(v27.B(), v28.B(), v29.B(), v30.B(), 6, MemOperand(x1, 4, PostIndex));
1139*f5c631daSSadaf Ebrahimi __ ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, MemOperand(x0));
1140*f5c631daSSadaf Ebrahimi __ ld4(v15.D(), v16.D(), v17.D(), v18.D(), 1, MemOperand(x1, x2, PostIndex));
1141*f5c631daSSadaf Ebrahimi __ ld4(v16.D(), v17.D(), v18.D(), v19.D(), 1, MemOperand(x1, 32, PostIndex));
1142*f5c631daSSadaf Ebrahimi __ ld4(v2.H(), v3.H(), v4.H(), v5.H(), 6, MemOperand(x0));
1143*f5c631daSSadaf Ebrahimi __ ld4(v5.H(), v6.H(), v7.H(), v8.H(), 3, MemOperand(x1, x2, PostIndex));
1144*f5c631daSSadaf Ebrahimi __ ld4(v7.H(), v8.H(), v9.H(), v10.H(), 6, MemOperand(x1, 8, PostIndex));
1145*f5c631daSSadaf Ebrahimi __ ld4(v6.S(), v7.S(), v8.S(), v9.S(), 1, MemOperand(x0));
1146*f5c631daSSadaf Ebrahimi __ ld4(v25.S(), v26.S(), v27.S(), v28.S(), 2, MemOperand(x1, x2, PostIndex));
1147*f5c631daSSadaf Ebrahimi __ ld4(v8.S(), v9.S(), v10.S(), v11.S(), 3, MemOperand(x1, 16, PostIndex));
1148*f5c631daSSadaf Ebrahimi __ ld4r(v14.V16B(), v15.V16B(), v16.V16B(), v17.V16B(), MemOperand(x0));
1149*f5c631daSSadaf Ebrahimi __ ld4r(v13.V16B(),
1150*f5c631daSSadaf Ebrahimi v14.V16B(),
1151*f5c631daSSadaf Ebrahimi v15.V16B(),
1152*f5c631daSSadaf Ebrahimi v16.V16B(),
1153*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
1154*f5c631daSSadaf Ebrahimi __ ld4r(v9.V16B(),
1155*f5c631daSSadaf Ebrahimi v10.V16B(),
1156*f5c631daSSadaf Ebrahimi v11.V16B(),
1157*f5c631daSSadaf Ebrahimi v12.V16B(),
1158*f5c631daSSadaf Ebrahimi MemOperand(x1, 4, PostIndex));
1159*f5c631daSSadaf Ebrahimi __ ld4r(v8.V1D(), v9.V1D(), v10.V1D(), v11.V1D(), MemOperand(x0));
1160*f5c631daSSadaf Ebrahimi __ ld4r(v4.V1D(),
1161*f5c631daSSadaf Ebrahimi v5.V1D(),
1162*f5c631daSSadaf Ebrahimi v6.V1D(),
1163*f5c631daSSadaf Ebrahimi v7.V1D(),
1164*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
1165*f5c631daSSadaf Ebrahimi __ ld4r(v26.V1D(),
1166*f5c631daSSadaf Ebrahimi v27.V1D(),
1167*f5c631daSSadaf Ebrahimi v28.V1D(),
1168*f5c631daSSadaf Ebrahimi v29.V1D(),
1169*f5c631daSSadaf Ebrahimi MemOperand(x1, 32, PostIndex));
1170*f5c631daSSadaf Ebrahimi __ ld4r(v19.V2D(), v20.V2D(), v21.V2D(), v22.V2D(), MemOperand(x0));
1171*f5c631daSSadaf Ebrahimi __ ld4r(v28.V2D(),
1172*f5c631daSSadaf Ebrahimi v29.V2D(),
1173*f5c631daSSadaf Ebrahimi v30.V2D(),
1174*f5c631daSSadaf Ebrahimi v31.V2D(),
1175*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
1176*f5c631daSSadaf Ebrahimi __ ld4r(v15.V2D(),
1177*f5c631daSSadaf Ebrahimi v16.V2D(),
1178*f5c631daSSadaf Ebrahimi v17.V2D(),
1179*f5c631daSSadaf Ebrahimi v18.V2D(),
1180*f5c631daSSadaf Ebrahimi MemOperand(x1, 32, PostIndex));
1181*f5c631daSSadaf Ebrahimi __ ld4r(v31.V2S(), v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x0));
1182*f5c631daSSadaf Ebrahimi __ ld4r(v28.V2S(),
1183*f5c631daSSadaf Ebrahimi v29.V2S(),
1184*f5c631daSSadaf Ebrahimi v30.V2S(),
1185*f5c631daSSadaf Ebrahimi v31.V2S(),
1186*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
1187*f5c631daSSadaf Ebrahimi __ ld4r(v11.V2S(),
1188*f5c631daSSadaf Ebrahimi v12.V2S(),
1189*f5c631daSSadaf Ebrahimi v13.V2S(),
1190*f5c631daSSadaf Ebrahimi v14.V2S(),
1191*f5c631daSSadaf Ebrahimi MemOperand(x1, 16, PostIndex));
1192*f5c631daSSadaf Ebrahimi __ ld4r(v19.V4H(), v20.V4H(), v21.V4H(), v22.V4H(), MemOperand(x0));
1193*f5c631daSSadaf Ebrahimi __ ld4r(v22.V4H(),
1194*f5c631daSSadaf Ebrahimi v23.V4H(),
1195*f5c631daSSadaf Ebrahimi v24.V4H(),
1196*f5c631daSSadaf Ebrahimi v25.V4H(),
1197*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
1198*f5c631daSSadaf Ebrahimi __ ld4r(v20.V4H(),
1199*f5c631daSSadaf Ebrahimi v21.V4H(),
1200*f5c631daSSadaf Ebrahimi v22.V4H(),
1201*f5c631daSSadaf Ebrahimi v23.V4H(),
1202*f5c631daSSadaf Ebrahimi MemOperand(x1, 8, PostIndex));
1203*f5c631daSSadaf Ebrahimi __ ld4r(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), MemOperand(x0));
1204*f5c631daSSadaf Ebrahimi __ ld4r(v25.V4S(),
1205*f5c631daSSadaf Ebrahimi v26.V4S(),
1206*f5c631daSSadaf Ebrahimi v27.V4S(),
1207*f5c631daSSadaf Ebrahimi v28.V4S(),
1208*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
1209*f5c631daSSadaf Ebrahimi __ ld4r(v23.V4S(),
1210*f5c631daSSadaf Ebrahimi v24.V4S(),
1211*f5c631daSSadaf Ebrahimi v25.V4S(),
1212*f5c631daSSadaf Ebrahimi v26.V4S(),
1213*f5c631daSSadaf Ebrahimi MemOperand(x1, 16, PostIndex));
1214*f5c631daSSadaf Ebrahimi __ ld4r(v22.V8B(), v23.V8B(), v24.V8B(), v25.V8B(), MemOperand(x0));
1215*f5c631daSSadaf Ebrahimi __ ld4r(v27.V8B(),
1216*f5c631daSSadaf Ebrahimi v28.V8B(),
1217*f5c631daSSadaf Ebrahimi v29.V8B(),
1218*f5c631daSSadaf Ebrahimi v30.V8B(),
1219*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
1220*f5c631daSSadaf Ebrahimi __ ld4r(v29.V8B(),
1221*f5c631daSSadaf Ebrahimi v30.V8B(),
1222*f5c631daSSadaf Ebrahimi v31.V8B(),
1223*f5c631daSSadaf Ebrahimi v0.V8B(),
1224*f5c631daSSadaf Ebrahimi MemOperand(x1, 4, PostIndex));
1225*f5c631daSSadaf Ebrahimi __ ld4r(v28.V8H(), v29.V8H(), v30.V8H(), v31.V8H(), MemOperand(x0));
1226*f5c631daSSadaf Ebrahimi __ ld4r(v25.V8H(),
1227*f5c631daSSadaf Ebrahimi v26.V8H(),
1228*f5c631daSSadaf Ebrahimi v27.V8H(),
1229*f5c631daSSadaf Ebrahimi v28.V8H(),
1230*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
1231*f5c631daSSadaf Ebrahimi __ ld4r(v22.V8H(),
1232*f5c631daSSadaf Ebrahimi v23.V8H(),
1233*f5c631daSSadaf Ebrahimi v24.V8H(),
1234*f5c631daSSadaf Ebrahimi v25.V8H(),
1235*f5c631daSSadaf Ebrahimi MemOperand(x1, 8, PostIndex));
1236*f5c631daSSadaf Ebrahimi __ mla(v29.V16B(), v7.V16B(), v26.V16B());
1237*f5c631daSSadaf Ebrahimi __ mla(v6.V2S(), v4.V2S(), v14.V2S());
1238*f5c631daSSadaf Ebrahimi __ mla(v9.V2S(), v11.V2S(), v0.S(), 2);
1239*f5c631daSSadaf Ebrahimi __ mla(v5.V4H(), v17.V4H(), v25.V4H());
1240*f5c631daSSadaf Ebrahimi __ mla(v24.V4H(), v7.V4H(), v11.H(), 3);
1241*f5c631daSSadaf Ebrahimi __ mla(v12.V4S(), v3.V4S(), v4.V4S());
1242*f5c631daSSadaf Ebrahimi __ mla(v10.V4S(), v7.V4S(), v7.S(), 3);
1243*f5c631daSSadaf Ebrahimi __ mla(v3.V8B(), v16.V8B(), v9.V8B());
1244*f5c631daSSadaf Ebrahimi __ mla(v19.V8H(), v22.V8H(), v18.V8H());
1245*f5c631daSSadaf Ebrahimi __ mla(v6.V8H(), v2.V8H(), v0.H(), 0);
1246*f5c631daSSadaf Ebrahimi __ mls(v23.V16B(), v10.V16B(), v11.V16B());
1247*f5c631daSSadaf Ebrahimi __ mls(v14.V2S(), v31.V2S(), v22.V2S());
1248*f5c631daSSadaf Ebrahimi __ mls(v28.V2S(), v13.V2S(), v1.S(), 3);
1249*f5c631daSSadaf Ebrahimi __ mls(v2.V4H(), v19.V4H(), v13.V4H());
1250*f5c631daSSadaf Ebrahimi __ mls(v18.V4H(), v15.V4H(), v12.H(), 6);
1251*f5c631daSSadaf Ebrahimi __ mls(v6.V4S(), v11.V4S(), v16.V4S());
1252*f5c631daSSadaf Ebrahimi __ mls(v23.V4S(), v16.V4S(), v10.S(), 2);
1253*f5c631daSSadaf Ebrahimi __ mls(v26.V8B(), v13.V8B(), v23.V8B());
1254*f5c631daSSadaf Ebrahimi __ mls(v10.V8H(), v10.V8H(), v12.V8H());
1255*f5c631daSSadaf Ebrahimi __ mls(v14.V8H(), v0.V8H(), v14.H(), 7);
1256*f5c631daSSadaf Ebrahimi __ mov(b22, v1.B(), 3);
1257*f5c631daSSadaf Ebrahimi __ mov(d7, v13.D(), 1);
1258*f5c631daSSadaf Ebrahimi __ mov(h26, v21.H(), 2);
1259*f5c631daSSadaf Ebrahimi __ mov(s26, v19.S(), 0);
1260*f5c631daSSadaf Ebrahimi __ mov(v26.V16B(), v11.V16B());
1261*f5c631daSSadaf Ebrahimi __ mov(v20.V8B(), v0.V8B());
1262*f5c631daSSadaf Ebrahimi __ mov(v19.B(), 13, v6.B(), 4);
1263*f5c631daSSadaf Ebrahimi __ mov(v4.B(), 13, w19);
1264*f5c631daSSadaf Ebrahimi __ mov(v11.D(), 1, v8.D(), 0);
1265*f5c631daSSadaf Ebrahimi __ mov(v3.D(), 0, x30);
1266*f5c631daSSadaf Ebrahimi __ mov(v29.H(), 4, v11.H(), 7);
1267*f5c631daSSadaf Ebrahimi __ mov(v2.H(), 6, w6);
1268*f5c631daSSadaf Ebrahimi __ mov(v22.S(), 0, v5.S(), 2);
1269*f5c631daSSadaf Ebrahimi __ mov(v24.S(), 3, w8);
1270*f5c631daSSadaf Ebrahimi __ mov(w18, v1.S(), 3);
1271*f5c631daSSadaf Ebrahimi __ mov(x28, v21.D(), 0);
1272*f5c631daSSadaf Ebrahimi __ movi(d24, 0xffff0000ffffff);
1273*f5c631daSSadaf Ebrahimi __ movi(v29.V16B(), 0x80);
1274*f5c631daSSadaf Ebrahimi __ movi(v12.V2D(), 0xffff00ff00ffff00);
1275*f5c631daSSadaf Ebrahimi __ movi(v12.V2S(), 0xec, LSL, 24);
1276*f5c631daSSadaf Ebrahimi __ movi(v10.V2S(), 0x4c, MSL, 16);
1277*f5c631daSSadaf Ebrahimi __ movi(v26.V4H(), 0xc0, LSL);
1278*f5c631daSSadaf Ebrahimi __ movi(v24.V4S(), 0x98, LSL, 16);
1279*f5c631daSSadaf Ebrahimi __ movi(v1.V4S(), 0xde, MSL, 16);
1280*f5c631daSSadaf Ebrahimi __ movi(v21.V8B(), 0x4d);
1281*f5c631daSSadaf Ebrahimi __ movi(v29.V8H(), 0x69, LSL);
1282*f5c631daSSadaf Ebrahimi __ mul(v1.V16B(), v15.V16B(), v17.V16B());
1283*f5c631daSSadaf Ebrahimi __ mul(v21.V2S(), v19.V2S(), v29.V2S());
1284*f5c631daSSadaf Ebrahimi __ mul(v19.V2S(), v5.V2S(), v3.S(), 0);
1285*f5c631daSSadaf Ebrahimi __ mul(v29.V4H(), v11.V4H(), v2.V4H());
1286*f5c631daSSadaf Ebrahimi __ mul(v2.V4H(), v7.V4H(), v0.H(), 0);
1287*f5c631daSSadaf Ebrahimi __ mul(v25.V4S(), v26.V4S(), v16.V4S());
1288*f5c631daSSadaf Ebrahimi __ mul(v26.V4S(), v6.V4S(), v15.S(), 2);
1289*f5c631daSSadaf Ebrahimi __ mul(v11.V8B(), v15.V8B(), v31.V8B());
1290*f5c631daSSadaf Ebrahimi __ mul(v20.V8H(), v31.V8H(), v15.V8H());
1291*f5c631daSSadaf Ebrahimi __ mul(v29.V8H(), v5.V8H(), v9.H(), 4);
1292*f5c631daSSadaf Ebrahimi __ mvn(v13.V16B(), v21.V16B());
1293*f5c631daSSadaf Ebrahimi __ mvn(v28.V8B(), v19.V8B());
1294*f5c631daSSadaf Ebrahimi __ mvni(v25.V2S(), 0xb8, LSL, 8);
1295*f5c631daSSadaf Ebrahimi __ mvni(v17.V2S(), 0x6c, MSL, 16);
1296*f5c631daSSadaf Ebrahimi __ mvni(v29.V4H(), 0x48, LSL);
1297*f5c631daSSadaf Ebrahimi __ mvni(v20.V4S(), 0x7a, LSL, 16);
1298*f5c631daSSadaf Ebrahimi __ mvni(v0.V4S(), 0x1e, MSL, 8);
1299*f5c631daSSadaf Ebrahimi __ mvni(v31.V8H(), 0x3e, LSL);
1300*f5c631daSSadaf Ebrahimi __ neg(d25, d11);
1301*f5c631daSSadaf Ebrahimi __ neg(v4.V16B(), v9.V16B());
1302*f5c631daSSadaf Ebrahimi __ neg(v11.V2D(), v25.V2D());
1303*f5c631daSSadaf Ebrahimi __ neg(v7.V2S(), v18.V2S());
1304*f5c631daSSadaf Ebrahimi __ neg(v7.V4H(), v15.V4H());
1305*f5c631daSSadaf Ebrahimi __ neg(v17.V4S(), v18.V4S());
1306*f5c631daSSadaf Ebrahimi __ neg(v20.V8B(), v17.V8B());
1307*f5c631daSSadaf Ebrahimi __ neg(v0.V8H(), v11.V8H());
1308*f5c631daSSadaf Ebrahimi __ orn(v13.V16B(), v11.V16B(), v31.V16B());
1309*f5c631daSSadaf Ebrahimi __ orn(v22.V8B(), v16.V8B(), v22.V8B());
1310*f5c631daSSadaf Ebrahimi __ orr(v17.V16B(), v17.V16B(), v23.V16B());
1311*f5c631daSSadaf Ebrahimi __ orr(v8.V2S(), 0xe3);
1312*f5c631daSSadaf Ebrahimi __ orr(v11.V4H(), 0x97, 8);
1313*f5c631daSSadaf Ebrahimi __ orr(v7.V4S(), 0xab);
1314*f5c631daSSadaf Ebrahimi __ orr(v8.V8B(), v4.V8B(), v3.V8B());
1315*f5c631daSSadaf Ebrahimi __ orr(v31.V8H(), 0xb0, 8);
1316*f5c631daSSadaf Ebrahimi __ pmul(v11.V16B(), v18.V16B(), v23.V16B());
1317*f5c631daSSadaf Ebrahimi __ pmul(v8.V8B(), v24.V8B(), v5.V8B());
1318*f5c631daSSadaf Ebrahimi __ pmull(v24.V8H(), v18.V8B(), v22.V8B());
1319*f5c631daSSadaf Ebrahimi __ pmull2(v13.V8H(), v3.V16B(), v21.V16B());
1320*f5c631daSSadaf Ebrahimi __ raddhn(v22.V2S(), v10.V2D(), v21.V2D());
1321*f5c631daSSadaf Ebrahimi __ raddhn(v5.V4H(), v13.V4S(), v13.V4S());
1322*f5c631daSSadaf Ebrahimi __ raddhn(v10.V8B(), v17.V8H(), v26.V8H());
1323*f5c631daSSadaf Ebrahimi __ raddhn2(v9.V16B(), v29.V8H(), v13.V8H());
1324*f5c631daSSadaf Ebrahimi __ raddhn2(v27.V4S(), v23.V2D(), v26.V2D());
1325*f5c631daSSadaf Ebrahimi __ raddhn2(v0.V8H(), v29.V4S(), v7.V4S());
1326*f5c631daSSadaf Ebrahimi __ rbit(v22.V16B(), v15.V16B());
1327*f5c631daSSadaf Ebrahimi __ rbit(v30.V8B(), v3.V8B());
1328*f5c631daSSadaf Ebrahimi __ rev16(v31.V16B(), v27.V16B());
1329*f5c631daSSadaf Ebrahimi __ rev16(v12.V8B(), v26.V8B());
1330*f5c631daSSadaf Ebrahimi __ rev32(v5.V16B(), v4.V16B());
1331*f5c631daSSadaf Ebrahimi __ rev32(v16.V4H(), v26.V4H());
1332*f5c631daSSadaf Ebrahimi __ rev32(v20.V8B(), v3.V8B());
1333*f5c631daSSadaf Ebrahimi __ rev32(v20.V8H(), v28.V8H());
1334*f5c631daSSadaf Ebrahimi __ rev64(v9.V16B(), v19.V16B());
1335*f5c631daSSadaf Ebrahimi __ rev64(v5.V2S(), v16.V2S());
1336*f5c631daSSadaf Ebrahimi __ rev64(v7.V4H(), v31.V4H());
1337*f5c631daSSadaf Ebrahimi __ rev64(v15.V4S(), v26.V4S());
1338*f5c631daSSadaf Ebrahimi __ rev64(v25.V8B(), v9.V8B());
1339*f5c631daSSadaf Ebrahimi __ rev64(v11.V8H(), v5.V8H());
1340*f5c631daSSadaf Ebrahimi __ rshrn(v18.V2S(), v13.V2D(), 1);
1341*f5c631daSSadaf Ebrahimi __ rshrn(v25.V4H(), v30.V4S(), 2);
1342*f5c631daSSadaf Ebrahimi __ rshrn(v13.V8B(), v9.V8H(), 8);
1343*f5c631daSSadaf Ebrahimi __ rshrn2(v3.V16B(), v6.V8H(), 8);
1344*f5c631daSSadaf Ebrahimi __ rshrn2(v0.V4S(), v29.V2D(), 25);
1345*f5c631daSSadaf Ebrahimi __ rshrn2(v27.V8H(), v26.V4S(), 15);
1346*f5c631daSSadaf Ebrahimi __ rsubhn(v15.V2S(), v25.V2D(), v4.V2D());
1347*f5c631daSSadaf Ebrahimi __ rsubhn(v23.V4H(), v9.V4S(), v3.V4S());
1348*f5c631daSSadaf Ebrahimi __ rsubhn(v6.V8B(), v30.V8H(), v24.V8H());
1349*f5c631daSSadaf Ebrahimi __ rsubhn2(v4.V16B(), v24.V8H(), v20.V8H());
1350*f5c631daSSadaf Ebrahimi __ rsubhn2(v1.V4S(), v23.V2D(), v22.V2D());
1351*f5c631daSSadaf Ebrahimi __ rsubhn2(v19.V8H(), v2.V4S(), v20.V4S());
1352*f5c631daSSadaf Ebrahimi __ saba(v28.V16B(), v9.V16B(), v25.V16B());
1353*f5c631daSSadaf Ebrahimi __ saba(v9.V2S(), v28.V2S(), v20.V2S());
1354*f5c631daSSadaf Ebrahimi __ saba(v17.V4H(), v22.V4H(), v22.V4H());
1355*f5c631daSSadaf Ebrahimi __ saba(v29.V4S(), v5.V4S(), v27.V4S());
1356*f5c631daSSadaf Ebrahimi __ saba(v20.V8B(), v21.V8B(), v18.V8B());
1357*f5c631daSSadaf Ebrahimi __ saba(v27.V8H(), v17.V8H(), v30.V8H());
1358*f5c631daSSadaf Ebrahimi __ sabal(v20.V2D(), v13.V2S(), v7.V2S());
1359*f5c631daSSadaf Ebrahimi __ sabal(v4.V4S(), v12.V4H(), v4.V4H());
1360*f5c631daSSadaf Ebrahimi __ sabal(v23.V8H(), v24.V8B(), v20.V8B());
1361*f5c631daSSadaf Ebrahimi __ sabal2(v26.V2D(), v21.V4S(), v18.V4S());
1362*f5c631daSSadaf Ebrahimi __ sabal2(v27.V4S(), v28.V8H(), v8.V8H());
1363*f5c631daSSadaf Ebrahimi __ sabal2(v12.V8H(), v16.V16B(), v21.V16B());
1364*f5c631daSSadaf Ebrahimi __ sabd(v0.V16B(), v15.V16B(), v13.V16B());
1365*f5c631daSSadaf Ebrahimi __ sabd(v15.V2S(), v7.V2S(), v30.V2S());
1366*f5c631daSSadaf Ebrahimi __ sabd(v17.V4H(), v17.V4H(), v12.V4H());
1367*f5c631daSSadaf Ebrahimi __ sabd(v7.V4S(), v4.V4S(), v22.V4S());
1368*f5c631daSSadaf Ebrahimi __ sabd(v23.V8B(), v3.V8B(), v26.V8B());
1369*f5c631daSSadaf Ebrahimi __ sabd(v20.V8H(), v28.V8H(), v5.V8H());
1370*f5c631daSSadaf Ebrahimi __ sabdl(v27.V2D(), v22.V2S(), v20.V2S());
1371*f5c631daSSadaf Ebrahimi __ sabdl(v31.V4S(), v20.V4H(), v23.V4H());
1372*f5c631daSSadaf Ebrahimi __ sabdl(v0.V8H(), v20.V8B(), v27.V8B());
1373*f5c631daSSadaf Ebrahimi __ sabdl2(v31.V2D(), v11.V4S(), v3.V4S());
1374*f5c631daSSadaf Ebrahimi __ sabdl2(v26.V4S(), v11.V8H(), v27.V8H());
1375*f5c631daSSadaf Ebrahimi __ sabdl2(v6.V8H(), v8.V16B(), v18.V16B());
1376*f5c631daSSadaf Ebrahimi __ sadalp(v8.V1D(), v26.V2S());
1377*f5c631daSSadaf Ebrahimi __ sadalp(v12.V2D(), v26.V4S());
1378*f5c631daSSadaf Ebrahimi __ sadalp(v12.V2S(), v26.V4H());
1379*f5c631daSSadaf Ebrahimi __ sadalp(v4.V4H(), v1.V8B());
1380*f5c631daSSadaf Ebrahimi __ sadalp(v15.V4S(), v17.V8H());
1381*f5c631daSSadaf Ebrahimi __ sadalp(v21.V8H(), v25.V16B());
1382*f5c631daSSadaf Ebrahimi __ saddl(v5.V2D(), v10.V2S(), v14.V2S());
1383*f5c631daSSadaf Ebrahimi __ saddl(v18.V4S(), v3.V4H(), v15.V4H());
1384*f5c631daSSadaf Ebrahimi __ saddl(v15.V8H(), v2.V8B(), v23.V8B());
1385*f5c631daSSadaf Ebrahimi __ saddl2(v16.V2D(), v16.V4S(), v27.V4S());
1386*f5c631daSSadaf Ebrahimi __ saddl2(v6.V4S(), v24.V8H(), v0.V8H());
1387*f5c631daSSadaf Ebrahimi __ saddl2(v7.V8H(), v20.V16B(), v28.V16B());
1388*f5c631daSSadaf Ebrahimi __ saddlp(v10.V1D(), v25.V2S());
1389*f5c631daSSadaf Ebrahimi __ saddlp(v15.V2D(), v16.V4S());
1390*f5c631daSSadaf Ebrahimi __ saddlp(v18.V2S(), v10.V4H());
1391*f5c631daSSadaf Ebrahimi __ saddlp(v29.V4H(), v26.V8B());
1392*f5c631daSSadaf Ebrahimi __ saddlp(v10.V4S(), v1.V8H());
1393*f5c631daSSadaf Ebrahimi __ saddlp(v0.V8H(), v21.V16B());
1394*f5c631daSSadaf Ebrahimi __ saddlv(d12, v7.V4S());
1395*f5c631daSSadaf Ebrahimi __ saddlv(h14, v28.V16B());
1396*f5c631daSSadaf Ebrahimi __ saddlv(h30, v30.V8B());
1397*f5c631daSSadaf Ebrahimi __ saddlv(s27, v3.V4H());
1398*f5c631daSSadaf Ebrahimi __ saddlv(s16, v16.V8H());
1399*f5c631daSSadaf Ebrahimi __ saddw(v24.V2D(), v11.V2D(), v18.V2S());
1400*f5c631daSSadaf Ebrahimi __ saddw(v13.V4S(), v12.V4S(), v6.V4H());
1401*f5c631daSSadaf Ebrahimi __ saddw(v19.V8H(), v19.V8H(), v7.V8B());
1402*f5c631daSSadaf Ebrahimi __ saddw2(v27.V2D(), v9.V2D(), v26.V4S());
1403*f5c631daSSadaf Ebrahimi __ saddw2(v19.V4S(), v23.V4S(), v21.V8H());
1404*f5c631daSSadaf Ebrahimi __ saddw2(v15.V8H(), v25.V8H(), v30.V16B());
1405*f5c631daSSadaf Ebrahimi __ shadd(v7.V16B(), v4.V16B(), v9.V16B());
1406*f5c631daSSadaf Ebrahimi __ shadd(v29.V2S(), v25.V2S(), v24.V2S());
1407*f5c631daSSadaf Ebrahimi __ shadd(v31.V4H(), v10.V4H(), v13.V4H());
1408*f5c631daSSadaf Ebrahimi __ shadd(v21.V4S(), v16.V4S(), v8.V4S());
1409*f5c631daSSadaf Ebrahimi __ shadd(v14.V8B(), v29.V8B(), v22.V8B());
1410*f5c631daSSadaf Ebrahimi __ shadd(v19.V8H(), v24.V8H(), v20.V8H());
1411*f5c631daSSadaf Ebrahimi __ shl(d22, d25, 23);
1412*f5c631daSSadaf Ebrahimi __ shl(v5.V16B(), v17.V16B(), 7);
1413*f5c631daSSadaf Ebrahimi __ shl(v2.V2D(), v4.V2D(), 21);
1414*f5c631daSSadaf Ebrahimi __ shl(v4.V2S(), v3.V2S(), 26);
1415*f5c631daSSadaf Ebrahimi __ shl(v3.V4H(), v28.V4H(), 8);
1416*f5c631daSSadaf Ebrahimi __ shl(v4.V4S(), v31.V4S(), 24);
1417*f5c631daSSadaf Ebrahimi __ shl(v18.V8B(), v16.V8B(), 2);
1418*f5c631daSSadaf Ebrahimi __ shl(v0.V8H(), v11.V8H(), 3);
1419*f5c631daSSadaf Ebrahimi __ shll(v5.V2D(), v24.V2S(), 32);
1420*f5c631daSSadaf Ebrahimi __ shll(v26.V4S(), v20.V4H(), 16);
1421*f5c631daSSadaf Ebrahimi __ shll(v5.V8H(), v9.V8B(), 8);
1422*f5c631daSSadaf Ebrahimi __ shll2(v21.V2D(), v28.V4S(), 32);
1423*f5c631daSSadaf Ebrahimi __ shll2(v22.V4S(), v1.V8H(), 16);
1424*f5c631daSSadaf Ebrahimi __ shll2(v30.V8H(), v25.V16B(), 8);
1425*f5c631daSSadaf Ebrahimi __ shrn(v5.V2S(), v1.V2D(), 28);
1426*f5c631daSSadaf Ebrahimi __ shrn(v29.V4H(), v18.V4S(), 7);
1427*f5c631daSSadaf Ebrahimi __ shrn(v17.V8B(), v29.V8H(), 2);
1428*f5c631daSSadaf Ebrahimi __ shrn2(v5.V16B(), v30.V8H(), 3);
1429*f5c631daSSadaf Ebrahimi __ shrn2(v24.V4S(), v1.V2D(), 1);
1430*f5c631daSSadaf Ebrahimi __ shrn2(v5.V8H(), v14.V4S(), 16);
1431*f5c631daSSadaf Ebrahimi __ shsub(v30.V16B(), v22.V16B(), v23.V16B());
1432*f5c631daSSadaf Ebrahimi __ shsub(v22.V2S(), v27.V2S(), v25.V2S());
1433*f5c631daSSadaf Ebrahimi __ shsub(v13.V4H(), v22.V4H(), v1.V4H());
1434*f5c631daSSadaf Ebrahimi __ shsub(v10.V4S(), v8.V4S(), v23.V4S());
1435*f5c631daSSadaf Ebrahimi __ shsub(v6.V8B(), v9.V8B(), v31.V8B());
1436*f5c631daSSadaf Ebrahimi __ shsub(v8.V8H(), v31.V8H(), v8.V8H());
1437*f5c631daSSadaf Ebrahimi __ sli(d19, d29, 20);
1438*f5c631daSSadaf Ebrahimi __ sli(v9.V16B(), v24.V16B(), 0);
1439*f5c631daSSadaf Ebrahimi __ sli(v22.V2D(), v9.V2D(), 10);
1440*f5c631daSSadaf Ebrahimi __ sli(v11.V2S(), v27.V2S(), 20);
1441*f5c631daSSadaf Ebrahimi __ sli(v16.V4H(), v15.V4H(), 5);
1442*f5c631daSSadaf Ebrahimi __ sli(v8.V4S(), v8.V4S(), 25);
1443*f5c631daSSadaf Ebrahimi __ sli(v10.V8B(), v30.V8B(), 0);
1444*f5c631daSSadaf Ebrahimi __ sli(v7.V8H(), v28.V8H(), 6);
1445*f5c631daSSadaf Ebrahimi __ smax(v18.V16B(), v8.V16B(), v1.V16B());
1446*f5c631daSSadaf Ebrahimi __ smax(v30.V2S(), v5.V2S(), v1.V2S());
1447*f5c631daSSadaf Ebrahimi __ smax(v17.V4H(), v25.V4H(), v19.V4H());
1448*f5c631daSSadaf Ebrahimi __ smax(v1.V4S(), v24.V4S(), v31.V4S());
1449*f5c631daSSadaf Ebrahimi __ smax(v17.V8B(), v24.V8B(), v24.V8B());
1450*f5c631daSSadaf Ebrahimi __ smax(v11.V8H(), v26.V8H(), v10.V8H());
1451*f5c631daSSadaf Ebrahimi __ smaxp(v12.V16B(), v14.V16B(), v7.V16B());
1452*f5c631daSSadaf Ebrahimi __ smaxp(v31.V2S(), v24.V2S(), v6.V2S());
1453*f5c631daSSadaf Ebrahimi __ smaxp(v10.V4H(), v29.V4H(), v10.V4H());
1454*f5c631daSSadaf Ebrahimi __ smaxp(v18.V4S(), v11.V4S(), v7.V4S());
1455*f5c631daSSadaf Ebrahimi __ smaxp(v21.V8B(), v0.V8B(), v18.V8B());
1456*f5c631daSSadaf Ebrahimi __ smaxp(v26.V8H(), v8.V8H(), v15.V8H());
1457*f5c631daSSadaf Ebrahimi __ smaxv(b4, v5.V16B());
1458*f5c631daSSadaf Ebrahimi __ smaxv(b23, v0.V8B());
1459*f5c631daSSadaf Ebrahimi __ smaxv(h6, v0.V4H());
1460*f5c631daSSadaf Ebrahimi __ smaxv(h24, v8.V8H());
1461*f5c631daSSadaf Ebrahimi __ smaxv(s3, v16.V4S());
1462*f5c631daSSadaf Ebrahimi __ smin(v24.V16B(), v8.V16B(), v18.V16B());
1463*f5c631daSSadaf Ebrahimi __ smin(v29.V2S(), v8.V2S(), v23.V2S());
1464*f5c631daSSadaf Ebrahimi __ smin(v6.V4H(), v11.V4H(), v21.V4H());
1465*f5c631daSSadaf Ebrahimi __ smin(v24.V4S(), v23.V4S(), v15.V4S());
1466*f5c631daSSadaf Ebrahimi __ smin(v8.V8B(), v16.V8B(), v4.V8B());
1467*f5c631daSSadaf Ebrahimi __ smin(v12.V8H(), v1.V8H(), v10.V8H());
1468*f5c631daSSadaf Ebrahimi __ sminp(v13.V16B(), v18.V16B(), v28.V16B());
1469*f5c631daSSadaf Ebrahimi __ sminp(v22.V2S(), v28.V2S(), v16.V2S());
1470*f5c631daSSadaf Ebrahimi __ sminp(v15.V4H(), v12.V4H(), v5.V4H());
1471*f5c631daSSadaf Ebrahimi __ sminp(v15.V4S(), v17.V4S(), v8.V4S());
1472*f5c631daSSadaf Ebrahimi __ sminp(v21.V8B(), v2.V8B(), v6.V8B());
1473*f5c631daSSadaf Ebrahimi __ sminp(v21.V8H(), v12.V8H(), v6.V8H());
1474*f5c631daSSadaf Ebrahimi __ sminv(b8, v6.V16B());
1475*f5c631daSSadaf Ebrahimi __ sminv(b6, v18.V8B());
1476*f5c631daSSadaf Ebrahimi __ sminv(h20, v1.V4H());
1477*f5c631daSSadaf Ebrahimi __ sminv(h7, v17.V8H());
1478*f5c631daSSadaf Ebrahimi __ sminv(s21, v4.V4S());
1479*f5c631daSSadaf Ebrahimi __ smlal(v24.V2D(), v14.V2S(), v21.V2S());
1480*f5c631daSSadaf Ebrahimi __ smlal(v31.V2D(), v3.V2S(), v14.S(), 2);
1481*f5c631daSSadaf Ebrahimi __ smlal(v7.V4S(), v20.V4H(), v21.V4H());
1482*f5c631daSSadaf Ebrahimi __ smlal(v19.V4S(), v16.V4H(), v9.H(), 3);
1483*f5c631daSSadaf Ebrahimi __ smlal(v29.V8H(), v14.V8B(), v1.V8B());
1484*f5c631daSSadaf Ebrahimi __ smlal2(v30.V2D(), v26.V4S(), v16.V4S());
1485*f5c631daSSadaf Ebrahimi __ smlal2(v31.V2D(), v30.V4S(), v1.S(), 0);
1486*f5c631daSSadaf Ebrahimi __ smlal2(v17.V4S(), v6.V8H(), v3.V8H());
1487*f5c631daSSadaf Ebrahimi __ smlal2(v11.V4S(), v31.V8H(), v5.H(), 7);
1488*f5c631daSSadaf Ebrahimi __ smlal2(v30.V8H(), v16.V16B(), v29.V16B());
1489*f5c631daSSadaf Ebrahimi __ smlsl(v1.V2D(), v20.V2S(), v17.V2S());
1490*f5c631daSSadaf Ebrahimi __ smlsl(v29.V2D(), v12.V2S(), v5.S(), 3);
1491*f5c631daSSadaf Ebrahimi __ smlsl(v0.V4S(), v26.V4H(), v1.V4H());
1492*f5c631daSSadaf Ebrahimi __ smlsl(v3.V4S(), v5.V4H(), v6.H(), 5);
1493*f5c631daSSadaf Ebrahimi __ smlsl(v4.V8H(), v0.V8B(), v26.V8B());
1494*f5c631daSSadaf Ebrahimi __ smlsl2(v14.V2D(), v14.V4S(), v5.V4S());
1495*f5c631daSSadaf Ebrahimi __ smlsl2(v15.V2D(), v5.V4S(), v0.S(), 1);
1496*f5c631daSSadaf Ebrahimi __ smlsl2(v29.V4S(), v17.V8H(), v31.V8H());
1497*f5c631daSSadaf Ebrahimi __ smlsl2(v6.V4S(), v15.V8H(), v9.H(), 6);
1498*f5c631daSSadaf Ebrahimi __ smlsl2(v30.V8H(), v15.V16B(), v15.V16B());
1499*f5c631daSSadaf Ebrahimi __ smov(w21, v6.B(), 3);
1500*f5c631daSSadaf Ebrahimi __ smov(w13, v26.H(), 7);
1501*f5c631daSSadaf Ebrahimi __ smov(x24, v16.B(), 7);
1502*f5c631daSSadaf Ebrahimi __ smov(x7, v4.H(), 3);
1503*f5c631daSSadaf Ebrahimi __ smov(x29, v7.S(), 1);
1504*f5c631daSSadaf Ebrahimi __ smull(v4.V2D(), v29.V2S(), v17.V2S());
1505*f5c631daSSadaf Ebrahimi __ smull(v30.V2D(), v21.V2S(), v6.S(), 2);
1506*f5c631daSSadaf Ebrahimi __ smull(v23.V4S(), v5.V4H(), v23.V4H());
1507*f5c631daSSadaf Ebrahimi __ smull(v8.V4S(), v9.V4H(), v2.H(), 1);
1508*f5c631daSSadaf Ebrahimi __ smull(v31.V8H(), v17.V8B(), v1.V8B());
1509*f5c631daSSadaf Ebrahimi __ smull2(v3.V2D(), v3.V4S(), v23.V4S());
1510*f5c631daSSadaf Ebrahimi __ smull2(v15.V2D(), v29.V4S(), v6.S(), 1);
1511*f5c631daSSadaf Ebrahimi __ smull2(v19.V4S(), v20.V8H(), v30.V8H());
1512*f5c631daSSadaf Ebrahimi __ smull2(v6.V4S(), v10.V8H(), v7.H(), 4);
1513*f5c631daSSadaf Ebrahimi __ smull2(v25.V8H(), v8.V16B(), v27.V16B());
1514*f5c631daSSadaf Ebrahimi __ sqabs(b3, b15);
1515*f5c631daSSadaf Ebrahimi __ sqabs(d14, d9);
1516*f5c631daSSadaf Ebrahimi __ sqabs(h31, h28);
1517*f5c631daSSadaf Ebrahimi __ sqabs(s8, s0);
1518*f5c631daSSadaf Ebrahimi __ sqabs(v14.V16B(), v7.V16B());
1519*f5c631daSSadaf Ebrahimi __ sqabs(v23.V2D(), v19.V2D());
1520*f5c631daSSadaf Ebrahimi __ sqabs(v10.V2S(), v24.V2S());
1521*f5c631daSSadaf Ebrahimi __ sqabs(v31.V4H(), v19.V4H());
1522*f5c631daSSadaf Ebrahimi __ sqabs(v23.V4S(), v0.V4S());
1523*f5c631daSSadaf Ebrahimi __ sqabs(v29.V8B(), v23.V8B());
1524*f5c631daSSadaf Ebrahimi __ sqabs(v17.V8H(), v21.V8H());
1525*f5c631daSSadaf Ebrahimi __ sqadd(b9, b23, b13);
1526*f5c631daSSadaf Ebrahimi __ sqadd(d2, d25, d26);
1527*f5c631daSSadaf Ebrahimi __ sqadd(h7, h29, h25);
1528*f5c631daSSadaf Ebrahimi __ sqadd(s11, s7, s24);
1529*f5c631daSSadaf Ebrahimi __ sqadd(v20.V16B(), v16.V16B(), v29.V16B());
1530*f5c631daSSadaf Ebrahimi __ sqadd(v23.V2D(), v30.V2D(), v28.V2D());
1531*f5c631daSSadaf Ebrahimi __ sqadd(v8.V2S(), v19.V2S(), v2.V2S());
1532*f5c631daSSadaf Ebrahimi __ sqadd(v20.V4H(), v12.V4H(), v31.V4H());
1533*f5c631daSSadaf Ebrahimi __ sqadd(v14.V4S(), v15.V4S(), v17.V4S());
1534*f5c631daSSadaf Ebrahimi __ sqadd(v2.V8B(), v29.V8B(), v13.V8B());
1535*f5c631daSSadaf Ebrahimi __ sqadd(v7.V8H(), v19.V8H(), v14.V8H());
1536*f5c631daSSadaf Ebrahimi __ sqdmlal(d15, s5, s30);
1537*f5c631daSSadaf Ebrahimi __ sqdmlal(d24, s10, v2.S(), 3);
1538*f5c631daSSadaf Ebrahimi __ sqdmlal(s9, h19, h8);
1539*f5c631daSSadaf Ebrahimi __ sqdmlal(s14, h1, v12.H(), 3);
1540*f5c631daSSadaf Ebrahimi __ sqdmlal(v30.V2D(), v5.V2S(), v31.V2S());
1541*f5c631daSSadaf Ebrahimi __ sqdmlal(v25.V2D(), v14.V2S(), v10.S(), 1);
1542*f5c631daSSadaf Ebrahimi __ sqdmlal(v19.V4S(), v17.V4H(), v16.V4H());
1543*f5c631daSSadaf Ebrahimi __ sqdmlal(v8.V4S(), v5.V4H(), v8.H(), 1);
1544*f5c631daSSadaf Ebrahimi __ sqdmlal2(v1.V2D(), v23.V4S(), v3.V4S());
1545*f5c631daSSadaf Ebrahimi __ sqdmlal2(v19.V2D(), v0.V4S(), v9.S(), 0);
1546*f5c631daSSadaf Ebrahimi __ sqdmlal2(v26.V4S(), v22.V8H(), v11.V8H());
1547*f5c631daSSadaf Ebrahimi __ sqdmlal2(v6.V4S(), v28.V8H(), v13.H(), 4);
1548*f5c631daSSadaf Ebrahimi __ sqdmlsl(d10, s29, s20);
1549*f5c631daSSadaf Ebrahimi __ sqdmlsl(d10, s9, v10.S(), 1);
1550*f5c631daSSadaf Ebrahimi __ sqdmlsl(s30, h9, h24);
1551*f5c631daSSadaf Ebrahimi __ sqdmlsl(s13, h24, v6.H(), 1);
1552*f5c631daSSadaf Ebrahimi __ sqdmlsl(v27.V2D(), v10.V2S(), v20.V2S());
1553*f5c631daSSadaf Ebrahimi __ sqdmlsl(v23.V2D(), v23.V2S(), v3.S(), 3);
1554*f5c631daSSadaf Ebrahimi __ sqdmlsl(v7.V4S(), v17.V4H(), v29.V4H());
1555*f5c631daSSadaf Ebrahimi __ sqdmlsl(v22.V4S(), v21.V4H(), v3.H(), 4);
1556*f5c631daSSadaf Ebrahimi __ sqdmlsl2(v12.V2D(), v7.V4S(), v22.V4S());
1557*f5c631daSSadaf Ebrahimi __ sqdmlsl2(v20.V2D(), v25.V4S(), v8.S(), 0);
1558*f5c631daSSadaf Ebrahimi __ sqdmlsl2(v25.V4S(), v26.V8H(), v18.V8H());
1559*f5c631daSSadaf Ebrahimi __ sqdmlsl2(v25.V4S(), v19.V8H(), v5.H(), 0);
1560*f5c631daSSadaf Ebrahimi __ sqdmulh(h17, h27, h12);
1561*f5c631daSSadaf Ebrahimi __ sqdmulh(h16, h5, v11.H(), 0);
1562*f5c631daSSadaf Ebrahimi __ sqdmulh(s1, s19, s16);
1563*f5c631daSSadaf Ebrahimi __ sqdmulh(s1, s16, v2.S(), 0);
1564*f5c631daSSadaf Ebrahimi __ sqdmulh(v28.V2S(), v1.V2S(), v8.V2S());
1565*f5c631daSSadaf Ebrahimi __ sqdmulh(v28.V2S(), v8.V2S(), v3.S(), 0);
1566*f5c631daSSadaf Ebrahimi __ sqdmulh(v11.V4H(), v25.V4H(), v5.V4H());
1567*f5c631daSSadaf Ebrahimi __ sqdmulh(v30.V4H(), v14.V4H(), v8.H(), 5);
1568*f5c631daSSadaf Ebrahimi __ sqdmulh(v25.V4S(), v21.V4S(), v13.V4S());
1569*f5c631daSSadaf Ebrahimi __ sqdmulh(v23.V4S(), v2.V4S(), v10.S(), 3);
1570*f5c631daSSadaf Ebrahimi __ sqdmulh(v26.V8H(), v5.V8H(), v23.V8H());
1571*f5c631daSSadaf Ebrahimi __ sqdmulh(v4.V8H(), v22.V8H(), v4.H(), 3);
1572*f5c631daSSadaf Ebrahimi __ sqdmull(d25, s2, s26);
1573*f5c631daSSadaf Ebrahimi __ sqdmull(d30, s14, v5.S(), 1);
1574*f5c631daSSadaf Ebrahimi __ sqdmull(s29, h18, h11);
1575*f5c631daSSadaf Ebrahimi __ sqdmull(s11, h13, v7.H(), 6);
1576*f5c631daSSadaf Ebrahimi __ sqdmull(v23.V2D(), v9.V2S(), v8.V2S());
1577*f5c631daSSadaf Ebrahimi __ sqdmull(v18.V2D(), v29.V2S(), v4.S(), 1);
1578*f5c631daSSadaf Ebrahimi __ sqdmull(v17.V4S(), v24.V4H(), v7.V4H());
1579*f5c631daSSadaf Ebrahimi __ sqdmull(v8.V4S(), v15.V4H(), v5.H(), 1);
1580*f5c631daSSadaf Ebrahimi __ sqdmull2(v28.V2D(), v14.V4S(), v2.V4S());
1581*f5c631daSSadaf Ebrahimi __ sqdmull2(v1.V2D(), v24.V4S(), v13.S(), 2);
1582*f5c631daSSadaf Ebrahimi __ sqdmull2(v11.V4S(), v17.V8H(), v31.V8H());
1583*f5c631daSSadaf Ebrahimi __ sqdmull2(v1.V4S(), v20.V8H(), v11.H(), 3);
1584*f5c631daSSadaf Ebrahimi __ sqneg(b2, b0);
1585*f5c631daSSadaf Ebrahimi __ sqneg(d24, d2);
1586*f5c631daSSadaf Ebrahimi __ sqneg(h29, h3);
1587*f5c631daSSadaf Ebrahimi __ sqneg(s4, s9);
1588*f5c631daSSadaf Ebrahimi __ sqneg(v14.V16B(), v29.V16B());
1589*f5c631daSSadaf Ebrahimi __ sqneg(v30.V2D(), v12.V2D());
1590*f5c631daSSadaf Ebrahimi __ sqneg(v28.V2S(), v26.V2S());
1591*f5c631daSSadaf Ebrahimi __ sqneg(v4.V4H(), v4.V4H());
1592*f5c631daSSadaf Ebrahimi __ sqneg(v9.V4S(), v8.V4S());
1593*f5c631daSSadaf Ebrahimi __ sqneg(v20.V8B(), v20.V8B());
1594*f5c631daSSadaf Ebrahimi __ sqneg(v27.V8H(), v10.V8H());
1595*f5c631daSSadaf Ebrahimi __ sqrdmulh(h7, h24, h0);
1596*f5c631daSSadaf Ebrahimi __ sqrdmulh(h14, h3, v4.H(), 6);
1597*f5c631daSSadaf Ebrahimi __ sqrdmulh(s27, s19, s24);
1598*f5c631daSSadaf Ebrahimi __ sqrdmulh(s31, s21, v4.S(), 0);
1599*f5c631daSSadaf Ebrahimi __ sqrdmulh(v18.V2S(), v25.V2S(), v1.V2S());
1600*f5c631daSSadaf Ebrahimi __ sqrdmulh(v22.V2S(), v5.V2S(), v13.S(), 0);
1601*f5c631daSSadaf Ebrahimi __ sqrdmulh(v22.V4H(), v24.V4H(), v9.V4H());
1602*f5c631daSSadaf Ebrahimi __ sqrdmulh(v13.V4H(), v2.V4H(), v12.H(), 6);
1603*f5c631daSSadaf Ebrahimi __ sqrdmulh(v9.V4S(), v27.V4S(), v2.V4S());
1604*f5c631daSSadaf Ebrahimi __ sqrdmulh(v3.V4S(), v23.V4S(), v7.S(), 1);
1605*f5c631daSSadaf Ebrahimi __ sqrdmulh(v2.V8H(), v0.V8H(), v7.V8H());
1606*f5c631daSSadaf Ebrahimi __ sqrdmulh(v16.V8H(), v9.V8H(), v8.H(), 2);
1607*f5c631daSSadaf Ebrahimi __ sqrshl(b8, b21, b13);
1608*f5c631daSSadaf Ebrahimi __ sqrshl(d29, d7, d20);
1609*f5c631daSSadaf Ebrahimi __ sqrshl(h28, h14, h10);
1610*f5c631daSSadaf Ebrahimi __ sqrshl(s26, s18, s2);
1611*f5c631daSSadaf Ebrahimi __ sqrshl(v18.V16B(), v31.V16B(), v26.V16B());
1612*f5c631daSSadaf Ebrahimi __ sqrshl(v28.V2D(), v4.V2D(), v0.V2D());
1613*f5c631daSSadaf Ebrahimi __ sqrshl(v3.V2S(), v6.V2S(), v0.V2S());
1614*f5c631daSSadaf Ebrahimi __ sqrshl(v1.V4H(), v18.V4H(), v22.V4H());
1615*f5c631daSSadaf Ebrahimi __ sqrshl(v16.V4S(), v25.V4S(), v7.V4S());
1616*f5c631daSSadaf Ebrahimi __ sqrshl(v0.V8B(), v21.V8B(), v5.V8B());
1617*f5c631daSSadaf Ebrahimi __ sqrshl(v30.V8H(), v19.V8H(), v8.V8H());
1618*f5c631daSSadaf Ebrahimi __ sqrshrn(b6, h21, 4);
1619*f5c631daSSadaf Ebrahimi __ sqrshrn(h14, s17, 11);
1620*f5c631daSSadaf Ebrahimi __ sqrshrn(s25, d27, 10);
1621*f5c631daSSadaf Ebrahimi __ sqrshrn(v6.V2S(), v13.V2D(), 18);
1622*f5c631daSSadaf Ebrahimi __ sqrshrn(v5.V4H(), v9.V4S(), 15);
1623*f5c631daSSadaf Ebrahimi __ sqrshrn(v19.V8B(), v12.V8H(), 1);
1624*f5c631daSSadaf Ebrahimi __ sqrshrn2(v19.V16B(), v21.V8H(), 7);
1625*f5c631daSSadaf Ebrahimi __ sqrshrn2(v29.V4S(), v24.V2D(), 13);
1626*f5c631daSSadaf Ebrahimi __ sqrshrn2(v12.V8H(), v2.V4S(), 10);
1627*f5c631daSSadaf Ebrahimi __ sqrshrun(b16, h9, 5);
1628*f5c631daSSadaf Ebrahimi __ sqrshrun(h3, s24, 15);
1629*f5c631daSSadaf Ebrahimi __ sqrshrun(s16, d18, 8);
1630*f5c631daSSadaf Ebrahimi __ sqrshrun(v28.V2S(), v23.V2D(), 8);
1631*f5c631daSSadaf Ebrahimi __ sqrshrun(v31.V4H(), v25.V4S(), 10);
1632*f5c631daSSadaf Ebrahimi __ sqrshrun(v19.V8B(), v23.V8H(), 2);
1633*f5c631daSSadaf Ebrahimi __ sqrshrun2(v24.V16B(), v0.V8H(), 8);
1634*f5c631daSSadaf Ebrahimi __ sqrshrun2(v22.V4S(), v1.V2D(), 23);
1635*f5c631daSSadaf Ebrahimi __ sqrshrun2(v28.V8H(), v21.V4S(), 13);
1636*f5c631daSSadaf Ebrahimi __ sqshl(b6, b21, b8);
1637*f5c631daSSadaf Ebrahimi __ sqshl(b11, b26, 2);
1638*f5c631daSSadaf Ebrahimi __ sqshl(d29, d0, d4);
1639*f5c631daSSadaf Ebrahimi __ sqshl(d21, d7, 35);
1640*f5c631daSSadaf Ebrahimi __ sqshl(h20, h25, h17);
1641*f5c631daSSadaf Ebrahimi __ sqshl(h20, h0, 8);
1642*f5c631daSSadaf Ebrahimi __ sqshl(s29, s13, s4);
1643*f5c631daSSadaf Ebrahimi __ sqshl(s10, s11, 20);
1644*f5c631daSSadaf Ebrahimi __ sqshl(v8.V16B(), v18.V16B(), v28.V16B());
1645*f5c631daSSadaf Ebrahimi __ sqshl(v29.V16B(), v29.V16B(), 2);
1646*f5c631daSSadaf Ebrahimi __ sqshl(v8.V2D(), v31.V2D(), v16.V2D());
1647*f5c631daSSadaf Ebrahimi __ sqshl(v7.V2D(), v14.V2D(), 37);
1648*f5c631daSSadaf Ebrahimi __ sqshl(v0.V2S(), v26.V2S(), v7.V2S());
1649*f5c631daSSadaf Ebrahimi __ sqshl(v5.V2S(), v11.V2S(), 19);
1650*f5c631daSSadaf Ebrahimi __ sqshl(v11.V4H(), v30.V4H(), v0.V4H());
1651*f5c631daSSadaf Ebrahimi __ sqshl(v1.V4H(), v18.V4H(), 7);
1652*f5c631daSSadaf Ebrahimi __ sqshl(v22.V4S(), v3.V4S(), v30.V4S());
1653*f5c631daSSadaf Ebrahimi __ sqshl(v16.V4S(), v15.V4S(), 28);
1654*f5c631daSSadaf Ebrahimi __ sqshl(v6.V8B(), v28.V8B(), v25.V8B());
1655*f5c631daSSadaf Ebrahimi __ sqshl(v0.V8B(), v15.V8B(), 0);
1656*f5c631daSSadaf Ebrahimi __ sqshl(v6.V8H(), v16.V8H(), v30.V8H());
1657*f5c631daSSadaf Ebrahimi __ sqshl(v3.V8H(), v20.V8H(), 14);
1658*f5c631daSSadaf Ebrahimi __ sqshlu(b13, b14, 6);
1659*f5c631daSSadaf Ebrahimi __ sqshlu(d0, d16, 44);
1660*f5c631daSSadaf Ebrahimi __ sqshlu(h5, h29, 15);
1661*f5c631daSSadaf Ebrahimi __ sqshlu(s29, s8, 13);
1662*f5c631daSSadaf Ebrahimi __ sqshlu(v27.V16B(), v20.V16B(), 2);
1663*f5c631daSSadaf Ebrahimi __ sqshlu(v24.V2D(), v12.V2D(), 11);
1664*f5c631daSSadaf Ebrahimi __ sqshlu(v12.V2S(), v19.V2S(), 22);
1665*f5c631daSSadaf Ebrahimi __ sqshlu(v8.V4H(), v12.V4H(), 11);
1666*f5c631daSSadaf Ebrahimi __ sqshlu(v18.V4S(), v3.V4S(), 8);
1667*f5c631daSSadaf Ebrahimi __ sqshlu(v3.V8B(), v10.V8B(), 1);
1668*f5c631daSSadaf Ebrahimi __ sqshlu(v30.V8H(), v24.V8H(), 4);
1669*f5c631daSSadaf Ebrahimi __ sqshrn(b1, h28, 1);
1670*f5c631daSSadaf Ebrahimi __ sqshrn(h31, s7, 10);
1671*f5c631daSSadaf Ebrahimi __ sqshrn(s4, d10, 24);
1672*f5c631daSSadaf Ebrahimi __ sqshrn(v10.V2S(), v1.V2D(), 29);
1673*f5c631daSSadaf Ebrahimi __ sqshrn(v3.V4H(), v13.V4S(), 14);
1674*f5c631daSSadaf Ebrahimi __ sqshrn(v27.V8B(), v6.V8H(), 7);
1675*f5c631daSSadaf Ebrahimi __ sqshrn2(v14.V16B(), v23.V8H(), 1);
1676*f5c631daSSadaf Ebrahimi __ sqshrn2(v25.V4S(), v22.V2D(), 27);
1677*f5c631daSSadaf Ebrahimi __ sqshrn2(v31.V8H(), v12.V4S(), 10);
1678*f5c631daSSadaf Ebrahimi __ sqshrun(b9, h0, 1);
1679*f5c631daSSadaf Ebrahimi __ sqshrun(h11, s6, 7);
1680*f5c631daSSadaf Ebrahimi __ sqshrun(s13, d12, 13);
1681*f5c631daSSadaf Ebrahimi __ sqshrun(v10.V2S(), v30.V2D(), 1);
1682*f5c631daSSadaf Ebrahimi __ sqshrun(v31.V4H(), v3.V4S(), 11);
1683*f5c631daSSadaf Ebrahimi __ sqshrun(v28.V8B(), v30.V8H(), 8);
1684*f5c631daSSadaf Ebrahimi __ sqshrun2(v16.V16B(), v27.V8H(), 3);
1685*f5c631daSSadaf Ebrahimi __ sqshrun2(v27.V4S(), v14.V2D(), 18);
1686*f5c631daSSadaf Ebrahimi __ sqshrun2(v23.V8H(), v14.V4S(), 1);
1687*f5c631daSSadaf Ebrahimi __ sqsub(b19, b29, b11);
1688*f5c631daSSadaf Ebrahimi __ sqsub(d21, d31, d6);
1689*f5c631daSSadaf Ebrahimi __ sqsub(h18, h10, h19);
1690*f5c631daSSadaf Ebrahimi __ sqsub(s6, s5, s0);
1691*f5c631daSSadaf Ebrahimi __ sqsub(v21.V16B(), v22.V16B(), v0.V16B());
1692*f5c631daSSadaf Ebrahimi __ sqsub(v22.V2D(), v10.V2D(), v17.V2D());
1693*f5c631daSSadaf Ebrahimi __ sqsub(v8.V2S(), v21.V2S(), v2.V2S());
1694*f5c631daSSadaf Ebrahimi __ sqsub(v18.V4H(), v25.V4H(), v27.V4H());
1695*f5c631daSSadaf Ebrahimi __ sqsub(v13.V4S(), v3.V4S(), v6.V4S());
1696*f5c631daSSadaf Ebrahimi __ sqsub(v28.V8B(), v29.V8B(), v16.V8B());
1697*f5c631daSSadaf Ebrahimi __ sqsub(v17.V8H(), v6.V8H(), v10.V8H());
1698*f5c631daSSadaf Ebrahimi __ sqxtn(b27, h26);
1699*f5c631daSSadaf Ebrahimi __ sqxtn(h17, s11);
1700*f5c631daSSadaf Ebrahimi __ sqxtn(s22, d31);
1701*f5c631daSSadaf Ebrahimi __ sqxtn(v26.V2S(), v5.V2D());
1702*f5c631daSSadaf Ebrahimi __ sqxtn(v13.V4H(), v7.V4S());
1703*f5c631daSSadaf Ebrahimi __ sqxtn(v19.V8B(), v19.V8H());
1704*f5c631daSSadaf Ebrahimi __ sqxtn2(v19.V16B(), v3.V8H());
1705*f5c631daSSadaf Ebrahimi __ sqxtn2(v23.V4S(), v1.V2D());
1706*f5c631daSSadaf Ebrahimi __ sqxtn2(v13.V8H(), v3.V4S());
1707*f5c631daSSadaf Ebrahimi __ sqxtun(b26, h9);
1708*f5c631daSSadaf Ebrahimi __ sqxtun(h19, s12);
1709*f5c631daSSadaf Ebrahimi __ sqxtun(s3, d6);
1710*f5c631daSSadaf Ebrahimi __ sqxtun(v29.V2S(), v26.V2D());
1711*f5c631daSSadaf Ebrahimi __ sqxtun(v26.V4H(), v10.V4S());
1712*f5c631daSSadaf Ebrahimi __ sqxtun(v7.V8B(), v29.V8H());
1713*f5c631daSSadaf Ebrahimi __ sqxtun2(v21.V16B(), v14.V8H());
1714*f5c631daSSadaf Ebrahimi __ sqxtun2(v24.V4S(), v15.V2D());
1715*f5c631daSSadaf Ebrahimi __ sqxtun2(v30.V8H(), v1.V4S());
1716*f5c631daSSadaf Ebrahimi __ srhadd(v21.V16B(), v17.V16B(), v15.V16B());
1717*f5c631daSSadaf Ebrahimi __ srhadd(v28.V2S(), v21.V2S(), v29.V2S());
1718*f5c631daSSadaf Ebrahimi __ srhadd(v9.V4H(), v1.V4H(), v30.V4H());
1719*f5c631daSSadaf Ebrahimi __ srhadd(v24.V4S(), v0.V4S(), v2.V4S());
1720*f5c631daSSadaf Ebrahimi __ srhadd(v6.V8B(), v17.V8B(), v15.V8B());
1721*f5c631daSSadaf Ebrahimi __ srhadd(v5.V8H(), v7.V8H(), v21.V8H());
1722*f5c631daSSadaf Ebrahimi __ sri(d14, d14, 49);
1723*f5c631daSSadaf Ebrahimi __ sri(v23.V16B(), v8.V16B(), 4);
1724*f5c631daSSadaf Ebrahimi __ sri(v20.V2D(), v13.V2D(), 20);
1725*f5c631daSSadaf Ebrahimi __ sri(v16.V2S(), v2.V2S(), 24);
1726*f5c631daSSadaf Ebrahimi __ sri(v5.V4H(), v23.V4H(), 11);
1727*f5c631daSSadaf Ebrahimi __ sri(v27.V4S(), v15.V4S(), 23);
1728*f5c631daSSadaf Ebrahimi __ sri(v19.V8B(), v29.V8B(), 4);
1729*f5c631daSSadaf Ebrahimi __ sri(v7.V8H(), v29.V8H(), 3);
1730*f5c631daSSadaf Ebrahimi __ srshl(d2, d9, d26);
1731*f5c631daSSadaf Ebrahimi __ srshl(v29.V16B(), v17.V16B(), v11.V16B());
1732*f5c631daSSadaf Ebrahimi __ srshl(v8.V2D(), v15.V2D(), v4.V2D());
1733*f5c631daSSadaf Ebrahimi __ srshl(v25.V2S(), v17.V2S(), v8.V2S());
1734*f5c631daSSadaf Ebrahimi __ srshl(v19.V4H(), v7.V4H(), v7.V4H());
1735*f5c631daSSadaf Ebrahimi __ srshl(v13.V4S(), v2.V4S(), v17.V4S());
1736*f5c631daSSadaf Ebrahimi __ srshl(v22.V8B(), v6.V8B(), v21.V8B());
1737*f5c631daSSadaf Ebrahimi __ srshl(v10.V8H(), v17.V8H(), v4.V8H());
1738*f5c631daSSadaf Ebrahimi __ srshr(d21, d18, 45);
1739*f5c631daSSadaf Ebrahimi __ srshr(v3.V16B(), v11.V16B(), 7);
1740*f5c631daSSadaf Ebrahimi __ srshr(v21.V2D(), v26.V2D(), 53);
1741*f5c631daSSadaf Ebrahimi __ srshr(v11.V2S(), v5.V2S(), 28);
1742*f5c631daSSadaf Ebrahimi __ srshr(v7.V4H(), v18.V4H(), 12);
1743*f5c631daSSadaf Ebrahimi __ srshr(v7.V4S(), v3.V4S(), 30);
1744*f5c631daSSadaf Ebrahimi __ srshr(v14.V8B(), v2.V8B(), 6);
1745*f5c631daSSadaf Ebrahimi __ srshr(v21.V8H(), v20.V8H(), 3);
1746*f5c631daSSadaf Ebrahimi __ srsra(d21, d30, 63);
1747*f5c631daSSadaf Ebrahimi __ srsra(v27.V16B(), v30.V16B(), 6);
1748*f5c631daSSadaf Ebrahimi __ srsra(v20.V2D(), v12.V2D(), 27);
1749*f5c631daSSadaf Ebrahimi __ srsra(v0.V2S(), v17.V2S(), 5);
1750*f5c631daSSadaf Ebrahimi __ srsra(v14.V4H(), v16.V4H(), 15);
1751*f5c631daSSadaf Ebrahimi __ srsra(v18.V4S(), v3.V4S(), 20);
1752*f5c631daSSadaf Ebrahimi __ srsra(v21.V8B(), v1.V8B(), 1);
1753*f5c631daSSadaf Ebrahimi __ srsra(v31.V8H(), v25.V8H(), 2);
1754*f5c631daSSadaf Ebrahimi __ sshl(d1, d13, d9);
1755*f5c631daSSadaf Ebrahimi __ sshl(v17.V16B(), v31.V16B(), v15.V16B());
1756*f5c631daSSadaf Ebrahimi __ sshl(v13.V2D(), v16.V2D(), v0.V2D());
1757*f5c631daSSadaf Ebrahimi __ sshl(v0.V2S(), v7.V2S(), v22.V2S());
1758*f5c631daSSadaf Ebrahimi __ sshl(v23.V4H(), v19.V4H(), v4.V4H());
1759*f5c631daSSadaf Ebrahimi __ sshl(v5.V4S(), v5.V4S(), v11.V4S());
1760*f5c631daSSadaf Ebrahimi __ sshl(v23.V8B(), v27.V8B(), v7.V8B());
1761*f5c631daSSadaf Ebrahimi __ sshl(v29.V8H(), v10.V8H(), v5.V8H());
1762*f5c631daSSadaf Ebrahimi __ sshll(v0.V2D(), v2.V2S(), 23);
1763*f5c631daSSadaf Ebrahimi __ sshll(v11.V4S(), v8.V4H(), 8);
1764*f5c631daSSadaf Ebrahimi __ sshll(v4.V8H(), v29.V8B(), 1);
1765*f5c631daSSadaf Ebrahimi __ sshll2(v10.V2D(), v4.V4S(), 14);
1766*f5c631daSSadaf Ebrahimi __ sshll2(v26.V4S(), v31.V8H(), 6);
1767*f5c631daSSadaf Ebrahimi __ sshll2(v3.V8H(), v26.V16B(), 4);
1768*f5c631daSSadaf Ebrahimi __ sshr(d19, d21, 20);
1769*f5c631daSSadaf Ebrahimi __ sshr(v15.V16B(), v23.V16B(), 5);
1770*f5c631daSSadaf Ebrahimi __ sshr(v17.V2D(), v14.V2D(), 38);
1771*f5c631daSSadaf Ebrahimi __ sshr(v3.V2S(), v29.V2S(), 23);
1772*f5c631daSSadaf Ebrahimi __ sshr(v23.V4H(), v27.V4H(), 4);
1773*f5c631daSSadaf Ebrahimi __ sshr(v28.V4S(), v3.V4S(), 4);
1774*f5c631daSSadaf Ebrahimi __ sshr(v14.V8B(), v2.V8B(), 6);
1775*f5c631daSSadaf Ebrahimi __ sshr(v3.V8H(), v8.V8H(), 6);
1776*f5c631daSSadaf Ebrahimi __ ssra(d12, d28, 44);
1777*f5c631daSSadaf Ebrahimi __ ssra(v29.V16B(), v31.V16B(), 4);
1778*f5c631daSSadaf Ebrahimi __ ssra(v3.V2D(), v0.V2D(), 24);
1779*f5c631daSSadaf Ebrahimi __ ssra(v14.V2S(), v28.V2S(), 6);
1780*f5c631daSSadaf Ebrahimi __ ssra(v18.V4H(), v8.V4H(), 7);
1781*f5c631daSSadaf Ebrahimi __ ssra(v31.V4S(), v14.V4S(), 24);
1782*f5c631daSSadaf Ebrahimi __ ssra(v28.V8B(), v26.V8B(), 5);
1783*f5c631daSSadaf Ebrahimi __ ssra(v9.V8H(), v9.V8H(), 14);
1784*f5c631daSSadaf Ebrahimi __ ssubl(v13.V2D(), v14.V2S(), v3.V2S());
1785*f5c631daSSadaf Ebrahimi __ ssubl(v5.V4S(), v16.V4H(), v8.V4H());
1786*f5c631daSSadaf Ebrahimi __ ssubl(v0.V8H(), v28.V8B(), v6.V8B());
1787*f5c631daSSadaf Ebrahimi __ ssubl2(v5.V2D(), v13.V4S(), v25.V4S());
1788*f5c631daSSadaf Ebrahimi __ ssubl2(v3.V4S(), v15.V8H(), v17.V8H());
1789*f5c631daSSadaf Ebrahimi __ ssubl2(v15.V8H(), v15.V16B(), v14.V16B());
1790*f5c631daSSadaf Ebrahimi __ ssubw(v25.V2D(), v23.V2D(), v26.V2S());
1791*f5c631daSSadaf Ebrahimi __ ssubw(v21.V4S(), v18.V4S(), v24.V4H());
1792*f5c631daSSadaf Ebrahimi __ ssubw(v30.V8H(), v22.V8H(), v3.V8B());
1793*f5c631daSSadaf Ebrahimi __ ssubw2(v16.V2D(), v24.V2D(), v28.V4S());
1794*f5c631daSSadaf Ebrahimi __ ssubw2(v31.V4S(), v11.V4S(), v15.V8H());
1795*f5c631daSSadaf Ebrahimi __ ssubw2(v4.V8H(), v8.V8H(), v16.V16B());
1796*f5c631daSSadaf Ebrahimi __ st1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), MemOperand(x0));
1797*f5c631daSSadaf Ebrahimi __ st1(v10.V16B(),
1798*f5c631daSSadaf Ebrahimi v11.V16B(),
1799*f5c631daSSadaf Ebrahimi v12.V16B(),
1800*f5c631daSSadaf Ebrahimi v13.V16B(),
1801*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
1802*f5c631daSSadaf Ebrahimi __ st1(v27.V16B(),
1803*f5c631daSSadaf Ebrahimi v28.V16B(),
1804*f5c631daSSadaf Ebrahimi v29.V16B(),
1805*f5c631daSSadaf Ebrahimi v30.V16B(),
1806*f5c631daSSadaf Ebrahimi MemOperand(x1, 64, PostIndex));
1807*f5c631daSSadaf Ebrahimi __ st1(v16.V16B(), v17.V16B(), v18.V16B(), MemOperand(x0));
1808*f5c631daSSadaf Ebrahimi __ st1(v21.V16B(), v22.V16B(), v23.V16B(), MemOperand(x1, x2, PostIndex));
1809*f5c631daSSadaf Ebrahimi __ st1(v9.V16B(), v10.V16B(), v11.V16B(), MemOperand(x1, 48, PostIndex));
1810*f5c631daSSadaf Ebrahimi __ st1(v7.V16B(), v8.V16B(), MemOperand(x0));
1811*f5c631daSSadaf Ebrahimi __ st1(v26.V16B(), v27.V16B(), MemOperand(x1, x2, PostIndex));
1812*f5c631daSSadaf Ebrahimi __ st1(v22.V16B(), v23.V16B(), MemOperand(x1, 32, PostIndex));
1813*f5c631daSSadaf Ebrahimi __ st1(v23.V16B(), MemOperand(x0));
1814*f5c631daSSadaf Ebrahimi __ st1(v28.V16B(), MemOperand(x1, x2, PostIndex));
1815*f5c631daSSadaf Ebrahimi __ st1(v2.V16B(), MemOperand(x1, 16, PostIndex));
1816*f5c631daSSadaf Ebrahimi __ st1(v29.V1D(), v30.V1D(), v31.V1D(), v0.V1D(), MemOperand(x0));
1817*f5c631daSSadaf Ebrahimi __ st1(v12.V1D(),
1818*f5c631daSSadaf Ebrahimi v13.V1D(),
1819*f5c631daSSadaf Ebrahimi v14.V1D(),
1820*f5c631daSSadaf Ebrahimi v15.V1D(),
1821*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
1822*f5c631daSSadaf Ebrahimi __ st1(v30.V1D(),
1823*f5c631daSSadaf Ebrahimi v31.V1D(),
1824*f5c631daSSadaf Ebrahimi v0.V1D(),
1825*f5c631daSSadaf Ebrahimi v1.V1D(),
1826*f5c631daSSadaf Ebrahimi MemOperand(x1, 32, PostIndex));
1827*f5c631daSSadaf Ebrahimi __ st1(v16.V1D(), v17.V1D(), v18.V1D(), MemOperand(x0));
1828*f5c631daSSadaf Ebrahimi __ st1(v3.V1D(), v4.V1D(), v5.V1D(), MemOperand(x1, x2, PostIndex));
1829*f5c631daSSadaf Ebrahimi __ st1(v14.V1D(), v15.V1D(), v16.V1D(), MemOperand(x1, 24, PostIndex));
1830*f5c631daSSadaf Ebrahimi __ st1(v18.V1D(), v19.V1D(), MemOperand(x0));
1831*f5c631daSSadaf Ebrahimi __ st1(v5.V1D(), v6.V1D(), MemOperand(x1, x2, PostIndex));
1832*f5c631daSSadaf Ebrahimi __ st1(v2.V1D(), v3.V1D(), MemOperand(x1, 16, PostIndex));
1833*f5c631daSSadaf Ebrahimi __ st1(v4.V1D(), MemOperand(x0));
1834*f5c631daSSadaf Ebrahimi __ st1(v27.V1D(), MemOperand(x1, x2, PostIndex));
1835*f5c631daSSadaf Ebrahimi __ st1(v23.V1D(), MemOperand(x1, 8, PostIndex));
1836*f5c631daSSadaf Ebrahimi __ st1(v2.V2D(), v3.V2D(), v4.V2D(), v5.V2D(), MemOperand(x0));
1837*f5c631daSSadaf Ebrahimi __ st1(v22.V2D(),
1838*f5c631daSSadaf Ebrahimi v23.V2D(),
1839*f5c631daSSadaf Ebrahimi v24.V2D(),
1840*f5c631daSSadaf Ebrahimi v25.V2D(),
1841*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
1842*f5c631daSSadaf Ebrahimi __ st1(v28.V2D(),
1843*f5c631daSSadaf Ebrahimi v29.V2D(),
1844*f5c631daSSadaf Ebrahimi v30.V2D(),
1845*f5c631daSSadaf Ebrahimi v31.V2D(),
1846*f5c631daSSadaf Ebrahimi MemOperand(x1, 64, PostIndex));
1847*f5c631daSSadaf Ebrahimi __ st1(v17.V2D(), v18.V2D(), v19.V2D(), MemOperand(x0));
1848*f5c631daSSadaf Ebrahimi __ st1(v16.V2D(), v17.V2D(), v18.V2D(), MemOperand(x1, x2, PostIndex));
1849*f5c631daSSadaf Ebrahimi __ st1(v22.V2D(), v23.V2D(), v24.V2D(), MemOperand(x1, 48, PostIndex));
1850*f5c631daSSadaf Ebrahimi __ st1(v21.V2D(), v22.V2D(), MemOperand(x0));
1851*f5c631daSSadaf Ebrahimi __ st1(v6.V2D(), v7.V2D(), MemOperand(x1, x2, PostIndex));
1852*f5c631daSSadaf Ebrahimi __ st1(v27.V2D(), v28.V2D(), MemOperand(x1, 32, PostIndex));
1853*f5c631daSSadaf Ebrahimi __ st1(v21.V2D(), MemOperand(x0));
1854*f5c631daSSadaf Ebrahimi __ st1(v29.V2D(), MemOperand(x1, x2, PostIndex));
1855*f5c631daSSadaf Ebrahimi __ st1(v20.V2D(), MemOperand(x1, 16, PostIndex));
1856*f5c631daSSadaf Ebrahimi __ st1(v22.V2S(), v23.V2S(), v24.V2S(), v25.V2S(), MemOperand(x0));
1857*f5c631daSSadaf Ebrahimi __ st1(v8.V2S(),
1858*f5c631daSSadaf Ebrahimi v9.V2S(),
1859*f5c631daSSadaf Ebrahimi v10.V2S(),
1860*f5c631daSSadaf Ebrahimi v11.V2S(),
1861*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
1862*f5c631daSSadaf Ebrahimi __ st1(v15.V2S(),
1863*f5c631daSSadaf Ebrahimi v16.V2S(),
1864*f5c631daSSadaf Ebrahimi v17.V2S(),
1865*f5c631daSSadaf Ebrahimi v18.V2S(),
1866*f5c631daSSadaf Ebrahimi MemOperand(x1, 32, PostIndex));
1867*f5c631daSSadaf Ebrahimi __ st1(v2.V2S(), v3.V2S(), v4.V2S(), MemOperand(x0));
1868*f5c631daSSadaf Ebrahimi __ st1(v23.V2S(), v24.V2S(), v25.V2S(), MemOperand(x1, x2, PostIndex));
1869*f5c631daSSadaf Ebrahimi __ st1(v7.V2S(), v8.V2S(), v9.V2S(), MemOperand(x1, 24, PostIndex));
1870*f5c631daSSadaf Ebrahimi __ st1(v28.V2S(), v29.V2S(), MemOperand(x0));
1871*f5c631daSSadaf Ebrahimi __ st1(v29.V2S(), v30.V2S(), MemOperand(x1, x2, PostIndex));
1872*f5c631daSSadaf Ebrahimi __ st1(v23.V2S(), v24.V2S(), MemOperand(x1, 16, PostIndex));
1873*f5c631daSSadaf Ebrahimi __ st1(v6.V2S(), MemOperand(x0));
1874*f5c631daSSadaf Ebrahimi __ st1(v11.V2S(), MemOperand(x1, x2, PostIndex));
1875*f5c631daSSadaf Ebrahimi __ st1(v17.V2S(), MemOperand(x1, 8, PostIndex));
1876*f5c631daSSadaf Ebrahimi __ st1(v6.V4H(), v7.V4H(), v8.V4H(), v9.V4H(), MemOperand(x0));
1877*f5c631daSSadaf Ebrahimi __ st1(v9.V4H(),
1878*f5c631daSSadaf Ebrahimi v10.V4H(),
1879*f5c631daSSadaf Ebrahimi v11.V4H(),
1880*f5c631daSSadaf Ebrahimi v12.V4H(),
1881*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
1882*f5c631daSSadaf Ebrahimi __ st1(v25.V4H(),
1883*f5c631daSSadaf Ebrahimi v26.V4H(),
1884*f5c631daSSadaf Ebrahimi v27.V4H(),
1885*f5c631daSSadaf Ebrahimi v28.V4H(),
1886*f5c631daSSadaf Ebrahimi MemOperand(x1, 32, PostIndex));
1887*f5c631daSSadaf Ebrahimi __ st1(v11.V4H(), v12.V4H(), v13.V4H(), MemOperand(x0));
1888*f5c631daSSadaf Ebrahimi __ st1(v10.V4H(), v11.V4H(), v12.V4H(), MemOperand(x1, x2, PostIndex));
1889*f5c631daSSadaf Ebrahimi __ st1(v12.V4H(), v13.V4H(), v14.V4H(), MemOperand(x1, 24, PostIndex));
1890*f5c631daSSadaf Ebrahimi __ st1(v13.V4H(), v14.V4H(), MemOperand(x0));
1891*f5c631daSSadaf Ebrahimi __ st1(v15.V4H(), v16.V4H(), MemOperand(x1, x2, PostIndex));
1892*f5c631daSSadaf Ebrahimi __ st1(v21.V4H(), v22.V4H(), MemOperand(x1, 16, PostIndex));
1893*f5c631daSSadaf Ebrahimi __ st1(v16.V4H(), MemOperand(x0));
1894*f5c631daSSadaf Ebrahimi __ st1(v8.V4H(), MemOperand(x1, x2, PostIndex));
1895*f5c631daSSadaf Ebrahimi __ st1(v30.V4H(), MemOperand(x1, 8, PostIndex));
1896*f5c631daSSadaf Ebrahimi __ st1(v3.V4S(), v4.V4S(), v5.V4S(), v6.V4S(), MemOperand(x0));
1897*f5c631daSSadaf Ebrahimi __ st1(v25.V4S(),
1898*f5c631daSSadaf Ebrahimi v26.V4S(),
1899*f5c631daSSadaf Ebrahimi v27.V4S(),
1900*f5c631daSSadaf Ebrahimi v28.V4S(),
1901*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
1902*f5c631daSSadaf Ebrahimi __ st1(v5.V4S(), v6.V4S(), v7.V4S(), v8.V4S(), MemOperand(x1, 64, PostIndex));
1903*f5c631daSSadaf Ebrahimi __ st1(v31.V4S(), v0.V4S(), v1.V4S(), MemOperand(x0));
1904*f5c631daSSadaf Ebrahimi __ st1(v30.V4S(), v31.V4S(), v0.V4S(), MemOperand(x1, x2, PostIndex));
1905*f5c631daSSadaf Ebrahimi __ st1(v6.V4S(), v7.V4S(), v8.V4S(), MemOperand(x1, 48, PostIndex));
1906*f5c631daSSadaf Ebrahimi __ st1(v17.V4S(), v18.V4S(), MemOperand(x0));
1907*f5c631daSSadaf Ebrahimi __ st1(v31.V4S(), v0.V4S(), MemOperand(x1, x2, PostIndex));
1908*f5c631daSSadaf Ebrahimi __ st1(v1.V4S(), v2.V4S(), MemOperand(x1, 32, PostIndex));
1909*f5c631daSSadaf Ebrahimi __ st1(v26.V4S(), MemOperand(x0));
1910*f5c631daSSadaf Ebrahimi __ st1(v15.V4S(), MemOperand(x1, x2, PostIndex));
1911*f5c631daSSadaf Ebrahimi __ st1(v13.V4S(), MemOperand(x1, 16, PostIndex));
1912*f5c631daSSadaf Ebrahimi __ st1(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), MemOperand(x0));
1913*f5c631daSSadaf Ebrahimi __ st1(v10.V8B(),
1914*f5c631daSSadaf Ebrahimi v11.V8B(),
1915*f5c631daSSadaf Ebrahimi v12.V8B(),
1916*f5c631daSSadaf Ebrahimi v13.V8B(),
1917*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
1918*f5c631daSSadaf Ebrahimi __ st1(v15.V8B(),
1919*f5c631daSSadaf Ebrahimi v16.V8B(),
1920*f5c631daSSadaf Ebrahimi v17.V8B(),
1921*f5c631daSSadaf Ebrahimi v18.V8B(),
1922*f5c631daSSadaf Ebrahimi MemOperand(x1, 32, PostIndex));
1923*f5c631daSSadaf Ebrahimi __ st1(v19.V8B(), v20.V8B(), v21.V8B(), MemOperand(x0));
1924*f5c631daSSadaf Ebrahimi __ st1(v31.V8B(), v0.V8B(), v1.V8B(), MemOperand(x1, x2, PostIndex));
1925*f5c631daSSadaf Ebrahimi __ st1(v9.V8B(), v10.V8B(), v11.V8B(), MemOperand(x1, 24, PostIndex));
1926*f5c631daSSadaf Ebrahimi __ st1(v12.V8B(), v13.V8B(), MemOperand(x0));
1927*f5c631daSSadaf Ebrahimi __ st1(v2.V8B(), v3.V8B(), MemOperand(x1, x2, PostIndex));
1928*f5c631daSSadaf Ebrahimi __ st1(v0.V8B(), v1.V8B(), MemOperand(x1, 16, PostIndex));
1929*f5c631daSSadaf Ebrahimi __ st1(v16.V8B(), MemOperand(x0));
1930*f5c631daSSadaf Ebrahimi __ st1(v25.V8B(), MemOperand(x1, x2, PostIndex));
1931*f5c631daSSadaf Ebrahimi __ st1(v31.V8B(), MemOperand(x1, 8, PostIndex));
1932*f5c631daSSadaf Ebrahimi __ st1(v4.V8H(), v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x0));
1933*f5c631daSSadaf Ebrahimi __ st1(v3.V8H(), v4.V8H(), v5.V8H(), v6.V8H(), MemOperand(x1, x2, PostIndex));
1934*f5c631daSSadaf Ebrahimi __ st1(v26.V8H(),
1935*f5c631daSSadaf Ebrahimi v27.V8H(),
1936*f5c631daSSadaf Ebrahimi v28.V8H(),
1937*f5c631daSSadaf Ebrahimi v29.V8H(),
1938*f5c631daSSadaf Ebrahimi MemOperand(x1, 64, PostIndex));
1939*f5c631daSSadaf Ebrahimi __ st1(v10.V8H(), v11.V8H(), v12.V8H(), MemOperand(x0));
1940*f5c631daSSadaf Ebrahimi __ st1(v21.V8H(), v22.V8H(), v23.V8H(), MemOperand(x1, x2, PostIndex));
1941*f5c631daSSadaf Ebrahimi __ st1(v18.V8H(), v19.V8H(), v20.V8H(), MemOperand(x1, 48, PostIndex));
1942*f5c631daSSadaf Ebrahimi __ st1(v26.V8H(), v27.V8H(), MemOperand(x0));
1943*f5c631daSSadaf Ebrahimi __ st1(v24.V8H(), v25.V8H(), MemOperand(x1, x2, PostIndex));
1944*f5c631daSSadaf Ebrahimi __ st1(v17.V8H(), v18.V8H(), MemOperand(x1, 32, PostIndex));
1945*f5c631daSSadaf Ebrahimi __ st1(v29.V8H(), MemOperand(x0));
1946*f5c631daSSadaf Ebrahimi __ st1(v19.V8H(), MemOperand(x1, x2, PostIndex));
1947*f5c631daSSadaf Ebrahimi __ st1(v23.V8H(), MemOperand(x1, 16, PostIndex));
1948*f5c631daSSadaf Ebrahimi __ st1(v19.B(), 15, MemOperand(x0));
1949*f5c631daSSadaf Ebrahimi __ st1(v25.B(), 9, MemOperand(x1, x2, PostIndex));
1950*f5c631daSSadaf Ebrahimi __ st1(v4.B(), 8, MemOperand(x1, 1, PostIndex));
1951*f5c631daSSadaf Ebrahimi __ st1(v13.D(), 0, MemOperand(x0));
1952*f5c631daSSadaf Ebrahimi __ st1(v30.D(), 0, MemOperand(x1, x2, PostIndex));
1953*f5c631daSSadaf Ebrahimi __ st1(v3.D(), 0, MemOperand(x1, 8, PostIndex));
1954*f5c631daSSadaf Ebrahimi __ st1(v22.H(), 0, MemOperand(x0));
1955*f5c631daSSadaf Ebrahimi __ st1(v31.H(), 7, MemOperand(x1, x2, PostIndex));
1956*f5c631daSSadaf Ebrahimi __ st1(v23.H(), 3, MemOperand(x1, 2, PostIndex));
1957*f5c631daSSadaf Ebrahimi __ st1(v0.S(), 0, MemOperand(x0));
1958*f5c631daSSadaf Ebrahimi __ st1(v11.S(), 3, MemOperand(x1, x2, PostIndex));
1959*f5c631daSSadaf Ebrahimi __ st1(v24.S(), 3, MemOperand(x1, 4, PostIndex));
1960*f5c631daSSadaf Ebrahimi __ st2(v7.V16B(), v8.V16B(), MemOperand(x0));
1961*f5c631daSSadaf Ebrahimi __ st2(v5.V16B(), v6.V16B(), MemOperand(x1, x2, PostIndex));
1962*f5c631daSSadaf Ebrahimi __ st2(v18.V16B(), v19.V16B(), MemOperand(x1, 32, PostIndex));
1963*f5c631daSSadaf Ebrahimi __ st2(v14.V2D(), v15.V2D(), MemOperand(x0));
1964*f5c631daSSadaf Ebrahimi __ st2(v7.V2D(), v8.V2D(), MemOperand(x1, x2, PostIndex));
1965*f5c631daSSadaf Ebrahimi __ st2(v24.V2D(), v25.V2D(), MemOperand(x1, 32, PostIndex));
1966*f5c631daSSadaf Ebrahimi __ st2(v22.V2S(), v23.V2S(), MemOperand(x0));
1967*f5c631daSSadaf Ebrahimi __ st2(v4.V2S(), v5.V2S(), MemOperand(x1, x2, PostIndex));
1968*f5c631daSSadaf Ebrahimi __ st2(v2.V2S(), v3.V2S(), MemOperand(x1, 16, PostIndex));
1969*f5c631daSSadaf Ebrahimi __ st2(v23.V4H(), v24.V4H(), MemOperand(x0));
1970*f5c631daSSadaf Ebrahimi __ st2(v8.V4H(), v9.V4H(), MemOperand(x1, x2, PostIndex));
1971*f5c631daSSadaf Ebrahimi __ st2(v7.V4H(), v8.V4H(), MemOperand(x1, 16, PostIndex));
1972*f5c631daSSadaf Ebrahimi __ st2(v17.V4S(), v18.V4S(), MemOperand(x0));
1973*f5c631daSSadaf Ebrahimi __ st2(v6.V4S(), v7.V4S(), MemOperand(x1, x2, PostIndex));
1974*f5c631daSSadaf Ebrahimi __ st2(v26.V4S(), v27.V4S(), MemOperand(x1, 32, PostIndex));
1975*f5c631daSSadaf Ebrahimi __ st2(v31.V8B(), v0.V8B(), MemOperand(x0));
1976*f5c631daSSadaf Ebrahimi __ st2(v0.V8B(), v1.V8B(), MemOperand(x1, x2, PostIndex));
1977*f5c631daSSadaf Ebrahimi __ st2(v21.V8B(), v22.V8B(), MemOperand(x1, 16, PostIndex));
1978*f5c631daSSadaf Ebrahimi __ st2(v7.V8H(), v8.V8H(), MemOperand(x0));
1979*f5c631daSSadaf Ebrahimi __ st2(v22.V8H(), v23.V8H(), MemOperand(x1, x2, PostIndex));
1980*f5c631daSSadaf Ebrahimi __ st2(v4.V8H(), v5.V8H(), MemOperand(x1, 32, PostIndex));
1981*f5c631daSSadaf Ebrahimi __ st2(v8.B(), v9.B(), 15, MemOperand(x0));
1982*f5c631daSSadaf Ebrahimi __ st2(v8.B(), v9.B(), 15, MemOperand(x1, x2, PostIndex));
1983*f5c631daSSadaf Ebrahimi __ st2(v7.B(), v8.B(), 4, MemOperand(x1, 2, PostIndex));
1984*f5c631daSSadaf Ebrahimi __ st2(v25.D(), v26.D(), 0, MemOperand(x0));
1985*f5c631daSSadaf Ebrahimi __ st2(v17.D(), v18.D(), 1, MemOperand(x1, x2, PostIndex));
1986*f5c631daSSadaf Ebrahimi __ st2(v3.D(), v4.D(), 1, MemOperand(x1, 16, PostIndex));
1987*f5c631daSSadaf Ebrahimi __ st2(v4.H(), v5.H(), 3, MemOperand(x0));
1988*f5c631daSSadaf Ebrahimi __ st2(v0.H(), v1.H(), 5, MemOperand(x1, x2, PostIndex));
1989*f5c631daSSadaf Ebrahimi __ st2(v22.H(), v23.H(), 2, MemOperand(x1, 4, PostIndex));
1990*f5c631daSSadaf Ebrahimi __ st2(v14.S(), v15.S(), 3, MemOperand(x0));
1991*f5c631daSSadaf Ebrahimi __ st2(v23.S(), v24.S(), 3, MemOperand(x1, x2, PostIndex));
1992*f5c631daSSadaf Ebrahimi __ st2(v0.S(), v1.S(), 2, MemOperand(x1, 8, PostIndex));
1993*f5c631daSSadaf Ebrahimi __ st3(v26.V16B(), v27.V16B(), v28.V16B(), MemOperand(x0));
1994*f5c631daSSadaf Ebrahimi __ st3(v21.V16B(), v22.V16B(), v23.V16B(), MemOperand(x1, x2, PostIndex));
1995*f5c631daSSadaf Ebrahimi __ st3(v24.V16B(), v25.V16B(), v26.V16B(), MemOperand(x1, 48, PostIndex));
1996*f5c631daSSadaf Ebrahimi __ st3(v17.V2D(), v18.V2D(), v19.V2D(), MemOperand(x0));
1997*f5c631daSSadaf Ebrahimi __ st3(v23.V2D(), v24.V2D(), v25.V2D(), MemOperand(x1, x2, PostIndex));
1998*f5c631daSSadaf Ebrahimi __ st3(v10.V2D(), v11.V2D(), v12.V2D(), MemOperand(x1, 48, PostIndex));
1999*f5c631daSSadaf Ebrahimi __ st3(v9.V2S(), v10.V2S(), v11.V2S(), MemOperand(x0));
2000*f5c631daSSadaf Ebrahimi __ st3(v13.V2S(), v14.V2S(), v15.V2S(), MemOperand(x1, x2, PostIndex));
2001*f5c631daSSadaf Ebrahimi __ st3(v22.V2S(), v23.V2S(), v24.V2S(), MemOperand(x1, 24, PostIndex));
2002*f5c631daSSadaf Ebrahimi __ st3(v31.V4H(), v0.V4H(), v1.V4H(), MemOperand(x0));
2003*f5c631daSSadaf Ebrahimi __ st3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x1, x2, PostIndex));
2004*f5c631daSSadaf Ebrahimi __ st3(v19.V4H(), v20.V4H(), v21.V4H(), MemOperand(x1, 24, PostIndex));
2005*f5c631daSSadaf Ebrahimi __ st3(v18.V4S(), v19.V4S(), v20.V4S(), MemOperand(x0));
2006*f5c631daSSadaf Ebrahimi __ st3(v25.V4S(), v26.V4S(), v27.V4S(), MemOperand(x1, x2, PostIndex));
2007*f5c631daSSadaf Ebrahimi __ st3(v16.V4S(), v17.V4S(), v18.V4S(), MemOperand(x1, 48, PostIndex));
2008*f5c631daSSadaf Ebrahimi __ st3(v27.V8B(), v28.V8B(), v29.V8B(), MemOperand(x0));
2009*f5c631daSSadaf Ebrahimi __ st3(v29.V8B(), v30.V8B(), v31.V8B(), MemOperand(x1, x2, PostIndex));
2010*f5c631daSSadaf Ebrahimi __ st3(v30.V8B(), v31.V8B(), v0.V8B(), MemOperand(x1, 24, PostIndex));
2011*f5c631daSSadaf Ebrahimi __ st3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x0));
2012*f5c631daSSadaf Ebrahimi __ st3(v18.V8H(), v19.V8H(), v20.V8H(), MemOperand(x1, x2, PostIndex));
2013*f5c631daSSadaf Ebrahimi __ st3(v18.V8H(), v19.V8H(), v20.V8H(), MemOperand(x1, 48, PostIndex));
2014*f5c631daSSadaf Ebrahimi __ st3(v31.B(), v0.B(), v1.B(), 10, MemOperand(x0));
2015*f5c631daSSadaf Ebrahimi __ st3(v4.B(), v5.B(), v6.B(), 5, MemOperand(x1, x2, PostIndex));
2016*f5c631daSSadaf Ebrahimi __ st3(v5.B(), v6.B(), v7.B(), 1, MemOperand(x1, 3, PostIndex));
2017*f5c631daSSadaf Ebrahimi __ st3(v5.D(), v6.D(), v7.D(), 0, MemOperand(x0));
2018*f5c631daSSadaf Ebrahimi __ st3(v6.D(), v7.D(), v8.D(), 0, MemOperand(x1, x2, PostIndex));
2019*f5c631daSSadaf Ebrahimi __ st3(v0.D(), v1.D(), v2.D(), 0, MemOperand(x1, 24, PostIndex));
2020*f5c631daSSadaf Ebrahimi __ st3(v31.H(), v0.H(), v1.H(), 2, MemOperand(x0));
2021*f5c631daSSadaf Ebrahimi __ st3(v14.H(), v15.H(), v16.H(), 5, MemOperand(x1, x2, PostIndex));
2022*f5c631daSSadaf Ebrahimi __ st3(v21.H(), v22.H(), v23.H(), 6, MemOperand(x1, 6, PostIndex));
2023*f5c631daSSadaf Ebrahimi __ st3(v21.S(), v22.S(), v23.S(), 0, MemOperand(x0));
2024*f5c631daSSadaf Ebrahimi __ st3(v11.S(), v12.S(), v13.S(), 1, MemOperand(x1, x2, PostIndex));
2025*f5c631daSSadaf Ebrahimi __ st3(v15.S(), v16.S(), v17.S(), 0, MemOperand(x1, 12, PostIndex));
2026*f5c631daSSadaf Ebrahimi __ st4(v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), MemOperand(x0));
2027*f5c631daSSadaf Ebrahimi __ st4(v24.V16B(),
2028*f5c631daSSadaf Ebrahimi v25.V16B(),
2029*f5c631daSSadaf Ebrahimi v26.V16B(),
2030*f5c631daSSadaf Ebrahimi v27.V16B(),
2031*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
2032*f5c631daSSadaf Ebrahimi __ st4(v15.V16B(),
2033*f5c631daSSadaf Ebrahimi v16.V16B(),
2034*f5c631daSSadaf Ebrahimi v17.V16B(),
2035*f5c631daSSadaf Ebrahimi v18.V16B(),
2036*f5c631daSSadaf Ebrahimi MemOperand(x1, 64, PostIndex));
2037*f5c631daSSadaf Ebrahimi __ st4(v16.V2D(), v17.V2D(), v18.V2D(), v19.V2D(), MemOperand(x0));
2038*f5c631daSSadaf Ebrahimi __ st4(v17.V2D(),
2039*f5c631daSSadaf Ebrahimi v18.V2D(),
2040*f5c631daSSadaf Ebrahimi v19.V2D(),
2041*f5c631daSSadaf Ebrahimi v20.V2D(),
2042*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
2043*f5c631daSSadaf Ebrahimi __ st4(v9.V2D(),
2044*f5c631daSSadaf Ebrahimi v10.V2D(),
2045*f5c631daSSadaf Ebrahimi v11.V2D(),
2046*f5c631daSSadaf Ebrahimi v12.V2D(),
2047*f5c631daSSadaf Ebrahimi MemOperand(x1, 64, PostIndex));
2048*f5c631daSSadaf Ebrahimi __ st4(v23.V2S(), v24.V2S(), v25.V2S(), v26.V2S(), MemOperand(x0));
2049*f5c631daSSadaf Ebrahimi __ st4(v15.V2S(),
2050*f5c631daSSadaf Ebrahimi v16.V2S(),
2051*f5c631daSSadaf Ebrahimi v17.V2S(),
2052*f5c631daSSadaf Ebrahimi v18.V2S(),
2053*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
2054*f5c631daSSadaf Ebrahimi __ st4(v24.V2S(),
2055*f5c631daSSadaf Ebrahimi v25.V2S(),
2056*f5c631daSSadaf Ebrahimi v26.V2S(),
2057*f5c631daSSadaf Ebrahimi v27.V2S(),
2058*f5c631daSSadaf Ebrahimi MemOperand(x1, 32, PostIndex));
2059*f5c631daSSadaf Ebrahimi __ st4(v14.V4H(), v15.V4H(), v16.V4H(), v17.V4H(), MemOperand(x0));
2060*f5c631daSSadaf Ebrahimi __ st4(v18.V4H(),
2061*f5c631daSSadaf Ebrahimi v19.V4H(),
2062*f5c631daSSadaf Ebrahimi v20.V4H(),
2063*f5c631daSSadaf Ebrahimi v21.V4H(),
2064*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
2065*f5c631daSSadaf Ebrahimi __ st4(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), MemOperand(x1, 32, PostIndex));
2066*f5c631daSSadaf Ebrahimi __ st4(v13.V4S(), v14.V4S(), v15.V4S(), v16.V4S(), MemOperand(x0));
2067*f5c631daSSadaf Ebrahimi __ st4(v6.V4S(), v7.V4S(), v8.V4S(), v9.V4S(), MemOperand(x1, x2, PostIndex));
2068*f5c631daSSadaf Ebrahimi __ st4(v15.V4S(),
2069*f5c631daSSadaf Ebrahimi v16.V4S(),
2070*f5c631daSSadaf Ebrahimi v17.V4S(),
2071*f5c631daSSadaf Ebrahimi v18.V4S(),
2072*f5c631daSSadaf Ebrahimi MemOperand(x1, 64, PostIndex));
2073*f5c631daSSadaf Ebrahimi __ st4(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), MemOperand(x0));
2074*f5c631daSSadaf Ebrahimi __ st4(v25.V8B(),
2075*f5c631daSSadaf Ebrahimi v26.V8B(),
2076*f5c631daSSadaf Ebrahimi v27.V8B(),
2077*f5c631daSSadaf Ebrahimi v28.V8B(),
2078*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
2079*f5c631daSSadaf Ebrahimi __ st4(v19.V8B(),
2080*f5c631daSSadaf Ebrahimi v20.V8B(),
2081*f5c631daSSadaf Ebrahimi v21.V8B(),
2082*f5c631daSSadaf Ebrahimi v22.V8B(),
2083*f5c631daSSadaf Ebrahimi MemOperand(x1, 32, PostIndex));
2084*f5c631daSSadaf Ebrahimi __ st4(v19.V8H(), v20.V8H(), v21.V8H(), v22.V8H(), MemOperand(x0));
2085*f5c631daSSadaf Ebrahimi __ st4(v15.V8H(),
2086*f5c631daSSadaf Ebrahimi v16.V8H(),
2087*f5c631daSSadaf Ebrahimi v17.V8H(),
2088*f5c631daSSadaf Ebrahimi v18.V8H(),
2089*f5c631daSSadaf Ebrahimi MemOperand(x1, x2, PostIndex));
2090*f5c631daSSadaf Ebrahimi __ st4(v31.V8H(),
2091*f5c631daSSadaf Ebrahimi v0.V8H(),
2092*f5c631daSSadaf Ebrahimi v1.V8H(),
2093*f5c631daSSadaf Ebrahimi v2.V8H(),
2094*f5c631daSSadaf Ebrahimi MemOperand(x1, 64, PostIndex));
2095*f5c631daSSadaf Ebrahimi __ st4(v0.B(), v1.B(), v2.B(), v3.B(), 13, MemOperand(x0));
2096*f5c631daSSadaf Ebrahimi __ st4(v4.B(), v5.B(), v6.B(), v7.B(), 10, MemOperand(x1, x2, PostIndex));
2097*f5c631daSSadaf Ebrahimi __ st4(v9.B(), v10.B(), v11.B(), v12.B(), 9, MemOperand(x1, 4, PostIndex));
2098*f5c631daSSadaf Ebrahimi __ st4(v2.D(), v3.D(), v4.D(), v5.D(), 1, MemOperand(x0));
2099*f5c631daSSadaf Ebrahimi __ st4(v7.D(), v8.D(), v9.D(), v10.D(), 0, MemOperand(x1, x2, PostIndex));
2100*f5c631daSSadaf Ebrahimi __ st4(v31.D(), v0.D(), v1.D(), v2.D(), 1, MemOperand(x1, 32, PostIndex));
2101*f5c631daSSadaf Ebrahimi __ st4(v2.H(), v3.H(), v4.H(), v5.H(), 1, MemOperand(x0));
2102*f5c631daSSadaf Ebrahimi __ st4(v27.H(), v28.H(), v29.H(), v30.H(), 3, MemOperand(x1, x2, PostIndex));
2103*f5c631daSSadaf Ebrahimi __ st4(v24.H(), v25.H(), v26.H(), v27.H(), 4, MemOperand(x1, 8, PostIndex));
2104*f5c631daSSadaf Ebrahimi __ st4(v18.S(), v19.S(), v20.S(), v21.S(), 2, MemOperand(x0));
2105*f5c631daSSadaf Ebrahimi __ st4(v6.S(), v7.S(), v8.S(), v9.S(), 2, MemOperand(x1, x2, PostIndex));
2106*f5c631daSSadaf Ebrahimi __ st4(v25.S(), v26.S(), v27.S(), v28.S(), 1, MemOperand(x1, 16, PostIndex));
2107*f5c631daSSadaf Ebrahimi __ sub(d12, d17, d2);
2108*f5c631daSSadaf Ebrahimi __ sub(v20.V16B(), v24.V16B(), v8.V16B());
2109*f5c631daSSadaf Ebrahimi __ sub(v8.V2D(), v29.V2D(), v5.V2D());
2110*f5c631daSSadaf Ebrahimi __ sub(v2.V2S(), v28.V2S(), v24.V2S());
2111*f5c631daSSadaf Ebrahimi __ sub(v24.V4H(), v10.V4H(), v4.V4H());
2112*f5c631daSSadaf Ebrahimi __ sub(v28.V4S(), v4.V4S(), v17.V4S());
2113*f5c631daSSadaf Ebrahimi __ sub(v16.V8B(), v27.V8B(), v2.V8B());
2114*f5c631daSSadaf Ebrahimi __ sub(v20.V8H(), v10.V8H(), v13.V8H());
2115*f5c631daSSadaf Ebrahimi __ subhn(v5.V2S(), v14.V2D(), v13.V2D());
2116*f5c631daSSadaf Ebrahimi __ subhn(v10.V4H(), v5.V4S(), v8.V4S());
2117*f5c631daSSadaf Ebrahimi __ subhn(v6.V8B(), v10.V8H(), v22.V8H());
2118*f5c631daSSadaf Ebrahimi __ subhn2(v11.V16B(), v6.V8H(), v9.V8H());
2119*f5c631daSSadaf Ebrahimi __ subhn2(v25.V4S(), v18.V2D(), v24.V2D());
2120*f5c631daSSadaf Ebrahimi __ subhn2(v20.V8H(), v21.V4S(), v1.V4S());
2121*f5c631daSSadaf Ebrahimi __ suqadd(b25, b11);
2122*f5c631daSSadaf Ebrahimi __ suqadd(d13, d1);
2123*f5c631daSSadaf Ebrahimi __ suqadd(h0, h9);
2124*f5c631daSSadaf Ebrahimi __ suqadd(s22, s8);
2125*f5c631daSSadaf Ebrahimi __ suqadd(v24.V16B(), v27.V16B());
2126*f5c631daSSadaf Ebrahimi __ suqadd(v26.V2D(), v14.V2D());
2127*f5c631daSSadaf Ebrahimi __ suqadd(v7.V2S(), v10.V2S());
2128*f5c631daSSadaf Ebrahimi __ suqadd(v25.V4H(), v12.V4H());
2129*f5c631daSSadaf Ebrahimi __ suqadd(v4.V4S(), v3.V4S());
2130*f5c631daSSadaf Ebrahimi __ suqadd(v14.V8B(), v18.V8B());
2131*f5c631daSSadaf Ebrahimi __ suqadd(v31.V8H(), v8.V8H());
2132*f5c631daSSadaf Ebrahimi __ sxtl(v16.V2D(), v20.V2S());
2133*f5c631daSSadaf Ebrahimi __ sxtl(v27.V4S(), v28.V4H());
2134*f5c631daSSadaf Ebrahimi __ sxtl(v0.V8H(), v22.V8B());
2135*f5c631daSSadaf Ebrahimi __ sxtl2(v6.V2D(), v7.V4S());
2136*f5c631daSSadaf Ebrahimi __ sxtl2(v9.V4S(), v27.V8H());
2137*f5c631daSSadaf Ebrahimi __ sxtl2(v16.V8H(), v16.V16B());
2138*f5c631daSSadaf Ebrahimi __ tbl(v25.V16B(),
2139*f5c631daSSadaf Ebrahimi v17.V16B(),
2140*f5c631daSSadaf Ebrahimi v18.V16B(),
2141*f5c631daSSadaf Ebrahimi v19.V16B(),
2142*f5c631daSSadaf Ebrahimi v20.V16B(),
2143*f5c631daSSadaf Ebrahimi v22.V16B());
2144*f5c631daSSadaf Ebrahimi __ tbl(v28.V16B(), v13.V16B(), v14.V16B(), v15.V16B(), v4.V16B());
2145*f5c631daSSadaf Ebrahimi __ tbl(v3.V16B(), v0.V16B(), v1.V16B(), v2.V16B());
2146*f5c631daSSadaf Ebrahimi __ tbl(v20.V16B(), v15.V16B(), v4.V16B());
2147*f5c631daSSadaf Ebrahimi __ tbl(v7.V8B(), v23.V16B(), v24.V16B(), v25.V16B(), v26.V16B(), v20.V8B());
2148*f5c631daSSadaf Ebrahimi __ tbl(v8.V8B(), v1.V16B(), v2.V16B(), v3.V16B(), v31.V8B());
2149*f5c631daSSadaf Ebrahimi __ tbl(v8.V8B(), v25.V16B(), v26.V16B(), v16.V8B());
2150*f5c631daSSadaf Ebrahimi __ tbl(v11.V8B(), v19.V16B(), v30.V8B());
2151*f5c631daSSadaf Ebrahimi __ tbx(v25.V16B(), v25.V16B(), v26.V16B(), v27.V16B(), v28.V16B(), v5.V16B());
2152*f5c631daSSadaf Ebrahimi __ tbx(v21.V16B(), v29.V16B(), v30.V16B(), v31.V16B(), v24.V16B());
2153*f5c631daSSadaf Ebrahimi __ tbx(v6.V16B(), v16.V16B(), v17.V16B(), v1.V16B());
2154*f5c631daSSadaf Ebrahimi __ tbx(v13.V16B(), v3.V16B(), v20.V16B());
2155*f5c631daSSadaf Ebrahimi __ tbx(v24.V8B(), v29.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v9.V8B());
2156*f5c631daSSadaf Ebrahimi __ tbx(v17.V8B(), v9.V16B(), v10.V16B(), v11.V16B(), v26.V8B());
2157*f5c631daSSadaf Ebrahimi __ tbx(v5.V8B(), v3.V16B(), v4.V16B(), v21.V8B());
2158*f5c631daSSadaf Ebrahimi __ tbx(v16.V8B(), v11.V16B(), v29.V8B());
2159*f5c631daSSadaf Ebrahimi __ trn1(v19.V16B(), v24.V16B(), v12.V16B());
2160*f5c631daSSadaf Ebrahimi __ trn1(v2.V2D(), v7.V2D(), v10.V2D());
2161*f5c631daSSadaf Ebrahimi __ trn1(v22.V2S(), v0.V2S(), v21.V2S());
2162*f5c631daSSadaf Ebrahimi __ trn1(v12.V4H(), v15.V4H(), v20.V4H());
2163*f5c631daSSadaf Ebrahimi __ trn1(v30.V4S(), v17.V4S(), v9.V4S());
2164*f5c631daSSadaf Ebrahimi __ trn1(v12.V8B(), v19.V8B(), v29.V8B());
2165*f5c631daSSadaf Ebrahimi __ trn1(v23.V8H(), v8.V8H(), v9.V8H());
2166*f5c631daSSadaf Ebrahimi __ trn2(v28.V16B(), v30.V16B(), v25.V16B());
2167*f5c631daSSadaf Ebrahimi __ trn2(v7.V2D(), v27.V2D(), v7.V2D());
2168*f5c631daSSadaf Ebrahimi __ trn2(v30.V2S(), v16.V2S(), v19.V2S());
2169*f5c631daSSadaf Ebrahimi __ trn2(v24.V4H(), v6.V4H(), v25.V4H());
2170*f5c631daSSadaf Ebrahimi __ trn2(v2.V4S(), v19.V4S(), v11.V4S());
2171*f5c631daSSadaf Ebrahimi __ trn2(v25.V8B(), v27.V8B(), v18.V8B());
2172*f5c631daSSadaf Ebrahimi __ trn2(v12.V8H(), v4.V8H(), v15.V8H());
2173*f5c631daSSadaf Ebrahimi __ uaba(v31.V16B(), v12.V16B(), v28.V16B());
2174*f5c631daSSadaf Ebrahimi __ uaba(v18.V2S(), v5.V2S(), v14.V2S());
2175*f5c631daSSadaf Ebrahimi __ uaba(v9.V4H(), v20.V4H(), v21.V4H());
2176*f5c631daSSadaf Ebrahimi __ uaba(v6.V4S(), v20.V4S(), v2.V4S());
2177*f5c631daSSadaf Ebrahimi __ uaba(v16.V8B(), v12.V8B(), v5.V8B());
2178*f5c631daSSadaf Ebrahimi __ uaba(v15.V8H(), v26.V8H(), v30.V8H());
2179*f5c631daSSadaf Ebrahimi __ uabal(v10.V2D(), v18.V2S(), v15.V2S());
2180*f5c631daSSadaf Ebrahimi __ uabal(v30.V4S(), v19.V4H(), v7.V4H());
2181*f5c631daSSadaf Ebrahimi __ uabal(v4.V8H(), v27.V8B(), v0.V8B());
2182*f5c631daSSadaf Ebrahimi __ uabal2(v19.V2D(), v12.V4S(), v2.V4S());
2183*f5c631daSSadaf Ebrahimi __ uabal2(v26.V4S(), v5.V8H(), v12.V8H());
2184*f5c631daSSadaf Ebrahimi __ uabal2(v19.V8H(), v20.V16B(), v28.V16B());
2185*f5c631daSSadaf Ebrahimi __ uabd(v18.V16B(), v4.V16B(), v21.V16B());
2186*f5c631daSSadaf Ebrahimi __ uabd(v30.V2S(), v21.V2S(), v16.V2S());
2187*f5c631daSSadaf Ebrahimi __ uabd(v8.V4H(), v28.V4H(), v25.V4H());
2188*f5c631daSSadaf Ebrahimi __ uabd(v28.V4S(), v12.V4S(), v21.V4S());
2189*f5c631daSSadaf Ebrahimi __ uabd(v19.V8B(), v16.V8B(), v28.V8B());
2190*f5c631daSSadaf Ebrahimi __ uabd(v9.V8H(), v12.V8H(), v29.V8H());
2191*f5c631daSSadaf Ebrahimi __ uabdl(v26.V2D(), v0.V2S(), v8.V2S());
2192*f5c631daSSadaf Ebrahimi __ uabdl(v29.V4S(), v31.V4H(), v25.V4H());
2193*f5c631daSSadaf Ebrahimi __ uabdl(v27.V8H(), v29.V8B(), v14.V8B());
2194*f5c631daSSadaf Ebrahimi __ uabdl2(v20.V2D(), v20.V4S(), v8.V4S());
2195*f5c631daSSadaf Ebrahimi __ uabdl2(v22.V4S(), v15.V8H(), v18.V8H());
2196*f5c631daSSadaf Ebrahimi __ uabdl2(v9.V8H(), v18.V16B(), v23.V16B());
2197*f5c631daSSadaf Ebrahimi __ uadalp(v9.V1D(), v15.V2S());
2198*f5c631daSSadaf Ebrahimi __ uadalp(v14.V2D(), v12.V4S());
2199*f5c631daSSadaf Ebrahimi __ uadalp(v28.V2S(), v12.V4H());
2200*f5c631daSSadaf Ebrahimi __ uadalp(v0.V4H(), v17.V8B());
2201*f5c631daSSadaf Ebrahimi __ uadalp(v1.V4S(), v29.V8H());
2202*f5c631daSSadaf Ebrahimi __ uadalp(v15.V8H(), v22.V16B());
2203*f5c631daSSadaf Ebrahimi __ uaddl(v1.V2D(), v20.V2S(), v27.V2S());
2204*f5c631daSSadaf Ebrahimi __ uaddl(v31.V4S(), v25.V4H(), v5.V4H());
2205*f5c631daSSadaf Ebrahimi __ uaddl(v12.V8H(), v3.V8B(), v3.V8B());
2206*f5c631daSSadaf Ebrahimi __ uaddl2(v5.V2D(), v23.V4S(), v6.V4S());
2207*f5c631daSSadaf Ebrahimi __ uaddl2(v1.V4S(), v5.V8H(), v25.V8H());
2208*f5c631daSSadaf Ebrahimi __ uaddl2(v22.V8H(), v30.V16B(), v28.V16B());
2209*f5c631daSSadaf Ebrahimi __ uaddlp(v7.V1D(), v9.V2S());
2210*f5c631daSSadaf Ebrahimi __ uaddlp(v26.V2D(), v4.V4S());
2211*f5c631daSSadaf Ebrahimi __ uaddlp(v28.V2S(), v1.V4H());
2212*f5c631daSSadaf Ebrahimi __ uaddlp(v20.V4H(), v31.V8B());
2213*f5c631daSSadaf Ebrahimi __ uaddlp(v16.V4S(), v17.V8H());
2214*f5c631daSSadaf Ebrahimi __ uaddlp(v6.V8H(), v2.V16B());
2215*f5c631daSSadaf Ebrahimi __ uaddlv(d28, v22.V4S());
2216*f5c631daSSadaf Ebrahimi __ uaddlv(h0, v19.V16B());
2217*f5c631daSSadaf Ebrahimi __ uaddlv(h30, v30.V8B());
2218*f5c631daSSadaf Ebrahimi __ uaddlv(s24, v18.V4H());
2219*f5c631daSSadaf Ebrahimi __ uaddlv(s10, v0.V8H());
2220*f5c631daSSadaf Ebrahimi __ uaddw(v9.V2D(), v17.V2D(), v14.V2S());
2221*f5c631daSSadaf Ebrahimi __ uaddw(v9.V4S(), v25.V4S(), v3.V4H());
2222*f5c631daSSadaf Ebrahimi __ uaddw(v18.V8H(), v1.V8H(), v0.V8B());
2223*f5c631daSSadaf Ebrahimi __ uaddw2(v18.V2D(), v5.V2D(), v6.V4S());
2224*f5c631daSSadaf Ebrahimi __ uaddw2(v17.V4S(), v15.V4S(), v11.V8H());
2225*f5c631daSSadaf Ebrahimi __ uaddw2(v29.V8H(), v11.V8H(), v7.V16B());
2226*f5c631daSSadaf Ebrahimi __ uhadd(v13.V16B(), v9.V16B(), v3.V16B());
2227*f5c631daSSadaf Ebrahimi __ uhadd(v17.V2S(), v25.V2S(), v24.V2S());
2228*f5c631daSSadaf Ebrahimi __ uhadd(v25.V4H(), v23.V4H(), v13.V4H());
2229*f5c631daSSadaf Ebrahimi __ uhadd(v0.V4S(), v20.V4S(), v16.V4S());
2230*f5c631daSSadaf Ebrahimi __ uhadd(v5.V8B(), v5.V8B(), v25.V8B());
2231*f5c631daSSadaf Ebrahimi __ uhadd(v3.V8H(), v29.V8H(), v18.V8H());
2232*f5c631daSSadaf Ebrahimi __ uhsub(v1.V16B(), v22.V16B(), v13.V16B());
2233*f5c631daSSadaf Ebrahimi __ uhsub(v14.V2S(), v30.V2S(), v30.V2S());
2234*f5c631daSSadaf Ebrahimi __ uhsub(v29.V4H(), v14.V4H(), v17.V4H());
2235*f5c631daSSadaf Ebrahimi __ uhsub(v26.V4S(), v5.V4S(), v18.V4S());
2236*f5c631daSSadaf Ebrahimi __ uhsub(v3.V8B(), v7.V8B(), v12.V8B());
2237*f5c631daSSadaf Ebrahimi __ uhsub(v25.V8H(), v21.V8H(), v5.V8H());
2238*f5c631daSSadaf Ebrahimi __ umax(v28.V16B(), v12.V16B(), v6.V16B());
2239*f5c631daSSadaf Ebrahimi __ umax(v20.V2S(), v19.V2S(), v26.V2S());
2240*f5c631daSSadaf Ebrahimi __ umax(v0.V4H(), v31.V4H(), v18.V4H());
2241*f5c631daSSadaf Ebrahimi __ umax(v6.V4S(), v21.V4S(), v28.V4S());
2242*f5c631daSSadaf Ebrahimi __ umax(v0.V8B(), v2.V8B(), v20.V8B());
2243*f5c631daSSadaf Ebrahimi __ umax(v4.V8H(), v11.V8H(), v22.V8H());
2244*f5c631daSSadaf Ebrahimi __ umaxp(v1.V16B(), v6.V16B(), v29.V16B());
2245*f5c631daSSadaf Ebrahimi __ umaxp(v19.V2S(), v17.V2S(), v27.V2S());
2246*f5c631daSSadaf Ebrahimi __ umaxp(v21.V4H(), v16.V4H(), v7.V4H());
2247*f5c631daSSadaf Ebrahimi __ umaxp(v9.V4S(), v20.V4S(), v29.V4S());
2248*f5c631daSSadaf Ebrahimi __ umaxp(v13.V8B(), v1.V8B(), v16.V8B());
2249*f5c631daSSadaf Ebrahimi __ umaxp(v19.V8H(), v23.V8H(), v26.V8H());
2250*f5c631daSSadaf Ebrahimi __ umaxv(b17, v30.V16B());
2251*f5c631daSSadaf Ebrahimi __ umaxv(b23, v12.V8B());
2252*f5c631daSSadaf Ebrahimi __ umaxv(h31, v15.V4H());
2253*f5c631daSSadaf Ebrahimi __ umaxv(h15, v25.V8H());
2254*f5c631daSSadaf Ebrahimi __ umaxv(s18, v21.V4S());
2255*f5c631daSSadaf Ebrahimi __ umin(v22.V16B(), v0.V16B(), v18.V16B());
2256*f5c631daSSadaf Ebrahimi __ umin(v1.V2S(), v21.V2S(), v16.V2S());
2257*f5c631daSSadaf Ebrahimi __ umin(v17.V4H(), v4.V4H(), v25.V4H());
2258*f5c631daSSadaf Ebrahimi __ umin(v24.V4S(), v26.V4S(), v13.V4S());
2259*f5c631daSSadaf Ebrahimi __ umin(v20.V8B(), v1.V8B(), v5.V8B());
2260*f5c631daSSadaf Ebrahimi __ umin(v26.V8H(), v25.V8H(), v23.V8H());
2261*f5c631daSSadaf Ebrahimi __ uminp(v5.V16B(), v1.V16B(), v23.V16B());
2262*f5c631daSSadaf Ebrahimi __ uminp(v7.V2S(), v26.V2S(), v30.V2S());
2263*f5c631daSSadaf Ebrahimi __ uminp(v9.V4H(), v5.V4H(), v25.V4H());
2264*f5c631daSSadaf Ebrahimi __ uminp(v23.V4S(), v10.V4S(), v1.V4S());
2265*f5c631daSSadaf Ebrahimi __ uminp(v4.V8B(), v29.V8B(), v14.V8B());
2266*f5c631daSSadaf Ebrahimi __ uminp(v21.V8H(), v0.V8H(), v14.V8H());
2267*f5c631daSSadaf Ebrahimi __ uminv(b0, v17.V16B());
2268*f5c631daSSadaf Ebrahimi __ uminv(b0, v31.V8B());
2269*f5c631daSSadaf Ebrahimi __ uminv(h24, v0.V4H());
2270*f5c631daSSadaf Ebrahimi __ uminv(h29, v14.V8H());
2271*f5c631daSSadaf Ebrahimi __ uminv(s30, v3.V4S());
2272*f5c631daSSadaf Ebrahimi __ umlal(v11.V2D(), v11.V2S(), v24.V2S());
2273*f5c631daSSadaf Ebrahimi __ umlal(v30.V2D(), v16.V2S(), v11.S(), 3);
2274*f5c631daSSadaf Ebrahimi __ umlal(v0.V4S(), v9.V4H(), v26.V4H());
2275*f5c631daSSadaf Ebrahimi __ umlal(v20.V4S(), v24.V4H(), v12.H(), 4);
2276*f5c631daSSadaf Ebrahimi __ umlal(v16.V8H(), v21.V8B(), v6.V8B());
2277*f5c631daSSadaf Ebrahimi __ umlal2(v17.V2D(), v19.V4S(), v23.V4S());
2278*f5c631daSSadaf Ebrahimi __ umlal2(v5.V2D(), v30.V4S(), v8.S(), 0);
2279*f5c631daSSadaf Ebrahimi __ umlal2(v16.V4S(), v8.V8H(), v15.V8H());
2280*f5c631daSSadaf Ebrahimi __ umlal2(v15.V4S(), v26.V8H(), v1.H(), 5);
2281*f5c631daSSadaf Ebrahimi __ umlal2(v30.V8H(), v1.V16B(), v17.V16B());
2282*f5c631daSSadaf Ebrahimi __ umlsl(v18.V2D(), v19.V2S(), v28.V2S());
2283*f5c631daSSadaf Ebrahimi __ umlsl(v7.V2D(), v7.V2S(), v8.S(), 0);
2284*f5c631daSSadaf Ebrahimi __ umlsl(v24.V4S(), v8.V4H(), v4.V4H());
2285*f5c631daSSadaf Ebrahimi __ umlsl(v18.V4S(), v22.V4H(), v12.H(), 4);
2286*f5c631daSSadaf Ebrahimi __ umlsl(v28.V8H(), v14.V8B(), v20.V8B());
2287*f5c631daSSadaf Ebrahimi __ umlsl2(v11.V2D(), v0.V4S(), v9.V4S());
2288*f5c631daSSadaf Ebrahimi __ umlsl2(v26.V2D(), v16.V4S(), v9.S(), 2);
2289*f5c631daSSadaf Ebrahimi __ umlsl2(v3.V4S(), v11.V8H(), v9.V8H());
2290*f5c631daSSadaf Ebrahimi __ umlsl2(v10.V4S(), v25.V8H(), v9.H(), 4);
2291*f5c631daSSadaf Ebrahimi __ umlsl2(v24.V8H(), v16.V16B(), v28.V16B());
2292*f5c631daSSadaf Ebrahimi __ umov(x30, v25.D(), 1);
2293*f5c631daSSadaf Ebrahimi __ umull(v12.V2D(), v10.V2S(), v29.V2S());
2294*f5c631daSSadaf Ebrahimi __ umull(v22.V2D(), v30.V2S(), v5.S(), 3);
2295*f5c631daSSadaf Ebrahimi __ umull(v7.V4S(), v0.V4H(), v25.V4H());
2296*f5c631daSSadaf Ebrahimi __ umull(v11.V4S(), v13.V4H(), v3.H(), 2);
2297*f5c631daSSadaf Ebrahimi __ umull(v25.V8H(), v16.V8B(), v10.V8B());
2298*f5c631daSSadaf Ebrahimi __ umull2(v17.V2D(), v3.V4S(), v26.V4S());
2299*f5c631daSSadaf Ebrahimi __ umull2(v26.V2D(), v11.V4S(), v2.S(), 3);
2300*f5c631daSSadaf Ebrahimi __ umull2(v12.V4S(), v17.V8H(), v23.V8H());
2301*f5c631daSSadaf Ebrahimi __ umull2(v4.V4S(), v31.V8H(), v1.H(), 2);
2302*f5c631daSSadaf Ebrahimi __ umull2(v5.V8H(), v12.V16B(), v17.V16B());
2303*f5c631daSSadaf Ebrahimi __ uqadd(b30, b4, b28);
2304*f5c631daSSadaf Ebrahimi __ uqadd(d27, d20, d16);
2305*f5c631daSSadaf Ebrahimi __ uqadd(h7, h14, h28);
2306*f5c631daSSadaf Ebrahimi __ uqadd(s28, s17, s4);
2307*f5c631daSSadaf Ebrahimi __ uqadd(v19.V16B(), v22.V16B(), v21.V16B());
2308*f5c631daSSadaf Ebrahimi __ uqadd(v16.V2D(), v4.V2D(), v11.V2D());
2309*f5c631daSSadaf Ebrahimi __ uqadd(v20.V2S(), v14.V2S(), v4.V2S());
2310*f5c631daSSadaf Ebrahimi __ uqadd(v5.V4H(), v0.V4H(), v16.V4H());
2311*f5c631daSSadaf Ebrahimi __ uqadd(v21.V4S(), v31.V4S(), v9.V4S());
2312*f5c631daSSadaf Ebrahimi __ uqadd(v23.V8B(), v24.V8B(), v3.V8B());
2313*f5c631daSSadaf Ebrahimi __ uqadd(v17.V8H(), v27.V8H(), v11.V8H());
2314*f5c631daSSadaf Ebrahimi __ uqrshl(b10, b22, b10);
2315*f5c631daSSadaf Ebrahimi __ uqrshl(d29, d5, d11);
2316*f5c631daSSadaf Ebrahimi __ uqrshl(h27, h24, h30);
2317*f5c631daSSadaf Ebrahimi __ uqrshl(s10, s13, s8);
2318*f5c631daSSadaf Ebrahimi __ uqrshl(v9.V16B(), v18.V16B(), v14.V16B());
2319*f5c631daSSadaf Ebrahimi __ uqrshl(v24.V2D(), v15.V2D(), v17.V2D());
2320*f5c631daSSadaf Ebrahimi __ uqrshl(v4.V2S(), v14.V2S(), v27.V2S());
2321*f5c631daSSadaf Ebrahimi __ uqrshl(v15.V4H(), v5.V4H(), v8.V4H());
2322*f5c631daSSadaf Ebrahimi __ uqrshl(v21.V4S(), v29.V4S(), v0.V4S());
2323*f5c631daSSadaf Ebrahimi __ uqrshl(v16.V8B(), v24.V8B(), v9.V8B());
2324*f5c631daSSadaf Ebrahimi __ uqrshl(v2.V8H(), v0.V8H(), v15.V8H());
2325*f5c631daSSadaf Ebrahimi __ uqrshrn(b11, h26, 4);
2326*f5c631daSSadaf Ebrahimi __ uqrshrn(h7, s30, 5);
2327*f5c631daSSadaf Ebrahimi __ uqrshrn(s10, d8, 21);
2328*f5c631daSSadaf Ebrahimi __ uqrshrn(v15.V2S(), v6.V2D(), 11);
2329*f5c631daSSadaf Ebrahimi __ uqrshrn(v5.V4H(), v26.V4S(), 12);
2330*f5c631daSSadaf Ebrahimi __ uqrshrn(v28.V8B(), v25.V8H(), 5);
2331*f5c631daSSadaf Ebrahimi __ uqrshrn2(v25.V16B(), v30.V8H(), 2);
2332*f5c631daSSadaf Ebrahimi __ uqrshrn2(v21.V4S(), v14.V2D(), 32);
2333*f5c631daSSadaf Ebrahimi __ uqrshrn2(v13.V8H(), v7.V4S(), 2);
2334*f5c631daSSadaf Ebrahimi __ uqshl(b13, b0, b23);
2335*f5c631daSSadaf Ebrahimi __ uqshl(b9, b17, 4);
2336*f5c631daSSadaf Ebrahimi __ uqshl(d23, d6, d4);
2337*f5c631daSSadaf Ebrahimi __ uqshl(d8, d11, 44);
2338*f5c631daSSadaf Ebrahimi __ uqshl(h19, h13, h15);
2339*f5c631daSSadaf Ebrahimi __ uqshl(h25, h26, 6);
2340*f5c631daSSadaf Ebrahimi __ uqshl(s4, s24, s10);
2341*f5c631daSSadaf Ebrahimi __ uqshl(s19, s14, 1);
2342*f5c631daSSadaf Ebrahimi __ uqshl(v14.V16B(), v30.V16B(), v25.V16B());
2343*f5c631daSSadaf Ebrahimi __ uqshl(v6.V16B(), v10.V16B(), 5);
2344*f5c631daSSadaf Ebrahimi __ uqshl(v18.V2D(), v8.V2D(), v7.V2D());
2345*f5c631daSSadaf Ebrahimi __ uqshl(v25.V2D(), v14.V2D(), 18);
2346*f5c631daSSadaf Ebrahimi __ uqshl(v25.V2S(), v16.V2S(), v23.V2S());
2347*f5c631daSSadaf Ebrahimi __ uqshl(v13.V2S(), v15.V2S(), 31);
2348*f5c631daSSadaf Ebrahimi __ uqshl(v28.V4H(), v24.V4H(), v15.V4H());
2349*f5c631daSSadaf Ebrahimi __ uqshl(v4.V4H(), v17.V4H(), 1);
2350*f5c631daSSadaf Ebrahimi __ uqshl(v9.V4S(), v31.V4S(), v23.V4S());
2351*f5c631daSSadaf Ebrahimi __ uqshl(v18.V4S(), v28.V4S(), 31);
2352*f5c631daSSadaf Ebrahimi __ uqshl(v31.V8B(), v21.V8B(), v15.V8B());
2353*f5c631daSSadaf Ebrahimi __ uqshl(v6.V8B(), v21.V8B(), 1);
2354*f5c631daSSadaf Ebrahimi __ uqshl(v28.V8H(), v2.V8H(), v17.V8H());
2355*f5c631daSSadaf Ebrahimi __ uqshl(v24.V8H(), v8.V8H(), 14);
2356*f5c631daSSadaf Ebrahimi __ uqshrn(b21, h27, 7);
2357*f5c631daSSadaf Ebrahimi __ uqshrn(h28, s26, 11);
2358*f5c631daSSadaf Ebrahimi __ uqshrn(s13, d31, 17);
2359*f5c631daSSadaf Ebrahimi __ uqshrn(v21.V2S(), v16.V2D(), 8);
2360*f5c631daSSadaf Ebrahimi __ uqshrn(v24.V4H(), v24.V4S(), 2);
2361*f5c631daSSadaf Ebrahimi __ uqshrn(v5.V8B(), v1.V8H(), 8);
2362*f5c631daSSadaf Ebrahimi __ uqshrn2(v16.V16B(), v29.V8H(), 6);
2363*f5c631daSSadaf Ebrahimi __ uqshrn2(v2.V4S(), v6.V2D(), 1);
2364*f5c631daSSadaf Ebrahimi __ uqshrn2(v16.V8H(), v10.V4S(), 14);
2365*f5c631daSSadaf Ebrahimi __ uqsub(b28, b20, b26);
2366*f5c631daSSadaf Ebrahimi __ uqsub(d0, d7, d10);
2367*f5c631daSSadaf Ebrahimi __ uqsub(h26, h24, h7);
2368*f5c631daSSadaf Ebrahimi __ uqsub(s23, s23, s16);
2369*f5c631daSSadaf Ebrahimi __ uqsub(v14.V16B(), v16.V16B(), v24.V16B());
2370*f5c631daSSadaf Ebrahimi __ uqsub(v11.V2D(), v17.V2D(), v6.V2D());
2371*f5c631daSSadaf Ebrahimi __ uqsub(v10.V2S(), v10.V2S(), v8.V2S());
2372*f5c631daSSadaf Ebrahimi __ uqsub(v9.V4H(), v15.V4H(), v12.V4H());
2373*f5c631daSSadaf Ebrahimi __ uqsub(v23.V4S(), v18.V4S(), v7.V4S());
2374*f5c631daSSadaf Ebrahimi __ uqsub(v9.V8B(), v19.V8B(), v17.V8B());
2375*f5c631daSSadaf Ebrahimi __ uqsub(v20.V8H(), v2.V8H(), v6.V8H());
2376*f5c631daSSadaf Ebrahimi __ uqxtn(b29, h19);
2377*f5c631daSSadaf Ebrahimi __ uqxtn(h0, s13);
2378*f5c631daSSadaf Ebrahimi __ uqxtn(s26, d22);
2379*f5c631daSSadaf Ebrahimi __ uqxtn(v5.V2S(), v31.V2D());
2380*f5c631daSSadaf Ebrahimi __ uqxtn(v30.V4H(), v19.V4S());
2381*f5c631daSSadaf Ebrahimi __ uqxtn(v15.V8B(), v2.V8H());
2382*f5c631daSSadaf Ebrahimi __ uqxtn2(v29.V16B(), v3.V8H());
2383*f5c631daSSadaf Ebrahimi __ uqxtn2(v13.V4S(), v17.V2D());
2384*f5c631daSSadaf Ebrahimi __ uqxtn2(v28.V8H(), v11.V4S());
2385*f5c631daSSadaf Ebrahimi __ urecpe(v23.V2S(), v15.V2S());
2386*f5c631daSSadaf Ebrahimi __ urecpe(v27.V4S(), v7.V4S());
2387*f5c631daSSadaf Ebrahimi __ urhadd(v2.V16B(), v15.V16B(), v27.V16B());
2388*f5c631daSSadaf Ebrahimi __ urhadd(v15.V2S(), v1.V2S(), v18.V2S());
2389*f5c631daSSadaf Ebrahimi __ urhadd(v17.V4H(), v4.V4H(), v26.V4H());
2390*f5c631daSSadaf Ebrahimi __ urhadd(v2.V4S(), v27.V4S(), v14.V4S());
2391*f5c631daSSadaf Ebrahimi __ urhadd(v5.V8B(), v17.V8B(), v14.V8B());
2392*f5c631daSSadaf Ebrahimi __ urhadd(v30.V8H(), v2.V8H(), v25.V8H());
2393*f5c631daSSadaf Ebrahimi __ urshl(d4, d28, d30);
2394*f5c631daSSadaf Ebrahimi __ urshl(v13.V16B(), v31.V16B(), v19.V16B());
2395*f5c631daSSadaf Ebrahimi __ urshl(v14.V2D(), v23.V2D(), v21.V2D());
2396*f5c631daSSadaf Ebrahimi __ urshl(v10.V2S(), v7.V2S(), v8.V2S());
2397*f5c631daSSadaf Ebrahimi __ urshl(v15.V4H(), v21.V4H(), v28.V4H());
2398*f5c631daSSadaf Ebrahimi __ urshl(v30.V4S(), v8.V4S(), v23.V4S());
2399*f5c631daSSadaf Ebrahimi __ urshl(v31.V8B(), v20.V8B(), v5.V8B());
2400*f5c631daSSadaf Ebrahimi __ urshl(v30.V8H(), v27.V8H(), v30.V8H());
2401*f5c631daSSadaf Ebrahimi __ urshr(d4, d13, 49);
2402*f5c631daSSadaf Ebrahimi __ urshr(v2.V16B(), v20.V16B(), 1);
2403*f5c631daSSadaf Ebrahimi __ urshr(v13.V2D(), v11.V2D(), 51);
2404*f5c631daSSadaf Ebrahimi __ urshr(v21.V2S(), v31.V2S(), 10);
2405*f5c631daSSadaf Ebrahimi __ urshr(v21.V4H(), v17.V4H(), 11);
2406*f5c631daSSadaf Ebrahimi __ urshr(v4.V4S(), v22.V4S(), 1);
2407*f5c631daSSadaf Ebrahimi __ urshr(v0.V8B(), v1.V8B(), 7);
2408*f5c631daSSadaf Ebrahimi __ urshr(v13.V8H(), v20.V8H(), 1);
2409*f5c631daSSadaf Ebrahimi __ ursqrte(v20.V2S(), v16.V2S());
2410*f5c631daSSadaf Ebrahimi __ ursqrte(v28.V4S(), v8.V4S());
2411*f5c631daSSadaf Ebrahimi __ ursra(d27, d16, 45);
2412*f5c631daSSadaf Ebrahimi __ ursra(v18.V16B(), v17.V16B(), 3);
2413*f5c631daSSadaf Ebrahimi __ ursra(v26.V2D(), v28.V2D(), 58);
2414*f5c631daSSadaf Ebrahimi __ ursra(v8.V2S(), v22.V2S(), 31);
2415*f5c631daSSadaf Ebrahimi __ ursra(v31.V4H(), v4.V4H(), 7);
2416*f5c631daSSadaf Ebrahimi __ ursra(v31.V4S(), v15.V4S(), 2);
2417*f5c631daSSadaf Ebrahimi __ ursra(v3.V8B(), v1.V8B(), 5);
2418*f5c631daSSadaf Ebrahimi __ ursra(v18.V8H(), v14.V8H(), 13);
2419*f5c631daSSadaf Ebrahimi __ ushl(d31, d0, d16);
2420*f5c631daSSadaf Ebrahimi __ ushl(v0.V16B(), v6.V16B(), v2.V16B());
2421*f5c631daSSadaf Ebrahimi __ ushl(v18.V2D(), v1.V2D(), v18.V2D());
2422*f5c631daSSadaf Ebrahimi __ ushl(v27.V2S(), v7.V2S(), v29.V2S());
2423*f5c631daSSadaf Ebrahimi __ ushl(v14.V4H(), v14.V4H(), v13.V4H());
2424*f5c631daSSadaf Ebrahimi __ ushl(v22.V4S(), v4.V4S(), v9.V4S());
2425*f5c631daSSadaf Ebrahimi __ ushl(v23.V8B(), v22.V8B(), v27.V8B());
2426*f5c631daSSadaf Ebrahimi __ ushl(v21.V8H(), v25.V8H(), v8.V8H());
2427*f5c631daSSadaf Ebrahimi __ ushll(v11.V2D(), v0.V2S(), 21);
2428*f5c631daSSadaf Ebrahimi __ ushll(v2.V4S(), v17.V4H(), 8);
2429*f5c631daSSadaf Ebrahimi __ ushll(v11.V8H(), v14.V8B(), 1);
2430*f5c631daSSadaf Ebrahimi __ ushll2(v8.V2D(), v29.V4S(), 7);
2431*f5c631daSSadaf Ebrahimi __ ushll2(v29.V4S(), v9.V8H(), 2);
2432*f5c631daSSadaf Ebrahimi __ ushll2(v5.V8H(), v24.V16B(), 6);
2433*f5c631daSSadaf Ebrahimi __ ushr(d28, d27, 53);
2434*f5c631daSSadaf Ebrahimi __ ushr(v1.V16B(), v9.V16B(), 7);
2435*f5c631daSSadaf Ebrahimi __ ushr(v2.V2D(), v24.V2D(), 43);
2436*f5c631daSSadaf Ebrahimi __ ushr(v30.V2S(), v25.V2S(), 11);
2437*f5c631daSSadaf Ebrahimi __ ushr(v10.V4H(), v26.V4H(), 12);
2438*f5c631daSSadaf Ebrahimi __ ushr(v4.V4S(), v5.V4S(), 30);
2439*f5c631daSSadaf Ebrahimi __ ushr(v30.V8B(), v2.V8B(), 1);
2440*f5c631daSSadaf Ebrahimi __ ushr(v6.V8H(), v12.V8H(), 2);
2441*f5c631daSSadaf Ebrahimi __ usqadd(b19, b5);
2442*f5c631daSSadaf Ebrahimi __ usqadd(d9, d2);
2443*f5c631daSSadaf Ebrahimi __ usqadd(h2, h16);
2444*f5c631daSSadaf Ebrahimi __ usqadd(s16, s3);
2445*f5c631daSSadaf Ebrahimi __ usqadd(v31.V16B(), v29.V16B());
2446*f5c631daSSadaf Ebrahimi __ usqadd(v8.V2D(), v10.V2D());
2447*f5c631daSSadaf Ebrahimi __ usqadd(v18.V2S(), v9.V2S());
2448*f5c631daSSadaf Ebrahimi __ usqadd(v24.V4H(), v14.V4H());
2449*f5c631daSSadaf Ebrahimi __ usqadd(v10.V4S(), v30.V4S());
2450*f5c631daSSadaf Ebrahimi __ usqadd(v16.V8B(), v20.V8B());
2451*f5c631daSSadaf Ebrahimi __ usqadd(v12.V8H(), v16.V8H());
2452*f5c631daSSadaf Ebrahimi __ usra(d28, d27, 37);
2453*f5c631daSSadaf Ebrahimi __ usra(v5.V16B(), v22.V16B(), 5);
2454*f5c631daSSadaf Ebrahimi __ usra(v2.V2D(), v19.V2D(), 33);
2455*f5c631daSSadaf Ebrahimi __ usra(v0.V2S(), v0.V2S(), 21);
2456*f5c631daSSadaf Ebrahimi __ usra(v7.V4H(), v6.V4H(), 12);
2457*f5c631daSSadaf Ebrahimi __ usra(v4.V4S(), v17.V4S(), 9);
2458*f5c631daSSadaf Ebrahimi __ usra(v9.V8B(), v12.V8B(), 7);
2459*f5c631daSSadaf Ebrahimi __ usra(v3.V8H(), v27.V8H(), 14);
2460*f5c631daSSadaf Ebrahimi __ usubl(v29.V2D(), v12.V2S(), v30.V2S());
2461*f5c631daSSadaf Ebrahimi __ usubl(v29.V4S(), v28.V4H(), v6.V4H());
2462*f5c631daSSadaf Ebrahimi __ usubl(v12.V8H(), v4.V8B(), v14.V8B());
2463*f5c631daSSadaf Ebrahimi __ usubl2(v1.V2D(), v24.V4S(), v17.V4S());
2464*f5c631daSSadaf Ebrahimi __ usubl2(v4.V4S(), v1.V8H(), v3.V8H());
2465*f5c631daSSadaf Ebrahimi __ usubl2(v23.V8H(), v4.V16B(), v7.V16B());
2466*f5c631daSSadaf Ebrahimi __ usubw(v9.V2D(), v20.V2D(), v30.V2S());
2467*f5c631daSSadaf Ebrahimi __ usubw(v20.V4S(), v16.V4S(), v23.V4H());
2468*f5c631daSSadaf Ebrahimi __ usubw(v25.V8H(), v8.V8H(), v29.V8B());
2469*f5c631daSSadaf Ebrahimi __ usubw2(v18.V2D(), v29.V2D(), v6.V4S());
2470*f5c631daSSadaf Ebrahimi __ usubw2(v6.V4S(), v6.V4S(), v20.V8H());
2471*f5c631daSSadaf Ebrahimi __ usubw2(v18.V8H(), v4.V8H(), v16.V16B());
2472*f5c631daSSadaf Ebrahimi __ uxtl(v27.V2D(), v21.V2S());
2473*f5c631daSSadaf Ebrahimi __ uxtl(v0.V4S(), v31.V4H());
2474*f5c631daSSadaf Ebrahimi __ uxtl(v27.V8H(), v10.V8B());
2475*f5c631daSSadaf Ebrahimi __ uxtl2(v6.V2D(), v16.V4S());
2476*f5c631daSSadaf Ebrahimi __ uxtl2(v22.V4S(), v20.V8H());
2477*f5c631daSSadaf Ebrahimi __ uxtl2(v20.V8H(), v21.V16B());
2478*f5c631daSSadaf Ebrahimi __ uzp1(v30.V16B(), v9.V16B(), v17.V16B());
2479*f5c631daSSadaf Ebrahimi __ uzp1(v7.V2D(), v26.V2D(), v28.V2D());
2480*f5c631daSSadaf Ebrahimi __ uzp1(v26.V2S(), v16.V2S(), v22.V2S());
2481*f5c631daSSadaf Ebrahimi __ uzp1(v14.V4H(), v19.V4H(), v6.V4H());
2482*f5c631daSSadaf Ebrahimi __ uzp1(v17.V4S(), v23.V4S(), v30.V4S());
2483*f5c631daSSadaf Ebrahimi __ uzp1(v28.V8B(), v27.V8B(), v13.V8B());
2484*f5c631daSSadaf Ebrahimi __ uzp1(v17.V8H(), v1.V8H(), v12.V8H());
2485*f5c631daSSadaf Ebrahimi __ uzp2(v8.V16B(), v18.V16B(), v26.V16B());
2486*f5c631daSSadaf Ebrahimi __ uzp2(v21.V2D(), v22.V2D(), v24.V2D());
2487*f5c631daSSadaf Ebrahimi __ uzp2(v20.V2S(), v21.V2S(), v2.V2S());
2488*f5c631daSSadaf Ebrahimi __ uzp2(v16.V4H(), v31.V4H(), v6.V4H());
2489*f5c631daSSadaf Ebrahimi __ uzp2(v25.V4S(), v11.V4S(), v8.V4S());
2490*f5c631daSSadaf Ebrahimi __ uzp2(v31.V8B(), v31.V8B(), v13.V8B());
2491*f5c631daSSadaf Ebrahimi __ uzp2(v8.V8H(), v17.V8H(), v1.V8H());
2492*f5c631daSSadaf Ebrahimi __ xtn(v17.V2S(), v26.V2D());
2493*f5c631daSSadaf Ebrahimi __ xtn(v3.V4H(), v0.V4S());
2494*f5c631daSSadaf Ebrahimi __ xtn(v18.V8B(), v8.V8H());
2495*f5c631daSSadaf Ebrahimi __ xtn2(v0.V16B(), v0.V8H());
2496*f5c631daSSadaf Ebrahimi __ xtn2(v15.V4S(), v4.V2D());
2497*f5c631daSSadaf Ebrahimi __ xtn2(v31.V8H(), v18.V4S());
2498*f5c631daSSadaf Ebrahimi __ zip1(v22.V16B(), v9.V16B(), v6.V16B());
2499*f5c631daSSadaf Ebrahimi __ zip1(v23.V2D(), v11.V2D(), v2.V2D());
2500*f5c631daSSadaf Ebrahimi __ zip1(v26.V2S(), v16.V2S(), v9.V2S());
2501*f5c631daSSadaf Ebrahimi __ zip1(v1.V4H(), v9.V4H(), v7.V4H());
2502*f5c631daSSadaf Ebrahimi __ zip1(v0.V4S(), v30.V4S(), v20.V4S());
2503*f5c631daSSadaf Ebrahimi __ zip1(v30.V8B(), v17.V8B(), v15.V8B());
2504*f5c631daSSadaf Ebrahimi __ zip1(v17.V8H(), v8.V8H(), v2.V8H());
2505*f5c631daSSadaf Ebrahimi __ zip2(v23.V16B(), v10.V16B(), v11.V16B());
2506*f5c631daSSadaf Ebrahimi __ zip2(v30.V2D(), v6.V2D(), v14.V2D());
2507*f5c631daSSadaf Ebrahimi __ zip2(v9.V2S(), v10.V2S(), v21.V2S());
2508*f5c631daSSadaf Ebrahimi __ zip2(v8.V4H(), v24.V4H(), v29.V4H());
2509*f5c631daSSadaf Ebrahimi __ zip2(v0.V4S(), v21.V4S(), v23.V4S());
2510*f5c631daSSadaf Ebrahimi __ zip2(v25.V8B(), v23.V8B(), v30.V8B());
2511*f5c631daSSadaf Ebrahimi __ zip2(v7.V8H(), v10.V8H(), v30.V8H());
2512*f5c631daSSadaf Ebrahimi } // NOLINT(readability/fn_size)
2513*f5c631daSSadaf Ebrahimi
2514*f5c631daSSadaf Ebrahimi
// Emits a fixed list of NEON floating-point instructions through `masm`.
//
// The whole body runs inside an ExactAssemblyScope that is constructed with
// the number of bytes remaining in the assembler's buffer and
// ExactAssemblyScope::kMaximumSize.
//
// NOTE(review): `__` is defined outside this view; presumably it is the
// file's shorthand for emitting an instruction through `masm` - confirm.
// NOTE(review): the emitted order is presumably what a reference trace is
// compared against, so instructions should not be reordered - confirm against
// the callers of this function.
GenerateTestSequenceNEONFP(MacroAssembler * masm)2515*f5c631daSSadaf Ebrahimi static void GenerateTestSequenceNEONFP(MacroAssembler* masm) {
2516*f5c631daSSadaf Ebrahimi ExactAssemblyScope guard(masm,
2517*f5c631daSSadaf Ebrahimi masm->GetBuffer()->GetRemainingBytes(),
2518*f5c631daSSadaf Ebrahimi ExactAssemblyScope::kMaximumSize);
2519*f5c631daSSadaf Ebrahimi
2520*f5c631daSSadaf Ebrahimi // NEON floating point instructions.
2521*f5c631daSSadaf Ebrahimi __ fabd(v3.V2D(), v25.V2D(), v8.V2D());
2522*f5c631daSSadaf Ebrahimi __ fabd(v14.V2S(), v27.V2S(), v11.V2S());
2523*f5c631daSSadaf Ebrahimi __ fabd(v9.V4S(), v22.V4S(), v18.V4S());
2524*f5c631daSSadaf Ebrahimi __ fabs(v1.V2D(), v29.V2D());
2525*f5c631daSSadaf Ebrahimi __ fabs(v6.V2S(), v21.V2S());
2526*f5c631daSSadaf Ebrahimi __ fabs(v12.V4S(), v25.V4S());
2527*f5c631daSSadaf Ebrahimi __ facge(v18.V2D(), v5.V2D(), v0.V2D());
2528*f5c631daSSadaf Ebrahimi __ facge(v15.V2S(), v11.V2S(), v6.V2S());
2529*f5c631daSSadaf Ebrahimi __ facge(v30.V4S(), v10.V4S(), v25.V4S());
2530*f5c631daSSadaf Ebrahimi __ facgt(v28.V2D(), v16.V2D(), v31.V2D());
2531*f5c631daSSadaf Ebrahimi __ facgt(v15.V2S(), v1.V2S(), v4.V2S());
2532*f5c631daSSadaf Ebrahimi __ facgt(v22.V4S(), v3.V4S(), v10.V4S());
2533*f5c631daSSadaf Ebrahimi __ fadd(v7.V2D(), v10.V2D(), v24.V2D());
2534*f5c631daSSadaf Ebrahimi __ fadd(v10.V2S(), v23.V2S(), v7.V2S());
2535*f5c631daSSadaf Ebrahimi __ fadd(v16.V4S(), v22.V4S(), v11.V4S());
2536*f5c631daSSadaf Ebrahimi __ faddp(d27, v28.V2D());
2537*f5c631daSSadaf Ebrahimi __ faddp(s20, v23.V2S());
2538*f5c631daSSadaf Ebrahimi __ faddp(v21.V2D(), v4.V2D(), v11.V2D());
2539*f5c631daSSadaf Ebrahimi __ faddp(v31.V2S(), v26.V2S(), v1.V2S());
2540*f5c631daSSadaf Ebrahimi __ faddp(v13.V4S(), v27.V4S(), v28.V4S());
// fcm* group: register-register forms and forms whose last operand is the
// literal 0.0 (fcmle and fcmlt appear only in the 0.0 form here).
2541*f5c631daSSadaf Ebrahimi __ fcmeq(v17.V2D(), v13.V2D(), v20.V2D());
2542*f5c631daSSadaf Ebrahimi __ fcmeq(v24.V2D(), v16.V2D(), 0.0);
2543*f5c631daSSadaf Ebrahimi __ fcmeq(v26.V2S(), v17.V2S(), v10.V2S());
2544*f5c631daSSadaf Ebrahimi __ fcmeq(v24.V2S(), v4.V2S(), 0.0);
2545*f5c631daSSadaf Ebrahimi __ fcmeq(v8.V4S(), v4.V4S(), v14.V4S());
2546*f5c631daSSadaf Ebrahimi __ fcmeq(v26.V4S(), v25.V4S(), 0.0);
2547*f5c631daSSadaf Ebrahimi __ fcmge(v27.V2D(), v0.V2D(), v0.V2D());
2548*f5c631daSSadaf Ebrahimi __ fcmge(v22.V2D(), v30.V2D(), 0.0);
2549*f5c631daSSadaf Ebrahimi __ fcmge(v7.V2S(), v21.V2S(), v25.V2S());
2550*f5c631daSSadaf Ebrahimi __ fcmge(v15.V2S(), v15.V2S(), 0.0);
2551*f5c631daSSadaf Ebrahimi __ fcmge(v29.V4S(), v4.V4S(), v27.V4S());
2552*f5c631daSSadaf Ebrahimi __ fcmge(v22.V4S(), v21.V4S(), 0.0);
2553*f5c631daSSadaf Ebrahimi __ fcmgt(v1.V2D(), v26.V2D(), v15.V2D());
2554*f5c631daSSadaf Ebrahimi __ fcmgt(v15.V2D(), v23.V2D(), 0.0);
2555*f5c631daSSadaf Ebrahimi __ fcmgt(v21.V2S(), v16.V2S(), v6.V2S());
2556*f5c631daSSadaf Ebrahimi __ fcmgt(v1.V2S(), v13.V2S(), 0.0);
2557*f5c631daSSadaf Ebrahimi __ fcmgt(v14.V4S(), v0.V4S(), v25.V4S());
2558*f5c631daSSadaf Ebrahimi __ fcmgt(v13.V4S(), v8.V4S(), 0.0);
2559*f5c631daSSadaf Ebrahimi __ fcmle(v4.V2D(), v6.V2D(), 0.0);
2560*f5c631daSSadaf Ebrahimi __ fcmle(v24.V2S(), v31.V2S(), 0.0);
2561*f5c631daSSadaf Ebrahimi __ fcmle(v8.V4S(), v23.V4S(), 0.0);
2562*f5c631daSSadaf Ebrahimi __ fcmlt(v7.V2D(), v3.V2D(), 0.0);
2563*f5c631daSSadaf Ebrahimi __ fcmlt(v15.V2S(), v21.V2S(), 0.0);
2564*f5c631daSSadaf Ebrahimi __ fcmlt(v1.V4S(), v2.V4S(), 0.0);
// fcvt* group. The fcvtzs/fcvtzu calls appear both with two operands and with
// a trailing integer immediate (presumably a fixed-point fraction-bit count -
// confirm against the assembler API).
2565*f5c631daSSadaf Ebrahimi __ fcvtas(v6.V2D(), v8.V2D());
2566*f5c631daSSadaf Ebrahimi __ fcvtas(v1.V2S(), v9.V2S());
2567*f5c631daSSadaf Ebrahimi __ fcvtas(v8.V4S(), v19.V4S());
2568*f5c631daSSadaf Ebrahimi __ fcvtau(v5.V2D(), v31.V2D());
2569*f5c631daSSadaf Ebrahimi __ fcvtau(v28.V2S(), v29.V2S());
2570*f5c631daSSadaf Ebrahimi __ fcvtau(v11.V4S(), v26.V4S());
2571*f5c631daSSadaf Ebrahimi __ fcvtl(v8.V2D(), v25.V2S());
2572*f5c631daSSadaf Ebrahimi __ fcvtl(v27.V4S(), v14.V4H());
2573*f5c631daSSadaf Ebrahimi __ fcvtl2(v1.V2D(), v6.V4S());
2574*f5c631daSSadaf Ebrahimi __ fcvtl2(v24.V4S(), v9.V8H());
2575*f5c631daSSadaf Ebrahimi __ fcvtms(v9.V2D(), v24.V2D());
2576*f5c631daSSadaf Ebrahimi __ fcvtms(v7.V2S(), v11.V2S());
2577*f5c631daSSadaf Ebrahimi __ fcvtms(v23.V4S(), v21.V4S());
2578*f5c631daSSadaf Ebrahimi __ fcvtmu(v13.V2D(), v1.V2D());
2579*f5c631daSSadaf Ebrahimi __ fcvtmu(v26.V2S(), v12.V2S());
2580*f5c631daSSadaf Ebrahimi __ fcvtmu(v21.V4S(), v21.V4S());
2581*f5c631daSSadaf Ebrahimi __ fcvtn(v11.V2S(), v1.V2D());
2582*f5c631daSSadaf Ebrahimi __ fcvtn(v8.V4H(), v2.V4S());
2583*f5c631daSSadaf Ebrahimi __ fcvtn2(v24.V4S(), v29.V2D());
2584*f5c631daSSadaf Ebrahimi __ fcvtn2(v4.V8H(), v10.V4S());
2585*f5c631daSSadaf Ebrahimi __ fcvtns(v25.V2D(), v10.V2D());
2586*f5c631daSSadaf Ebrahimi __ fcvtns(v4.V2S(), v8.V2S());
2587*f5c631daSSadaf Ebrahimi __ fcvtns(v29.V4S(), v27.V4S());
2588*f5c631daSSadaf Ebrahimi __ fcvtnu(v18.V2D(), v27.V2D());
2589*f5c631daSSadaf Ebrahimi __ fcvtnu(v11.V2S(), v14.V2S());
2590*f5c631daSSadaf Ebrahimi __ fcvtnu(v27.V4S(), v21.V4S());
2591*f5c631daSSadaf Ebrahimi __ fcvtps(v23.V2D(), v5.V2D());
2592*f5c631daSSadaf Ebrahimi __ fcvtps(v24.V2S(), v15.V2S());
2593*f5c631daSSadaf Ebrahimi __ fcvtps(v5.V4S(), v19.V4S());
2594*f5c631daSSadaf Ebrahimi __ fcvtpu(v3.V2D(), v21.V2D());
2595*f5c631daSSadaf Ebrahimi __ fcvtpu(v3.V2S(), v21.V2S());
2596*f5c631daSSadaf Ebrahimi __ fcvtpu(v0.V4S(), v7.V4S());
2597*f5c631daSSadaf Ebrahimi __ fcvtxn(v29.V2S(), v11.V2D());
2598*f5c631daSSadaf Ebrahimi __ fcvtxn2(v31.V4S(), v25.V2D());
2599*f5c631daSSadaf Ebrahimi __ fcvtzs(v19.V2D(), v17.V2D());
2600*f5c631daSSadaf Ebrahimi __ fcvtzs(v12.V2D(), v24.V2D(), 64);
2601*f5c631daSSadaf Ebrahimi __ fcvtzs(v9.V2S(), v2.V2S());
2602*f5c631daSSadaf Ebrahimi __ fcvtzs(v5.V2S(), v20.V2S(), 29);
2603*f5c631daSSadaf Ebrahimi __ fcvtzs(v21.V4S(), v25.V4S());
2604*f5c631daSSadaf Ebrahimi __ fcvtzs(v26.V4S(), v1.V4S(), 6);
2605*f5c631daSSadaf Ebrahimi __ fcvtzu(v13.V2D(), v25.V2D());
2606*f5c631daSSadaf Ebrahimi __ fcvtzu(v28.V2D(), v13.V2D(), 32);
2607*f5c631daSSadaf Ebrahimi __ fcvtzu(v26.V2S(), v6.V2S());
2608*f5c631daSSadaf Ebrahimi __ fcvtzu(v9.V2S(), v10.V2S(), 15);
2609*f5c631daSSadaf Ebrahimi __ fcvtzu(v30.V4S(), v6.V4S());
2610*f5c631daSSadaf Ebrahimi __ fcvtzu(v19.V4S(), v22.V4S(), 18);
2611*f5c631daSSadaf Ebrahimi __ fdiv(v15.V2D(), v8.V2D(), v15.V2D());
2612*f5c631daSSadaf Ebrahimi __ fdiv(v12.V2S(), v9.V2S(), v26.V2S());
2613*f5c631daSSadaf Ebrahimi __ fdiv(v19.V4S(), v22.V4S(), v19.V4S());
2614*f5c631daSSadaf Ebrahimi __ fmax(v19.V2D(), v7.V2D(), v8.V2D());
2615*f5c631daSSadaf Ebrahimi __ fmax(v25.V2S(), v12.V2S(), v29.V2S());
2616*f5c631daSSadaf Ebrahimi __ fmax(v6.V4S(), v15.V4S(), v5.V4S());
2617*f5c631daSSadaf Ebrahimi __ fmaxnm(v16.V2D(), v8.V2D(), v20.V2D());
2618*f5c631daSSadaf Ebrahimi __ fmaxnm(v15.V2S(), v26.V2S(), v25.V2S());
2619*f5c631daSSadaf Ebrahimi __ fmaxnm(v23.V4S(), v14.V4S(), v16.V4S());
2620*f5c631daSSadaf Ebrahimi __ fmaxnmp(d6, v19.V2D());
2621*f5c631daSSadaf Ebrahimi __ fmaxnmp(s27, v26.V2S());
2622*f5c631daSSadaf Ebrahimi __ fmaxnmp(v8.V2D(), v12.V2D(), v23.V2D());
2623*f5c631daSSadaf Ebrahimi __ fmaxnmp(v13.V2S(), v25.V2S(), v22.V2S());
2624*f5c631daSSadaf Ebrahimi __ fmaxnmp(v15.V4S(), v11.V4S(), v17.V4S());
2625*f5c631daSSadaf Ebrahimi __ fmaxnmv(s27, v19.V4S());
2626*f5c631daSSadaf Ebrahimi __ fmaxp(d20, v14.V2D());
2627*f5c631daSSadaf Ebrahimi __ fmaxp(s18, v2.V2S());
2628*f5c631daSSadaf Ebrahimi __ fmaxp(v9.V2D(), v23.V2D(), v31.V2D());
2629*f5c631daSSadaf Ebrahimi __ fmaxp(v7.V2S(), v22.V2S(), v31.V2S());
2630*f5c631daSSadaf Ebrahimi __ fmaxp(v18.V4S(), v7.V4S(), v29.V4S());
2631*f5c631daSSadaf Ebrahimi __ fmaxv(s31, v29.V4S());
2632*f5c631daSSadaf Ebrahimi __ fmin(v2.V2D(), v5.V2D(), v2.V2D());
2633*f5c631daSSadaf Ebrahimi __ fmin(v31.V2S(), v17.V2S(), v10.V2S());
2634*f5c631daSSadaf Ebrahimi __ fmin(v10.V4S(), v4.V4S(), v16.V4S());
2635*f5c631daSSadaf Ebrahimi __ fminnm(v21.V2D(), v6.V2D(), v5.V2D());
2636*f5c631daSSadaf Ebrahimi __ fminnm(v22.V2S(), v18.V2S(), v14.V2S());
2637*f5c631daSSadaf Ebrahimi __ fminnm(v25.V4S(), v31.V4S(), v3.V4S());
2638*f5c631daSSadaf Ebrahimi __ fminnmp(d9, v1.V2D());
2639*f5c631daSSadaf Ebrahimi __ fminnmp(s21, v20.V2S());
2640*f5c631daSSadaf Ebrahimi __ fminnmp(v16.V2D(), v21.V2D(), v19.V2D());
2641*f5c631daSSadaf Ebrahimi __ fminnmp(v16.V2S(), v31.V2S(), v25.V2S());
2642*f5c631daSSadaf Ebrahimi __ fminnmp(v26.V4S(), v16.V4S(), v15.V4S());
2643*f5c631daSSadaf Ebrahimi __ fminnmv(s3, v4.V4S());
2644*f5c631daSSadaf Ebrahimi __ fminp(d24, v26.V2D());
2645*f5c631daSSadaf Ebrahimi __ fminp(s7, v17.V2S());
2646*f5c631daSSadaf Ebrahimi __ fminp(v23.V2D(), v19.V2D(), v3.V2D());
2647*f5c631daSSadaf Ebrahimi __ fminp(v29.V2S(), v21.V2S(), v9.V2S());
2648*f5c631daSSadaf Ebrahimi __ fminp(v0.V4S(), v24.V4S(), v21.V4S());
2649*f5c631daSSadaf Ebrahimi __ fminv(s25, v8.V4S());
// fmla/fmls/fmul/fmulx appear in two shapes: three register operands, and
// three register operands followed by a small integer (presumably the lane
// index of the last register - confirm against the assembler API).
2650*f5c631daSSadaf Ebrahimi __ fmla(d23, d0, v9.D(), 1);
2651*f5c631daSSadaf Ebrahimi __ fmla(s23, s15, v7.S(), 0);
2652*f5c631daSSadaf Ebrahimi __ fmla(v17.V2D(), v11.V2D(), v6.V2D());
2653*f5c631daSSadaf Ebrahimi __ fmla(v30.V2D(), v30.V2D(), v11.D(), 0);
2654*f5c631daSSadaf Ebrahimi __ fmla(v19.V2S(), v12.V2S(), v6.V2S());
2655*f5c631daSSadaf Ebrahimi __ fmla(v24.V2S(), v17.V2S(), v9.S(), 0);
2656*f5c631daSSadaf Ebrahimi __ fmla(v16.V4S(), v11.V4S(), v11.V4S());
2657*f5c631daSSadaf Ebrahimi __ fmla(v27.V4S(), v23.V4S(), v9.S(), 2);
2658*f5c631daSSadaf Ebrahimi __ fmls(d27, d30, v6.D(), 0);
2659*f5c631daSSadaf Ebrahimi __ fmls(s21, s16, v2.S(), 0);
2660*f5c631daSSadaf Ebrahimi __ fmls(v5.V2D(), v19.V2D(), v21.V2D());
2661*f5c631daSSadaf Ebrahimi __ fmls(v18.V2D(), v30.V2D(), v12.D(), 0);
2662*f5c631daSSadaf Ebrahimi __ fmls(v5.V2S(), v16.V2S(), v7.V2S());
2663*f5c631daSSadaf Ebrahimi __ fmls(v3.V2S(), v18.V2S(), v11.S(), 1);
2664*f5c631daSSadaf Ebrahimi __ fmls(v27.V4S(), v5.V4S(), v30.V4S());
2665*f5c631daSSadaf Ebrahimi __ fmls(v26.V4S(), v20.V4S(), v4.S(), 3);
2666*f5c631daSSadaf Ebrahimi __ fmov(v14.V2D(), -0.34375);
2667*f5c631daSSadaf Ebrahimi __ fmov(v26.V2S(), 0.90625f);
2668*f5c631daSSadaf Ebrahimi __ fmov(v31.V4S(), -5.0000f);
2669*f5c631daSSadaf Ebrahimi __ fmov(v28.D(), 1, x25);
2670*f5c631daSSadaf Ebrahimi __ fmov(x18, v2.D(), 1);
2671*f5c631daSSadaf Ebrahimi __ fmul(d12, d4, v1.D(), 1);
2672*f5c631daSSadaf Ebrahimi __ fmul(s30, s1, v15.S(), 3);
2673*f5c631daSSadaf Ebrahimi __ fmul(v25.V2D(), v0.V2D(), v21.V2D());
2674*f5c631daSSadaf Ebrahimi __ fmul(v10.V2D(), v24.V2D(), v10.D(), 1);
2675*f5c631daSSadaf Ebrahimi __ fmul(v7.V2S(), v24.V2S(), v16.V2S());
2676*f5c631daSSadaf Ebrahimi __ fmul(v1.V2S(), v16.V2S(), v4.S(), 2);
2677*f5c631daSSadaf Ebrahimi __ fmul(v5.V4S(), v28.V4S(), v25.V4S());
2678*f5c631daSSadaf Ebrahimi __ fmul(v11.V4S(), v3.V4S(), v8.S(), 0);
2679*f5c631daSSadaf Ebrahimi __ fmulx(d28, d9, v3.D(), 1);
2680*f5c631daSSadaf Ebrahimi __ fmulx(s25, s21, v15.S(), 1);
2681*f5c631daSSadaf Ebrahimi __ fmulx(v31.V2D(), v28.V2D(), v8.V2D());
2682*f5c631daSSadaf Ebrahimi __ fmulx(v3.V2D(), v21.V2D(), v6.D(), 0);
2683*f5c631daSSadaf Ebrahimi __ fmulx(v9.V2S(), v1.V2S(), v0.V2S());
2684*f5c631daSSadaf Ebrahimi __ fmulx(v16.V2S(), v27.V2S(), v6.S(), 0);
2685*f5c631daSSadaf Ebrahimi __ fmulx(v2.V4S(), v4.V4S(), v5.V4S());
2686*f5c631daSSadaf Ebrahimi __ fmulx(v18.V4S(), v7.V4S(), v4.S(), 0);
2687*f5c631daSSadaf Ebrahimi __ fneg(v1.V2D(), v25.V2D());
2688*f5c631daSSadaf Ebrahimi __ fneg(v14.V2S(), v31.V2S());
2689*f5c631daSSadaf Ebrahimi __ fneg(v5.V4S(), v4.V4S());
2690*f5c631daSSadaf Ebrahimi __ frecpe(v18.V2D(), v12.V2D());
2691*f5c631daSSadaf Ebrahimi __ frecpe(v10.V2S(), v22.V2S());
2692*f5c631daSSadaf Ebrahimi __ frecpe(v5.V4S(), v6.V4S());
2693*f5c631daSSadaf Ebrahimi __ frecps(v22.V2D(), v7.V2D(), v26.V2D());
2694*f5c631daSSadaf Ebrahimi __ frecps(v31.V2S(), v27.V2S(), v2.V2S());
2695*f5c631daSSadaf Ebrahimi __ frecps(v18.V4S(), v6.V4S(), v27.V4S());
2696*f5c631daSSadaf Ebrahimi __ frinta(v26.V2D(), v13.V2D());
2697*f5c631daSSadaf Ebrahimi __ frinta(v15.V2S(), v26.V2S());
2698*f5c631daSSadaf Ebrahimi __ frinta(v13.V4S(), v16.V4S());
2699*f5c631daSSadaf Ebrahimi __ frinti(v9.V2D(), v12.V2D());
2700*f5c631daSSadaf Ebrahimi __ frinti(v5.V2S(), v19.V2S());
2701*f5c631daSSadaf Ebrahimi __ frinti(v15.V4S(), v11.V4S());
2702*f5c631daSSadaf Ebrahimi __ frintm(v17.V2D(), v29.V2D());
2703*f5c631daSSadaf Ebrahimi __ frintm(v30.V2S(), v11.V2S());
2704*f5c631daSSadaf Ebrahimi __ frintm(v1.V4S(), v20.V4S());
2705*f5c631daSSadaf Ebrahimi __ frintn(v24.V2D(), v6.V2D());
2706*f5c631daSSadaf Ebrahimi __ frintn(v12.V2S(), v17.V2S());
2707*f5c631daSSadaf Ebrahimi __ frintn(v29.V4S(), v11.V4S());
2708*f5c631daSSadaf Ebrahimi __ frintp(v10.V2D(), v7.V2D());
2709*f5c631daSSadaf Ebrahimi __ frintp(v12.V2S(), v18.V2S());
2710*f5c631daSSadaf Ebrahimi __ frintp(v26.V4S(), v31.V4S());
2711*f5c631daSSadaf Ebrahimi __ frintx(v24.V2D(), v13.V2D());
2712*f5c631daSSadaf Ebrahimi __ frintx(v7.V2S(), v9.V2S());
2713*f5c631daSSadaf Ebrahimi __ frintx(v18.V4S(), v21.V4S());
2714*f5c631daSSadaf Ebrahimi __ frintz(v19.V2D(), v25.V2D());
2715*f5c631daSSadaf Ebrahimi __ frintz(v15.V2S(), v8.V2S());
2716*f5c631daSSadaf Ebrahimi __ frintz(v20.V4S(), v3.V4S());
2717*f5c631daSSadaf Ebrahimi __ frsqrte(v23.V2D(), v5.V2D());
2718*f5c631daSSadaf Ebrahimi __ frsqrte(v9.V2S(), v7.V2S());
2719*f5c631daSSadaf Ebrahimi __ frsqrte(v3.V4S(), v9.V4S());
2720*f5c631daSSadaf Ebrahimi __ frsqrts(v25.V2D(), v28.V2D(), v15.V2D());
2721*f5c631daSSadaf Ebrahimi __ frsqrts(v9.V2S(), v26.V2S(), v10.V2S());
2722*f5c631daSSadaf Ebrahimi __ frsqrts(v5.V4S(), v1.V4S(), v10.V4S());
2723*f5c631daSSadaf Ebrahimi __ fsqrt(v6.V2D(), v18.V2D());
2724*f5c631daSSadaf Ebrahimi __ fsqrt(v6.V2S(), v18.V2S());
2725*f5c631daSSadaf Ebrahimi __ fsqrt(v0.V4S(), v31.V4S());
2726*f5c631daSSadaf Ebrahimi __ fsub(v31.V2D(), v30.V2D(), v31.V2D());
2727*f5c631daSSadaf Ebrahimi __ fsub(v11.V2S(), v8.V2S(), v6.V2S());
2728*f5c631daSSadaf Ebrahimi __ fsub(v16.V4S(), v0.V4S(), v31.V4S());
// scvtf/ucvtf: each arrangement is emitted once with two operands and once
// with a trailing integer immediate.
2729*f5c631daSSadaf Ebrahimi __ scvtf(v25.V2D(), v31.V2D());
2730*f5c631daSSadaf Ebrahimi __ scvtf(v10.V2D(), v13.V2D(), 45);
2731*f5c631daSSadaf Ebrahimi __ scvtf(v10.V2S(), v15.V2S());
2732*f5c631daSSadaf Ebrahimi __ scvtf(v18.V2S(), v4.V2S(), 27);
2733*f5c631daSSadaf Ebrahimi __ scvtf(v17.V4S(), v5.V4S());
2734*f5c631daSSadaf Ebrahimi __ scvtf(v11.V4S(), v25.V4S(), 24);
2735*f5c631daSSadaf Ebrahimi __ ucvtf(v9.V2D(), v3.V2D());
2736*f5c631daSSadaf Ebrahimi __ ucvtf(v26.V2D(), v30.V2D(), 46);
2737*f5c631daSSadaf Ebrahimi __ ucvtf(v11.V2S(), v4.V2S());
2738*f5c631daSSadaf Ebrahimi __ ucvtf(v29.V2S(), v3.V2S(), 25);
2739*f5c631daSSadaf Ebrahimi __ ucvtf(v22.V4S(), v23.V4S());
2740*f5c631daSSadaf Ebrahimi __ ucvtf(v18.V4S(), v9.V4S(), 25);
2741*f5c631daSSadaf Ebrahimi }
2742*f5c631daSSadaf Ebrahimi
2743*f5c631daSSadaf Ebrahimi
// Emits a fixed list of SVE load and store instructions through `masm`.
//
// The body runs inside an ExactAssemblyScope constructed with the bytes
// remaining in the assembler's buffer and ExactAssemblyScope::kMaximumSize,
// and a CPUFeaturesScope constructed with CPUFeatures::kSVE (presumably so the
// assembler accepts the SVE instructions below - confirm).
//
// Every memory operand is based on x0. Offsets are either an immediate with
// SVE_MUL_VL or a second register (x2 or x3), optionally with an LSL shift.
//
// NOTE(review): `__` is defined outside this view; presumably it is the
// file's shorthand for emitting an instruction through `masm` - confirm.
GenerateTestSequenceSVE(MacroAssembler * masm)2744*f5c631daSSadaf Ebrahimi static void GenerateTestSequenceSVE(MacroAssembler* masm) {
2745*f5c631daSSadaf Ebrahimi ExactAssemblyScope guard(masm,
2746*f5c631daSSadaf Ebrahimi masm->GetBuffer()->GetRemainingBytes(),
2747*f5c631daSSadaf Ebrahimi ExactAssemblyScope::kMaximumSize);
2748*f5c631daSSadaf Ebrahimi CPUFeaturesScope feature_guard(masm, CPUFeatures::kSVE);
2749*f5c631daSSadaf Ebrahimi
2750*f5c631daSSadaf Ebrahimi // Simple, unpredicated loads and stores.
// First the predicate registers (p8-p15), then the vector registers
// (z0-z3 stored, z20-z23 loaded), each with all four lane-size views.
2751*f5c631daSSadaf Ebrahimi __ str(p12.VnD(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2752*f5c631daSSadaf Ebrahimi __ str(p13.VnS(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2753*f5c631daSSadaf Ebrahimi __ str(p14.VnH(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2754*f5c631daSSadaf Ebrahimi __ str(p15.VnB(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2755*f5c631daSSadaf Ebrahimi __ ldr(p8.VnD(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2756*f5c631daSSadaf Ebrahimi __ ldr(p9.VnS(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2757*f5c631daSSadaf Ebrahimi __ ldr(p10.VnH(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2758*f5c631daSSadaf Ebrahimi __ ldr(p11.VnB(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2759*f5c631daSSadaf Ebrahimi
2760*f5c631daSSadaf Ebrahimi __ str(z0.VnD(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2761*f5c631daSSadaf Ebrahimi __ str(z1.VnS(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2762*f5c631daSSadaf Ebrahimi __ str(z2.VnH(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2763*f5c631daSSadaf Ebrahimi __ str(z3.VnB(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2764*f5c631daSSadaf Ebrahimi __ ldr(z20.VnD(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2765*f5c631daSSadaf Ebrahimi __ ldr(z21.VnS(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2766*f5c631daSSadaf Ebrahimi __ ldr(z22.VnH(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2767*f5c631daSSadaf Ebrahimi __ ldr(z23.VnB(), SVEMemOperand(x0, 11, SVE_MUL_VL));
2768*f5c631daSSadaf Ebrahimi
2769*f5c631daSSadaf Ebrahimi // Structured accesses.
// Here the register lane size matches the access size in the mnemonic
// (st1b with VnB, st1h with VnH, and so on). Stores take a plain predicate;
// loads take `pN.Zeroing()`.
2770*f5c631daSSadaf Ebrahimi __ st1b(z0.VnB(), p2, SVEMemOperand(x0, 3, SVE_MUL_VL));
2771*f5c631daSSadaf Ebrahimi __ st1h(z1.VnH(), p1, SVEMemOperand(x0, 3, SVE_MUL_VL));
2772*f5c631daSSadaf Ebrahimi __ st1w(z2.VnS(), p1, SVEMemOperand(x0, x3, LSL, 2));
2773*f5c631daSSadaf Ebrahimi __ st1d(z3.VnD(), p2, SVEMemOperand(x0, 3, SVE_MUL_VL));
2774*f5c631daSSadaf Ebrahimi __ ld1b(z20.VnB(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2775*f5c631daSSadaf Ebrahimi __ ld1h(z21.VnH(), p2.Zeroing(), SVEMemOperand(x0, x2, LSL, 1));
2776*f5c631daSSadaf Ebrahimi __ ld1w(z22.VnS(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2777*f5c631daSSadaf Ebrahimi __ ld1d(z23.VnD(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2778*f5c631daSSadaf Ebrahimi
2779*f5c631daSSadaf Ebrahimi // Structured, packed accesses.
// Here the register lane size is wider than the access size in the mnemonic
// (for example st1b with VnH/VnS/VnD); the ld1s* variants are loads only.
2780*f5c631daSSadaf Ebrahimi __ st1b(z2.VnH(), p1, SVEMemOperand(x0, 3, SVE_MUL_VL));
2781*f5c631daSSadaf Ebrahimi __ st1b(z3.VnS(), p2, SVEMemOperand(x0, 3, SVE_MUL_VL));
2782*f5c631daSSadaf Ebrahimi __ st1b(z4.VnD(), p2, SVEMemOperand(x0, 3, SVE_MUL_VL));
2783*f5c631daSSadaf Ebrahimi __ st1h(z0.VnS(), p1, SVEMemOperand(x0, 3, SVE_MUL_VL));
2784*f5c631daSSadaf Ebrahimi __ st1h(z1.VnD(), p1, SVEMemOperand(x0, x2, LSL, 1));
2785*f5c631daSSadaf Ebrahimi __ st1w(z2.VnD(), p1, SVEMemOperand(x0, 3, SVE_MUL_VL));
2786*f5c631daSSadaf Ebrahimi __ ld1b(z20.VnH(), p1.Zeroing(), SVEMemOperand(x0, x2));
2787*f5c631daSSadaf Ebrahimi __ ld1b(z21.VnS(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2788*f5c631daSSadaf Ebrahimi __ ld1b(z22.VnD(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2789*f5c631daSSadaf Ebrahimi __ ld1h(z23.VnS(), p2.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2790*f5c631daSSadaf Ebrahimi __ ld1h(z24.VnD(), p2.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2791*f5c631daSSadaf Ebrahimi __ ld1w(z20.VnD(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2792*f5c631daSSadaf Ebrahimi __ ld1sb(z21.VnH(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2793*f5c631daSSadaf Ebrahimi __ ld1sb(z22.VnS(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2794*f5c631daSSadaf Ebrahimi __ ld1sb(z23.VnD(), p2.Zeroing(), SVEMemOperand(x0, x2));
2795*f5c631daSSadaf Ebrahimi __ ld1sh(z24.VnS(), p2.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2796*f5c631daSSadaf Ebrahimi __ ld1sh(z20.VnD(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2797*f5c631daSSadaf Ebrahimi __ ld1sw(z21.VnD(), p1.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
2798*f5c631daSSadaf Ebrahimi
2799*f5c631daSSadaf Ebrahimi // Structured, interleaved accesses.
// Two-, three- and four-register forms follow, each over consecutively
// numbered z registers (the st4b list wraps from z31 to z0).
2800*f5c631daSSadaf Ebrahimi __ st2b(z0.VnB(), z1.VnB(), p4, SVEMemOperand(x0, 4, SVE_MUL_VL));
2801*f5c631daSSadaf Ebrahimi __ st2h(z1.VnH(), z2.VnH(), p4, SVEMemOperand(x0, 4, SVE_MUL_VL));
2802*f5c631daSSadaf Ebrahimi __ st2w(z2.VnS(), z3.VnS(), p3, SVEMemOperand(x0, x2, LSL, 2));
2803*f5c631daSSadaf Ebrahimi __ st2d(z3.VnD(), z4.VnD(), p4, SVEMemOperand(x0, 4, SVE_MUL_VL));
2804*f5c631daSSadaf Ebrahimi __ ld2b(z20.VnB(), z21.VnB(), p5.Zeroing(), SVEMemOperand(x0, x2));
2805*f5c631daSSadaf Ebrahimi __ ld2h(z21.VnH(), z22.VnH(), p6.Zeroing(), SVEMemOperand(x0, 4, SVE_MUL_VL));
2806*f5c631daSSadaf Ebrahimi __ ld2w(z22.VnS(), z23.VnS(), p6.Zeroing(), SVEMemOperand(x0, 4, SVE_MUL_VL));
2807*f5c631daSSadaf Ebrahimi __ ld2d(z23.VnD(), z24.VnD(), p5.Zeroing(), SVEMemOperand(x0, 4, SVE_MUL_VL));
2808*f5c631daSSadaf Ebrahimi
2809*f5c631daSSadaf Ebrahimi __ st3b(z4.VnB(), z5.VnB(), z6.VnB(), p4, SVEMemOperand(x0, 3, SVE_MUL_VL));
2810*f5c631daSSadaf Ebrahimi __ st3h(z5.VnH(), z6.VnH(), z7.VnH(), p4, SVEMemOperand(x0, 3, SVE_MUL_VL));
2811*f5c631daSSadaf Ebrahimi __ st3w(z6.VnS(), z7.VnS(), z8.VnS(), p3, SVEMemOperand(x0, 3, SVE_MUL_VL));
2812*f5c631daSSadaf Ebrahimi __ st3d(z7.VnD(), z8.VnD(), z9.VnD(), p4, SVEMemOperand(x0, x2, LSL, 3));
2813*f5c631daSSadaf Ebrahimi __ ld3b(z24.VnB(),
2814*f5c631daSSadaf Ebrahimi z25.VnB(),
2815*f5c631daSSadaf Ebrahimi z26.VnB(),
2816*f5c631daSSadaf Ebrahimi p5.Zeroing(),
2817*f5c631daSSadaf Ebrahimi SVEMemOperand(x0, 3, SVE_MUL_VL));
2818*f5c631daSSadaf Ebrahimi __ ld3h(z25.VnH(),
2819*f5c631daSSadaf Ebrahimi z26.VnH(),
2820*f5c631daSSadaf Ebrahimi z27.VnH(),
2821*f5c631daSSadaf Ebrahimi p6.Zeroing(),
2822*f5c631daSSadaf Ebrahimi SVEMemOperand(x0, x2, LSL, 1));
2823*f5c631daSSadaf Ebrahimi __ ld3w(z26.VnS(),
2824*f5c631daSSadaf Ebrahimi z27.VnS(),
2825*f5c631daSSadaf Ebrahimi z28.VnS(),
2826*f5c631daSSadaf Ebrahimi p6.Zeroing(),
2827*f5c631daSSadaf Ebrahimi SVEMemOperand(x0, 3, SVE_MUL_VL));
2828*f5c631daSSadaf Ebrahimi __ ld3d(z27.VnD(),
2829*f5c631daSSadaf Ebrahimi z28.VnD(),
2830*f5c631daSSadaf Ebrahimi z29.VnD(),
2831*f5c631daSSadaf Ebrahimi p5.Zeroing(),
2832*f5c631daSSadaf Ebrahimi SVEMemOperand(x0, 3, SVE_MUL_VL));
2833*f5c631daSSadaf Ebrahimi
2834*f5c631daSSadaf Ebrahimi __ st4b(z31.VnB(),
2835*f5c631daSSadaf Ebrahimi z0.VnB(),
2836*f5c631daSSadaf Ebrahimi z1.VnB(),
2837*f5c631daSSadaf Ebrahimi z2.VnB(),
2838*f5c631daSSadaf Ebrahimi p4,
2839*f5c631daSSadaf Ebrahimi SVEMemOperand(x0, 4, SVE_MUL_VL));
2840*f5c631daSSadaf Ebrahimi __ st4h(z0.VnH(),
2841*f5c631daSSadaf Ebrahimi z1.VnH(),
2842*f5c631daSSadaf Ebrahimi z2.VnH(),
2843*f5c631daSSadaf Ebrahimi z3.VnH(),
2844*f5c631daSSadaf Ebrahimi p4,
2845*f5c631daSSadaf Ebrahimi SVEMemOperand(x0, 4, SVE_MUL_VL));
2846*f5c631daSSadaf Ebrahimi __ st4w(z1.VnS(),
2847*f5c631daSSadaf Ebrahimi z2.VnS(),
2848*f5c631daSSadaf Ebrahimi z3.VnS(),
2849*f5c631daSSadaf Ebrahimi z4.VnS(),
2850*f5c631daSSadaf Ebrahimi p3,
2851*f5c631daSSadaf Ebrahimi SVEMemOperand(x0, 4, SVE_MUL_VL));
2852*f5c631daSSadaf Ebrahimi __ st4d(z2.VnD(),
2853*f5c631daSSadaf Ebrahimi z3.VnD(),
2854*f5c631daSSadaf Ebrahimi z4.VnD(),
2855*f5c631daSSadaf Ebrahimi z5.VnD(),
2856*f5c631daSSadaf Ebrahimi p4,
2857*f5c631daSSadaf Ebrahimi SVEMemOperand(x0, x2, LSL, 3));
2858*f5c631daSSadaf Ebrahimi __ ld4b(z25.VnB(),
2859*f5c631daSSadaf Ebrahimi z26.VnB(),
2860*f5c631daSSadaf Ebrahimi z27.VnB(),
2861*f5c631daSSadaf Ebrahimi z28.VnB(),
2862*f5c631daSSadaf Ebrahimi p5.Zeroing(),
2863*f5c631daSSadaf Ebrahimi SVEMemOperand(x0, 4, SVE_MUL_VL));
2864*f5c631daSSadaf Ebrahimi __ ld4h(z26.VnH(),
2865*f5c631daSSadaf Ebrahimi z27.VnH(),
2866*f5c631daSSadaf Ebrahimi z28.VnH(),
2867*f5c631daSSadaf Ebrahimi z29.VnH(),
2868*f5c631daSSadaf Ebrahimi p6.Zeroing(),
2869*f5c631daSSadaf Ebrahimi SVEMemOperand(x0, 4, SVE_MUL_VL));
2870*f5c631daSSadaf Ebrahimi __ ld4w(z27.VnS(),
2871*f5c631daSSadaf Ebrahimi z28.VnS(),
2872*f5c631daSSadaf Ebrahimi z29.VnS(),
2873*f5c631daSSadaf Ebrahimi z30.VnS(),
2874*f5c631daSSadaf Ebrahimi p6.Zeroing(),
2875*f5c631daSSadaf Ebrahimi SVEMemOperand(x0, x2, LSL, 2));
2876*f5c631daSSadaf Ebrahimi __ ld4d(z28.VnD(),
2877*f5c631daSSadaf Ebrahimi z29.VnD(),
2878*f5c631daSSadaf Ebrahimi z30.VnD(),
2879*f5c631daSSadaf Ebrahimi z31.VnD(),
2880*f5c631daSSadaf Ebrahimi p5.Zeroing(),
2881*f5c631daSSadaf Ebrahimi SVEMemOperand(x0, 4, SVE_MUL_VL));
2882*f5c631daSSadaf Ebrahimi }
2883*f5c631daSSadaf Ebrahimi
// Emits a fixed list of atomic memory instructions through `masm`, all of
// which address a 16-byte working area claimed on the stack.
//
// The body runs inside an ExactAssemblyScope constructed with the bytes
// remaining in the assembler's buffer and ExactAssemblyScope::kMaximumSize,
// and a CPUFeaturesScope constructed with CPUFeatures::kAtomics (presumably so
// the assembler accepts the instructions below - confirm).
//
// The emitted code lowers sp by 16, stores 0x5555555555555555 (via x0) at
// [sp], emits the instruction list, then adds 16 back to sp.
//
// NOTE(review): `__` is defined outside this view; presumably it is the
// file's shorthand for emitting an instruction through `masm` - confirm.
GenerateTestSequenceAtomics(MacroAssembler * masm)2884*f5c631daSSadaf Ebrahimi static void GenerateTestSequenceAtomics(MacroAssembler* masm) {
2885*f5c631daSSadaf Ebrahimi ExactAssemblyScope guard(masm,
2886*f5c631daSSadaf Ebrahimi masm->GetBuffer()->GetRemainingBytes(),
2887*f5c631daSSadaf Ebrahimi ExactAssemblyScope::kMaximumSize);
2888*f5c631daSSadaf Ebrahimi CPUFeaturesScope feature_guard(masm, CPUFeatures::kAtomics);
2889*f5c631daSSadaf Ebrahimi __ sub(sp, sp, 16); // Claim some working space on the stack.
2890*f5c631daSSadaf Ebrahimi __ mov(x0, 0x5555555555555555);
2891*f5c631daSSadaf Ebrahimi __ str(x0, MemOperand(sp)); // Initialise working space.
2892*f5c631daSSadaf Ebrahimi
// INST_LIST(OP) expands, by token pasting, to 24 calls for one operation name:
//  - 16 `ld<OP>` calls: suffixes "", "a", "l", "al" combined with "b", "h" and
//    no size suffix on w registers, plus the unsized forms on x registers;
//  - 8 `st<OP>` calls: suffixes "" and "l" combined with "b", "h" and no size
//    suffix on w registers, plus the unsized forms on x registers.
// Every call uses MemOperand(sp). Comments must stay outside the macro body
// because each of its lines ends in a line continuation.
2893*f5c631daSSadaf Ebrahimi #define INST_LIST(OP) \
2894*f5c631daSSadaf Ebrahimi __ ld##OP##b(w0, w0, MemOperand(sp)); \
2895*f5c631daSSadaf Ebrahimi __ ld##OP##ab(w0, w1, MemOperand(sp)); \
2896*f5c631daSSadaf Ebrahimi __ ld##OP##lb(w0, w2, MemOperand(sp)); \
2897*f5c631daSSadaf Ebrahimi __ ld##OP##alb(w0, w3, MemOperand(sp)); \
2898*f5c631daSSadaf Ebrahimi __ ld##OP##h(w0, w0, MemOperand(sp)); \
2899*f5c631daSSadaf Ebrahimi __ ld##OP##ah(w0, w1, MemOperand(sp)); \
2900*f5c631daSSadaf Ebrahimi __ ld##OP##lh(w0, w2, MemOperand(sp)); \
2901*f5c631daSSadaf Ebrahimi __ ld##OP##alh(w0, w3, MemOperand(sp)); \
2902*f5c631daSSadaf Ebrahimi __ ld##OP(w0, w0, MemOperand(sp)); \
2903*f5c631daSSadaf Ebrahimi __ ld##OP##a(w0, w1, MemOperand(sp)); \
2904*f5c631daSSadaf Ebrahimi __ ld##OP##l(w0, w2, MemOperand(sp)); \
2905*f5c631daSSadaf Ebrahimi __ ld##OP##al(w0, w3, MemOperand(sp)); \
2906*f5c631daSSadaf Ebrahimi __ ld##OP(x0, x0, MemOperand(sp)); \
2907*f5c631daSSadaf Ebrahimi __ ld##OP##a(x0, x1, MemOperand(sp)); \
2908*f5c631daSSadaf Ebrahimi __ ld##OP##l(x0, x2, MemOperand(sp)); \
2909*f5c631daSSadaf Ebrahimi __ ld##OP##al(x0, x3, MemOperand(sp)); \
2910*f5c631daSSadaf Ebrahimi __ st##OP##b(w0, MemOperand(sp)); \
2911*f5c631daSSadaf Ebrahimi __ st##OP##lb(w0, MemOperand(sp)); \
2912*f5c631daSSadaf Ebrahimi __ st##OP##h(w0, MemOperand(sp)); \
2913*f5c631daSSadaf Ebrahimi __ st##OP##lh(w0, MemOperand(sp)); \
2914*f5c631daSSadaf Ebrahimi __ st##OP(w0, MemOperand(sp)); \
2915*f5c631daSSadaf Ebrahimi __ st##OP##l(w0, MemOperand(sp)); \
2916*f5c631daSSadaf Ebrahimi __ st##OP(x0, MemOperand(sp)); \
2917*f5c631daSSadaf Ebrahimi __ st##OP##l(x0, MemOperand(sp));
2918*f5c631daSSadaf Ebrahimi
// Instantiate the list once per operation name; eight operations in total.
2919*f5c631daSSadaf Ebrahimi INST_LIST(add);
2920*f5c631daSSadaf Ebrahimi INST_LIST(set);
2921*f5c631daSSadaf Ebrahimi INST_LIST(eor);
2922*f5c631daSSadaf Ebrahimi INST_LIST(smin);
2923*f5c631daSSadaf Ebrahimi INST_LIST(smax);
2924*f5c631daSSadaf Ebrahimi INST_LIST(umin);
2925*f5c631daSSadaf Ebrahimi INST_LIST(umax);
2926*f5c631daSSadaf Ebrahimi INST_LIST(clr);
2927*f5c631daSSadaf Ebrahimi
2928*f5c631daSSadaf Ebrahimi #undef INST_LIST
2929*f5c631daSSadaf Ebrahimi
2930*f5c631daSSadaf Ebrahimi __ add(sp, sp, 16); // Restore stack pointer.
2931*f5c631daSSadaf Ebrahimi }
2932*f5c631daSSadaf Ebrahimi
// Rewrites the trace file at `trace` in place, masking hexadecimal values that
// change from run to run so the result can be compared with a reference.
//
// Each line is passed through every pattern below in order; every match is
// replaced by its first capture group followed by sixteen '~' characters.
// The file is then truncated and rewritten with one "\n" after each line.
//
// If `trace` cannot be opened for reading, a message is written to stderr and
// the file is left untouched. (Looping on `!in.eof()` would never terminate in
// that case, because a stream that failed to open never reaches end-of-file.)
static void MaskAddresses(const char* trace) {
#define VIXL_COLOUR "(\x1b\\[[01];([0-9][0-9])?m)?"
  // All patterns are replaced with "$1~~~~~~~~~~~~~~~~".
  const std::regex patterns[] =
      {// Mask registers that hold addresses that change from run to run.
       std::regex("((x0|x1|x2|sp): " VIXL_COLOUR "0x)[0-9a-f]{16}"),
       // Mask accessed memory addresses.
       std::regex("((<-|->) " VIXL_COLOUR "0x)[0-9a-f]{16}"),
       // Mask instruction addresses.
       std::regex("^(0x)[0-9a-f]{16}"),
       // Mask branch targets.
       std::regex("(Branch" VIXL_COLOUR " to 0x)[0-9a-f]{16}"),
       // Mask explicit address annotations.
       std::regex("(addr 0x)[0-9a-f]+")};
#undef VIXL_COLOUR

  std::ifstream in(trace);
  if (!in.is_open()) {
    fprintf(stderr, "MaskAddresses: cannot open '%s' for reading\n", trace);
    return;
  }

  // Read the whole file before reopening it for writing. Testing the result
  // of `getline` (rather than `eof()`) stops on any read failure and does not
  // produce a spurious empty line after a terminal "\n".
  std::vector<std::string> lines;
  std::string line;
  while (std::getline(in, line)) {
    for (const std::regex& pattern : patterns) {
      line = std::regex_replace(line, pattern, "$1~~~~~~~~~~~~~~~~");
    }
    lines.push_back(line);
  }
  in.close();

  std::ofstream out(trace, std::ofstream::trunc);
  for (const std::string& masked : lines) {
    out << masked << "\n";
  }
}
2969*f5c631daSSadaf Ebrahimi
// Copies the contents of the file `name` to stdout, reading it in chunks.
//
// If the file cannot be opened, a message is written to stderr and nothing is
// printed; passing a null stream to fgets/fclose is undefined behaviour.
static void PrintFile(const char* name) {
  FILE* file = fopen(name, "r");
  if (file == NULL) {
    fprintf(stderr, "PrintFile: cannot open '%s' for reading\n", name);
    return;
  }
  char buffer[1024];  // The buffer size is arbitrary.
  while (fgets(buffer, sizeof(buffer), file) != NULL) fputs(buffer, stdout);
  fclose(file);
}
2976*f5c631daSSadaf Ebrahimi
CheckOrGenerateTrace(const char * filename,const char * ref_file)2977*f5c631daSSadaf Ebrahimi static bool CheckOrGenerateTrace(const char* filename, const char* ref_file) {
2978*f5c631daSSadaf Ebrahimi bool trace_matched_reference;
2979*f5c631daSSadaf Ebrahimi if (Test::generate_test_trace()) {
2980*f5c631daSSadaf Ebrahimi // Copy trace_stream to stdout.
2981*f5c631daSSadaf Ebrahimi FILE* trace_stream = fopen(filename, "r");
2982*f5c631daSSadaf Ebrahimi VIXL_ASSERT(trace_stream != NULL);
2983*f5c631daSSadaf Ebrahimi fseek(trace_stream, 0, SEEK_SET);
2984*f5c631daSSadaf Ebrahimi int c;
2985*f5c631daSSadaf Ebrahimi while (1) {
2986*f5c631daSSadaf Ebrahimi c = getc(trace_stream);
2987*f5c631daSSadaf Ebrahimi if (c == EOF) break;
2988*f5c631daSSadaf Ebrahimi putc(c, stdout);
2989*f5c631daSSadaf Ebrahimi }
2990*f5c631daSSadaf Ebrahimi fclose(trace_stream);
2991*f5c631daSSadaf Ebrahimi trace_matched_reference = true;
2992*f5c631daSSadaf Ebrahimi } else {
2993*f5c631daSSadaf Ebrahimi // Check trace_stream against ref_file.
2994*f5c631daSSadaf Ebrahimi char command[1024];
2995*f5c631daSSadaf Ebrahimi size_t length =
2996*f5c631daSSadaf Ebrahimi snprintf(command, sizeof(command), "diff -u %s %s", ref_file, filename);
2997*f5c631daSSadaf Ebrahimi VIXL_CHECK(length < sizeof(command));
2998*f5c631daSSadaf Ebrahimi trace_matched_reference = (system(command) == 0);
2999*f5c631daSSadaf Ebrahimi }
3000*f5c631daSSadaf Ebrahimi return trace_matched_reference;
3001*f5c631daSSadaf Ebrahimi }
3002*f5c631daSSadaf Ebrahimi
3003*f5c631daSSadaf Ebrahimi
3004*f5c631daSSadaf Ebrahimi // Trace tests can only work with the simulator.
3005*f5c631daSSadaf Ebrahimi #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
3006*f5c631daSSadaf Ebrahimi
TraceTestHelper(bool coloured_trace,TraceParameters trace_parameters,const char * ref_file)3007*f5c631daSSadaf Ebrahimi static void TraceTestHelper(bool coloured_trace,
3008*f5c631daSSadaf Ebrahimi TraceParameters trace_parameters,
3009*f5c631daSSadaf Ebrahimi const char* ref_file) {
3010*f5c631daSSadaf Ebrahimi MacroAssembler masm(12 * KBytes);
3011*f5c631daSSadaf Ebrahimi
3012*f5c631daSSadaf Ebrahimi char trace_stream_filename[] = "/tmp/vixl-test-trace-XXXXXX";
3013*f5c631daSSadaf Ebrahimi FILE* trace_stream = fdopen(mkstemp(trace_stream_filename), "w");
3014*f5c631daSSadaf Ebrahimi
3015*f5c631daSSadaf Ebrahimi Decoder decoder;
3016*f5c631daSSadaf Ebrahimi Simulator simulator(&decoder, trace_stream);
3017*f5c631daSSadaf Ebrahimi simulator.SetColouredTrace(coloured_trace);
3018*f5c631daSSadaf Ebrahimi simulator.SetTraceParameters(trace_parameters);
3019*f5c631daSSadaf Ebrahimi simulator.SilenceExclusiveAccessWarning();
3020*f5c631daSSadaf Ebrahimi
3021*f5c631daSSadaf Ebrahimi const int vl_in_bytes = 5 * kZRegMinSizeInBytes;
3022*f5c631daSSadaf Ebrahimi const int vl_in_bits = vl_in_bytes * kBitsPerByte;
3023*f5c631daSSadaf Ebrahimi const int pl_in_bits = vl_in_bits / kZRegBitsPerPRegBit;
3024*f5c631daSSadaf Ebrahimi simulator.SetVectorLengthInBits(vl_in_bits);
3025*f5c631daSSadaf Ebrahimi
3026*f5c631daSSadaf Ebrahimi // Set up a scratch buffer so we can test loads and stores.
3027*f5c631daSSadaf Ebrahimi const int kScratchSize = vl_in_bytes * 1024;
3028*f5c631daSSadaf Ebrahimi const int kScratchGuardSize = vl_in_bytes;
3029*f5c631daSSadaf Ebrahimi char scratch_buffer[kScratchSize + kScratchGuardSize];
3030*f5c631daSSadaf Ebrahimi for (size_t i = 0; i < (sizeof(scratch_buffer) / sizeof(scratch_buffer[0]));
3031*f5c631daSSadaf Ebrahimi i++) {
3032*f5c631daSSadaf Ebrahimi scratch_buffer[i] = i & 0xff;
3033*f5c631daSSadaf Ebrahimi }
3034*f5c631daSSadaf Ebrahimi // Used for offset addressing.
3035*f5c631daSSadaf Ebrahimi simulator.WriteXRegister(0, reinterpret_cast<uintptr_t>(scratch_buffer));
3036*f5c631daSSadaf Ebrahimi // Used for pre-/post-index addressing.
3037*f5c631daSSadaf Ebrahimi simulator.WriteXRegister(1, reinterpret_cast<uintptr_t>(scratch_buffer));
3038*f5c631daSSadaf Ebrahimi
3039*f5c631daSSadaf Ebrahimi const int kPostIndexRegisterStep = 13; // Arbitrary interesting value.
3040*f5c631daSSadaf Ebrahimi // Used for post-index offsets.
3041*f5c631daSSadaf Ebrahimi simulator.WriteXRegister(2, kPostIndexRegisterStep);
3042*f5c631daSSadaf Ebrahimi
3043*f5c631daSSadaf Ebrahimi // Initialize the other registers with unique values.
3044*f5c631daSSadaf Ebrahimi uint64_t initial_base_u64 = 0x0100001000100101;
3045*f5c631daSSadaf Ebrahimi for (unsigned i = 3; i < kNumberOfRegisters; i++) {
3046*f5c631daSSadaf Ebrahimi if (i == kLinkRegCode) continue;
3047*f5c631daSSadaf Ebrahimi if (i == kZeroRegCode) continue;
3048*f5c631daSSadaf Ebrahimi // NoRegLog suppresses the log now, but the registers will still be logged
3049*f5c631daSSadaf Ebrahimi // before the first instruction is executed since they have been written but
3050*f5c631daSSadaf Ebrahimi // not printed.
3051*f5c631daSSadaf Ebrahimi simulator.WriteRegister(i, initial_base_u64 * i, Simulator::NoRegLog);
3052*f5c631daSSadaf Ebrahimi }
3053*f5c631daSSadaf Ebrahimi for (unsigned r = 0; r < kNumberOfVRegisters; r++) {
3054*f5c631daSSadaf Ebrahimi LogicVRegister reg(simulator.ReadVRegister(r));
3055*f5c631daSSadaf Ebrahimi // Try to initialise Z registers with reasonable FP values. We prioritise
3056*f5c631daSSadaf Ebrahimi // setting double values, then floats and half-precision values. The lanes
3057*f5c631daSSadaf Ebrahimi // overlap, so this is a compromise, but d0, s0 and h0 views all see similar
3058*f5c631daSSadaf Ebrahimi // arithmetic values.
3059*f5c631daSSadaf Ebrahimi //
3060*f5c631daSSadaf Ebrahimi // The exponent of each value is set to the (biased) register number. We set
3061*f5c631daSSadaf Ebrahimi // the double, float and half-precision exponents where we can.
3062*f5c631daSSadaf Ebrahimi uint64_t base = 0x3ff000003f803c00 + (0x0010000000800400 * (0x7f + r));
3063*f5c631daSSadaf Ebrahimi for (unsigned lane = 0; lane < (vl_in_bytes / kDRegSizeInBytes); lane++) {
3064*f5c631daSSadaf Ebrahimi uint64_t mantissas = 0x0000000100010001 * (lane & 0x7f);
3065*f5c631daSSadaf Ebrahimi reg.SetUint(kFormatVnD, lane, base | mantissas);
3066*f5c631daSSadaf Ebrahimi }
3067*f5c631daSSadaf Ebrahimi }
3068*f5c631daSSadaf Ebrahimi for (unsigned r = 0; r < kNumberOfPRegisters; r++) {
3069*f5c631daSSadaf Ebrahimi LogicPRegister reg(simulator.ReadPRegister(r));
3070*f5c631daSSadaf Ebrahimi // Set `r` active lanes between each inactive lane.
3071*f5c631daSSadaf Ebrahimi for (unsigned bit = 0; bit < pl_in_bits; bit++) {
3072*f5c631daSSadaf Ebrahimi reg.SetActive(kFormatVnB, bit, ((bit + 1) % (r + 2)) != 0);
3073*f5c631daSSadaf Ebrahimi }
3074*f5c631daSSadaf Ebrahimi // Completely clear some Q-sized blocks. The trace will completely omit
3075*f5c631daSSadaf Ebrahimi // these for stores.
3076*f5c631daSSadaf Ebrahimi for (unsigned chunk = 0; chunk < (vl_in_bits / kQRegSize); chunk++) {
3077*f5c631daSSadaf Ebrahimi if (((chunk + 1) % (r + 2)) == 0) {
3078*f5c631daSSadaf Ebrahimi reg.SetActiveMask(chunk, static_cast<uint16_t>(0));
3079*f5c631daSSadaf Ebrahimi }
3080*f5c631daSSadaf Ebrahimi }
3081*f5c631daSSadaf Ebrahimi }
3082*f5c631daSSadaf Ebrahimi
3083*f5c631daSSadaf Ebrahimi GenerateTestSequenceBase(&masm);
3084*f5c631daSSadaf Ebrahimi GenerateTestSequenceFP(&masm);
3085*f5c631daSSadaf Ebrahimi GenerateTestSequenceNEON(&masm);
3086*f5c631daSSadaf Ebrahimi GenerateTestSequenceNEONFP(&masm);
3087*f5c631daSSadaf Ebrahimi GenerateTestSequenceSVE(&masm);
3088*f5c631daSSadaf Ebrahimi GenerateTestSequenceAtomics(&masm);
3089*f5c631daSSadaf Ebrahimi masm.Ret();
3090*f5c631daSSadaf Ebrahimi masm.FinalizeCode();
3091*f5c631daSSadaf Ebrahimi
3092*f5c631daSSadaf Ebrahimi if (Test::disassemble()) {
3093*f5c631daSSadaf Ebrahimi PrintDisassembler disasm(stdout);
3094*f5c631daSSadaf Ebrahimi Instruction* start = masm.GetBuffer()->GetStartAddress<Instruction*>();
3095*f5c631daSSadaf Ebrahimi Instruction* end = masm.GetBuffer()->GetEndAddress<Instruction*>();
3096*f5c631daSSadaf Ebrahimi disasm.DisassembleBuffer(start, end);
3097*f5c631daSSadaf Ebrahimi }
3098*f5c631daSSadaf Ebrahimi
3099*f5c631daSSadaf Ebrahimi simulator.RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>());
3100*f5c631daSSadaf Ebrahimi
3101*f5c631daSSadaf Ebrahimi fclose(trace_stream);
3102*f5c631daSSadaf Ebrahimi
3103*f5c631daSSadaf Ebrahimi // We already traced into the temporary file, so just print the file.
3104*f5c631daSSadaf Ebrahimi // Note that these tests need to control the trace flags, so we ignore all
3105*f5c631daSSadaf Ebrahimi // --trace-* options here except for --trace-sim.
3106*f5c631daSSadaf Ebrahimi if (Test::trace_sim()) PrintFile(trace_stream_filename);
3107*f5c631daSSadaf Ebrahimi
3108*f5c631daSSadaf Ebrahimi MaskAddresses(trace_stream_filename);
3109*f5c631daSSadaf Ebrahimi
3110*f5c631daSSadaf Ebrahimi bool trace_matched_reference =
3111*f5c631daSSadaf Ebrahimi CheckOrGenerateTrace(trace_stream_filename, ref_file);
3112*f5c631daSSadaf Ebrahimi remove(trace_stream_filename); // Clean up before checking the result.
3113*f5c631daSSadaf Ebrahimi VIXL_CHECK(trace_matched_reference);
3114*f5c631daSSadaf Ebrahimi
3115*f5c631daSSadaf Ebrahimi uint64_t offset_base = simulator.ReadRegister<uint64_t>(0);
3116*f5c631daSSadaf Ebrahimi uint64_t index_base = simulator.ReadRegister<uint64_t>(1);
3117*f5c631daSSadaf Ebrahimi
3118*f5c631daSSadaf Ebrahimi VIXL_CHECK(index_base >= offset_base);
3119*f5c631daSSadaf Ebrahimi VIXL_CHECK((index_base - offset_base) <= kScratchSize);
3120*f5c631daSSadaf Ebrahimi }
3121*f5c631daSSadaf Ebrahimi
3122*f5c631daSSadaf Ebrahimi
3123*f5c631daSSadaf Ebrahimi // Test individual options.
// Trace test: LOG_DISASM trace parameters, colour disabled.
TEST(disasm) {
  const bool coloured_trace = false;
  TraceTestHelper(coloured_trace, LOG_DISASM, REF("log-disasm"));
}
// Trace test: LOG_REGS trace parameters, colour disabled.
TEST(regs) {
  const bool coloured_trace = false;
  TraceTestHelper(coloured_trace, LOG_REGS, REF("log-regs"));
}
// Trace test: LOG_VREGS trace parameters, colour disabled.
TEST(vregs) {
  const bool coloured_trace = false;
  TraceTestHelper(coloured_trace, LOG_VREGS, REF("log-vregs"));
}
// Trace test: LOG_SYSREGS trace parameters, colour disabled.
TEST(sysregs) {
  const bool coloured_trace = false;
  TraceTestHelper(coloured_trace, LOG_SYSREGS, REF("log-sysregs"));
}
// Trace test: LOG_WRITE trace parameters, colour disabled.
TEST(write) {
  const bool coloured_trace = false;
  TraceTestHelper(coloured_trace, LOG_WRITE, REF("log-write"));
}
// Trace test checked against the "log-branch" reference, colour disabled.
// NOTE(review): this passes LOG_WRITE, the same trace parameters as
// TEST(write), even though the test and its reference file are named for
// branches. Confirm whether a branch-specific parameter was intended; changing
// it would require regenerating the reference trace.
TEST(branch) {
  const bool coloured_trace = false;
  TraceTestHelper(coloured_trace, LOG_WRITE, REF("log-branch"));
}
3130*f5c631daSSadaf Ebrahimi
3131*f5c631daSSadaf Ebrahimi // Test standard combinations.
// Trace test: LOG_NONE trace parameters, colour disabled.
TEST(none) {
  const bool coloured_trace = false;
  TraceTestHelper(coloured_trace, LOG_NONE, REF("log-none"));
}
// Trace test: LOG_STATE trace parameters, colour disabled.
TEST(state) {
  const bool coloured_trace = false;
  TraceTestHelper(coloured_trace, LOG_STATE, REF("log-state"));
}
// Trace test: LOG_ALL trace parameters, colour disabled.
TEST(all) {
  const bool coloured_trace = false;
  TraceTestHelper(coloured_trace, LOG_ALL, REF("log-all"));
}
3135*f5c631daSSadaf Ebrahimi
3136*f5c631daSSadaf Ebrahimi
3137*f5c631daSSadaf Ebrahimi // Test individual options (with colour).
// Trace test: LOG_DISASM trace parameters, colour enabled.
TEST(disasm_colour) {
  const bool coloured_trace = true;
  TraceTestHelper(coloured_trace, LOG_DISASM, REF("log-disasm-colour"));
}
// Trace test: LOG_REGS trace parameters, colour enabled.
TEST(regs_colour) {
  const bool coloured_trace = true;
  TraceTestHelper(coloured_trace, LOG_REGS, REF("log-regs-colour"));
}
// Trace test: LOG_VREGS trace parameters, colour enabled.
TEST(vregs_colour) {
  const bool coloured_trace = true;
  TraceTestHelper(coloured_trace, LOG_VREGS, REF("log-vregs-colour"));
}
// Trace test: LOG_SYSREGS trace parameters, colour enabled.
TEST(sysregs_colour) {
  const bool coloured_trace = true;
  TraceTestHelper(coloured_trace, LOG_SYSREGS, REF("log-sysregs-colour"));
}
// Trace test: LOG_WRITE trace parameters, colour enabled.
TEST(write_colour) {
  const bool coloured_trace = true;
  TraceTestHelper(coloured_trace, LOG_WRITE, REF("log-write-colour"));
}
// Trace test checked against the "log-branch-colour" reference, colour enabled.
// NOTE(review): this passes LOG_WRITE, the same trace parameters as
// TEST(write_colour), even though the test and its reference file are named
// for branches. Confirm whether a branch-specific parameter was intended;
// changing it would require regenerating the reference trace.
TEST(branch_colour) {
  const bool coloured_trace = true;
  TraceTestHelper(coloured_trace, LOG_WRITE, REF("log-branch-colour"));
}
3154*f5c631daSSadaf Ebrahimi
3155*f5c631daSSadaf Ebrahimi // Test standard combinations (with colour).
// Trace test: LOG_NONE trace parameters, colour enabled.
TEST(none_colour) {
  const bool coloured_trace = true;
  TraceTestHelper(coloured_trace, LOG_NONE, REF("log-none-colour"));
}
// Trace test: LOG_STATE trace parameters, colour enabled.
TEST(state_colour) {
  const bool coloured_trace = true;
  TraceTestHelper(coloured_trace, LOG_STATE, REF("log-state-colour"));
}
// Trace test: LOG_ALL trace parameters, colour enabled.
TEST(all_colour) {
  const bool coloured_trace = true;
  TraceTestHelper(coloured_trace, LOG_ALL, REF("log-all-colour"));
}
3161*f5c631daSSadaf Ebrahimi
3162*f5c631daSSadaf Ebrahimi #endif // VIXL_INCLUDE_SIMULATOR_AARCH64
3163*f5c631daSSadaf Ebrahimi
PrintDisassemblerTestHelper(const char * prefix,const char * suffix,const char * ref_file)3164*f5c631daSSadaf Ebrahimi static void PrintDisassemblerTestHelper(const char* prefix,
3165*f5c631daSSadaf Ebrahimi const char* suffix,
3166*f5c631daSSadaf Ebrahimi const char* ref_file) {
3167*f5c631daSSadaf Ebrahimi MacroAssembler masm(12 * KBytes);
3168*f5c631daSSadaf Ebrahimi
3169*f5c631daSSadaf Ebrahimi char trace_stream_filename[] = "/tmp/vixl-test-trace-XXXXXX";
3170*f5c631daSSadaf Ebrahimi FILE* trace_stream = fdopen(mkstemp(trace_stream_filename), "w");
3171*f5c631daSSadaf Ebrahimi
3172*f5c631daSSadaf Ebrahimi // We don't need to execute this code so there's no need for the execution
3173*f5c631daSSadaf Ebrahimi // environment setup from TraceTestHelper.
3174*f5c631daSSadaf Ebrahimi
3175*f5c631daSSadaf Ebrahimi GenerateTestSequenceBase(&masm);
3176*f5c631daSSadaf Ebrahimi GenerateTestSequenceFP(&masm);
3177*f5c631daSSadaf Ebrahimi GenerateTestSequenceNEON(&masm);
3178*f5c631daSSadaf Ebrahimi GenerateTestSequenceNEONFP(&masm);
3179*f5c631daSSadaf Ebrahimi GenerateTestSequenceSVE(&masm);
3180*f5c631daSSadaf Ebrahimi GenerateTestSequenceAtomics(&masm);
3181*f5c631daSSadaf Ebrahimi masm.FinalizeCode();
3182*f5c631daSSadaf Ebrahimi
3183*f5c631daSSadaf Ebrahimi Decoder decoder;
3184*f5c631daSSadaf Ebrahimi CPUFeaturesAuditor auditor(&decoder);
3185*f5c631daSSadaf Ebrahimi PrintDisassembler disasm(trace_stream);
3186*f5c631daSSadaf Ebrahimi if (prefix != NULL) disasm.SetCPUFeaturesPrefix(prefix);
3187*f5c631daSSadaf Ebrahimi if (suffix != NULL) disasm.SetCPUFeaturesSuffix(suffix);
3188*f5c631daSSadaf Ebrahimi disasm.RegisterCPUFeaturesAuditor(&auditor);
3189*f5c631daSSadaf Ebrahimi decoder.AppendVisitor(&disasm);
3190*f5c631daSSadaf Ebrahimi
3191*f5c631daSSadaf Ebrahimi Instruction* instruction = masm.GetBuffer()->GetStartAddress<Instruction*>();
3192*f5c631daSSadaf Ebrahimi Instruction* end = masm.GetCursorAddress<Instruction*>();
3193*f5c631daSSadaf Ebrahimi while (instruction != end) {
3194*f5c631daSSadaf Ebrahimi decoder.Decode(instruction);
3195*f5c631daSSadaf Ebrahimi instruction += kInstructionSize;
3196*f5c631daSSadaf Ebrahimi }
3197*f5c631daSSadaf Ebrahimi
3198*f5c631daSSadaf Ebrahimi fclose(trace_stream);
3199*f5c631daSSadaf Ebrahimi
3200*f5c631daSSadaf Ebrahimi // We already disassembled into the temporary file, so just print the file.
3201*f5c631daSSadaf Ebrahimi if (Test::disassemble()) PrintFile(trace_stream_filename);
3202*f5c631daSSadaf Ebrahimi
3203*f5c631daSSadaf Ebrahimi MaskAddresses(trace_stream_filename);
3204*f5c631daSSadaf Ebrahimi
3205*f5c631daSSadaf Ebrahimi bool trace_matched_reference =
3206*f5c631daSSadaf Ebrahimi CheckOrGenerateTrace(trace_stream_filename, ref_file);
3207*f5c631daSSadaf Ebrahimi remove(trace_stream_filename); // Clean up before checking the result.
3208*f5c631daSSadaf Ebrahimi VIXL_CHECK(trace_matched_reference);
3209*f5c631daSSadaf Ebrahimi }
3210*f5c631daSSadaf Ebrahimi
3211*f5c631daSSadaf Ebrahimi
3212*f5c631daSSadaf Ebrahimi // Test CPUFeatures disassembly annotations.
// CPUFeatures annotation test with no custom prefix or suffix set.
TEST(cpufeatures) {
  const char* const prefix = NULL;
  const char* const suffix = NULL;
  PrintDisassemblerTestHelper(prefix, suffix, REF("log-cpufeatures"));
}
// CPUFeatures annotation test with a custom textual prefix and suffix.
TEST(cpufeatures_custom) {
  const char* const prefix = "### {";
  const char* const suffix = "} ###";
  PrintDisassemblerTestHelper(prefix, suffix, REF("log-cpufeatures-custom"));
}
// CPUFeatures annotation test using terminal escape sequences as the prefix
// and suffix. The colour chosen is arbitrary.
TEST(cpufeatures_colour) {
  const char* const bold_magenta = "\033[1;35m";  // Prefix.
  const char* const reset_colour = "\033[0;m";    // Suffix.
  PrintDisassemblerTestHelper(bold_magenta,
                              reset_colour,
                              REF("log-cpufeatures-colour"));
}
3225*f5c631daSSadaf Ebrahimi } // namespace aarch64
3226*f5c631daSSadaf Ebrahimi } // namespace vixl
3227