// Copyright 2019, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#include "macro-assembler-aarch64.h"

namespace vixl {
namespace aarch64 {

void MacroAssembler::AddSubHelper(AddSubHelperOption option,
                                  const ZRegister& zd,
                                  const ZRegister& zn,
                                  IntegerOperand imm) {
  VIXL_ASSERT(imm.FitsInLane(zd));

  // Simple, encodable cases.
  if (TrySingleAddSub(option, zd, zn, imm)) return;

  VIXL_ASSERT((option == kAddImmediate) || (option == kSubImmediate));
  bool add_imm = (option == kAddImmediate);

  // Try to translate Add(..., -imm) to Sub(..., imm) if we can encode it in one
  // instruction. Also interpret the immediate as signed, so we can convert
  // Add(zd.VnH(), zn.VnH(), 0xffff...) to Sub(..., 1), etc.
  IntegerOperand signed_imm(imm.AsIntN(zd.GetLaneSizeInBits()));
  if (signed_imm.IsNegative()) {
    AddSubHelperOption n_option = add_imm ? kSubImmediate : kAddImmediate;
    IntegerOperand n_imm(signed_imm.GetMagnitude());
    // IntegerOperand can represent -INT_MIN, so this is always safe.
    VIXL_ASSERT(n_imm.IsPositiveOrZero());
    if (TrySingleAddSub(n_option, zd, zn, n_imm)) return;
  }

  // Otherwise, fall back to dup + ADD_z_z/SUB_z_z.
  UseScratchRegisterScope temps(this);
  ZRegister scratch = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
  Dup(scratch, imm);

  SingleEmissionCheckScope guard(this);
  if (add_imm) {
    add(zd, zn, scratch);
  } else {
    sub(zd, zn, scratch);
  }
}

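// Illustrative expansions (a sketch; register numbers are arbitrary):
//
//   Add(z0.VnH(), z0.VnH(), 0xffff);   // Reinterpreted as -1, so a single
//                                      // "sub z0.h, z0.h, #1" is emitted.
//   Add(z0.VnS(), z1.VnS(), 0x12345);  // Not encodable as one add/sub, so
//                                      // the immediate is materialised with
//                                      // Dup into a scratch Z register,
//                                      // followed by an unpredicated "add".
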
bool MacroAssembler::TrySingleAddSub(AddSubHelperOption option,
                                     const ZRegister& zd,
                                     const ZRegister& zn,
                                     IntegerOperand imm) {
  VIXL_ASSERT(imm.FitsInLane(zd));

  int imm8;
  int shift = -1;
  if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) ||
      imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) {
    MovprfxHelperScope guard(this, zd, zn);
    switch (option) {
      case kAddImmediate:
        add(zd, zd, imm8, shift);
        return true;
      case kSubImmediate:
        sub(zd, zd, imm8, shift);
        return true;
    }
  }
  return false;
}

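// The encodable immediates are unsigned 8-bit values, optionally shifted left
// by 8, matching the ADD_z_zi/SUB_z_zi forms. For example:
//
//   Add(z0.VnS(), z0.VnS(), 42);      // -> "add z0.s, z0.s, #42"
//   Add(z0.VnS(), z0.VnS(), 0x2a00);  // -> "add z0.s, z0.s, #42, lsl #8"
//   Add(z0.VnS(), z1.VnS(), 42);      // -> "movprfx z0, z1" + "add ..."
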
void MacroAssembler::IntWideImmHelper(IntArithImmFn imm_fn,
                                      SVEArithPredicatedFn reg_macro,
                                      const ZRegister& zd,
                                      const ZRegister& zn,
                                      IntegerOperand imm,
                                      bool is_signed) {
  if (is_signed) {
    // E.g. MUL_z_zi, SMIN_z_zi, SMAX_z_zi
    if (imm.IsInt8()) {
      MovprfxHelperScope guard(this, zd, zn);
      (this->*imm_fn)(zd, zd, imm.AsInt8());
      return;
    }
  } else {
    // E.g. UMIN_z_zi, UMAX_z_zi
    if (imm.IsUint8()) {
      MovprfxHelperScope guard(this, zd, zn);
      (this->*imm_fn)(zd, zd, imm.AsUint8());
      return;
    }
  }

  UseScratchRegisterScope temps(this);
  PRegister pg = temps.AcquireGoverningP();
  Ptrue(pg.WithSameLaneSizeAs(zd));

  // Try to re-use zd if we can, so we can avoid a movprfx.
  ZRegister scratch =
      zd.Aliases(zn) ? temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits())
                     : zd;
  Dup(scratch, imm);

  // The vector-form macro for commutative operations will swap the arguments to
  // avoid movprfx, if necessary.
  (this->*reg_macro)(zd, pg.Merging(), zn, scratch);
}

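// Illustrative expansions (register numbers are arbitrary):
//
//   Mul(z0.VnS(), z1.VnS(), -3);    // Fits in int8: "movprfx z0, z1" +
//                                   // "mul z0.s, z0.s, #-3".
//   Mul(z0.VnS(), z1.VnS(), 1000);  // Too wide for the immediate form: the
//                                   // helper materialises 1000 with Dup,
//                                   // sets up a Ptrue governing predicate,
//                                   // then emits a predicated "mul".
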
void MacroAssembler::Mul(const ZRegister& zd,
                         const ZRegister& zn,
                         IntegerOperand imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  IntArithImmFn imm_fn = &Assembler::mul;
  SVEArithPredicatedFn reg_fn = &MacroAssembler::Mul;
  IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
}

void MacroAssembler::Smin(const ZRegister& zd,
                          const ZRegister& zn,
                          IntegerOperand imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(imm.FitsInSignedLane(zd));
  IntArithImmFn imm_fn = &Assembler::smin;
  SVEArithPredicatedFn reg_fn = &MacroAssembler::Smin;
  IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
}

void MacroAssembler::Smax(const ZRegister& zd,
                          const ZRegister& zn,
                          IntegerOperand imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(imm.FitsInSignedLane(zd));
  IntArithImmFn imm_fn = &Assembler::smax;
  SVEArithPredicatedFn reg_fn = &MacroAssembler::Smax;
  IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
}

void MacroAssembler::Umax(const ZRegister& zd,
                          const ZRegister& zn,
                          IntegerOperand imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(imm.FitsInUnsignedLane(zd));
  IntArithImmFn imm_fn = &Assembler::umax;
  SVEArithPredicatedFn reg_fn = &MacroAssembler::Umax;
  IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false);
}

void MacroAssembler::Umin(const ZRegister& zd,
                          const ZRegister& zn,
                          IntegerOperand imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(imm.FitsInUnsignedLane(zd));
  IntArithImmFn imm_fn = &Assembler::umin;
  SVEArithPredicatedFn reg_fn = &MacroAssembler::Umin;
  IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false);
}

void MacroAssembler::Addpl(const Register& xd,
                           const Register& xn,
                           int64_t multiplier) {
  VIXL_ASSERT(allow_macro_instructions_);

  // This macro relies on `Rdvl` to handle some out-of-range cases. Check that
  // `VL * multiplier` cannot overflow, for any possible value of VL.
  VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes));
  VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes));

  if (xd.IsZero()) return;
  if (xn.IsZero() && xd.IsSP()) {
    // TODO: This operation doesn't make much sense, but we could support it
    // with a scratch register if necessary.
    VIXL_UNIMPLEMENTED();
  }

  // Handling xzr requires an extra move, so defer it until later so we can try
  // to use `rdvl` instead (via `Addvl`).
  if (IsInt6(multiplier) && !xn.IsZero()) {
    SingleEmissionCheckScope guard(this);
    addpl(xd, xn, static_cast<int>(multiplier));
    return;
  }

  // If `multiplier` is a multiple of 8, we can use `Addvl` instead.
  if ((multiplier % kZRegBitsPerPRegBit) == 0) {
    Addvl(xd, xn, multiplier / kZRegBitsPerPRegBit);
    return;
  }

  if (IsInt6(multiplier)) {
    VIXL_ASSERT(xn.IsZero());  // Other cases were handled with `addpl`.
    // There is no simple `rdpl` instruction, and `addpl` cannot accept xzr, so
    // materialise a zero.
    MacroEmissionCheckScope guard(this);
    movz(xd, 0);
    addpl(xd, xd, static_cast<int>(multiplier));
    return;
  }

  // TODO: Some probable cases result in rather long sequences. For example,
  // `Addpl(sp, sp, 33)` requires five instructions, even though it's only just
  // outside the encodable range. We should look for ways to cover such cases
  // without drastically increasing the complexity of this logic.

  // For other cases, calculate xn + (PL * multiplier) using discrete
  // instructions. This requires two scratch registers in the general case, so
  // try to re-use the destination as a scratch register.
  UseScratchRegisterScope temps(this);
  temps.Include(xd);
  temps.Exclude(xn);

  Register scratch = temps.AcquireX();
  // There is no `rdpl`, so we have to calculate PL from VL. We can't scale the
  // multiplier because (we already know) it isn't a multiple of 8.
  Rdvl(scratch, multiplier);

  MacroEmissionCheckScope guard(this);
  if (xn.IsZero()) {
    asr(xd, scratch, kZRegBitsPerPRegBitLog2);
  } else if (xd.IsSP() || xn.IsSP()) {
    // TODO: MacroAssembler::Add should be able to handle this.
    asr(scratch, scratch, kZRegBitsPerPRegBitLog2);
    add(xd, xn, scratch);
  } else {
    add(xd, xn, Operand(scratch, ASR, kZRegBitsPerPRegBitLog2));
  }
}

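// Illustrative expansions (register numbers are arbitrary):
//
//   Addpl(x0, x1, 4);    // -> "addpl x0, x1, #4"
//   Addpl(x0, x1, 64);   // Multiple of 8: delegates to Addvl(x0, x1, 8),
//                        // emitting "addvl x0, x1, #8".
//   Addpl(x0, xzr, 4);   // -> "movz x0, #0" + "addpl x0, x0, #4"
//   Addpl(x0, x1, 100);  // Roughly: Rdvl into a scratch register, then
//                        // "add x0, x1, Operand(scratch, ASR, #3)".
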
void MacroAssembler::Addvl(const Register& xd,
                           const Register& xn,
                           int64_t multiplier) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(xd.IsX());
  VIXL_ASSERT(xn.IsX());

  // Check that `VL * multiplier` cannot overflow, for any possible value of VL.
  VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes));
  VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes));

  if (xd.IsZero()) return;
  if (xn.IsZero() && xd.IsSP()) {
    // TODO: This operation doesn't make much sense, but we could support it
    // with a scratch register if necessary. `rdvl` cannot write into `sp`.
    VIXL_UNIMPLEMENTED();
  }

  if (IsInt6(multiplier)) {
    SingleEmissionCheckScope guard(this);
    if (xn.IsZero()) {
      rdvl(xd, static_cast<int>(multiplier));
    } else {
      addvl(xd, xn, static_cast<int>(multiplier));
    }
    return;
  }

  // TODO: Some probable cases result in rather long sequences. For example,
  // `Addvl(sp, sp, 42)` requires four instructions, even though it's only just
  // outside the encodable range. We should look for ways to cover such cases
  // without drastically increasing the complexity of this logic.

  // For other cases, calculate xn + (VL * multiplier) using discrete
  // instructions. This requires two scratch registers in the general case, so
  // we try to re-use the destination as a scratch register.
  UseScratchRegisterScope temps(this);
  temps.Include(xd);
  temps.Exclude(xn);

  Register a = temps.AcquireX();
  Mov(a, multiplier);

  MacroEmissionCheckScope guard(this);
  Register b = temps.AcquireX();
  rdvl(b, 1);
  if (xn.IsZero()) {
    mul(xd, a, b);
  } else if (xd.IsSP() || xn.IsSP()) {
    mul(a, a, b);
    add(xd, xn, a);
  } else {
    madd(xd, a, b, xn);
  }
}

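// Illustrative expansions (register numbers are arbitrary):
//
//   Addvl(x0, x1, 31);   // -> "addvl x0, x1, #31"
//   Addvl(x0, xzr, 31);  // -> "rdvl x0, #31"
//   Addvl(x0, x1, 42);   // Out of range: "mov a, #42" + "rdvl b, #1" +
//                        // "madd x0, a, b, x1" (a, b are scratch registers).
//   Addvl(sp, sp, 42);   // As above, but via "mul" + "add", since "madd"
//                        // cannot operate on sp.
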
void MacroAssembler::CalculateSVEAddress(const Register& xd,
                                         const SVEMemOperand& addr,
                                         int vl_divisor_log2) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(!addr.IsScatterGather());
  VIXL_ASSERT(xd.IsX());

  // The lower bound is where a whole Z register is accessed.
  VIXL_ASSERT(!addr.IsMulVl() || (vl_divisor_log2 >= 0));
  // The upper bound is for P register accesses, and for instructions like
  // "st1b { z0.d }, [...]", where one byte is accessed for every D-sized lane.
  VIXL_ASSERT(vl_divisor_log2 <= static_cast<int>(kZRegBitsPerPRegBitLog2));

  SVEOffsetModifier mod = addr.GetOffsetModifier();
  Register base = addr.GetScalarBase();

  if (addr.IsEquivalentToScalar()) {
    // For example:
    //   [x0]
    //   [x0, #0]
    //   [x0, xzr, LSL 2]
    Mov(xd, base);
  } else if (addr.IsScalarPlusImmediate()) {
    // For example:
    //   [x0, #42]
    //   [x0, #42, MUL VL]
    int64_t offset = addr.GetImmediateOffset();
    VIXL_ASSERT(offset != 0);  // Handled by IsEquivalentToScalar.
    if (addr.IsMulVl()) {
      int vl_divisor = 1 << vl_divisor_log2;
      // For all possible values of vl_divisor, we can simply use `Addpl`. This
      // will select `addvl` if necessary.
      VIXL_ASSERT((kZRegBitsPerPRegBit % vl_divisor) == 0);
      Addpl(xd, base, offset * (kZRegBitsPerPRegBit / vl_divisor));
    } else {
      // IsScalarPlusImmediate() ensures that no other modifiers can occur.
      VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER);
      Add(xd, base, offset);
    }
  } else if (addr.IsScalarPlusScalar()) {
    // For example:
    //   [x0, x1]
    //   [x0, x1, LSL #4]
    Register offset = addr.GetScalarOffset();
    VIXL_ASSERT(!offset.IsZero());  // Handled by IsEquivalentToScalar.
    if (mod == SVE_LSL) {
      Add(xd, base, Operand(offset, LSL, addr.GetShiftAmount()));
    } else {
      // IsScalarPlusScalar() ensures that no other modifiers can occur.
      VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER);
      Add(xd, base, offset);
    }
  } else {
    // All other forms are scatter-gather addresses, which cannot be evaluated
    // into an X register.
    VIXL_UNREACHABLE();
  }
}

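// Illustrative results for the address forms named in the comments above,
// with x10 as the destination (a sketch; the MUL VL case assumes a whole-Z
// access, i.e. vl_divisor_log2 == 0):
//
//   [x0]              -> "mov x10, x0"
//   [x0, #42]         -> "add x10, x0, #42"
//   [x0, #3, MUL VL]  -> Addpl(x10, x0, 3 * 8), i.e. "addpl x10, x0, #24"
//   [x0, x1, LSL #4]  -> "add x10, x0, x1, lsl #4"
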
void MacroAssembler::Cpy(const ZRegister& zd,
                         const PRegister& pg,
                         IntegerOperand imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(imm.FitsInLane(zd));
  int imm8;
  int shift;
  if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) ||
      imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) {
    SingleEmissionCheckScope guard(this);
    cpy(zd, pg, imm8, shift);
    return;
  }

  // The fallbacks rely on `cpy` variants that only support merging predication.
  // If zeroing predication was requested, zero the destination first.
  if (pg.IsZeroing()) {
    SingleEmissionCheckScope guard(this);
    dup(zd, 0);
  }
  PRegisterM pg_m = pg.Merging();

  // Try to encode the immediate using fcpy.
  VIXL_ASSERT(imm.FitsInLane(zd));
  if (zd.GetLaneSizeInBits() >= kHRegSize) {
    double fp_imm = 0.0;
    switch (zd.GetLaneSizeInBits()) {
      case kHRegSize:
        fp_imm =
            FPToDouble(RawbitsToFloat16(imm.AsUint16()), kIgnoreDefaultNaN);
        break;
      case kSRegSize:
        fp_imm = RawbitsToFloat(imm.AsUint32());
        break;
      case kDRegSize:
        fp_imm = RawbitsToDouble(imm.AsUint64());
        break;
      default:
        VIXL_UNREACHABLE();
        break;
    }
    // IsImmFP64 is equivalent to IsImmFP<n> for the same arithmetic value, so
    // we can use IsImmFP64 for all lane sizes.
    if (IsImmFP64(fp_imm)) {
      SingleEmissionCheckScope guard(this);
      fcpy(zd, pg_m, fp_imm);
      return;
    }
  }

  // Fall back to using a scratch register.
  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireRegisterToHoldLane(zd);
  Mov(scratch, imm);

  SingleEmissionCheckScope guard(this);
  cpy(zd, pg_m, scratch);
}

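// Illustrative expansions (register numbers are arbitrary):
//
//   Cpy(z0.VnH(), p0.Merging(), 42);          // -> "cpy z0.h, p0/m, #42"
//   Cpy(z0.VnS(), p0.Zeroing(), 0x3f800000);  // Not a shifted int8, but the
//       // bit pattern is the float 1.0: "dup z0.s, #0" (for the zeroing
//       // predication), then "fcpy z0.s, p0/m, #1.0".
//   Cpy(z0.VnS(), p0.Merging(), 0x12345678);  // Falls back to Mov into a
//       // scratch register (possibly several instructions), then
//       // "cpy z0.s, p0/m, <Wscratch>".
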
// TODO: We implement Fcpy (amongst other things) for all FP types because it
// allows us to preserve user-specified NaNs. We should come up with some
// FPImmediate type to abstract this, and avoid all the duplication below (and
// elsewhere).

void MacroAssembler::Fcpy(const ZRegister& zd,
                          const PRegisterM& pg,
                          double imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(pg.IsMerging());

  if (IsImmFP64(imm)) {
    SingleEmissionCheckScope guard(this);
    fcpy(zd, pg, imm);
    return;
  }

  // As a fall-back, cast the immediate to the required lane size, and try to
  // encode the bit pattern using `Cpy`.
  Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm));
}

void MacroAssembler::Fcpy(const ZRegister& zd,
                          const PRegisterM& pg,
                          float imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(pg.IsMerging());

  if (IsImmFP32(imm)) {
    SingleEmissionCheckScope guard(this);
    fcpy(zd, pg, imm);
    return;
  }

  // As a fall-back, cast the immediate to the required lane size, and try to
  // encode the bit pattern using `Cpy`.
  Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm));
}

void MacroAssembler::Fcpy(const ZRegister& zd,
                          const PRegisterM& pg,
                          Float16 imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(pg.IsMerging());

  if (IsImmFP16(imm)) {
    SingleEmissionCheckScope guard(this);
    fcpy(zd, pg, imm);
    return;
  }

  // As a fall-back, cast the immediate to the required lane size, and try to
  // encode the bit pattern using `Cpy`.
  Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm));
}

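// For example, for the double-precision overload (register numbers are
// arbitrary):
//
//   Fcpy(z0.VnD(), p0.Merging(), 0.5);      // -> "fcpy z0.d, p0/m, #0.5"
//   Fcpy(z0.VnD(), p0.Merging(), 1. / 3.);  // Not FP8-encodable, so the raw
//       // bit pattern goes through `Cpy` instead, preserving the exact
//       // value (including any NaN payload).
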
void MacroAssembler::Dup(const ZRegister& zd, IntegerOperand imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(imm.FitsInLane(zd));
  unsigned lane_size = zd.GetLaneSizeInBits();
  int imm8;
  int shift;
  if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) ||
      imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) {
    SingleEmissionCheckScope guard(this);
    dup(zd, imm8, shift);
  } else if (IsImmLogical(imm.AsUintN(lane_size), lane_size)) {
    SingleEmissionCheckScope guard(this);
    dupm(zd, imm.AsUintN(lane_size));
  } else {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.AcquireRegisterToHoldLane(zd);
    Mov(scratch, imm);

    SingleEmissionCheckScope guard(this);
    dup(zd, scratch);
  }
}

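// Illustrative expansions (register numbers are arbitrary):
//
//   Dup(z0.VnB(), 0x1e);        // -> "dup z0.b, #30"
//   Dup(z0.VnS(), 0x2a00);      // -> "dup z0.s, #42, lsl #8"
//   Dup(z0.VnS(), 0xff00ff00);  // Logical immediate:
//                               // "dupm z0.s, #0xff00ff00"
//   Dup(z0.VnS(), 0x12345678);  // -> Mov into a scratch W register, then
//                               // "dup z0.s, <Wscratch>".
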
void MacroAssembler::NoncommutativeArithmeticHelper(
    const ZRegister& zd,
    const PRegisterM& pg,
    const ZRegister& zn,
    const ZRegister& zm,
    SVEArithPredicatedFn fn,
    SVEArithPredicatedFn rev_fn) {
  if (zd.Aliases(zn)) {
    // E.g. zd = zd / zm
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zd, pg, zn, zm);
  } else if (zd.Aliases(zm)) {
    // E.g. zd = zn / zd
    SingleEmissionCheckScope guard(this);
    (this->*rev_fn)(zd, pg, zm, zn);
  } else {
    // E.g. zd = zn / zm
    MovprfxHelperScope guard(this, zd, pg, zn);
    (this->*fn)(zd, pg, zd, zm);
  }
}

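// For example, with fn = sub and rev_fn = subr (lane sizes elided):
//
//   Sub(z0, pg, z0, z1);  // zd aliases zn: "sub z0, pg/m, z0, z1"
//   Sub(z0, pg, z1, z0);  // zd aliases zm: "subr z0, pg/m, z0, z1"
//   Sub(z0, pg, z1, z2);  // No alias: predicated movprfx from z1, then
//                         // "sub z0, pg/m, z0, z2"
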
void MacroAssembler::FPCommutativeArithmeticHelper(
    const ZRegister& zd,
    const PRegisterM& pg,
    const ZRegister& zn,
    const ZRegister& zm,
    SVEArithPredicatedFn fn,
    FPMacroNaNPropagationOption nan_option) {
  ResolveFPNaNPropagationOption(&nan_option);

  if (zd.Aliases(zn)) {
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zd, pg, zd, zm);
  } else if (zd.Aliases(zm)) {
    switch (nan_option) {
      case FastNaNPropagation: {
        // Swap the arguments.
        SingleEmissionCheckScope guard(this);
        (this->*fn)(zd, pg, zd, zn);
        return;
      }
      case StrictNaNPropagation: {
        UseScratchRegisterScope temps(this);
        // Use a scratch register to keep the argument order exactly as
        // specified.
        ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn);
        {
          MovprfxHelperScope guard(this, scratch, pg, zn);
          (this->*fn)(scratch, pg, scratch, zm);
        }
        Mov(zd, scratch);
        return;
      }
      case NoFPMacroNaNPropagationSelected:
        VIXL_UNREACHABLE();
        return;
    }
  } else {
    MovprfxHelperScope guard(this, zd, pg, zn);
    (this->*fn)(zd, pg, zd, zm);
  }
}

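// For example, with fn = fadd (lane sizes elided):
//
//   Fadd(z0, pg, z0, z1, ...);                   // "fadd z0, pg/m, z0, z1"
//   Fadd(z0, pg, z1, z0, FastNaNPropagation);    // Arguments swapped:
//                                                // "fadd z0, pg/m, z0, z1"
//   Fadd(z0, pg, z1, z0, StrictNaNPropagation);  // Operand order preserved:
//       // the result is computed in a scratch Z register and then moved
//       // into z0.
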
// Instructions of the form "inst zda, zn, zm, #num", which are
// non-commutative and for which no reversed form is provided.
#define VIXL_SVE_NONCOMM_ARITH_ZZZZI_LIST(V) \
  V(Cmla, cmla)                              \
  V(Sqrdcmlah, sqrdcmlah)

#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN)                     \
  void MacroAssembler::MASMFN(const ZRegister& zd,               \
                              const ZRegister& za,               \
                              const ZRegister& zn,               \
                              const ZRegister& zm,               \
                              int imm) {                         \
    if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { \
      UseScratchRegisterScope temps(this);                       \
      VIXL_ASSERT(AreSameLaneSize(zn, zm));                      \
      ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn);  \
      Mov(ztmp, zd.Aliases(zn) ? zn : zm);                       \
      MovprfxHelperScope guard(this, zd, za);                    \
      ASMFN(zd,                                                  \
            (zd.Aliases(zn) ? ztmp : zn),                        \
            (zd.Aliases(zm) ? ztmp : zm),                        \
            imm);                                                \
    } else {                                                     \
      MovprfxHelperScope guard(this, zd, za);                    \
      ASMFN(zd, zn, zm, imm);                                    \
    }                                                            \
  }
VIXL_SVE_NONCOMM_ARITH_ZZZZI_LIST(VIXL_DEFINE_MASM_FUNC)
#undef VIXL_DEFINE_MASM_FUNC

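// For example (rot is the rotation immediate; register numbers arbitrary):
//
//   Cmla(z0, z1, z2, z3, 90);  // No hazard: "movprfx z0, z1" +
//                              // "cmla z0, z2, z3, #90".
//   Cmla(z0, z1, z0, z3, 90);  // zd aliases zn but not za: zn is first
//                              // copied to a scratch register, then zd is
//                              // prefixed from za as above.
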
// Instructions of the form "inst zda, zn, zm, #num, #num", which are
// non-commutative and for which no reversed form is provided.
#define VIXL_SVE_NONCOMM_ARITH_ZZZZII_LIST(V) \
  V(Cmla, cmla)                               \
  V(Sqrdcmlah, sqrdcmlah)

// This doesn't handle zm when it is outside the range that can be encoded in
// the instruction. The encodable range depends on the element size: z0-z7 for
// H, z0-z15 for S.
#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN)                     \
  void MacroAssembler::MASMFN(const ZRegister& zd,               \
                              const ZRegister& za,               \
                              const ZRegister& zn,               \
                              const ZRegister& zm,               \
                              int index,                         \
                              int rot) {                         \
    if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { \
      UseScratchRegisterScope temps(this);                       \
      ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd);  \
      {                                                          \
        MovprfxHelperScope guard(this, ztmp, za);                \
        ASMFN(ztmp, zn, zm, index, rot);                         \
      }                                                          \
      Mov(zd, ztmp);                                             \
    } else {                                                     \
      MovprfxHelperScope guard(this, zd, za);                    \
      ASMFN(zd, zn, zm, index, rot);                             \
    }                                                            \
  }
VIXL_SVE_NONCOMM_ARITH_ZZZZII_LIST(VIXL_DEFINE_MASM_FUNC)
#undef VIXL_DEFINE_MASM_FUNC

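// For example, for the indexed form (index selects the complex pair, rot the
// rotation; register numbers arbitrary):
//
//   Cmla(z0, z1, z2, z3, 1, 90);  // -> "movprfx z0, z1" +
//                                 //    "cmla z0.s, z2.s, z3.s[1], #90"
//   Cmla(z0, z1, z0, z3, 1, 90);  // zd aliases zn: the result is computed
//                                 // in a scratch register (prefixed from
//                                 // z1), then moved into z0.
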
// Instructions of the form "inst zda, pg, zda, zn", which are non-commutative
// and for which no reversed form is provided.
#define VIXL_SVE_NONCOMM_ARITH_ZPZZ_LIST(V) \
  V(Addp, addp)                             \
  V(Faddp, faddp)                           \
  V(Fmaxnmp, fmaxnmp)                       \
  V(Fminnmp, fminnmp)                       \
  V(Fmaxp, fmaxp)                           \
  V(Fminp, fminp)                           \
  V(Fscale, fscale)                         \
  V(Smaxp, smaxp)                           \
  V(Sminp, sminp)                           \
  V(Suqadd, suqadd)                         \
  V(Umaxp, umaxp)                           \
  V(Uminp, uminp)                           \
  V(Usqadd, usqadd)

#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN)                       \
  void MacroAssembler::MASMFN(const ZRegister& zd,                 \
                              const PRegisterM& pg,                \
                              const ZRegister& zn,                 \
                              const ZRegister& zm) {               \
    VIXL_ASSERT(allow_macro_instructions_);                        \
    if (zd.Aliases(zm) && !zd.Aliases(zn)) {                       \
      UseScratchRegisterScope temps(this);                         \
      ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm); \
      Mov(scratch, zm);                                            \
      MovprfxHelperScope guard(this, zd, pg, zn);                  \
      ASMFN(zd, pg, zd, scratch);                                  \
    } else {                                                       \
      MovprfxHelperScope guard(this, zd, pg, zn);                  \
      ASMFN(zd, pg, zd, zm);                                       \
    }                                                              \
  }
VIXL_SVE_NONCOMM_ARITH_ZPZZ_LIST(VIXL_DEFINE_MASM_FUNC)
#undef VIXL_DEFINE_MASM_FUNC

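// For example, with Addp (lane sizes elided; register numbers arbitrary):
//
//   Addp(z0, pg, z1, z2);  // -> predicated movprfx from z1, then
//                          //    "addp z0, pg/m, z0, z2"
//   Addp(z0, pg, z1, z0);  // zd aliases zm only: zm is copied to a scratch
//                          // register first, so the movprfx doesn't clobber
//                          // the second operand.
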
// Instructions of the form "inst zda, pg, zda, zn", which are non-commutative
// and for which a reversed form is provided.
#define VIXL_SVE_NONCOMM_ARITH_REVERSE_ZPZZ_LIST(V) \
  V(Asr, asr)                                       \
  V(Fdiv, fdiv)                                     \
  V(Fsub, fsub)                                     \
  V(Lsl, lsl)                                       \
  V(Lsr, lsr)                                       \
  V(Sdiv, sdiv)                                     \
  V(Shsub, shsub)                                   \
  V(Sqrshl, sqrshl)                                 \
  V(Sqshl, sqshl)                                   \
  V(Sqsub, sqsub)                                   \
  V(Srshl, srshl)                                   \
  V(Sub, sub)                                       \
  V(Udiv, udiv)                                     \
  V(Uhsub, uhsub)                                   \
  V(Uqrshl, uqrshl)                                 \
  V(Uqshl, uqshl)                                   \
  V(Uqsub, uqsub)                                   \
  V(Urshl, urshl)

#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN)                          \
  void MacroAssembler::MASMFN(const ZRegister& zd,                    \
                              const PRegisterM& pg,                   \
                              const ZRegister& zn,                    \
                              const ZRegister& zm) {                  \
    VIXL_ASSERT(allow_macro_instructions_);                           \
    NoncommutativeArithmeticHelper(zd,                                \
                                   pg,                                \
                                   zn,                                \
                                   zm,                                \
                                   static_cast<SVEArithPredicatedFn>( \
                                       &Assembler::ASMFN),            \
                                   static_cast<SVEArithPredicatedFn>( \
                                       &Assembler::ASMFN##r));        \
  }
VIXL_SVE_NONCOMM_ARITH_REVERSE_ZPZZ_LIST(VIXL_DEFINE_MASM_FUNC)
#undef VIXL_DEFINE_MASM_FUNC

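// For example, Fdiv expands through NoncommutativeArithmeticHelper using the
// fdiv/fdivr pair (lane sizes elided):
//
//   Fdiv(z0, pg, z0, z1);  // -> "fdiv z0, pg/m, z0, z1"
//   Fdiv(z0, pg, z1, z0);  // zd aliases zm: "fdivr z0, pg/m, z0, z1"
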
void MacroAssembler::Fadd(const ZRegister& zd,
                          const PRegisterM& pg,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPCommutativeArithmeticHelper(zd,
                                pg,
                                zn,
                                zm,
                                static_cast<SVEArithPredicatedFn>(
                                    &Assembler::fadd),
                                nan_option);
}

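// Usage note: the nan_option argument controls whether the helper may swap
// the operands (FastNaNPropagation) or must preserve the written operand
// order exactly (StrictNaNPropagation), as described above. For example:
//
//   Fadd(z0.VnS(), p0.Merging(), z1.VnS(), z0.VnS(), FastNaNPropagation);
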
Fabd(const ZRegister & zd,const PRegisterM & pg,const ZRegister & zn,const ZRegister & zm,FPMacroNaNPropagationOption nan_option)718*f5c631daSSadaf Ebrahimi void MacroAssembler::Fabd(const ZRegister& zd,
719*f5c631daSSadaf Ebrahimi                           const PRegisterM& pg,
720*f5c631daSSadaf Ebrahimi                           const ZRegister& zn,
721*f5c631daSSadaf Ebrahimi                           const ZRegister& zm,
722*f5c631daSSadaf Ebrahimi                           FPMacroNaNPropagationOption nan_option) {
723*f5c631daSSadaf Ebrahimi   VIXL_ASSERT(allow_macro_instructions_);
724*f5c631daSSadaf Ebrahimi   FPCommutativeArithmeticHelper(zd,
725*f5c631daSSadaf Ebrahimi                                 pg,
726*f5c631daSSadaf Ebrahimi                                 zn,
727*f5c631daSSadaf Ebrahimi                                 zm,
728*f5c631daSSadaf Ebrahimi                                 static_cast<SVEArithPredicatedFn>(
729*f5c631daSSadaf Ebrahimi                                     &Assembler::fabd),
730*f5c631daSSadaf Ebrahimi                                 nan_option);
731*f5c631daSSadaf Ebrahimi }
732*f5c631daSSadaf Ebrahimi 
Fmul(const ZRegister & zd,const PRegisterM & pg,const ZRegister & zn,const ZRegister & zm,FPMacroNaNPropagationOption nan_option)733*f5c631daSSadaf Ebrahimi void MacroAssembler::Fmul(const ZRegister& zd,
734*f5c631daSSadaf Ebrahimi                           const PRegisterM& pg,
735*f5c631daSSadaf Ebrahimi                           const ZRegister& zn,
736*f5c631daSSadaf Ebrahimi                           const ZRegister& zm,
737*f5c631daSSadaf Ebrahimi                           FPMacroNaNPropagationOption nan_option) {
738*f5c631daSSadaf Ebrahimi   VIXL_ASSERT(allow_macro_instructions_);
739*f5c631daSSadaf Ebrahimi   FPCommutativeArithmeticHelper(zd,
740*f5c631daSSadaf Ebrahimi                                 pg,
741*f5c631daSSadaf Ebrahimi                                 zn,
742*f5c631daSSadaf Ebrahimi                                 zm,
743*f5c631daSSadaf Ebrahimi                                 static_cast<SVEArithPredicatedFn>(
744*f5c631daSSadaf Ebrahimi                                     &Assembler::fmul),
745*f5c631daSSadaf Ebrahimi                                 nan_option);
746*f5c631daSSadaf Ebrahimi }
747*f5c631daSSadaf Ebrahimi 
Fmulx(const ZRegister & zd,const PRegisterM & pg,const ZRegister & zn,const ZRegister & zm,FPMacroNaNPropagationOption nan_option)748*f5c631daSSadaf Ebrahimi void MacroAssembler::Fmulx(const ZRegister& zd,
749*f5c631daSSadaf Ebrahimi                            const PRegisterM& pg,
750*f5c631daSSadaf Ebrahimi                            const ZRegister& zn,
751*f5c631daSSadaf Ebrahimi                            const ZRegister& zm,
752*f5c631daSSadaf Ebrahimi                            FPMacroNaNPropagationOption nan_option) {
753*f5c631daSSadaf Ebrahimi   VIXL_ASSERT(allow_macro_instructions_);
754*f5c631daSSadaf Ebrahimi   FPCommutativeArithmeticHelper(zd,
755*f5c631daSSadaf Ebrahimi                                 pg,
756*f5c631daSSadaf Ebrahimi                                 zn,
757*f5c631daSSadaf Ebrahimi                                 zm,
758*f5c631daSSadaf Ebrahimi                                 static_cast<SVEArithPredicatedFn>(
759*f5c631daSSadaf Ebrahimi                                     &Assembler::fmulx),
760*f5c631daSSadaf Ebrahimi                                 nan_option);
761*f5c631daSSadaf Ebrahimi }
762*f5c631daSSadaf Ebrahimi 
Fmax(const ZRegister & zd,const PRegisterM & pg,const ZRegister & zn,const ZRegister & zm,FPMacroNaNPropagationOption nan_option)763*f5c631daSSadaf Ebrahimi void MacroAssembler::Fmax(const ZRegister& zd,
764*f5c631daSSadaf Ebrahimi                           const PRegisterM& pg,
765*f5c631daSSadaf Ebrahimi                           const ZRegister& zn,
766*f5c631daSSadaf Ebrahimi                           const ZRegister& zm,
767*f5c631daSSadaf Ebrahimi                           FPMacroNaNPropagationOption nan_option) {
768*f5c631daSSadaf Ebrahimi   VIXL_ASSERT(allow_macro_instructions_);
769*f5c631daSSadaf Ebrahimi   FPCommutativeArithmeticHelper(zd,
770*f5c631daSSadaf Ebrahimi                                 pg,
771*f5c631daSSadaf Ebrahimi                                 zn,
772*f5c631daSSadaf Ebrahimi                                 zm,
773*f5c631daSSadaf Ebrahimi                                 static_cast<SVEArithPredicatedFn>(
774*f5c631daSSadaf Ebrahimi                                     &Assembler::fmax),
775*f5c631daSSadaf Ebrahimi                                 nan_option);
776*f5c631daSSadaf Ebrahimi }
777*f5c631daSSadaf Ebrahimi 
void MacroAssembler::Fmin(const ZRegister& zd,
                          const PRegisterM& pg,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPCommutativeArithmeticHelper(zd,
                                pg,
                                zn,
                                zm,
                                static_cast<SVEArithPredicatedFn>(
                                    &Assembler::fmin),
                                nan_option);
}

void MacroAssembler::Fmaxnm(const ZRegister& zd,
                            const PRegisterM& pg,
                            const ZRegister& zn,
                            const ZRegister& zm,
                            FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPCommutativeArithmeticHelper(zd,
                                pg,
                                zn,
                                zm,
                                static_cast<SVEArithPredicatedFn>(
                                    &Assembler::fmaxnm),
                                nan_option);
}

void MacroAssembler::Fminnm(const ZRegister& zd,
                            const PRegisterM& pg,
                            const ZRegister& zn,
                            const ZRegister& zm,
                            FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPCommutativeArithmeticHelper(zd,
                                pg,
                                zn,
                                zm,
                                static_cast<SVEArithPredicatedFn>(
                                    &Assembler::fminnm),
                                nan_option);
}

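// Fdup prefers the immediate form: fdup only accepts values representable as
// an 8-bit FP immediate (a sign bit, a 3-bit exponent and a 4-bit fraction).
// Any other value is broadcast as raw bits via Dup, which produces an
// identical lane pattern.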
void MacroAssembler::Fdup(const ZRegister& zd, double imm) {
  VIXL_ASSERT(allow_macro_instructions_);

  switch (zd.GetLaneSizeInBits()) {
    case kHRegSize:
      Fdup(zd, Float16(imm));
      break;
    case kSRegSize:
      Fdup(zd, static_cast<float>(imm));
      break;
    case kDRegSize:
      if (IsImmFP64(imm)) {
        SingleEmissionCheckScope guard(this);
        fdup(zd, imm);
      } else {
        Dup(zd, DoubleToRawbits(imm));
      }
      break;
  }
}

void MacroAssembler::Fdup(const ZRegister& zd, float imm) {
  VIXL_ASSERT(allow_macro_instructions_);

  switch (zd.GetLaneSizeInBits()) {
    case kHRegSize:
      Fdup(zd, Float16(imm));
      break;
    case kSRegSize:
      if (IsImmFP32(imm)) {
        SingleEmissionCheckScope guard(this);
        fdup(zd, imm);
      } else {
        Dup(zd, FloatToRawbits(imm));
      }
      break;
    case kDRegSize:
      Fdup(zd, static_cast<double>(imm));
      break;
  }
}

void MacroAssembler::Fdup(const ZRegister& zd, Float16 imm) {
  VIXL_ASSERT(allow_macro_instructions_);

  switch (zd.GetLaneSizeInBits()) {
    case kHRegSize:
      if (IsImmFP16(imm)) {
        SingleEmissionCheckScope guard(this);
        fdup(zd, imm);
      } else {
        Dup(zd, Float16ToRawbits(imm));
      }
      break;
    case kSRegSize:
      Fdup(zd, FPToFloat(imm, kIgnoreDefaultNaN));
      break;
    case kDRegSize:
      Fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN));
      break;
  }
}

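// The index instruction encodes its start and step operands as signed 5-bit
// immediates (-16 to 15). IndexOperand::Prepare keeps immediates that fit and
// moves anything else into a scratch register wide enough to hold one lane.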
void MacroAssembler::Index(const ZRegister& zd,
                           const Operand& start,
                           const Operand& step) {
  class IndexOperand : public Operand {
   public:
    static IndexOperand Prepare(MacroAssembler* masm,
                                UseScratchRegisterScope* temps,
                                const Operand& op,
                                const ZRegister& zd_inner) {
      // Look for encodable immediates.
      int imm;
      if (op.IsImmediate()) {
        if (IntegerOperand(op).TryEncodeAsIntNForLane<5>(zd_inner, &imm)) {
          return IndexOperand(imm);
        }
        Register scratch = temps->AcquireRegisterToHoldLane(zd_inner);
        masm->Mov(scratch, op);
        return IndexOperand(scratch);
      } else {
        // Plain registers can be encoded directly.
        VIXL_ASSERT(op.IsPlainRegister());
        return IndexOperand(op.GetRegister());
      }
    }

    int GetImm5() const {
      int64_t imm = GetImmediate();
      VIXL_ASSERT(IsInt5(imm));
      return static_cast<int>(imm);
    }

   private:
    explicit IndexOperand(const Register& reg) : Operand(reg) {}
    explicit IndexOperand(int64_t imm) : Operand(imm) {}
  };

  UseScratchRegisterScope temps(this);
  IndexOperand start_enc = IndexOperand::Prepare(this, &temps, start, zd);
  IndexOperand step_enc = IndexOperand::Prepare(this, &temps, step, zd);

  SingleEmissionCheckScope guard(this);
  if (start_enc.IsImmediate()) {
    if (step_enc.IsImmediate()) {
      index(zd, start_enc.GetImm5(), step_enc.GetImm5());
    } else {
      index(zd, start_enc.GetImm5(), step_enc.GetRegister());
    }
  } else {
    if (step_enc.IsImmediate()) {
      index(zd, start_enc.GetRegister(), step_enc.GetImm5());
    } else {
      index(zd, start_enc.GetRegister(), step_enc.GetRegister());
    }
  }
}

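// Insert a value at lane 0 after shifting the existing lanes along by one.
// Zero can use xzr directly; any other immediate is first materialised in a
// scratch register.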
void MacroAssembler::Insr(const ZRegister& zdn, IntegerOperand imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(imm.FitsInLane(zdn));

  if (imm.IsZero()) {
    SingleEmissionCheckScope guard(this);
    insr(zdn, xzr);
    return;
  }

  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireRegisterToHoldLane(zdn);

  // TODO: There are many cases where we could optimise immediates, such as by
  // detecting repeating patterns or FP immediates. We should optimise and
  // abstract this for use in other SVE mov-immediate-like macros.
  Mov(scratch, imm);

  SingleEmissionCheckScope guard(this);
  insr(zdn, scratch);
}

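// Integer multiply-accumulate has two destructive encodings: mla overwrites
// the addend (zda) and mad overwrites a multiplicand (zdn). The macro picks
// whichever form matches the aliasing of zd, falling back to movprfx to copy
// za into zd. The ExactAssemblyScope covers two instructions because movprfx
// must immediately precede the destructive instruction it prefixes.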
void MacroAssembler::Mla(const ZRegister& zd,
                         const PRegisterM& pg,
                         const ZRegister& za,
                         const ZRegister& zn,
                         const ZRegister& zm) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (zd.Aliases(za)) {
    // zda = zda + (zn * zm)
    SingleEmissionCheckScope guard(this);
    mla(zd, pg, zn, zm);
  } else if (zd.Aliases(zn)) {
    // zdn = za + (zdn * zm)
    SingleEmissionCheckScope guard(this);
    mad(zd, pg, zm, za);
  } else if (zd.Aliases(zm)) {
    // Multiplication is commutative, so we can swap zn and zm.
    // zdm = za + (zdm * zn)
    SingleEmissionCheckScope guard(this);
    mad(zd, pg, zn, za);
  } else {
    // zd = za + (zn * zm)
    ExactAssemblyScope guard(this, 2 * kInstructionSize);
    movprfx(zd, pg, za);
    mla(zd, pg, zn, zm);
  }
}

void MacroAssembler::Mls(const ZRegister& zd,
                         const PRegisterM& pg,
                         const ZRegister& za,
                         const ZRegister& zn,
                         const ZRegister& zm) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (zd.Aliases(za)) {
    // zda = zda - (zn * zm)
    SingleEmissionCheckScope guard(this);
    mls(zd, pg, zn, zm);
  } else if (zd.Aliases(zn)) {
    // zdn = za - (zdn * zm)
    SingleEmissionCheckScope guard(this);
    msb(zd, pg, zm, za);
  } else if (zd.Aliases(zm)) {
    // Multiplication is commutative, so we can swap zn and zm.
    // zdm = za - (zdm * zn)
    SingleEmissionCheckScope guard(this);
    msb(zd, pg, zn, za);
  } else {
    // zd = za - (zn * zm)
    ExactAssemblyScope guard(this, 2 * kInstructionSize);
    movprfx(zd, pg, za);
    mls(zd, pg, zn, zm);
  }
}

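// Fallback for compares whose immediate has no direct encoding: broadcast the
// immediate into a scratch Z register and issue a vector-vector compare.
// Encodable immediates are expected to have been handled by the caller.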
void MacroAssembler::CompareHelper(Condition cond,
                                   const PRegisterWithLaneSize& pd,
                                   const PRegisterZ& pg,
                                   const ZRegister& zn,
                                   IntegerOperand imm) {
  UseScratchRegisterScope temps(this);
  ZRegister zm = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
  Dup(zm, imm);
  SingleEmissionCheckScope guard(this);
  cmp(cond, pd, pg, zn, zm);
}

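// pfirst and pnext are destructive: the instruction forms require the
// destination to also be the source (pdn). When pd and pn differ, the macros
// below copy pn into pd first, preserving the governing predicate in a
// scratch register if the copy would clobber it.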
void MacroAssembler::Pfirst(const PRegisterWithLaneSize& pd,
                            const PRegister& pg,
                            const PRegisterWithLaneSize& pn) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(pd.IsLaneSizeB());
  VIXL_ASSERT(pn.IsLaneSizeB());
  if (pd.Is(pn)) {
    SingleEmissionCheckScope guard(this);
    pfirst(pd, pg, pn);
  } else {
    UseScratchRegisterScope temps(this);
    PRegister temp_pg = pg;
    if (pd.Aliases(pg)) {
      temp_pg = temps.AcquireP();
      Mov(temp_pg.VnB(), pg.VnB());
    }
    Mov(pd, pn);
    SingleEmissionCheckScope guard(this);
    pfirst(pd, temp_pg, pd);
  }
}

void MacroAssembler::Pnext(const PRegisterWithLaneSize& pd,
                           const PRegister& pg,
                           const PRegisterWithLaneSize& pn) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(AreSameFormat(pd, pn));
  if (pd.Is(pn)) {
    SingleEmissionCheckScope guard(this);
    pnext(pd, pg, pn);
  } else {
    UseScratchRegisterScope temps(this);
    PRegister temp_pg = pg;
    if (pd.Aliases(pg)) {
      temp_pg = temps.AcquireP();
      Mov(temp_pg.VnB(), pg.VnB());
    }
    Mov(pd.VnB(), pn.VnB());
    SingleEmissionCheckScope guard(this);
    pnext(pd, temp_pg, pd);
  }
}

void MacroAssembler::Ptrue(const PRegisterWithLaneSize& pd,
                           SVEPredicateConstraint pattern,
                           FlagsUpdate s) {
  VIXL_ASSERT(allow_macro_instructions_);
  switch (s) {
    case LeaveFlags:
      Ptrue(pd, pattern);
      return;
    case SetFlags:
      Ptrues(pd, pattern);
      return;
  }
  VIXL_UNREACHABLE();
}

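// Reversed-operand subtraction: zd = imm - zm. When the immediate fits the
// shifted 8-bit unsigned encoding, zm is copied into zd and subr computes
// zd = imm - zd. Otherwise, the immediate is broadcast into a scratch Z
// register and a plain sub is used.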
void MacroAssembler::Sub(const ZRegister& zd,
                         IntegerOperand imm,
                         const ZRegister& zm) {
  VIXL_ASSERT(allow_macro_instructions_);

  int imm8;
  int shift = -1;
  if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) ||
      imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) {
    MovprfxHelperScope guard(this, zd, zm);
    subr(zd, zd, imm8, shift);
  } else {
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithLaneSize(zm.GetLaneSizeInBits());
    Dup(scratch, imm);

    SingleEmissionCheckScope guard(this);
    sub(zd, scratch, zm);
  }
}

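// Load-and-broadcast forms take an unsigned 6-bit immediate offset, scaled by
// the memory access size (divisor). Offsets that do not fit are computed into
// a scratch base register instead.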
void MacroAssembler::SVELoadBroadcastImmHelper(const ZRegister& zt,
                                               const PRegisterZ& pg,
                                               const SVEMemOperand& addr,
                                               SVELoadBroadcastFn fn,
                                               int divisor) {
  VIXL_ASSERT(addr.IsScalarPlusImmediate());
  int64_t imm = addr.GetImmediateOffset();
  if ((imm % divisor == 0) && IsUint6(imm / divisor)) {
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zt, pg, addr);
  } else {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.AcquireX();
    CalculateSVEAddress(scratch, addr, zt);
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zt, pg, SVEMemOperand(scratch));
  }
}

void MacroAssembler::SVELoadStoreScalarImmHelper(const CPURegister& rt,
                                                 const SVEMemOperand& addr,
                                                 SVELoadStoreFn fn) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(rt.IsZRegister() || rt.IsPRegister());

  if (addr.IsPlainScalar() ||
      (addr.IsScalarPlusImmediate() && IsInt9(addr.GetImmediateOffset()) &&
       addr.IsMulVl())) {
    SingleEmissionCheckScope guard(this);
    (this->*fn)(rt, addr);
    return;
  }

  if (addr.IsEquivalentToScalar()) {
    SingleEmissionCheckScope guard(this);
    (this->*fn)(rt, SVEMemOperand(addr.GetScalarBase()));
    return;
  }

  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireX();
  CalculateSVEAddress(scratch, addr, rt);
  SingleEmissionCheckScope guard(this);
  (this->*fn)(rt, SVEMemOperand(scratch));
}

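// Shared helper for non-temporal and quadword/octoword broadcast forms. The
// encodable immediate is parameterised: imm_bits signed bits after dividing
// by 1 << shift_amount, with an optional required offset modifier (e.g.
// SVE_MUL_VL). vl_divisor_log2 describes how the access scales with the
// vector length for address calculation; -1 means it is not VL-dependent.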
template <typename Tg, typename Tf>
void MacroAssembler::SVELoadStoreNTBroadcastQOHelper(
    const ZRegister& zt,
    const Tg& pg,
    const SVEMemOperand& addr,
    Tf fn,
    int imm_bits,
    int shift_amount,
    SVEOffsetModifier supported_modifier,
    int vl_divisor_log2) {
  VIXL_ASSERT(allow_macro_instructions_);
  int imm_divisor = 1 << shift_amount;

  if (addr.IsPlainScalar() ||
      (addr.IsScalarPlusImmediate() &&
       IsIntN(imm_bits, addr.GetImmediateOffset() / imm_divisor) &&
       ((addr.GetImmediateOffset() % imm_divisor) == 0) &&
       (addr.GetOffsetModifier() == supported_modifier))) {
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zt, pg, addr);
    return;
  }

  if (addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() &&
      addr.IsEquivalentToLSL(zt.GetLaneSizeInBytesLog2())) {
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zt, pg, addr);
    return;
  }

  if (addr.IsEquivalentToScalar()) {
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase()));
    return;
  }

  if (addr.IsMulVl() && (supported_modifier != SVE_MUL_VL) &&
      (vl_divisor_log2 == -1)) {
    // We don't handle [x0, #imm, MUL VL] if the in-memory access size is not
    // VL-dependent.
    VIXL_UNIMPLEMENTED();
  }

  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireX();
  CalculateSVEAddress(scratch, addr, vl_divisor_log2);
  SingleEmissionCheckScope guard(this);
  (this->*fn)(zt, pg, SVEMemOperand(scratch));
}

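// Dispatcher for contiguous and gather/scatter ld1/st1 forms. Encodable
// addressing modes are passed through unchanged; vector-plus-immediate
// gathers with out-of-range offsets are rewritten as scalar-plus-vector, and
// anything else falls back to computing the address into a scratch register.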
template <typename Tg, typename Tf>
void MacroAssembler::SVELoadStore1Helper(int msize_in_bytes_log2,
                                         const ZRegister& zt,
                                         const Tg& pg,
                                         const SVEMemOperand& addr,
                                         Tf fn) {
  if (addr.IsPlainScalar() ||
      (addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() &&
       addr.IsEquivalentToLSL(msize_in_bytes_log2)) ||
      (addr.IsScalarPlusImmediate() && IsInt4(addr.GetImmediateOffset()) &&
       addr.IsMulVl())) {
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zt, pg, addr);
    return;
  }

  if (addr.IsEquivalentToScalar()) {
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase()));
    return;
  }

  if (addr.IsVectorPlusImmediate()) {
    uint64_t offset = addr.GetImmediateOffset();
    if (IsMultiple(offset, (1 << msize_in_bytes_log2)) &&
        IsUint5(offset >> msize_in_bytes_log2)) {
      SingleEmissionCheckScope guard(this);
      (this->*fn)(zt, pg, addr);
      return;
    }
  }

  if (addr.IsScalarPlusVector()) {
    VIXL_ASSERT(addr.IsScatterGather());
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zt, pg, addr);
    return;
  }

  UseScratchRegisterScope temps(this);
  if (addr.IsScatterGather()) {
    // In scatter-gather modes, zt and zn/zm have the same lane size. However,
    // for 32-bit accesses, the result of each lane's address calculation still
    // requires 64 bits; we can't naively use `Adr` for the address calculation
    // because it would truncate each address to 32 bits.

    if (addr.IsVectorPlusImmediate()) {
      // Synthesise the immediate in an X register, then use a
      // scalar-plus-vector access with the original vector.
      Register scratch = temps.AcquireX();
      Mov(scratch, addr.GetImmediateOffset());
      SingleEmissionCheckScope guard(this);
      SVEOffsetModifier om =
          zt.IsLaneSizeS() ? SVE_UXTW : NO_SVE_OFFSET_MODIFIER;
      (this->*fn)(zt, pg, SVEMemOperand(scratch, addr.GetVectorBase(), om));
      return;
    }

    VIXL_UNIMPLEMENTED();
  } else {
    Register scratch = temps.AcquireX();
    // TODO: If we have an immediate offset that is a multiple of
    // msize_in_bytes, we can use Rdvl/Rdpl and a scalar-plus-scalar form to
    // save an instruction.
    int vl_divisor_log2 = zt.GetLaneSizeInBytesLog2() - msize_in_bytes_log2;
    CalculateSVEAddress(scratch, addr, vl_divisor_log2);
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zt, pg, SVEMemOperand(scratch));
  }
}

template <typename Tf>
void MacroAssembler::SVELoadFFHelper(int msize_in_bytes_log2,
                                     const ZRegister& zt,
                                     const PRegisterZ& pg,
                                     const SVEMemOperand& addr,
                                     Tf fn) {
  if (addr.IsScatterGather()) {
    // Scatter-gather first-fault loads share encodings with normal loads.
    SVELoadStore1Helper(msize_in_bytes_log2, zt, pg, addr, fn);
    return;
  }

  // Contiguous first-faulting loads have no scalar-plus-immediate form at all,
  // so we don't do immediate synthesis.

  // We cannot currently distinguish "[x0]" from "[x0, #0]", and this
  // is not "scalar-plus-scalar", so we have to permit `IsPlainScalar()` here.
  if (addr.IsPlainScalar() || (addr.IsScalarPlusScalar() &&
                               addr.IsEquivalentToLSL(msize_in_bytes_log2))) {
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zt, pg, addr);
    return;
  }

  VIXL_UNIMPLEMENTED();
}

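// The Ld1*/St1* wrappers (and the Ldff1* wrappers below) simply bind the
// memory element size and the underlying assembler instruction, then defer to
// the helpers above.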
void MacroAssembler::Ld1b(const ZRegister& zt,
                          const PRegisterZ& pg,
                          const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadStore1Helper(kBRegSizeInBytesLog2,
                      zt,
                      pg,
                      addr,
                      static_cast<SVELoad1Fn>(&Assembler::ld1b));
}

void MacroAssembler::Ld1h(const ZRegister& zt,
                          const PRegisterZ& pg,
                          const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadStore1Helper(kHRegSizeInBytesLog2,
                      zt,
                      pg,
                      addr,
                      static_cast<SVELoad1Fn>(&Assembler::ld1h));
}

void MacroAssembler::Ld1w(const ZRegister& zt,
                          const PRegisterZ& pg,
                          const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadStore1Helper(kWRegSizeInBytesLog2,
                      zt,
                      pg,
                      addr,
                      static_cast<SVELoad1Fn>(&Assembler::ld1w));
}

void MacroAssembler::Ld1d(const ZRegister& zt,
                          const PRegisterZ& pg,
                          const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadStore1Helper(kDRegSizeInBytesLog2,
                      zt,
                      pg,
                      addr,
                      static_cast<SVELoad1Fn>(&Assembler::ld1d));
}

void MacroAssembler::Ld1sb(const ZRegister& zt,
                           const PRegisterZ& pg,
                           const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadStore1Helper(kBRegSizeInBytesLog2,
                      zt,
                      pg,
                      addr,
                      static_cast<SVELoad1Fn>(&Assembler::ld1sb));
}

void MacroAssembler::Ld1sh(const ZRegister& zt,
                           const PRegisterZ& pg,
                           const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadStore1Helper(kHRegSizeInBytesLog2,
                      zt,
                      pg,
                      addr,
                      static_cast<SVELoad1Fn>(&Assembler::ld1sh));
}

void MacroAssembler::Ld1sw(const ZRegister& zt,
                           const PRegisterZ& pg,
                           const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadStore1Helper(kSRegSizeInBytesLog2,
                      zt,
                      pg,
                      addr,
                      static_cast<SVELoad1Fn>(&Assembler::ld1sw));
}

void MacroAssembler::St1b(const ZRegister& zt,
                          const PRegister& pg,
                          const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadStore1Helper(kBRegSizeInBytesLog2,
                      zt,
                      pg,
                      addr,
                      static_cast<SVEStore1Fn>(&Assembler::st1b));
}

void MacroAssembler::St1h(const ZRegister& zt,
                          const PRegister& pg,
                          const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadStore1Helper(kHRegSizeInBytesLog2,
                      zt,
                      pg,
                      addr,
                      static_cast<SVEStore1Fn>(&Assembler::st1h));
}

void MacroAssembler::St1w(const ZRegister& zt,
                          const PRegister& pg,
                          const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadStore1Helper(kSRegSizeInBytesLog2,
                      zt,
                      pg,
                      addr,
                      static_cast<SVEStore1Fn>(&Assembler::st1w));
}

void MacroAssembler::St1d(const ZRegister& zt,
                          const PRegister& pg,
                          const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadStore1Helper(kDRegSizeInBytesLog2,
                      zt,
                      pg,
                      addr,
                      static_cast<SVEStore1Fn>(&Assembler::st1d));
}

void MacroAssembler::Ldff1b(const ZRegister& zt,
                            const PRegisterZ& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadFFHelper(kBRegSizeInBytesLog2,
                  zt,
                  pg,
                  addr,
                  static_cast<SVELoad1Fn>(&Assembler::ldff1b));
}

void MacroAssembler::Ldff1h(const ZRegister& zt,
                            const PRegisterZ& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadFFHelper(kHRegSizeInBytesLog2,
                  zt,
                  pg,
                  addr,
                  static_cast<SVELoad1Fn>(&Assembler::ldff1h));
}

void MacroAssembler::Ldff1w(const ZRegister& zt,
                            const PRegisterZ& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadFFHelper(kSRegSizeInBytesLog2,
                  zt,
                  pg,
                  addr,
                  static_cast<SVELoad1Fn>(&Assembler::ldff1w));
}

void MacroAssembler::Ldff1d(const ZRegister& zt,
                            const PRegisterZ& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadFFHelper(kDRegSizeInBytesLog2,
                  zt,
                  pg,
                  addr,
                  static_cast<SVELoad1Fn>(&Assembler::ldff1d));
}

void MacroAssembler::Ldff1sb(const ZRegister& zt,
                             const PRegisterZ& pg,
                             const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadFFHelper(kBRegSizeInBytesLog2,
                  zt,
                  pg,
                  addr,
                  static_cast<SVELoad1Fn>(&Assembler::ldff1sb));
}

void MacroAssembler::Ldff1sh(const ZRegister& zt,
                             const PRegisterZ& pg,
                             const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadFFHelper(kHRegSizeInBytesLog2,
                  zt,
                  pg,
                  addr,
                  static_cast<SVELoad1Fn>(&Assembler::ldff1sh));
}

void MacroAssembler::Ldff1sw(const ZRegister& zt,
                             const PRegisterZ& pg,
                             const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVELoadFFHelper(kSRegSizeInBytesLog2,
                  zt,
                  pg,
                  addr,
                  static_cast<SVELoad1Fn>(&Assembler::ldff1sw));
}

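// Define Ld1rq{b,h,w,d} and Ld1ro{b,h,w,d}: broadcast loads of a 16-byte
// quadword (shift 4) or a 32-byte octoword (shift 5), each taking a signed
// 4-bit immediate offset scaled by the block size.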
#define VIXL_SVE_LD1R_LIST(V) \
  V(qb, 4) V(qh, 4) V(qw, 4) V(qd, 4) V(ob, 5) V(oh, 5) V(ow, 5) V(od, 5)

#define VIXL_DEFINE_MASM_FUNC(SZ, SH)                          \
  void MacroAssembler::Ld1r##SZ(const ZRegister& zt,           \
                                const PRegisterZ& pg,          \
                                const SVEMemOperand& addr) {   \
    VIXL_ASSERT(allow_macro_instructions_);                    \
    SVELoadStoreNTBroadcastQOHelper(zt,                        \
                                    pg,                        \
                                    addr,                      \
                                    &MacroAssembler::ld1r##SZ, \
                                    4,                         \
                                    SH,                        \
                                    NO_SVE_OFFSET_MODIFIER,    \
                                    -1);                       \
  }

VIXL_SVE_LD1R_LIST(VIXL_DEFINE_MASM_FUNC)

#undef VIXL_DEFINE_MASM_FUNC
#undef VIXL_SVE_LD1R_LIST

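// Non-temporal loads and stores also accept a vector-plus-scalar form (an
// SVE2 addressing mode), which is passed straight through; all other forms go
// via the non-temporal helper with a signed 4-bit MUL VL immediate.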
void MacroAssembler::Ldnt1b(const ZRegister& zt,
                            const PRegisterZ& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (addr.IsVectorPlusScalar()) {
    SingleEmissionCheckScope guard(this);
    ldnt1b(zt, pg, addr);
  } else {
    SVELoadStoreNTBroadcastQOHelper(zt,
                                    pg,
                                    addr,
                                    &MacroAssembler::ldnt1b,
                                    4,
                                    0,
                                    SVE_MUL_VL);
  }
}

void MacroAssembler::Ldnt1d(const ZRegister& zt,
                            const PRegisterZ& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (addr.IsVectorPlusScalar()) {
    SingleEmissionCheckScope guard(this);
    ldnt1d(zt, pg, addr);
  } else {
    SVELoadStoreNTBroadcastQOHelper(zt,
                                    pg,
                                    addr,
                                    &MacroAssembler::ldnt1d,
                                    4,
                                    0,
                                    SVE_MUL_VL);
  }
}

void MacroAssembler::Ldnt1h(const ZRegister& zt,
                            const PRegisterZ& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (addr.IsVectorPlusScalar()) {
    SingleEmissionCheckScope guard(this);
    ldnt1h(zt, pg, addr);
  } else {
    SVELoadStoreNTBroadcastQOHelper(zt,
                                    pg,
                                    addr,
                                    &MacroAssembler::ldnt1h,
                                    4,
                                    0,
                                    SVE_MUL_VL);
  }
}

void MacroAssembler::Ldnt1w(const ZRegister& zt,
                            const PRegisterZ& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (addr.IsVectorPlusScalar()) {
    SingleEmissionCheckScope guard(this);
    ldnt1w(zt, pg, addr);
  } else {
    SVELoadStoreNTBroadcastQOHelper(zt,
                                    pg,
                                    addr,
                                    &MacroAssembler::ldnt1w,
                                    4,
                                    0,
                                    SVE_MUL_VL);
  }
}

void MacroAssembler::Stnt1b(const ZRegister& zt,
                            const PRegister& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (addr.IsVectorPlusScalar()) {
    SingleEmissionCheckScope guard(this);
    stnt1b(zt, pg, addr);
  } else {
    SVELoadStoreNTBroadcastQOHelper(zt,
                                    pg,
                                    addr,
                                    &MacroAssembler::stnt1b,
                                    4,
                                    0,
                                    SVE_MUL_VL);
  }
}

void MacroAssembler::Stnt1d(const ZRegister& zt,
                            const PRegister& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (addr.IsVectorPlusScalar()) {
    SingleEmissionCheckScope guard(this);
    stnt1d(zt, pg, addr);
  } else {
    SVELoadStoreNTBroadcastQOHelper(zt,
                                    pg,
                                    addr,
                                    &MacroAssembler::stnt1d,
                                    4,
                                    0,
                                    SVE_MUL_VL);
  }
}

void MacroAssembler::Stnt1h(const ZRegister& zt,
                            const PRegister& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (addr.IsVectorPlusScalar()) {
    SingleEmissionCheckScope guard(this);
    stnt1h(zt, pg, addr);
  } else {
    SVELoadStoreNTBroadcastQOHelper(zt,
                                    pg,
                                    addr,
                                    &MacroAssembler::stnt1h,
                                    4,
                                    0,
                                    SVE_MUL_VL);
  }
}

void MacroAssembler::Stnt1w(const ZRegister& zt,
                            const PRegister& pg,
                            const SVEMemOperand& addr) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (addr.IsVectorPlusScalar()) {
    SingleEmissionCheckScope guard(this);
    stnt1w(zt, pg, addr);
  } else {
    SVELoadStoreNTBroadcastQOHelper(zt,
                                    pg,
                                    addr,
                                    &MacroAssembler::stnt1w,
                                    4,
                                    0,
                                    SVE_MUL_VL);
  }
}

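// Indexed dot products are destructive in the addend. If zd aliases a
// multiplicand but not za, the result is built in a scratch register and then
// moved into zd; otherwise movprfx copies za into zd first.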
SVEDotIndexHelper(ZZZImmFn fn,const ZRegister & zd,const ZRegister & za,const ZRegister & zn,const ZRegister & zm,int index)1665*f5c631daSSadaf Ebrahimi void MacroAssembler::SVEDotIndexHelper(ZZZImmFn fn,
1666*f5c631daSSadaf Ebrahimi                                        const ZRegister& zd,
1667*f5c631daSSadaf Ebrahimi                                        const ZRegister& za,
1668*f5c631daSSadaf Ebrahimi                                        const ZRegister& zn,
1669*f5c631daSSadaf Ebrahimi                                        const ZRegister& zm,
1670*f5c631daSSadaf Ebrahimi                                        int index) {
1671*f5c631daSSadaf Ebrahimi   if (zd.Aliases(za)) {
1672*f5c631daSSadaf Ebrahimi   // zda = zda + (zn . zm[index])
1673*f5c631daSSadaf Ebrahimi     SingleEmissionCheckScope guard(this);
1674*f5c631daSSadaf Ebrahimi     (this->*fn)(zd, zn, zm, index);
1675*f5c631daSSadaf Ebrahimi 
1676*f5c631daSSadaf Ebrahimi   } else if (zd.Aliases(zn) || zd.Aliases(zm)) {
1677*f5c631daSSadaf Ebrahimi     // zdn = za + (zdn . zm[index])
1678*f5c631daSSadaf Ebrahimi     // zdm = za + (zn . zdm[index])
1679*f5c631daSSadaf Ebrahimi     // zdnm = za + (zdnm . zdnm[index])
1680*f5c631daSSadaf Ebrahimi     UseScratchRegisterScope temps(this);
1681*f5c631daSSadaf Ebrahimi     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
1682*f5c631daSSadaf Ebrahimi     {
1683*f5c631daSSadaf Ebrahimi       MovprfxHelperScope guard(this, scratch, za);
1684*f5c631daSSadaf Ebrahimi       (this->*fn)(scratch, zn, zm, index);
1685*f5c631daSSadaf Ebrahimi     }
1686*f5c631daSSadaf Ebrahimi 
1687*f5c631daSSadaf Ebrahimi     Mov(zd, scratch);
1688*f5c631daSSadaf Ebrahimi   } else {
1689*f5c631daSSadaf Ebrahimi     // zd = za + (zn . zm[index])
1690*f5c631daSSadaf Ebrahimi     MovprfxHelperScope guard(this, zd, za);
1691*f5c631daSSadaf Ebrahimi     (this->*fn)(zd, zn, zm, index);
1692*f5c631daSSadaf Ebrahimi   }
1693*f5c631daSSadaf Ebrahimi }
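
// As an illustrative sketch (the register numbers here are hypothetical; the
// scratch register actually used depends on the temp pool), a call where zd
// aliases zn, such as Sdot(z0.VnS(), z1.VnS(), z0.VnB(), z2.VnB(), 1), takes
// the middle branch above and emits roughly:
//
//   movprfx z31, z1            // scratch = za
//   sdot z31.s, z0.b, z2.b[1]  // scratch += zn . zm[index]
//   mov z0.d, z31.d            // zd = scratch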
1694*f5c631daSSadaf Ebrahimi 
1695*f5c631daSSadaf Ebrahimi void MacroAssembler::FourRegDestructiveHelper(Int3ArithFn fn,
1696*f5c631daSSadaf Ebrahimi                                               const ZRegister& zd,
1697*f5c631daSSadaf Ebrahimi                                               const ZRegister& za,
1698*f5c631daSSadaf Ebrahimi                                               const ZRegister& zn,
1699*f5c631daSSadaf Ebrahimi                                               const ZRegister& zm) {
1700*f5c631daSSadaf Ebrahimi   if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) {
1701*f5c631daSSadaf Ebrahimi     // zd = za . zd . zm
1702*f5c631daSSadaf Ebrahimi     // zd = za . zn . zd
1703*f5c631daSSadaf Ebrahimi     // zd = za . zd . zd
1704*f5c631daSSadaf Ebrahimi     UseScratchRegisterScope temps(this);
1705*f5c631daSSadaf Ebrahimi     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
1706*f5c631daSSadaf Ebrahimi     {
1707*f5c631daSSadaf Ebrahimi       MovprfxHelperScope guard(this, scratch, za);
1708*f5c631daSSadaf Ebrahimi       (this->*fn)(scratch, zn, zm);
1709*f5c631daSSadaf Ebrahimi     }
1710*f5c631daSSadaf Ebrahimi 
1711*f5c631daSSadaf Ebrahimi     Mov(zd, scratch);
1712*f5c631daSSadaf Ebrahimi   } else {
1713*f5c631daSSadaf Ebrahimi     MovprfxHelperScope guard(this, zd, za);
1714*f5c631daSSadaf Ebrahimi     (this->*fn)(zd, zn, zm);
1715*f5c631daSSadaf Ebrahimi   }
1716*f5c631daSSadaf Ebrahimi }
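
// For example (a sketch with a hypothetical scratch register), when zd
// aliases zn but not za, Udot(z2.VnS(), z3.VnS(), z2.VnB(), z4.VnB()) emits
// roughly:
//
//   movprfx z31, z3         // scratch = za
//   udot z31.s, z2.b, z4.b  // scratch += zn . zm
//   mov z2.d, z31.d         // zd = scratch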
1717*f5c631daSSadaf Ebrahimi 
1718*f5c631daSSadaf Ebrahimi void MacroAssembler::FourRegDestructiveHelper(Int4ArithFn fn,
1719*f5c631daSSadaf Ebrahimi                                               const ZRegister& zd,
1720*f5c631daSSadaf Ebrahimi                                               const ZRegister& za,
1721*f5c631daSSadaf Ebrahimi                                               const ZRegister& zn,
1722*f5c631daSSadaf Ebrahimi                                               const ZRegister& zm) {
1723*f5c631daSSadaf Ebrahimi   if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) {
1724*f5c631daSSadaf Ebrahimi     // zd = za . zd . zm
1725*f5c631daSSadaf Ebrahimi     // zd = za . zn . zd
1726*f5c631daSSadaf Ebrahimi     // zd = za . zd . zd
1727*f5c631daSSadaf Ebrahimi     UseScratchRegisterScope temps(this);
1728*f5c631daSSadaf Ebrahimi     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
1729*f5c631daSSadaf Ebrahimi     {
1730*f5c631daSSadaf Ebrahimi       MovprfxHelperScope guard(this, scratch, za);
1731*f5c631daSSadaf Ebrahimi       (this->*fn)(scratch, scratch, zn, zm);
1732*f5c631daSSadaf Ebrahimi     }
1733*f5c631daSSadaf Ebrahimi 
1734*f5c631daSSadaf Ebrahimi     Mov(zd, scratch);
1735*f5c631daSSadaf Ebrahimi   } else {
1736*f5c631daSSadaf Ebrahimi     MovprfxHelperScope guard(this, zd, za);
1737*f5c631daSSadaf Ebrahimi     (this->*fn)(zd, zd, zn, zm);
1738*f5c631daSSadaf Ebrahimi   }
1739*f5c631daSSadaf Ebrahimi }
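
// The Int4ArithFn variant serves instructions whose first source must be the
// (destructive) destination itself, such as eor3. For example,
// Eor3(z0.VnD(), z1.VnD(), z2.VnD(), z3.VnD()) has no aliasing hazard, takes
// the else branch, and emits roughly:
//
//   movprfx z0, z1               // zd = za
//   eor3 z0.d, z0.d, z2.d, z3.d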
1740*f5c631daSSadaf Ebrahimi 
1741*f5c631daSSadaf Ebrahimi void MacroAssembler::FourRegOneImmDestructiveHelper(ZZZImmFn fn,
1742*f5c631daSSadaf Ebrahimi                                                     const ZRegister& zd,
1743*f5c631daSSadaf Ebrahimi                                                     const ZRegister& za,
1744*f5c631daSSadaf Ebrahimi                                                     const ZRegister& zn,
1745*f5c631daSSadaf Ebrahimi                                                     const ZRegister& zm,
1746*f5c631daSSadaf Ebrahimi                                                     int imm) {
1747*f5c631daSSadaf Ebrahimi   if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) {
1748*f5c631daSSadaf Ebrahimi     // zd = za . zd . zm[i]
1749*f5c631daSSadaf Ebrahimi     // zd = za . zn . zd[i]
1750*f5c631daSSadaf Ebrahimi     // zd = za . zd . zd[i]
1751*f5c631daSSadaf Ebrahimi     UseScratchRegisterScope temps(this);
1752*f5c631daSSadaf Ebrahimi     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
1753*f5c631daSSadaf Ebrahimi     {
1754*f5c631daSSadaf Ebrahimi       MovprfxHelperScope guard(this, scratch, za);
1755*f5c631daSSadaf Ebrahimi       (this->*fn)(scratch, zn, zm, imm);
1756*f5c631daSSadaf Ebrahimi     }
1757*f5c631daSSadaf Ebrahimi 
1758*f5c631daSSadaf Ebrahimi     Mov(zd, scratch);
1759*f5c631daSSadaf Ebrahimi   } else {
1760*f5c631daSSadaf Ebrahimi     // zd = za . zn . zm[i]
1761*f5c631daSSadaf Ebrahimi     MovprfxHelperScope guard(this, zd, za);
1762*f5c631daSSadaf Ebrahimi     (this->*fn)(zd, zn, zm, imm);
1763*f5c631daSSadaf Ebrahimi   }
1764*f5c631daSSadaf Ebrahimi }
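
// For instance (hypothetical registers), Mla(z0.VnH(), z1.VnH(), z2.VnH(),
// z3.VnH(), 2) has no aliasing hazard, takes the else branch, and emits
// roughly:
//
//   movprfx z0, z1           // zd = za
//   mla z0.h, z2.h, z3.h[2]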
1765*f5c631daSSadaf Ebrahimi 
1766*f5c631daSSadaf Ebrahimi void MacroAssembler::AbsoluteDifferenceAccumulate(Int3ArithFn fn,
1767*f5c631daSSadaf Ebrahimi                                                   const ZRegister& zd,
1768*f5c631daSSadaf Ebrahimi                                                   const ZRegister& za,
1769*f5c631daSSadaf Ebrahimi                                                   const ZRegister& zn,
1770*f5c631daSSadaf Ebrahimi                                                   const ZRegister& zm) {
1771*f5c631daSSadaf Ebrahimi   if (zn.Aliases(zm)) {
1772*f5c631daSSadaf Ebrahimi     // If zn == zm, the difference is zero.
1773*f5c631daSSadaf Ebrahimi     if (!zd.Aliases(za)) {
1774*f5c631daSSadaf Ebrahimi       Mov(zd, za);
1775*f5c631daSSadaf Ebrahimi     }
1776*f5c631daSSadaf Ebrahimi   } else if (zd.Aliases(za)) {
1777*f5c631daSSadaf Ebrahimi     SingleEmissionCheckScope guard(this);
1778*f5c631daSSadaf Ebrahimi     (this->*fn)(zd, zn, zm);
1779*f5c631daSSadaf Ebrahimi   } else if (zd.Aliases(zn)) {
1780*f5c631daSSadaf Ebrahimi     UseScratchRegisterScope temps(this);
1781*f5c631daSSadaf Ebrahimi     ZRegister ztmp = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
1782*f5c631daSSadaf Ebrahimi     Mov(ztmp, zn);
1783*f5c631daSSadaf Ebrahimi     MovprfxHelperScope guard(this, zd, za);
1784*f5c631daSSadaf Ebrahimi     (this->*fn)(zd, ztmp, zm);
1785*f5c631daSSadaf Ebrahimi   } else if (zd.Aliases(zm)) {
1786*f5c631daSSadaf Ebrahimi     UseScratchRegisterScope temps(this);
1787*f5c631daSSadaf Ebrahimi     ZRegister ztmp = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
1788*f5c631daSSadaf Ebrahimi     Mov(ztmp, zm);
1789*f5c631daSSadaf Ebrahimi     MovprfxHelperScope guard(this, zd, za);
1790*f5c631daSSadaf Ebrahimi     (this->*fn)(zd, zn, ztmp);
1791*f5c631daSSadaf Ebrahimi   } else {
1792*f5c631daSSadaf Ebrahimi     MovprfxHelperScope guard(this, zd, za);
1793*f5c631daSSadaf Ebrahimi     (this->*fn)(zd, zn, zm);
1794*f5c631daSSadaf Ebrahimi   }
1795*f5c631daSSadaf Ebrahimi }
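
// Note that the zn == zm case degenerates to a plain register move: for
// example, Saba(z0.VnB(), z1.VnB(), z2.VnB(), z2.VnB()) reduces to
// mov z0.d, z1.d, since accumulating a zero difference leaves za unchanged.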
1796*f5c631daSSadaf Ebrahimi 
1797*f5c631daSSadaf Ebrahimi #define VIXL_SVE_4REG_LIST(V)                       \
1798*f5c631daSSadaf Ebrahimi   V(Saba, saba, AbsoluteDifferenceAccumulate)       \
1799*f5c631daSSadaf Ebrahimi   V(Uaba, uaba, AbsoluteDifferenceAccumulate)       \
1800*f5c631daSSadaf Ebrahimi   V(Sabalb, sabalb, AbsoluteDifferenceAccumulate)   \
1801*f5c631daSSadaf Ebrahimi   V(Sabalt, sabalt, AbsoluteDifferenceAccumulate)   \
1802*f5c631daSSadaf Ebrahimi   V(Uabalb, uabalb, AbsoluteDifferenceAccumulate)   \
1803*f5c631daSSadaf Ebrahimi   V(Uabalt, uabalt, AbsoluteDifferenceAccumulate)   \
1804*f5c631daSSadaf Ebrahimi   V(Sdot, sdot, FourRegDestructiveHelper)           \
1805*f5c631daSSadaf Ebrahimi   V(Udot, udot, FourRegDestructiveHelper)           \
1806*f5c631daSSadaf Ebrahimi   V(Adclb, adclb, FourRegDestructiveHelper)         \
1807*f5c631daSSadaf Ebrahimi   V(Adclt, adclt, FourRegDestructiveHelper)         \
1808*f5c631daSSadaf Ebrahimi   V(Sbclb, sbclb, FourRegDestructiveHelper)         \
1809*f5c631daSSadaf Ebrahimi   V(Sbclt, sbclt, FourRegDestructiveHelper)         \
1810*f5c631daSSadaf Ebrahimi   V(Smlalb, smlalb, FourRegDestructiveHelper)       \
1811*f5c631daSSadaf Ebrahimi   V(Smlalt, smlalt, FourRegDestructiveHelper)       \
1812*f5c631daSSadaf Ebrahimi   V(Smlslb, smlslb, FourRegDestructiveHelper)       \
1813*f5c631daSSadaf Ebrahimi   V(Smlslt, smlslt, FourRegDestructiveHelper)       \
1814*f5c631daSSadaf Ebrahimi   V(Umlalb, umlalb, FourRegDestructiveHelper)       \
1815*f5c631daSSadaf Ebrahimi   V(Umlalt, umlalt, FourRegDestructiveHelper)       \
1816*f5c631daSSadaf Ebrahimi   V(Umlslb, umlslb, FourRegDestructiveHelper)       \
1817*f5c631daSSadaf Ebrahimi   V(Umlslt, umlslt, FourRegDestructiveHelper)       \
1818*f5c631daSSadaf Ebrahimi   V(Bcax, bcax, FourRegDestructiveHelper)           \
1819*f5c631daSSadaf Ebrahimi   V(Bsl, bsl, FourRegDestructiveHelper)             \
1820*f5c631daSSadaf Ebrahimi   V(Bsl1n, bsl1n, FourRegDestructiveHelper)         \
1821*f5c631daSSadaf Ebrahimi   V(Bsl2n, bsl2n, FourRegDestructiveHelper)         \
1822*f5c631daSSadaf Ebrahimi   V(Eor3, eor3, FourRegDestructiveHelper)           \
1823*f5c631daSSadaf Ebrahimi   V(Nbsl, nbsl, FourRegDestructiveHelper)           \
1824*f5c631daSSadaf Ebrahimi   V(Fmlalb, fmlalb, FourRegDestructiveHelper)       \
1825*f5c631daSSadaf Ebrahimi   V(Fmlalt, fmlalt, FourRegDestructiveHelper)       \
1826*f5c631daSSadaf Ebrahimi   V(Fmlslb, fmlslb, FourRegDestructiveHelper)       \
1827*f5c631daSSadaf Ebrahimi   V(Fmlslt, fmlslt, FourRegDestructiveHelper)       \
1828*f5c631daSSadaf Ebrahimi   V(Sqdmlalb, sqdmlalb, FourRegDestructiveHelper)   \
1829*f5c631daSSadaf Ebrahimi   V(Sqdmlalbt, sqdmlalbt, FourRegDestructiveHelper) \
1830*f5c631daSSadaf Ebrahimi   V(Sqdmlalt, sqdmlalt, FourRegDestructiveHelper)   \
1831*f5c631daSSadaf Ebrahimi   V(Sqdmlslb, sqdmlslb, FourRegDestructiveHelper)   \
1832*f5c631daSSadaf Ebrahimi   V(Sqdmlslbt, sqdmlslbt, FourRegDestructiveHelper) \
1833*f5c631daSSadaf Ebrahimi   V(Sqdmlslt, sqdmlslt, FourRegDestructiveHelper)   \
1834*f5c631daSSadaf Ebrahimi   V(Sqrdmlah, sqrdmlah, FourRegDestructiveHelper)   \
1835*f5c631daSSadaf Ebrahimi   V(Sqrdmlsh, sqrdmlsh, FourRegDestructiveHelper)   \
1836*f5c631daSSadaf Ebrahimi   V(Fmmla, fmmla, FourRegDestructiveHelper)         \
1837*f5c631daSSadaf Ebrahimi   V(Smmla, smmla, FourRegDestructiveHelper)         \
1838*f5c631daSSadaf Ebrahimi   V(Ummla, ummla, FourRegDestructiveHelper)         \
1839*f5c631daSSadaf Ebrahimi   V(Usmmla, usmmla, FourRegDestructiveHelper)       \
1840*f5c631daSSadaf Ebrahimi   V(Usdot, usdot, FourRegDestructiveHelper)
1841*f5c631daSSadaf Ebrahimi 
1842*f5c631daSSadaf Ebrahimi #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \
1843*f5c631daSSadaf Ebrahimi   void MacroAssembler::MASMFN(const ZRegister& zd,   \
1844*f5c631daSSadaf Ebrahimi                               const ZRegister& za,   \
1845*f5c631daSSadaf Ebrahimi                               const ZRegister& zn,   \
1846*f5c631daSSadaf Ebrahimi                               const ZRegister& zm) { \
1847*f5c631daSSadaf Ebrahimi     VIXL_ASSERT(allow_macro_instructions_);          \
1848*f5c631daSSadaf Ebrahimi     HELPER(&Assembler::ASMFN, zd, za, zn, zm);       \
1849*f5c631daSSadaf Ebrahimi   }
1850*f5c631daSSadaf Ebrahimi VIXL_SVE_4REG_LIST(VIXL_DEFINE_MASM_FUNC)
1851*f5c631daSSadaf Ebrahimi #undef VIXL_DEFINE_MASM_FUNC
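
// For reference, each list entry expands to a thin wrapper; for instance,
// V(Saba, saba, AbsoluteDifferenceAccumulate) produces:
//
//   void MacroAssembler::Saba(const ZRegister& zd,
//                             const ZRegister& za,
//                             const ZRegister& zn,
//                             const ZRegister& zm) {
//     VIXL_ASSERT(allow_macro_instructions_);
//     AbsoluteDifferenceAccumulate(&Assembler::saba, zd, za, zn, zm);
//   }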
1852*f5c631daSSadaf Ebrahimi 
1853*f5c631daSSadaf Ebrahimi #define VIXL_SVE_4REG_1IMM_LIST(V)                      \
1854*f5c631daSSadaf Ebrahimi   V(Fmla, fmla, FourRegOneImmDestructiveHelper)         \
1855*f5c631daSSadaf Ebrahimi   V(Fmls, fmls, FourRegOneImmDestructiveHelper)         \
1856*f5c631daSSadaf Ebrahimi   V(Fmlalb, fmlalb, FourRegOneImmDestructiveHelper)     \
1857*f5c631daSSadaf Ebrahimi   V(Fmlalt, fmlalt, FourRegOneImmDestructiveHelper)     \
1858*f5c631daSSadaf Ebrahimi   V(Fmlslb, fmlslb, FourRegOneImmDestructiveHelper)     \
1859*f5c631daSSadaf Ebrahimi   V(Fmlslt, fmlslt, FourRegOneImmDestructiveHelper)     \
1860*f5c631daSSadaf Ebrahimi   V(Mla, mla, FourRegOneImmDestructiveHelper)           \
1861*f5c631daSSadaf Ebrahimi   V(Mls, mls, FourRegOneImmDestructiveHelper)           \
1862*f5c631daSSadaf Ebrahimi   V(Smlalb, smlalb, FourRegOneImmDestructiveHelper)     \
1863*f5c631daSSadaf Ebrahimi   V(Smlalt, smlalt, FourRegOneImmDestructiveHelper)     \
1864*f5c631daSSadaf Ebrahimi   V(Smlslb, smlslb, FourRegOneImmDestructiveHelper)     \
1865*f5c631daSSadaf Ebrahimi   V(Smlslt, smlslt, FourRegOneImmDestructiveHelper)     \
1866*f5c631daSSadaf Ebrahimi   V(Sqdmlalb, sqdmlalb, FourRegOneImmDestructiveHelper) \
1867*f5c631daSSadaf Ebrahimi   V(Sqdmlalt, sqdmlalt, FourRegOneImmDestructiveHelper) \
1868*f5c631daSSadaf Ebrahimi   V(Sqdmlslb, sqdmlslb, FourRegOneImmDestructiveHelper) \
1869*f5c631daSSadaf Ebrahimi   V(Sqdmlslt, sqdmlslt, FourRegOneImmDestructiveHelper) \
1870*f5c631daSSadaf Ebrahimi   V(Sqrdmlah, sqrdmlah, FourRegOneImmDestructiveHelper) \
1871*f5c631daSSadaf Ebrahimi   V(Sqrdmlsh, sqrdmlsh, FourRegOneImmDestructiveHelper) \
1872*f5c631daSSadaf Ebrahimi   V(Umlalb, umlalb, FourRegOneImmDestructiveHelper)     \
1873*f5c631daSSadaf Ebrahimi   V(Umlalt, umlalt, FourRegOneImmDestructiveHelper)     \
1874*f5c631daSSadaf Ebrahimi   V(Umlslb, umlslb, FourRegOneImmDestructiveHelper)     \
1875*f5c631daSSadaf Ebrahimi   V(Umlslt, umlslt, FourRegOneImmDestructiveHelper)
1876*f5c631daSSadaf Ebrahimi 
1877*f5c631daSSadaf Ebrahimi #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \
1878*f5c631daSSadaf Ebrahimi   void MacroAssembler::MASMFN(const ZRegister& zd,   \
1879*f5c631daSSadaf Ebrahimi                               const ZRegister& za,   \
1880*f5c631daSSadaf Ebrahimi                               const ZRegister& zn,   \
1881*f5c631daSSadaf Ebrahimi                               const ZRegister& zm,   \
1882*f5c631daSSadaf Ebrahimi                               int imm) {             \
1883*f5c631daSSadaf Ebrahimi     VIXL_ASSERT(allow_macro_instructions_);          \
1884*f5c631daSSadaf Ebrahimi     HELPER(&Assembler::ASMFN, zd, za, zn, zm, imm);  \
1885*f5c631daSSadaf Ebrahimi   }
1886*f5c631daSSadaf Ebrahimi VIXL_SVE_4REG_1IMM_LIST(VIXL_DEFINE_MASM_FUNC)
1887*f5c631daSSadaf Ebrahimi #undef VIXL_DEFINE_MASM_FUNC
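
// Likewise, each indexed entry expands to a five-argument wrapper; for
// instance, V(Fmla, fmla, FourRegOneImmDestructiveHelper) produces:
//
//   void MacroAssembler::Fmla(const ZRegister& zd,
//                             const ZRegister& za,
//                             const ZRegister& zn,
//                             const ZRegister& zm,
//                             int imm) {
//     VIXL_ASSERT(allow_macro_instructions_);
//     FourRegOneImmDestructiveHelper(&Assembler::fmla, zd, za, zn, zm, imm);
//   }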
1888*f5c631daSSadaf Ebrahimi 
1889*f5c631daSSadaf Ebrahimi void MacroAssembler::Sdot(const ZRegister& zd,
1890*f5c631daSSadaf Ebrahimi                           const ZRegister& za,
1891*f5c631daSSadaf Ebrahimi                           const ZRegister& zn,
1892*f5c631daSSadaf Ebrahimi                           const ZRegister& zm,
1893*f5c631daSSadaf Ebrahimi                           int index) {
1894*f5c631daSSadaf Ebrahimi   VIXL_ASSERT(allow_macro_instructions_);
1895*f5c631daSSadaf Ebrahimi   SVEDotIndexHelper(&Assembler::sdot, zd, za, zn, zm, index);
1896*f5c631daSSadaf Ebrahimi }
1897*f5c631daSSadaf Ebrahimi 
1898*f5c631daSSadaf Ebrahimi void MacroAssembler::Udot(const ZRegister& zd,
1899*f5c631daSSadaf Ebrahimi                           const ZRegister& za,
1900*f5c631daSSadaf Ebrahimi                           const ZRegister& zn,
1901*f5c631daSSadaf Ebrahimi                           const ZRegister& zm,
1902*f5c631daSSadaf Ebrahimi                           int index) {
1903*f5c631daSSadaf Ebrahimi   VIXL_ASSERT(allow_macro_instructions_);
1904*f5c631daSSadaf Ebrahimi   SVEDotIndexHelper(&Assembler::udot, zd, za, zn, zm, index);
1905*f5c631daSSadaf Ebrahimi }
1906*f5c631daSSadaf Ebrahimi 
1907*f5c631daSSadaf Ebrahimi void MacroAssembler::Sudot(const ZRegister& zd,
1908*f5c631daSSadaf Ebrahimi                            const ZRegister& za,
1909*f5c631daSSadaf Ebrahimi                            const ZRegister& zn,
1910*f5c631daSSadaf Ebrahimi                            const ZRegister& zm,
1911*f5c631daSSadaf Ebrahimi                            int index) {
1912*f5c631daSSadaf Ebrahimi   VIXL_ASSERT(allow_macro_instructions_);
1913*f5c631daSSadaf Ebrahimi   SVEDotIndexHelper(&Assembler::sudot, zd, za, zn, zm, index);
1914*f5c631daSSadaf Ebrahimi }
1915*f5c631daSSadaf Ebrahimi 
1916*f5c631daSSadaf Ebrahimi void MacroAssembler::Usdot(const ZRegister& zd,
1917*f5c631daSSadaf Ebrahimi                            const ZRegister& za,
1918*f5c631daSSadaf Ebrahimi                            const ZRegister& zn,
1919*f5c631daSSadaf Ebrahimi                            const ZRegister& zm,
1920*f5c631daSSadaf Ebrahimi                            int index) {
1921*f5c631daSSadaf Ebrahimi   VIXL_ASSERT(allow_macro_instructions_);
1922*f5c631daSSadaf Ebrahimi   SVEDotIndexHelper(&Assembler::usdot, zd, za, zn, zm, index);
1923*f5c631daSSadaf Ebrahimi }
1924*f5c631daSSadaf Ebrahimi 
1925*f5c631daSSadaf Ebrahimi void MacroAssembler::Cdot(const ZRegister& zd,
1926*f5c631daSSadaf Ebrahimi                           const ZRegister& za,
1927*f5c631daSSadaf Ebrahimi                           const ZRegister& zn,
1928*f5c631daSSadaf Ebrahimi                           const ZRegister& zm,
1929*f5c631daSSadaf Ebrahimi                           int index,
1930*f5c631daSSadaf Ebrahimi                           int rot) {
1931*f5c631daSSadaf Ebrahimi   // This doesn't handle zm outside the range encodable in the instruction.
1932*f5c631daSSadaf Ebrahimi   // That range depends on the element size: z0-z7 for B, z0-z15 for H.
1933*f5c631daSSadaf Ebrahimi   if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) {
1934*f5c631daSSadaf Ebrahimi     UseScratchRegisterScope temps(this);
1935*f5c631daSSadaf Ebrahimi     ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd);
1936*f5c631daSSadaf Ebrahimi     {
1937*f5c631daSSadaf Ebrahimi       MovprfxHelperScope guard(this, ztmp, za);
1938*f5c631daSSadaf Ebrahimi       cdot(ztmp, zn, zm, index, rot);
1939*f5c631daSSadaf Ebrahimi     }
1940*f5c631daSSadaf Ebrahimi     Mov(zd, ztmp);
1941*f5c631daSSadaf Ebrahimi   } else {
1942*f5c631daSSadaf Ebrahimi     MovprfxHelperScope guard(this, zd, za);
1943*f5c631daSSadaf Ebrahimi     cdot(zd, zn, zm, index, rot);
1944*f5c631daSSadaf Ebrahimi   }
1945*f5c631daSSadaf Ebrahimi }
1946*f5c631daSSadaf Ebrahimi 
1947*f5c631daSSadaf Ebrahimi void MacroAssembler::Cdot(const ZRegister& zd,
1948*f5c631daSSadaf Ebrahimi                           const ZRegister& za,
1949*f5c631daSSadaf Ebrahimi                           const ZRegister& zn,
1950*f5c631daSSadaf Ebrahimi                           const ZRegister& zm,
1951*f5c631daSSadaf Ebrahimi                           int rot) {
1952*f5c631daSSadaf Ebrahimi   if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) {
1953*f5c631daSSadaf Ebrahimi     UseScratchRegisterScope temps(this);
1954*f5c631daSSadaf Ebrahimi     VIXL_ASSERT(AreSameLaneSize(zn, zm));
1955*f5c631daSSadaf Ebrahimi     ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn);
1956*f5c631daSSadaf Ebrahimi     Mov(ztmp, zd.Aliases(zn) ? zn : zm);
1957*f5c631daSSadaf Ebrahimi     MovprfxHelperScope guard(this, zd, za);
1958*f5c631daSSadaf Ebrahimi     cdot(zd, (zd.Aliases(zn) ? ztmp : zn), (zd.Aliases(zm) ? ztmp : zm), rot);
1959*f5c631daSSadaf Ebrahimi   } else {
1960*f5c631daSSadaf Ebrahimi     MovprfxHelperScope guard(this, zd, za);
1961*f5c631daSSadaf Ebrahimi     cdot(zd, zn, zm, rot);
1962*f5c631daSSadaf Ebrahimi   }
1963*f5c631daSSadaf Ebrahimi }
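
// A sketch of the aliasing path (hypothetical registers): for
// Cdot(z0.VnS(), z1.VnS(), z0.VnB(), z2.VnB(), 90), zd aliases zn, so zn is
// first copied aside and the accumulation then runs destructively in zd:
//
//   mov z31.d, z0.d              // ztmp = zn
//   movprfx z0, z1               // zd = za
//   cdot z0.s, z31.b, z2.b, #90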
1964*f5c631daSSadaf Ebrahimi 
1965*f5c631daSSadaf Ebrahimi void MacroAssembler::FPMulAddHelper(const ZRegister& zd,
1966*f5c631daSSadaf Ebrahimi                                     const PRegisterM& pg,
1967*f5c631daSSadaf Ebrahimi                                     const ZRegister& za,
1968*f5c631daSSadaf Ebrahimi                                     const ZRegister& zn,
1969*f5c631daSSadaf Ebrahimi                                     const ZRegister& zm,
1970*f5c631daSSadaf Ebrahimi                                     SVEMulAddPredicatedZdaFn fn_zda,
1971*f5c631daSSadaf Ebrahimi                                     SVEMulAddPredicatedZdnFn fn_zdn,
1972*f5c631daSSadaf Ebrahimi                                     FPMacroNaNPropagationOption nan_option) {
1973*f5c631daSSadaf Ebrahimi   ResolveFPNaNPropagationOption(&nan_option);
1974*f5c631daSSadaf Ebrahimi 
1975*f5c631daSSadaf Ebrahimi   if (zd.Aliases(za)) {
1976*f5c631daSSadaf Ebrahimi     // zda = (-)zda + ((-)zn * zm) for fmla, fmls, fnmla and fnmls.
1977*f5c631daSSadaf Ebrahimi     SingleEmissionCheckScope guard(this);
1978*f5c631daSSadaf Ebrahimi     (this->*fn_zda)(zd, pg, zn, zm);
1979*f5c631daSSadaf Ebrahimi   } else if (zd.Aliases(zn)) {
1980*f5c631daSSadaf Ebrahimi     // zdn = (-)za + ((-)zdn * zm) for fmad, fmsb, fnmad and fnmsb.
1981*f5c631daSSadaf Ebrahimi     SingleEmissionCheckScope guard(this);
1982*f5c631daSSadaf Ebrahimi     (this->*fn_zdn)(zd, pg, zm, za);
1983*f5c631daSSadaf Ebrahimi   } else if (zd.Aliases(zm)) {
1984*f5c631daSSadaf Ebrahimi     switch (nan_option) {
1985*f5c631daSSadaf Ebrahimi       case FastNaNPropagation: {
1986*f5c631daSSadaf Ebrahimi         // We treat multiplication as commutative in the fast mode, so we can
1987*f5c631daSSadaf Ebrahimi         // swap zn and zm.
1988*f5c631daSSadaf Ebrahimi         // zdm = (-)za + ((-)zdm * zn) for fmad, fmsb, fnmad and fnmsb.
1989*f5c631daSSadaf Ebrahimi         SingleEmissionCheckScope guard(this);
1990*f5c631daSSadaf Ebrahimi         (this->*fn_zdn)(zd, pg, zn, za);
1991*f5c631daSSadaf Ebrahimi         return;
1992*f5c631daSSadaf Ebrahimi       }
1993*f5c631daSSadaf Ebrahimi       case StrictNaNPropagation: {
1994*f5c631daSSadaf Ebrahimi         UseScratchRegisterScope temps(this);
1995*f5c631daSSadaf Ebrahimi         // Use a scratch register to keep the argument order exactly as
1996*f5c631daSSadaf Ebrahimi         // specified.
1997*f5c631daSSadaf Ebrahimi         ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn);
1998*f5c631daSSadaf Ebrahimi         {
1999*f5c631daSSadaf Ebrahimi           MovprfxHelperScope guard(this, scratch, pg, za);
2000*f5c631daSSadaf Ebrahimi           // scratch = (-)za + ((-)zn * zm)
2001*f5c631daSSadaf Ebrahimi           (this->*fn_zda)(scratch, pg, zn, zm);
2002*f5c631daSSadaf Ebrahimi         }
2003*f5c631daSSadaf Ebrahimi         Mov(zd, scratch);
2004*f5c631daSSadaf Ebrahimi         return;
2005*f5c631daSSadaf Ebrahimi       }
2006*f5c631daSSadaf Ebrahimi       case NoFPMacroNaNPropagationSelected:
2007*f5c631daSSadaf Ebrahimi         VIXL_UNREACHABLE();
2008*f5c631daSSadaf Ebrahimi         return;
2009*f5c631daSSadaf Ebrahimi     }
2010*f5c631daSSadaf Ebrahimi   } else {
2011*f5c631daSSadaf Ebrahimi     // zd = (-)za + ((-)zn * zm) for fmla, fmls, fnmla and fnmls.
2012*f5c631daSSadaf Ebrahimi     MovprfxHelperScope guard(this, zd, pg, za);
2013*f5c631daSSadaf Ebrahimi     (this->*fn_zda)(zd, pg, zn, zm);
2014*f5c631daSSadaf Ebrahimi   }
2015*f5c631daSSadaf Ebrahimi }
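
// For example, with Fmla(z0.VnS(), p0.Merging(), z1.VnS(), z2.VnS(),
// z0.VnS(), FastNaNPropagation), zd aliases zm, so the fast path treats the
// multiplication as commutative and emits a single instruction:
//
//   fmad z0.s, p0/m, z2.s, z1.s  // zd = za + (zd * zn)
//
// Under StrictNaNPropagation the same call instead goes through a scratch
// register so that fmla sees zn and zm in exactly the requested order.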
2016*f5c631daSSadaf Ebrahimi 
2017*f5c631daSSadaf Ebrahimi void MacroAssembler::Fmla(const ZRegister& zd,
2018*f5c631daSSadaf Ebrahimi                           const PRegisterM& pg,
2019*f5c631daSSadaf Ebrahimi                           const ZRegister& za,
2020*f5c631daSSadaf Ebrahimi                           const ZRegister& zn,
2021*f5c631daSSadaf Ebrahimi                           const ZRegister& zm,
2022*f5c631daSSadaf Ebrahimi                           FPMacroNaNPropagationOption nan_option) {
2023*f5c631daSSadaf Ebrahimi   VIXL_ASSERT(allow_macro_instructions_);
2024*f5c631daSSadaf Ebrahimi   FPMulAddHelper(zd,
2025*f5c631daSSadaf Ebrahimi                  pg,
2026*f5c631daSSadaf Ebrahimi                  za,
2027*f5c631daSSadaf Ebrahimi                  zn,
2028*f5c631daSSadaf Ebrahimi                  zm,
2029*f5c631daSSadaf Ebrahimi                  &Assembler::fmla,
2030*f5c631daSSadaf Ebrahimi                  &Assembler::fmad,
2031*f5c631daSSadaf Ebrahimi                  nan_option);
2032*f5c631daSSadaf Ebrahimi }
2033*f5c631daSSadaf Ebrahimi 
2034*f5c631daSSadaf Ebrahimi void MacroAssembler::Fmls(const ZRegister& zd,
2035*f5c631daSSadaf Ebrahimi                           const PRegisterM& pg,
2036*f5c631daSSadaf Ebrahimi                           const ZRegister& za,
2037*f5c631daSSadaf Ebrahimi                           const ZRegister& zn,
2038*f5c631daSSadaf Ebrahimi                           const ZRegister& zm,
2039*f5c631daSSadaf Ebrahimi                           FPMacroNaNPropagationOption nan_option) {
2040*f5c631daSSadaf Ebrahimi   VIXL_ASSERT(allow_macro_instructions_);
2041*f5c631daSSadaf Ebrahimi   FPMulAddHelper(zd,
2042*f5c631daSSadaf Ebrahimi                  pg,
2043*f5c631daSSadaf Ebrahimi                  za,
2044*f5c631daSSadaf Ebrahimi                  zn,
2045*f5c631daSSadaf Ebrahimi                  zm,
2046*f5c631daSSadaf Ebrahimi                  &Assembler::fmls,
2047*f5c631daSSadaf Ebrahimi                  &Assembler::fmsb,
2048*f5c631daSSadaf Ebrahimi                  nan_option);
2049*f5c631daSSadaf Ebrahimi }
2050*f5c631daSSadaf Ebrahimi 
2051*f5c631daSSadaf Ebrahimi void MacroAssembler::Fnmla(const ZRegister& zd,
2052*f5c631daSSadaf Ebrahimi                            const PRegisterM& pg,
2053*f5c631daSSadaf Ebrahimi                            const ZRegister& za,
2054*f5c631daSSadaf Ebrahimi                            const ZRegister& zn,
2055*f5c631daSSadaf Ebrahimi                            const ZRegister& zm,
2056*f5c631daSSadaf Ebrahimi                            FPMacroNaNPropagationOption nan_option) {
2057*f5c631daSSadaf Ebrahimi   VIXL_ASSERT(allow_macro_instructions_);
2058*f5c631daSSadaf Ebrahimi   FPMulAddHelper(zd,
2059*f5c631daSSadaf Ebrahimi                  pg,
2060*f5c631daSSadaf Ebrahimi                  za,
2061*f5c631daSSadaf Ebrahimi                  zn,
2062*f5c631daSSadaf Ebrahimi                  zm,
2063*f5c631daSSadaf Ebrahimi                  &Assembler::fnmla,
2064*f5c631daSSadaf Ebrahimi                  &Assembler::fnmad,
2065*f5c631daSSadaf Ebrahimi                  nan_option);
2066*f5c631daSSadaf Ebrahimi }
2067*f5c631daSSadaf Ebrahimi 
2068*f5c631daSSadaf Ebrahimi void MacroAssembler::Fnmls(const ZRegister& zd,
2069*f5c631daSSadaf Ebrahimi                            const PRegisterM& pg,
2070*f5c631daSSadaf Ebrahimi                            const ZRegister& za,
2071*f5c631daSSadaf Ebrahimi                            const ZRegister& zn,
2072*f5c631daSSadaf Ebrahimi                            const ZRegister& zm,
2073*f5c631daSSadaf Ebrahimi                            FPMacroNaNPropagationOption nan_option) {
2074*f5c631daSSadaf Ebrahimi   VIXL_ASSERT(allow_macro_instructions_);
2075*f5c631daSSadaf Ebrahimi   FPMulAddHelper(zd,
2076*f5c631daSSadaf Ebrahimi                  pg,
2077*f5c631daSSadaf Ebrahimi                  za,
2078*f5c631daSSadaf Ebrahimi                  zn,
2079*f5c631daSSadaf Ebrahimi                  zm,
2080*f5c631daSSadaf Ebrahimi                  &Assembler::fnmls,
2081*f5c631daSSadaf Ebrahimi                  &Assembler::fnmsb,
2082*f5c631daSSadaf Ebrahimi                  nan_option);
2083*f5c631daSSadaf Ebrahimi }
2084*f5c631daSSadaf Ebrahimi 
2085*f5c631daSSadaf Ebrahimi void MacroAssembler::Ftmad(const ZRegister& zd,
2086*f5c631daSSadaf Ebrahimi                            const ZRegister& zn,
2087*f5c631daSSadaf Ebrahimi                            const ZRegister& zm,
2088*f5c631daSSadaf Ebrahimi                            int imm3) {
2089*f5c631daSSadaf Ebrahimi   VIXL_ASSERT(allow_macro_instructions_);
2090*f5c631daSSadaf Ebrahimi   if (zd.Aliases(zm) && !zd.Aliases(zn)) {
2091*f5c631daSSadaf Ebrahimi     UseScratchRegisterScope temps(this);
2092*f5c631daSSadaf Ebrahimi     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm);
2093*f5c631daSSadaf Ebrahimi     Mov(scratch, zm);
2094*f5c631daSSadaf Ebrahimi     MovprfxHelperScope guard(this, zd, zn);
2095*f5c631daSSadaf Ebrahimi     ftmad(zd, zd, scratch, imm3);
2096*f5c631daSSadaf Ebrahimi   } else {
2097*f5c631daSSadaf Ebrahimi     MovprfxHelperScope guard(this, zd, zn);
2098*f5c631daSSadaf Ebrahimi     ftmad(zd, zd, zm, imm3);
2099*f5c631daSSadaf Ebrahimi   }
2100*f5c631daSSadaf Ebrahimi }
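
// ftmad is only destructive in its first source, so the helper only has to
// protect zm. For example (hypothetical scratch register),
// Ftmad(z0.VnD(), z1.VnD(), z0.VnD(), 2) copies zm aside before the movprfx
// overwrites it:
//
//   mov z31.d, z0.d              // scratch = zm
//   movprfx z0, z1               // zd = zn
//   ftmad z0.d, z0.d, z31.d, #2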
2101*f5c631daSSadaf Ebrahimi 
2102*f5c631daSSadaf Ebrahimi void MacroAssembler::Fcadd(const ZRegister& zd,
2103*f5c631daSSadaf Ebrahimi                            const PRegisterM& pg,
2104*f5c631daSSadaf Ebrahimi                            const ZRegister& zn,
2105*f5c631daSSadaf Ebrahimi                            const ZRegister& zm,
2106*f5c631daSSadaf Ebrahimi                            int rot) {
2107*f5c631daSSadaf Ebrahimi   VIXL_ASSERT(allow_macro_instructions_);
2108*f5c631daSSadaf Ebrahimi   if (zd.Aliases(zm) && !zd.Aliases(zn)) {
2109*f5c631daSSadaf Ebrahimi     UseScratchRegisterScope temps(this);
2110*f5c631daSSadaf Ebrahimi     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
2111*f5c631daSSadaf Ebrahimi     {
2112*f5c631daSSadaf Ebrahimi       MovprfxHelperScope guard(this, scratch, pg, zn);
2113*f5c631daSSadaf Ebrahimi       fcadd(scratch, pg, scratch, zm, rot);
2114*f5c631daSSadaf Ebrahimi     }
2115*f5c631daSSadaf Ebrahimi     Mov(zd, scratch);
2116*f5c631daSSadaf Ebrahimi   } else {
2117*f5c631daSSadaf Ebrahimi     MovprfxHelperScope guard(this, zd, pg, zn);
2118*f5c631daSSadaf Ebrahimi     fcadd(zd, pg, zd, zm, rot);
2119*f5c631daSSadaf Ebrahimi   }
2120*f5c631daSSadaf Ebrahimi }
2121*f5c631daSSadaf Ebrahimi 
2122*f5c631daSSadaf Ebrahimi void MacroAssembler::Fcmla(const ZRegister& zd,
2123*f5c631daSSadaf Ebrahimi                            const PRegisterM& pg,
2124*f5c631daSSadaf Ebrahimi                            const ZRegister& za,
2125*f5c631daSSadaf Ebrahimi                            const ZRegister& zn,
2126*f5c631daSSadaf Ebrahimi                            const ZRegister& zm,
2127*f5c631daSSadaf Ebrahimi                            int rot) {
2128*f5c631daSSadaf Ebrahimi   VIXL_ASSERT(allow_macro_instructions_);
2129*f5c631daSSadaf Ebrahimi   if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) {
2130*f5c631daSSadaf Ebrahimi     UseScratchRegisterScope temps(this);
2131*f5c631daSSadaf Ebrahimi     ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd);
2132*f5c631daSSadaf Ebrahimi     {
2133*f5c631daSSadaf Ebrahimi       MovprfxHelperScope guard(this, ztmp, za);
2134*f5c631daSSadaf Ebrahimi       fcmla(ztmp, pg, zn, zm, rot);
2135*f5c631daSSadaf Ebrahimi     }
2136*f5c631daSSadaf Ebrahimi     Mov(zd, pg, ztmp);
2137*f5c631daSSadaf Ebrahimi   } else {
2138*f5c631daSSadaf Ebrahimi     MovprfxHelperScope guard(this, zd, pg, za);
2139*f5c631daSSadaf Ebrahimi     fcmla(zd, pg, zn, zm, rot);
2140*f5c631daSSadaf Ebrahimi   }
2141*f5c631daSSadaf Ebrahimi }
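
// Note the predicated Mov(zd, pg, ztmp) on the scratch path: fcmla merges
// under pg, so only the active lanes of the scratch result are copied back,
// leaving inactive lanes of zd untouched, just as a direct predicated fcmla
// into zd would have done.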
2142*f5c631daSSadaf Ebrahimi 
2143*f5c631daSSadaf Ebrahimi void MacroAssembler::Splice(const ZRegister& zd,
2144*f5c631daSSadaf Ebrahimi                             const PRegister& pg,
2145*f5c631daSSadaf Ebrahimi                             const ZRegister& zn,
2146*f5c631daSSadaf Ebrahimi                             const ZRegister& zm) {
2147*f5c631daSSadaf Ebrahimi   VIXL_ASSERT(allow_macro_instructions_);
2148*f5c631daSSadaf Ebrahimi   if (CPUHas(CPUFeatures::kSVE2) && AreConsecutive(zn, zm) && !zd.Aliases(zn)) {
2149*f5c631daSSadaf Ebrahimi     SingleEmissionCheckScope guard(this);
2150*f5c631daSSadaf Ebrahimi     splice(zd, pg, zn, zm);
2151*f5c631daSSadaf Ebrahimi   } else if (zd.Aliases(zm) && !zd.Aliases(zn)) {
2152*f5c631daSSadaf Ebrahimi     UseScratchRegisterScope temps(this);
2153*f5c631daSSadaf Ebrahimi     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
2154*f5c631daSSadaf Ebrahimi     {
2155*f5c631daSSadaf Ebrahimi       MovprfxHelperScope guard(this, scratch, zn);
2156*f5c631daSSadaf Ebrahimi       splice(scratch, pg, scratch, zm);
2157*f5c631daSSadaf Ebrahimi     }
2158*f5c631daSSadaf Ebrahimi     Mov(zd, scratch);
2159*f5c631daSSadaf Ebrahimi   } else {
2160*f5c631daSSadaf Ebrahimi     MovprfxHelperScope guard(this, zd, zn);
2161*f5c631daSSadaf Ebrahimi     splice(zd, pg, zd, zm);
2162*f5c631daSSadaf Ebrahimi   }
2163*f5c631daSSadaf Ebrahimi }
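
// SVE2 adds a constructive splice form taking a consecutive register pair,
// which is why the first branch can encode, for example,
// Splice(z0.VnB(), p0, z1.VnB(), z2.VnB()) as a single instruction:
//
//   splice z0.b, p0, { z1.b, z2.b }
//
// Without SVE2, or with non-consecutive sources, the destructive SVE form is
// used behind a movprfx instead.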
2164*f5c631daSSadaf Ebrahimi 
2165*f5c631daSSadaf Ebrahimi void MacroAssembler::Clasta(const ZRegister& zd,
2166*f5c631daSSadaf Ebrahimi                             const PRegister& pg,
2167*f5c631daSSadaf Ebrahimi                             const ZRegister& zn,
2168*f5c631daSSadaf Ebrahimi                             const ZRegister& zm) {
2169*f5c631daSSadaf Ebrahimi   VIXL_ASSERT(allow_macro_instructions_);
2170*f5c631daSSadaf Ebrahimi   if (zd.Aliases(zm) && !zd.Aliases(zn)) {
2171*f5c631daSSadaf Ebrahimi     UseScratchRegisterScope temps(this);
2172*f5c631daSSadaf Ebrahimi     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
2173*f5c631daSSadaf Ebrahimi     {
2174*f5c631daSSadaf Ebrahimi       MovprfxHelperScope guard(this, scratch, zn);
2175*f5c631daSSadaf Ebrahimi       clasta(scratch, pg, scratch, zm);
2176*f5c631daSSadaf Ebrahimi     }
2177*f5c631daSSadaf Ebrahimi     Mov(zd, scratch);
2178*f5c631daSSadaf Ebrahimi   } else {
2179*f5c631daSSadaf Ebrahimi     MovprfxHelperScope guard(this, zd, zn);
2180*f5c631daSSadaf Ebrahimi     clasta(zd, pg, zd, zm);
2181*f5c631daSSadaf Ebrahimi   }
2182*f5c631daSSadaf Ebrahimi }
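
// clasta is destructive only in its first source, so a scratch is needed just
// for the zd == zm overlap. For example, Clasta(z0.VnB(), p0, z1.VnB(),
// z2.VnB()) has no hazard and emits roughly:
//
//   movprfx z0, z1              // zd = zn
//   clasta z0.b, p0, z0.b, z2.b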
2183*f5c631daSSadaf Ebrahimi 
2184*f5c631daSSadaf Ebrahimi void MacroAssembler::Clastb(const ZRegister& zd,
2185*f5c631daSSadaf Ebrahimi                             const PRegister& pg,
2186*f5c631daSSadaf Ebrahimi                             const ZRegister& zn,
2187*f5c631daSSadaf Ebrahimi                             const ZRegister& zm) {
2188*f5c631daSSadaf Ebrahimi   VIXL_ASSERT(allow_macro_instructions_);
2189*f5c631daSSadaf Ebrahimi   if (zd.Aliases(zm) && !zd.Aliases(zn)) {
2190*f5c631daSSadaf Ebrahimi     UseScratchRegisterScope temps(this);
2191*f5c631daSSadaf Ebrahimi     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
2192*f5c631daSSadaf Ebrahimi     {
2193*f5c631daSSadaf Ebrahimi       MovprfxHelperScope guard(this, scratch, zn);
2194*f5c631daSSadaf Ebrahimi       clastb(scratch, pg, scratch, zm);
2195*f5c631daSSadaf Ebrahimi     }
2196*f5c631daSSadaf Ebrahimi     Mov(zd, scratch);
2197*f5c631daSSadaf Ebrahimi   } else {
2198*f5c631daSSadaf Ebrahimi     MovprfxHelperScope guard(this, zd, zn);
2199*f5c631daSSadaf Ebrahimi     clastb(zd, pg, zd, zm);
2200*f5c631daSSadaf Ebrahimi   }
2201*f5c631daSSadaf Ebrahimi }
2202*f5c631daSSadaf Ebrahimi 
2203*f5c631daSSadaf Ebrahimi void MacroAssembler::ShiftRightAccumulate(IntArithImmFn fn,
2204*f5c631daSSadaf Ebrahimi                                           const ZRegister& zd,
2205*f5c631daSSadaf Ebrahimi                                           const ZRegister& za,
2206*f5c631daSSadaf Ebrahimi                                           const ZRegister& zn,
2207*f5c631daSSadaf Ebrahimi                                           int shift) {
2208*f5c631daSSadaf Ebrahimi   VIXL_ASSERT(allow_macro_instructions_);
2209*f5c631daSSadaf Ebrahimi   if (!zd.Aliases(za) && zd.Aliases(zn)) {
2210*f5c631daSSadaf Ebrahimi     UseScratchRegisterScope temps(this);
2211*f5c631daSSadaf Ebrahimi     ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn);
2212*f5c631daSSadaf Ebrahimi     Mov(ztmp, zn);
2213*f5c631daSSadaf Ebrahimi     {
2214*f5c631daSSadaf Ebrahimi       MovprfxHelperScope guard(this, zd, za);
2215*f5c631daSSadaf Ebrahimi       (this->*fn)(zd, ztmp, shift);
2216*f5c631daSSadaf Ebrahimi     }
2217*f5c631daSSadaf Ebrahimi   } else {
2218*f5c631daSSadaf Ebrahimi     MovprfxHelperScope guard(this, zd, za);
2219*f5c631daSSadaf Ebrahimi     (this->*fn)(zd, zn, shift);
2220*f5c631daSSadaf Ebrahimi   }
2221*f5c631daSSadaf Ebrahimi }
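
// For example, Usra(z0.VnS(), z1.VnS(), z2.VnS(), 3) computes
// zd = za + (zn >> 3) lane-wise and, with no aliasing hazard, emits roughly:
//
//   movprfx z0, z1      // zd = za
//   usra z0.s, z2.s, #3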
2222*f5c631daSSadaf Ebrahimi 
2223*f5c631daSSadaf Ebrahimi void MacroAssembler::Srsra(const ZRegister& zd,
2224*f5c631daSSadaf Ebrahimi                            const ZRegister& za,
2225*f5c631daSSadaf Ebrahimi                            const ZRegister& zn,
2226*f5c631daSSadaf Ebrahimi                            int shift) {
2227*f5c631daSSadaf Ebrahimi   ShiftRightAccumulate(&Assembler::srsra, zd, za, zn, shift);
2228*f5c631daSSadaf Ebrahimi }
2229*f5c631daSSadaf Ebrahimi 
2230*f5c631daSSadaf Ebrahimi void MacroAssembler::Ssra(const ZRegister& zd,
2231*f5c631daSSadaf Ebrahimi                           const ZRegister& za,
2232*f5c631daSSadaf Ebrahimi                           const ZRegister& zn,
2233*f5c631daSSadaf Ebrahimi                           int shift) {
2234*f5c631daSSadaf Ebrahimi   ShiftRightAccumulate(&Assembler::ssra, zd, za, zn, shift);
2235*f5c631daSSadaf Ebrahimi }
2236*f5c631daSSadaf Ebrahimi 
2237*f5c631daSSadaf Ebrahimi void MacroAssembler::Ursra(const ZRegister& zd,
2238*f5c631daSSadaf Ebrahimi                            const ZRegister& za,
2239*f5c631daSSadaf Ebrahimi                            const ZRegister& zn,
2240*f5c631daSSadaf Ebrahimi                            int shift) {
2241*f5c631daSSadaf Ebrahimi   ShiftRightAccumulate(&Assembler::ursra, zd, za, zn, shift);
2242*f5c631daSSadaf Ebrahimi }
2243*f5c631daSSadaf Ebrahimi 
2244*f5c631daSSadaf Ebrahimi void MacroAssembler::Usra(const ZRegister& zd,
2245*f5c631daSSadaf Ebrahimi                           const ZRegister& za,
2246*f5c631daSSadaf Ebrahimi                           const ZRegister& zn,
2247*f5c631daSSadaf Ebrahimi                           int shift) {
2248*f5c631daSSadaf Ebrahimi   ShiftRightAccumulate(&Assembler::usra, zd, za, zn, shift);
2249*f5c631daSSadaf Ebrahimi }
2250*f5c631daSSadaf Ebrahimi 
2251*f5c631daSSadaf Ebrahimi void MacroAssembler::ComplexAddition(ZZZImmFn fn,
2252*f5c631daSSadaf Ebrahimi                                      const ZRegister& zd,
2253*f5c631daSSadaf Ebrahimi                                      const ZRegister& zn,
2254*f5c631daSSadaf Ebrahimi                                      const ZRegister& zm,
2255*f5c631daSSadaf Ebrahimi                                      int rot) {
2256*f5c631daSSadaf Ebrahimi   VIXL_ASSERT(allow_macro_instructions_);
2257*f5c631daSSadaf Ebrahimi   if (!zd.Aliases(zn) && zd.Aliases(zm)) {
2258*f5c631daSSadaf Ebrahimi     UseScratchRegisterScope temps(this);
2259*f5c631daSSadaf Ebrahimi     ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zm);
2260*f5c631daSSadaf Ebrahimi     Mov(ztmp, zm);
2261*f5c631daSSadaf Ebrahimi     {
2262*f5c631daSSadaf Ebrahimi       MovprfxHelperScope guard(this, zd, zn);
2263*f5c631daSSadaf Ebrahimi       (this->*fn)(zd, zd, ztmp, rot);
2264*f5c631daSSadaf Ebrahimi     }
2265*f5c631daSSadaf Ebrahimi   } else {
2266*f5c631daSSadaf Ebrahimi     MovprfxHelperScope guard(this, zd, zn);
2267*f5c631daSSadaf Ebrahimi     (this->*fn)(zd, zd, zm, rot);
2268*f5c631daSSadaf Ebrahimi   }
2269*f5c631daSSadaf Ebrahimi }
2270*f5c631daSSadaf Ebrahimi 
2271*f5c631daSSadaf Ebrahimi void MacroAssembler::Cadd(const ZRegister& zd,
2272*f5c631daSSadaf Ebrahimi                           const ZRegister& zn,
2273*f5c631daSSadaf Ebrahimi                           const ZRegister& zm,
2274*f5c631daSSadaf Ebrahimi                           int rot) {
2275*f5c631daSSadaf Ebrahimi   ComplexAddition(&Assembler::cadd, zd, zn, zm, rot);
2276*f5c631daSSadaf Ebrahimi }
2277*f5c631daSSadaf Ebrahimi 
2278*f5c631daSSadaf Ebrahimi void MacroAssembler::Sqcadd(const ZRegister& zd,
2279*f5c631daSSadaf Ebrahimi                             const ZRegister& zn,
2280*f5c631daSSadaf Ebrahimi                             const ZRegister& zm,
2281*f5c631daSSadaf Ebrahimi                             int rot) {
2282*f5c631daSSadaf Ebrahimi   ComplexAddition(&Assembler::sqcadd, zd, zn, zm, rot);
2283*f5c631daSSadaf Ebrahimi }
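
// For example, Cadd(z0.VnH(), z1.VnH(), z2.VnH(), 90) computes a complex
// addition of z1 and z2 rotated by 90 degrees (rot must be 90 or 270) and,
// with no aliasing hazard, emits roughly:
//
//   movprfx z0, z1            // zd = zn
//   cadd z0.h, z0.h, z2.h, #90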
2284*f5c631daSSadaf Ebrahimi 
2285*f5c631daSSadaf Ebrahimi }  // namespace aarch64
2286*f5c631daSSadaf Ebrahimi }  // namespace vixl