1*f5c631daSSadaf Ebrahimi // Copyright 2015, VIXL authors
2*f5c631daSSadaf Ebrahimi // All rights reserved.
3*f5c631daSSadaf Ebrahimi //
4*f5c631daSSadaf Ebrahimi // Redistribution and use in source and binary forms, with or without
5*f5c631daSSadaf Ebrahimi // modification, are permitted provided that the following conditions are met:
6*f5c631daSSadaf Ebrahimi //
7*f5c631daSSadaf Ebrahimi // * Redistributions of source code must retain the above copyright notice,
8*f5c631daSSadaf Ebrahimi // this list of conditions and the following disclaimer.
9*f5c631daSSadaf Ebrahimi // * Redistributions in binary form must reproduce the above copyright notice,
10*f5c631daSSadaf Ebrahimi // this list of conditions and the following disclaimer in the documentation
11*f5c631daSSadaf Ebrahimi // and/or other materials provided with the distribution.
12*f5c631daSSadaf Ebrahimi // * Neither the name of ARM Limited nor the names of its contributors may be
13*f5c631daSSadaf Ebrahimi // used to endorse or promote products derived from this software without
14*f5c631daSSadaf Ebrahimi // specific prior written permission.
15*f5c631daSSadaf Ebrahimi //
16*f5c631daSSadaf Ebrahimi // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17*f5c631daSSadaf Ebrahimi // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18*f5c631daSSadaf Ebrahimi // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19*f5c631daSSadaf Ebrahimi // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20*f5c631daSSadaf Ebrahimi // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21*f5c631daSSadaf Ebrahimi // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22*f5c631daSSadaf Ebrahimi // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23*f5c631daSSadaf Ebrahimi // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24*f5c631daSSadaf Ebrahimi // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25*f5c631daSSadaf Ebrahimi // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*f5c631daSSadaf Ebrahimi
27*f5c631daSSadaf Ebrahimi #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
28*f5c631daSSadaf Ebrahimi
29*f5c631daSSadaf Ebrahimi #include <cmath>
30*f5c631daSSadaf Ebrahimi
31*f5c631daSSadaf Ebrahimi #include "simulator-aarch64.h"
32*f5c631daSSadaf Ebrahimi
33*f5c631daSSadaf Ebrahimi namespace vixl {
34*f5c631daSSadaf Ebrahimi namespace aarch64 {
35*f5c631daSSadaf Ebrahimi
36*f5c631daSSadaf Ebrahimi using vixl::internal::SimFloat16;
37*f5c631daSSadaf Ebrahimi
// Compile-time type query: IsFloat64<T>() is true only when T is `double`.
// The primary template covers every other type.
template <typename T>
bool IsFloat64() {
  return false;
}

// Explicit specialization selected when T is `double`.
template <>
bool IsFloat64<double>() {
  return true;
}
46*f5c631daSSadaf Ebrahimi
// Compile-time type query: IsFloat32<T>() is true only when T is `float`.
// The primary template covers every other type.
template <typename T>
bool IsFloat32() {
  return false;
}

// Explicit specialization selected when T is `float`.
template <>
bool IsFloat32<float>() {
  return true;
}
55*f5c631daSSadaf Ebrahimi
56*f5c631daSSadaf Ebrahimi template <typename T>
IsFloat16()57*f5c631daSSadaf Ebrahimi bool IsFloat16() {
58*f5c631daSSadaf Ebrahimi return false;
59*f5c631daSSadaf Ebrahimi }
60*f5c631daSSadaf Ebrahimi template <>
IsFloat16()61*f5c631daSSadaf Ebrahimi bool IsFloat16<Float16>() {
62*f5c631daSSadaf Ebrahimi return true;
63*f5c631daSSadaf Ebrahimi }
64*f5c631daSSadaf Ebrahimi template <>
IsFloat16()65*f5c631daSSadaf Ebrahimi bool IsFloat16<SimFloat16>() {
66*f5c631daSSadaf Ebrahimi return true;
67*f5c631daSSadaf Ebrahimi }
68*f5c631daSSadaf Ebrahimi
69*f5c631daSSadaf Ebrahimi template <>
FPDefaultNaN()70*f5c631daSSadaf Ebrahimi double Simulator::FPDefaultNaN<double>() {
71*f5c631daSSadaf Ebrahimi return kFP64DefaultNaN;
72*f5c631daSSadaf Ebrahimi }
73*f5c631daSSadaf Ebrahimi
74*f5c631daSSadaf Ebrahimi
75*f5c631daSSadaf Ebrahimi template <>
FPDefaultNaN()76*f5c631daSSadaf Ebrahimi float Simulator::FPDefaultNaN<float>() {
77*f5c631daSSadaf Ebrahimi return kFP32DefaultNaN;
78*f5c631daSSadaf Ebrahimi }
79*f5c631daSSadaf Ebrahimi
80*f5c631daSSadaf Ebrahimi
81*f5c631daSSadaf Ebrahimi template <>
FPDefaultNaN()82*f5c631daSSadaf Ebrahimi SimFloat16 Simulator::FPDefaultNaN<SimFloat16>() {
83*f5c631daSSadaf Ebrahimi return SimFloat16(kFP16DefaultNaN);
84*f5c631daSSadaf Ebrahimi }
85*f5c631daSSadaf Ebrahimi
86*f5c631daSSadaf Ebrahimi
FixedToDouble(int64_t src,int fbits,FPRounding round)87*f5c631daSSadaf Ebrahimi double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
88*f5c631daSSadaf Ebrahimi if (src >= 0) {
89*f5c631daSSadaf Ebrahimi return UFixedToDouble(src, fbits, round);
90*f5c631daSSadaf Ebrahimi } else if (src == INT64_MIN) {
91*f5c631daSSadaf Ebrahimi return -UFixedToDouble(src, fbits, round);
92*f5c631daSSadaf Ebrahimi } else {
93*f5c631daSSadaf Ebrahimi return -UFixedToDouble(-src, fbits, round);
94*f5c631daSSadaf Ebrahimi }
95*f5c631daSSadaf Ebrahimi }
96*f5c631daSSadaf Ebrahimi
97*f5c631daSSadaf Ebrahimi
UFixedToDouble(uint64_t src,int fbits,FPRounding round)98*f5c631daSSadaf Ebrahimi double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
99*f5c631daSSadaf Ebrahimi // An input of 0 is a special case because the result is effectively
100*f5c631daSSadaf Ebrahimi // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
101*f5c631daSSadaf Ebrahimi if (src == 0) {
102*f5c631daSSadaf Ebrahimi return 0.0;
103*f5c631daSSadaf Ebrahimi }
104*f5c631daSSadaf Ebrahimi
105*f5c631daSSadaf Ebrahimi // Calculate the exponent. The highest significant bit will have the value
106*f5c631daSSadaf Ebrahimi // 2^exponent.
107*f5c631daSSadaf Ebrahimi const int highest_significant_bit = 63 - CountLeadingZeros(src);
108*f5c631daSSadaf Ebrahimi const int64_t exponent = highest_significant_bit - fbits;
109*f5c631daSSadaf Ebrahimi
110*f5c631daSSadaf Ebrahimi return FPRoundToDouble(0, exponent, src, round);
111*f5c631daSSadaf Ebrahimi }
112*f5c631daSSadaf Ebrahimi
113*f5c631daSSadaf Ebrahimi
FixedToFloat(int64_t src,int fbits,FPRounding round)114*f5c631daSSadaf Ebrahimi float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
115*f5c631daSSadaf Ebrahimi if (src >= 0) {
116*f5c631daSSadaf Ebrahimi return UFixedToFloat(src, fbits, round);
117*f5c631daSSadaf Ebrahimi } else if (src == INT64_MIN) {
118*f5c631daSSadaf Ebrahimi return -UFixedToFloat(src, fbits, round);
119*f5c631daSSadaf Ebrahimi } else {
120*f5c631daSSadaf Ebrahimi return -UFixedToFloat(-src, fbits, round);
121*f5c631daSSadaf Ebrahimi }
122*f5c631daSSadaf Ebrahimi }
123*f5c631daSSadaf Ebrahimi
124*f5c631daSSadaf Ebrahimi
UFixedToFloat(uint64_t src,int fbits,FPRounding round)125*f5c631daSSadaf Ebrahimi float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
126*f5c631daSSadaf Ebrahimi // An input of 0 is a special case because the result is effectively
127*f5c631daSSadaf Ebrahimi // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
128*f5c631daSSadaf Ebrahimi if (src == 0) {
129*f5c631daSSadaf Ebrahimi return 0.0f;
130*f5c631daSSadaf Ebrahimi }
131*f5c631daSSadaf Ebrahimi
132*f5c631daSSadaf Ebrahimi // Calculate the exponent. The highest significant bit will have the value
133*f5c631daSSadaf Ebrahimi // 2^exponent.
134*f5c631daSSadaf Ebrahimi const int highest_significant_bit = 63 - CountLeadingZeros(src);
135*f5c631daSSadaf Ebrahimi const int32_t exponent = highest_significant_bit - fbits;
136*f5c631daSSadaf Ebrahimi
137*f5c631daSSadaf Ebrahimi return FPRoundToFloat(0, exponent, src, round);
138*f5c631daSSadaf Ebrahimi }
139*f5c631daSSadaf Ebrahimi
140*f5c631daSSadaf Ebrahimi
FixedToFloat16(int64_t src,int fbits,FPRounding round)141*f5c631daSSadaf Ebrahimi SimFloat16 Simulator::FixedToFloat16(int64_t src, int fbits, FPRounding round) {
142*f5c631daSSadaf Ebrahimi if (src >= 0) {
143*f5c631daSSadaf Ebrahimi return UFixedToFloat16(src, fbits, round);
144*f5c631daSSadaf Ebrahimi } else if (src == INT64_MIN) {
145*f5c631daSSadaf Ebrahimi return -UFixedToFloat16(src, fbits, round);
146*f5c631daSSadaf Ebrahimi } else {
147*f5c631daSSadaf Ebrahimi return -UFixedToFloat16(-src, fbits, round);
148*f5c631daSSadaf Ebrahimi }
149*f5c631daSSadaf Ebrahimi }
150*f5c631daSSadaf Ebrahimi
151*f5c631daSSadaf Ebrahimi
UFixedToFloat16(uint64_t src,int fbits,FPRounding round)152*f5c631daSSadaf Ebrahimi SimFloat16 Simulator::UFixedToFloat16(uint64_t src,
153*f5c631daSSadaf Ebrahimi int fbits,
154*f5c631daSSadaf Ebrahimi FPRounding round) {
155*f5c631daSSadaf Ebrahimi // An input of 0 is a special case because the result is effectively
156*f5c631daSSadaf Ebrahimi // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
157*f5c631daSSadaf Ebrahimi if (src == 0) {
158*f5c631daSSadaf Ebrahimi return 0.0f;
159*f5c631daSSadaf Ebrahimi }
160*f5c631daSSadaf Ebrahimi
161*f5c631daSSadaf Ebrahimi // Calculate the exponent. The highest significant bit will have the value
162*f5c631daSSadaf Ebrahimi // 2^exponent.
163*f5c631daSSadaf Ebrahimi const int highest_significant_bit = 63 - CountLeadingZeros(src);
164*f5c631daSSadaf Ebrahimi const int16_t exponent = highest_significant_bit - fbits;
165*f5c631daSSadaf Ebrahimi
166*f5c631daSSadaf Ebrahimi return FPRoundToFloat16(0, exponent, src, round);
167*f5c631daSSadaf Ebrahimi }
168*f5c631daSSadaf Ebrahimi
169*f5c631daSSadaf Ebrahimi
ld1(VectorFormat vform,LogicVRegister dst,uint64_t addr)170*f5c631daSSadaf Ebrahimi void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
171*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
172*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
173*f5c631daSSadaf Ebrahimi LoadLane(dst, vform, i, addr);
174*f5c631daSSadaf Ebrahimi addr += LaneSizeInBytesFromFormat(vform);
175*f5c631daSSadaf Ebrahimi }
176*f5c631daSSadaf Ebrahimi }
177*f5c631daSSadaf Ebrahimi
178*f5c631daSSadaf Ebrahimi
ld1(VectorFormat vform,LogicVRegister dst,int index,uint64_t addr)179*f5c631daSSadaf Ebrahimi void Simulator::ld1(VectorFormat vform,
180*f5c631daSSadaf Ebrahimi LogicVRegister dst,
181*f5c631daSSadaf Ebrahimi int index,
182*f5c631daSSadaf Ebrahimi uint64_t addr) {
183*f5c631daSSadaf Ebrahimi LoadLane(dst, vform, index, addr);
184*f5c631daSSadaf Ebrahimi }
185*f5c631daSSadaf Ebrahimi
186*f5c631daSSadaf Ebrahimi
ld1r(VectorFormat vform,VectorFormat unpack_vform,LogicVRegister dst,uint64_t addr,bool is_signed)187*f5c631daSSadaf Ebrahimi void Simulator::ld1r(VectorFormat vform,
188*f5c631daSSadaf Ebrahimi VectorFormat unpack_vform,
189*f5c631daSSadaf Ebrahimi LogicVRegister dst,
190*f5c631daSSadaf Ebrahimi uint64_t addr,
191*f5c631daSSadaf Ebrahimi bool is_signed) {
192*f5c631daSSadaf Ebrahimi unsigned unpack_size = LaneSizeInBytesFromFormat(unpack_vform);
193*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
194*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
195*f5c631daSSadaf Ebrahimi if (is_signed) {
196*f5c631daSSadaf Ebrahimi LoadIntToLane(dst, vform, unpack_size, i, addr);
197*f5c631daSSadaf Ebrahimi } else {
198*f5c631daSSadaf Ebrahimi LoadUintToLane(dst, vform, unpack_size, i, addr);
199*f5c631daSSadaf Ebrahimi }
200*f5c631daSSadaf Ebrahimi }
201*f5c631daSSadaf Ebrahimi }
202*f5c631daSSadaf Ebrahimi
203*f5c631daSSadaf Ebrahimi
ld1r(VectorFormat vform,LogicVRegister dst,uint64_t addr)204*f5c631daSSadaf Ebrahimi void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
205*f5c631daSSadaf Ebrahimi ld1r(vform, vform, dst, addr);
206*f5c631daSSadaf Ebrahimi }
207*f5c631daSSadaf Ebrahimi
208*f5c631daSSadaf Ebrahimi
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr1)209*f5c631daSSadaf Ebrahimi void Simulator::ld2(VectorFormat vform,
210*f5c631daSSadaf Ebrahimi LogicVRegister dst1,
211*f5c631daSSadaf Ebrahimi LogicVRegister dst2,
212*f5c631daSSadaf Ebrahimi uint64_t addr1) {
213*f5c631daSSadaf Ebrahimi dst1.ClearForWrite(vform);
214*f5c631daSSadaf Ebrahimi dst2.ClearForWrite(vform);
215*f5c631daSSadaf Ebrahimi int esize = LaneSizeInBytesFromFormat(vform);
216*f5c631daSSadaf Ebrahimi uint64_t addr2 = addr1 + esize;
217*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
218*f5c631daSSadaf Ebrahimi LoadLane(dst1, vform, i, addr1);
219*f5c631daSSadaf Ebrahimi LoadLane(dst2, vform, i, addr2);
220*f5c631daSSadaf Ebrahimi addr1 += 2 * esize;
221*f5c631daSSadaf Ebrahimi addr2 += 2 * esize;
222*f5c631daSSadaf Ebrahimi }
223*f5c631daSSadaf Ebrahimi }
224*f5c631daSSadaf Ebrahimi
225*f5c631daSSadaf Ebrahimi
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,int index,uint64_t addr1)226*f5c631daSSadaf Ebrahimi void Simulator::ld2(VectorFormat vform,
227*f5c631daSSadaf Ebrahimi LogicVRegister dst1,
228*f5c631daSSadaf Ebrahimi LogicVRegister dst2,
229*f5c631daSSadaf Ebrahimi int index,
230*f5c631daSSadaf Ebrahimi uint64_t addr1) {
231*f5c631daSSadaf Ebrahimi dst1.ClearForWrite(vform);
232*f5c631daSSadaf Ebrahimi dst2.ClearForWrite(vform);
233*f5c631daSSadaf Ebrahimi uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
234*f5c631daSSadaf Ebrahimi LoadLane(dst1, vform, index, addr1);
235*f5c631daSSadaf Ebrahimi LoadLane(dst2, vform, index, addr2);
236*f5c631daSSadaf Ebrahimi }
237*f5c631daSSadaf Ebrahimi
238*f5c631daSSadaf Ebrahimi
ld2r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr)239*f5c631daSSadaf Ebrahimi void Simulator::ld2r(VectorFormat vform,
240*f5c631daSSadaf Ebrahimi LogicVRegister dst1,
241*f5c631daSSadaf Ebrahimi LogicVRegister dst2,
242*f5c631daSSadaf Ebrahimi uint64_t addr) {
243*f5c631daSSadaf Ebrahimi dst1.ClearForWrite(vform);
244*f5c631daSSadaf Ebrahimi dst2.ClearForWrite(vform);
245*f5c631daSSadaf Ebrahimi uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
246*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
247*f5c631daSSadaf Ebrahimi LoadLane(dst1, vform, i, addr);
248*f5c631daSSadaf Ebrahimi LoadLane(dst2, vform, i, addr2);
249*f5c631daSSadaf Ebrahimi }
250*f5c631daSSadaf Ebrahimi }
251*f5c631daSSadaf Ebrahimi
252*f5c631daSSadaf Ebrahimi
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr1)253*f5c631daSSadaf Ebrahimi void Simulator::ld3(VectorFormat vform,
254*f5c631daSSadaf Ebrahimi LogicVRegister dst1,
255*f5c631daSSadaf Ebrahimi LogicVRegister dst2,
256*f5c631daSSadaf Ebrahimi LogicVRegister dst3,
257*f5c631daSSadaf Ebrahimi uint64_t addr1) {
258*f5c631daSSadaf Ebrahimi dst1.ClearForWrite(vform);
259*f5c631daSSadaf Ebrahimi dst2.ClearForWrite(vform);
260*f5c631daSSadaf Ebrahimi dst3.ClearForWrite(vform);
261*f5c631daSSadaf Ebrahimi int esize = LaneSizeInBytesFromFormat(vform);
262*f5c631daSSadaf Ebrahimi uint64_t addr2 = addr1 + esize;
263*f5c631daSSadaf Ebrahimi uint64_t addr3 = addr2 + esize;
264*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
265*f5c631daSSadaf Ebrahimi LoadLane(dst1, vform, i, addr1);
266*f5c631daSSadaf Ebrahimi LoadLane(dst2, vform, i, addr2);
267*f5c631daSSadaf Ebrahimi LoadLane(dst3, vform, i, addr3);
268*f5c631daSSadaf Ebrahimi addr1 += 3 * esize;
269*f5c631daSSadaf Ebrahimi addr2 += 3 * esize;
270*f5c631daSSadaf Ebrahimi addr3 += 3 * esize;
271*f5c631daSSadaf Ebrahimi }
272*f5c631daSSadaf Ebrahimi }
273*f5c631daSSadaf Ebrahimi
274*f5c631daSSadaf Ebrahimi
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr1)275*f5c631daSSadaf Ebrahimi void Simulator::ld3(VectorFormat vform,
276*f5c631daSSadaf Ebrahimi LogicVRegister dst1,
277*f5c631daSSadaf Ebrahimi LogicVRegister dst2,
278*f5c631daSSadaf Ebrahimi LogicVRegister dst3,
279*f5c631daSSadaf Ebrahimi int index,
280*f5c631daSSadaf Ebrahimi uint64_t addr1) {
281*f5c631daSSadaf Ebrahimi dst1.ClearForWrite(vform);
282*f5c631daSSadaf Ebrahimi dst2.ClearForWrite(vform);
283*f5c631daSSadaf Ebrahimi dst3.ClearForWrite(vform);
284*f5c631daSSadaf Ebrahimi uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
285*f5c631daSSadaf Ebrahimi uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
286*f5c631daSSadaf Ebrahimi LoadLane(dst1, vform, index, addr1);
287*f5c631daSSadaf Ebrahimi LoadLane(dst2, vform, index, addr2);
288*f5c631daSSadaf Ebrahimi LoadLane(dst3, vform, index, addr3);
289*f5c631daSSadaf Ebrahimi }
290*f5c631daSSadaf Ebrahimi
291*f5c631daSSadaf Ebrahimi
ld3r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)292*f5c631daSSadaf Ebrahimi void Simulator::ld3r(VectorFormat vform,
293*f5c631daSSadaf Ebrahimi LogicVRegister dst1,
294*f5c631daSSadaf Ebrahimi LogicVRegister dst2,
295*f5c631daSSadaf Ebrahimi LogicVRegister dst3,
296*f5c631daSSadaf Ebrahimi uint64_t addr) {
297*f5c631daSSadaf Ebrahimi dst1.ClearForWrite(vform);
298*f5c631daSSadaf Ebrahimi dst2.ClearForWrite(vform);
299*f5c631daSSadaf Ebrahimi dst3.ClearForWrite(vform);
300*f5c631daSSadaf Ebrahimi uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
301*f5c631daSSadaf Ebrahimi uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
302*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
303*f5c631daSSadaf Ebrahimi LoadLane(dst1, vform, i, addr);
304*f5c631daSSadaf Ebrahimi LoadLane(dst2, vform, i, addr2);
305*f5c631daSSadaf Ebrahimi LoadLane(dst3, vform, i, addr3);
306*f5c631daSSadaf Ebrahimi }
307*f5c631daSSadaf Ebrahimi }
308*f5c631daSSadaf Ebrahimi
309*f5c631daSSadaf Ebrahimi
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr1)310*f5c631daSSadaf Ebrahimi void Simulator::ld4(VectorFormat vform,
311*f5c631daSSadaf Ebrahimi LogicVRegister dst1,
312*f5c631daSSadaf Ebrahimi LogicVRegister dst2,
313*f5c631daSSadaf Ebrahimi LogicVRegister dst3,
314*f5c631daSSadaf Ebrahimi LogicVRegister dst4,
315*f5c631daSSadaf Ebrahimi uint64_t addr1) {
316*f5c631daSSadaf Ebrahimi dst1.ClearForWrite(vform);
317*f5c631daSSadaf Ebrahimi dst2.ClearForWrite(vform);
318*f5c631daSSadaf Ebrahimi dst3.ClearForWrite(vform);
319*f5c631daSSadaf Ebrahimi dst4.ClearForWrite(vform);
320*f5c631daSSadaf Ebrahimi int esize = LaneSizeInBytesFromFormat(vform);
321*f5c631daSSadaf Ebrahimi uint64_t addr2 = addr1 + esize;
322*f5c631daSSadaf Ebrahimi uint64_t addr3 = addr2 + esize;
323*f5c631daSSadaf Ebrahimi uint64_t addr4 = addr3 + esize;
324*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
325*f5c631daSSadaf Ebrahimi LoadLane(dst1, vform, i, addr1);
326*f5c631daSSadaf Ebrahimi LoadLane(dst2, vform, i, addr2);
327*f5c631daSSadaf Ebrahimi LoadLane(dst3, vform, i, addr3);
328*f5c631daSSadaf Ebrahimi LoadLane(dst4, vform, i, addr4);
329*f5c631daSSadaf Ebrahimi addr1 += 4 * esize;
330*f5c631daSSadaf Ebrahimi addr2 += 4 * esize;
331*f5c631daSSadaf Ebrahimi addr3 += 4 * esize;
332*f5c631daSSadaf Ebrahimi addr4 += 4 * esize;
333*f5c631daSSadaf Ebrahimi }
334*f5c631daSSadaf Ebrahimi }
335*f5c631daSSadaf Ebrahimi
336*f5c631daSSadaf Ebrahimi
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr1)337*f5c631daSSadaf Ebrahimi void Simulator::ld4(VectorFormat vform,
338*f5c631daSSadaf Ebrahimi LogicVRegister dst1,
339*f5c631daSSadaf Ebrahimi LogicVRegister dst2,
340*f5c631daSSadaf Ebrahimi LogicVRegister dst3,
341*f5c631daSSadaf Ebrahimi LogicVRegister dst4,
342*f5c631daSSadaf Ebrahimi int index,
343*f5c631daSSadaf Ebrahimi uint64_t addr1) {
344*f5c631daSSadaf Ebrahimi dst1.ClearForWrite(vform);
345*f5c631daSSadaf Ebrahimi dst2.ClearForWrite(vform);
346*f5c631daSSadaf Ebrahimi dst3.ClearForWrite(vform);
347*f5c631daSSadaf Ebrahimi dst4.ClearForWrite(vform);
348*f5c631daSSadaf Ebrahimi uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
349*f5c631daSSadaf Ebrahimi uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
350*f5c631daSSadaf Ebrahimi uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
351*f5c631daSSadaf Ebrahimi LoadLane(dst1, vform, index, addr1);
352*f5c631daSSadaf Ebrahimi LoadLane(dst2, vform, index, addr2);
353*f5c631daSSadaf Ebrahimi LoadLane(dst3, vform, index, addr3);
354*f5c631daSSadaf Ebrahimi LoadLane(dst4, vform, index, addr4);
355*f5c631daSSadaf Ebrahimi }
356*f5c631daSSadaf Ebrahimi
357*f5c631daSSadaf Ebrahimi
ld4r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)358*f5c631daSSadaf Ebrahimi void Simulator::ld4r(VectorFormat vform,
359*f5c631daSSadaf Ebrahimi LogicVRegister dst1,
360*f5c631daSSadaf Ebrahimi LogicVRegister dst2,
361*f5c631daSSadaf Ebrahimi LogicVRegister dst3,
362*f5c631daSSadaf Ebrahimi LogicVRegister dst4,
363*f5c631daSSadaf Ebrahimi uint64_t addr) {
364*f5c631daSSadaf Ebrahimi dst1.ClearForWrite(vform);
365*f5c631daSSadaf Ebrahimi dst2.ClearForWrite(vform);
366*f5c631daSSadaf Ebrahimi dst3.ClearForWrite(vform);
367*f5c631daSSadaf Ebrahimi dst4.ClearForWrite(vform);
368*f5c631daSSadaf Ebrahimi uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
369*f5c631daSSadaf Ebrahimi uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
370*f5c631daSSadaf Ebrahimi uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
371*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
372*f5c631daSSadaf Ebrahimi LoadLane(dst1, vform, i, addr);
373*f5c631daSSadaf Ebrahimi LoadLane(dst2, vform, i, addr2);
374*f5c631daSSadaf Ebrahimi LoadLane(dst3, vform, i, addr3);
375*f5c631daSSadaf Ebrahimi LoadLane(dst4, vform, i, addr4);
376*f5c631daSSadaf Ebrahimi }
377*f5c631daSSadaf Ebrahimi }
378*f5c631daSSadaf Ebrahimi
379*f5c631daSSadaf Ebrahimi
st1(VectorFormat vform,LogicVRegister src,uint64_t addr)380*f5c631daSSadaf Ebrahimi void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
381*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
382*f5c631daSSadaf Ebrahimi StoreLane(src, vform, i, addr);
383*f5c631daSSadaf Ebrahimi addr += LaneSizeInBytesFromFormat(vform);
384*f5c631daSSadaf Ebrahimi }
385*f5c631daSSadaf Ebrahimi }
386*f5c631daSSadaf Ebrahimi
387*f5c631daSSadaf Ebrahimi
st1(VectorFormat vform,LogicVRegister src,int index,uint64_t addr)388*f5c631daSSadaf Ebrahimi void Simulator::st1(VectorFormat vform,
389*f5c631daSSadaf Ebrahimi LogicVRegister src,
390*f5c631daSSadaf Ebrahimi int index,
391*f5c631daSSadaf Ebrahimi uint64_t addr) {
392*f5c631daSSadaf Ebrahimi StoreLane(src, vform, index, addr);
393*f5c631daSSadaf Ebrahimi }
394*f5c631daSSadaf Ebrahimi
395*f5c631daSSadaf Ebrahimi
st2(VectorFormat vform,LogicVRegister src,LogicVRegister src2,uint64_t addr)396*f5c631daSSadaf Ebrahimi void Simulator::st2(VectorFormat vform,
397*f5c631daSSadaf Ebrahimi LogicVRegister src,
398*f5c631daSSadaf Ebrahimi LogicVRegister src2,
399*f5c631daSSadaf Ebrahimi uint64_t addr) {
400*f5c631daSSadaf Ebrahimi int esize = LaneSizeInBytesFromFormat(vform);
401*f5c631daSSadaf Ebrahimi uint64_t addr2 = addr + esize;
402*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
403*f5c631daSSadaf Ebrahimi StoreLane(src, vform, i, addr);
404*f5c631daSSadaf Ebrahimi StoreLane(src2, vform, i, addr2);
405*f5c631daSSadaf Ebrahimi addr += 2 * esize;
406*f5c631daSSadaf Ebrahimi addr2 += 2 * esize;
407*f5c631daSSadaf Ebrahimi }
408*f5c631daSSadaf Ebrahimi }
409*f5c631daSSadaf Ebrahimi
410*f5c631daSSadaf Ebrahimi
st2(VectorFormat vform,LogicVRegister src,LogicVRegister src2,int index,uint64_t addr)411*f5c631daSSadaf Ebrahimi void Simulator::st2(VectorFormat vform,
412*f5c631daSSadaf Ebrahimi LogicVRegister src,
413*f5c631daSSadaf Ebrahimi LogicVRegister src2,
414*f5c631daSSadaf Ebrahimi int index,
415*f5c631daSSadaf Ebrahimi uint64_t addr) {
416*f5c631daSSadaf Ebrahimi int esize = LaneSizeInBytesFromFormat(vform);
417*f5c631daSSadaf Ebrahimi StoreLane(src, vform, index, addr);
418*f5c631daSSadaf Ebrahimi StoreLane(src2, vform, index, addr + 1 * esize);
419*f5c631daSSadaf Ebrahimi }
420*f5c631daSSadaf Ebrahimi
421*f5c631daSSadaf Ebrahimi
st3(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,uint64_t addr)422*f5c631daSSadaf Ebrahimi void Simulator::st3(VectorFormat vform,
423*f5c631daSSadaf Ebrahimi LogicVRegister src,
424*f5c631daSSadaf Ebrahimi LogicVRegister src2,
425*f5c631daSSadaf Ebrahimi LogicVRegister src3,
426*f5c631daSSadaf Ebrahimi uint64_t addr) {
427*f5c631daSSadaf Ebrahimi int esize = LaneSizeInBytesFromFormat(vform);
428*f5c631daSSadaf Ebrahimi uint64_t addr2 = addr + esize;
429*f5c631daSSadaf Ebrahimi uint64_t addr3 = addr2 + esize;
430*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
431*f5c631daSSadaf Ebrahimi StoreLane(src, vform, i, addr);
432*f5c631daSSadaf Ebrahimi StoreLane(src2, vform, i, addr2);
433*f5c631daSSadaf Ebrahimi StoreLane(src3, vform, i, addr3);
434*f5c631daSSadaf Ebrahimi addr += 3 * esize;
435*f5c631daSSadaf Ebrahimi addr2 += 3 * esize;
436*f5c631daSSadaf Ebrahimi addr3 += 3 * esize;
437*f5c631daSSadaf Ebrahimi }
438*f5c631daSSadaf Ebrahimi }
439*f5c631daSSadaf Ebrahimi
440*f5c631daSSadaf Ebrahimi
st3(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,int index,uint64_t addr)441*f5c631daSSadaf Ebrahimi void Simulator::st3(VectorFormat vform,
442*f5c631daSSadaf Ebrahimi LogicVRegister src,
443*f5c631daSSadaf Ebrahimi LogicVRegister src2,
444*f5c631daSSadaf Ebrahimi LogicVRegister src3,
445*f5c631daSSadaf Ebrahimi int index,
446*f5c631daSSadaf Ebrahimi uint64_t addr) {
447*f5c631daSSadaf Ebrahimi int esize = LaneSizeInBytesFromFormat(vform);
448*f5c631daSSadaf Ebrahimi StoreLane(src, vform, index, addr);
449*f5c631daSSadaf Ebrahimi StoreLane(src2, vform, index, addr + 1 * esize);
450*f5c631daSSadaf Ebrahimi StoreLane(src3, vform, index, addr + 2 * esize);
451*f5c631daSSadaf Ebrahimi }
452*f5c631daSSadaf Ebrahimi
453*f5c631daSSadaf Ebrahimi
st4(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,LogicVRegister src4,uint64_t addr)454*f5c631daSSadaf Ebrahimi void Simulator::st4(VectorFormat vform,
455*f5c631daSSadaf Ebrahimi LogicVRegister src,
456*f5c631daSSadaf Ebrahimi LogicVRegister src2,
457*f5c631daSSadaf Ebrahimi LogicVRegister src3,
458*f5c631daSSadaf Ebrahimi LogicVRegister src4,
459*f5c631daSSadaf Ebrahimi uint64_t addr) {
460*f5c631daSSadaf Ebrahimi int esize = LaneSizeInBytesFromFormat(vform);
461*f5c631daSSadaf Ebrahimi uint64_t addr2 = addr + esize;
462*f5c631daSSadaf Ebrahimi uint64_t addr3 = addr2 + esize;
463*f5c631daSSadaf Ebrahimi uint64_t addr4 = addr3 + esize;
464*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
465*f5c631daSSadaf Ebrahimi StoreLane(src, vform, i, addr);
466*f5c631daSSadaf Ebrahimi StoreLane(src2, vform, i, addr2);
467*f5c631daSSadaf Ebrahimi StoreLane(src3, vform, i, addr3);
468*f5c631daSSadaf Ebrahimi StoreLane(src4, vform, i, addr4);
469*f5c631daSSadaf Ebrahimi addr += 4 * esize;
470*f5c631daSSadaf Ebrahimi addr2 += 4 * esize;
471*f5c631daSSadaf Ebrahimi addr3 += 4 * esize;
472*f5c631daSSadaf Ebrahimi addr4 += 4 * esize;
473*f5c631daSSadaf Ebrahimi }
474*f5c631daSSadaf Ebrahimi }
475*f5c631daSSadaf Ebrahimi
476*f5c631daSSadaf Ebrahimi
st4(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,LogicVRegister src4,int index,uint64_t addr)477*f5c631daSSadaf Ebrahimi void Simulator::st4(VectorFormat vform,
478*f5c631daSSadaf Ebrahimi LogicVRegister src,
479*f5c631daSSadaf Ebrahimi LogicVRegister src2,
480*f5c631daSSadaf Ebrahimi LogicVRegister src3,
481*f5c631daSSadaf Ebrahimi LogicVRegister src4,
482*f5c631daSSadaf Ebrahimi int index,
483*f5c631daSSadaf Ebrahimi uint64_t addr) {
484*f5c631daSSadaf Ebrahimi int esize = LaneSizeInBytesFromFormat(vform);
485*f5c631daSSadaf Ebrahimi StoreLane(src, vform, index, addr);
486*f5c631daSSadaf Ebrahimi StoreLane(src2, vform, index, addr + 1 * esize);
487*f5c631daSSadaf Ebrahimi StoreLane(src3, vform, index, addr + 2 * esize);
488*f5c631daSSadaf Ebrahimi StoreLane(src4, vform, index, addr + 3 * esize);
489*f5c631daSSadaf Ebrahimi }
490*f5c631daSSadaf Ebrahimi
491*f5c631daSSadaf Ebrahimi
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)492*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::cmp(VectorFormat vform,
493*f5c631daSSadaf Ebrahimi LogicVRegister dst,
494*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
495*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
496*f5c631daSSadaf Ebrahimi Condition cond) {
497*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
498*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
499*f5c631daSSadaf Ebrahimi int64_t sa = src1.Int(vform, i);
500*f5c631daSSadaf Ebrahimi int64_t sb = src2.Int(vform, i);
501*f5c631daSSadaf Ebrahimi uint64_t ua = src1.Uint(vform, i);
502*f5c631daSSadaf Ebrahimi uint64_t ub = src2.Uint(vform, i);
503*f5c631daSSadaf Ebrahimi bool result = false;
504*f5c631daSSadaf Ebrahimi switch (cond) {
505*f5c631daSSadaf Ebrahimi case eq:
506*f5c631daSSadaf Ebrahimi result = (ua == ub);
507*f5c631daSSadaf Ebrahimi break;
508*f5c631daSSadaf Ebrahimi case ge:
509*f5c631daSSadaf Ebrahimi result = (sa >= sb);
510*f5c631daSSadaf Ebrahimi break;
511*f5c631daSSadaf Ebrahimi case gt:
512*f5c631daSSadaf Ebrahimi result = (sa > sb);
513*f5c631daSSadaf Ebrahimi break;
514*f5c631daSSadaf Ebrahimi case hi:
515*f5c631daSSadaf Ebrahimi result = (ua > ub);
516*f5c631daSSadaf Ebrahimi break;
517*f5c631daSSadaf Ebrahimi case hs:
518*f5c631daSSadaf Ebrahimi result = (ua >= ub);
519*f5c631daSSadaf Ebrahimi break;
520*f5c631daSSadaf Ebrahimi case lt:
521*f5c631daSSadaf Ebrahimi result = (sa < sb);
522*f5c631daSSadaf Ebrahimi break;
523*f5c631daSSadaf Ebrahimi case le:
524*f5c631daSSadaf Ebrahimi result = (sa <= sb);
525*f5c631daSSadaf Ebrahimi break;
526*f5c631daSSadaf Ebrahimi default:
527*f5c631daSSadaf Ebrahimi VIXL_UNREACHABLE();
528*f5c631daSSadaf Ebrahimi break;
529*f5c631daSSadaf Ebrahimi }
530*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
531*f5c631daSSadaf Ebrahimi }
532*f5c631daSSadaf Ebrahimi return dst;
533*f5c631daSSadaf Ebrahimi }
534*f5c631daSSadaf Ebrahimi
535*f5c631daSSadaf Ebrahimi
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int imm,Condition cond)536*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::cmp(VectorFormat vform,
537*f5c631daSSadaf Ebrahimi LogicVRegister dst,
538*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
539*f5c631daSSadaf Ebrahimi int imm,
540*f5c631daSSadaf Ebrahimi Condition cond) {
541*f5c631daSSadaf Ebrahimi SimVRegister temp;
542*f5c631daSSadaf Ebrahimi LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
543*f5c631daSSadaf Ebrahimi return cmp(vform, dst, src1, imm_reg, cond);
544*f5c631daSSadaf Ebrahimi }
545*f5c631daSSadaf Ebrahimi
546*f5c631daSSadaf Ebrahimi
cmptst(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)547*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::cmptst(VectorFormat vform,
548*f5c631daSSadaf Ebrahimi LogicVRegister dst,
549*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
550*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
551*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
552*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
553*f5c631daSSadaf Ebrahimi uint64_t ua = src1.Uint(vform, i);
554*f5c631daSSadaf Ebrahimi uint64_t ub = src2.Uint(vform, i);
555*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
556*f5c631daSSadaf Ebrahimi }
557*f5c631daSSadaf Ebrahimi return dst;
558*f5c631daSSadaf Ebrahimi }
559*f5c631daSSadaf Ebrahimi
560*f5c631daSSadaf Ebrahimi
add(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)561*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::add(VectorFormat vform,
562*f5c631daSSadaf Ebrahimi LogicVRegister dst,
563*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
564*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
565*f5c631daSSadaf Ebrahimi int lane_size = LaneSizeInBitsFromFormat(vform);
566*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
567*f5c631daSSadaf Ebrahimi
568*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
569*f5c631daSSadaf Ebrahimi // Test for unsigned saturation.
570*f5c631daSSadaf Ebrahimi uint64_t ua = src1.UintLeftJustified(vform, i);
571*f5c631daSSadaf Ebrahimi uint64_t ub = src2.UintLeftJustified(vform, i);
572*f5c631daSSadaf Ebrahimi uint64_t ur = ua + ub;
573*f5c631daSSadaf Ebrahimi if (ur < ua) {
574*f5c631daSSadaf Ebrahimi dst.SetUnsignedSat(i, true);
575*f5c631daSSadaf Ebrahimi }
576*f5c631daSSadaf Ebrahimi
577*f5c631daSSadaf Ebrahimi // Test for signed saturation.
578*f5c631daSSadaf Ebrahimi bool pos_a = (ua >> 63) == 0;
579*f5c631daSSadaf Ebrahimi bool pos_b = (ub >> 63) == 0;
580*f5c631daSSadaf Ebrahimi bool pos_r = (ur >> 63) == 0;
581*f5c631daSSadaf Ebrahimi // If the signs of the operands are the same, but different from the result,
582*f5c631daSSadaf Ebrahimi // there was an overflow.
583*f5c631daSSadaf Ebrahimi if ((pos_a == pos_b) && (pos_a != pos_r)) {
584*f5c631daSSadaf Ebrahimi dst.SetSignedSat(i, pos_a);
585*f5c631daSSadaf Ebrahimi }
586*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, ur >> (64 - lane_size));
587*f5c631daSSadaf Ebrahimi }
588*f5c631daSSadaf Ebrahimi return dst;
589*f5c631daSSadaf Ebrahimi }
590*f5c631daSSadaf Ebrahimi
add_uint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,uint64_t value)591*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::add_uint(VectorFormat vform,
592*f5c631daSSadaf Ebrahimi LogicVRegister dst,
593*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
594*f5c631daSSadaf Ebrahimi uint64_t value) {
595*f5c631daSSadaf Ebrahimi int lane_size = LaneSizeInBitsFromFormat(vform);
596*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsUintN(lane_size, value));
597*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
598*f5c631daSSadaf Ebrahimi // Left-justify `value`.
599*f5c631daSSadaf Ebrahimi uint64_t ub = value << (64 - lane_size);
600*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
601*f5c631daSSadaf Ebrahimi // Test for unsigned saturation.
602*f5c631daSSadaf Ebrahimi uint64_t ua = src1.UintLeftJustified(vform, i);
603*f5c631daSSadaf Ebrahimi uint64_t ur = ua + ub;
604*f5c631daSSadaf Ebrahimi if (ur < ua) {
605*f5c631daSSadaf Ebrahimi dst.SetUnsignedSat(i, true);
606*f5c631daSSadaf Ebrahimi }
607*f5c631daSSadaf Ebrahimi
608*f5c631daSSadaf Ebrahimi // Test for signed saturation.
609*f5c631daSSadaf Ebrahimi // `value` is always positive, so we have an overflow if the (signed) result
610*f5c631daSSadaf Ebrahimi // is smaller than the first operand.
611*f5c631daSSadaf Ebrahimi if (RawbitsToInt64(ur) < RawbitsToInt64(ua)) {
612*f5c631daSSadaf Ebrahimi dst.SetSignedSat(i, true);
613*f5c631daSSadaf Ebrahimi }
614*f5c631daSSadaf Ebrahimi
615*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, ur >> (64 - lane_size));
616*f5c631daSSadaf Ebrahimi }
617*f5c631daSSadaf Ebrahimi return dst;
618*f5c631daSSadaf Ebrahimi }
619*f5c631daSSadaf Ebrahimi
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)620*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::addp(VectorFormat vform,
621*f5c631daSSadaf Ebrahimi LogicVRegister dst,
622*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
623*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
624*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
625*f5c631daSSadaf Ebrahimi uzp1(vform, temp1, src1, src2);
626*f5c631daSSadaf Ebrahimi uzp2(vform, temp2, src1, src2);
627*f5c631daSSadaf Ebrahimi add(vform, dst, temp1, temp2);
628*f5c631daSSadaf Ebrahimi if (IsSVEFormat(vform)) {
629*f5c631daSSadaf Ebrahimi interleave_top_bottom(vform, dst, dst);
630*f5c631daSSadaf Ebrahimi }
631*f5c631daSSadaf Ebrahimi return dst;
632*f5c631daSSadaf Ebrahimi }
633*f5c631daSSadaf Ebrahimi
sdiv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)634*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sdiv(VectorFormat vform,
635*f5c631daSSadaf Ebrahimi LogicVRegister dst,
636*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
637*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
638*f5c631daSSadaf Ebrahimi VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
639*f5c631daSSadaf Ebrahimi
640*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
641*f5c631daSSadaf Ebrahimi int64_t val1 = src1.Int(vform, i);
642*f5c631daSSadaf Ebrahimi int64_t val2 = src2.Int(vform, i);
643*f5c631daSSadaf Ebrahimi int64_t min_int = (vform == kFormatVnD) ? kXMinInt : kWMinInt;
644*f5c631daSSadaf Ebrahimi int64_t quotient = 0;
645*f5c631daSSadaf Ebrahimi if ((val1 == min_int) && (val2 == -1)) {
646*f5c631daSSadaf Ebrahimi quotient = min_int;
647*f5c631daSSadaf Ebrahimi } else if (val2 != 0) {
648*f5c631daSSadaf Ebrahimi quotient = val1 / val2;
649*f5c631daSSadaf Ebrahimi }
650*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, quotient);
651*f5c631daSSadaf Ebrahimi }
652*f5c631daSSadaf Ebrahimi
653*f5c631daSSadaf Ebrahimi return dst;
654*f5c631daSSadaf Ebrahimi }
655*f5c631daSSadaf Ebrahimi
udiv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)656*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::udiv(VectorFormat vform,
657*f5c631daSSadaf Ebrahimi LogicVRegister dst,
658*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
659*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
660*f5c631daSSadaf Ebrahimi VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
661*f5c631daSSadaf Ebrahimi
662*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
663*f5c631daSSadaf Ebrahimi uint64_t val1 = src1.Uint(vform, i);
664*f5c631daSSadaf Ebrahimi uint64_t val2 = src2.Uint(vform, i);
665*f5c631daSSadaf Ebrahimi uint64_t quotient = 0;
666*f5c631daSSadaf Ebrahimi if (val2 != 0) {
667*f5c631daSSadaf Ebrahimi quotient = val1 / val2;
668*f5c631daSSadaf Ebrahimi }
669*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, quotient);
670*f5c631daSSadaf Ebrahimi }
671*f5c631daSSadaf Ebrahimi
672*f5c631daSSadaf Ebrahimi return dst;
673*f5c631daSSadaf Ebrahimi }
674*f5c631daSSadaf Ebrahimi
675*f5c631daSSadaf Ebrahimi
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)676*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::mla(VectorFormat vform,
677*f5c631daSSadaf Ebrahimi LogicVRegister dst,
678*f5c631daSSadaf Ebrahimi const LogicVRegister& srca,
679*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
680*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
681*f5c631daSSadaf Ebrahimi SimVRegister temp;
682*f5c631daSSadaf Ebrahimi mul(vform, temp, src1, src2);
683*f5c631daSSadaf Ebrahimi add(vform, dst, srca, temp);
684*f5c631daSSadaf Ebrahimi return dst;
685*f5c631daSSadaf Ebrahimi }
686*f5c631daSSadaf Ebrahimi
687*f5c631daSSadaf Ebrahimi
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)688*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::mls(VectorFormat vform,
689*f5c631daSSadaf Ebrahimi LogicVRegister dst,
690*f5c631daSSadaf Ebrahimi const LogicVRegister& srca,
691*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
692*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
693*f5c631daSSadaf Ebrahimi SimVRegister temp;
694*f5c631daSSadaf Ebrahimi mul(vform, temp, src1, src2);
695*f5c631daSSadaf Ebrahimi sub(vform, dst, srca, temp);
696*f5c631daSSadaf Ebrahimi return dst;
697*f5c631daSSadaf Ebrahimi }
698*f5c631daSSadaf Ebrahimi
699*f5c631daSSadaf Ebrahimi
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)700*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::mul(VectorFormat vform,
701*f5c631daSSadaf Ebrahimi LogicVRegister dst,
702*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
703*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
704*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
705*f5c631daSSadaf Ebrahimi
706*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
707*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
708*f5c631daSSadaf Ebrahimi }
709*f5c631daSSadaf Ebrahimi return dst;
710*f5c631daSSadaf Ebrahimi }
711*f5c631daSSadaf Ebrahimi
712*f5c631daSSadaf Ebrahimi
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)713*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::mul(VectorFormat vform,
714*f5c631daSSadaf Ebrahimi LogicVRegister dst,
715*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
716*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
717*f5c631daSSadaf Ebrahimi int index) {
718*f5c631daSSadaf Ebrahimi SimVRegister temp;
719*f5c631daSSadaf Ebrahimi VectorFormat indexform = VectorFormatFillQ(vform);
720*f5c631daSSadaf Ebrahimi return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
721*f5c631daSSadaf Ebrahimi }
722*f5c631daSSadaf Ebrahimi
723*f5c631daSSadaf Ebrahimi
smulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)724*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::smulh(VectorFormat vform,
725*f5c631daSSadaf Ebrahimi LogicVRegister dst,
726*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
727*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
728*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
729*f5c631daSSadaf Ebrahimi int64_t dst_val = 0xbadbeef;
730*f5c631daSSadaf Ebrahimi int64_t val1 = src1.Int(vform, i);
731*f5c631daSSadaf Ebrahimi int64_t val2 = src2.Int(vform, i);
732*f5c631daSSadaf Ebrahimi switch (LaneSizeInBitsFromFormat(vform)) {
733*f5c631daSSadaf Ebrahimi case 8:
734*f5c631daSSadaf Ebrahimi dst_val = internal::MultiplyHigh<8>(val1, val2);
735*f5c631daSSadaf Ebrahimi break;
736*f5c631daSSadaf Ebrahimi case 16:
737*f5c631daSSadaf Ebrahimi dst_val = internal::MultiplyHigh<16>(val1, val2);
738*f5c631daSSadaf Ebrahimi break;
739*f5c631daSSadaf Ebrahimi case 32:
740*f5c631daSSadaf Ebrahimi dst_val = internal::MultiplyHigh<32>(val1, val2);
741*f5c631daSSadaf Ebrahimi break;
742*f5c631daSSadaf Ebrahimi case 64:
743*f5c631daSSadaf Ebrahimi dst_val = internal::MultiplyHigh<64>(val1, val2);
744*f5c631daSSadaf Ebrahimi break;
745*f5c631daSSadaf Ebrahimi default:
746*f5c631daSSadaf Ebrahimi VIXL_UNREACHABLE();
747*f5c631daSSadaf Ebrahimi break;
748*f5c631daSSadaf Ebrahimi }
749*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, dst_val);
750*f5c631daSSadaf Ebrahimi }
751*f5c631daSSadaf Ebrahimi return dst;
752*f5c631daSSadaf Ebrahimi }
753*f5c631daSSadaf Ebrahimi
754*f5c631daSSadaf Ebrahimi
umulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)755*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::umulh(VectorFormat vform,
756*f5c631daSSadaf Ebrahimi LogicVRegister dst,
757*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
758*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
759*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
760*f5c631daSSadaf Ebrahimi uint64_t dst_val = 0xbadbeef;
761*f5c631daSSadaf Ebrahimi uint64_t val1 = src1.Uint(vform, i);
762*f5c631daSSadaf Ebrahimi uint64_t val2 = src2.Uint(vform, i);
763*f5c631daSSadaf Ebrahimi switch (LaneSizeInBitsFromFormat(vform)) {
764*f5c631daSSadaf Ebrahimi case 8:
765*f5c631daSSadaf Ebrahimi dst_val = internal::MultiplyHigh<8>(val1, val2);
766*f5c631daSSadaf Ebrahimi break;
767*f5c631daSSadaf Ebrahimi case 16:
768*f5c631daSSadaf Ebrahimi dst_val = internal::MultiplyHigh<16>(val1, val2);
769*f5c631daSSadaf Ebrahimi break;
770*f5c631daSSadaf Ebrahimi case 32:
771*f5c631daSSadaf Ebrahimi dst_val = internal::MultiplyHigh<32>(val1, val2);
772*f5c631daSSadaf Ebrahimi break;
773*f5c631daSSadaf Ebrahimi case 64:
774*f5c631daSSadaf Ebrahimi dst_val = internal::MultiplyHigh<64>(val1, val2);
775*f5c631daSSadaf Ebrahimi break;
776*f5c631daSSadaf Ebrahimi default:
777*f5c631daSSadaf Ebrahimi VIXL_UNREACHABLE();
778*f5c631daSSadaf Ebrahimi break;
779*f5c631daSSadaf Ebrahimi }
780*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, dst_val);
781*f5c631daSSadaf Ebrahimi }
782*f5c631daSSadaf Ebrahimi return dst;
783*f5c631daSSadaf Ebrahimi }
784*f5c631daSSadaf Ebrahimi
785*f5c631daSSadaf Ebrahimi
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)786*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::mla(VectorFormat vform,
787*f5c631daSSadaf Ebrahimi LogicVRegister dst,
788*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
789*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
790*f5c631daSSadaf Ebrahimi int index) {
791*f5c631daSSadaf Ebrahimi SimVRegister temp;
792*f5c631daSSadaf Ebrahimi VectorFormat indexform = VectorFormatFillQ(vform);
793*f5c631daSSadaf Ebrahimi return mla(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
794*f5c631daSSadaf Ebrahimi }
795*f5c631daSSadaf Ebrahimi
796*f5c631daSSadaf Ebrahimi
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)797*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::mls(VectorFormat vform,
798*f5c631daSSadaf Ebrahimi LogicVRegister dst,
799*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
800*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
801*f5c631daSSadaf Ebrahimi int index) {
802*f5c631daSSadaf Ebrahimi SimVRegister temp;
803*f5c631daSSadaf Ebrahimi VectorFormat indexform = VectorFormatFillQ(vform);
804*f5c631daSSadaf Ebrahimi return mls(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
805*f5c631daSSadaf Ebrahimi }
806*f5c631daSSadaf Ebrahimi
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)807*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqdmull(VectorFormat vform,
808*f5c631daSSadaf Ebrahimi LogicVRegister dst,
809*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
810*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
811*f5c631daSSadaf Ebrahimi int index) {
812*f5c631daSSadaf Ebrahimi SimVRegister temp;
813*f5c631daSSadaf Ebrahimi VectorFormat indexform =
814*f5c631daSSadaf Ebrahimi VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
815*f5c631daSSadaf Ebrahimi return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
816*f5c631daSSadaf Ebrahimi }
817*f5c631daSSadaf Ebrahimi
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)818*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqdmlal(VectorFormat vform,
819*f5c631daSSadaf Ebrahimi LogicVRegister dst,
820*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
821*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
822*f5c631daSSadaf Ebrahimi int index) {
823*f5c631daSSadaf Ebrahimi SimVRegister temp;
824*f5c631daSSadaf Ebrahimi VectorFormat indexform =
825*f5c631daSSadaf Ebrahimi VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
826*f5c631daSSadaf Ebrahimi return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
827*f5c631daSSadaf Ebrahimi }
828*f5c631daSSadaf Ebrahimi
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)829*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
830*f5c631daSSadaf Ebrahimi LogicVRegister dst,
831*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
832*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
833*f5c631daSSadaf Ebrahimi int index) {
834*f5c631daSSadaf Ebrahimi SimVRegister temp;
835*f5c631daSSadaf Ebrahimi VectorFormat indexform =
836*f5c631daSSadaf Ebrahimi VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
837*f5c631daSSadaf Ebrahimi return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
838*f5c631daSSadaf Ebrahimi }
839*f5c631daSSadaf Ebrahimi
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)840*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqdmulh(VectorFormat vform,
841*f5c631daSSadaf Ebrahimi LogicVRegister dst,
842*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
843*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
844*f5c631daSSadaf Ebrahimi int index) {
845*f5c631daSSadaf Ebrahimi SimVRegister temp;
846*f5c631daSSadaf Ebrahimi VectorFormat indexform = VectorFormatFillQ(vform);
847*f5c631daSSadaf Ebrahimi return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
848*f5c631daSSadaf Ebrahimi }
849*f5c631daSSadaf Ebrahimi
850*f5c631daSSadaf Ebrahimi
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)851*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
852*f5c631daSSadaf Ebrahimi LogicVRegister dst,
853*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
854*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
855*f5c631daSSadaf Ebrahimi int index) {
856*f5c631daSSadaf Ebrahimi SimVRegister temp;
857*f5c631daSSadaf Ebrahimi VectorFormat indexform = VectorFormatFillQ(vform);
858*f5c631daSSadaf Ebrahimi return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
859*f5c631daSSadaf Ebrahimi }
860*f5c631daSSadaf Ebrahimi
861*f5c631daSSadaf Ebrahimi
sqrdmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)862*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
863*f5c631daSSadaf Ebrahimi LogicVRegister dst,
864*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
865*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
866*f5c631daSSadaf Ebrahimi int index) {
867*f5c631daSSadaf Ebrahimi SimVRegister temp;
868*f5c631daSSadaf Ebrahimi VectorFormat indexform = VectorFormatFillQ(vform);
869*f5c631daSSadaf Ebrahimi return sqrdmlah(vform, dst, src1, dup_element(indexform, temp, src2, index));
870*f5c631daSSadaf Ebrahimi }
871*f5c631daSSadaf Ebrahimi
872*f5c631daSSadaf Ebrahimi
sqrdmlsh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)873*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
874*f5c631daSSadaf Ebrahimi LogicVRegister dst,
875*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
876*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
877*f5c631daSSadaf Ebrahimi int index) {
878*f5c631daSSadaf Ebrahimi SimVRegister temp;
879*f5c631daSSadaf Ebrahimi VectorFormat indexform = VectorFormatFillQ(vform);
880*f5c631daSSadaf Ebrahimi return sqrdmlsh(vform, dst, src1, dup_element(indexform, temp, src2, index));
881*f5c631daSSadaf Ebrahimi }
882*f5c631daSSadaf Ebrahimi
883*f5c631daSSadaf Ebrahimi
PolynomialMult(uint64_t op1,uint64_t op2,int lane_size_in_bits) const884*f5c631daSSadaf Ebrahimi uint64_t Simulator::PolynomialMult(uint64_t op1,
885*f5c631daSSadaf Ebrahimi uint64_t op2,
886*f5c631daSSadaf Ebrahimi int lane_size_in_bits) const {
887*f5c631daSSadaf Ebrahimi VIXL_ASSERT(static_cast<unsigned>(lane_size_in_bits) <= kSRegSize);
888*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsUintN(lane_size_in_bits, op1));
889*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsUintN(lane_size_in_bits, op2));
890*f5c631daSSadaf Ebrahimi uint64_t result = 0;
891*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_size_in_bits; ++i) {
892*f5c631daSSadaf Ebrahimi if ((op1 >> i) & 1) {
893*f5c631daSSadaf Ebrahimi result = result ^ (op2 << i);
894*f5c631daSSadaf Ebrahimi }
895*f5c631daSSadaf Ebrahimi }
896*f5c631daSSadaf Ebrahimi return result;
897*f5c631daSSadaf Ebrahimi }
898*f5c631daSSadaf Ebrahimi
899*f5c631daSSadaf Ebrahimi
pmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)900*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::pmul(VectorFormat vform,
901*f5c631daSSadaf Ebrahimi LogicVRegister dst,
902*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
903*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
904*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
905*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
906*f5c631daSSadaf Ebrahimi dst.SetUint(vform,
907*f5c631daSSadaf Ebrahimi i,
908*f5c631daSSadaf Ebrahimi PolynomialMult(src1.Uint(vform, i),
909*f5c631daSSadaf Ebrahimi src2.Uint(vform, i),
910*f5c631daSSadaf Ebrahimi LaneSizeInBitsFromFormat(vform)));
911*f5c631daSSadaf Ebrahimi }
912*f5c631daSSadaf Ebrahimi return dst;
913*f5c631daSSadaf Ebrahimi }
914*f5c631daSSadaf Ebrahimi
915*f5c631daSSadaf Ebrahimi
pmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)916*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::pmull(VectorFormat vform,
917*f5c631daSSadaf Ebrahimi LogicVRegister dst,
918*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
919*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
920*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
921*f5c631daSSadaf Ebrahimi
922*f5c631daSSadaf Ebrahimi VectorFormat vform_src = VectorFormatHalfWidth(vform);
923*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
924*f5c631daSSadaf Ebrahimi dst.SetUint(vform,
925*f5c631daSSadaf Ebrahimi i,
926*f5c631daSSadaf Ebrahimi PolynomialMult(src1.Uint(vform_src, i),
927*f5c631daSSadaf Ebrahimi src2.Uint(vform_src, i),
928*f5c631daSSadaf Ebrahimi LaneSizeInBitsFromFormat(vform_src)));
929*f5c631daSSadaf Ebrahimi }
930*f5c631daSSadaf Ebrahimi
931*f5c631daSSadaf Ebrahimi return dst;
932*f5c631daSSadaf Ebrahimi }
933*f5c631daSSadaf Ebrahimi
934*f5c631daSSadaf Ebrahimi
pmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)935*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::pmull2(VectorFormat vform,
936*f5c631daSSadaf Ebrahimi LogicVRegister dst,
937*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
938*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
939*f5c631daSSadaf Ebrahimi VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
940*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
941*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
942*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; i++) {
943*f5c631daSSadaf Ebrahimi dst.SetUint(vform,
944*f5c631daSSadaf Ebrahimi i,
945*f5c631daSSadaf Ebrahimi PolynomialMult(src1.Uint(vform_src, lane_count + i),
946*f5c631daSSadaf Ebrahimi src2.Uint(vform_src, lane_count + i),
947*f5c631daSSadaf Ebrahimi LaneSizeInBitsFromFormat(vform_src)));
948*f5c631daSSadaf Ebrahimi }
949*f5c631daSSadaf Ebrahimi return dst;
950*f5c631daSSadaf Ebrahimi }
951*f5c631daSSadaf Ebrahimi
952*f5c631daSSadaf Ebrahimi
sub(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)953*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sub(VectorFormat vform,
954*f5c631daSSadaf Ebrahimi LogicVRegister dst,
955*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
956*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
957*f5c631daSSadaf Ebrahimi int lane_size = LaneSizeInBitsFromFormat(vform);
958*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
959*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
960*f5c631daSSadaf Ebrahimi // Test for unsigned saturation.
961*f5c631daSSadaf Ebrahimi uint64_t ua = src1.UintLeftJustified(vform, i);
962*f5c631daSSadaf Ebrahimi uint64_t ub = src2.UintLeftJustified(vform, i);
963*f5c631daSSadaf Ebrahimi uint64_t ur = ua - ub;
964*f5c631daSSadaf Ebrahimi if (ub > ua) {
965*f5c631daSSadaf Ebrahimi dst.SetUnsignedSat(i, false);
966*f5c631daSSadaf Ebrahimi }
967*f5c631daSSadaf Ebrahimi
968*f5c631daSSadaf Ebrahimi // Test for signed saturation.
969*f5c631daSSadaf Ebrahimi bool pos_a = (ua >> 63) == 0;
970*f5c631daSSadaf Ebrahimi bool pos_b = (ub >> 63) == 0;
971*f5c631daSSadaf Ebrahimi bool pos_r = (ur >> 63) == 0;
972*f5c631daSSadaf Ebrahimi // If the signs of the operands are different, and the sign of the first
973*f5c631daSSadaf Ebrahimi // operand doesn't match the result, there was an overflow.
974*f5c631daSSadaf Ebrahimi if ((pos_a != pos_b) && (pos_a != pos_r)) {
975*f5c631daSSadaf Ebrahimi dst.SetSignedSat(i, pos_a);
976*f5c631daSSadaf Ebrahimi }
977*f5c631daSSadaf Ebrahimi
978*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, ur >> (64 - lane_size));
979*f5c631daSSadaf Ebrahimi }
980*f5c631daSSadaf Ebrahimi return dst;
981*f5c631daSSadaf Ebrahimi }
982*f5c631daSSadaf Ebrahimi
sub_uint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,uint64_t value)983*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sub_uint(VectorFormat vform,
984*f5c631daSSadaf Ebrahimi LogicVRegister dst,
985*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
986*f5c631daSSadaf Ebrahimi uint64_t value) {
987*f5c631daSSadaf Ebrahimi int lane_size = LaneSizeInBitsFromFormat(vform);
988*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsUintN(lane_size, value));
989*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
990*f5c631daSSadaf Ebrahimi // Left-justify `value`.
991*f5c631daSSadaf Ebrahimi uint64_t ub = value << (64 - lane_size);
992*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
993*f5c631daSSadaf Ebrahimi // Test for unsigned saturation.
994*f5c631daSSadaf Ebrahimi uint64_t ua = src1.UintLeftJustified(vform, i);
995*f5c631daSSadaf Ebrahimi uint64_t ur = ua - ub;
996*f5c631daSSadaf Ebrahimi if (ub > ua) {
997*f5c631daSSadaf Ebrahimi dst.SetUnsignedSat(i, false);
998*f5c631daSSadaf Ebrahimi }
999*f5c631daSSadaf Ebrahimi
1000*f5c631daSSadaf Ebrahimi // Test for signed saturation.
1001*f5c631daSSadaf Ebrahimi // `value` is always positive, so we have an overflow if the (signed) result
1002*f5c631daSSadaf Ebrahimi // is greater than the first operand.
1003*f5c631daSSadaf Ebrahimi if (RawbitsToInt64(ur) > RawbitsToInt64(ua)) {
1004*f5c631daSSadaf Ebrahimi dst.SetSignedSat(i, false);
1005*f5c631daSSadaf Ebrahimi }
1006*f5c631daSSadaf Ebrahimi
1007*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, ur >> (64 - lane_size));
1008*f5c631daSSadaf Ebrahimi }
1009*f5c631daSSadaf Ebrahimi return dst;
1010*f5c631daSSadaf Ebrahimi }
1011*f5c631daSSadaf Ebrahimi
and_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1012*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::and_(VectorFormat vform,
1013*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1014*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1015*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
1016*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
1017*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1018*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1019*f5c631daSSadaf Ebrahimi }
1020*f5c631daSSadaf Ebrahimi return dst;
1021*f5c631daSSadaf Ebrahimi }
1022*f5c631daSSadaf Ebrahimi
1023*f5c631daSSadaf Ebrahimi
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1024*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::orr(VectorFormat vform,
1025*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1026*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1027*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
1028*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
1029*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1030*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1031*f5c631daSSadaf Ebrahimi }
1032*f5c631daSSadaf Ebrahimi return dst;
1033*f5c631daSSadaf Ebrahimi }
1034*f5c631daSSadaf Ebrahimi
1035*f5c631daSSadaf Ebrahimi
orn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1036*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::orn(VectorFormat vform,
1037*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1038*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1039*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
1040*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
1041*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1042*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1043*f5c631daSSadaf Ebrahimi }
1044*f5c631daSSadaf Ebrahimi return dst;
1045*f5c631daSSadaf Ebrahimi }
1046*f5c631daSSadaf Ebrahimi
1047*f5c631daSSadaf Ebrahimi
eor(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1048*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::eor(VectorFormat vform,
1049*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1050*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1051*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
1052*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
1053*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1054*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1055*f5c631daSSadaf Ebrahimi }
1056*f5c631daSSadaf Ebrahimi return dst;
1057*f5c631daSSadaf Ebrahimi }
1058*f5c631daSSadaf Ebrahimi
1059*f5c631daSSadaf Ebrahimi
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1060*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::bic(VectorFormat vform,
1061*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1062*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1063*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
1064*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
1065*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1066*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1067*f5c631daSSadaf Ebrahimi }
1068*f5c631daSSadaf Ebrahimi return dst;
1069*f5c631daSSadaf Ebrahimi }
1070*f5c631daSSadaf Ebrahimi
1071*f5c631daSSadaf Ebrahimi
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)1072*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::bic(VectorFormat vform,
1073*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1074*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
1075*f5c631daSSadaf Ebrahimi uint64_t imm) {
1076*f5c631daSSadaf Ebrahimi uint64_t result[16];
1077*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
1078*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
1079*f5c631daSSadaf Ebrahimi result[i] = src.Uint(vform, i) & ~imm;
1080*f5c631daSSadaf Ebrahimi }
1081*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
1082*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
1083*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result[i]);
1084*f5c631daSSadaf Ebrahimi }
1085*f5c631daSSadaf Ebrahimi return dst;
1086*f5c631daSSadaf Ebrahimi }
1087*f5c631daSSadaf Ebrahimi
1088*f5c631daSSadaf Ebrahimi
bif(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1089*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::bif(VectorFormat vform,
1090*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1091*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1092*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
1093*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
1094*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1095*f5c631daSSadaf Ebrahimi uint64_t operand1 = dst.Uint(vform, i);
1096*f5c631daSSadaf Ebrahimi uint64_t operand2 = ~src2.Uint(vform, i);
1097*f5c631daSSadaf Ebrahimi uint64_t operand3 = src1.Uint(vform, i);
1098*f5c631daSSadaf Ebrahimi uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1099*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result);
1100*f5c631daSSadaf Ebrahimi }
1101*f5c631daSSadaf Ebrahimi return dst;
1102*f5c631daSSadaf Ebrahimi }
1103*f5c631daSSadaf Ebrahimi
1104*f5c631daSSadaf Ebrahimi
bit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1105*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::bit(VectorFormat vform,
1106*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1107*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1108*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
1109*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
1110*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1111*f5c631daSSadaf Ebrahimi uint64_t operand1 = dst.Uint(vform, i);
1112*f5c631daSSadaf Ebrahimi uint64_t operand2 = src2.Uint(vform, i);
1113*f5c631daSSadaf Ebrahimi uint64_t operand3 = src1.Uint(vform, i);
1114*f5c631daSSadaf Ebrahimi uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1115*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result);
1116*f5c631daSSadaf Ebrahimi }
1117*f5c631daSSadaf Ebrahimi return dst;
1118*f5c631daSSadaf Ebrahimi }
1119*f5c631daSSadaf Ebrahimi
1120*f5c631daSSadaf Ebrahimi
bsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src_mask,const LogicVRegister & src1,const LogicVRegister & src2)1121*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::bsl(VectorFormat vform,
1122*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1123*f5c631daSSadaf Ebrahimi const LogicVRegister& src_mask,
1124*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1125*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
1126*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
1127*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1128*f5c631daSSadaf Ebrahimi uint64_t operand1 = src2.Uint(vform, i);
1129*f5c631daSSadaf Ebrahimi uint64_t operand2 = src_mask.Uint(vform, i);
1130*f5c631daSSadaf Ebrahimi uint64_t operand3 = src1.Uint(vform, i);
1131*f5c631daSSadaf Ebrahimi uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1132*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result);
1133*f5c631daSSadaf Ebrahimi }
1134*f5c631daSSadaf Ebrahimi return dst;
1135*f5c631daSSadaf Ebrahimi }
1136*f5c631daSSadaf Ebrahimi
1137*f5c631daSSadaf Ebrahimi
sminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1138*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sminmax(VectorFormat vform,
1139*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1140*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1141*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
1142*f5c631daSSadaf Ebrahimi bool max) {
1143*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
1144*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1145*f5c631daSSadaf Ebrahimi int64_t src1_val = src1.Int(vform, i);
1146*f5c631daSSadaf Ebrahimi int64_t src2_val = src2.Int(vform, i);
1147*f5c631daSSadaf Ebrahimi int64_t dst_val;
1148*f5c631daSSadaf Ebrahimi if (max) {
1149*f5c631daSSadaf Ebrahimi dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1150*f5c631daSSadaf Ebrahimi } else {
1151*f5c631daSSadaf Ebrahimi dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1152*f5c631daSSadaf Ebrahimi }
1153*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, dst_val);
1154*f5c631daSSadaf Ebrahimi }
1155*f5c631daSSadaf Ebrahimi return dst;
1156*f5c631daSSadaf Ebrahimi }
1157*f5c631daSSadaf Ebrahimi
1158*f5c631daSSadaf Ebrahimi
smax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1159*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::smax(VectorFormat vform,
1160*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1161*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1162*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
1163*f5c631daSSadaf Ebrahimi return sminmax(vform, dst, src1, src2, true);
1164*f5c631daSSadaf Ebrahimi }
1165*f5c631daSSadaf Ebrahimi
1166*f5c631daSSadaf Ebrahimi
smin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1167*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::smin(VectorFormat vform,
1168*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1169*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1170*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
1171*f5c631daSSadaf Ebrahimi return sminmax(vform, dst, src1, src2, false);
1172*f5c631daSSadaf Ebrahimi }
1173*f5c631daSSadaf Ebrahimi
1174*f5c631daSSadaf Ebrahimi
sminmaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1175*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sminmaxp(VectorFormat vform,
1176*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1177*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1178*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
1179*f5c631daSSadaf Ebrahimi bool max) {
1180*f5c631daSSadaf Ebrahimi unsigned lanes = LaneCountFromFormat(vform);
1181*f5c631daSSadaf Ebrahimi int64_t result[kZRegMaxSizeInBytes];
1182*f5c631daSSadaf Ebrahimi const LogicVRegister* src = &src1;
1183*f5c631daSSadaf Ebrahimi for (unsigned j = 0; j < 2; j++) {
1184*f5c631daSSadaf Ebrahimi for (unsigned i = 0; i < lanes; i += 2) {
1185*f5c631daSSadaf Ebrahimi int64_t first_val = src->Int(vform, i);
1186*f5c631daSSadaf Ebrahimi int64_t second_val = src->Int(vform, i + 1);
1187*f5c631daSSadaf Ebrahimi int64_t dst_val;
1188*f5c631daSSadaf Ebrahimi if (max) {
1189*f5c631daSSadaf Ebrahimi dst_val = (first_val > second_val) ? first_val : second_val;
1190*f5c631daSSadaf Ebrahimi } else {
1191*f5c631daSSadaf Ebrahimi dst_val = (first_val < second_val) ? first_val : second_val;
1192*f5c631daSSadaf Ebrahimi }
1193*f5c631daSSadaf Ebrahimi VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result));
1194*f5c631daSSadaf Ebrahimi result[(i >> 1) + (j * lanes / 2)] = dst_val;
1195*f5c631daSSadaf Ebrahimi }
1196*f5c631daSSadaf Ebrahimi src = &src2;
1197*f5c631daSSadaf Ebrahimi }
1198*f5c631daSSadaf Ebrahimi dst.SetIntArray(vform, result);
1199*f5c631daSSadaf Ebrahimi if (IsSVEFormat(vform)) {
1200*f5c631daSSadaf Ebrahimi interleave_top_bottom(vform, dst, dst);
1201*f5c631daSSadaf Ebrahimi }
1202*f5c631daSSadaf Ebrahimi return dst;
1203*f5c631daSSadaf Ebrahimi }
1204*f5c631daSSadaf Ebrahimi
1205*f5c631daSSadaf Ebrahimi
smaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1206*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::smaxp(VectorFormat vform,
1207*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1208*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1209*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
1210*f5c631daSSadaf Ebrahimi return sminmaxp(vform, dst, src1, src2, true);
1211*f5c631daSSadaf Ebrahimi }
1212*f5c631daSSadaf Ebrahimi
1213*f5c631daSSadaf Ebrahimi
sminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1214*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sminp(VectorFormat vform,
1215*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1216*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1217*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
1218*f5c631daSSadaf Ebrahimi return sminmaxp(vform, dst, src1, src2, false);
1219*f5c631daSSadaf Ebrahimi }
1220*f5c631daSSadaf Ebrahimi
1221*f5c631daSSadaf Ebrahimi
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1222*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::addp(VectorFormat vform,
1223*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1224*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
1225*f5c631daSSadaf Ebrahimi VIXL_ASSERT(vform == kFormatD);
1226*f5c631daSSadaf Ebrahimi
1227*f5c631daSSadaf Ebrahimi uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1228*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
1229*f5c631daSSadaf Ebrahimi dst.SetUint(vform, 0, dst_val);
1230*f5c631daSSadaf Ebrahimi return dst;
1231*f5c631daSSadaf Ebrahimi }
1232*f5c631daSSadaf Ebrahimi
1233*f5c631daSSadaf Ebrahimi
addv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1234*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::addv(VectorFormat vform,
1235*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1236*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
1237*f5c631daSSadaf Ebrahimi VectorFormat vform_dst =
1238*f5c631daSSadaf Ebrahimi ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1239*f5c631daSSadaf Ebrahimi
1240*f5c631daSSadaf Ebrahimi
1241*f5c631daSSadaf Ebrahimi int64_t dst_val = 0;
1242*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1243*f5c631daSSadaf Ebrahimi dst_val += src.Int(vform, i);
1244*f5c631daSSadaf Ebrahimi }
1245*f5c631daSSadaf Ebrahimi
1246*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform_dst);
1247*f5c631daSSadaf Ebrahimi dst.SetInt(vform_dst, 0, dst_val);
1248*f5c631daSSadaf Ebrahimi return dst;
1249*f5c631daSSadaf Ebrahimi }
1250*f5c631daSSadaf Ebrahimi
1251*f5c631daSSadaf Ebrahimi
saddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1252*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::saddlv(VectorFormat vform,
1253*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1254*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
1255*f5c631daSSadaf Ebrahimi VectorFormat vform_dst =
1256*f5c631daSSadaf Ebrahimi ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1257*f5c631daSSadaf Ebrahimi
1258*f5c631daSSadaf Ebrahimi int64_t dst_val = 0;
1259*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1260*f5c631daSSadaf Ebrahimi dst_val += src.Int(vform, i);
1261*f5c631daSSadaf Ebrahimi }
1262*f5c631daSSadaf Ebrahimi
1263*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform_dst);
1264*f5c631daSSadaf Ebrahimi dst.SetInt(vform_dst, 0, dst_val);
1265*f5c631daSSadaf Ebrahimi return dst;
1266*f5c631daSSadaf Ebrahimi }
1267*f5c631daSSadaf Ebrahimi
1268*f5c631daSSadaf Ebrahimi
uaddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1269*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uaddlv(VectorFormat vform,
1270*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1271*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
1272*f5c631daSSadaf Ebrahimi VectorFormat vform_dst =
1273*f5c631daSSadaf Ebrahimi ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1274*f5c631daSSadaf Ebrahimi
1275*f5c631daSSadaf Ebrahimi uint64_t dst_val = 0;
1276*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1277*f5c631daSSadaf Ebrahimi dst_val += src.Uint(vform, i);
1278*f5c631daSSadaf Ebrahimi }
1279*f5c631daSSadaf Ebrahimi
1280*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform_dst);
1281*f5c631daSSadaf Ebrahimi dst.SetUint(vform_dst, 0, dst_val);
1282*f5c631daSSadaf Ebrahimi return dst;
1283*f5c631daSSadaf Ebrahimi }
1284*f5c631daSSadaf Ebrahimi
1285*f5c631daSSadaf Ebrahimi
sminmaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,bool max)1286*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sminmaxv(VectorFormat vform,
1287*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1288*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
1289*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
1290*f5c631daSSadaf Ebrahimi bool max) {
1291*f5c631daSSadaf Ebrahimi int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1292*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1293*f5c631daSSadaf Ebrahimi if (!pg.IsActive(vform, i)) continue;
1294*f5c631daSSadaf Ebrahimi
1295*f5c631daSSadaf Ebrahimi int64_t src_val = src.Int(vform, i);
1296*f5c631daSSadaf Ebrahimi if (max) {
1297*f5c631daSSadaf Ebrahimi dst_val = (src_val > dst_val) ? src_val : dst_val;
1298*f5c631daSSadaf Ebrahimi } else {
1299*f5c631daSSadaf Ebrahimi dst_val = (src_val < dst_val) ? src_val : dst_val;
1300*f5c631daSSadaf Ebrahimi }
1301*f5c631daSSadaf Ebrahimi }
1302*f5c631daSSadaf Ebrahimi dst.ClearForWrite(ScalarFormatFromFormat(vform));
1303*f5c631daSSadaf Ebrahimi dst.SetInt(vform, 0, dst_val);
1304*f5c631daSSadaf Ebrahimi return dst;
1305*f5c631daSSadaf Ebrahimi }
1306*f5c631daSSadaf Ebrahimi
1307*f5c631daSSadaf Ebrahimi
smaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1308*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::smaxv(VectorFormat vform,
1309*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1310*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
1311*f5c631daSSadaf Ebrahimi sminmaxv(vform, dst, GetPTrue(), src, true);
1312*f5c631daSSadaf Ebrahimi return dst;
1313*f5c631daSSadaf Ebrahimi }
1314*f5c631daSSadaf Ebrahimi
1315*f5c631daSSadaf Ebrahimi
sminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1316*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sminv(VectorFormat vform,
1317*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1318*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
1319*f5c631daSSadaf Ebrahimi sminmaxv(vform, dst, GetPTrue(), src, false);
1320*f5c631daSSadaf Ebrahimi return dst;
1321*f5c631daSSadaf Ebrahimi }
1322*f5c631daSSadaf Ebrahimi
1323*f5c631daSSadaf Ebrahimi
smaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1324*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::smaxv(VectorFormat vform,
1325*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1326*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
1327*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
1328*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsSVEFormat(vform));
1329*f5c631daSSadaf Ebrahimi sminmaxv(vform, dst, pg, src, true);
1330*f5c631daSSadaf Ebrahimi return dst;
1331*f5c631daSSadaf Ebrahimi }
1332*f5c631daSSadaf Ebrahimi
1333*f5c631daSSadaf Ebrahimi
sminv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1334*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sminv(VectorFormat vform,
1335*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1336*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
1337*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
1338*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsSVEFormat(vform));
1339*f5c631daSSadaf Ebrahimi sminmaxv(vform, dst, pg, src, false);
1340*f5c631daSSadaf Ebrahimi return dst;
1341*f5c631daSSadaf Ebrahimi }
1342*f5c631daSSadaf Ebrahimi
1343*f5c631daSSadaf Ebrahimi
uminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1344*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uminmax(VectorFormat vform,
1345*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1346*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1347*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
1348*f5c631daSSadaf Ebrahimi bool max) {
1349*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
1350*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1351*f5c631daSSadaf Ebrahimi uint64_t src1_val = src1.Uint(vform, i);
1352*f5c631daSSadaf Ebrahimi uint64_t src2_val = src2.Uint(vform, i);
1353*f5c631daSSadaf Ebrahimi uint64_t dst_val;
1354*f5c631daSSadaf Ebrahimi if (max) {
1355*f5c631daSSadaf Ebrahimi dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1356*f5c631daSSadaf Ebrahimi } else {
1357*f5c631daSSadaf Ebrahimi dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1358*f5c631daSSadaf Ebrahimi }
1359*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, dst_val);
1360*f5c631daSSadaf Ebrahimi }
1361*f5c631daSSadaf Ebrahimi return dst;
1362*f5c631daSSadaf Ebrahimi }
1363*f5c631daSSadaf Ebrahimi
1364*f5c631daSSadaf Ebrahimi
umax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1365*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::umax(VectorFormat vform,
1366*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1367*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1368*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
1369*f5c631daSSadaf Ebrahimi return uminmax(vform, dst, src1, src2, true);
1370*f5c631daSSadaf Ebrahimi }
1371*f5c631daSSadaf Ebrahimi
1372*f5c631daSSadaf Ebrahimi
umin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1373*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::umin(VectorFormat vform,
1374*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1375*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1376*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
1377*f5c631daSSadaf Ebrahimi return uminmax(vform, dst, src1, src2, false);
1378*f5c631daSSadaf Ebrahimi }
1379*f5c631daSSadaf Ebrahimi
1380*f5c631daSSadaf Ebrahimi
uminmaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1381*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uminmaxp(VectorFormat vform,
1382*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1383*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1384*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
1385*f5c631daSSadaf Ebrahimi bool max) {
1386*f5c631daSSadaf Ebrahimi unsigned lanes = LaneCountFromFormat(vform);
1387*f5c631daSSadaf Ebrahimi uint64_t result[kZRegMaxSizeInBytes];
1388*f5c631daSSadaf Ebrahimi const LogicVRegister* src = &src1;
1389*f5c631daSSadaf Ebrahimi for (unsigned j = 0; j < 2; j++) {
1390*f5c631daSSadaf Ebrahimi for (unsigned i = 0; i < lanes; i += 2) {
1391*f5c631daSSadaf Ebrahimi uint64_t first_val = src->Uint(vform, i);
1392*f5c631daSSadaf Ebrahimi uint64_t second_val = src->Uint(vform, i + 1);
1393*f5c631daSSadaf Ebrahimi uint64_t dst_val;
1394*f5c631daSSadaf Ebrahimi if (max) {
1395*f5c631daSSadaf Ebrahimi dst_val = (first_val > second_val) ? first_val : second_val;
1396*f5c631daSSadaf Ebrahimi } else {
1397*f5c631daSSadaf Ebrahimi dst_val = (first_val < second_val) ? first_val : second_val;
1398*f5c631daSSadaf Ebrahimi }
1399*f5c631daSSadaf Ebrahimi VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result));
1400*f5c631daSSadaf Ebrahimi result[(i >> 1) + (j * lanes / 2)] = dst_val;
1401*f5c631daSSadaf Ebrahimi }
1402*f5c631daSSadaf Ebrahimi src = &src2;
1403*f5c631daSSadaf Ebrahimi }
1404*f5c631daSSadaf Ebrahimi dst.SetUintArray(vform, result);
1405*f5c631daSSadaf Ebrahimi if (IsSVEFormat(vform)) {
1406*f5c631daSSadaf Ebrahimi interleave_top_bottom(vform, dst, dst);
1407*f5c631daSSadaf Ebrahimi }
1408*f5c631daSSadaf Ebrahimi return dst;
1409*f5c631daSSadaf Ebrahimi }
1410*f5c631daSSadaf Ebrahimi
1411*f5c631daSSadaf Ebrahimi
umaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1412*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::umaxp(VectorFormat vform,
1413*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1414*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1415*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
1416*f5c631daSSadaf Ebrahimi return uminmaxp(vform, dst, src1, src2, true);
1417*f5c631daSSadaf Ebrahimi }
1418*f5c631daSSadaf Ebrahimi
1419*f5c631daSSadaf Ebrahimi
uminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1420*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uminp(VectorFormat vform,
1421*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1422*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1423*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
1424*f5c631daSSadaf Ebrahimi return uminmaxp(vform, dst, src1, src2, false);
1425*f5c631daSSadaf Ebrahimi }
1426*f5c631daSSadaf Ebrahimi
1427*f5c631daSSadaf Ebrahimi
uminmaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,bool max)1428*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uminmaxv(VectorFormat vform,
1429*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1430*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
1431*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
1432*f5c631daSSadaf Ebrahimi bool max) {
1433*f5c631daSSadaf Ebrahimi uint64_t dst_val = max ? 0 : UINT64_MAX;
1434*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1435*f5c631daSSadaf Ebrahimi if (!pg.IsActive(vform, i)) continue;
1436*f5c631daSSadaf Ebrahimi
1437*f5c631daSSadaf Ebrahimi uint64_t src_val = src.Uint(vform, i);
1438*f5c631daSSadaf Ebrahimi if (max) {
1439*f5c631daSSadaf Ebrahimi dst_val = (src_val > dst_val) ? src_val : dst_val;
1440*f5c631daSSadaf Ebrahimi } else {
1441*f5c631daSSadaf Ebrahimi dst_val = (src_val < dst_val) ? src_val : dst_val;
1442*f5c631daSSadaf Ebrahimi }
1443*f5c631daSSadaf Ebrahimi }
1444*f5c631daSSadaf Ebrahimi dst.ClearForWrite(ScalarFormatFromFormat(vform));
1445*f5c631daSSadaf Ebrahimi dst.SetUint(vform, 0, dst_val);
1446*f5c631daSSadaf Ebrahimi return dst;
1447*f5c631daSSadaf Ebrahimi }
1448*f5c631daSSadaf Ebrahimi
1449*f5c631daSSadaf Ebrahimi
umaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1450*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::umaxv(VectorFormat vform,
1451*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1452*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
1453*f5c631daSSadaf Ebrahimi uminmaxv(vform, dst, GetPTrue(), src, true);
1454*f5c631daSSadaf Ebrahimi return dst;
1455*f5c631daSSadaf Ebrahimi }
1456*f5c631daSSadaf Ebrahimi
1457*f5c631daSSadaf Ebrahimi
uminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1458*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uminv(VectorFormat vform,
1459*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1460*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
1461*f5c631daSSadaf Ebrahimi uminmaxv(vform, dst, GetPTrue(), src, false);
1462*f5c631daSSadaf Ebrahimi return dst;
1463*f5c631daSSadaf Ebrahimi }
1464*f5c631daSSadaf Ebrahimi
1465*f5c631daSSadaf Ebrahimi
umaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1466*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::umaxv(VectorFormat vform,
1467*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1468*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
1469*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
1470*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsSVEFormat(vform));
1471*f5c631daSSadaf Ebrahimi uminmaxv(vform, dst, pg, src, true);
1472*f5c631daSSadaf Ebrahimi return dst;
1473*f5c631daSSadaf Ebrahimi }
1474*f5c631daSSadaf Ebrahimi
1475*f5c631daSSadaf Ebrahimi
uminv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1476*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uminv(VectorFormat vform,
1477*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1478*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
1479*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
1480*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsSVEFormat(vform));
1481*f5c631daSSadaf Ebrahimi uminmaxv(vform, dst, pg, src, false);
1482*f5c631daSSadaf Ebrahimi return dst;
1483*f5c631daSSadaf Ebrahimi }
1484*f5c631daSSadaf Ebrahimi
1485*f5c631daSSadaf Ebrahimi
shl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1486*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::shl(VectorFormat vform,
1487*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1488*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
1489*f5c631daSSadaf Ebrahimi int shift) {
1490*f5c631daSSadaf Ebrahimi VIXL_ASSERT(shift >= 0);
1491*f5c631daSSadaf Ebrahimi SimVRegister temp;
1492*f5c631daSSadaf Ebrahimi LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1493*f5c631daSSadaf Ebrahimi return ushl(vform, dst, src, shiftreg);
1494*f5c631daSSadaf Ebrahimi }
1495*f5c631daSSadaf Ebrahimi
1496*f5c631daSSadaf Ebrahimi
sshll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1497*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sshll(VectorFormat vform,
1498*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1499*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
1500*f5c631daSSadaf Ebrahimi int shift) {
1501*f5c631daSSadaf Ebrahimi VIXL_ASSERT(shift >= 0);
1502*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
1503*f5c631daSSadaf Ebrahimi LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1504*f5c631daSSadaf Ebrahimi LogicVRegister extendedreg = sxtl(vform, temp2, src);
1505*f5c631daSSadaf Ebrahimi return sshl(vform, dst, extendedreg, shiftreg);
1506*f5c631daSSadaf Ebrahimi }
1507*f5c631daSSadaf Ebrahimi
1508*f5c631daSSadaf Ebrahimi
sshll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1509*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sshll2(VectorFormat vform,
1510*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1511*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
1512*f5c631daSSadaf Ebrahimi int shift) {
1513*f5c631daSSadaf Ebrahimi VIXL_ASSERT(shift >= 0);
1514*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
1515*f5c631daSSadaf Ebrahimi LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1516*f5c631daSSadaf Ebrahimi LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1517*f5c631daSSadaf Ebrahimi return sshl(vform, dst, extendedreg, shiftreg);
1518*f5c631daSSadaf Ebrahimi }
1519*f5c631daSSadaf Ebrahimi
1520*f5c631daSSadaf Ebrahimi
shll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1521*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::shll(VectorFormat vform,
1522*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1523*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
1524*f5c631daSSadaf Ebrahimi int shift = LaneSizeInBitsFromFormat(vform) / 2;
1525*f5c631daSSadaf Ebrahimi return sshll(vform, dst, src, shift);
1526*f5c631daSSadaf Ebrahimi }
1527*f5c631daSSadaf Ebrahimi
1528*f5c631daSSadaf Ebrahimi
shll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1529*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::shll2(VectorFormat vform,
1530*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1531*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
1532*f5c631daSSadaf Ebrahimi int shift = LaneSizeInBitsFromFormat(vform) / 2;
1533*f5c631daSSadaf Ebrahimi return sshll2(vform, dst, src, shift);
1534*f5c631daSSadaf Ebrahimi }
1535*f5c631daSSadaf Ebrahimi
1536*f5c631daSSadaf Ebrahimi
ushll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1537*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::ushll(VectorFormat vform,
1538*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1539*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
1540*f5c631daSSadaf Ebrahimi int shift) {
1541*f5c631daSSadaf Ebrahimi VIXL_ASSERT(shift >= 0);
1542*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
1543*f5c631daSSadaf Ebrahimi LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1544*f5c631daSSadaf Ebrahimi LogicVRegister extendedreg = uxtl(vform, temp2, src);
1545*f5c631daSSadaf Ebrahimi return ushl(vform, dst, extendedreg, shiftreg);
1546*f5c631daSSadaf Ebrahimi }
1547*f5c631daSSadaf Ebrahimi
1548*f5c631daSSadaf Ebrahimi
ushll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1549*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::ushll2(VectorFormat vform,
1550*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1551*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
1552*f5c631daSSadaf Ebrahimi int shift) {
1553*f5c631daSSadaf Ebrahimi VIXL_ASSERT(shift >= 0);
1554*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
1555*f5c631daSSadaf Ebrahimi LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1556*f5c631daSSadaf Ebrahimi LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1557*f5c631daSSadaf Ebrahimi return ushl(vform, dst, extendedreg, shiftreg);
1558*f5c631daSSadaf Ebrahimi }
1559*f5c631daSSadaf Ebrahimi
clast(VectorFormat vform,const LogicPRegister & pg,const LogicVRegister & src,int offset_from_last_active)1560*f5c631daSSadaf Ebrahimi std::pair<bool, uint64_t> Simulator::clast(VectorFormat vform,
1561*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
1562*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
1563*f5c631daSSadaf Ebrahimi int offset_from_last_active) {
1564*f5c631daSSadaf Ebrahimi // Untested for any other values.
1565*f5c631daSSadaf Ebrahimi VIXL_ASSERT((offset_from_last_active == 0) || (offset_from_last_active == 1));
1566*f5c631daSSadaf Ebrahimi
1567*f5c631daSSadaf Ebrahimi int last_active = GetLastActive(vform, pg);
1568*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
1569*f5c631daSSadaf Ebrahimi int index =
1570*f5c631daSSadaf Ebrahimi ((last_active + offset_from_last_active) + lane_count) % lane_count;
1571*f5c631daSSadaf Ebrahimi return std::make_pair(last_active >= 0, src.Uint(vform, index));
1572*f5c631daSSadaf Ebrahimi }
1573*f5c631daSSadaf Ebrahimi
compact(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1574*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::compact(VectorFormat vform,
1575*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1576*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
1577*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
1578*f5c631daSSadaf Ebrahimi int j = 0;
1579*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1580*f5c631daSSadaf Ebrahimi if (pg.IsActive(vform, i)) {
1581*f5c631daSSadaf Ebrahimi dst.SetUint(vform, j++, src.Uint(vform, i));
1582*f5c631daSSadaf Ebrahimi }
1583*f5c631daSSadaf Ebrahimi }
1584*f5c631daSSadaf Ebrahimi for (; j < LaneCountFromFormat(vform); j++) {
1585*f5c631daSSadaf Ebrahimi dst.SetUint(vform, j, 0);
1586*f5c631daSSadaf Ebrahimi }
1587*f5c631daSSadaf Ebrahimi return dst;
1588*f5c631daSSadaf Ebrahimi }
1589*f5c631daSSadaf Ebrahimi
splice(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2)1590*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::splice(VectorFormat vform,
1591*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1592*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
1593*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1594*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
1595*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
1596*f5c631daSSadaf Ebrahimi int first_active = GetFirstActive(vform, pg);
1597*f5c631daSSadaf Ebrahimi int last_active = GetLastActive(vform, pg);
1598*f5c631daSSadaf Ebrahimi int dst_idx = 0;
1599*f5c631daSSadaf Ebrahimi uint64_t result[kZRegMaxSizeInBytes];
1600*f5c631daSSadaf Ebrahimi
1601*f5c631daSSadaf Ebrahimi if (first_active >= 0) {
1602*f5c631daSSadaf Ebrahimi VIXL_ASSERT(last_active >= first_active);
1603*f5c631daSSadaf Ebrahimi VIXL_ASSERT(last_active < lane_count);
1604*f5c631daSSadaf Ebrahimi for (int i = first_active; i <= last_active; i++) {
1605*f5c631daSSadaf Ebrahimi result[dst_idx++] = src1.Uint(vform, i);
1606*f5c631daSSadaf Ebrahimi }
1607*f5c631daSSadaf Ebrahimi }
1608*f5c631daSSadaf Ebrahimi
1609*f5c631daSSadaf Ebrahimi VIXL_ASSERT(dst_idx <= lane_count);
1610*f5c631daSSadaf Ebrahimi for (int i = dst_idx; i < lane_count; i++) {
1611*f5c631daSSadaf Ebrahimi result[i] = src2.Uint(vform, i - dst_idx);
1612*f5c631daSSadaf Ebrahimi }
1613*f5c631daSSadaf Ebrahimi
1614*f5c631daSSadaf Ebrahimi dst.SetUintArray(vform, result);
1615*f5c631daSSadaf Ebrahimi
1616*f5c631daSSadaf Ebrahimi return dst;
1617*f5c631daSSadaf Ebrahimi }
1618*f5c631daSSadaf Ebrahimi
sel(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2)1619*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sel(VectorFormat vform,
1620*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1621*f5c631daSSadaf Ebrahimi const SimPRegister& pg,
1622*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1623*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
1624*f5c631daSSadaf Ebrahimi int p_reg_bits_per_lane =
1625*f5c631daSSadaf Ebrahimi LaneSizeInBitsFromFormat(vform) / kZRegBitsPerPRegBit;
1626*f5c631daSSadaf Ebrahimi for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
1627*f5c631daSSadaf Ebrahimi uint64_t lane_value = pg.GetBit(lane * p_reg_bits_per_lane)
1628*f5c631daSSadaf Ebrahimi ? src1.Uint(vform, lane)
1629*f5c631daSSadaf Ebrahimi : src2.Uint(vform, lane);
1630*f5c631daSSadaf Ebrahimi dst.SetUint(vform, lane, lane_value);
1631*f5c631daSSadaf Ebrahimi }
1632*f5c631daSSadaf Ebrahimi return dst;
1633*f5c631daSSadaf Ebrahimi }
1634*f5c631daSSadaf Ebrahimi
1635*f5c631daSSadaf Ebrahimi
sel(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src1,const LogicPRegister & src2)1636*f5c631daSSadaf Ebrahimi LogicPRegister Simulator::sel(LogicPRegister dst,
1637*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
1638*f5c631daSSadaf Ebrahimi const LogicPRegister& src1,
1639*f5c631daSSadaf Ebrahimi const LogicPRegister& src2) {
1640*f5c631daSSadaf Ebrahimi for (int i = 0; i < dst.GetChunkCount(); i++) {
1641*f5c631daSSadaf Ebrahimi LogicPRegister::ChunkType mask = pg.GetChunk(i);
1642*f5c631daSSadaf Ebrahimi LogicPRegister::ChunkType result =
1643*f5c631daSSadaf Ebrahimi (mask & src1.GetChunk(i)) | (~mask & src2.GetChunk(i));
1644*f5c631daSSadaf Ebrahimi dst.SetChunk(i, result);
1645*f5c631daSSadaf Ebrahimi }
1646*f5c631daSSadaf Ebrahimi return dst;
1647*f5c631daSSadaf Ebrahimi }
1648*f5c631daSSadaf Ebrahimi
1649*f5c631daSSadaf Ebrahimi
sli(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1650*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sli(VectorFormat vform,
1651*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1652*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
1653*f5c631daSSadaf Ebrahimi int shift) {
1654*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
1655*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
1656*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; i++) {
1657*f5c631daSSadaf Ebrahimi uint64_t src_lane = src.Uint(vform, i);
1658*f5c631daSSadaf Ebrahimi uint64_t dst_lane = dst.Uint(vform, i);
1659*f5c631daSSadaf Ebrahimi uint64_t shifted = src_lane << shift;
1660*f5c631daSSadaf Ebrahimi uint64_t mask = MaxUintFromFormat(vform) << shift;
1661*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1662*f5c631daSSadaf Ebrahimi }
1663*f5c631daSSadaf Ebrahimi return dst;
1664*f5c631daSSadaf Ebrahimi }
1665*f5c631daSSadaf Ebrahimi
1666*f5c631daSSadaf Ebrahimi
sqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1667*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqshl(VectorFormat vform,
1668*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1669*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
1670*f5c631daSSadaf Ebrahimi int shift) {
1671*f5c631daSSadaf Ebrahimi VIXL_ASSERT(shift >= 0);
1672*f5c631daSSadaf Ebrahimi SimVRegister temp;
1673*f5c631daSSadaf Ebrahimi LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1674*f5c631daSSadaf Ebrahimi return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1675*f5c631daSSadaf Ebrahimi }
1676*f5c631daSSadaf Ebrahimi
1677*f5c631daSSadaf Ebrahimi
uqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1678*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uqshl(VectorFormat vform,
1679*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1680*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
1681*f5c631daSSadaf Ebrahimi int shift) {
1682*f5c631daSSadaf Ebrahimi VIXL_ASSERT(shift >= 0);
1683*f5c631daSSadaf Ebrahimi SimVRegister temp;
1684*f5c631daSSadaf Ebrahimi LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1685*f5c631daSSadaf Ebrahimi return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1686*f5c631daSSadaf Ebrahimi }
1687*f5c631daSSadaf Ebrahimi
1688*f5c631daSSadaf Ebrahimi
sqshlu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1689*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqshlu(VectorFormat vform,
1690*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1691*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
1692*f5c631daSSadaf Ebrahimi int shift) {
1693*f5c631daSSadaf Ebrahimi VIXL_ASSERT(shift >= 0);
1694*f5c631daSSadaf Ebrahimi SimVRegister temp;
1695*f5c631daSSadaf Ebrahimi LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1696*f5c631daSSadaf Ebrahimi return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1697*f5c631daSSadaf Ebrahimi }
1698*f5c631daSSadaf Ebrahimi
1699*f5c631daSSadaf Ebrahimi
sri(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1700*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sri(VectorFormat vform,
1701*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1702*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
1703*f5c631daSSadaf Ebrahimi int shift) {
1704*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
1705*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
1706*f5c631daSSadaf Ebrahimi VIXL_ASSERT((shift > 0) &&
1707*f5c631daSSadaf Ebrahimi (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1708*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; i++) {
1709*f5c631daSSadaf Ebrahimi uint64_t src_lane = src.Uint(vform, i);
1710*f5c631daSSadaf Ebrahimi uint64_t dst_lane = dst.Uint(vform, i);
1711*f5c631daSSadaf Ebrahimi uint64_t shifted;
1712*f5c631daSSadaf Ebrahimi uint64_t mask;
1713*f5c631daSSadaf Ebrahimi if (shift == 64) {
1714*f5c631daSSadaf Ebrahimi shifted = 0;
1715*f5c631daSSadaf Ebrahimi mask = 0;
1716*f5c631daSSadaf Ebrahimi } else {
1717*f5c631daSSadaf Ebrahimi shifted = src_lane >> shift;
1718*f5c631daSSadaf Ebrahimi mask = MaxUintFromFormat(vform) >> shift;
1719*f5c631daSSadaf Ebrahimi }
1720*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1721*f5c631daSSadaf Ebrahimi }
1722*f5c631daSSadaf Ebrahimi return dst;
1723*f5c631daSSadaf Ebrahimi }
1724*f5c631daSSadaf Ebrahimi
1725*f5c631daSSadaf Ebrahimi
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1726*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::ushr(VectorFormat vform,
1727*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1728*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
1729*f5c631daSSadaf Ebrahimi int shift) {
1730*f5c631daSSadaf Ebrahimi VIXL_ASSERT(shift >= 0);
1731*f5c631daSSadaf Ebrahimi SimVRegister temp;
1732*f5c631daSSadaf Ebrahimi LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1733*f5c631daSSadaf Ebrahimi return ushl(vform, dst, src, shiftreg);
1734*f5c631daSSadaf Ebrahimi }
1735*f5c631daSSadaf Ebrahimi
1736*f5c631daSSadaf Ebrahimi
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1737*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sshr(VectorFormat vform,
1738*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1739*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
1740*f5c631daSSadaf Ebrahimi int shift) {
1741*f5c631daSSadaf Ebrahimi VIXL_ASSERT(shift >= 0);
1742*f5c631daSSadaf Ebrahimi SimVRegister temp;
1743*f5c631daSSadaf Ebrahimi LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1744*f5c631daSSadaf Ebrahimi return sshl(vform, dst, src, shiftreg);
1745*f5c631daSSadaf Ebrahimi }
1746*f5c631daSSadaf Ebrahimi
1747*f5c631daSSadaf Ebrahimi
ssra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1748*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::ssra(VectorFormat vform,
1749*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1750*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
1751*f5c631daSSadaf Ebrahimi int shift) {
1752*f5c631daSSadaf Ebrahimi SimVRegister temp;
1753*f5c631daSSadaf Ebrahimi LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1754*f5c631daSSadaf Ebrahimi return add(vform, dst, dst, shifted_reg);
1755*f5c631daSSadaf Ebrahimi }
1756*f5c631daSSadaf Ebrahimi
1757*f5c631daSSadaf Ebrahimi
usra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1758*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::usra(VectorFormat vform,
1759*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1760*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
1761*f5c631daSSadaf Ebrahimi int shift) {
1762*f5c631daSSadaf Ebrahimi SimVRegister temp;
1763*f5c631daSSadaf Ebrahimi LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1764*f5c631daSSadaf Ebrahimi return add(vform, dst, dst, shifted_reg);
1765*f5c631daSSadaf Ebrahimi }
1766*f5c631daSSadaf Ebrahimi
1767*f5c631daSSadaf Ebrahimi
srsra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1768*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::srsra(VectorFormat vform,
1769*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1770*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
1771*f5c631daSSadaf Ebrahimi int shift) {
1772*f5c631daSSadaf Ebrahimi SimVRegister temp;
1773*f5c631daSSadaf Ebrahimi LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1774*f5c631daSSadaf Ebrahimi return add(vform, dst, dst, shifted_reg);
1775*f5c631daSSadaf Ebrahimi }
1776*f5c631daSSadaf Ebrahimi
1777*f5c631daSSadaf Ebrahimi
ursra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1778*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::ursra(VectorFormat vform,
1779*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1780*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
1781*f5c631daSSadaf Ebrahimi int shift) {
1782*f5c631daSSadaf Ebrahimi SimVRegister temp;
1783*f5c631daSSadaf Ebrahimi LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1784*f5c631daSSadaf Ebrahimi return add(vform, dst, dst, shifted_reg);
1785*f5c631daSSadaf Ebrahimi }
1786*f5c631daSSadaf Ebrahimi
1787*f5c631daSSadaf Ebrahimi
cls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1788*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::cls(VectorFormat vform,
1789*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1790*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
1791*f5c631daSSadaf Ebrahimi int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1792*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
1793*f5c631daSSadaf Ebrahimi
1794*f5c631daSSadaf Ebrahimi // Ensure that we can store one result per lane.
1795*f5c631daSSadaf Ebrahimi int result[kZRegMaxSizeInBytes];
1796*f5c631daSSadaf Ebrahimi
1797*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; i++) {
1798*f5c631daSSadaf Ebrahimi result[i] = CountLeadingSignBits(src.Int(vform, i), lane_size_in_bits);
1799*f5c631daSSadaf Ebrahimi }
1800*f5c631daSSadaf Ebrahimi
1801*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
1802*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
1803*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result[i]);
1804*f5c631daSSadaf Ebrahimi }
1805*f5c631daSSadaf Ebrahimi return dst;
1806*f5c631daSSadaf Ebrahimi }
1807*f5c631daSSadaf Ebrahimi
1808*f5c631daSSadaf Ebrahimi
clz(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1809*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::clz(VectorFormat vform,
1810*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1811*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
1812*f5c631daSSadaf Ebrahimi int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1813*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
1814*f5c631daSSadaf Ebrahimi
1815*f5c631daSSadaf Ebrahimi // Ensure that we can store one result per lane.
1816*f5c631daSSadaf Ebrahimi int result[kZRegMaxSizeInBytes];
1817*f5c631daSSadaf Ebrahimi
1818*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; i++) {
1819*f5c631daSSadaf Ebrahimi result[i] = CountLeadingZeros(src.Uint(vform, i), lane_size_in_bits);
1820*f5c631daSSadaf Ebrahimi }
1821*f5c631daSSadaf Ebrahimi
1822*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
1823*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
1824*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result[i]);
1825*f5c631daSSadaf Ebrahimi }
1826*f5c631daSSadaf Ebrahimi return dst;
1827*f5c631daSSadaf Ebrahimi }
1828*f5c631daSSadaf Ebrahimi
1829*f5c631daSSadaf Ebrahimi
cnot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1830*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::cnot(VectorFormat vform,
1831*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1832*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
1833*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
1834*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1835*f5c631daSSadaf Ebrahimi uint64_t value = (src.Uint(vform, i) == 0) ? 1 : 0;
1836*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, value);
1837*f5c631daSSadaf Ebrahimi }
1838*f5c631daSSadaf Ebrahimi return dst;
1839*f5c631daSSadaf Ebrahimi }
1840*f5c631daSSadaf Ebrahimi
1841*f5c631daSSadaf Ebrahimi
cnt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1842*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::cnt(VectorFormat vform,
1843*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1844*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
1845*f5c631daSSadaf Ebrahimi int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1846*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
1847*f5c631daSSadaf Ebrahimi
1848*f5c631daSSadaf Ebrahimi // Ensure that we can store one result per lane.
1849*f5c631daSSadaf Ebrahimi int result[kZRegMaxSizeInBytes];
1850*f5c631daSSadaf Ebrahimi
1851*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; i++) {
1852*f5c631daSSadaf Ebrahimi result[i] = CountSetBits(src.Uint(vform, i), lane_size_in_bits);
1853*f5c631daSSadaf Ebrahimi }
1854*f5c631daSSadaf Ebrahimi
1855*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
1856*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
1857*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result[i]);
1858*f5c631daSSadaf Ebrahimi }
1859*f5c631daSSadaf Ebrahimi return dst;
1860*f5c631daSSadaf Ebrahimi }
1861*f5c631daSSadaf Ebrahimi
CalculateSignedShiftDistance(int64_t shift_val,int esize,bool shift_in_ls_byte)1862*f5c631daSSadaf Ebrahimi static int64_t CalculateSignedShiftDistance(int64_t shift_val,
1863*f5c631daSSadaf Ebrahimi int esize,
1864*f5c631daSSadaf Ebrahimi bool shift_in_ls_byte) {
1865*f5c631daSSadaf Ebrahimi if (shift_in_ls_byte) {
1866*f5c631daSSadaf Ebrahimi // Neon uses the least-significant byte of the lane as the shift distance.
1867*f5c631daSSadaf Ebrahimi shift_val = ExtractSignedBitfield64(7, 0, shift_val);
1868*f5c631daSSadaf Ebrahimi } else {
1869*f5c631daSSadaf Ebrahimi // SVE uses a saturated shift distance in the range
1870*f5c631daSSadaf Ebrahimi // -(esize + 1) ... (esize + 1).
1871*f5c631daSSadaf Ebrahimi if (shift_val > (esize + 1)) shift_val = esize + 1;
1872*f5c631daSSadaf Ebrahimi if (shift_val < -(esize + 1)) shift_val = -(esize + 1);
1873*f5c631daSSadaf Ebrahimi }
1874*f5c631daSSadaf Ebrahimi return shift_val;
1875*f5c631daSSadaf Ebrahimi }
1876*f5c631daSSadaf Ebrahimi
sshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool shift_in_ls_byte)1877*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sshl(VectorFormat vform,
1878*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1879*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1880*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
1881*f5c631daSSadaf Ebrahimi bool shift_in_ls_byte) {
1882*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
1883*f5c631daSSadaf Ebrahimi int esize = LaneSizeInBitsFromFormat(vform);
1884*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1885*f5c631daSSadaf Ebrahimi int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i),
1886*f5c631daSSadaf Ebrahimi esize,
1887*f5c631daSSadaf Ebrahimi shift_in_ls_byte);
1888*f5c631daSSadaf Ebrahimi
1889*f5c631daSSadaf Ebrahimi int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1890*f5c631daSSadaf Ebrahimi
1891*f5c631daSSadaf Ebrahimi // Set signed saturation state.
1892*f5c631daSSadaf Ebrahimi if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) {
1893*f5c631daSSadaf Ebrahimi dst.SetSignedSat(i, lj_src_val >= 0);
1894*f5c631daSSadaf Ebrahimi }
1895*f5c631daSSadaf Ebrahimi
1896*f5c631daSSadaf Ebrahimi // Set unsigned saturation state.
1897*f5c631daSSadaf Ebrahimi if (lj_src_val < 0) {
1898*f5c631daSSadaf Ebrahimi dst.SetUnsignedSat(i, false);
1899*f5c631daSSadaf Ebrahimi } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
1900*f5c631daSSadaf Ebrahimi (lj_src_val != 0)) {
1901*f5c631daSSadaf Ebrahimi dst.SetUnsignedSat(i, true);
1902*f5c631daSSadaf Ebrahimi }
1903*f5c631daSSadaf Ebrahimi
1904*f5c631daSSadaf Ebrahimi int64_t src_val = src1.Int(vform, i);
1905*f5c631daSSadaf Ebrahimi bool src_is_negative = src_val < 0;
1906*f5c631daSSadaf Ebrahimi if (shift_val > 63) {
1907*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, 0);
1908*f5c631daSSadaf Ebrahimi } else if (shift_val < -63) {
1909*f5c631daSSadaf Ebrahimi dst.SetRounding(i, src_is_negative);
1910*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, src_is_negative ? -1 : 0);
1911*f5c631daSSadaf Ebrahimi } else {
1912*f5c631daSSadaf Ebrahimi // Use unsigned types for shifts, as behaviour is undefined for signed
1913*f5c631daSSadaf Ebrahimi // lhs.
1914*f5c631daSSadaf Ebrahimi uint64_t usrc_val = static_cast<uint64_t>(src_val);
1915*f5c631daSSadaf Ebrahimi
1916*f5c631daSSadaf Ebrahimi if (shift_val < 0) {
1917*f5c631daSSadaf Ebrahimi // Convert to right shift.
1918*f5c631daSSadaf Ebrahimi shift_val = -shift_val;
1919*f5c631daSSadaf Ebrahimi
1920*f5c631daSSadaf Ebrahimi // Set rounding state by testing most-significant bit shifted out.
1921*f5c631daSSadaf Ebrahimi // Rounding only needed on right shifts.
1922*f5c631daSSadaf Ebrahimi if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
1923*f5c631daSSadaf Ebrahimi dst.SetRounding(i, true);
1924*f5c631daSSadaf Ebrahimi }
1925*f5c631daSSadaf Ebrahimi
1926*f5c631daSSadaf Ebrahimi usrc_val >>= shift_val;
1927*f5c631daSSadaf Ebrahimi
1928*f5c631daSSadaf Ebrahimi if (src_is_negative) {
1929*f5c631daSSadaf Ebrahimi // Simulate sign-extension.
1930*f5c631daSSadaf Ebrahimi usrc_val |= (~UINT64_C(0) << (64 - shift_val));
1931*f5c631daSSadaf Ebrahimi }
1932*f5c631daSSadaf Ebrahimi } else {
1933*f5c631daSSadaf Ebrahimi usrc_val <<= shift_val;
1934*f5c631daSSadaf Ebrahimi }
1935*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, usrc_val);
1936*f5c631daSSadaf Ebrahimi }
1937*f5c631daSSadaf Ebrahimi }
1938*f5c631daSSadaf Ebrahimi return dst;
1939*f5c631daSSadaf Ebrahimi }
1940*f5c631daSSadaf Ebrahimi
1941*f5c631daSSadaf Ebrahimi
ushl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool shift_in_ls_byte)1942*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::ushl(VectorFormat vform,
1943*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1944*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1945*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
1946*f5c631daSSadaf Ebrahimi bool shift_in_ls_byte) {
1947*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
1948*f5c631daSSadaf Ebrahimi int esize = LaneSizeInBitsFromFormat(vform);
1949*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1950*f5c631daSSadaf Ebrahimi int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i),
1951*f5c631daSSadaf Ebrahimi esize,
1952*f5c631daSSadaf Ebrahimi shift_in_ls_byte);
1953*f5c631daSSadaf Ebrahimi
1954*f5c631daSSadaf Ebrahimi uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1955*f5c631daSSadaf Ebrahimi
1956*f5c631daSSadaf Ebrahimi // Set saturation state.
1957*f5c631daSSadaf Ebrahimi if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
1958*f5c631daSSadaf Ebrahimi dst.SetUnsignedSat(i, true);
1959*f5c631daSSadaf Ebrahimi }
1960*f5c631daSSadaf Ebrahimi
1961*f5c631daSSadaf Ebrahimi uint64_t src_val = src1.Uint(vform, i);
1962*f5c631daSSadaf Ebrahimi if ((shift_val > 63) || (shift_val < -64)) {
1963*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, 0);
1964*f5c631daSSadaf Ebrahimi } else {
1965*f5c631daSSadaf Ebrahimi if (shift_val < 0) {
1966*f5c631daSSadaf Ebrahimi // Set rounding state. Rounding only needed on right shifts.
1967*f5c631daSSadaf Ebrahimi if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1968*f5c631daSSadaf Ebrahimi dst.SetRounding(i, true);
1969*f5c631daSSadaf Ebrahimi }
1970*f5c631daSSadaf Ebrahimi
1971*f5c631daSSadaf Ebrahimi if (shift_val == -64) {
1972*f5c631daSSadaf Ebrahimi src_val = 0;
1973*f5c631daSSadaf Ebrahimi } else {
1974*f5c631daSSadaf Ebrahimi src_val >>= -shift_val;
1975*f5c631daSSadaf Ebrahimi }
1976*f5c631daSSadaf Ebrahimi } else {
1977*f5c631daSSadaf Ebrahimi src_val <<= shift_val;
1978*f5c631daSSadaf Ebrahimi }
1979*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, src_val);
1980*f5c631daSSadaf Ebrahimi }
1981*f5c631daSSadaf Ebrahimi }
1982*f5c631daSSadaf Ebrahimi return dst;
1983*f5c631daSSadaf Ebrahimi }
1984*f5c631daSSadaf Ebrahimi
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1985*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sshr(VectorFormat vform,
1986*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1987*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1988*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
1989*f5c631daSSadaf Ebrahimi SimVRegister temp;
1990*f5c631daSSadaf Ebrahimi // Saturate to sidestep the min-int problem.
1991*f5c631daSSadaf Ebrahimi neg(vform, temp, src2).SignedSaturate(vform);
1992*f5c631daSSadaf Ebrahimi sshl(vform, dst, src1, temp, false);
1993*f5c631daSSadaf Ebrahimi return dst;
1994*f5c631daSSadaf Ebrahimi }
1995*f5c631daSSadaf Ebrahimi
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1996*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::ushr(VectorFormat vform,
1997*f5c631daSSadaf Ebrahimi LogicVRegister dst,
1998*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
1999*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
2000*f5c631daSSadaf Ebrahimi SimVRegister temp;
2001*f5c631daSSadaf Ebrahimi // Saturate to sidestep the min-int problem.
2002*f5c631daSSadaf Ebrahimi neg(vform, temp, src2).SignedSaturate(vform);
2003*f5c631daSSadaf Ebrahimi ushl(vform, dst, src1, temp, false);
2004*f5c631daSSadaf Ebrahimi return dst;
2005*f5c631daSSadaf Ebrahimi }
2006*f5c631daSSadaf Ebrahimi
neg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2007*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::neg(VectorFormat vform,
2008*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2009*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
2010*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
2011*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2012*f5c631daSSadaf Ebrahimi // Test for signed saturation.
2013*f5c631daSSadaf Ebrahimi int64_t sa = src.Int(vform, i);
2014*f5c631daSSadaf Ebrahimi if (sa == MinIntFromFormat(vform)) {
2015*f5c631daSSadaf Ebrahimi dst.SetSignedSat(i, true);
2016*f5c631daSSadaf Ebrahimi }
2017*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2018*f5c631daSSadaf Ebrahimi }
2019*f5c631daSSadaf Ebrahimi return dst;
2020*f5c631daSSadaf Ebrahimi }
2021*f5c631daSSadaf Ebrahimi
2022*f5c631daSSadaf Ebrahimi
suqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2023*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::suqadd(VectorFormat vform,
2024*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2025*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
2026*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
2027*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
2028*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2029*f5c631daSSadaf Ebrahimi int64_t sa = src1.IntLeftJustified(vform, i);
2030*f5c631daSSadaf Ebrahimi uint64_t ub = src2.UintLeftJustified(vform, i);
2031*f5c631daSSadaf Ebrahimi uint64_t ur = sa + ub;
2032*f5c631daSSadaf Ebrahimi
2033*f5c631daSSadaf Ebrahimi int64_t sr;
2034*f5c631daSSadaf Ebrahimi memcpy(&sr, &ur, sizeof(sr));
2035*f5c631daSSadaf Ebrahimi if (sr < sa) { // Test for signed positive saturation.
2036*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, MaxIntFromFormat(vform));
2037*f5c631daSSadaf Ebrahimi } else {
2038*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, src1.Int(vform, i) + src2.Uint(vform, i));
2039*f5c631daSSadaf Ebrahimi }
2040*f5c631daSSadaf Ebrahimi }
2041*f5c631daSSadaf Ebrahimi return dst;
2042*f5c631daSSadaf Ebrahimi }
2043*f5c631daSSadaf Ebrahimi
2044*f5c631daSSadaf Ebrahimi
usqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2045*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::usqadd(VectorFormat vform,
2046*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2047*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
2048*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
2049*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
2050*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2051*f5c631daSSadaf Ebrahimi uint64_t ua = src1.UintLeftJustified(vform, i);
2052*f5c631daSSadaf Ebrahimi int64_t sb = src2.IntLeftJustified(vform, i);
2053*f5c631daSSadaf Ebrahimi uint64_t ur = ua + sb;
2054*f5c631daSSadaf Ebrahimi
2055*f5c631daSSadaf Ebrahimi if ((sb > 0) && (ur <= ua)) {
2056*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.
2057*f5c631daSSadaf Ebrahimi } else if ((sb < 0) && (ur >= ua)) {
2058*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, 0); // Negative saturation.
2059*f5c631daSSadaf Ebrahimi } else {
2060*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, src1.Uint(vform, i) + src2.Int(vform, i));
2061*f5c631daSSadaf Ebrahimi }
2062*f5c631daSSadaf Ebrahimi }
2063*f5c631daSSadaf Ebrahimi return dst;
2064*f5c631daSSadaf Ebrahimi }
2065*f5c631daSSadaf Ebrahimi
2066*f5c631daSSadaf Ebrahimi
abs(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2067*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::abs(VectorFormat vform,
2068*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2069*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
2070*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
2071*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2072*f5c631daSSadaf Ebrahimi // Test for signed saturation.
2073*f5c631daSSadaf Ebrahimi int64_t sa = src.Int(vform, i);
2074*f5c631daSSadaf Ebrahimi if (sa == MinIntFromFormat(vform)) {
2075*f5c631daSSadaf Ebrahimi dst.SetSignedSat(i, true);
2076*f5c631daSSadaf Ebrahimi }
2077*f5c631daSSadaf Ebrahimi if (sa < 0) {
2078*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2079*f5c631daSSadaf Ebrahimi } else {
2080*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, sa);
2081*f5c631daSSadaf Ebrahimi }
2082*f5c631daSSadaf Ebrahimi }
2083*f5c631daSSadaf Ebrahimi return dst;
2084*f5c631daSSadaf Ebrahimi }
2085*f5c631daSSadaf Ebrahimi
2086*f5c631daSSadaf Ebrahimi
andv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2087*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::andv(VectorFormat vform,
2088*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2089*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
2090*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
2091*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsSVEFormat(vform));
2092*f5c631daSSadaf Ebrahimi uint64_t result = GetUintMask(LaneSizeInBitsFromFormat(vform));
2093*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2094*f5c631daSSadaf Ebrahimi if (!pg.IsActive(vform, i)) continue;
2095*f5c631daSSadaf Ebrahimi
2096*f5c631daSSadaf Ebrahimi result &= src.Uint(vform, i);
2097*f5c631daSSadaf Ebrahimi }
2098*f5c631daSSadaf Ebrahimi VectorFormat vform_dst =
2099*f5c631daSSadaf Ebrahimi ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2100*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform_dst);
2101*f5c631daSSadaf Ebrahimi dst.SetUint(vform_dst, 0, result);
2102*f5c631daSSadaf Ebrahimi return dst;
2103*f5c631daSSadaf Ebrahimi }
2104*f5c631daSSadaf Ebrahimi
2105*f5c631daSSadaf Ebrahimi
eorv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2106*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::eorv(VectorFormat vform,
2107*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2108*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
2109*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
2110*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsSVEFormat(vform));
2111*f5c631daSSadaf Ebrahimi uint64_t result = 0;
2112*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2113*f5c631daSSadaf Ebrahimi if (!pg.IsActive(vform, i)) continue;
2114*f5c631daSSadaf Ebrahimi
2115*f5c631daSSadaf Ebrahimi result ^= src.Uint(vform, i);
2116*f5c631daSSadaf Ebrahimi }
2117*f5c631daSSadaf Ebrahimi VectorFormat vform_dst =
2118*f5c631daSSadaf Ebrahimi ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2119*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform_dst);
2120*f5c631daSSadaf Ebrahimi dst.SetUint(vform_dst, 0, result);
2121*f5c631daSSadaf Ebrahimi return dst;
2122*f5c631daSSadaf Ebrahimi }
2123*f5c631daSSadaf Ebrahimi
2124*f5c631daSSadaf Ebrahimi
orv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2125*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::orv(VectorFormat vform,
2126*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2127*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
2128*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
2129*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsSVEFormat(vform));
2130*f5c631daSSadaf Ebrahimi uint64_t result = 0;
2131*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2132*f5c631daSSadaf Ebrahimi if (!pg.IsActive(vform, i)) continue;
2133*f5c631daSSadaf Ebrahimi
2134*f5c631daSSadaf Ebrahimi result |= src.Uint(vform, i);
2135*f5c631daSSadaf Ebrahimi }
2136*f5c631daSSadaf Ebrahimi VectorFormat vform_dst =
2137*f5c631daSSadaf Ebrahimi ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2138*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform_dst);
2139*f5c631daSSadaf Ebrahimi dst.SetUint(vform_dst, 0, result);
2140*f5c631daSSadaf Ebrahimi return dst;
2141*f5c631daSSadaf Ebrahimi }
2142*f5c631daSSadaf Ebrahimi
2143*f5c631daSSadaf Ebrahimi
saddv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2144*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::saddv(VectorFormat vform,
2145*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2146*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
2147*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
2148*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsSVEFormat(vform));
2149*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) <= kSRegSize);
2150*f5c631daSSadaf Ebrahimi int64_t result = 0;
2151*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2152*f5c631daSSadaf Ebrahimi if (!pg.IsActive(vform, i)) continue;
2153*f5c631daSSadaf Ebrahimi
2154*f5c631daSSadaf Ebrahimi // The destination register always has D-lane sizes and the source register
2155*f5c631daSSadaf Ebrahimi // always has S-lanes or smaller, so signed integer overflow -- undefined
2156*f5c631daSSadaf Ebrahimi // behaviour -- can't occur.
2157*f5c631daSSadaf Ebrahimi result += src.Int(vform, i);
2158*f5c631daSSadaf Ebrahimi }
2159*f5c631daSSadaf Ebrahimi
2160*f5c631daSSadaf Ebrahimi dst.ClearForWrite(kFormatD);
2161*f5c631daSSadaf Ebrahimi dst.SetInt(kFormatD, 0, result);
2162*f5c631daSSadaf Ebrahimi return dst;
2163*f5c631daSSadaf Ebrahimi }
2164*f5c631daSSadaf Ebrahimi
2165*f5c631daSSadaf Ebrahimi
uaddv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2166*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uaddv(VectorFormat vform,
2167*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2168*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
2169*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
2170*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsSVEFormat(vform));
2171*f5c631daSSadaf Ebrahimi uint64_t result = 0;
2172*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2173*f5c631daSSadaf Ebrahimi if (!pg.IsActive(vform, i)) continue;
2174*f5c631daSSadaf Ebrahimi
2175*f5c631daSSadaf Ebrahimi result += src.Uint(vform, i);
2176*f5c631daSSadaf Ebrahimi }
2177*f5c631daSSadaf Ebrahimi
2178*f5c631daSSadaf Ebrahimi dst.ClearForWrite(kFormatD);
2179*f5c631daSSadaf Ebrahimi dst.SetUint(kFormatD, 0, result);
2180*f5c631daSSadaf Ebrahimi return dst;
2181*f5c631daSSadaf Ebrahimi }
2182*f5c631daSSadaf Ebrahimi
2183*f5c631daSSadaf Ebrahimi
extractnarrow(VectorFormat dstform,LogicVRegister dst,bool dst_is_signed,const LogicVRegister & src,bool src_is_signed)2184*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
2185*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2186*f5c631daSSadaf Ebrahimi bool dst_is_signed,
2187*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
2188*f5c631daSSadaf Ebrahimi bool src_is_signed) {
2189*f5c631daSSadaf Ebrahimi bool upperhalf = false;
2190*f5c631daSSadaf Ebrahimi VectorFormat srcform = dstform;
2191*f5c631daSSadaf Ebrahimi if ((dstform == kFormat16B) || (dstform == kFormat8H) ||
2192*f5c631daSSadaf Ebrahimi (dstform == kFormat4S)) {
2193*f5c631daSSadaf Ebrahimi upperhalf = true;
2194*f5c631daSSadaf Ebrahimi srcform = VectorFormatHalfLanes(srcform);
2195*f5c631daSSadaf Ebrahimi }
2196*f5c631daSSadaf Ebrahimi srcform = VectorFormatDoubleWidth(srcform);
2197*f5c631daSSadaf Ebrahimi
2198*f5c631daSSadaf Ebrahimi LogicVRegister src_copy = src;
2199*f5c631daSSadaf Ebrahimi
2200*f5c631daSSadaf Ebrahimi int offset;
2201*f5c631daSSadaf Ebrahimi if (upperhalf) {
2202*f5c631daSSadaf Ebrahimi offset = LaneCountFromFormat(dstform) / 2;
2203*f5c631daSSadaf Ebrahimi } else {
2204*f5c631daSSadaf Ebrahimi offset = 0;
2205*f5c631daSSadaf Ebrahimi dst.ClearForWrite(dstform);
2206*f5c631daSSadaf Ebrahimi }
2207*f5c631daSSadaf Ebrahimi
2208*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2209*f5c631daSSadaf Ebrahimi int64_t ssrc = src_copy.Int(srcform, i);
2210*f5c631daSSadaf Ebrahimi uint64_t usrc = src_copy.Uint(srcform, i);
2211*f5c631daSSadaf Ebrahimi
2212*f5c631daSSadaf Ebrahimi // Test for signed saturation
2213*f5c631daSSadaf Ebrahimi if (ssrc > MaxIntFromFormat(dstform)) {
2214*f5c631daSSadaf Ebrahimi dst.SetSignedSat(offset + i, true);
2215*f5c631daSSadaf Ebrahimi } else if (ssrc < MinIntFromFormat(dstform)) {
2216*f5c631daSSadaf Ebrahimi dst.SetSignedSat(offset + i, false);
2217*f5c631daSSadaf Ebrahimi }
2218*f5c631daSSadaf Ebrahimi
2219*f5c631daSSadaf Ebrahimi // Test for unsigned saturation
2220*f5c631daSSadaf Ebrahimi if (src_is_signed) {
2221*f5c631daSSadaf Ebrahimi if (ssrc > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
2222*f5c631daSSadaf Ebrahimi dst.SetUnsignedSat(offset + i, true);
2223*f5c631daSSadaf Ebrahimi } else if (ssrc < 0) {
2224*f5c631daSSadaf Ebrahimi dst.SetUnsignedSat(offset + i, false);
2225*f5c631daSSadaf Ebrahimi }
2226*f5c631daSSadaf Ebrahimi } else {
2227*f5c631daSSadaf Ebrahimi if (usrc > MaxUintFromFormat(dstform)) {
2228*f5c631daSSadaf Ebrahimi dst.SetUnsignedSat(offset + i, true);
2229*f5c631daSSadaf Ebrahimi }
2230*f5c631daSSadaf Ebrahimi }
2231*f5c631daSSadaf Ebrahimi
2232*f5c631daSSadaf Ebrahimi int64_t result;
2233*f5c631daSSadaf Ebrahimi if (src_is_signed) {
2234*f5c631daSSadaf Ebrahimi result = ssrc & MaxUintFromFormat(dstform);
2235*f5c631daSSadaf Ebrahimi } else {
2236*f5c631daSSadaf Ebrahimi result = usrc & MaxUintFromFormat(dstform);
2237*f5c631daSSadaf Ebrahimi }
2238*f5c631daSSadaf Ebrahimi
2239*f5c631daSSadaf Ebrahimi if (dst_is_signed) {
2240*f5c631daSSadaf Ebrahimi dst.SetInt(dstform, offset + i, result);
2241*f5c631daSSadaf Ebrahimi } else {
2242*f5c631daSSadaf Ebrahimi dst.SetUint(dstform, offset + i, result);
2243*f5c631daSSadaf Ebrahimi }
2244*f5c631daSSadaf Ebrahimi }
2245*f5c631daSSadaf Ebrahimi return dst;
2246*f5c631daSSadaf Ebrahimi }
2247*f5c631daSSadaf Ebrahimi
2248*f5c631daSSadaf Ebrahimi
xtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2249*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::xtn(VectorFormat vform,
2250*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2251*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
2252*f5c631daSSadaf Ebrahimi return extractnarrow(vform, dst, true, src, true);
2253*f5c631daSSadaf Ebrahimi }
2254*f5c631daSSadaf Ebrahimi
2255*f5c631daSSadaf Ebrahimi
sqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2256*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqxtn(VectorFormat vform,
2257*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2258*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
2259*f5c631daSSadaf Ebrahimi return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
2260*f5c631daSSadaf Ebrahimi }
2261*f5c631daSSadaf Ebrahimi
2262*f5c631daSSadaf Ebrahimi
sqxtun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2263*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqxtun(VectorFormat vform,
2264*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2265*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
2266*f5c631daSSadaf Ebrahimi return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
2267*f5c631daSSadaf Ebrahimi }
2268*f5c631daSSadaf Ebrahimi
2269*f5c631daSSadaf Ebrahimi
uqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2270*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uqxtn(VectorFormat vform,
2271*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2272*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
2273*f5c631daSSadaf Ebrahimi return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
2274*f5c631daSSadaf Ebrahimi }
2275*f5c631daSSadaf Ebrahimi
2276*f5c631daSSadaf Ebrahimi
absdiff(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_signed)2277*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::absdiff(VectorFormat vform,
2278*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2279*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
2280*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
2281*f5c631daSSadaf Ebrahimi bool is_signed) {
2282*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
2283*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2284*f5c631daSSadaf Ebrahimi bool src1_gt_src2 = is_signed ? (src1.Int(vform, i) > src2.Int(vform, i))
2285*f5c631daSSadaf Ebrahimi : (src1.Uint(vform, i) > src2.Uint(vform, i));
2286*f5c631daSSadaf Ebrahimi // Always calculate the answer using unsigned arithmetic, to avoid
2287*f5c631daSSadaf Ebrahimi // implemenation-defined signed overflow.
2288*f5c631daSSadaf Ebrahimi if (src1_gt_src2) {
2289*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, src1.Uint(vform, i) - src2.Uint(vform, i));
2290*f5c631daSSadaf Ebrahimi } else {
2291*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, src2.Uint(vform, i) - src1.Uint(vform, i));
2292*f5c631daSSadaf Ebrahimi }
2293*f5c631daSSadaf Ebrahimi }
2294*f5c631daSSadaf Ebrahimi return dst;
2295*f5c631daSSadaf Ebrahimi }
2296*f5c631daSSadaf Ebrahimi
2297*f5c631daSSadaf Ebrahimi
saba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2298*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::saba(VectorFormat vform,
2299*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2300*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
2301*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
2302*f5c631daSSadaf Ebrahimi SimVRegister temp;
2303*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
2304*f5c631daSSadaf Ebrahimi absdiff(vform, temp, src1, src2, true);
2305*f5c631daSSadaf Ebrahimi add(vform, dst, dst, temp);
2306*f5c631daSSadaf Ebrahimi return dst;
2307*f5c631daSSadaf Ebrahimi }
2308*f5c631daSSadaf Ebrahimi
2309*f5c631daSSadaf Ebrahimi
uaba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2310*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uaba(VectorFormat vform,
2311*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2312*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
2313*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
2314*f5c631daSSadaf Ebrahimi SimVRegister temp;
2315*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
2316*f5c631daSSadaf Ebrahimi absdiff(vform, temp, src1, src2, false);
2317*f5c631daSSadaf Ebrahimi add(vform, dst, dst, temp);
2318*f5c631daSSadaf Ebrahimi return dst;
2319*f5c631daSSadaf Ebrahimi }
2320*f5c631daSSadaf Ebrahimi
2321*f5c631daSSadaf Ebrahimi
not_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2322*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::not_(VectorFormat vform,
2323*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2324*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
2325*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
2326*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2327*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, ~src.Uint(vform, i));
2328*f5c631daSSadaf Ebrahimi }
2329*f5c631daSSadaf Ebrahimi return dst;
2330*f5c631daSSadaf Ebrahimi }
2331*f5c631daSSadaf Ebrahimi
2332*f5c631daSSadaf Ebrahimi
rbit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2333*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::rbit(VectorFormat vform,
2334*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2335*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
2336*f5c631daSSadaf Ebrahimi uint64_t result[kZRegMaxSizeInBytes];
2337*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
2338*f5c631daSSadaf Ebrahimi int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
2339*f5c631daSSadaf Ebrahimi uint64_t reversed_value;
2340*f5c631daSSadaf Ebrahimi uint64_t value;
2341*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; i++) {
2342*f5c631daSSadaf Ebrahimi value = src.Uint(vform, i);
2343*f5c631daSSadaf Ebrahimi reversed_value = 0;
2344*f5c631daSSadaf Ebrahimi for (int j = 0; j < lane_size_in_bits; j++) {
2345*f5c631daSSadaf Ebrahimi reversed_value = (reversed_value << 1) | (value & 1);
2346*f5c631daSSadaf Ebrahimi value >>= 1;
2347*f5c631daSSadaf Ebrahimi }
2348*f5c631daSSadaf Ebrahimi result[i] = reversed_value;
2349*f5c631daSSadaf Ebrahimi }
2350*f5c631daSSadaf Ebrahimi
2351*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
2352*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
2353*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result[i]);
2354*f5c631daSSadaf Ebrahimi }
2355*f5c631daSSadaf Ebrahimi return dst;
2356*f5c631daSSadaf Ebrahimi }
2357*f5c631daSSadaf Ebrahimi
2358*f5c631daSSadaf Ebrahimi
rev(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2359*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::rev(VectorFormat vform,
2360*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2361*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
2362*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsSVEFormat(vform));
2363*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
2364*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count / 2; i++) {
2365*f5c631daSSadaf Ebrahimi uint64_t t = src.Uint(vform, i);
2366*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, src.Uint(vform, lane_count - i - 1));
2367*f5c631daSSadaf Ebrahimi dst.SetUint(vform, lane_count - i - 1, t);
2368*f5c631daSSadaf Ebrahimi }
2369*f5c631daSSadaf Ebrahimi return dst;
2370*f5c631daSSadaf Ebrahimi }
2371*f5c631daSSadaf Ebrahimi
2372*f5c631daSSadaf Ebrahimi
rev_byte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int rev_size)2373*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::rev_byte(VectorFormat vform,
2374*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2375*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
2376*f5c631daSSadaf Ebrahimi int rev_size) {
2377*f5c631daSSadaf Ebrahimi uint64_t result[kZRegMaxSizeInBytes] = {};
2378*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
2379*f5c631daSSadaf Ebrahimi int lane_size = LaneSizeInBytesFromFormat(vform);
2380*f5c631daSSadaf Ebrahimi int lanes_per_loop = rev_size / lane_size;
2381*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; i += lanes_per_loop) {
2382*f5c631daSSadaf Ebrahimi for (int j = 0; j < lanes_per_loop; j++) {
2383*f5c631daSSadaf Ebrahimi result[i + lanes_per_loop - 1 - j] = src.Uint(vform, i + j);
2384*f5c631daSSadaf Ebrahimi }
2385*f5c631daSSadaf Ebrahimi }
2386*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
2387*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
2388*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result[i]);
2389*f5c631daSSadaf Ebrahimi }
2390*f5c631daSSadaf Ebrahimi return dst;
2391*f5c631daSSadaf Ebrahimi }
2392*f5c631daSSadaf Ebrahimi
2393*f5c631daSSadaf Ebrahimi
rev16(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2394*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::rev16(VectorFormat vform,
2395*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2396*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
2397*f5c631daSSadaf Ebrahimi return rev_byte(vform, dst, src, 2);
2398*f5c631daSSadaf Ebrahimi }
2399*f5c631daSSadaf Ebrahimi
2400*f5c631daSSadaf Ebrahimi
rev32(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2401*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::rev32(VectorFormat vform,
2402*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2403*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
2404*f5c631daSSadaf Ebrahimi return rev_byte(vform, dst, src, 4);
2405*f5c631daSSadaf Ebrahimi }
2406*f5c631daSSadaf Ebrahimi
2407*f5c631daSSadaf Ebrahimi
rev64(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2408*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::rev64(VectorFormat vform,
2409*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2410*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
2411*f5c631daSSadaf Ebrahimi return rev_byte(vform, dst, src, 8);
2412*f5c631daSSadaf Ebrahimi }
2413*f5c631daSSadaf Ebrahimi
addlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_signed,bool do_accumulate)2414*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::addlp(VectorFormat vform,
2415*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2416*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
2417*f5c631daSSadaf Ebrahimi bool is_signed,
2418*f5c631daSSadaf Ebrahimi bool do_accumulate) {
2419*f5c631daSSadaf Ebrahimi VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
2420*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= kSRegSize);
2421*f5c631daSSadaf Ebrahimi
2422*f5c631daSSadaf Ebrahimi uint64_t result[kZRegMaxSizeInBytes];
2423*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
2424*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; i++) {
2425*f5c631daSSadaf Ebrahimi if (is_signed) {
2426*f5c631daSSadaf Ebrahimi result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
2427*f5c631daSSadaf Ebrahimi src.Int(vformsrc, 2 * i + 1));
2428*f5c631daSSadaf Ebrahimi } else {
2429*f5c631daSSadaf Ebrahimi result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
2430*f5c631daSSadaf Ebrahimi }
2431*f5c631daSSadaf Ebrahimi }
2432*f5c631daSSadaf Ebrahimi
2433*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
2434*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
2435*f5c631daSSadaf Ebrahimi if (do_accumulate) {
2436*f5c631daSSadaf Ebrahimi result[i] += dst.Uint(vform, i);
2437*f5c631daSSadaf Ebrahimi }
2438*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result[i]);
2439*f5c631daSSadaf Ebrahimi }
2440*f5c631daSSadaf Ebrahimi
2441*f5c631daSSadaf Ebrahimi return dst;
2442*f5c631daSSadaf Ebrahimi }
2443*f5c631daSSadaf Ebrahimi
2444*f5c631daSSadaf Ebrahimi
saddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2445*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::saddlp(VectorFormat vform,
2446*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2447*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
2448*f5c631daSSadaf Ebrahimi return addlp(vform, dst, src, true, false);
2449*f5c631daSSadaf Ebrahimi }
2450*f5c631daSSadaf Ebrahimi
2451*f5c631daSSadaf Ebrahimi
uaddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2452*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uaddlp(VectorFormat vform,
2453*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2454*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
2455*f5c631daSSadaf Ebrahimi return addlp(vform, dst, src, false, false);
2456*f5c631daSSadaf Ebrahimi }
2457*f5c631daSSadaf Ebrahimi
2458*f5c631daSSadaf Ebrahimi
sadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2459*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sadalp(VectorFormat vform,
2460*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2461*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
2462*f5c631daSSadaf Ebrahimi return addlp(vform, dst, src, true, true);
2463*f5c631daSSadaf Ebrahimi }
2464*f5c631daSSadaf Ebrahimi
2465*f5c631daSSadaf Ebrahimi
uadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2466*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uadalp(VectorFormat vform,
2467*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2468*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
2469*f5c631daSSadaf Ebrahimi return addlp(vform, dst, src, false, true);
2470*f5c631daSSadaf Ebrahimi }
2471*f5c631daSSadaf Ebrahimi
ror(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int rotation)2472*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::ror(VectorFormat vform,
2473*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2474*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
2475*f5c631daSSadaf Ebrahimi int rotation) {
2476*f5c631daSSadaf Ebrahimi int width = LaneSizeInBitsFromFormat(vform);
2477*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2478*f5c631daSSadaf Ebrahimi uint64_t value = src.Uint(vform, i);
2479*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, RotateRight(value, rotation, width));
2480*f5c631daSSadaf Ebrahimi }
2481*f5c631daSSadaf Ebrahimi return dst;
2482*f5c631daSSadaf Ebrahimi }
2483*f5c631daSSadaf Ebrahimi
ext(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)2484*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::ext(VectorFormat vform,
2485*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2486*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
2487*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
2488*f5c631daSSadaf Ebrahimi int index) {
2489*f5c631daSSadaf Ebrahimi uint8_t result[kZRegMaxSizeInBytes] = {};
2490*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
2491*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count - index; ++i) {
2492*f5c631daSSadaf Ebrahimi result[i] = src1.Uint(vform, i + index);
2493*f5c631daSSadaf Ebrahimi }
2494*f5c631daSSadaf Ebrahimi for (int i = 0; i < index; ++i) {
2495*f5c631daSSadaf Ebrahimi result[lane_count - index + i] = src2.Uint(vform, i);
2496*f5c631daSSadaf Ebrahimi }
2497*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
2498*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
2499*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result[i]);
2500*f5c631daSSadaf Ebrahimi }
2501*f5c631daSSadaf Ebrahimi return dst;
2502*f5c631daSSadaf Ebrahimi }
2503*f5c631daSSadaf Ebrahimi
rotate_elements_right(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int index)2504*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::rotate_elements_right(VectorFormat vform,
2505*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2506*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
2507*f5c631daSSadaf Ebrahimi int index) {
2508*f5c631daSSadaf Ebrahimi if (index < 0) index += LaneCountFromFormat(vform);
2509*f5c631daSSadaf Ebrahimi VIXL_ASSERT((index >= 0) && (index < LaneCountFromFormat(vform)));
2510*f5c631daSSadaf Ebrahimi index *= LaneSizeInBytesFromFormat(vform);
2511*f5c631daSSadaf Ebrahimi return ext(kFormatVnB, dst, src, src, index);
2512*f5c631daSSadaf Ebrahimi }
2513*f5c631daSSadaf Ebrahimi
2514*f5c631daSSadaf Ebrahimi
2515*f5c631daSSadaf Ebrahimi template <typename T>
fadda(VectorFormat vform,LogicVRegister acc,const LogicPRegister & pg,const LogicVRegister & src)2516*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fadda(VectorFormat vform,
2517*f5c631daSSadaf Ebrahimi LogicVRegister acc,
2518*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
2519*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
2520*f5c631daSSadaf Ebrahimi T result = acc.Float<T>(0);
2521*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2522*f5c631daSSadaf Ebrahimi if (!pg.IsActive(vform, i)) continue;
2523*f5c631daSSadaf Ebrahimi
2524*f5c631daSSadaf Ebrahimi result = FPAdd(result, src.Float<T>(i));
2525*f5c631daSSadaf Ebrahimi }
2526*f5c631daSSadaf Ebrahimi VectorFormat vform_dst =
2527*f5c631daSSadaf Ebrahimi ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2528*f5c631daSSadaf Ebrahimi acc.ClearForWrite(vform_dst);
2529*f5c631daSSadaf Ebrahimi acc.SetFloat(0, result);
2530*f5c631daSSadaf Ebrahimi return acc;
2531*f5c631daSSadaf Ebrahimi }
2532*f5c631daSSadaf Ebrahimi
fadda(VectorFormat vform,LogicVRegister acc,const LogicPRegister & pg,const LogicVRegister & src)2533*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fadda(VectorFormat vform,
2534*f5c631daSSadaf Ebrahimi LogicVRegister acc,
2535*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
2536*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
2537*f5c631daSSadaf Ebrahimi switch (LaneSizeInBitsFromFormat(vform)) {
2538*f5c631daSSadaf Ebrahimi case kHRegSize:
2539*f5c631daSSadaf Ebrahimi fadda<SimFloat16>(vform, acc, pg, src);
2540*f5c631daSSadaf Ebrahimi break;
2541*f5c631daSSadaf Ebrahimi case kSRegSize:
2542*f5c631daSSadaf Ebrahimi fadda<float>(vform, acc, pg, src);
2543*f5c631daSSadaf Ebrahimi break;
2544*f5c631daSSadaf Ebrahimi case kDRegSize:
2545*f5c631daSSadaf Ebrahimi fadda<double>(vform, acc, pg, src);
2546*f5c631daSSadaf Ebrahimi break;
2547*f5c631daSSadaf Ebrahimi default:
2548*f5c631daSSadaf Ebrahimi VIXL_UNREACHABLE();
2549*f5c631daSSadaf Ebrahimi }
2550*f5c631daSSadaf Ebrahimi return acc;
2551*f5c631daSSadaf Ebrahimi }
2552*f5c631daSSadaf Ebrahimi
2553*f5c631daSSadaf Ebrahimi template <typename T>
fcadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2554*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fcadd(VectorFormat vform,
2555*f5c631daSSadaf Ebrahimi LogicVRegister dst, // d
2556*f5c631daSSadaf Ebrahimi const LogicVRegister& src1, // n
2557*f5c631daSSadaf Ebrahimi const LogicVRegister& src2, // m
2558*f5c631daSSadaf Ebrahimi int rot) {
2559*f5c631daSSadaf Ebrahimi int elements = LaneCountFromFormat(vform);
2560*f5c631daSSadaf Ebrahimi
2561*f5c631daSSadaf Ebrahimi T element1, element3;
2562*f5c631daSSadaf Ebrahimi rot = (rot == 1) ? 270 : 90;
2563*f5c631daSSadaf Ebrahimi
2564*f5c631daSSadaf Ebrahimi // Loop example:
2565*f5c631daSSadaf Ebrahimi // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2566*f5c631daSSadaf Ebrahimi // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2567*f5c631daSSadaf Ebrahimi
2568*f5c631daSSadaf Ebrahimi for (int e = 0; e <= (elements / 2) - 1; e++) {
2569*f5c631daSSadaf Ebrahimi switch (rot) {
2570*f5c631daSSadaf Ebrahimi case 90:
2571*f5c631daSSadaf Ebrahimi element1 = FPNeg(src2.Float<T>(e * 2 + 1));
2572*f5c631daSSadaf Ebrahimi element3 = src2.Float<T>(e * 2);
2573*f5c631daSSadaf Ebrahimi break;
2574*f5c631daSSadaf Ebrahimi case 270:
2575*f5c631daSSadaf Ebrahimi element1 = src2.Float<T>(e * 2 + 1);
2576*f5c631daSSadaf Ebrahimi element3 = FPNeg(src2.Float<T>(e * 2));
2577*f5c631daSSadaf Ebrahimi break;
2578*f5c631daSSadaf Ebrahimi default:
2579*f5c631daSSadaf Ebrahimi VIXL_UNREACHABLE();
2580*f5c631daSSadaf Ebrahimi return dst; // prevents "element(n) may be unintialized" errors
2581*f5c631daSSadaf Ebrahimi }
2582*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
2583*f5c631daSSadaf Ebrahimi dst.SetFloat<T>(e * 2, FPAdd(src1.Float<T>(e * 2), element1));
2584*f5c631daSSadaf Ebrahimi dst.SetFloat<T>(e * 2 + 1, FPAdd(src1.Float<T>(e * 2 + 1), element3));
2585*f5c631daSSadaf Ebrahimi }
2586*f5c631daSSadaf Ebrahimi return dst;
2587*f5c631daSSadaf Ebrahimi }
2588*f5c631daSSadaf Ebrahimi
2589*f5c631daSSadaf Ebrahimi
fcadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2590*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fcadd(VectorFormat vform,
2591*f5c631daSSadaf Ebrahimi LogicVRegister dst, // d
2592*f5c631daSSadaf Ebrahimi const LogicVRegister& src1, // n
2593*f5c631daSSadaf Ebrahimi const LogicVRegister& src2, // m
2594*f5c631daSSadaf Ebrahimi int rot) {
2595*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2596*f5c631daSSadaf Ebrahimi fcadd<SimFloat16>(vform, dst, src1, src2, rot);
2597*f5c631daSSadaf Ebrahimi } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2598*f5c631daSSadaf Ebrahimi fcadd<float>(vform, dst, src1, src2, rot);
2599*f5c631daSSadaf Ebrahimi } else {
2600*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
2601*f5c631daSSadaf Ebrahimi fcadd<double>(vform, dst, src1, src2, rot);
2602*f5c631daSSadaf Ebrahimi }
2603*f5c631daSSadaf Ebrahimi return dst;
2604*f5c631daSSadaf Ebrahimi }
2605*f5c631daSSadaf Ebrahimi
2606*f5c631daSSadaf Ebrahimi template <typename T>
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,const LogicVRegister & acc,int index,int rot)2607*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fcmla(VectorFormat vform,
2608*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2609*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
2610*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
2611*f5c631daSSadaf Ebrahimi const LogicVRegister& acc,
2612*f5c631daSSadaf Ebrahimi int index,
2613*f5c631daSSadaf Ebrahimi int rot) {
2614*f5c631daSSadaf Ebrahimi int elements = LaneCountFromFormat(vform);
2615*f5c631daSSadaf Ebrahimi
2616*f5c631daSSadaf Ebrahimi T element1, element2, element3, element4;
2617*f5c631daSSadaf Ebrahimi rot *= 90;
2618*f5c631daSSadaf Ebrahimi
2619*f5c631daSSadaf Ebrahimi // Loop example:
2620*f5c631daSSadaf Ebrahimi // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2621*f5c631daSSadaf Ebrahimi // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2622*f5c631daSSadaf Ebrahimi
2623*f5c631daSSadaf Ebrahimi for (int e = 0; e <= (elements / 2) - 1; e++) {
2624*f5c631daSSadaf Ebrahimi // Index == -1 indicates a vector/vector rather than vector/indexed-element
2625*f5c631daSSadaf Ebrahimi // operation.
2626*f5c631daSSadaf Ebrahimi int f = (index < 0) ? e : index;
2627*f5c631daSSadaf Ebrahimi
2628*f5c631daSSadaf Ebrahimi switch (rot) {
2629*f5c631daSSadaf Ebrahimi case 0:
2630*f5c631daSSadaf Ebrahimi element1 = src2.Float<T>(f * 2);
2631*f5c631daSSadaf Ebrahimi element2 = src1.Float<T>(e * 2);
2632*f5c631daSSadaf Ebrahimi element3 = src2.Float<T>(f * 2 + 1);
2633*f5c631daSSadaf Ebrahimi element4 = src1.Float<T>(e * 2);
2634*f5c631daSSadaf Ebrahimi break;
2635*f5c631daSSadaf Ebrahimi case 90:
2636*f5c631daSSadaf Ebrahimi element1 = FPNeg(src2.Float<T>(f * 2 + 1));
2637*f5c631daSSadaf Ebrahimi element2 = src1.Float<T>(e * 2 + 1);
2638*f5c631daSSadaf Ebrahimi element3 = src2.Float<T>(f * 2);
2639*f5c631daSSadaf Ebrahimi element4 = src1.Float<T>(e * 2 + 1);
2640*f5c631daSSadaf Ebrahimi break;
2641*f5c631daSSadaf Ebrahimi case 180:
2642*f5c631daSSadaf Ebrahimi element1 = FPNeg(src2.Float<T>(f * 2));
2643*f5c631daSSadaf Ebrahimi element2 = src1.Float<T>(e * 2);
2644*f5c631daSSadaf Ebrahimi element3 = FPNeg(src2.Float<T>(f * 2 + 1));
2645*f5c631daSSadaf Ebrahimi element4 = src1.Float<T>(e * 2);
2646*f5c631daSSadaf Ebrahimi break;
2647*f5c631daSSadaf Ebrahimi case 270:
2648*f5c631daSSadaf Ebrahimi element1 = src2.Float<T>(f * 2 + 1);
2649*f5c631daSSadaf Ebrahimi element2 = src1.Float<T>(e * 2 + 1);
2650*f5c631daSSadaf Ebrahimi element3 = FPNeg(src2.Float<T>(f * 2));
2651*f5c631daSSadaf Ebrahimi element4 = src1.Float<T>(e * 2 + 1);
2652*f5c631daSSadaf Ebrahimi break;
2653*f5c631daSSadaf Ebrahimi default:
2654*f5c631daSSadaf Ebrahimi VIXL_UNREACHABLE();
2655*f5c631daSSadaf Ebrahimi return dst; // prevents "element(n) may be unintialized" errors
2656*f5c631daSSadaf Ebrahimi }
2657*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
2658*f5c631daSSadaf Ebrahimi dst.SetFloat<T>(vform,
2659*f5c631daSSadaf Ebrahimi e * 2,
2660*f5c631daSSadaf Ebrahimi FPMulAdd(acc.Float<T>(e * 2), element2, element1));
2661*f5c631daSSadaf Ebrahimi dst.SetFloat<T>(vform,
2662*f5c631daSSadaf Ebrahimi e * 2 + 1,
2663*f5c631daSSadaf Ebrahimi FPMulAdd(acc.Float<T>(e * 2 + 1), element4, element3));
2664*f5c631daSSadaf Ebrahimi }
2665*f5c631daSSadaf Ebrahimi return dst;
2666*f5c631daSSadaf Ebrahimi }
2667*f5c631daSSadaf Ebrahimi
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,const LogicVRegister & acc,int rot)2668*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fcmla(VectorFormat vform,
2669*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2670*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
2671*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
2672*f5c631daSSadaf Ebrahimi const LogicVRegister& acc,
2673*f5c631daSSadaf Ebrahimi int rot) {
2674*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2675*f5c631daSSadaf Ebrahimi fcmla<SimFloat16>(vform, dst, src1, src2, acc, -1, rot);
2676*f5c631daSSadaf Ebrahimi } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2677*f5c631daSSadaf Ebrahimi fcmla<float>(vform, dst, src1, src2, acc, -1, rot);
2678*f5c631daSSadaf Ebrahimi } else {
2679*f5c631daSSadaf Ebrahimi fcmla<double>(vform, dst, src1, src2, acc, -1, rot);
2680*f5c631daSSadaf Ebrahimi }
2681*f5c631daSSadaf Ebrahimi return dst;
2682*f5c631daSSadaf Ebrahimi }
2683*f5c631daSSadaf Ebrahimi
2684*f5c631daSSadaf Ebrahimi
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)2685*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fcmla(VectorFormat vform,
2686*f5c631daSSadaf Ebrahimi LogicVRegister dst, // d
2687*f5c631daSSadaf Ebrahimi const LogicVRegister& src1, // n
2688*f5c631daSSadaf Ebrahimi const LogicVRegister& src2, // m
2689*f5c631daSSadaf Ebrahimi int index,
2690*f5c631daSSadaf Ebrahimi int rot) {
2691*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2692*f5c631daSSadaf Ebrahimi VIXL_UNIMPLEMENTED();
2693*f5c631daSSadaf Ebrahimi } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2694*f5c631daSSadaf Ebrahimi fcmla<float>(vform, dst, src1, src2, dst, index, rot);
2695*f5c631daSSadaf Ebrahimi } else {
2696*f5c631daSSadaf Ebrahimi fcmla<double>(vform, dst, src1, src2, dst, index, rot);
2697*f5c631daSSadaf Ebrahimi }
2698*f5c631daSSadaf Ebrahimi return dst;
2699*f5c631daSSadaf Ebrahimi }
2700*f5c631daSSadaf Ebrahimi
cadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot,bool saturate)2701*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::cadd(VectorFormat vform,
2702*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2703*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
2704*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
2705*f5c631daSSadaf Ebrahimi int rot,
2706*f5c631daSSadaf Ebrahimi bool saturate) {
2707*f5c631daSSadaf Ebrahimi SimVRegister src1_r, src1_i;
2708*f5c631daSSadaf Ebrahimi SimVRegister src2_r, src2_i;
2709*f5c631daSSadaf Ebrahimi SimVRegister zero;
2710*f5c631daSSadaf Ebrahimi zero.Clear();
2711*f5c631daSSadaf Ebrahimi uzp1(vform, src1_r, src1, zero);
2712*f5c631daSSadaf Ebrahimi uzp2(vform, src1_i, src1, zero);
2713*f5c631daSSadaf Ebrahimi uzp1(vform, src2_r, src2, zero);
2714*f5c631daSSadaf Ebrahimi uzp2(vform, src2_i, src2, zero);
2715*f5c631daSSadaf Ebrahimi
2716*f5c631daSSadaf Ebrahimi if (rot == 90) {
2717*f5c631daSSadaf Ebrahimi if (saturate) {
2718*f5c631daSSadaf Ebrahimi sub(vform, src1_r, src1_r, src2_i).SignedSaturate(vform);
2719*f5c631daSSadaf Ebrahimi add(vform, src1_i, src1_i, src2_r).SignedSaturate(vform);
2720*f5c631daSSadaf Ebrahimi } else {
2721*f5c631daSSadaf Ebrahimi sub(vform, src1_r, src1_r, src2_i);
2722*f5c631daSSadaf Ebrahimi add(vform, src1_i, src1_i, src2_r);
2723*f5c631daSSadaf Ebrahimi }
2724*f5c631daSSadaf Ebrahimi } else {
2725*f5c631daSSadaf Ebrahimi VIXL_ASSERT(rot == 270);
2726*f5c631daSSadaf Ebrahimi if (saturate) {
2727*f5c631daSSadaf Ebrahimi add(vform, src1_r, src1_r, src2_i).SignedSaturate(vform);
2728*f5c631daSSadaf Ebrahimi sub(vform, src1_i, src1_i, src2_r).SignedSaturate(vform);
2729*f5c631daSSadaf Ebrahimi } else {
2730*f5c631daSSadaf Ebrahimi add(vform, src1_r, src1_r, src2_i);
2731*f5c631daSSadaf Ebrahimi sub(vform, src1_i, src1_i, src2_r);
2732*f5c631daSSadaf Ebrahimi }
2733*f5c631daSSadaf Ebrahimi }
2734*f5c631daSSadaf Ebrahimi
2735*f5c631daSSadaf Ebrahimi zip1(vform, dst, src1_r, src1_i);
2736*f5c631daSSadaf Ebrahimi return dst;
2737*f5c631daSSadaf Ebrahimi }
2738*f5c631daSSadaf Ebrahimi
cmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2739*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::cmla(VectorFormat vform,
2740*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2741*f5c631daSSadaf Ebrahimi const LogicVRegister& srca,
2742*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
2743*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
2744*f5c631daSSadaf Ebrahimi int rot) {
2745*f5c631daSSadaf Ebrahimi SimVRegister src1_a;
2746*f5c631daSSadaf Ebrahimi SimVRegister src2_a, src2_b;
2747*f5c631daSSadaf Ebrahimi SimVRegister srca_i, srca_r;
2748*f5c631daSSadaf Ebrahimi SimVRegister zero, temp;
2749*f5c631daSSadaf Ebrahimi zero.Clear();
2750*f5c631daSSadaf Ebrahimi
2751*f5c631daSSadaf Ebrahimi if ((rot == 0) || (rot == 180)) {
2752*f5c631daSSadaf Ebrahimi uzp1(vform, src1_a, src1, zero);
2753*f5c631daSSadaf Ebrahimi uzp1(vform, src2_a, src2, zero);
2754*f5c631daSSadaf Ebrahimi uzp2(vform, src2_b, src2, zero);
2755*f5c631daSSadaf Ebrahimi } else {
2756*f5c631daSSadaf Ebrahimi uzp2(vform, src1_a, src1, zero);
2757*f5c631daSSadaf Ebrahimi uzp2(vform, src2_a, src2, zero);
2758*f5c631daSSadaf Ebrahimi uzp1(vform, src2_b, src2, zero);
2759*f5c631daSSadaf Ebrahimi }
2760*f5c631daSSadaf Ebrahimi
2761*f5c631daSSadaf Ebrahimi uzp1(vform, srca_r, srca, zero);
2762*f5c631daSSadaf Ebrahimi uzp2(vform, srca_i, srca, zero);
2763*f5c631daSSadaf Ebrahimi
2764*f5c631daSSadaf Ebrahimi bool sub_r = (rot == 90) || (rot == 180);
2765*f5c631daSSadaf Ebrahimi bool sub_i = (rot == 180) || (rot == 270);
2766*f5c631daSSadaf Ebrahimi
2767*f5c631daSSadaf Ebrahimi mul(vform, temp, src1_a, src2_a);
2768*f5c631daSSadaf Ebrahimi if (sub_r) {
2769*f5c631daSSadaf Ebrahimi sub(vform, srca_r, srca_r, temp);
2770*f5c631daSSadaf Ebrahimi } else {
2771*f5c631daSSadaf Ebrahimi add(vform, srca_r, srca_r, temp);
2772*f5c631daSSadaf Ebrahimi }
2773*f5c631daSSadaf Ebrahimi
2774*f5c631daSSadaf Ebrahimi mul(vform, temp, src1_a, src2_b);
2775*f5c631daSSadaf Ebrahimi if (sub_i) {
2776*f5c631daSSadaf Ebrahimi sub(vform, srca_i, srca_i, temp);
2777*f5c631daSSadaf Ebrahimi } else {
2778*f5c631daSSadaf Ebrahimi add(vform, srca_i, srca_i, temp);
2779*f5c631daSSadaf Ebrahimi }
2780*f5c631daSSadaf Ebrahimi
2781*f5c631daSSadaf Ebrahimi zip1(vform, dst, srca_r, srca_i);
2782*f5c631daSSadaf Ebrahimi return dst;
2783*f5c631daSSadaf Ebrahimi }
2784*f5c631daSSadaf Ebrahimi
cmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)2785*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::cmla(VectorFormat vform,
2786*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2787*f5c631daSSadaf Ebrahimi const LogicVRegister& srca,
2788*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
2789*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
2790*f5c631daSSadaf Ebrahimi int index,
2791*f5c631daSSadaf Ebrahimi int rot) {
2792*f5c631daSSadaf Ebrahimi SimVRegister temp;
2793*f5c631daSSadaf Ebrahimi dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index);
2794*f5c631daSSadaf Ebrahimi return cmla(vform, dst, srca, src1, temp, rot);
2795*f5c631daSSadaf Ebrahimi }
2796*f5c631daSSadaf Ebrahimi
bgrp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool do_bext)2797*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::bgrp(VectorFormat vform,
2798*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2799*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
2800*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
2801*f5c631daSSadaf Ebrahimi bool do_bext) {
2802*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2803*f5c631daSSadaf Ebrahimi uint64_t value = src1.Uint(vform, i);
2804*f5c631daSSadaf Ebrahimi uint64_t mask = src2.Uint(vform, i);
2805*f5c631daSSadaf Ebrahimi int high_pos = 0;
2806*f5c631daSSadaf Ebrahimi int low_pos = 0;
2807*f5c631daSSadaf Ebrahimi uint64_t result_high = 0;
2808*f5c631daSSadaf Ebrahimi uint64_t result_low = 0;
2809*f5c631daSSadaf Ebrahimi for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) {
2810*f5c631daSSadaf Ebrahimi if ((mask & 1) == 0) {
2811*f5c631daSSadaf Ebrahimi result_high |= (value & 1) << high_pos;
2812*f5c631daSSadaf Ebrahimi high_pos++;
2813*f5c631daSSadaf Ebrahimi } else {
2814*f5c631daSSadaf Ebrahimi result_low |= (value & 1) << low_pos;
2815*f5c631daSSadaf Ebrahimi low_pos++;
2816*f5c631daSSadaf Ebrahimi }
2817*f5c631daSSadaf Ebrahimi mask >>= 1;
2818*f5c631daSSadaf Ebrahimi value >>= 1;
2819*f5c631daSSadaf Ebrahimi }
2820*f5c631daSSadaf Ebrahimi
2821*f5c631daSSadaf Ebrahimi if (!do_bext) {
2822*f5c631daSSadaf Ebrahimi result_low |= result_high << low_pos;
2823*f5c631daSSadaf Ebrahimi }
2824*f5c631daSSadaf Ebrahimi
2825*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result_low);
2826*f5c631daSSadaf Ebrahimi }
2827*f5c631daSSadaf Ebrahimi return dst;
2828*f5c631daSSadaf Ebrahimi }
2829*f5c631daSSadaf Ebrahimi
bdep(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2830*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::bdep(VectorFormat vform,
2831*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2832*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
2833*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
2834*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2835*f5c631daSSadaf Ebrahimi uint64_t value = src1.Uint(vform, i);
2836*f5c631daSSadaf Ebrahimi uint64_t mask = src2.Uint(vform, i);
2837*f5c631daSSadaf Ebrahimi uint64_t result = 0;
2838*f5c631daSSadaf Ebrahimi for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) {
2839*f5c631daSSadaf Ebrahimi if ((mask & 1) == 1) {
2840*f5c631daSSadaf Ebrahimi result |= (value & 1) << j;
2841*f5c631daSSadaf Ebrahimi value >>= 1;
2842*f5c631daSSadaf Ebrahimi }
2843*f5c631daSSadaf Ebrahimi mask >>= 1;
2844*f5c631daSSadaf Ebrahimi }
2845*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result);
2846*f5c631daSSadaf Ebrahimi }
2847*f5c631daSSadaf Ebrahimi return dst;
2848*f5c631daSSadaf Ebrahimi }
2849*f5c631daSSadaf Ebrahimi
histogram(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2,bool do_segmented)2850*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::histogram(VectorFormat vform,
2851*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2852*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
2853*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
2854*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
2855*f5c631daSSadaf Ebrahimi bool do_segmented) {
2856*f5c631daSSadaf Ebrahimi int elements_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
2857*f5c631daSSadaf Ebrahimi uint64_t result[kZRegMaxSizeInBytes];
2858*f5c631daSSadaf Ebrahimi
2859*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2860*f5c631daSSadaf Ebrahimi uint64_t count = 0;
2861*f5c631daSSadaf Ebrahimi uint64_t value = src1.Uint(vform, i);
2862*f5c631daSSadaf Ebrahimi
2863*f5c631daSSadaf Ebrahimi int segment = do_segmented ? (i / elements_per_segment) : 0;
2864*f5c631daSSadaf Ebrahimi int segment_offset = segment * elements_per_segment;
2865*f5c631daSSadaf Ebrahimi int hist_limit = do_segmented ? elements_per_segment : (i + 1);
2866*f5c631daSSadaf Ebrahimi for (int j = 0; j < hist_limit; j++) {
2867*f5c631daSSadaf Ebrahimi if (pg.IsActive(vform, j) &&
2868*f5c631daSSadaf Ebrahimi (value == src2.Uint(vform, j + segment_offset))) {
2869*f5c631daSSadaf Ebrahimi count++;
2870*f5c631daSSadaf Ebrahimi }
2871*f5c631daSSadaf Ebrahimi }
2872*f5c631daSSadaf Ebrahimi result[i] = count;
2873*f5c631daSSadaf Ebrahimi }
2874*f5c631daSSadaf Ebrahimi dst.SetUintArray(vform, result);
2875*f5c631daSSadaf Ebrahimi return dst;
2876*f5c631daSSadaf Ebrahimi }
2877*f5c631daSSadaf Ebrahimi
dup_element(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2878*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::dup_element(VectorFormat vform,
2879*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2880*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
2881*f5c631daSSadaf Ebrahimi int src_index) {
2882*f5c631daSSadaf Ebrahimi if ((vform == kFormatVnQ) || (vform == kFormatVnO)) {
2883*f5c631daSSadaf Ebrahimi // When duplicating an element larger than 64 bits, split the element into
2884*f5c631daSSadaf Ebrahimi // 64-bit parts, and duplicate the parts across the destination.
2885*f5c631daSSadaf Ebrahimi uint64_t d[4];
2886*f5c631daSSadaf Ebrahimi int count = (vform == kFormatVnQ) ? 2 : 4;
2887*f5c631daSSadaf Ebrahimi for (int i = 0; i < count; i++) {
2888*f5c631daSSadaf Ebrahimi d[i] = src.Uint(kFormatVnD, (src_index * count) + i);
2889*f5c631daSSadaf Ebrahimi }
2890*f5c631daSSadaf Ebrahimi dst.Clear();
2891*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform) * count; i++) {
2892*f5c631daSSadaf Ebrahimi dst.SetUint(kFormatVnD, i, d[i % count]);
2893*f5c631daSSadaf Ebrahimi }
2894*f5c631daSSadaf Ebrahimi } else {
2895*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
2896*f5c631daSSadaf Ebrahimi uint64_t value = src.Uint(vform, src_index);
2897*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
2898*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
2899*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, value);
2900*f5c631daSSadaf Ebrahimi }
2901*f5c631daSSadaf Ebrahimi }
2902*f5c631daSSadaf Ebrahimi return dst;
2903*f5c631daSSadaf Ebrahimi }
2904*f5c631daSSadaf Ebrahimi
dup_elements_to_segments(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2905*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::dup_elements_to_segments(VectorFormat vform,
2906*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2907*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
2908*f5c631daSSadaf Ebrahimi int src_index) {
2909*f5c631daSSadaf Ebrahimi // In SVE, a segment is a 128-bit portion of a vector, like a Q register,
2910*f5c631daSSadaf Ebrahimi // whereas in NEON, the size of segment is equal to the size of register
2911*f5c631daSSadaf Ebrahimi // itself.
2912*f5c631daSSadaf Ebrahimi int segment_size = std::min(kQRegSize, RegisterSizeInBitsFromFormat(vform));
2913*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsMultiple(segment_size, LaneSizeInBitsFromFormat(vform)));
2914*f5c631daSSadaf Ebrahimi int lanes_per_segment = segment_size / LaneSizeInBitsFromFormat(vform);
2915*f5c631daSSadaf Ebrahimi
2916*f5c631daSSadaf Ebrahimi VIXL_ASSERT(src_index >= 0);
2917*f5c631daSSadaf Ebrahimi VIXL_ASSERT(src_index < lanes_per_segment);
2918*f5c631daSSadaf Ebrahimi
2919*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
2920*f5c631daSSadaf Ebrahimi for (int j = 0; j < LaneCountFromFormat(vform); j += lanes_per_segment) {
2921*f5c631daSSadaf Ebrahimi uint64_t value = src.Uint(vform, j + src_index);
2922*f5c631daSSadaf Ebrahimi for (int i = 0; i < lanes_per_segment; i++) {
2923*f5c631daSSadaf Ebrahimi dst.SetUint(vform, j + i, value);
2924*f5c631daSSadaf Ebrahimi }
2925*f5c631daSSadaf Ebrahimi }
2926*f5c631daSSadaf Ebrahimi return dst;
2927*f5c631daSSadaf Ebrahimi }
2928*f5c631daSSadaf Ebrahimi
dup_elements_to_segments(VectorFormat vform,LogicVRegister dst,const std::pair<int,int> & src_and_index)2929*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::dup_elements_to_segments(
2930*f5c631daSSadaf Ebrahimi VectorFormat vform,
2931*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2932*f5c631daSSadaf Ebrahimi const std::pair<int, int>& src_and_index) {
2933*f5c631daSSadaf Ebrahimi return dup_elements_to_segments(vform,
2934*f5c631daSSadaf Ebrahimi dst,
2935*f5c631daSSadaf Ebrahimi ReadVRegister(src_and_index.first),
2936*f5c631daSSadaf Ebrahimi src_and_index.second);
2937*f5c631daSSadaf Ebrahimi }
2938*f5c631daSSadaf Ebrahimi
dup_immediate(VectorFormat vform,LogicVRegister dst,uint64_t imm)2939*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::dup_immediate(VectorFormat vform,
2940*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2941*f5c631daSSadaf Ebrahimi uint64_t imm) {
2942*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
2943*f5c631daSSadaf Ebrahimi uint64_t value = imm & MaxUintFromFormat(vform);
2944*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
2945*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
2946*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, value);
2947*f5c631daSSadaf Ebrahimi }
2948*f5c631daSSadaf Ebrahimi return dst;
2949*f5c631daSSadaf Ebrahimi }
2950*f5c631daSSadaf Ebrahimi
2951*f5c631daSSadaf Ebrahimi
ins_element(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,int src_index)2952*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::ins_element(VectorFormat vform,
2953*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2954*f5c631daSSadaf Ebrahimi int dst_index,
2955*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
2956*f5c631daSSadaf Ebrahimi int src_index) {
2957*f5c631daSSadaf Ebrahimi dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2958*f5c631daSSadaf Ebrahimi return dst;
2959*f5c631daSSadaf Ebrahimi }
2960*f5c631daSSadaf Ebrahimi
2961*f5c631daSSadaf Ebrahimi
ins_immediate(VectorFormat vform,LogicVRegister dst,int dst_index,uint64_t imm)2962*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::ins_immediate(VectorFormat vform,
2963*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2964*f5c631daSSadaf Ebrahimi int dst_index,
2965*f5c631daSSadaf Ebrahimi uint64_t imm) {
2966*f5c631daSSadaf Ebrahimi uint64_t value = imm & MaxUintFromFormat(vform);
2967*f5c631daSSadaf Ebrahimi dst.SetUint(vform, dst_index, value);
2968*f5c631daSSadaf Ebrahimi return dst;
2969*f5c631daSSadaf Ebrahimi }
2970*f5c631daSSadaf Ebrahimi
2971*f5c631daSSadaf Ebrahimi
index(VectorFormat vform,LogicVRegister dst,uint64_t start,uint64_t step)2972*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::index(VectorFormat vform,
2973*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2974*f5c631daSSadaf Ebrahimi uint64_t start,
2975*f5c631daSSadaf Ebrahimi uint64_t step) {
2976*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsSVEFormat(vform));
2977*f5c631daSSadaf Ebrahimi uint64_t value = start;
2978*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2979*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, value);
2980*f5c631daSSadaf Ebrahimi value += step;
2981*f5c631daSSadaf Ebrahimi }
2982*f5c631daSSadaf Ebrahimi return dst;
2983*f5c631daSSadaf Ebrahimi }
2984*f5c631daSSadaf Ebrahimi
2985*f5c631daSSadaf Ebrahimi
insr(VectorFormat vform,LogicVRegister dst,uint64_t imm)2986*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::insr(VectorFormat vform,
2987*f5c631daSSadaf Ebrahimi LogicVRegister dst,
2988*f5c631daSSadaf Ebrahimi uint64_t imm) {
2989*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsSVEFormat(vform));
2990*f5c631daSSadaf Ebrahimi for (int i = LaneCountFromFormat(vform) - 1; i > 0; i--) {
2991*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, dst.Uint(vform, i - 1));
2992*f5c631daSSadaf Ebrahimi }
2993*f5c631daSSadaf Ebrahimi dst.SetUint(vform, 0, imm);
2994*f5c631daSSadaf Ebrahimi return dst;
2995*f5c631daSSadaf Ebrahimi }
2996*f5c631daSSadaf Ebrahimi
2997*f5c631daSSadaf Ebrahimi
mov(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2998*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::mov(VectorFormat vform,
2999*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3000*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
3001*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
3002*f5c631daSSadaf Ebrahimi for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
3003*f5c631daSSadaf Ebrahimi dst.SetUint(vform, lane, src.Uint(vform, lane));
3004*f5c631daSSadaf Ebrahimi }
3005*f5c631daSSadaf Ebrahimi return dst;
3006*f5c631daSSadaf Ebrahimi }
3007*f5c631daSSadaf Ebrahimi
3008*f5c631daSSadaf Ebrahimi
mov(LogicPRegister dst,const LogicPRegister & src)3009*f5c631daSSadaf Ebrahimi LogicPRegister Simulator::mov(LogicPRegister dst, const LogicPRegister& src) {
3010*f5c631daSSadaf Ebrahimi // Avoid a copy if the registers already alias.
3011*f5c631daSSadaf Ebrahimi if (dst.Aliases(src)) return dst;
3012*f5c631daSSadaf Ebrahimi
3013*f5c631daSSadaf Ebrahimi for (int i = 0; i < dst.GetChunkCount(); i++) {
3014*f5c631daSSadaf Ebrahimi dst.SetChunk(i, src.GetChunk(i));
3015*f5c631daSSadaf Ebrahimi }
3016*f5c631daSSadaf Ebrahimi return dst;
3017*f5c631daSSadaf Ebrahimi }
3018*f5c631daSSadaf Ebrahimi
3019*f5c631daSSadaf Ebrahimi
mov_merging(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src)3020*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::mov_merging(VectorFormat vform,
3021*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3022*f5c631daSSadaf Ebrahimi const SimPRegister& pg,
3023*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
3024*f5c631daSSadaf Ebrahimi return sel(vform, dst, pg, src, dst);
3025*f5c631daSSadaf Ebrahimi }
3026*f5c631daSSadaf Ebrahimi
mov_zeroing(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src)3027*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::mov_zeroing(VectorFormat vform,
3028*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3029*f5c631daSSadaf Ebrahimi const SimPRegister& pg,
3030*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
3031*f5c631daSSadaf Ebrahimi SimVRegister zero;
3032*f5c631daSSadaf Ebrahimi dup_immediate(vform, zero, 0);
3033*f5c631daSSadaf Ebrahimi return sel(vform, dst, pg, src, zero);
3034*f5c631daSSadaf Ebrahimi }
3035*f5c631daSSadaf Ebrahimi
mov_alternating(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int start_at)3036*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::mov_alternating(VectorFormat vform,
3037*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3038*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
3039*f5c631daSSadaf Ebrahimi int start_at) {
3040*f5c631daSSadaf Ebrahimi VIXL_ASSERT((start_at == 0) || (start_at == 1));
3041*f5c631daSSadaf Ebrahimi for (int i = start_at; i < LaneCountFromFormat(vform); i += 2) {
3042*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, src.Uint(vform, i));
3043*f5c631daSSadaf Ebrahimi }
3044*f5c631daSSadaf Ebrahimi return dst;
3045*f5c631daSSadaf Ebrahimi }
3046*f5c631daSSadaf Ebrahimi
mov_merging(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)3047*f5c631daSSadaf Ebrahimi LogicPRegister Simulator::mov_merging(LogicPRegister dst,
3048*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
3049*f5c631daSSadaf Ebrahimi const LogicPRegister& src) {
3050*f5c631daSSadaf Ebrahimi return sel(dst, pg, src, dst);
3051*f5c631daSSadaf Ebrahimi }
3052*f5c631daSSadaf Ebrahimi
mov_zeroing(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)3053*f5c631daSSadaf Ebrahimi LogicPRegister Simulator::mov_zeroing(LogicPRegister dst,
3054*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
3055*f5c631daSSadaf Ebrahimi const LogicPRegister& src) {
3056*f5c631daSSadaf Ebrahimi SimPRegister all_false;
3057*f5c631daSSadaf Ebrahimi return sel(dst, pg, src, pfalse(all_false));
3058*f5c631daSSadaf Ebrahimi }
3059*f5c631daSSadaf Ebrahimi
movi(VectorFormat vform,LogicVRegister dst,uint64_t imm)3060*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::movi(VectorFormat vform,
3061*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3062*f5c631daSSadaf Ebrahimi uint64_t imm) {
3063*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
3064*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
3065*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
3066*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, imm);
3067*f5c631daSSadaf Ebrahimi }
3068*f5c631daSSadaf Ebrahimi return dst;
3069*f5c631daSSadaf Ebrahimi }
3070*f5c631daSSadaf Ebrahimi
3071*f5c631daSSadaf Ebrahimi
mvni(VectorFormat vform,LogicVRegister dst,uint64_t imm)3072*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::mvni(VectorFormat vform,
3073*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3074*f5c631daSSadaf Ebrahimi uint64_t imm) {
3075*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
3076*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
3077*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
3078*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, ~imm);
3079*f5c631daSSadaf Ebrahimi }
3080*f5c631daSSadaf Ebrahimi return dst;
3081*f5c631daSSadaf Ebrahimi }
3082*f5c631daSSadaf Ebrahimi
3083*f5c631daSSadaf Ebrahimi
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)3084*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::orr(VectorFormat vform,
3085*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3086*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
3087*f5c631daSSadaf Ebrahimi uint64_t imm) {
3088*f5c631daSSadaf Ebrahimi uint64_t result[16];
3089*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
3090*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
3091*f5c631daSSadaf Ebrahimi result[i] = src.Uint(vform, i) | imm;
3092*f5c631daSSadaf Ebrahimi }
3093*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
3094*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
3095*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result[i]);
3096*f5c631daSSadaf Ebrahimi }
3097*f5c631daSSadaf Ebrahimi return dst;
3098*f5c631daSSadaf Ebrahimi }
3099*f5c631daSSadaf Ebrahimi
3100*f5c631daSSadaf Ebrahimi
uxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_2)3101*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uxtl(VectorFormat vform,
3102*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3103*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
3104*f5c631daSSadaf Ebrahimi bool is_2) {
3105*f5c631daSSadaf Ebrahimi VectorFormat vform_half = VectorFormatHalfWidth(vform);
3106*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
3107*f5c631daSSadaf Ebrahimi int src_offset = is_2 ? lane_count : 0;
3108*f5c631daSSadaf Ebrahimi
3109*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
3110*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; i++) {
3111*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, src.Uint(vform_half, src_offset + i));
3112*f5c631daSSadaf Ebrahimi }
3113*f5c631daSSadaf Ebrahimi return dst;
3114*f5c631daSSadaf Ebrahimi }
3115*f5c631daSSadaf Ebrahimi
3116*f5c631daSSadaf Ebrahimi
sxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_2)3117*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sxtl(VectorFormat vform,
3118*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3119*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
3120*f5c631daSSadaf Ebrahimi bool is_2) {
3121*f5c631daSSadaf Ebrahimi VectorFormat vform_half = VectorFormatHalfWidth(vform);
3122*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
3123*f5c631daSSadaf Ebrahimi int src_offset = is_2 ? lane_count : 0;
3124*f5c631daSSadaf Ebrahimi
3125*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
3126*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3127*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, src.Int(vform_half, src_offset + i));
3128*f5c631daSSadaf Ebrahimi }
3129*f5c631daSSadaf Ebrahimi return dst;
3130*f5c631daSSadaf Ebrahimi }
3131*f5c631daSSadaf Ebrahimi
3132*f5c631daSSadaf Ebrahimi
uxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3133*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uxtl2(VectorFormat vform,
3134*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3135*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
3136*f5c631daSSadaf Ebrahimi return uxtl(vform, dst, src, /* is_2 = */ true);
3137*f5c631daSSadaf Ebrahimi }
3138*f5c631daSSadaf Ebrahimi
3139*f5c631daSSadaf Ebrahimi
sxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3140*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sxtl2(VectorFormat vform,
3141*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3142*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
3143*f5c631daSSadaf Ebrahimi return sxtl(vform, dst, src, /* is_2 = */ true);
3144*f5c631daSSadaf Ebrahimi }
3145*f5c631daSSadaf Ebrahimi
3146*f5c631daSSadaf Ebrahimi
uxt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,unsigned from_size_in_bits)3147*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uxt(VectorFormat vform,
3148*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3149*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
3150*f5c631daSSadaf Ebrahimi unsigned from_size_in_bits) {
3151*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
3152*f5c631daSSadaf Ebrahimi uint64_t mask = GetUintMask(from_size_in_bits);
3153*f5c631daSSadaf Ebrahimi
3154*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
3155*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; i++) {
3156*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, src.Uint(vform, i) & mask);
3157*f5c631daSSadaf Ebrahimi }
3158*f5c631daSSadaf Ebrahimi return dst;
3159*f5c631daSSadaf Ebrahimi }
3160*f5c631daSSadaf Ebrahimi
3161*f5c631daSSadaf Ebrahimi
sxt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,unsigned from_size_in_bits)3162*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sxt(VectorFormat vform,
3163*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3164*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
3165*f5c631daSSadaf Ebrahimi unsigned from_size_in_bits) {
3166*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
3167*f5c631daSSadaf Ebrahimi
3168*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
3169*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; i++) {
3170*f5c631daSSadaf Ebrahimi uint64_t value =
3171*f5c631daSSadaf Ebrahimi ExtractSignedBitfield64(from_size_in_bits - 1, 0, src.Uint(vform, i));
3172*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, value);
3173*f5c631daSSadaf Ebrahimi }
3174*f5c631daSSadaf Ebrahimi return dst;
3175*f5c631daSSadaf Ebrahimi }
3176*f5c631daSSadaf Ebrahimi
3177*f5c631daSSadaf Ebrahimi
shrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3178*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::shrn(VectorFormat vform,
3179*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3180*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
3181*f5c631daSSadaf Ebrahimi int shift) {
3182*f5c631daSSadaf Ebrahimi SimVRegister temp;
3183*f5c631daSSadaf Ebrahimi VectorFormat vform_src = VectorFormatDoubleWidth(vform);
3184*f5c631daSSadaf Ebrahimi VectorFormat vform_dst = vform;
3185*f5c631daSSadaf Ebrahimi LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
3186*f5c631daSSadaf Ebrahimi return extractnarrow(vform_dst, dst, false, shifted_src, false);
3187*f5c631daSSadaf Ebrahimi }
3188*f5c631daSSadaf Ebrahimi
3189*f5c631daSSadaf Ebrahimi
shrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3190*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::shrn2(VectorFormat vform,
3191*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3192*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
3193*f5c631daSSadaf Ebrahimi int shift) {
3194*f5c631daSSadaf Ebrahimi SimVRegister temp;
3195*f5c631daSSadaf Ebrahimi VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3196*f5c631daSSadaf Ebrahimi VectorFormat vformdst = vform;
3197*f5c631daSSadaf Ebrahimi LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
3198*f5c631daSSadaf Ebrahimi return extractnarrow(vformdst, dst, false, shifted_src, false);
3199*f5c631daSSadaf Ebrahimi }
3200*f5c631daSSadaf Ebrahimi
3201*f5c631daSSadaf Ebrahimi
rshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3202*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::rshrn(VectorFormat vform,
3203*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3204*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
3205*f5c631daSSadaf Ebrahimi int shift) {
3206*f5c631daSSadaf Ebrahimi SimVRegister temp;
3207*f5c631daSSadaf Ebrahimi VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3208*f5c631daSSadaf Ebrahimi VectorFormat vformdst = vform;
3209*f5c631daSSadaf Ebrahimi LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3210*f5c631daSSadaf Ebrahimi return extractnarrow(vformdst, dst, false, shifted_src, false);
3211*f5c631daSSadaf Ebrahimi }
3212*f5c631daSSadaf Ebrahimi
3213*f5c631daSSadaf Ebrahimi
rshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3214*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::rshrn2(VectorFormat vform,
3215*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3216*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
3217*f5c631daSSadaf Ebrahimi int shift) {
3218*f5c631daSSadaf Ebrahimi SimVRegister temp;
3219*f5c631daSSadaf Ebrahimi VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3220*f5c631daSSadaf Ebrahimi VectorFormat vformdst = vform;
3221*f5c631daSSadaf Ebrahimi LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3222*f5c631daSSadaf Ebrahimi return extractnarrow(vformdst, dst, false, shifted_src, false);
3223*f5c631daSSadaf Ebrahimi }
3224*f5c631daSSadaf Ebrahimi
Table(VectorFormat vform,LogicVRegister dst,const LogicVRegister & ind,bool zero_out_of_bounds,const LogicVRegister * tab1,const LogicVRegister * tab2,const LogicVRegister * tab3,const LogicVRegister * tab4)3225*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::Table(VectorFormat vform,
3226*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3227*f5c631daSSadaf Ebrahimi const LogicVRegister& ind,
3228*f5c631daSSadaf Ebrahimi bool zero_out_of_bounds,
3229*f5c631daSSadaf Ebrahimi const LogicVRegister* tab1,
3230*f5c631daSSadaf Ebrahimi const LogicVRegister* tab2,
3231*f5c631daSSadaf Ebrahimi const LogicVRegister* tab3,
3232*f5c631daSSadaf Ebrahimi const LogicVRegister* tab4) {
3233*f5c631daSSadaf Ebrahimi VIXL_ASSERT(tab1 != NULL);
3234*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
3235*f5c631daSSadaf Ebrahimi VIXL_ASSERT((tab3 == NULL) || (lane_count <= 16));
3236*f5c631daSSadaf Ebrahimi uint64_t table[kZRegMaxSizeInBytes * 2];
3237*f5c631daSSadaf Ebrahimi uint64_t result[kZRegMaxSizeInBytes];
3238*f5c631daSSadaf Ebrahimi
3239*f5c631daSSadaf Ebrahimi // For Neon, the table source registers are always 16B, and Neon allows only
3240*f5c631daSSadaf Ebrahimi // 8B or 16B vform for the destination, so infer the table format from the
3241*f5c631daSSadaf Ebrahimi // destination.
3242*f5c631daSSadaf Ebrahimi VectorFormat vform_tab = (vform == kFormat8B) ? kFormat16B : vform;
3243*f5c631daSSadaf Ebrahimi
3244*f5c631daSSadaf Ebrahimi uint64_t tab_size = tab1->UintArray(vform_tab, &table[0]);
3245*f5c631daSSadaf Ebrahimi if (tab2 != NULL) tab_size += tab2->UintArray(vform_tab, &table[tab_size]);
3246*f5c631daSSadaf Ebrahimi if (tab3 != NULL) tab_size += tab3->UintArray(vform_tab, &table[tab_size]);
3247*f5c631daSSadaf Ebrahimi if (tab4 != NULL) tab_size += tab4->UintArray(vform_tab, &table[tab_size]);
3248*f5c631daSSadaf Ebrahimi
3249*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; i++) {
3250*f5c631daSSadaf Ebrahimi uint64_t index = ind.Uint(vform, i);
3251*f5c631daSSadaf Ebrahimi result[i] = zero_out_of_bounds ? 0 : dst.Uint(vform, i);
3252*f5c631daSSadaf Ebrahimi if (index < tab_size) result[i] = table[index];
3253*f5c631daSSadaf Ebrahimi }
3254*f5c631daSSadaf Ebrahimi dst.SetUintArray(vform, result);
3255*f5c631daSSadaf Ebrahimi return dst;
3256*f5c631daSSadaf Ebrahimi }
3257*f5c631daSSadaf Ebrahimi
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)3258*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::tbl(VectorFormat vform,
3259*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3260*f5c631daSSadaf Ebrahimi const LogicVRegister& tab,
3261*f5c631daSSadaf Ebrahimi const LogicVRegister& ind) {
3262*f5c631daSSadaf Ebrahimi return Table(vform, dst, ind, true, &tab);
3263*f5c631daSSadaf Ebrahimi }
3264*f5c631daSSadaf Ebrahimi
3265*f5c631daSSadaf Ebrahimi
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)3266*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::tbl(VectorFormat vform,
3267*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3268*f5c631daSSadaf Ebrahimi const LogicVRegister& tab,
3269*f5c631daSSadaf Ebrahimi const LogicVRegister& tab2,
3270*f5c631daSSadaf Ebrahimi const LogicVRegister& ind) {
3271*f5c631daSSadaf Ebrahimi return Table(vform, dst, ind, true, &tab, &tab2);
3272*f5c631daSSadaf Ebrahimi }
3273*f5c631daSSadaf Ebrahimi
3274*f5c631daSSadaf Ebrahimi
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)3275*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::tbl(VectorFormat vform,
3276*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3277*f5c631daSSadaf Ebrahimi const LogicVRegister& tab,
3278*f5c631daSSadaf Ebrahimi const LogicVRegister& tab2,
3279*f5c631daSSadaf Ebrahimi const LogicVRegister& tab3,
3280*f5c631daSSadaf Ebrahimi const LogicVRegister& ind) {
3281*f5c631daSSadaf Ebrahimi return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
3282*f5c631daSSadaf Ebrahimi }
3283*f5c631daSSadaf Ebrahimi
3284*f5c631daSSadaf Ebrahimi
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)3285*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::tbl(VectorFormat vform,
3286*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3287*f5c631daSSadaf Ebrahimi const LogicVRegister& tab,
3288*f5c631daSSadaf Ebrahimi const LogicVRegister& tab2,
3289*f5c631daSSadaf Ebrahimi const LogicVRegister& tab3,
3290*f5c631daSSadaf Ebrahimi const LogicVRegister& tab4,
3291*f5c631daSSadaf Ebrahimi const LogicVRegister& ind) {
3292*f5c631daSSadaf Ebrahimi return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
3293*f5c631daSSadaf Ebrahimi }
3294*f5c631daSSadaf Ebrahimi
3295*f5c631daSSadaf Ebrahimi
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)3296*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::tbx(VectorFormat vform,
3297*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3298*f5c631daSSadaf Ebrahimi const LogicVRegister& tab,
3299*f5c631daSSadaf Ebrahimi const LogicVRegister& ind) {
3300*f5c631daSSadaf Ebrahimi return Table(vform, dst, ind, false, &tab);
3301*f5c631daSSadaf Ebrahimi }
3302*f5c631daSSadaf Ebrahimi
3303*f5c631daSSadaf Ebrahimi
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)3304*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::tbx(VectorFormat vform,
3305*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3306*f5c631daSSadaf Ebrahimi const LogicVRegister& tab,
3307*f5c631daSSadaf Ebrahimi const LogicVRegister& tab2,
3308*f5c631daSSadaf Ebrahimi const LogicVRegister& ind) {
3309*f5c631daSSadaf Ebrahimi return Table(vform, dst, ind, false, &tab, &tab2);
3310*f5c631daSSadaf Ebrahimi }
3311*f5c631daSSadaf Ebrahimi
3312*f5c631daSSadaf Ebrahimi
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)3313*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::tbx(VectorFormat vform,
3314*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3315*f5c631daSSadaf Ebrahimi const LogicVRegister& tab,
3316*f5c631daSSadaf Ebrahimi const LogicVRegister& tab2,
3317*f5c631daSSadaf Ebrahimi const LogicVRegister& tab3,
3318*f5c631daSSadaf Ebrahimi const LogicVRegister& ind) {
3319*f5c631daSSadaf Ebrahimi return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
3320*f5c631daSSadaf Ebrahimi }
3321*f5c631daSSadaf Ebrahimi
3322*f5c631daSSadaf Ebrahimi
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)3323*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::tbx(VectorFormat vform,
3324*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3325*f5c631daSSadaf Ebrahimi const LogicVRegister& tab,
3326*f5c631daSSadaf Ebrahimi const LogicVRegister& tab2,
3327*f5c631daSSadaf Ebrahimi const LogicVRegister& tab3,
3328*f5c631daSSadaf Ebrahimi const LogicVRegister& tab4,
3329*f5c631daSSadaf Ebrahimi const LogicVRegister& ind) {
3330*f5c631daSSadaf Ebrahimi return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
3331*f5c631daSSadaf Ebrahimi }
3332*f5c631daSSadaf Ebrahimi
3333*f5c631daSSadaf Ebrahimi
uqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3334*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uqshrn(VectorFormat vform,
3335*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3336*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
3337*f5c631daSSadaf Ebrahimi int shift) {
3338*f5c631daSSadaf Ebrahimi return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
3339*f5c631daSSadaf Ebrahimi }
3340*f5c631daSSadaf Ebrahimi
3341*f5c631daSSadaf Ebrahimi
uqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3342*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uqshrn2(VectorFormat vform,
3343*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3344*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
3345*f5c631daSSadaf Ebrahimi int shift) {
3346*f5c631daSSadaf Ebrahimi return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3347*f5c631daSSadaf Ebrahimi }
3348*f5c631daSSadaf Ebrahimi
3349*f5c631daSSadaf Ebrahimi
uqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3350*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uqrshrn(VectorFormat vform,
3351*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3352*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
3353*f5c631daSSadaf Ebrahimi int shift) {
3354*f5c631daSSadaf Ebrahimi return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
3355*f5c631daSSadaf Ebrahimi }
3356*f5c631daSSadaf Ebrahimi
3357*f5c631daSSadaf Ebrahimi
uqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3358*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
3359*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3360*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
3361*f5c631daSSadaf Ebrahimi int shift) {
3362*f5c631daSSadaf Ebrahimi return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3363*f5c631daSSadaf Ebrahimi }
3364*f5c631daSSadaf Ebrahimi
3365*f5c631daSSadaf Ebrahimi
sqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3366*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqshrn(VectorFormat vform,
3367*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3368*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
3369*f5c631daSSadaf Ebrahimi int shift) {
3370*f5c631daSSadaf Ebrahimi SimVRegister temp;
3371*f5c631daSSadaf Ebrahimi VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3372*f5c631daSSadaf Ebrahimi VectorFormat vformdst = vform;
3373*f5c631daSSadaf Ebrahimi LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3374*f5c631daSSadaf Ebrahimi return sqxtn(vformdst, dst, shifted_src);
3375*f5c631daSSadaf Ebrahimi }
3376*f5c631daSSadaf Ebrahimi
3377*f5c631daSSadaf Ebrahimi
sqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3378*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqshrn2(VectorFormat vform,
3379*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3380*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
3381*f5c631daSSadaf Ebrahimi int shift) {
3382*f5c631daSSadaf Ebrahimi SimVRegister temp;
3383*f5c631daSSadaf Ebrahimi VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3384*f5c631daSSadaf Ebrahimi VectorFormat vformdst = vform;
3385*f5c631daSSadaf Ebrahimi LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3386*f5c631daSSadaf Ebrahimi return sqxtn(vformdst, dst, shifted_src);
3387*f5c631daSSadaf Ebrahimi }
3388*f5c631daSSadaf Ebrahimi
3389*f5c631daSSadaf Ebrahimi
sqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3390*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqrshrn(VectorFormat vform,
3391*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3392*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
3393*f5c631daSSadaf Ebrahimi int shift) {
3394*f5c631daSSadaf Ebrahimi SimVRegister temp;
3395*f5c631daSSadaf Ebrahimi VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3396*f5c631daSSadaf Ebrahimi VectorFormat vformdst = vform;
3397*f5c631daSSadaf Ebrahimi LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3398*f5c631daSSadaf Ebrahimi return sqxtn(vformdst, dst, shifted_src);
3399*f5c631daSSadaf Ebrahimi }
3400*f5c631daSSadaf Ebrahimi
3401*f5c631daSSadaf Ebrahimi
sqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3402*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
3403*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3404*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
3405*f5c631daSSadaf Ebrahimi int shift) {
3406*f5c631daSSadaf Ebrahimi SimVRegister temp;
3407*f5c631daSSadaf Ebrahimi VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3408*f5c631daSSadaf Ebrahimi VectorFormat vformdst = vform;
3409*f5c631daSSadaf Ebrahimi LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3410*f5c631daSSadaf Ebrahimi return sqxtn(vformdst, dst, shifted_src);
3411*f5c631daSSadaf Ebrahimi }
3412*f5c631daSSadaf Ebrahimi
3413*f5c631daSSadaf Ebrahimi
sqshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3414*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqshrun(VectorFormat vform,
3415*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3416*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
3417*f5c631daSSadaf Ebrahimi int shift) {
3418*f5c631daSSadaf Ebrahimi SimVRegister temp;
3419*f5c631daSSadaf Ebrahimi VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3420*f5c631daSSadaf Ebrahimi VectorFormat vformdst = vform;
3421*f5c631daSSadaf Ebrahimi LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3422*f5c631daSSadaf Ebrahimi return sqxtun(vformdst, dst, shifted_src);
3423*f5c631daSSadaf Ebrahimi }
3424*f5c631daSSadaf Ebrahimi
3425*f5c631daSSadaf Ebrahimi
sqshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3426*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqshrun2(VectorFormat vform,
3427*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3428*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
3429*f5c631daSSadaf Ebrahimi int shift) {
3430*f5c631daSSadaf Ebrahimi SimVRegister temp;
3431*f5c631daSSadaf Ebrahimi VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3432*f5c631daSSadaf Ebrahimi VectorFormat vformdst = vform;
3433*f5c631daSSadaf Ebrahimi LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3434*f5c631daSSadaf Ebrahimi return sqxtun(vformdst, dst, shifted_src);
3435*f5c631daSSadaf Ebrahimi }
3436*f5c631daSSadaf Ebrahimi
3437*f5c631daSSadaf Ebrahimi
sqrshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3438*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqrshrun(VectorFormat vform,
3439*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3440*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
3441*f5c631daSSadaf Ebrahimi int shift) {
3442*f5c631daSSadaf Ebrahimi SimVRegister temp;
3443*f5c631daSSadaf Ebrahimi VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3444*f5c631daSSadaf Ebrahimi VectorFormat vformdst = vform;
3445*f5c631daSSadaf Ebrahimi LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3446*f5c631daSSadaf Ebrahimi return sqxtun(vformdst, dst, shifted_src);
3447*f5c631daSSadaf Ebrahimi }
3448*f5c631daSSadaf Ebrahimi
3449*f5c631daSSadaf Ebrahimi
sqrshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3450*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
3451*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3452*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
3453*f5c631daSSadaf Ebrahimi int shift) {
3454*f5c631daSSadaf Ebrahimi SimVRegister temp;
3455*f5c631daSSadaf Ebrahimi VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3456*f5c631daSSadaf Ebrahimi VectorFormat vformdst = vform;
3457*f5c631daSSadaf Ebrahimi LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3458*f5c631daSSadaf Ebrahimi return sqxtun(vformdst, dst, shifted_src);
3459*f5c631daSSadaf Ebrahimi }
3460*f5c631daSSadaf Ebrahimi
3461*f5c631daSSadaf Ebrahimi
uaddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3462*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uaddl(VectorFormat vform,
3463*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3464*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3465*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3466*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
3467*f5c631daSSadaf Ebrahimi uxtl(vform, temp1, src1);
3468*f5c631daSSadaf Ebrahimi uxtl(vform, temp2, src2);
3469*f5c631daSSadaf Ebrahimi add(vform, dst, temp1, temp2);
3470*f5c631daSSadaf Ebrahimi return dst;
3471*f5c631daSSadaf Ebrahimi }
3472*f5c631daSSadaf Ebrahimi
3473*f5c631daSSadaf Ebrahimi
uaddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3474*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uaddl2(VectorFormat vform,
3475*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3476*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3477*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3478*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
3479*f5c631daSSadaf Ebrahimi uxtl2(vform, temp1, src1);
3480*f5c631daSSadaf Ebrahimi uxtl2(vform, temp2, src2);
3481*f5c631daSSadaf Ebrahimi add(vform, dst, temp1, temp2);
3482*f5c631daSSadaf Ebrahimi return dst;
3483*f5c631daSSadaf Ebrahimi }
3484*f5c631daSSadaf Ebrahimi
3485*f5c631daSSadaf Ebrahimi
uaddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3486*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uaddw(VectorFormat vform,
3487*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3488*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3489*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3490*f5c631daSSadaf Ebrahimi SimVRegister temp;
3491*f5c631daSSadaf Ebrahimi uxtl(vform, temp, src2);
3492*f5c631daSSadaf Ebrahimi add(vform, dst, src1, temp);
3493*f5c631daSSadaf Ebrahimi return dst;
3494*f5c631daSSadaf Ebrahimi }
3495*f5c631daSSadaf Ebrahimi
3496*f5c631daSSadaf Ebrahimi
uaddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3497*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uaddw2(VectorFormat vform,
3498*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3499*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3500*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3501*f5c631daSSadaf Ebrahimi SimVRegister temp;
3502*f5c631daSSadaf Ebrahimi uxtl2(vform, temp, src2);
3503*f5c631daSSadaf Ebrahimi add(vform, dst, src1, temp);
3504*f5c631daSSadaf Ebrahimi return dst;
3505*f5c631daSSadaf Ebrahimi }
3506*f5c631daSSadaf Ebrahimi
3507*f5c631daSSadaf Ebrahimi
saddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3508*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::saddl(VectorFormat vform,
3509*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3510*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3511*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3512*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
3513*f5c631daSSadaf Ebrahimi sxtl(vform, temp1, src1);
3514*f5c631daSSadaf Ebrahimi sxtl(vform, temp2, src2);
3515*f5c631daSSadaf Ebrahimi add(vform, dst, temp1, temp2);
3516*f5c631daSSadaf Ebrahimi return dst;
3517*f5c631daSSadaf Ebrahimi }
3518*f5c631daSSadaf Ebrahimi
3519*f5c631daSSadaf Ebrahimi
saddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3520*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::saddl2(VectorFormat vform,
3521*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3522*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3523*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3524*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
3525*f5c631daSSadaf Ebrahimi sxtl2(vform, temp1, src1);
3526*f5c631daSSadaf Ebrahimi sxtl2(vform, temp2, src2);
3527*f5c631daSSadaf Ebrahimi add(vform, dst, temp1, temp2);
3528*f5c631daSSadaf Ebrahimi return dst;
3529*f5c631daSSadaf Ebrahimi }
3530*f5c631daSSadaf Ebrahimi
3531*f5c631daSSadaf Ebrahimi
saddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3532*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::saddw(VectorFormat vform,
3533*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3534*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3535*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3536*f5c631daSSadaf Ebrahimi SimVRegister temp;
3537*f5c631daSSadaf Ebrahimi sxtl(vform, temp, src2);
3538*f5c631daSSadaf Ebrahimi add(vform, dst, src1, temp);
3539*f5c631daSSadaf Ebrahimi return dst;
3540*f5c631daSSadaf Ebrahimi }
3541*f5c631daSSadaf Ebrahimi
3542*f5c631daSSadaf Ebrahimi
saddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3543*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::saddw2(VectorFormat vform,
3544*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3545*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3546*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3547*f5c631daSSadaf Ebrahimi SimVRegister temp;
3548*f5c631daSSadaf Ebrahimi sxtl2(vform, temp, src2);
3549*f5c631daSSadaf Ebrahimi add(vform, dst, src1, temp);
3550*f5c631daSSadaf Ebrahimi return dst;
3551*f5c631daSSadaf Ebrahimi }
3552*f5c631daSSadaf Ebrahimi
3553*f5c631daSSadaf Ebrahimi
usubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3554*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::usubl(VectorFormat vform,
3555*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3556*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3557*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3558*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
3559*f5c631daSSadaf Ebrahimi uxtl(vform, temp1, src1);
3560*f5c631daSSadaf Ebrahimi uxtl(vform, temp2, src2);
3561*f5c631daSSadaf Ebrahimi sub(vform, dst, temp1, temp2);
3562*f5c631daSSadaf Ebrahimi return dst;
3563*f5c631daSSadaf Ebrahimi }
3564*f5c631daSSadaf Ebrahimi
3565*f5c631daSSadaf Ebrahimi
usubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3566*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::usubl2(VectorFormat vform,
3567*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3568*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3569*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3570*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
3571*f5c631daSSadaf Ebrahimi uxtl2(vform, temp1, src1);
3572*f5c631daSSadaf Ebrahimi uxtl2(vform, temp2, src2);
3573*f5c631daSSadaf Ebrahimi sub(vform, dst, temp1, temp2);
3574*f5c631daSSadaf Ebrahimi return dst;
3575*f5c631daSSadaf Ebrahimi }
3576*f5c631daSSadaf Ebrahimi
3577*f5c631daSSadaf Ebrahimi
usubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3578*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::usubw(VectorFormat vform,
3579*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3580*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3581*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3582*f5c631daSSadaf Ebrahimi SimVRegister temp;
3583*f5c631daSSadaf Ebrahimi uxtl(vform, temp, src2);
3584*f5c631daSSadaf Ebrahimi sub(vform, dst, src1, temp);
3585*f5c631daSSadaf Ebrahimi return dst;
3586*f5c631daSSadaf Ebrahimi }
3587*f5c631daSSadaf Ebrahimi
3588*f5c631daSSadaf Ebrahimi
usubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3589*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::usubw2(VectorFormat vform,
3590*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3591*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3592*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3593*f5c631daSSadaf Ebrahimi SimVRegister temp;
3594*f5c631daSSadaf Ebrahimi uxtl2(vform, temp, src2);
3595*f5c631daSSadaf Ebrahimi sub(vform, dst, src1, temp);
3596*f5c631daSSadaf Ebrahimi return dst;
3597*f5c631daSSadaf Ebrahimi }
3598*f5c631daSSadaf Ebrahimi
3599*f5c631daSSadaf Ebrahimi
ssubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3600*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::ssubl(VectorFormat vform,
3601*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3602*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3603*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3604*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
3605*f5c631daSSadaf Ebrahimi sxtl(vform, temp1, src1);
3606*f5c631daSSadaf Ebrahimi sxtl(vform, temp2, src2);
3607*f5c631daSSadaf Ebrahimi sub(vform, dst, temp1, temp2);
3608*f5c631daSSadaf Ebrahimi return dst;
3609*f5c631daSSadaf Ebrahimi }
3610*f5c631daSSadaf Ebrahimi
3611*f5c631daSSadaf Ebrahimi
ssubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3612*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::ssubl2(VectorFormat vform,
3613*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3614*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3615*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3616*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
3617*f5c631daSSadaf Ebrahimi sxtl2(vform, temp1, src1);
3618*f5c631daSSadaf Ebrahimi sxtl2(vform, temp2, src2);
3619*f5c631daSSadaf Ebrahimi sub(vform, dst, temp1, temp2);
3620*f5c631daSSadaf Ebrahimi return dst;
3621*f5c631daSSadaf Ebrahimi }
3622*f5c631daSSadaf Ebrahimi
3623*f5c631daSSadaf Ebrahimi
ssubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3624*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::ssubw(VectorFormat vform,
3625*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3626*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3627*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3628*f5c631daSSadaf Ebrahimi SimVRegister temp;
3629*f5c631daSSadaf Ebrahimi sxtl(vform, temp, src2);
3630*f5c631daSSadaf Ebrahimi sub(vform, dst, src1, temp);
3631*f5c631daSSadaf Ebrahimi return dst;
3632*f5c631daSSadaf Ebrahimi }
3633*f5c631daSSadaf Ebrahimi
3634*f5c631daSSadaf Ebrahimi
ssubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3635*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::ssubw2(VectorFormat vform,
3636*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3637*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3638*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3639*f5c631daSSadaf Ebrahimi SimVRegister temp;
3640*f5c631daSSadaf Ebrahimi sxtl2(vform, temp, src2);
3641*f5c631daSSadaf Ebrahimi sub(vform, dst, src1, temp);
3642*f5c631daSSadaf Ebrahimi return dst;
3643*f5c631daSSadaf Ebrahimi }
3644*f5c631daSSadaf Ebrahimi
3645*f5c631daSSadaf Ebrahimi
uabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3646*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uabal(VectorFormat vform,
3647*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3648*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3649*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3650*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
3651*f5c631daSSadaf Ebrahimi uxtl(vform, temp1, src1);
3652*f5c631daSSadaf Ebrahimi uxtl(vform, temp2, src2);
3653*f5c631daSSadaf Ebrahimi uaba(vform, dst, temp1, temp2);
3654*f5c631daSSadaf Ebrahimi return dst;
3655*f5c631daSSadaf Ebrahimi }
3656*f5c631daSSadaf Ebrahimi
3657*f5c631daSSadaf Ebrahimi
uabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3658*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uabal2(VectorFormat vform,
3659*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3660*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3661*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3662*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
3663*f5c631daSSadaf Ebrahimi uxtl2(vform, temp1, src1);
3664*f5c631daSSadaf Ebrahimi uxtl2(vform, temp2, src2);
3665*f5c631daSSadaf Ebrahimi uaba(vform, dst, temp1, temp2);
3666*f5c631daSSadaf Ebrahimi return dst;
3667*f5c631daSSadaf Ebrahimi }
3668*f5c631daSSadaf Ebrahimi
3669*f5c631daSSadaf Ebrahimi
sabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3670*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sabal(VectorFormat vform,
3671*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3672*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3673*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3674*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
3675*f5c631daSSadaf Ebrahimi sxtl(vform, temp1, src1);
3676*f5c631daSSadaf Ebrahimi sxtl(vform, temp2, src2);
3677*f5c631daSSadaf Ebrahimi saba(vform, dst, temp1, temp2);
3678*f5c631daSSadaf Ebrahimi return dst;
3679*f5c631daSSadaf Ebrahimi }
3680*f5c631daSSadaf Ebrahimi
3681*f5c631daSSadaf Ebrahimi
sabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3682*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sabal2(VectorFormat vform,
3683*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3684*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3685*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3686*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
3687*f5c631daSSadaf Ebrahimi sxtl2(vform, temp1, src1);
3688*f5c631daSSadaf Ebrahimi sxtl2(vform, temp2, src2);
3689*f5c631daSSadaf Ebrahimi saba(vform, dst, temp1, temp2);
3690*f5c631daSSadaf Ebrahimi return dst;
3691*f5c631daSSadaf Ebrahimi }
3692*f5c631daSSadaf Ebrahimi
3693*f5c631daSSadaf Ebrahimi
uabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3694*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uabdl(VectorFormat vform,
3695*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3696*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3697*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3698*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
3699*f5c631daSSadaf Ebrahimi uxtl(vform, temp1, src1);
3700*f5c631daSSadaf Ebrahimi uxtl(vform, temp2, src2);
3701*f5c631daSSadaf Ebrahimi absdiff(vform, dst, temp1, temp2, false);
3702*f5c631daSSadaf Ebrahimi return dst;
3703*f5c631daSSadaf Ebrahimi }
3704*f5c631daSSadaf Ebrahimi
3705*f5c631daSSadaf Ebrahimi
uabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3706*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uabdl2(VectorFormat vform,
3707*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3708*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3709*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3710*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
3711*f5c631daSSadaf Ebrahimi uxtl2(vform, temp1, src1);
3712*f5c631daSSadaf Ebrahimi uxtl2(vform, temp2, src2);
3713*f5c631daSSadaf Ebrahimi absdiff(vform, dst, temp1, temp2, false);
3714*f5c631daSSadaf Ebrahimi return dst;
3715*f5c631daSSadaf Ebrahimi }
3716*f5c631daSSadaf Ebrahimi
3717*f5c631daSSadaf Ebrahimi
sabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3718*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sabdl(VectorFormat vform,
3719*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3720*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3721*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3722*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
3723*f5c631daSSadaf Ebrahimi sxtl(vform, temp1, src1);
3724*f5c631daSSadaf Ebrahimi sxtl(vform, temp2, src2);
3725*f5c631daSSadaf Ebrahimi absdiff(vform, dst, temp1, temp2, true);
3726*f5c631daSSadaf Ebrahimi return dst;
3727*f5c631daSSadaf Ebrahimi }
3728*f5c631daSSadaf Ebrahimi
3729*f5c631daSSadaf Ebrahimi
sabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3730*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sabdl2(VectorFormat vform,
3731*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3732*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3733*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3734*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
3735*f5c631daSSadaf Ebrahimi sxtl2(vform, temp1, src1);
3736*f5c631daSSadaf Ebrahimi sxtl2(vform, temp2, src2);
3737*f5c631daSSadaf Ebrahimi absdiff(vform, dst, temp1, temp2, true);
3738*f5c631daSSadaf Ebrahimi return dst;
3739*f5c631daSSadaf Ebrahimi }
3740*f5c631daSSadaf Ebrahimi
3741*f5c631daSSadaf Ebrahimi
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3742*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::umull(VectorFormat vform,
3743*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3744*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3745*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
3746*f5c631daSSadaf Ebrahimi bool is_2) {
3747*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
3748*f5c631daSSadaf Ebrahimi uxtl(vform, temp1, src1, is_2);
3749*f5c631daSSadaf Ebrahimi uxtl(vform, temp2, src2, is_2);
3750*f5c631daSSadaf Ebrahimi mul(vform, dst, temp1, temp2);
3751*f5c631daSSadaf Ebrahimi return dst;
3752*f5c631daSSadaf Ebrahimi }
3753*f5c631daSSadaf Ebrahimi
3754*f5c631daSSadaf Ebrahimi
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3755*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::umull2(VectorFormat vform,
3756*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3757*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3758*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3759*f5c631daSSadaf Ebrahimi return umull(vform, dst, src1, src2, /* is_2 = */ true);
3760*f5c631daSSadaf Ebrahimi }
3761*f5c631daSSadaf Ebrahimi
3762*f5c631daSSadaf Ebrahimi
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3763*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::smull(VectorFormat vform,
3764*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3765*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3766*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
3767*f5c631daSSadaf Ebrahimi bool is_2) {
3768*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
3769*f5c631daSSadaf Ebrahimi sxtl(vform, temp1, src1, is_2);
3770*f5c631daSSadaf Ebrahimi sxtl(vform, temp2, src2, is_2);
3771*f5c631daSSadaf Ebrahimi mul(vform, dst, temp1, temp2);
3772*f5c631daSSadaf Ebrahimi return dst;
3773*f5c631daSSadaf Ebrahimi }
3774*f5c631daSSadaf Ebrahimi
3775*f5c631daSSadaf Ebrahimi
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3776*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::smull2(VectorFormat vform,
3777*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3778*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3779*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3780*f5c631daSSadaf Ebrahimi return smull(vform, dst, src1, src2, /* is_2 = */ true);
3781*f5c631daSSadaf Ebrahimi }
3782*f5c631daSSadaf Ebrahimi
3783*f5c631daSSadaf Ebrahimi
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3784*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::umlsl(VectorFormat vform,
3785*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3786*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3787*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
3788*f5c631daSSadaf Ebrahimi bool is_2) {
3789*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
3790*f5c631daSSadaf Ebrahimi uxtl(vform, temp1, src1, is_2);
3791*f5c631daSSadaf Ebrahimi uxtl(vform, temp2, src2, is_2);
3792*f5c631daSSadaf Ebrahimi mls(vform, dst, dst, temp1, temp2);
3793*f5c631daSSadaf Ebrahimi return dst;
3794*f5c631daSSadaf Ebrahimi }
3795*f5c631daSSadaf Ebrahimi
3796*f5c631daSSadaf Ebrahimi
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3797*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::umlsl2(VectorFormat vform,
3798*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3799*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3800*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3801*f5c631daSSadaf Ebrahimi return umlsl(vform, dst, src1, src2, /* is_2 = */ true);
3802*f5c631daSSadaf Ebrahimi }
3803*f5c631daSSadaf Ebrahimi
3804*f5c631daSSadaf Ebrahimi
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3805*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::smlsl(VectorFormat vform,
3806*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3807*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3808*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
3809*f5c631daSSadaf Ebrahimi bool is_2) {
3810*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
3811*f5c631daSSadaf Ebrahimi sxtl(vform, temp1, src1, is_2);
3812*f5c631daSSadaf Ebrahimi sxtl(vform, temp2, src2, is_2);
3813*f5c631daSSadaf Ebrahimi mls(vform, dst, dst, temp1, temp2);
3814*f5c631daSSadaf Ebrahimi return dst;
3815*f5c631daSSadaf Ebrahimi }
3816*f5c631daSSadaf Ebrahimi
3817*f5c631daSSadaf Ebrahimi
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3818*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::smlsl2(VectorFormat vform,
3819*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3820*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3821*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3822*f5c631daSSadaf Ebrahimi return smlsl(vform, dst, src1, src2, /* is_2 = */ true);
3823*f5c631daSSadaf Ebrahimi }
3824*f5c631daSSadaf Ebrahimi
3825*f5c631daSSadaf Ebrahimi
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3826*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::umlal(VectorFormat vform,
3827*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3828*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3829*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
3830*f5c631daSSadaf Ebrahimi bool is_2) {
3831*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
3832*f5c631daSSadaf Ebrahimi uxtl(vform, temp1, src1, is_2);
3833*f5c631daSSadaf Ebrahimi uxtl(vform, temp2, src2, is_2);
3834*f5c631daSSadaf Ebrahimi mla(vform, dst, dst, temp1, temp2);
3835*f5c631daSSadaf Ebrahimi return dst;
3836*f5c631daSSadaf Ebrahimi }
3837*f5c631daSSadaf Ebrahimi
3838*f5c631daSSadaf Ebrahimi
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3839*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::umlal2(VectorFormat vform,
3840*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3841*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3842*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3843*f5c631daSSadaf Ebrahimi return umlal(vform, dst, src1, src2, /* is_2 = */ true);
3844*f5c631daSSadaf Ebrahimi }
3845*f5c631daSSadaf Ebrahimi
3846*f5c631daSSadaf Ebrahimi
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3847*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::smlal(VectorFormat vform,
3848*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3849*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3850*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
3851*f5c631daSSadaf Ebrahimi bool is_2) {
3852*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
3853*f5c631daSSadaf Ebrahimi sxtl(vform, temp1, src1, is_2);
3854*f5c631daSSadaf Ebrahimi sxtl(vform, temp2, src2, is_2);
3855*f5c631daSSadaf Ebrahimi mla(vform, dst, dst, temp1, temp2);
3856*f5c631daSSadaf Ebrahimi return dst;
3857*f5c631daSSadaf Ebrahimi }
3858*f5c631daSSadaf Ebrahimi
3859*f5c631daSSadaf Ebrahimi
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3860*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::smlal2(VectorFormat vform,
3861*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3862*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3863*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3864*f5c631daSSadaf Ebrahimi return smlal(vform, dst, src1, src2, /* is_2 = */ true);
3865*f5c631daSSadaf Ebrahimi }
3866*f5c631daSSadaf Ebrahimi
3867*f5c631daSSadaf Ebrahimi
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3868*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqdmlal(VectorFormat vform,
3869*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3870*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3871*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
3872*f5c631daSSadaf Ebrahimi bool is_2) {
3873*f5c631daSSadaf Ebrahimi SimVRegister temp;
3874*f5c631daSSadaf Ebrahimi LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2);
3875*f5c631daSSadaf Ebrahimi return add(vform, dst, dst, product).SignedSaturate(vform);
3876*f5c631daSSadaf Ebrahimi }
3877*f5c631daSSadaf Ebrahimi
3878*f5c631daSSadaf Ebrahimi
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3879*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
3880*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3881*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3882*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3883*f5c631daSSadaf Ebrahimi return sqdmlal(vform, dst, src1, src2, /* is_2 = */ true);
3884*f5c631daSSadaf Ebrahimi }
3885*f5c631daSSadaf Ebrahimi
3886*f5c631daSSadaf Ebrahimi
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3887*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
3888*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3889*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3890*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
3891*f5c631daSSadaf Ebrahimi bool is_2) {
3892*f5c631daSSadaf Ebrahimi SimVRegister temp;
3893*f5c631daSSadaf Ebrahimi LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2);
3894*f5c631daSSadaf Ebrahimi return sub(vform, dst, dst, product).SignedSaturate(vform);
3895*f5c631daSSadaf Ebrahimi }
3896*f5c631daSSadaf Ebrahimi
3897*f5c631daSSadaf Ebrahimi
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3898*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
3899*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3900*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3901*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3902*f5c631daSSadaf Ebrahimi return sqdmlsl(vform, dst, src1, src2, /* is_2 = */ true);
3903*f5c631daSSadaf Ebrahimi }
3904*f5c631daSSadaf Ebrahimi
3905*f5c631daSSadaf Ebrahimi
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3906*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqdmull(VectorFormat vform,
3907*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3908*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3909*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
3910*f5c631daSSadaf Ebrahimi bool is_2) {
3911*f5c631daSSadaf Ebrahimi SimVRegister temp;
3912*f5c631daSSadaf Ebrahimi LogicVRegister product = smull(vform, temp, src1, src2, is_2);
3913*f5c631daSSadaf Ebrahimi return add(vform, dst, product, product).SignedSaturate(vform);
3914*f5c631daSSadaf Ebrahimi }
3915*f5c631daSSadaf Ebrahimi
3916*f5c631daSSadaf Ebrahimi
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3917*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqdmull2(VectorFormat vform,
3918*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3919*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3920*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
3921*f5c631daSSadaf Ebrahimi return sqdmull(vform, dst, src1, src2, /* is_2 = */ true);
3922*f5c631daSSadaf Ebrahimi }
3923*f5c631daSSadaf Ebrahimi
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)3924*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
3925*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3926*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3927*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
3928*f5c631daSSadaf Ebrahimi bool round) {
3929*f5c631daSSadaf Ebrahimi int esize = LaneSizeInBitsFromFormat(vform);
3930*f5c631daSSadaf Ebrahimi
3931*f5c631daSSadaf Ebrahimi SimVRegister temp_lo, temp_hi;
3932*f5c631daSSadaf Ebrahimi
3933*f5c631daSSadaf Ebrahimi // Compute low and high multiplication results.
3934*f5c631daSSadaf Ebrahimi mul(vform, temp_lo, src1, src2);
3935*f5c631daSSadaf Ebrahimi smulh(vform, temp_hi, src1, src2);
3936*f5c631daSSadaf Ebrahimi
3937*f5c631daSSadaf Ebrahimi // Double by shifting high half, and adding in most-significant bit of low
3938*f5c631daSSadaf Ebrahimi // half.
3939*f5c631daSSadaf Ebrahimi shl(vform, temp_hi, temp_hi, 1);
3940*f5c631daSSadaf Ebrahimi usra(vform, temp_hi, temp_lo, esize - 1);
3941*f5c631daSSadaf Ebrahimi
3942*f5c631daSSadaf Ebrahimi if (round) {
3943*f5c631daSSadaf Ebrahimi // Add the second (due to doubling) most-significant bit of the low half
3944*f5c631daSSadaf Ebrahimi // into the result.
3945*f5c631daSSadaf Ebrahimi shl(vform, temp_lo, temp_lo, 1);
3946*f5c631daSSadaf Ebrahimi usra(vform, temp_hi, temp_lo, esize - 1);
3947*f5c631daSSadaf Ebrahimi }
3948*f5c631daSSadaf Ebrahimi
3949*f5c631daSSadaf Ebrahimi SimPRegister not_sat;
3950*f5c631daSSadaf Ebrahimi LogicPRegister ptemp(not_sat);
3951*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
3952*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3953*f5c631daSSadaf Ebrahimi // Saturation only occurs when src1 = src2 = minimum representable value.
3954*f5c631daSSadaf Ebrahimi // Check this as a special case.
3955*f5c631daSSadaf Ebrahimi ptemp.SetActive(vform, i, true);
3956*f5c631daSSadaf Ebrahimi if ((src1.Int(vform, i) == MinIntFromFormat(vform)) &&
3957*f5c631daSSadaf Ebrahimi (src2.Int(vform, i) == MinIntFromFormat(vform))) {
3958*f5c631daSSadaf Ebrahimi ptemp.SetActive(vform, i, false);
3959*f5c631daSSadaf Ebrahimi }
3960*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, MaxIntFromFormat(vform));
3961*f5c631daSSadaf Ebrahimi }
3962*f5c631daSSadaf Ebrahimi
3963*f5c631daSSadaf Ebrahimi mov_merging(vform, dst, not_sat, temp_hi);
3964*f5c631daSSadaf Ebrahimi return dst;
3965*f5c631daSSadaf Ebrahimi }
3966*f5c631daSSadaf Ebrahimi
3967*f5c631daSSadaf Ebrahimi
dot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_src1_signed,bool is_src2_signed)3968*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::dot(VectorFormat vform,
3969*f5c631daSSadaf Ebrahimi LogicVRegister dst,
3970*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
3971*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
3972*f5c631daSSadaf Ebrahimi bool is_src1_signed,
3973*f5c631daSSadaf Ebrahimi bool is_src2_signed) {
3974*f5c631daSSadaf Ebrahimi VectorFormat quarter_vform =
3975*f5c631daSSadaf Ebrahimi VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
3976*f5c631daSSadaf Ebrahimi
3977*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
3978*f5c631daSSadaf Ebrahimi for (int e = 0; e < LaneCountFromFormat(vform); e++) {
3979*f5c631daSSadaf Ebrahimi uint64_t result = 0;
3980*f5c631daSSadaf Ebrahimi int64_t element1, element2;
3981*f5c631daSSadaf Ebrahimi for (int i = 0; i < 4; i++) {
3982*f5c631daSSadaf Ebrahimi int index = 4 * e + i;
3983*f5c631daSSadaf Ebrahimi if (is_src1_signed) {
3984*f5c631daSSadaf Ebrahimi element1 = src1.Int(quarter_vform, index);
3985*f5c631daSSadaf Ebrahimi } else {
3986*f5c631daSSadaf Ebrahimi element1 = src1.Uint(quarter_vform, index);
3987*f5c631daSSadaf Ebrahimi }
3988*f5c631daSSadaf Ebrahimi if (is_src2_signed) {
3989*f5c631daSSadaf Ebrahimi element2 = src2.Int(quarter_vform, index);
3990*f5c631daSSadaf Ebrahimi } else {
3991*f5c631daSSadaf Ebrahimi element2 = src2.Uint(quarter_vform, index);
3992*f5c631daSSadaf Ebrahimi }
3993*f5c631daSSadaf Ebrahimi result += element1 * element2;
3994*f5c631daSSadaf Ebrahimi }
3995*f5c631daSSadaf Ebrahimi dst.SetUint(vform, e, result + dst.Uint(vform, e));
3996*f5c631daSSadaf Ebrahimi }
3997*f5c631daSSadaf Ebrahimi return dst;
3998*f5c631daSSadaf Ebrahimi }
3999*f5c631daSSadaf Ebrahimi
4000*f5c631daSSadaf Ebrahimi
sdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4001*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sdot(VectorFormat vform,
4002*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4003*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4004*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
4005*f5c631daSSadaf Ebrahimi return dot(vform, dst, src1, src2, true, true);
4006*f5c631daSSadaf Ebrahimi }
4007*f5c631daSSadaf Ebrahimi
4008*f5c631daSSadaf Ebrahimi
udot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4009*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::udot(VectorFormat vform,
4010*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4011*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4012*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
4013*f5c631daSSadaf Ebrahimi return dot(vform, dst, src1, src2, false, false);
4014*f5c631daSSadaf Ebrahimi }
4015*f5c631daSSadaf Ebrahimi
usdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4016*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::usdot(VectorFormat vform,
4017*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4018*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4019*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
4020*f5c631daSSadaf Ebrahimi return dot(vform, dst, src1, src2, false, true);
4021*f5c631daSSadaf Ebrahimi }
4022*f5c631daSSadaf Ebrahimi
cdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & acc,const LogicVRegister & src1,const LogicVRegister & src2,int rot)4023*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::cdot(VectorFormat vform,
4024*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4025*f5c631daSSadaf Ebrahimi const LogicVRegister& acc,
4026*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4027*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
4028*f5c631daSSadaf Ebrahimi int rot) {
4029*f5c631daSSadaf Ebrahimi VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270));
4030*f5c631daSSadaf Ebrahimi VectorFormat quarter_vform =
4031*f5c631daSSadaf Ebrahimi VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
4032*f5c631daSSadaf Ebrahimi
4033*f5c631daSSadaf Ebrahimi int sel_a = ((rot == 0) || (rot == 180)) ? 0 : 1;
4034*f5c631daSSadaf Ebrahimi int sel_b = 1 - sel_a;
4035*f5c631daSSadaf Ebrahimi int sub_i = ((rot == 90) || (rot == 180)) ? 1 : -1;
4036*f5c631daSSadaf Ebrahimi
4037*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4038*f5c631daSSadaf Ebrahimi int64_t result = acc.Int(vform, i);
4039*f5c631daSSadaf Ebrahimi for (int j = 0; j < 2; j++) {
4040*f5c631daSSadaf Ebrahimi int64_t r1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 0);
4041*f5c631daSSadaf Ebrahimi int64_t i1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 1);
4042*f5c631daSSadaf Ebrahimi int64_t r2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_a);
4043*f5c631daSSadaf Ebrahimi int64_t i2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_b);
4044*f5c631daSSadaf Ebrahimi result += (r1 * r2) + (sub_i * i1 * i2);
4045*f5c631daSSadaf Ebrahimi }
4046*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, result);
4047*f5c631daSSadaf Ebrahimi }
4048*f5c631daSSadaf Ebrahimi return dst;
4049*f5c631daSSadaf Ebrahimi }
4050*f5c631daSSadaf Ebrahimi
sqrdcmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int rot)4051*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
4052*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4053*f5c631daSSadaf Ebrahimi const LogicVRegister& srca,
4054*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4055*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
4056*f5c631daSSadaf Ebrahimi int rot) {
4057*f5c631daSSadaf Ebrahimi SimVRegister src1_a, src1_b;
4058*f5c631daSSadaf Ebrahimi SimVRegister src2_a, src2_b;
4059*f5c631daSSadaf Ebrahimi SimVRegister srca_i, srca_r;
4060*f5c631daSSadaf Ebrahimi SimVRegister zero, temp;
4061*f5c631daSSadaf Ebrahimi zero.Clear();
4062*f5c631daSSadaf Ebrahimi
4063*f5c631daSSadaf Ebrahimi if ((rot == 0) || (rot == 180)) {
4064*f5c631daSSadaf Ebrahimi uzp1(vform, src1_a, src1, zero);
4065*f5c631daSSadaf Ebrahimi uzp1(vform, src2_a, src2, zero);
4066*f5c631daSSadaf Ebrahimi uzp2(vform, src2_b, src2, zero);
4067*f5c631daSSadaf Ebrahimi } else {
4068*f5c631daSSadaf Ebrahimi uzp2(vform, src1_a, src1, zero);
4069*f5c631daSSadaf Ebrahimi uzp2(vform, src2_a, src2, zero);
4070*f5c631daSSadaf Ebrahimi uzp1(vform, src2_b, src2, zero);
4071*f5c631daSSadaf Ebrahimi }
4072*f5c631daSSadaf Ebrahimi
4073*f5c631daSSadaf Ebrahimi uzp1(vform, srca_r, srca, zero);
4074*f5c631daSSadaf Ebrahimi uzp2(vform, srca_i, srca, zero);
4075*f5c631daSSadaf Ebrahimi
4076*f5c631daSSadaf Ebrahimi bool sub_r = (rot == 90) || (rot == 180);
4077*f5c631daSSadaf Ebrahimi bool sub_i = (rot == 180) || (rot == 270);
4078*f5c631daSSadaf Ebrahimi
4079*f5c631daSSadaf Ebrahimi const bool round = true;
4080*f5c631daSSadaf Ebrahimi sqrdmlash(vform, srca_r, src1_a, src2_a, round, sub_r);
4081*f5c631daSSadaf Ebrahimi sqrdmlash(vform, srca_i, src1_a, src2_b, round, sub_i);
4082*f5c631daSSadaf Ebrahimi zip1(vform, dst, srca_r, srca_i);
4083*f5c631daSSadaf Ebrahimi return dst;
4084*f5c631daSSadaf Ebrahimi }
4085*f5c631daSSadaf Ebrahimi
sqrdcmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)4086*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
4087*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4088*f5c631daSSadaf Ebrahimi const LogicVRegister& srca,
4089*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4090*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
4091*f5c631daSSadaf Ebrahimi int index,
4092*f5c631daSSadaf Ebrahimi int rot) {
4093*f5c631daSSadaf Ebrahimi SimVRegister temp;
4094*f5c631daSSadaf Ebrahimi dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index);
4095*f5c631daSSadaf Ebrahimi return sqrdcmlah(vform, dst, srca, src1, temp, rot);
4096*f5c631daSSadaf Ebrahimi }
4097*f5c631daSSadaf Ebrahimi
sqrdmlash_d(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round,bool sub_op)4098*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqrdmlash_d(VectorFormat vform,
4099*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4100*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4101*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
4102*f5c631daSSadaf Ebrahimi bool round,
4103*f5c631daSSadaf Ebrahimi bool sub_op) {
4104*f5c631daSSadaf Ebrahimi // 2 * INT_64_MIN * INT_64_MIN causes INT_128 to overflow.
4105*f5c631daSSadaf Ebrahimi // To avoid this, we use:
4106*f5c631daSSadaf Ebrahimi // (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
4107*f5c631daSSadaf Ebrahimi // which is same as:
4108*f5c631daSSadaf Ebrahimi // (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.
4109*f5c631daSSadaf Ebrahimi
4110*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4111*f5c631daSSadaf Ebrahimi int esize = kDRegSize;
4112*f5c631daSSadaf Ebrahimi vixl_uint128_t round_const, accum;
4113*f5c631daSSadaf Ebrahimi round_const.first = 0;
4114*f5c631daSSadaf Ebrahimi if (round) {
4115*f5c631daSSadaf Ebrahimi round_const.second = UINT64_C(1) << (esize - 2);
4116*f5c631daSSadaf Ebrahimi } else {
4117*f5c631daSSadaf Ebrahimi round_const.second = 0;
4118*f5c631daSSadaf Ebrahimi }
4119*f5c631daSSadaf Ebrahimi
4120*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
4121*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4122*f5c631daSSadaf Ebrahimi // Shift the whole value left by `esize - 1` bits.
4123*f5c631daSSadaf Ebrahimi accum.first = dst.Int(vform, i) >> 1;
4124*f5c631daSSadaf Ebrahimi accum.second = dst.Int(vform, i) << (esize - 1);
4125*f5c631daSSadaf Ebrahimi
4126*f5c631daSSadaf Ebrahimi vixl_uint128_t product = Mul64(src1.Int(vform, i), src2.Int(vform, i));
4127*f5c631daSSadaf Ebrahimi
4128*f5c631daSSadaf Ebrahimi if (sub_op) {
4129*f5c631daSSadaf Ebrahimi product = Neg128(product);
4130*f5c631daSSadaf Ebrahimi }
4131*f5c631daSSadaf Ebrahimi accum = Add128(accum, product);
4132*f5c631daSSadaf Ebrahimi
4133*f5c631daSSadaf Ebrahimi // Perform rounding.
4134*f5c631daSSadaf Ebrahimi accum = Add128(accum, round_const);
4135*f5c631daSSadaf Ebrahimi
4136*f5c631daSSadaf Ebrahimi // Arithmetic shift the whole value right by `esize - 1` bits.
4137*f5c631daSSadaf Ebrahimi accum.second = (accum.first << 1) | (accum.second >> (esize - 1));
4138*f5c631daSSadaf Ebrahimi accum.first = -(accum.first >> (esize - 1));
4139*f5c631daSSadaf Ebrahimi
4140*f5c631daSSadaf Ebrahimi // Perform saturation.
4141*f5c631daSSadaf Ebrahimi bool is_pos = (accum.first == 0) ? true : false;
4142*f5c631daSSadaf Ebrahimi if (is_pos &&
4143*f5c631daSSadaf Ebrahimi (accum.second > static_cast<uint64_t>(MaxIntFromFormat(vform)))) {
4144*f5c631daSSadaf Ebrahimi accum.second = MaxIntFromFormat(vform);
4145*f5c631daSSadaf Ebrahimi } else if (!is_pos && (accum.second <
4146*f5c631daSSadaf Ebrahimi static_cast<uint64_t>(MinIntFromFormat(vform)))) {
4147*f5c631daSSadaf Ebrahimi accum.second = MinIntFromFormat(vform);
4148*f5c631daSSadaf Ebrahimi }
4149*f5c631daSSadaf Ebrahimi
4150*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, accum.second);
4151*f5c631daSSadaf Ebrahimi }
4152*f5c631daSSadaf Ebrahimi
4153*f5c631daSSadaf Ebrahimi return dst;
4154*f5c631daSSadaf Ebrahimi }
4155*f5c631daSSadaf Ebrahimi
sqrdmlash(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round,bool sub_op)4156*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqrdmlash(VectorFormat vform,
4157*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4158*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4159*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
4160*f5c631daSSadaf Ebrahimi bool round,
4161*f5c631daSSadaf Ebrahimi bool sub_op) {
4162*f5c631daSSadaf Ebrahimi // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
4163*f5c631daSSadaf Ebrahimi // To avoid this, we use:
4164*f5c631daSSadaf Ebrahimi // (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
4165*f5c631daSSadaf Ebrahimi // which is same as:
4166*f5c631daSSadaf Ebrahimi // (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.
4167*f5c631daSSadaf Ebrahimi
4168*f5c631daSSadaf Ebrahimi if (vform == kFormatVnD) {
4169*f5c631daSSadaf Ebrahimi return sqrdmlash_d(vform, dst, src1, src2, round, sub_op);
4170*f5c631daSSadaf Ebrahimi }
4171*f5c631daSSadaf Ebrahimi
4172*f5c631daSSadaf Ebrahimi int esize = LaneSizeInBitsFromFormat(vform);
4173*f5c631daSSadaf Ebrahimi int round_const = round ? (1 << (esize - 2)) : 0;
4174*f5c631daSSadaf Ebrahimi int64_t accum;
4175*f5c631daSSadaf Ebrahimi
4176*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
4177*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4178*f5c631daSSadaf Ebrahimi accum = dst.Int(vform, i) << (esize - 1);
4179*f5c631daSSadaf Ebrahimi if (sub_op) {
4180*f5c631daSSadaf Ebrahimi accum -= src1.Int(vform, i) * src2.Int(vform, i);
4181*f5c631daSSadaf Ebrahimi } else {
4182*f5c631daSSadaf Ebrahimi accum += src1.Int(vform, i) * src2.Int(vform, i);
4183*f5c631daSSadaf Ebrahimi }
4184*f5c631daSSadaf Ebrahimi accum += round_const;
4185*f5c631daSSadaf Ebrahimi accum = accum >> (esize - 1);
4186*f5c631daSSadaf Ebrahimi
4187*f5c631daSSadaf Ebrahimi if (accum > MaxIntFromFormat(vform)) {
4188*f5c631daSSadaf Ebrahimi accum = MaxIntFromFormat(vform);
4189*f5c631daSSadaf Ebrahimi } else if (accum < MinIntFromFormat(vform)) {
4190*f5c631daSSadaf Ebrahimi accum = MinIntFromFormat(vform);
4191*f5c631daSSadaf Ebrahimi }
4192*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, accum);
4193*f5c631daSSadaf Ebrahimi }
4194*f5c631daSSadaf Ebrahimi return dst;
4195*f5c631daSSadaf Ebrahimi }
4196*f5c631daSSadaf Ebrahimi
4197*f5c631daSSadaf Ebrahimi
sqrdmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)4198*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
4199*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4200*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4201*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
4202*f5c631daSSadaf Ebrahimi bool round) {
4203*f5c631daSSadaf Ebrahimi return sqrdmlash(vform, dst, src1, src2, round, false);
4204*f5c631daSSadaf Ebrahimi }
4205*f5c631daSSadaf Ebrahimi
4206*f5c631daSSadaf Ebrahimi
sqrdmlsh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)4207*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
4208*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4209*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4210*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
4211*f5c631daSSadaf Ebrahimi bool round) {
4212*f5c631daSSadaf Ebrahimi return sqrdmlash(vform, dst, src1, src2, round, true);
4213*f5c631daSSadaf Ebrahimi }
4214*f5c631daSSadaf Ebrahimi
4215*f5c631daSSadaf Ebrahimi
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4216*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::sqdmulh(VectorFormat vform,
4217*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4218*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4219*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
4220*f5c631daSSadaf Ebrahimi return sqrdmulh(vform, dst, src1, src2, false);
4221*f5c631daSSadaf Ebrahimi }
4222*f5c631daSSadaf Ebrahimi
4223*f5c631daSSadaf Ebrahimi
addhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4224*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::addhn(VectorFormat vform,
4225*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4226*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4227*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
4228*f5c631daSSadaf Ebrahimi SimVRegister temp;
4229*f5c631daSSadaf Ebrahimi add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4230*f5c631daSSadaf Ebrahimi shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4231*f5c631daSSadaf Ebrahimi return dst;
4232*f5c631daSSadaf Ebrahimi }
4233*f5c631daSSadaf Ebrahimi
4234*f5c631daSSadaf Ebrahimi
addhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4235*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::addhn2(VectorFormat vform,
4236*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4237*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4238*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
4239*f5c631daSSadaf Ebrahimi SimVRegister temp;
4240*f5c631daSSadaf Ebrahimi add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4241*f5c631daSSadaf Ebrahimi shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4242*f5c631daSSadaf Ebrahimi return dst;
4243*f5c631daSSadaf Ebrahimi }
4244*f5c631daSSadaf Ebrahimi
4245*f5c631daSSadaf Ebrahimi
raddhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4246*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::raddhn(VectorFormat vform,
4247*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4248*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4249*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
4250*f5c631daSSadaf Ebrahimi SimVRegister temp;
4251*f5c631daSSadaf Ebrahimi add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4252*f5c631daSSadaf Ebrahimi rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4253*f5c631daSSadaf Ebrahimi return dst;
4254*f5c631daSSadaf Ebrahimi }
4255*f5c631daSSadaf Ebrahimi
4256*f5c631daSSadaf Ebrahimi
raddhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4257*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::raddhn2(VectorFormat vform,
4258*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4259*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4260*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
4261*f5c631daSSadaf Ebrahimi SimVRegister temp;
4262*f5c631daSSadaf Ebrahimi add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4263*f5c631daSSadaf Ebrahimi rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4264*f5c631daSSadaf Ebrahimi return dst;
4265*f5c631daSSadaf Ebrahimi }
4266*f5c631daSSadaf Ebrahimi
4267*f5c631daSSadaf Ebrahimi
subhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4268*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::subhn(VectorFormat vform,
4269*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4270*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4271*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
4272*f5c631daSSadaf Ebrahimi SimVRegister temp;
4273*f5c631daSSadaf Ebrahimi sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4274*f5c631daSSadaf Ebrahimi shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4275*f5c631daSSadaf Ebrahimi return dst;
4276*f5c631daSSadaf Ebrahimi }
4277*f5c631daSSadaf Ebrahimi
4278*f5c631daSSadaf Ebrahimi
subhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4279*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::subhn2(VectorFormat vform,
4280*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4281*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4282*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
4283*f5c631daSSadaf Ebrahimi SimVRegister temp;
4284*f5c631daSSadaf Ebrahimi sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4285*f5c631daSSadaf Ebrahimi shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4286*f5c631daSSadaf Ebrahimi return dst;
4287*f5c631daSSadaf Ebrahimi }
4288*f5c631daSSadaf Ebrahimi
4289*f5c631daSSadaf Ebrahimi
rsubhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4290*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::rsubhn(VectorFormat vform,
4291*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4292*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4293*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
4294*f5c631daSSadaf Ebrahimi SimVRegister temp;
4295*f5c631daSSadaf Ebrahimi sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4296*f5c631daSSadaf Ebrahimi rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4297*f5c631daSSadaf Ebrahimi return dst;
4298*f5c631daSSadaf Ebrahimi }
4299*f5c631daSSadaf Ebrahimi
4300*f5c631daSSadaf Ebrahimi
rsubhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4301*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::rsubhn2(VectorFormat vform,
4302*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4303*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4304*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
4305*f5c631daSSadaf Ebrahimi SimVRegister temp;
4306*f5c631daSSadaf Ebrahimi sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4307*f5c631daSSadaf Ebrahimi rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4308*f5c631daSSadaf Ebrahimi return dst;
4309*f5c631daSSadaf Ebrahimi }
4310*f5c631daSSadaf Ebrahimi
4311*f5c631daSSadaf Ebrahimi
trn1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4312*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::trn1(VectorFormat vform,
4313*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4314*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4315*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
4316*f5c631daSSadaf Ebrahimi uint64_t result[kZRegMaxSizeInBytes] = {};
4317*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
4318*f5c631daSSadaf Ebrahimi int pairs = lane_count / 2;
4319*f5c631daSSadaf Ebrahimi for (int i = 0; i < pairs; ++i) {
4320*f5c631daSSadaf Ebrahimi result[2 * i] = src1.Uint(vform, 2 * i);
4321*f5c631daSSadaf Ebrahimi result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
4322*f5c631daSSadaf Ebrahimi }
4323*f5c631daSSadaf Ebrahimi
4324*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
4325*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
4326*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result[i]);
4327*f5c631daSSadaf Ebrahimi }
4328*f5c631daSSadaf Ebrahimi return dst;
4329*f5c631daSSadaf Ebrahimi }
4330*f5c631daSSadaf Ebrahimi
4331*f5c631daSSadaf Ebrahimi
trn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4332*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::trn2(VectorFormat vform,
4333*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4334*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4335*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
4336*f5c631daSSadaf Ebrahimi uint64_t result[kZRegMaxSizeInBytes] = {};
4337*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
4338*f5c631daSSadaf Ebrahimi int pairs = lane_count / 2;
4339*f5c631daSSadaf Ebrahimi for (int i = 0; i < pairs; ++i) {
4340*f5c631daSSadaf Ebrahimi result[2 * i] = src1.Uint(vform, (2 * i) + 1);
4341*f5c631daSSadaf Ebrahimi result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
4342*f5c631daSSadaf Ebrahimi }
4343*f5c631daSSadaf Ebrahimi
4344*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
4345*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
4346*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result[i]);
4347*f5c631daSSadaf Ebrahimi }
4348*f5c631daSSadaf Ebrahimi return dst;
4349*f5c631daSSadaf Ebrahimi }
4350*f5c631daSSadaf Ebrahimi
4351*f5c631daSSadaf Ebrahimi
zip1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4352*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::zip1(VectorFormat vform,
4353*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4354*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4355*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
4356*f5c631daSSadaf Ebrahimi uint64_t result[kZRegMaxSizeInBytes] = {};
4357*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
4358*f5c631daSSadaf Ebrahimi int pairs = lane_count / 2;
4359*f5c631daSSadaf Ebrahimi for (int i = 0; i < pairs; ++i) {
4360*f5c631daSSadaf Ebrahimi result[2 * i] = src1.Uint(vform, i);
4361*f5c631daSSadaf Ebrahimi result[(2 * i) + 1] = src2.Uint(vform, i);
4362*f5c631daSSadaf Ebrahimi }
4363*f5c631daSSadaf Ebrahimi
4364*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
4365*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
4366*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result[i]);
4367*f5c631daSSadaf Ebrahimi }
4368*f5c631daSSadaf Ebrahimi return dst;
4369*f5c631daSSadaf Ebrahimi }
4370*f5c631daSSadaf Ebrahimi
4371*f5c631daSSadaf Ebrahimi
zip2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4372*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::zip2(VectorFormat vform,
4373*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4374*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4375*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
4376*f5c631daSSadaf Ebrahimi uint64_t result[kZRegMaxSizeInBytes] = {};
4377*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
4378*f5c631daSSadaf Ebrahimi int pairs = lane_count / 2;
4379*f5c631daSSadaf Ebrahimi for (int i = 0; i < pairs; ++i) {
4380*f5c631daSSadaf Ebrahimi result[2 * i] = src1.Uint(vform, pairs + i);
4381*f5c631daSSadaf Ebrahimi result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
4382*f5c631daSSadaf Ebrahimi }
4383*f5c631daSSadaf Ebrahimi
4384*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
4385*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
4386*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result[i]);
4387*f5c631daSSadaf Ebrahimi }
4388*f5c631daSSadaf Ebrahimi return dst;
4389*f5c631daSSadaf Ebrahimi }
4390*f5c631daSSadaf Ebrahimi
4391*f5c631daSSadaf Ebrahimi
uzp1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4392*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uzp1(VectorFormat vform,
4393*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4394*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4395*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
4396*f5c631daSSadaf Ebrahimi uint64_t result[kZRegMaxSizeInBytes * 2];
4397*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
4398*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
4399*f5c631daSSadaf Ebrahimi result[i] = src1.Uint(vform, i);
4400*f5c631daSSadaf Ebrahimi result[lane_count + i] = src2.Uint(vform, i);
4401*f5c631daSSadaf Ebrahimi }
4402*f5c631daSSadaf Ebrahimi
4403*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
4404*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
4405*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result[2 * i]);
4406*f5c631daSSadaf Ebrahimi }
4407*f5c631daSSadaf Ebrahimi return dst;
4408*f5c631daSSadaf Ebrahimi }
4409*f5c631daSSadaf Ebrahimi
4410*f5c631daSSadaf Ebrahimi
uzp2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4411*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::uzp2(VectorFormat vform,
4412*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4413*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
4414*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
4415*f5c631daSSadaf Ebrahimi uint64_t result[kZRegMaxSizeInBytes * 2];
4416*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
4417*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
4418*f5c631daSSadaf Ebrahimi result[i] = src1.Uint(vform, i);
4419*f5c631daSSadaf Ebrahimi result[lane_count + i] = src2.Uint(vform, i);
4420*f5c631daSSadaf Ebrahimi }
4421*f5c631daSSadaf Ebrahimi
4422*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
4423*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
4424*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result[(2 * i) + 1]);
4425*f5c631daSSadaf Ebrahimi }
4426*f5c631daSSadaf Ebrahimi return dst;
4427*f5c631daSSadaf Ebrahimi }
4428*f5c631daSSadaf Ebrahimi
interleave_top_bottom(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4429*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::interleave_top_bottom(VectorFormat vform,
4430*f5c631daSSadaf Ebrahimi LogicVRegister dst,
4431*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
4432*f5c631daSSadaf Ebrahimi // Interleave the top and bottom half of a vector, ie. for a vector:
4433*f5c631daSSadaf Ebrahimi //
4434*f5c631daSSadaf Ebrahimi // [ ... | F | D | B | ... | E | C | A ]
4435*f5c631daSSadaf Ebrahimi //
4436*f5c631daSSadaf Ebrahimi // where B is the first element in the top half of the vector, produce a
4437*f5c631daSSadaf Ebrahimi // result vector:
4438*f5c631daSSadaf Ebrahimi //
4439*f5c631daSSadaf Ebrahimi // [ ... | ... | F | E | D | C | B | A ]
4440*f5c631daSSadaf Ebrahimi
4441*f5c631daSSadaf Ebrahimi uint64_t result[kZRegMaxSizeInBytes] = {};
4442*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
4443*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; i += 2) {
4444*f5c631daSSadaf Ebrahimi result[i] = src.Uint(vform, i / 2);
4445*f5c631daSSadaf Ebrahimi result[i + 1] = src.Uint(vform, (lane_count / 2) + (i / 2));
4446*f5c631daSSadaf Ebrahimi }
4447*f5c631daSSadaf Ebrahimi dst.SetUintArray(vform, result);
4448*f5c631daSSadaf Ebrahimi return dst;
4449*f5c631daSSadaf Ebrahimi }
4450*f5c631daSSadaf Ebrahimi
4451*f5c631daSSadaf Ebrahimi template <typename T>
FPNeg(T op)4452*f5c631daSSadaf Ebrahimi T Simulator::FPNeg(T op) {
4453*f5c631daSSadaf Ebrahimi return -op;
4454*f5c631daSSadaf Ebrahimi }
4455*f5c631daSSadaf Ebrahimi
4456*f5c631daSSadaf Ebrahimi template <typename T>
FPAdd(T op1,T op2)4457*f5c631daSSadaf Ebrahimi T Simulator::FPAdd(T op1, T op2) {
4458*f5c631daSSadaf Ebrahimi T result = FPProcessNaNs(op1, op2);
4459*f5c631daSSadaf Ebrahimi if (IsNaN(result)) {
4460*f5c631daSSadaf Ebrahimi return result;
4461*f5c631daSSadaf Ebrahimi }
4462*f5c631daSSadaf Ebrahimi
4463*f5c631daSSadaf Ebrahimi if (IsInf(op1) && IsInf(op2) && (op1 != op2)) {
4464*f5c631daSSadaf Ebrahimi // inf + -inf returns the default NaN.
4465*f5c631daSSadaf Ebrahimi FPProcessException();
4466*f5c631daSSadaf Ebrahimi return FPDefaultNaN<T>();
4467*f5c631daSSadaf Ebrahimi } else {
4468*f5c631daSSadaf Ebrahimi // Other cases should be handled by standard arithmetic.
4469*f5c631daSSadaf Ebrahimi return op1 + op2;
4470*f5c631daSSadaf Ebrahimi }
4471*f5c631daSSadaf Ebrahimi }
4472*f5c631daSSadaf Ebrahimi
4473*f5c631daSSadaf Ebrahimi
4474*f5c631daSSadaf Ebrahimi template <typename T>
FPSub(T op1,T op2)4475*f5c631daSSadaf Ebrahimi T Simulator::FPSub(T op1, T op2) {
4476*f5c631daSSadaf Ebrahimi // NaNs should be handled elsewhere.
4477*f5c631daSSadaf Ebrahimi VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4478*f5c631daSSadaf Ebrahimi
4479*f5c631daSSadaf Ebrahimi if (IsInf(op1) && IsInf(op2) && (op1 == op2)) {
4480*f5c631daSSadaf Ebrahimi // inf - inf returns the default NaN.
4481*f5c631daSSadaf Ebrahimi FPProcessException();
4482*f5c631daSSadaf Ebrahimi return FPDefaultNaN<T>();
4483*f5c631daSSadaf Ebrahimi } else {
4484*f5c631daSSadaf Ebrahimi // Other cases should be handled by standard arithmetic.
4485*f5c631daSSadaf Ebrahimi return op1 - op2;
4486*f5c631daSSadaf Ebrahimi }
4487*f5c631daSSadaf Ebrahimi }
4488*f5c631daSSadaf Ebrahimi
4489*f5c631daSSadaf Ebrahimi template <typename T>
FPMulNaNs(T op1,T op2)4490*f5c631daSSadaf Ebrahimi T Simulator::FPMulNaNs(T op1, T op2) {
4491*f5c631daSSadaf Ebrahimi T result = FPProcessNaNs(op1, op2);
4492*f5c631daSSadaf Ebrahimi return IsNaN(result) ? result : FPMul(op1, op2);
4493*f5c631daSSadaf Ebrahimi }
4494*f5c631daSSadaf Ebrahimi
4495*f5c631daSSadaf Ebrahimi template <typename T>
FPMul(T op1,T op2)4496*f5c631daSSadaf Ebrahimi T Simulator::FPMul(T op1, T op2) {
4497*f5c631daSSadaf Ebrahimi // NaNs should be handled elsewhere.
4498*f5c631daSSadaf Ebrahimi VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4499*f5c631daSSadaf Ebrahimi
4500*f5c631daSSadaf Ebrahimi if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4501*f5c631daSSadaf Ebrahimi // inf * 0.0 returns the default NaN.
4502*f5c631daSSadaf Ebrahimi FPProcessException();
4503*f5c631daSSadaf Ebrahimi return FPDefaultNaN<T>();
4504*f5c631daSSadaf Ebrahimi } else {
4505*f5c631daSSadaf Ebrahimi // Other cases should be handled by standard arithmetic.
4506*f5c631daSSadaf Ebrahimi return op1 * op2;
4507*f5c631daSSadaf Ebrahimi }
4508*f5c631daSSadaf Ebrahimi }
4509*f5c631daSSadaf Ebrahimi
4510*f5c631daSSadaf Ebrahimi
4511*f5c631daSSadaf Ebrahimi template <typename T>
FPMulx(T op1,T op2)4512*f5c631daSSadaf Ebrahimi T Simulator::FPMulx(T op1, T op2) {
4513*f5c631daSSadaf Ebrahimi if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4514*f5c631daSSadaf Ebrahimi // inf * 0.0 returns +/-2.0.
4515*f5c631daSSadaf Ebrahimi T two = 2.0;
4516*f5c631daSSadaf Ebrahimi return copysign(1.0, op1) * copysign(1.0, op2) * two;
4517*f5c631daSSadaf Ebrahimi }
4518*f5c631daSSadaf Ebrahimi return FPMul(op1, op2);
4519*f5c631daSSadaf Ebrahimi }
4520*f5c631daSSadaf Ebrahimi
4521*f5c631daSSadaf Ebrahimi
4522*f5c631daSSadaf Ebrahimi template <typename T>
FPMulAdd(T a,T op1,T op2)4523*f5c631daSSadaf Ebrahimi T Simulator::FPMulAdd(T a, T op1, T op2) {
4524*f5c631daSSadaf Ebrahimi T result = FPProcessNaNs3(a, op1, op2);
4525*f5c631daSSadaf Ebrahimi
4526*f5c631daSSadaf Ebrahimi T sign_a = copysign(1.0, a);
4527*f5c631daSSadaf Ebrahimi T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
4528*f5c631daSSadaf Ebrahimi bool isinf_prod = IsInf(op1) || IsInf(op2);
4529*f5c631daSSadaf Ebrahimi bool operation_generates_nan =
4530*f5c631daSSadaf Ebrahimi (IsInf(op1) && (op2 == 0.0)) || // inf * 0.0
4531*f5c631daSSadaf Ebrahimi (IsInf(op2) && (op1 == 0.0)) || // 0.0 * inf
4532*f5c631daSSadaf Ebrahimi (IsInf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
4533*f5c631daSSadaf Ebrahimi
4534*f5c631daSSadaf Ebrahimi if (IsNaN(result)) {
4535*f5c631daSSadaf Ebrahimi // Generated NaNs override quiet NaNs propagated from a.
4536*f5c631daSSadaf Ebrahimi if (operation_generates_nan && IsQuietNaN(a)) {
4537*f5c631daSSadaf Ebrahimi FPProcessException();
4538*f5c631daSSadaf Ebrahimi return FPDefaultNaN<T>();
4539*f5c631daSSadaf Ebrahimi } else {
4540*f5c631daSSadaf Ebrahimi return result;
4541*f5c631daSSadaf Ebrahimi }
4542*f5c631daSSadaf Ebrahimi }
4543*f5c631daSSadaf Ebrahimi
4544*f5c631daSSadaf Ebrahimi // If the operation would produce a NaN, return the default NaN.
4545*f5c631daSSadaf Ebrahimi if (operation_generates_nan) {
4546*f5c631daSSadaf Ebrahimi FPProcessException();
4547*f5c631daSSadaf Ebrahimi return FPDefaultNaN<T>();
4548*f5c631daSSadaf Ebrahimi }
4549*f5c631daSSadaf Ebrahimi
4550*f5c631daSSadaf Ebrahimi // Work around broken fma implementations for exact zero results: The sign of
4551*f5c631daSSadaf Ebrahimi // exact 0.0 results is positive unless both a and op1 * op2 are negative.
4552*f5c631daSSadaf Ebrahimi if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
4553*f5c631daSSadaf Ebrahimi return ((sign_a < T(0.0)) && (sign_prod < T(0.0))) ? -0.0 : 0.0;
4554*f5c631daSSadaf Ebrahimi }
4555*f5c631daSSadaf Ebrahimi
4556*f5c631daSSadaf Ebrahimi result = FusedMultiplyAdd(op1, op2, a);
4557*f5c631daSSadaf Ebrahimi VIXL_ASSERT(!IsNaN(result));
4558*f5c631daSSadaf Ebrahimi
4559*f5c631daSSadaf Ebrahimi // Work around broken fma implementations for rounded zero results: If a is
4560*f5c631daSSadaf Ebrahimi // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
4561*f5c631daSSadaf Ebrahimi if ((a == 0.0) && (result == 0.0)) {
4562*f5c631daSSadaf Ebrahimi return copysign(0.0, sign_prod);
4563*f5c631daSSadaf Ebrahimi }
4564*f5c631daSSadaf Ebrahimi
4565*f5c631daSSadaf Ebrahimi return result;
4566*f5c631daSSadaf Ebrahimi }
4567*f5c631daSSadaf Ebrahimi
4568*f5c631daSSadaf Ebrahimi
4569*f5c631daSSadaf Ebrahimi template <typename T>
FPDiv(T op1,T op2)4570*f5c631daSSadaf Ebrahimi T Simulator::FPDiv(T op1, T op2) {
4571*f5c631daSSadaf Ebrahimi // NaNs should be handled elsewhere.
4572*f5c631daSSadaf Ebrahimi VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4573*f5c631daSSadaf Ebrahimi
4574*f5c631daSSadaf Ebrahimi if ((IsInf(op1) && IsInf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
4575*f5c631daSSadaf Ebrahimi // inf / inf and 0.0 / 0.0 return the default NaN.
4576*f5c631daSSadaf Ebrahimi FPProcessException();
4577*f5c631daSSadaf Ebrahimi return FPDefaultNaN<T>();
4578*f5c631daSSadaf Ebrahimi } else {
4579*f5c631daSSadaf Ebrahimi if (op2 == 0.0) {
4580*f5c631daSSadaf Ebrahimi FPProcessException();
4581*f5c631daSSadaf Ebrahimi if (!IsNaN(op1)) {
4582*f5c631daSSadaf Ebrahimi double op1_sign = copysign(1.0, op1);
4583*f5c631daSSadaf Ebrahimi double op2_sign = copysign(1.0, op2);
4584*f5c631daSSadaf Ebrahimi return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
4585*f5c631daSSadaf Ebrahimi }
4586*f5c631daSSadaf Ebrahimi }
4587*f5c631daSSadaf Ebrahimi
4588*f5c631daSSadaf Ebrahimi // Other cases should be handled by standard arithmetic.
4589*f5c631daSSadaf Ebrahimi return op1 / op2;
4590*f5c631daSSadaf Ebrahimi }
4591*f5c631daSSadaf Ebrahimi }
4592*f5c631daSSadaf Ebrahimi
4593*f5c631daSSadaf Ebrahimi
4594*f5c631daSSadaf Ebrahimi template <typename T>
FPSqrt(T op)4595*f5c631daSSadaf Ebrahimi T Simulator::FPSqrt(T op) {
4596*f5c631daSSadaf Ebrahimi if (IsNaN(op)) {
4597*f5c631daSSadaf Ebrahimi return FPProcessNaN(op);
4598*f5c631daSSadaf Ebrahimi } else if (op < T(0.0)) {
4599*f5c631daSSadaf Ebrahimi FPProcessException();
4600*f5c631daSSadaf Ebrahimi return FPDefaultNaN<T>();
4601*f5c631daSSadaf Ebrahimi } else {
4602*f5c631daSSadaf Ebrahimi return sqrt(op);
4603*f5c631daSSadaf Ebrahimi }
4604*f5c631daSSadaf Ebrahimi }
4605*f5c631daSSadaf Ebrahimi
4606*f5c631daSSadaf Ebrahimi
4607*f5c631daSSadaf Ebrahimi template <typename T>
FPMax(T a,T b)4608*f5c631daSSadaf Ebrahimi T Simulator::FPMax(T a, T b) {
4609*f5c631daSSadaf Ebrahimi T result = FPProcessNaNs(a, b);
4610*f5c631daSSadaf Ebrahimi if (IsNaN(result)) return result;
4611*f5c631daSSadaf Ebrahimi
4612*f5c631daSSadaf Ebrahimi if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4613*f5c631daSSadaf Ebrahimi // a and b are zero, and the sign differs: return +0.0.
4614*f5c631daSSadaf Ebrahimi return 0.0;
4615*f5c631daSSadaf Ebrahimi } else {
4616*f5c631daSSadaf Ebrahimi return (a > b) ? a : b;
4617*f5c631daSSadaf Ebrahimi }
4618*f5c631daSSadaf Ebrahimi }
4619*f5c631daSSadaf Ebrahimi
4620*f5c631daSSadaf Ebrahimi
4621*f5c631daSSadaf Ebrahimi template <typename T>
FPMaxNM(T a,T b)4622*f5c631daSSadaf Ebrahimi T Simulator::FPMaxNM(T a, T b) {
4623*f5c631daSSadaf Ebrahimi if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4624*f5c631daSSadaf Ebrahimi a = kFP64NegativeInfinity;
4625*f5c631daSSadaf Ebrahimi } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4626*f5c631daSSadaf Ebrahimi b = kFP64NegativeInfinity;
4627*f5c631daSSadaf Ebrahimi }
4628*f5c631daSSadaf Ebrahimi
4629*f5c631daSSadaf Ebrahimi T result = FPProcessNaNs(a, b);
4630*f5c631daSSadaf Ebrahimi return IsNaN(result) ? result : FPMax(a, b);
4631*f5c631daSSadaf Ebrahimi }
4632*f5c631daSSadaf Ebrahimi
4633*f5c631daSSadaf Ebrahimi
4634*f5c631daSSadaf Ebrahimi template <typename T>
FPMin(T a,T b)4635*f5c631daSSadaf Ebrahimi T Simulator::FPMin(T a, T b) {
4636*f5c631daSSadaf Ebrahimi T result = FPProcessNaNs(a, b);
4637*f5c631daSSadaf Ebrahimi if (IsNaN(result)) return result;
4638*f5c631daSSadaf Ebrahimi
4639*f5c631daSSadaf Ebrahimi if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4640*f5c631daSSadaf Ebrahimi // a and b are zero, and the sign differs: return -0.0.
4641*f5c631daSSadaf Ebrahimi return -0.0;
4642*f5c631daSSadaf Ebrahimi } else {
4643*f5c631daSSadaf Ebrahimi return (a < b) ? a : b;
4644*f5c631daSSadaf Ebrahimi }
4645*f5c631daSSadaf Ebrahimi }
4646*f5c631daSSadaf Ebrahimi
4647*f5c631daSSadaf Ebrahimi
4648*f5c631daSSadaf Ebrahimi template <typename T>
FPMinNM(T a,T b)4649*f5c631daSSadaf Ebrahimi T Simulator::FPMinNM(T a, T b) {
4650*f5c631daSSadaf Ebrahimi if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4651*f5c631daSSadaf Ebrahimi a = kFP64PositiveInfinity;
4652*f5c631daSSadaf Ebrahimi } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4653*f5c631daSSadaf Ebrahimi b = kFP64PositiveInfinity;
4654*f5c631daSSadaf Ebrahimi }
4655*f5c631daSSadaf Ebrahimi
4656*f5c631daSSadaf Ebrahimi T result = FPProcessNaNs(a, b);
4657*f5c631daSSadaf Ebrahimi return IsNaN(result) ? result : FPMin(a, b);
4658*f5c631daSSadaf Ebrahimi }
4659*f5c631daSSadaf Ebrahimi
4660*f5c631daSSadaf Ebrahimi
4661*f5c631daSSadaf Ebrahimi template <typename T>
FPRecipStepFused(T op1,T op2)4662*f5c631daSSadaf Ebrahimi T Simulator::FPRecipStepFused(T op1, T op2) {
4663*f5c631daSSadaf Ebrahimi const T two = 2.0;
4664*f5c631daSSadaf Ebrahimi if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4665*f5c631daSSadaf Ebrahimi return two;
4666*f5c631daSSadaf Ebrahimi } else if (IsInf(op1) || IsInf(op2)) {
4667*f5c631daSSadaf Ebrahimi // Return +inf if signs match, otherwise -inf.
4668*f5c631daSSadaf Ebrahimi return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4669*f5c631daSSadaf Ebrahimi : kFP64NegativeInfinity;
4670*f5c631daSSadaf Ebrahimi } else {
4671*f5c631daSSadaf Ebrahimi return FusedMultiplyAdd(op1, op2, two);
4672*f5c631daSSadaf Ebrahimi }
4673*f5c631daSSadaf Ebrahimi }
4674*f5c631daSSadaf Ebrahimi
// Generic case: defers to std::isnormal.
template <typename T>
bool IsNormal(T value) {
  const bool is_normal = std::isnormal(value);
  return is_normal;
}
4679*f5c631daSSadaf Ebrahimi
4680*f5c631daSSadaf Ebrahimi template <>
IsNormal(SimFloat16 value)4681*f5c631daSSadaf Ebrahimi bool IsNormal(SimFloat16 value) {
4682*f5c631daSSadaf Ebrahimi uint16_t rawbits = Float16ToRawbits(value);
4683*f5c631daSSadaf Ebrahimi uint16_t exp_mask = 0x7c00;
4684*f5c631daSSadaf Ebrahimi // Check that the exponent is neither all zeroes or all ones.
4685*f5c631daSSadaf Ebrahimi return ((rawbits & exp_mask) != 0) && ((~rawbits & exp_mask) != 0);
4686*f5c631daSSadaf Ebrahimi }
4687*f5c631daSSadaf Ebrahimi
4688*f5c631daSSadaf Ebrahimi
4689*f5c631daSSadaf Ebrahimi template <typename T>
FPRSqrtStepFused(T op1,T op2)4690*f5c631daSSadaf Ebrahimi T Simulator::FPRSqrtStepFused(T op1, T op2) {
4691*f5c631daSSadaf Ebrahimi const T one_point_five = 1.5;
4692*f5c631daSSadaf Ebrahimi const T two = 2.0;
4693*f5c631daSSadaf Ebrahimi
4694*f5c631daSSadaf Ebrahimi if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4695*f5c631daSSadaf Ebrahimi return one_point_five;
4696*f5c631daSSadaf Ebrahimi } else if (IsInf(op1) || IsInf(op2)) {
4697*f5c631daSSadaf Ebrahimi // Return +inf if signs match, otherwise -inf.
4698*f5c631daSSadaf Ebrahimi return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4699*f5c631daSSadaf Ebrahimi : kFP64NegativeInfinity;
4700*f5c631daSSadaf Ebrahimi } else {
4701*f5c631daSSadaf Ebrahimi // The multiply-add-halve operation must be fully fused, so avoid interim
4702*f5c631daSSadaf Ebrahimi // rounding by checking which operand can be losslessly divided by two
4703*f5c631daSSadaf Ebrahimi // before doing the multiply-add.
4704*f5c631daSSadaf Ebrahimi if (IsNormal(op1 / two)) {
4705*f5c631daSSadaf Ebrahimi return FusedMultiplyAdd(op1 / two, op2, one_point_five);
4706*f5c631daSSadaf Ebrahimi } else if (IsNormal(op2 / two)) {
4707*f5c631daSSadaf Ebrahimi return FusedMultiplyAdd(op1, op2 / two, one_point_five);
4708*f5c631daSSadaf Ebrahimi } else {
4709*f5c631daSSadaf Ebrahimi // Neither operand is normal after halving: the result is dominated by
4710*f5c631daSSadaf Ebrahimi // the addition term, so just return that.
4711*f5c631daSSadaf Ebrahimi return one_point_five;
4712*f5c631daSSadaf Ebrahimi }
4713*f5c631daSSadaf Ebrahimi }
4714*f5c631daSSadaf Ebrahimi }
4715*f5c631daSSadaf Ebrahimi
FPToFixedJS(double value)4716*f5c631daSSadaf Ebrahimi int32_t Simulator::FPToFixedJS(double value) {
4717*f5c631daSSadaf Ebrahimi // The Z-flag is set when the conversion from double precision floating-point
4718*f5c631daSSadaf Ebrahimi // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
4719*f5c631daSSadaf Ebrahimi // outside the bounds of a 32-bit integer, or isn't an exact integer then the
4720*f5c631daSSadaf Ebrahimi // Z-flag is unset.
4721*f5c631daSSadaf Ebrahimi int Z = 1;
4722*f5c631daSSadaf Ebrahimi int32_t result;
4723*f5c631daSSadaf Ebrahimi
4724*f5c631daSSadaf Ebrahimi if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4725*f5c631daSSadaf Ebrahimi (value == kFP64NegativeInfinity)) {
4726*f5c631daSSadaf Ebrahimi // +/- zero and infinity all return zero, however -0 and +/- Infinity also
4727*f5c631daSSadaf Ebrahimi // unset the Z-flag.
4728*f5c631daSSadaf Ebrahimi result = 0.0;
4729*f5c631daSSadaf Ebrahimi if ((value != 0.0) || std::signbit(value)) {
4730*f5c631daSSadaf Ebrahimi Z = 0;
4731*f5c631daSSadaf Ebrahimi }
4732*f5c631daSSadaf Ebrahimi } else if (std::isnan(value)) {
4733*f5c631daSSadaf Ebrahimi // NaN values unset the Z-flag and set the result to 0.
4734*f5c631daSSadaf Ebrahimi FPProcessNaN(value);
4735*f5c631daSSadaf Ebrahimi result = 0;
4736*f5c631daSSadaf Ebrahimi Z = 0;
4737*f5c631daSSadaf Ebrahimi } else {
4738*f5c631daSSadaf Ebrahimi // All other values are converted to an integer representation, rounded
4739*f5c631daSSadaf Ebrahimi // toward zero.
4740*f5c631daSSadaf Ebrahimi double int_result = std::floor(value);
4741*f5c631daSSadaf Ebrahimi double error = value - int_result;
4742*f5c631daSSadaf Ebrahimi
4743*f5c631daSSadaf Ebrahimi if ((error != 0.0) && (int_result < 0.0)) {
4744*f5c631daSSadaf Ebrahimi int_result++;
4745*f5c631daSSadaf Ebrahimi }
4746*f5c631daSSadaf Ebrahimi
4747*f5c631daSSadaf Ebrahimi // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
4748*f5c631daSSadaf Ebrahimi // write a one-liner with std::round, but the behaviour on ties is incorrect
4749*f5c631daSSadaf Ebrahimi // for our purposes.
4750*f5c631daSSadaf Ebrahimi double mod_const = static_cast<double>(UINT64_C(1) << 32);
4751*f5c631daSSadaf Ebrahimi double mod_error =
4752*f5c631daSSadaf Ebrahimi (int_result / mod_const) - std::floor(int_result / mod_const);
4753*f5c631daSSadaf Ebrahimi double constrained;
4754*f5c631daSSadaf Ebrahimi if (mod_error == 0.5) {
4755*f5c631daSSadaf Ebrahimi constrained = INT32_MIN;
4756*f5c631daSSadaf Ebrahimi } else {
4757*f5c631daSSadaf Ebrahimi constrained = int_result - mod_const * round(int_result / mod_const);
4758*f5c631daSSadaf Ebrahimi }
4759*f5c631daSSadaf Ebrahimi
4760*f5c631daSSadaf Ebrahimi VIXL_ASSERT(std::floor(constrained) == constrained);
4761*f5c631daSSadaf Ebrahimi VIXL_ASSERT(constrained >= INT32_MIN);
4762*f5c631daSSadaf Ebrahimi VIXL_ASSERT(constrained <= INT32_MAX);
4763*f5c631daSSadaf Ebrahimi
4764*f5c631daSSadaf Ebrahimi // Take the bottom 32 bits of the result as a 32-bit integer.
4765*f5c631daSSadaf Ebrahimi result = static_cast<int32_t>(constrained);
4766*f5c631daSSadaf Ebrahimi
4767*f5c631daSSadaf Ebrahimi if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
4768*f5c631daSSadaf Ebrahimi (error != 0.0)) {
4769*f5c631daSSadaf Ebrahimi // If the integer result is out of range or the conversion isn't exact,
4770*f5c631daSSadaf Ebrahimi // take exception and unset the Z-flag.
4771*f5c631daSSadaf Ebrahimi FPProcessException();
4772*f5c631daSSadaf Ebrahimi Z = 0;
4773*f5c631daSSadaf Ebrahimi }
4774*f5c631daSSadaf Ebrahimi }
4775*f5c631daSSadaf Ebrahimi
4776*f5c631daSSadaf Ebrahimi ReadNzcv().SetN(0);
4777*f5c631daSSadaf Ebrahimi ReadNzcv().SetZ(Z);
4778*f5c631daSSadaf Ebrahimi ReadNzcv().SetC(0);
4779*f5c631daSSadaf Ebrahimi ReadNzcv().SetV(0);
4780*f5c631daSSadaf Ebrahimi
4781*f5c631daSSadaf Ebrahimi return result;
4782*f5c631daSSadaf Ebrahimi }
4783*f5c631daSSadaf Ebrahimi
FPRoundIntCommon(double value,FPRounding round_mode)4784*f5c631daSSadaf Ebrahimi double Simulator::FPRoundIntCommon(double value, FPRounding round_mode) {
4785*f5c631daSSadaf Ebrahimi VIXL_ASSERT((value != kFP64PositiveInfinity) &&
4786*f5c631daSSadaf Ebrahimi (value != kFP64NegativeInfinity));
4787*f5c631daSSadaf Ebrahimi VIXL_ASSERT(!IsNaN(value));
4788*f5c631daSSadaf Ebrahimi
4789*f5c631daSSadaf Ebrahimi double int_result = std::floor(value);
4790*f5c631daSSadaf Ebrahimi double error = value - int_result;
4791*f5c631daSSadaf Ebrahimi switch (round_mode) {
4792*f5c631daSSadaf Ebrahimi case FPTieAway: {
4793*f5c631daSSadaf Ebrahimi // Take care of correctly handling the range ]-0.5, -0.0], which must
4794*f5c631daSSadaf Ebrahimi // yield -0.0.
4795*f5c631daSSadaf Ebrahimi if ((-0.5 < value) && (value < 0.0)) {
4796*f5c631daSSadaf Ebrahimi int_result = -0.0;
4797*f5c631daSSadaf Ebrahimi
4798*f5c631daSSadaf Ebrahimi } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
4799*f5c631daSSadaf Ebrahimi // If the error is greater than 0.5, or is equal to 0.5 and the integer
4800*f5c631daSSadaf Ebrahimi // result is positive, round up.
4801*f5c631daSSadaf Ebrahimi int_result++;
4802*f5c631daSSadaf Ebrahimi }
4803*f5c631daSSadaf Ebrahimi break;
4804*f5c631daSSadaf Ebrahimi }
4805*f5c631daSSadaf Ebrahimi case FPTieEven: {
4806*f5c631daSSadaf Ebrahimi // Take care of correctly handling the range [-0.5, -0.0], which must
4807*f5c631daSSadaf Ebrahimi // yield -0.0.
4808*f5c631daSSadaf Ebrahimi if ((-0.5 <= value) && (value < 0.0)) {
4809*f5c631daSSadaf Ebrahimi int_result = -0.0;
4810*f5c631daSSadaf Ebrahimi
4811*f5c631daSSadaf Ebrahimi // If the error is greater than 0.5, or is equal to 0.5 and the integer
4812*f5c631daSSadaf Ebrahimi // result is odd, round up.
4813*f5c631daSSadaf Ebrahimi } else if ((error > 0.5) ||
4814*f5c631daSSadaf Ebrahimi ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
4815*f5c631daSSadaf Ebrahimi int_result++;
4816*f5c631daSSadaf Ebrahimi }
4817*f5c631daSSadaf Ebrahimi break;
4818*f5c631daSSadaf Ebrahimi }
4819*f5c631daSSadaf Ebrahimi case FPZero: {
4820*f5c631daSSadaf Ebrahimi // If value>0 then we take floor(value)
4821*f5c631daSSadaf Ebrahimi // otherwise, ceil(value).
4822*f5c631daSSadaf Ebrahimi if (value < 0) {
4823*f5c631daSSadaf Ebrahimi int_result = ceil(value);
4824*f5c631daSSadaf Ebrahimi }
4825*f5c631daSSadaf Ebrahimi break;
4826*f5c631daSSadaf Ebrahimi }
4827*f5c631daSSadaf Ebrahimi case FPNegativeInfinity: {
4828*f5c631daSSadaf Ebrahimi // We always use floor(value).
4829*f5c631daSSadaf Ebrahimi break;
4830*f5c631daSSadaf Ebrahimi }
4831*f5c631daSSadaf Ebrahimi case FPPositiveInfinity: {
4832*f5c631daSSadaf Ebrahimi // Take care of correctly handling the range ]-1.0, -0.0], which must
4833*f5c631daSSadaf Ebrahimi // yield -0.0.
4834*f5c631daSSadaf Ebrahimi if ((-1.0 < value) && (value < 0.0)) {
4835*f5c631daSSadaf Ebrahimi int_result = -0.0;
4836*f5c631daSSadaf Ebrahimi
4837*f5c631daSSadaf Ebrahimi // If the error is non-zero, round up.
4838*f5c631daSSadaf Ebrahimi } else if (error > 0.0) {
4839*f5c631daSSadaf Ebrahimi int_result++;
4840*f5c631daSSadaf Ebrahimi }
4841*f5c631daSSadaf Ebrahimi break;
4842*f5c631daSSadaf Ebrahimi }
4843*f5c631daSSadaf Ebrahimi default:
4844*f5c631daSSadaf Ebrahimi VIXL_UNIMPLEMENTED();
4845*f5c631daSSadaf Ebrahimi }
4846*f5c631daSSadaf Ebrahimi return int_result;
4847*f5c631daSSadaf Ebrahimi }
4848*f5c631daSSadaf Ebrahimi
FPRoundInt(double value,FPRounding round_mode)4849*f5c631daSSadaf Ebrahimi double Simulator::FPRoundInt(double value, FPRounding round_mode) {
4850*f5c631daSSadaf Ebrahimi if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4851*f5c631daSSadaf Ebrahimi (value == kFP64NegativeInfinity)) {
4852*f5c631daSSadaf Ebrahimi return value;
4853*f5c631daSSadaf Ebrahimi } else if (IsNaN(value)) {
4854*f5c631daSSadaf Ebrahimi return FPProcessNaN(value);
4855*f5c631daSSadaf Ebrahimi }
4856*f5c631daSSadaf Ebrahimi return FPRoundIntCommon(value, round_mode);
4857*f5c631daSSadaf Ebrahimi }
4858*f5c631daSSadaf Ebrahimi
FPRoundInt(double value,FPRounding round_mode,FrintMode frint_mode)4859*f5c631daSSadaf Ebrahimi double Simulator::FPRoundInt(double value,
4860*f5c631daSSadaf Ebrahimi FPRounding round_mode,
4861*f5c631daSSadaf Ebrahimi FrintMode frint_mode) {
4862*f5c631daSSadaf Ebrahimi if (frint_mode == kFrintToInteger) {
4863*f5c631daSSadaf Ebrahimi return FPRoundInt(value, round_mode);
4864*f5c631daSSadaf Ebrahimi }
4865*f5c631daSSadaf Ebrahimi
4866*f5c631daSSadaf Ebrahimi VIXL_ASSERT((frint_mode == kFrintToInt32) || (frint_mode == kFrintToInt64));
4867*f5c631daSSadaf Ebrahimi
4868*f5c631daSSadaf Ebrahimi if (value == 0.0) {
4869*f5c631daSSadaf Ebrahimi return value;
4870*f5c631daSSadaf Ebrahimi }
4871*f5c631daSSadaf Ebrahimi
4872*f5c631daSSadaf Ebrahimi if ((value == kFP64PositiveInfinity) || (value == kFP64NegativeInfinity) ||
4873*f5c631daSSadaf Ebrahimi IsNaN(value)) {
4874*f5c631daSSadaf Ebrahimi if (frint_mode == kFrintToInt32) {
4875*f5c631daSSadaf Ebrahimi return INT32_MIN;
4876*f5c631daSSadaf Ebrahimi } else {
4877*f5c631daSSadaf Ebrahimi return INT64_MIN;
4878*f5c631daSSadaf Ebrahimi }
4879*f5c631daSSadaf Ebrahimi }
4880*f5c631daSSadaf Ebrahimi
4881*f5c631daSSadaf Ebrahimi double result = FPRoundIntCommon(value, round_mode);
4882*f5c631daSSadaf Ebrahimi
4883*f5c631daSSadaf Ebrahimi // We want to compare `result > INT64_MAX` below, but INT64_MAX isn't exactly
4884*f5c631daSSadaf Ebrahimi // representable as a double, and is rounded to (INT64_MAX + 1) when
4885*f5c631daSSadaf Ebrahimi // converted. To avoid this, we compare `result >= int64_max_plus_one`
4886*f5c631daSSadaf Ebrahimi // instead; this is safe because `result` is known to be integral, and
4887*f5c631daSSadaf Ebrahimi // `int64_max_plus_one` is exactly representable as a double.
4888*f5c631daSSadaf Ebrahimi constexpr uint64_t int64_max_plus_one = static_cast<uint64_t>(INT64_MAX) + 1;
4889*f5c631daSSadaf Ebrahimi VIXL_STATIC_ASSERT(static_cast<uint64_t>(static_cast<double>(
4890*f5c631daSSadaf Ebrahimi int64_max_plus_one)) == int64_max_plus_one);
4891*f5c631daSSadaf Ebrahimi
4892*f5c631daSSadaf Ebrahimi if (frint_mode == kFrintToInt32) {
4893*f5c631daSSadaf Ebrahimi if ((result > INT32_MAX) || (result < INT32_MIN)) {
4894*f5c631daSSadaf Ebrahimi return INT32_MIN;
4895*f5c631daSSadaf Ebrahimi }
4896*f5c631daSSadaf Ebrahimi } else if ((result >= int64_max_plus_one) || (result < INT64_MIN)) {
4897*f5c631daSSadaf Ebrahimi return INT64_MIN;
4898*f5c631daSSadaf Ebrahimi }
4899*f5c631daSSadaf Ebrahimi
4900*f5c631daSSadaf Ebrahimi return result;
4901*f5c631daSSadaf Ebrahimi }
4902*f5c631daSSadaf Ebrahimi
FPToInt16(double value,FPRounding rmode)4903*f5c631daSSadaf Ebrahimi int16_t Simulator::FPToInt16(double value, FPRounding rmode) {
4904*f5c631daSSadaf Ebrahimi value = FPRoundInt(value, rmode);
4905*f5c631daSSadaf Ebrahimi if (value >= kHMaxInt) {
4906*f5c631daSSadaf Ebrahimi return kHMaxInt;
4907*f5c631daSSadaf Ebrahimi } else if (value < kHMinInt) {
4908*f5c631daSSadaf Ebrahimi return kHMinInt;
4909*f5c631daSSadaf Ebrahimi }
4910*f5c631daSSadaf Ebrahimi return IsNaN(value) ? 0 : static_cast<int16_t>(value);
4911*f5c631daSSadaf Ebrahimi }
4912*f5c631daSSadaf Ebrahimi
4913*f5c631daSSadaf Ebrahimi
FPToInt32(double value,FPRounding rmode)4914*f5c631daSSadaf Ebrahimi int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
4915*f5c631daSSadaf Ebrahimi value = FPRoundInt(value, rmode);
4916*f5c631daSSadaf Ebrahimi if (value >= kWMaxInt) {
4917*f5c631daSSadaf Ebrahimi return kWMaxInt;
4918*f5c631daSSadaf Ebrahimi } else if (value < kWMinInt) {
4919*f5c631daSSadaf Ebrahimi return kWMinInt;
4920*f5c631daSSadaf Ebrahimi }
4921*f5c631daSSadaf Ebrahimi return IsNaN(value) ? 0 : static_cast<int32_t>(value);
4922*f5c631daSSadaf Ebrahimi }
4923*f5c631daSSadaf Ebrahimi
4924*f5c631daSSadaf Ebrahimi
FPToInt64(double value,FPRounding rmode)4925*f5c631daSSadaf Ebrahimi int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
4926*f5c631daSSadaf Ebrahimi value = FPRoundInt(value, rmode);
4927*f5c631daSSadaf Ebrahimi // This is equivalent to "if (value >= kXMaxInt)" but avoids rounding issues
4928*f5c631daSSadaf Ebrahimi // as a result of kMaxInt not being representable as a double.
4929*f5c631daSSadaf Ebrahimi if (value >= 9223372036854775808.) {
4930*f5c631daSSadaf Ebrahimi return kXMaxInt;
4931*f5c631daSSadaf Ebrahimi } else if (value < kXMinInt) {
4932*f5c631daSSadaf Ebrahimi return kXMinInt;
4933*f5c631daSSadaf Ebrahimi }
4934*f5c631daSSadaf Ebrahimi return IsNaN(value) ? 0 : static_cast<int64_t>(value);
4935*f5c631daSSadaf Ebrahimi }
4936*f5c631daSSadaf Ebrahimi
4937*f5c631daSSadaf Ebrahimi
FPToUInt16(double value,FPRounding rmode)4938*f5c631daSSadaf Ebrahimi uint16_t Simulator::FPToUInt16(double value, FPRounding rmode) {
4939*f5c631daSSadaf Ebrahimi value = FPRoundInt(value, rmode);
4940*f5c631daSSadaf Ebrahimi if (value >= kHMaxUInt) {
4941*f5c631daSSadaf Ebrahimi return kHMaxUInt;
4942*f5c631daSSadaf Ebrahimi } else if (value < 0.0) {
4943*f5c631daSSadaf Ebrahimi return 0;
4944*f5c631daSSadaf Ebrahimi }
4945*f5c631daSSadaf Ebrahimi return IsNaN(value) ? 0 : static_cast<uint16_t>(value);
4946*f5c631daSSadaf Ebrahimi }
4947*f5c631daSSadaf Ebrahimi
4948*f5c631daSSadaf Ebrahimi
FPToUInt32(double value,FPRounding rmode)4949*f5c631daSSadaf Ebrahimi uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
4950*f5c631daSSadaf Ebrahimi value = FPRoundInt(value, rmode);
4951*f5c631daSSadaf Ebrahimi if (value >= kWMaxUInt) {
4952*f5c631daSSadaf Ebrahimi return kWMaxUInt;
4953*f5c631daSSadaf Ebrahimi } else if (value < 0.0) {
4954*f5c631daSSadaf Ebrahimi return 0;
4955*f5c631daSSadaf Ebrahimi }
4956*f5c631daSSadaf Ebrahimi return IsNaN(value) ? 0 : static_cast<uint32_t>(value);
4957*f5c631daSSadaf Ebrahimi }
4958*f5c631daSSadaf Ebrahimi
4959*f5c631daSSadaf Ebrahimi
FPToUInt64(double value,FPRounding rmode)4960*f5c631daSSadaf Ebrahimi uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
4961*f5c631daSSadaf Ebrahimi value = FPRoundInt(value, rmode);
4962*f5c631daSSadaf Ebrahimi // This is equivalent to "if (value >= kXMaxUInt)" but avoids rounding issues
4963*f5c631daSSadaf Ebrahimi // as a result of kMaxUInt not being representable as a double.
4964*f5c631daSSadaf Ebrahimi if (value >= 18446744073709551616.) {
4965*f5c631daSSadaf Ebrahimi return kXMaxUInt;
4966*f5c631daSSadaf Ebrahimi } else if (value < 0.0) {
4967*f5c631daSSadaf Ebrahimi return 0;
4968*f5c631daSSadaf Ebrahimi }
4969*f5c631daSSadaf Ebrahimi return IsNaN(value) ? 0 : static_cast<uint64_t>(value);
4970*f5c631daSSadaf Ebrahimi }
4971*f5c631daSSadaf Ebrahimi
4972*f5c631daSSadaf Ebrahimi
// For every (FN, OP, PROCNAN) entry in NEON_FP3SAME_LIST this generates two
// definitions of Simulator::FN:
//
//  * A lane-type template that applies OP to each pair of lanes of src1 and
//    src2. When PROCNAN is true the operands first go through
//    FPProcessNaNs(); if that yields a NaN, the NaN is written to the lane and
//    OP is not called.
//  * A non-template overload that selects the lane type (SimFloat16, float or
//    double) from the lane size of `vform`.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                  \
  template <typename T>                                            \
  LogicVRegister Simulator::FN(VectorFormat vform,                 \
                               LogicVRegister dst,                 \
                               const LogicVRegister& src1,         \
                               const LogicVRegister& src2) {       \
    dst.ClearForWrite(vform);                                      \
    const int lane_count = LaneCountFromFormat(vform);             \
    for (int lane = 0; lane < lane_count; ++lane) {                \
      T lhs = src1.Float<T>(lane);                                 \
      T rhs = src2.Float<T>(lane);                                 \
      T lane_result;                                               \
      bool keep_nan = false;                                       \
      if (PROCNAN) {                                               \
        lane_result = FPProcessNaNs(lhs, rhs);                     \
        keep_nan = IsNaN(lane_result);                             \
      }                                                            \
      if (!keep_nan) {                                             \
        lane_result = OP(lhs, rhs);                                \
      }                                                            \
      dst.SetFloat(vform, lane, lane_result);                      \
    }                                                              \
    return dst;                                                    \
  }                                                                \
                                                                   \
  LogicVRegister Simulator::FN(VectorFormat vform,                 \
                               LogicVRegister dst,                 \
                               const LogicVRegister& src1,         \
                               const LogicVRegister& src2) {       \
    const auto lane_bits = LaneSizeInBitsFromFormat(vform);        \
    if (lane_bits == kHRegSize) {                                  \
      FN<SimFloat16>(vform, dst, src1, src2);                      \
    } else if (lane_bits == kSRegSize) {                           \
      FN<float>(vform, dst, src1, src2);                           \
    } else {                                                       \
      VIXL_ASSERT(lane_bits == kDRegSize);                         \
      FN<double>(vform, dst, src1, src2);                          \
    }                                                              \
    return dst;                                                    \
  }
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP
5013*f5c631daSSadaf Ebrahimi
5014*f5c631daSSadaf Ebrahimi
5015*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fnmul(VectorFormat vform,
5016*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5017*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5018*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
5019*f5c631daSSadaf Ebrahimi SimVRegister temp;
5020*f5c631daSSadaf Ebrahimi LogicVRegister product = fmul(vform, temp, src1, src2);
5021*f5c631daSSadaf Ebrahimi return fneg(vform, dst, product);
5022*f5c631daSSadaf Ebrahimi }
5023*f5c631daSSadaf Ebrahimi
5024*f5c631daSSadaf Ebrahimi
5025*f5c631daSSadaf Ebrahimi template <typename T>
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5026*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::frecps(VectorFormat vform,
5027*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5028*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5029*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
5030*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5031*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5032*f5c631daSSadaf Ebrahimi T op1 = -src1.Float<T>(i);
5033*f5c631daSSadaf Ebrahimi T op2 = src2.Float<T>(i);
5034*f5c631daSSadaf Ebrahimi T result = FPProcessNaNs(op1, op2);
5035*f5c631daSSadaf Ebrahimi dst.SetFloat(vform, i, IsNaN(result) ? result : FPRecipStepFused(op1, op2));
5036*f5c631daSSadaf Ebrahimi }
5037*f5c631daSSadaf Ebrahimi return dst;
5038*f5c631daSSadaf Ebrahimi }
5039*f5c631daSSadaf Ebrahimi
5040*f5c631daSSadaf Ebrahimi
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5041*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::frecps(VectorFormat vform,
5042*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5043*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5044*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
5045*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5046*f5c631daSSadaf Ebrahimi frecps<SimFloat16>(vform, dst, src1, src2);
5047*f5c631daSSadaf Ebrahimi } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5048*f5c631daSSadaf Ebrahimi frecps<float>(vform, dst, src1, src2);
5049*f5c631daSSadaf Ebrahimi } else {
5050*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5051*f5c631daSSadaf Ebrahimi frecps<double>(vform, dst, src1, src2);
5052*f5c631daSSadaf Ebrahimi }
5053*f5c631daSSadaf Ebrahimi return dst;
5054*f5c631daSSadaf Ebrahimi }
5055*f5c631daSSadaf Ebrahimi
5056*f5c631daSSadaf Ebrahimi
5057*f5c631daSSadaf Ebrahimi template <typename T>
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5058*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::frsqrts(VectorFormat vform,
5059*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5060*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5061*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
5062*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5063*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5064*f5c631daSSadaf Ebrahimi T op1 = -src1.Float<T>(i);
5065*f5c631daSSadaf Ebrahimi T op2 = src2.Float<T>(i);
5066*f5c631daSSadaf Ebrahimi T result = FPProcessNaNs(op1, op2);
5067*f5c631daSSadaf Ebrahimi dst.SetFloat(vform, i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2));
5068*f5c631daSSadaf Ebrahimi }
5069*f5c631daSSadaf Ebrahimi return dst;
5070*f5c631daSSadaf Ebrahimi }
5071*f5c631daSSadaf Ebrahimi
5072*f5c631daSSadaf Ebrahimi
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5073*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::frsqrts(VectorFormat vform,
5074*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5075*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5076*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
5077*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5078*f5c631daSSadaf Ebrahimi frsqrts<SimFloat16>(vform, dst, src1, src2);
5079*f5c631daSSadaf Ebrahimi } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5080*f5c631daSSadaf Ebrahimi frsqrts<float>(vform, dst, src1, src2);
5081*f5c631daSSadaf Ebrahimi } else {
5082*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5083*f5c631daSSadaf Ebrahimi frsqrts<double>(vform, dst, src1, src2);
5084*f5c631daSSadaf Ebrahimi }
5085*f5c631daSSadaf Ebrahimi return dst;
5086*f5c631daSSadaf Ebrahimi }
5087*f5c631daSSadaf Ebrahimi
5088*f5c631daSSadaf Ebrahimi
5089*f5c631daSSadaf Ebrahimi template <typename T>
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)5090*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fcmp(VectorFormat vform,
5091*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5092*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5093*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
5094*f5c631daSSadaf Ebrahimi Condition cond) {
5095*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5096*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5097*f5c631daSSadaf Ebrahimi bool result = false;
5098*f5c631daSSadaf Ebrahimi T op1 = src1.Float<T>(i);
5099*f5c631daSSadaf Ebrahimi T op2 = src2.Float<T>(i);
5100*f5c631daSSadaf Ebrahimi bool unordered = IsNaN(FPProcessNaNs(op1, op2));
5101*f5c631daSSadaf Ebrahimi
5102*f5c631daSSadaf Ebrahimi switch (cond) {
5103*f5c631daSSadaf Ebrahimi case eq:
5104*f5c631daSSadaf Ebrahimi result = (op1 == op2);
5105*f5c631daSSadaf Ebrahimi break;
5106*f5c631daSSadaf Ebrahimi case ge:
5107*f5c631daSSadaf Ebrahimi result = (op1 >= op2);
5108*f5c631daSSadaf Ebrahimi break;
5109*f5c631daSSadaf Ebrahimi case gt:
5110*f5c631daSSadaf Ebrahimi result = (op1 > op2);
5111*f5c631daSSadaf Ebrahimi break;
5112*f5c631daSSadaf Ebrahimi case le:
5113*f5c631daSSadaf Ebrahimi result = (op1 <= op2);
5114*f5c631daSSadaf Ebrahimi break;
5115*f5c631daSSadaf Ebrahimi case lt:
5116*f5c631daSSadaf Ebrahimi result = (op1 < op2);
5117*f5c631daSSadaf Ebrahimi break;
5118*f5c631daSSadaf Ebrahimi case ne:
5119*f5c631daSSadaf Ebrahimi result = (op1 != op2);
5120*f5c631daSSadaf Ebrahimi break;
5121*f5c631daSSadaf Ebrahimi case uo:
5122*f5c631daSSadaf Ebrahimi result = unordered;
5123*f5c631daSSadaf Ebrahimi break;
5124*f5c631daSSadaf Ebrahimi default:
5125*f5c631daSSadaf Ebrahimi // Other conditions are defined in terms of those above.
5126*f5c631daSSadaf Ebrahimi VIXL_UNREACHABLE();
5127*f5c631daSSadaf Ebrahimi break;
5128*f5c631daSSadaf Ebrahimi }
5129*f5c631daSSadaf Ebrahimi
5130*f5c631daSSadaf Ebrahimi if (result && unordered) {
5131*f5c631daSSadaf Ebrahimi // Only `uo` and `ne` can be true for unordered comparisons.
5132*f5c631daSSadaf Ebrahimi VIXL_ASSERT((cond == uo) || (cond == ne));
5133*f5c631daSSadaf Ebrahimi }
5134*f5c631daSSadaf Ebrahimi
5135*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
5136*f5c631daSSadaf Ebrahimi }
5137*f5c631daSSadaf Ebrahimi return dst;
5138*f5c631daSSadaf Ebrahimi }
5139*f5c631daSSadaf Ebrahimi
5140*f5c631daSSadaf Ebrahimi
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)5141*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fcmp(VectorFormat vform,
5142*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5143*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5144*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
5145*f5c631daSSadaf Ebrahimi Condition cond) {
5146*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5147*f5c631daSSadaf Ebrahimi fcmp<SimFloat16>(vform, dst, src1, src2, cond);
5148*f5c631daSSadaf Ebrahimi } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5149*f5c631daSSadaf Ebrahimi fcmp<float>(vform, dst, src1, src2, cond);
5150*f5c631daSSadaf Ebrahimi } else {
5151*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5152*f5c631daSSadaf Ebrahimi fcmp<double>(vform, dst, src1, src2, cond);
5153*f5c631daSSadaf Ebrahimi }
5154*f5c631daSSadaf Ebrahimi return dst;
5155*f5c631daSSadaf Ebrahimi }
5156*f5c631daSSadaf Ebrahimi
5157*f5c631daSSadaf Ebrahimi
fcmp_zero(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,Condition cond)5158*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
5159*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5160*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
5161*f5c631daSSadaf Ebrahimi Condition cond) {
5162*f5c631daSSadaf Ebrahimi SimVRegister temp;
5163*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5164*f5c631daSSadaf Ebrahimi LogicVRegister zero_reg =
5165*f5c631daSSadaf Ebrahimi dup_immediate(vform, temp, Float16ToRawbits(SimFloat16(0.0)));
5166*f5c631daSSadaf Ebrahimi fcmp<SimFloat16>(vform, dst, src, zero_reg, cond);
5167*f5c631daSSadaf Ebrahimi } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5168*f5c631daSSadaf Ebrahimi LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));
5169*f5c631daSSadaf Ebrahimi fcmp<float>(vform, dst, src, zero_reg, cond);
5170*f5c631daSSadaf Ebrahimi } else {
5171*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5172*f5c631daSSadaf Ebrahimi LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0));
5173*f5c631daSSadaf Ebrahimi fcmp<double>(vform, dst, src, zero_reg, cond);
5174*f5c631daSSadaf Ebrahimi }
5175*f5c631daSSadaf Ebrahimi return dst;
5176*f5c631daSSadaf Ebrahimi }
5177*f5c631daSSadaf Ebrahimi
5178*f5c631daSSadaf Ebrahimi
fabscmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)5179*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fabscmp(VectorFormat vform,
5180*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5181*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5182*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
5183*f5c631daSSadaf Ebrahimi Condition cond) {
5184*f5c631daSSadaf Ebrahimi SimVRegister temp1, temp2;
5185*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5186*f5c631daSSadaf Ebrahimi LogicVRegister abs_src1 = fabs_<SimFloat16>(vform, temp1, src1);
5187*f5c631daSSadaf Ebrahimi LogicVRegister abs_src2 = fabs_<SimFloat16>(vform, temp2, src2);
5188*f5c631daSSadaf Ebrahimi fcmp<SimFloat16>(vform, dst, abs_src1, abs_src2, cond);
5189*f5c631daSSadaf Ebrahimi } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5190*f5c631daSSadaf Ebrahimi LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
5191*f5c631daSSadaf Ebrahimi LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
5192*f5c631daSSadaf Ebrahimi fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
5193*f5c631daSSadaf Ebrahimi } else {
5194*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5195*f5c631daSSadaf Ebrahimi LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
5196*f5c631daSSadaf Ebrahimi LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
5197*f5c631daSSadaf Ebrahimi fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
5198*f5c631daSSadaf Ebrahimi }
5199*f5c631daSSadaf Ebrahimi return dst;
5200*f5c631daSSadaf Ebrahimi }
5201*f5c631daSSadaf Ebrahimi
5202*f5c631daSSadaf Ebrahimi
5203*f5c631daSSadaf Ebrahimi template <typename T>
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5204*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fmla(VectorFormat vform,
5205*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5206*f5c631daSSadaf Ebrahimi const LogicVRegister& srca,
5207*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5208*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
5209*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5210*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5211*f5c631daSSadaf Ebrahimi T op1 = src1.Float<T>(i);
5212*f5c631daSSadaf Ebrahimi T op2 = src2.Float<T>(i);
5213*f5c631daSSadaf Ebrahimi T acc = srca.Float<T>(i);
5214*f5c631daSSadaf Ebrahimi T result = FPMulAdd(acc, op1, op2);
5215*f5c631daSSadaf Ebrahimi dst.SetFloat(vform, i, result);
5216*f5c631daSSadaf Ebrahimi }
5217*f5c631daSSadaf Ebrahimi return dst;
5218*f5c631daSSadaf Ebrahimi }
5219*f5c631daSSadaf Ebrahimi
5220*f5c631daSSadaf Ebrahimi
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5221*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fmla(VectorFormat vform,
5222*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5223*f5c631daSSadaf Ebrahimi const LogicVRegister& srca,
5224*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5225*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
5226*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5227*f5c631daSSadaf Ebrahimi fmla<SimFloat16>(vform, dst, srca, src1, src2);
5228*f5c631daSSadaf Ebrahimi } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5229*f5c631daSSadaf Ebrahimi fmla<float>(vform, dst, srca, src1, src2);
5230*f5c631daSSadaf Ebrahimi } else {
5231*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5232*f5c631daSSadaf Ebrahimi fmla<double>(vform, dst, srca, src1, src2);
5233*f5c631daSSadaf Ebrahimi }
5234*f5c631daSSadaf Ebrahimi return dst;
5235*f5c631daSSadaf Ebrahimi }
5236*f5c631daSSadaf Ebrahimi
5237*f5c631daSSadaf Ebrahimi
5238*f5c631daSSadaf Ebrahimi template <typename T>
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5239*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fmls(VectorFormat vform,
5240*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5241*f5c631daSSadaf Ebrahimi const LogicVRegister& srca,
5242*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5243*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
5244*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5245*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5246*f5c631daSSadaf Ebrahimi T op1 = -src1.Float<T>(i);
5247*f5c631daSSadaf Ebrahimi T op2 = src2.Float<T>(i);
5248*f5c631daSSadaf Ebrahimi T acc = srca.Float<T>(i);
5249*f5c631daSSadaf Ebrahimi T result = FPMulAdd(acc, op1, op2);
5250*f5c631daSSadaf Ebrahimi dst.SetFloat(i, result);
5251*f5c631daSSadaf Ebrahimi }
5252*f5c631daSSadaf Ebrahimi return dst;
5253*f5c631daSSadaf Ebrahimi }
5254*f5c631daSSadaf Ebrahimi
5255*f5c631daSSadaf Ebrahimi
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5256*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fmls(VectorFormat vform,
5257*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5258*f5c631daSSadaf Ebrahimi const LogicVRegister& srca,
5259*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5260*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
5261*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5262*f5c631daSSadaf Ebrahimi fmls<SimFloat16>(vform, dst, srca, src1, src2);
5263*f5c631daSSadaf Ebrahimi } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5264*f5c631daSSadaf Ebrahimi fmls<float>(vform, dst, srca, src1, src2);
5265*f5c631daSSadaf Ebrahimi } else {
5266*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5267*f5c631daSSadaf Ebrahimi fmls<double>(vform, dst, srca, src1, src2);
5268*f5c631daSSadaf Ebrahimi }
5269*f5c631daSSadaf Ebrahimi return dst;
5270*f5c631daSSadaf Ebrahimi }
5271*f5c631daSSadaf Ebrahimi
5272*f5c631daSSadaf Ebrahimi
fmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5273*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fmlal(VectorFormat vform,
5274*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5275*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5276*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
5277*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5278*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5279*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5280*f5c631daSSadaf Ebrahimi float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5281*f5c631daSSadaf Ebrahimi float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5282*f5c631daSSadaf Ebrahimi float acc = dst.Float<float>(i);
5283*f5c631daSSadaf Ebrahimi float result = FPMulAdd(acc, op1, op2);
5284*f5c631daSSadaf Ebrahimi dst.SetFloat(i, result);
5285*f5c631daSSadaf Ebrahimi }
5286*f5c631daSSadaf Ebrahimi return dst;
5287*f5c631daSSadaf Ebrahimi }
5288*f5c631daSSadaf Ebrahimi
5289*f5c631daSSadaf Ebrahimi
fmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5290*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fmlal2(VectorFormat vform,
5291*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5292*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5293*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
5294*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5295*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5296*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5297*f5c631daSSadaf Ebrahimi int src = i + LaneCountFromFormat(vform);
5298*f5c631daSSadaf Ebrahimi float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5299*f5c631daSSadaf Ebrahimi float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5300*f5c631daSSadaf Ebrahimi float acc = dst.Float<float>(i);
5301*f5c631daSSadaf Ebrahimi float result = FPMulAdd(acc, op1, op2);
5302*f5c631daSSadaf Ebrahimi dst.SetFloat(i, result);
5303*f5c631daSSadaf Ebrahimi }
5304*f5c631daSSadaf Ebrahimi return dst;
5305*f5c631daSSadaf Ebrahimi }
5306*f5c631daSSadaf Ebrahimi
5307*f5c631daSSadaf Ebrahimi
fmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5308*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fmlsl(VectorFormat vform,
5309*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5310*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5311*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
5312*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5313*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5314*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5315*f5c631daSSadaf Ebrahimi float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5316*f5c631daSSadaf Ebrahimi float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5317*f5c631daSSadaf Ebrahimi float acc = dst.Float<float>(i);
5318*f5c631daSSadaf Ebrahimi float result = FPMulAdd(acc, op1, op2);
5319*f5c631daSSadaf Ebrahimi dst.SetFloat(i, result);
5320*f5c631daSSadaf Ebrahimi }
5321*f5c631daSSadaf Ebrahimi return dst;
5322*f5c631daSSadaf Ebrahimi }
5323*f5c631daSSadaf Ebrahimi
5324*f5c631daSSadaf Ebrahimi
fmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5325*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5326*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5327*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5328*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
5329*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5330*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5331*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5332*f5c631daSSadaf Ebrahimi int src = i + LaneCountFromFormat(vform);
5333*f5c631daSSadaf Ebrahimi float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5334*f5c631daSSadaf Ebrahimi float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5335*f5c631daSSadaf Ebrahimi float acc = dst.Float<float>(i);
5336*f5c631daSSadaf Ebrahimi float result = FPMulAdd(acc, op1, op2);
5337*f5c631daSSadaf Ebrahimi dst.SetFloat(i, result);
5338*f5c631daSSadaf Ebrahimi }
5339*f5c631daSSadaf Ebrahimi return dst;
5340*f5c631daSSadaf Ebrahimi }
5341*f5c631daSSadaf Ebrahimi
5342*f5c631daSSadaf Ebrahimi
fmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5343*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fmlal(VectorFormat vform,
5344*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5345*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5346*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
5347*f5c631daSSadaf Ebrahimi int index) {
5348*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5349*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5350*f5c631daSSadaf Ebrahimi float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5351*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5352*f5c631daSSadaf Ebrahimi float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5353*f5c631daSSadaf Ebrahimi float acc = dst.Float<float>(i);
5354*f5c631daSSadaf Ebrahimi float result = FPMulAdd(acc, op1, op2);
5355*f5c631daSSadaf Ebrahimi dst.SetFloat(i, result);
5356*f5c631daSSadaf Ebrahimi }
5357*f5c631daSSadaf Ebrahimi return dst;
5358*f5c631daSSadaf Ebrahimi }
5359*f5c631daSSadaf Ebrahimi
5360*f5c631daSSadaf Ebrahimi
fmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5361*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fmlal2(VectorFormat vform,
5362*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5363*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5364*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
5365*f5c631daSSadaf Ebrahimi int index) {
5366*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5367*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5368*f5c631daSSadaf Ebrahimi float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5369*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5370*f5c631daSSadaf Ebrahimi int src = i + LaneCountFromFormat(vform);
5371*f5c631daSSadaf Ebrahimi float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5372*f5c631daSSadaf Ebrahimi float acc = dst.Float<float>(i);
5373*f5c631daSSadaf Ebrahimi float result = FPMulAdd(acc, op1, op2);
5374*f5c631daSSadaf Ebrahimi dst.SetFloat(i, result);
5375*f5c631daSSadaf Ebrahimi }
5376*f5c631daSSadaf Ebrahimi return dst;
5377*f5c631daSSadaf Ebrahimi }
5378*f5c631daSSadaf Ebrahimi
5379*f5c631daSSadaf Ebrahimi
fmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5380*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fmlsl(VectorFormat vform,
5381*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5382*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5383*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
5384*f5c631daSSadaf Ebrahimi int index) {
5385*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5386*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5387*f5c631daSSadaf Ebrahimi float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5388*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5389*f5c631daSSadaf Ebrahimi float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5390*f5c631daSSadaf Ebrahimi float acc = dst.Float<float>(i);
5391*f5c631daSSadaf Ebrahimi float result = FPMulAdd(acc, op1, op2);
5392*f5c631daSSadaf Ebrahimi dst.SetFloat(i, result);
5393*f5c631daSSadaf Ebrahimi }
5394*f5c631daSSadaf Ebrahimi return dst;
5395*f5c631daSSadaf Ebrahimi }
5396*f5c631daSSadaf Ebrahimi
5397*f5c631daSSadaf Ebrahimi
fmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5398*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5399*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5400*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5401*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
5402*f5c631daSSadaf Ebrahimi int index) {
5403*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5404*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5405*f5c631daSSadaf Ebrahimi float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5406*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5407*f5c631daSSadaf Ebrahimi int src = i + LaneCountFromFormat(vform);
5408*f5c631daSSadaf Ebrahimi float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5409*f5c631daSSadaf Ebrahimi float acc = dst.Float<float>(i);
5410*f5c631daSSadaf Ebrahimi float result = FPMulAdd(acc, op1, op2);
5411*f5c631daSSadaf Ebrahimi dst.SetFloat(i, result);
5412*f5c631daSSadaf Ebrahimi }
5413*f5c631daSSadaf Ebrahimi return dst;
5414*f5c631daSSadaf Ebrahimi }
5415*f5c631daSSadaf Ebrahimi
5416*f5c631daSSadaf Ebrahimi
5417*f5c631daSSadaf Ebrahimi template <typename T>
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5418*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fneg(VectorFormat vform,
5419*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5420*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
5421*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5422*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5423*f5c631daSSadaf Ebrahimi T op = src.Float<T>(i);
5424*f5c631daSSadaf Ebrahimi op = -op;
5425*f5c631daSSadaf Ebrahimi dst.SetFloat(i, op);
5426*f5c631daSSadaf Ebrahimi }
5427*f5c631daSSadaf Ebrahimi return dst;
5428*f5c631daSSadaf Ebrahimi }
5429*f5c631daSSadaf Ebrahimi
5430*f5c631daSSadaf Ebrahimi
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5431*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fneg(VectorFormat vform,
5432*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5433*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
5434*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5435*f5c631daSSadaf Ebrahimi fneg<SimFloat16>(vform, dst, src);
5436*f5c631daSSadaf Ebrahimi } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5437*f5c631daSSadaf Ebrahimi fneg<float>(vform, dst, src);
5438*f5c631daSSadaf Ebrahimi } else {
5439*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5440*f5c631daSSadaf Ebrahimi fneg<double>(vform, dst, src);
5441*f5c631daSSadaf Ebrahimi }
5442*f5c631daSSadaf Ebrahimi return dst;
5443*f5c631daSSadaf Ebrahimi }
5444*f5c631daSSadaf Ebrahimi
5445*f5c631daSSadaf Ebrahimi
5446*f5c631daSSadaf Ebrahimi template <typename T>
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5447*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fabs_(VectorFormat vform,
5448*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5449*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
5450*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5451*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5452*f5c631daSSadaf Ebrahimi T op = src.Float<T>(i);
5453*f5c631daSSadaf Ebrahimi if (copysign(1.0, op) < 0.0) {
5454*f5c631daSSadaf Ebrahimi op = -op;
5455*f5c631daSSadaf Ebrahimi }
5456*f5c631daSSadaf Ebrahimi dst.SetFloat(i, op);
5457*f5c631daSSadaf Ebrahimi }
5458*f5c631daSSadaf Ebrahimi return dst;
5459*f5c631daSSadaf Ebrahimi }
5460*f5c631daSSadaf Ebrahimi
5461*f5c631daSSadaf Ebrahimi
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5462*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fabs_(VectorFormat vform,
5463*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5464*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
5465*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5466*f5c631daSSadaf Ebrahimi fabs_<SimFloat16>(vform, dst, src);
5467*f5c631daSSadaf Ebrahimi } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5468*f5c631daSSadaf Ebrahimi fabs_<float>(vform, dst, src);
5469*f5c631daSSadaf Ebrahimi } else {
5470*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5471*f5c631daSSadaf Ebrahimi fabs_<double>(vform, dst, src);
5472*f5c631daSSadaf Ebrahimi }
5473*f5c631daSSadaf Ebrahimi return dst;
5474*f5c631daSSadaf Ebrahimi }
5475*f5c631daSSadaf Ebrahimi
5476*f5c631daSSadaf Ebrahimi
fabd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5477*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fabd(VectorFormat vform,
5478*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5479*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5480*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
5481*f5c631daSSadaf Ebrahimi SimVRegister temp;
5482*f5c631daSSadaf Ebrahimi fsub(vform, temp, src1, src2);
5483*f5c631daSSadaf Ebrahimi fabs_(vform, dst, temp);
5484*f5c631daSSadaf Ebrahimi return dst;
5485*f5c631daSSadaf Ebrahimi }
5486*f5c631daSSadaf Ebrahimi
5487*f5c631daSSadaf Ebrahimi
fsqrt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5488*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fsqrt(VectorFormat vform,
5489*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5490*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
5491*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5492*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5493*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5494*f5c631daSSadaf Ebrahimi SimFloat16 result = FPSqrt(src.Float<SimFloat16>(i));
5495*f5c631daSSadaf Ebrahimi dst.SetFloat(i, result);
5496*f5c631daSSadaf Ebrahimi }
5497*f5c631daSSadaf Ebrahimi } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5498*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5499*f5c631daSSadaf Ebrahimi float result = FPSqrt(src.Float<float>(i));
5500*f5c631daSSadaf Ebrahimi dst.SetFloat(i, result);
5501*f5c631daSSadaf Ebrahimi }
5502*f5c631daSSadaf Ebrahimi } else {
5503*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5504*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5505*f5c631daSSadaf Ebrahimi double result = FPSqrt(src.Float<double>(i));
5506*f5c631daSSadaf Ebrahimi dst.SetFloat(i, result);
5507*f5c631daSSadaf Ebrahimi }
5508*f5c631daSSadaf Ebrahimi }
5509*f5c631daSSadaf Ebrahimi return dst;
5510*f5c631daSSadaf Ebrahimi }
5511*f5c631daSSadaf Ebrahimi
5512*f5c631daSSadaf Ebrahimi
// Defines two Simulator members named FNP:
//  - a two-source form that runs uzp1 and uzp2 over (src1, src2) into scratch
//    registers and applies FN to the two results; for SVE formats the result
//    is additionally passed through interleave_top_bottom;
//  - a single-source scalar form (kFormatH, kFormatS or kFormatD) that applies
//    OP to lanes 0 and 1 of src and writes the result to lane 0 of dst.
// The macro is instantiated through NEON_FPPAIRWISE_LIST (defined elsewhere)
// and undefined again straight afterwards.
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                             \
  LogicVRegister Simulator::FNP(VectorFormat vform,                     \
                                LogicVRegister dst,                     \
                                const LogicVRegister& src1,             \
                                const LogicVRegister& src2) {           \
    SimVRegister unzipped1;                                             \
    SimVRegister unzipped2;                                             \
    uzp1(vform, unzipped1, src1, src2);                                 \
    uzp2(vform, unzipped2, src1, src2);                                 \
    FN(vform, dst, unzipped1, unzipped2);                               \
    if (IsSVEFormat(vform)) {                                           \
      interleave_top_bottom(vform, dst, dst);                           \
    }                                                                   \
    return dst;                                                         \
  }                                                                     \
                                                                        \
  LogicVRegister Simulator::FNP(VectorFormat vform,                     \
                                LogicVRegister dst,                     \
                                const LogicVRegister& src) {            \
    if (vform == kFormatH) {                                            \
      SimFloat16 first(RawbitsToFloat16(src.Uint(vform, 0)));           \
      SimFloat16 second(RawbitsToFloat16(src.Uint(vform, 1)));          \
      SimFloat16 combined(OP(first, second));                           \
      dst.SetUint(vform, 0, Float16ToRawbits(combined));                \
    } else if (vform == kFormatS) {                                     \
      float combined = OP(src.Float<float>(0), src.Float<float>(1));    \
      dst.SetFloat(0, combined);                                        \
    } else {                                                            \
      VIXL_ASSERT(vform == kFormatD);                                   \
      double combined = OP(src.Float<double>(0), src.Float<double>(1)); \
      dst.SetFloat(0, combined);                                        \
    }                                                                   \
    dst.ClearForWrite(vform);                                           \
    return dst;                                                         \
  }
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP
5548*f5c631daSSadaf Ebrahimi
5549*f5c631daSSadaf Ebrahimi template <typename T>
5550*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::FPPairedAcrossHelper(VectorFormat vform,
5551*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5552*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
5553*f5c631daSSadaf Ebrahimi typename TFPPairOp<T>::type fn,
5554*f5c631daSSadaf Ebrahimi uint64_t inactive_value) {
5555*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
5556*f5c631daSSadaf Ebrahimi T result[kZRegMaxSizeInBytes / sizeof(T)];
5557*f5c631daSSadaf Ebrahimi // Copy the source vector into a working array. Initialise the unused elements
5558*f5c631daSSadaf Ebrahimi // at the end of the array to the same value that a false predicate would set.
5559*f5c631daSSadaf Ebrahimi for (int i = 0; i < static_cast<int>(ArrayLength(result)); i++) {
5560*f5c631daSSadaf Ebrahimi result[i] = (i < lane_count)
5561*f5c631daSSadaf Ebrahimi ? src.Float<T>(i)
5562*f5c631daSSadaf Ebrahimi : RawbitsWithSizeToFP<T>(sizeof(T) * 8, inactive_value);
5563*f5c631daSSadaf Ebrahimi }
5564*f5c631daSSadaf Ebrahimi
5565*f5c631daSSadaf Ebrahimi // Pairwise reduce the elements to a single value, using the pair op function
5566*f5c631daSSadaf Ebrahimi // argument.
5567*f5c631daSSadaf Ebrahimi for (int step = 1; step < lane_count; step *= 2) {
5568*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; i += step * 2) {
5569*f5c631daSSadaf Ebrahimi result[i] = (this->*fn)(result[i], result[i + step]);
5570*f5c631daSSadaf Ebrahimi }
5571*f5c631daSSadaf Ebrahimi }
5572*f5c631daSSadaf Ebrahimi dst.ClearForWrite(ScalarFormatFromFormat(vform));
5573*f5c631daSSadaf Ebrahimi dst.SetFloat<T>(0, result[0]);
5574*f5c631daSSadaf Ebrahimi return dst;
5575*f5c631daSSadaf Ebrahimi }
5576*f5c631daSSadaf Ebrahimi
FPPairedAcrossHelper(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,typename TFPPairOp<SimFloat16>::type fn16,typename TFPPairOp<float>::type fn32,typename TFPPairOp<double>::type fn64,uint64_t inactive_value)5577*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::FPPairedAcrossHelper(
5578*f5c631daSSadaf Ebrahimi VectorFormat vform,
5579*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5580*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
5581*f5c631daSSadaf Ebrahimi typename TFPPairOp<SimFloat16>::type fn16,
5582*f5c631daSSadaf Ebrahimi typename TFPPairOp<float>::type fn32,
5583*f5c631daSSadaf Ebrahimi typename TFPPairOp<double>::type fn64,
5584*f5c631daSSadaf Ebrahimi uint64_t inactive_value) {
5585*f5c631daSSadaf Ebrahimi switch (LaneSizeInBitsFromFormat(vform)) {
5586*f5c631daSSadaf Ebrahimi case kHRegSize:
5587*f5c631daSSadaf Ebrahimi return FPPairedAcrossHelper<SimFloat16>(vform,
5588*f5c631daSSadaf Ebrahimi dst,
5589*f5c631daSSadaf Ebrahimi src,
5590*f5c631daSSadaf Ebrahimi fn16,
5591*f5c631daSSadaf Ebrahimi inactive_value);
5592*f5c631daSSadaf Ebrahimi case kSRegSize:
5593*f5c631daSSadaf Ebrahimi return FPPairedAcrossHelper<float>(vform, dst, src, fn32, inactive_value);
5594*f5c631daSSadaf Ebrahimi default:
5595*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5596*f5c631daSSadaf Ebrahimi return FPPairedAcrossHelper<double>(vform,
5597*f5c631daSSadaf Ebrahimi dst,
5598*f5c631daSSadaf Ebrahimi src,
5599*f5c631daSSadaf Ebrahimi fn64,
5600*f5c631daSSadaf Ebrahimi inactive_value);
5601*f5c631daSSadaf Ebrahimi }
5602*f5c631daSSadaf Ebrahimi }
5603*f5c631daSSadaf Ebrahimi
faddv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5604*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::faddv(VectorFormat vform,
5605*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5606*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
5607*f5c631daSSadaf Ebrahimi return FPPairedAcrossHelper(vform,
5608*f5c631daSSadaf Ebrahimi dst,
5609*f5c631daSSadaf Ebrahimi src,
5610*f5c631daSSadaf Ebrahimi &Simulator::FPAdd<SimFloat16>,
5611*f5c631daSSadaf Ebrahimi &Simulator::FPAdd<float>,
5612*f5c631daSSadaf Ebrahimi &Simulator::FPAdd<double>,
5613*f5c631daSSadaf Ebrahimi 0);
5614*f5c631daSSadaf Ebrahimi }
5615*f5c631daSSadaf Ebrahimi
fmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5616*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fmaxv(VectorFormat vform,
5617*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5618*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
5619*f5c631daSSadaf Ebrahimi int lane_size = LaneSizeInBitsFromFormat(vform);
5620*f5c631daSSadaf Ebrahimi uint64_t inactive_value =
5621*f5c631daSSadaf Ebrahimi FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity);
5622*f5c631daSSadaf Ebrahimi return FPPairedAcrossHelper(vform,
5623*f5c631daSSadaf Ebrahimi dst,
5624*f5c631daSSadaf Ebrahimi src,
5625*f5c631daSSadaf Ebrahimi &Simulator::FPMax<SimFloat16>,
5626*f5c631daSSadaf Ebrahimi &Simulator::FPMax<float>,
5627*f5c631daSSadaf Ebrahimi &Simulator::FPMax<double>,
5628*f5c631daSSadaf Ebrahimi inactive_value);
5629*f5c631daSSadaf Ebrahimi }
5630*f5c631daSSadaf Ebrahimi
5631*f5c631daSSadaf Ebrahimi
fminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5632*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fminv(VectorFormat vform,
5633*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5634*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
5635*f5c631daSSadaf Ebrahimi int lane_size = LaneSizeInBitsFromFormat(vform);
5636*f5c631daSSadaf Ebrahimi uint64_t inactive_value =
5637*f5c631daSSadaf Ebrahimi FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity);
5638*f5c631daSSadaf Ebrahimi return FPPairedAcrossHelper(vform,
5639*f5c631daSSadaf Ebrahimi dst,
5640*f5c631daSSadaf Ebrahimi src,
5641*f5c631daSSadaf Ebrahimi &Simulator::FPMin<SimFloat16>,
5642*f5c631daSSadaf Ebrahimi &Simulator::FPMin<float>,
5643*f5c631daSSadaf Ebrahimi &Simulator::FPMin<double>,
5644*f5c631daSSadaf Ebrahimi inactive_value);
5645*f5c631daSSadaf Ebrahimi }
5646*f5c631daSSadaf Ebrahimi
5647*f5c631daSSadaf Ebrahimi
fmaxnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5648*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
5649*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5650*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
5651*f5c631daSSadaf Ebrahimi int lane_size = LaneSizeInBitsFromFormat(vform);
5652*f5c631daSSadaf Ebrahimi uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5653*f5c631daSSadaf Ebrahimi return FPPairedAcrossHelper(vform,
5654*f5c631daSSadaf Ebrahimi dst,
5655*f5c631daSSadaf Ebrahimi src,
5656*f5c631daSSadaf Ebrahimi &Simulator::FPMaxNM<SimFloat16>,
5657*f5c631daSSadaf Ebrahimi &Simulator::FPMaxNM<float>,
5658*f5c631daSSadaf Ebrahimi &Simulator::FPMaxNM<double>,
5659*f5c631daSSadaf Ebrahimi inactive_value);
5660*f5c631daSSadaf Ebrahimi }
5661*f5c631daSSadaf Ebrahimi
5662*f5c631daSSadaf Ebrahimi
fminnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5663*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fminnmv(VectorFormat vform,
5664*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5665*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
5666*f5c631daSSadaf Ebrahimi int lane_size = LaneSizeInBitsFromFormat(vform);
5667*f5c631daSSadaf Ebrahimi uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5668*f5c631daSSadaf Ebrahimi return FPPairedAcrossHelper(vform,
5669*f5c631daSSadaf Ebrahimi dst,
5670*f5c631daSSadaf Ebrahimi src,
5671*f5c631daSSadaf Ebrahimi &Simulator::FPMinNM<SimFloat16>,
5672*f5c631daSSadaf Ebrahimi &Simulator::FPMinNM<float>,
5673*f5c631daSSadaf Ebrahimi &Simulator::FPMinNM<double>,
5674*f5c631daSSadaf Ebrahimi inactive_value);
5675*f5c631daSSadaf Ebrahimi }
5676*f5c631daSSadaf Ebrahimi
5677*f5c631daSSadaf Ebrahimi
fmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5678*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fmul(VectorFormat vform,
5679*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5680*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5681*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
5682*f5c631daSSadaf Ebrahimi int index) {
5683*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5684*f5c631daSSadaf Ebrahimi SimVRegister temp;
5685*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5686*f5c631daSSadaf Ebrahimi LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5687*f5c631daSSadaf Ebrahimi fmul<SimFloat16>(vform, dst, src1, index_reg);
5688*f5c631daSSadaf Ebrahimi } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5689*f5c631daSSadaf Ebrahimi LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5690*f5c631daSSadaf Ebrahimi fmul<float>(vform, dst, src1, index_reg);
5691*f5c631daSSadaf Ebrahimi } else {
5692*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5693*f5c631daSSadaf Ebrahimi LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5694*f5c631daSSadaf Ebrahimi fmul<double>(vform, dst, src1, index_reg);
5695*f5c631daSSadaf Ebrahimi }
5696*f5c631daSSadaf Ebrahimi return dst;
5697*f5c631daSSadaf Ebrahimi }
5698*f5c631daSSadaf Ebrahimi
5699*f5c631daSSadaf Ebrahimi
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5700*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fmla(VectorFormat vform,
5701*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5702*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5703*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
5704*f5c631daSSadaf Ebrahimi int index) {
5705*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5706*f5c631daSSadaf Ebrahimi SimVRegister temp;
5707*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5708*f5c631daSSadaf Ebrahimi LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5709*f5c631daSSadaf Ebrahimi fmla<SimFloat16>(vform, dst, dst, src1, index_reg);
5710*f5c631daSSadaf Ebrahimi } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5711*f5c631daSSadaf Ebrahimi LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5712*f5c631daSSadaf Ebrahimi fmla<float>(vform, dst, dst, src1, index_reg);
5713*f5c631daSSadaf Ebrahimi } else {
5714*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5715*f5c631daSSadaf Ebrahimi LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5716*f5c631daSSadaf Ebrahimi fmla<double>(vform, dst, dst, src1, index_reg);
5717*f5c631daSSadaf Ebrahimi }
5718*f5c631daSSadaf Ebrahimi return dst;
5719*f5c631daSSadaf Ebrahimi }
5720*f5c631daSSadaf Ebrahimi
5721*f5c631daSSadaf Ebrahimi
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5722*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fmls(VectorFormat vform,
5723*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5724*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5725*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
5726*f5c631daSSadaf Ebrahimi int index) {
5727*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5728*f5c631daSSadaf Ebrahimi SimVRegister temp;
5729*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5730*f5c631daSSadaf Ebrahimi LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5731*f5c631daSSadaf Ebrahimi fmls<SimFloat16>(vform, dst, dst, src1, index_reg);
5732*f5c631daSSadaf Ebrahimi } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5733*f5c631daSSadaf Ebrahimi LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5734*f5c631daSSadaf Ebrahimi fmls<float>(vform, dst, dst, src1, index_reg);
5735*f5c631daSSadaf Ebrahimi } else {
5736*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5737*f5c631daSSadaf Ebrahimi LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5738*f5c631daSSadaf Ebrahimi fmls<double>(vform, dst, dst, src1, index_reg);
5739*f5c631daSSadaf Ebrahimi }
5740*f5c631daSSadaf Ebrahimi return dst;
5741*f5c631daSSadaf Ebrahimi }
5742*f5c631daSSadaf Ebrahimi
5743*f5c631daSSadaf Ebrahimi
fmulx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5744*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fmulx(VectorFormat vform,
5745*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5746*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
5747*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
5748*f5c631daSSadaf Ebrahimi int index) {
5749*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5750*f5c631daSSadaf Ebrahimi SimVRegister temp;
5751*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5752*f5c631daSSadaf Ebrahimi LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5753*f5c631daSSadaf Ebrahimi fmulx<SimFloat16>(vform, dst, src1, index_reg);
5754*f5c631daSSadaf Ebrahimi } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5755*f5c631daSSadaf Ebrahimi LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5756*f5c631daSSadaf Ebrahimi fmulx<float>(vform, dst, src1, index_reg);
5757*f5c631daSSadaf Ebrahimi } else {
5758*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5759*f5c631daSSadaf Ebrahimi LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5760*f5c631daSSadaf Ebrahimi fmulx<double>(vform, dst, src1, index_reg);
5761*f5c631daSSadaf Ebrahimi }
5762*f5c631daSSadaf Ebrahimi return dst;
5763*f5c631daSSadaf Ebrahimi }
5764*f5c631daSSadaf Ebrahimi
5765*f5c631daSSadaf Ebrahimi
frint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,bool inexact_exception,FrintMode frint_mode)5766*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::frint(VectorFormat vform,
5767*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5768*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
5769*f5c631daSSadaf Ebrahimi FPRounding rounding_mode,
5770*f5c631daSSadaf Ebrahimi bool inexact_exception,
5771*f5c631daSSadaf Ebrahimi FrintMode frint_mode) {
5772*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5773*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5774*f5c631daSSadaf Ebrahimi VIXL_ASSERT(frint_mode == kFrintToInteger);
5775*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5776*f5c631daSSadaf Ebrahimi SimFloat16 input = src.Float<SimFloat16>(i);
5777*f5c631daSSadaf Ebrahimi SimFloat16 rounded = FPRoundInt(input, rounding_mode);
5778*f5c631daSSadaf Ebrahimi if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5779*f5c631daSSadaf Ebrahimi FPProcessException();
5780*f5c631daSSadaf Ebrahimi }
5781*f5c631daSSadaf Ebrahimi dst.SetFloat<SimFloat16>(i, rounded);
5782*f5c631daSSadaf Ebrahimi }
5783*f5c631daSSadaf Ebrahimi } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5784*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5785*f5c631daSSadaf Ebrahimi float input = src.Float<float>(i);
5786*f5c631daSSadaf Ebrahimi float rounded = FPRoundInt(input, rounding_mode, frint_mode);
5787*f5c631daSSadaf Ebrahimi
5788*f5c631daSSadaf Ebrahimi if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5789*f5c631daSSadaf Ebrahimi FPProcessException();
5790*f5c631daSSadaf Ebrahimi }
5791*f5c631daSSadaf Ebrahimi dst.SetFloat<float>(i, rounded);
5792*f5c631daSSadaf Ebrahimi }
5793*f5c631daSSadaf Ebrahimi } else {
5794*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5795*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5796*f5c631daSSadaf Ebrahimi double input = src.Float<double>(i);
5797*f5c631daSSadaf Ebrahimi double rounded = FPRoundInt(input, rounding_mode, frint_mode);
5798*f5c631daSSadaf Ebrahimi if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5799*f5c631daSSadaf Ebrahimi FPProcessException();
5800*f5c631daSSadaf Ebrahimi }
5801*f5c631daSSadaf Ebrahimi dst.SetFloat<double>(i, rounded);
5802*f5c631daSSadaf Ebrahimi }
5803*f5c631daSSadaf Ebrahimi }
5804*f5c631daSSadaf Ebrahimi return dst;
5805*f5c631daSSadaf Ebrahimi }
5806*f5c631daSSadaf Ebrahimi
fcvt(VectorFormat dst_vform,VectorFormat src_vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)5807*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fcvt(VectorFormat dst_vform,
5808*f5c631daSSadaf Ebrahimi VectorFormat src_vform,
5809*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5810*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
5811*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
5812*f5c631daSSadaf Ebrahimi unsigned dst_data_size_in_bits = LaneSizeInBitsFromFormat(dst_vform);
5813*f5c631daSSadaf Ebrahimi unsigned src_data_size_in_bits = LaneSizeInBitsFromFormat(src_vform);
5814*f5c631daSSadaf Ebrahimi VectorFormat vform = SVEFormatFromLaneSizeInBits(
5815*f5c631daSSadaf Ebrahimi std::max(dst_data_size_in_bits, src_data_size_in_bits));
5816*f5c631daSSadaf Ebrahimi
5817*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5818*f5c631daSSadaf Ebrahimi if (!pg.IsActive(vform, i)) continue;
5819*f5c631daSSadaf Ebrahimi
5820*f5c631daSSadaf Ebrahimi uint64_t src_raw_bits = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5821*f5c631daSSadaf Ebrahimi 0,
5822*f5c631daSSadaf Ebrahimi src.Uint(vform, i));
5823*f5c631daSSadaf Ebrahimi double dst_value =
5824*f5c631daSSadaf Ebrahimi RawbitsWithSizeToFP<double>(src_data_size_in_bits, src_raw_bits);
5825*f5c631daSSadaf Ebrahimi
5826*f5c631daSSadaf Ebrahimi uint64_t dst_raw_bits =
5827*f5c631daSSadaf Ebrahimi FPToRawbitsWithSize(dst_data_size_in_bits, dst_value);
5828*f5c631daSSadaf Ebrahimi
5829*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, dst_raw_bits);
5830*f5c631daSSadaf Ebrahimi }
5831*f5c631daSSadaf Ebrahimi
5832*f5c631daSSadaf Ebrahimi return dst;
5833*f5c631daSSadaf Ebrahimi }
5834*f5c631daSSadaf Ebrahimi
fcvts(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)5835*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fcvts(VectorFormat vform,
5836*f5c631daSSadaf Ebrahimi unsigned dst_data_size_in_bits,
5837*f5c631daSSadaf Ebrahimi unsigned src_data_size_in_bits,
5838*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5839*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
5840*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
5841*f5c631daSSadaf Ebrahimi FPRounding round,
5842*f5c631daSSadaf Ebrahimi int fbits) {
5843*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5844*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5845*f5c631daSSadaf Ebrahimi
5846*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5847*f5c631daSSadaf Ebrahimi if (!pg.IsActive(vform, i)) continue;
5848*f5c631daSSadaf Ebrahimi
5849*f5c631daSSadaf Ebrahimi uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5850*f5c631daSSadaf Ebrahimi 0,
5851*f5c631daSSadaf Ebrahimi src.Uint(vform, i));
5852*f5c631daSSadaf Ebrahimi double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5853*f5c631daSSadaf Ebrahimi std::pow(2.0, fbits);
5854*f5c631daSSadaf Ebrahimi
5855*f5c631daSSadaf Ebrahimi switch (dst_data_size_in_bits) {
5856*f5c631daSSadaf Ebrahimi case kHRegSize:
5857*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, FPToInt16(result, round));
5858*f5c631daSSadaf Ebrahimi break;
5859*f5c631daSSadaf Ebrahimi case kSRegSize:
5860*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, FPToInt32(result, round));
5861*f5c631daSSadaf Ebrahimi break;
5862*f5c631daSSadaf Ebrahimi case kDRegSize:
5863*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, FPToInt64(result, round));
5864*f5c631daSSadaf Ebrahimi break;
5865*f5c631daSSadaf Ebrahimi default:
5866*f5c631daSSadaf Ebrahimi VIXL_UNIMPLEMENTED();
5867*f5c631daSSadaf Ebrahimi break;
5868*f5c631daSSadaf Ebrahimi }
5869*f5c631daSSadaf Ebrahimi }
5870*f5c631daSSadaf Ebrahimi
5871*f5c631daSSadaf Ebrahimi return dst;
5872*f5c631daSSadaf Ebrahimi }
5873*f5c631daSSadaf Ebrahimi
fcvts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round,int fbits)5874*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fcvts(VectorFormat vform,
5875*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5876*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
5877*f5c631daSSadaf Ebrahimi FPRounding round,
5878*f5c631daSSadaf Ebrahimi int fbits) {
5879*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5880*f5c631daSSadaf Ebrahimi return fcvts(vform,
5881*f5c631daSSadaf Ebrahimi LaneSizeInBitsFromFormat(vform),
5882*f5c631daSSadaf Ebrahimi LaneSizeInBitsFromFormat(vform),
5883*f5c631daSSadaf Ebrahimi dst,
5884*f5c631daSSadaf Ebrahimi GetPTrue(),
5885*f5c631daSSadaf Ebrahimi src,
5886*f5c631daSSadaf Ebrahimi round,
5887*f5c631daSSadaf Ebrahimi fbits);
5888*f5c631daSSadaf Ebrahimi }
5889*f5c631daSSadaf Ebrahimi
fcvtu(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)5890*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fcvtu(VectorFormat vform,
5891*f5c631daSSadaf Ebrahimi unsigned dst_data_size_in_bits,
5892*f5c631daSSadaf Ebrahimi unsigned src_data_size_in_bits,
5893*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5894*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
5895*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
5896*f5c631daSSadaf Ebrahimi FPRounding round,
5897*f5c631daSSadaf Ebrahimi int fbits) {
5898*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5899*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5900*f5c631daSSadaf Ebrahimi
5901*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5902*f5c631daSSadaf Ebrahimi if (!pg.IsActive(vform, i)) continue;
5903*f5c631daSSadaf Ebrahimi
5904*f5c631daSSadaf Ebrahimi uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5905*f5c631daSSadaf Ebrahimi 0,
5906*f5c631daSSadaf Ebrahimi src.Uint(vform, i));
5907*f5c631daSSadaf Ebrahimi double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5908*f5c631daSSadaf Ebrahimi std::pow(2.0, fbits);
5909*f5c631daSSadaf Ebrahimi
5910*f5c631daSSadaf Ebrahimi switch (dst_data_size_in_bits) {
5911*f5c631daSSadaf Ebrahimi case kHRegSize:
5912*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, FPToUInt16(result, round));
5913*f5c631daSSadaf Ebrahimi break;
5914*f5c631daSSadaf Ebrahimi case kSRegSize:
5915*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, FPToUInt32(result, round));
5916*f5c631daSSadaf Ebrahimi break;
5917*f5c631daSSadaf Ebrahimi case kDRegSize:
5918*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, FPToUInt64(result, round));
5919*f5c631daSSadaf Ebrahimi break;
5920*f5c631daSSadaf Ebrahimi default:
5921*f5c631daSSadaf Ebrahimi VIXL_UNIMPLEMENTED();
5922*f5c631daSSadaf Ebrahimi break;
5923*f5c631daSSadaf Ebrahimi }
5924*f5c631daSSadaf Ebrahimi }
5925*f5c631daSSadaf Ebrahimi
5926*f5c631daSSadaf Ebrahimi return dst;
5927*f5c631daSSadaf Ebrahimi }
5928*f5c631daSSadaf Ebrahimi
fcvtu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round,int fbits)5929*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fcvtu(VectorFormat vform,
5930*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5931*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
5932*f5c631daSSadaf Ebrahimi FPRounding round,
5933*f5c631daSSadaf Ebrahimi int fbits) {
5934*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5935*f5c631daSSadaf Ebrahimi return fcvtu(vform,
5936*f5c631daSSadaf Ebrahimi LaneSizeInBitsFromFormat(vform),
5937*f5c631daSSadaf Ebrahimi LaneSizeInBitsFromFormat(vform),
5938*f5c631daSSadaf Ebrahimi dst,
5939*f5c631daSSadaf Ebrahimi GetPTrue(),
5940*f5c631daSSadaf Ebrahimi src,
5941*f5c631daSSadaf Ebrahimi round,
5942*f5c631daSSadaf Ebrahimi fbits);
5943*f5c631daSSadaf Ebrahimi }
5944*f5c631daSSadaf Ebrahimi
fcvtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5945*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fcvtl(VectorFormat vform,
5946*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5947*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
5948*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5949*f5c631daSSadaf Ebrahimi for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
5950*f5c631daSSadaf Ebrahimi // TODO: Full support for SimFloat16 in SimRegister(s).
5951*f5c631daSSadaf Ebrahimi dst.SetFloat(i,
5952*f5c631daSSadaf Ebrahimi FPToFloat(RawbitsToFloat16(src.Float<uint16_t>(i)),
5953*f5c631daSSadaf Ebrahimi ReadDN()));
5954*f5c631daSSadaf Ebrahimi }
5955*f5c631daSSadaf Ebrahimi } else {
5956*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5957*f5c631daSSadaf Ebrahimi for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
5958*f5c631daSSadaf Ebrahimi dst.SetFloat(i, FPToDouble(src.Float<float>(i), ReadDN()));
5959*f5c631daSSadaf Ebrahimi }
5960*f5c631daSSadaf Ebrahimi }
5961*f5c631daSSadaf Ebrahimi return dst;
5962*f5c631daSSadaf Ebrahimi }
5963*f5c631daSSadaf Ebrahimi
5964*f5c631daSSadaf Ebrahimi
fcvtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5965*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fcvtl2(VectorFormat vform,
5966*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5967*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
5968*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform);
5969*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5970*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; i++) {
5971*f5c631daSSadaf Ebrahimi // TODO: Full support for SimFloat16 in SimRegister(s).
5972*f5c631daSSadaf Ebrahimi dst.SetFloat(i,
5973*f5c631daSSadaf Ebrahimi FPToFloat(RawbitsToFloat16(
5974*f5c631daSSadaf Ebrahimi src.Float<uint16_t>(i + lane_count)),
5975*f5c631daSSadaf Ebrahimi ReadDN()));
5976*f5c631daSSadaf Ebrahimi }
5977*f5c631daSSadaf Ebrahimi } else {
5978*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5979*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; i++) {
5980*f5c631daSSadaf Ebrahimi dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count), ReadDN()));
5981*f5c631daSSadaf Ebrahimi }
5982*f5c631daSSadaf Ebrahimi }
5983*f5c631daSSadaf Ebrahimi return dst;
5984*f5c631daSSadaf Ebrahimi }
5985*f5c631daSSadaf Ebrahimi
5986*f5c631daSSadaf Ebrahimi
fcvtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5987*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fcvtn(VectorFormat vform,
5988*f5c631daSSadaf Ebrahimi LogicVRegister dst,
5989*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
5990*f5c631daSSadaf Ebrahimi SimVRegister tmp;
5991*f5c631daSSadaf Ebrahimi LogicVRegister srctmp = mov(kFormat2D, tmp, src);
5992*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
5993*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5994*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5995*f5c631daSSadaf Ebrahimi dst.SetFloat(i,
5996*f5c631daSSadaf Ebrahimi Float16ToRawbits(FPToFloat16(srctmp.Float<float>(i),
5997*f5c631daSSadaf Ebrahimi FPTieEven,
5998*f5c631daSSadaf Ebrahimi ReadDN())));
5999*f5c631daSSadaf Ebrahimi }
6000*f5c631daSSadaf Ebrahimi } else {
6001*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6002*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6003*f5c631daSSadaf Ebrahimi dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPTieEven, ReadDN()));
6004*f5c631daSSadaf Ebrahimi }
6005*f5c631daSSadaf Ebrahimi }
6006*f5c631daSSadaf Ebrahimi return dst;
6007*f5c631daSSadaf Ebrahimi }
6008*f5c631daSSadaf Ebrahimi
6009*f5c631daSSadaf Ebrahimi
fcvtn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6010*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fcvtn2(VectorFormat vform,
6011*f5c631daSSadaf Ebrahimi LogicVRegister dst,
6012*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
6013*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform) / 2;
6014*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6015*f5c631daSSadaf Ebrahimi for (int i = lane_count - 1; i >= 0; i--) {
6016*f5c631daSSadaf Ebrahimi dst.SetFloat(i + lane_count,
6017*f5c631daSSadaf Ebrahimi Float16ToRawbits(
6018*f5c631daSSadaf Ebrahimi FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
6019*f5c631daSSadaf Ebrahimi }
6020*f5c631daSSadaf Ebrahimi } else {
6021*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6022*f5c631daSSadaf Ebrahimi for (int i = lane_count - 1; i >= 0; i--) {
6023*f5c631daSSadaf Ebrahimi dst.SetFloat(i + lane_count,
6024*f5c631daSSadaf Ebrahimi FPToFloat(src.Float<double>(i), FPTieEven, ReadDN()));
6025*f5c631daSSadaf Ebrahimi }
6026*f5c631daSSadaf Ebrahimi }
6027*f5c631daSSadaf Ebrahimi return dst;
6028*f5c631daSSadaf Ebrahimi }
6029*f5c631daSSadaf Ebrahimi
6030*f5c631daSSadaf Ebrahimi
fcvtxn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6031*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fcvtxn(VectorFormat vform,
6032*f5c631daSSadaf Ebrahimi LogicVRegister dst,
6033*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
6034*f5c631daSSadaf Ebrahimi SimVRegister tmp;
6035*f5c631daSSadaf Ebrahimi LogicVRegister srctmp = mov(kFormat2D, tmp, src);
6036*f5c631daSSadaf Ebrahimi int input_lane_count = LaneCountFromFormat(vform);
6037*f5c631daSSadaf Ebrahimi if (IsSVEFormat(vform)) {
6038*f5c631daSSadaf Ebrahimi mov(kFormatVnB, tmp, src);
6039*f5c631daSSadaf Ebrahimi input_lane_count /= 2;
6040*f5c631daSSadaf Ebrahimi }
6041*f5c631daSSadaf Ebrahimi
6042*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
6043*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6044*f5c631daSSadaf Ebrahimi
6045*f5c631daSSadaf Ebrahimi for (int i = 0; i < input_lane_count; i++) {
6046*f5c631daSSadaf Ebrahimi dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPRoundOdd, ReadDN()));
6047*f5c631daSSadaf Ebrahimi }
6048*f5c631daSSadaf Ebrahimi return dst;
6049*f5c631daSSadaf Ebrahimi }
6050*f5c631daSSadaf Ebrahimi
6051*f5c631daSSadaf Ebrahimi
fcvtxn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6052*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
6053*f5c631daSSadaf Ebrahimi LogicVRegister dst,
6054*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
6055*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6056*f5c631daSSadaf Ebrahimi int lane_count = LaneCountFromFormat(vform) / 2;
6057*f5c631daSSadaf Ebrahimi for (int i = lane_count - 1; i >= 0; i--) {
6058*f5c631daSSadaf Ebrahimi dst.SetFloat(i + lane_count,
6059*f5c631daSSadaf Ebrahimi FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));
6060*f5c631daSSadaf Ebrahimi }
6061*f5c631daSSadaf Ebrahimi return dst;
6062*f5c631daSSadaf Ebrahimi }
6063*f5c631daSSadaf Ebrahimi
6064*f5c631daSSadaf Ebrahimi
6065*f5c631daSSadaf Ebrahimi // Based on reference C function recip_sqrt_estimate from ARM ARM.
recip_sqrt_estimate(double a)6066*f5c631daSSadaf Ebrahimi double Simulator::recip_sqrt_estimate(double a) {
6067*f5c631daSSadaf Ebrahimi int quot0, quot1, s;
6068*f5c631daSSadaf Ebrahimi double r;
6069*f5c631daSSadaf Ebrahimi if (a < 0.5) {
6070*f5c631daSSadaf Ebrahimi quot0 = static_cast<int>(a * 512.0);
6071*f5c631daSSadaf Ebrahimi r = 1.0 / sqrt((static_cast<double>(quot0) + 0.5) / 512.0);
6072*f5c631daSSadaf Ebrahimi } else {
6073*f5c631daSSadaf Ebrahimi quot1 = static_cast<int>(a * 256.0);
6074*f5c631daSSadaf Ebrahimi r = 1.0 / sqrt((static_cast<double>(quot1) + 0.5) / 256.0);
6075*f5c631daSSadaf Ebrahimi }
6076*f5c631daSSadaf Ebrahimi s = static_cast<int>(256.0 * r + 0.5);
6077*f5c631daSSadaf Ebrahimi return static_cast<double>(s) / 256.0;
6078*f5c631daSSadaf Ebrahimi }
6079*f5c631daSSadaf Ebrahimi
6080*f5c631daSSadaf Ebrahimi
Bits(uint64_t val,int start_bit,int end_bit)6081*f5c631daSSadaf Ebrahimi static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
6082*f5c631daSSadaf Ebrahimi return ExtractUnsignedBitfield64(start_bit, end_bit, val);
6083*f5c631daSSadaf Ebrahimi }
6084*f5c631daSSadaf Ebrahimi
6085*f5c631daSSadaf Ebrahimi
6086*f5c631daSSadaf Ebrahimi template <typename T>
FPRecipSqrtEstimate(T op)6087*f5c631daSSadaf Ebrahimi T Simulator::FPRecipSqrtEstimate(T op) {
6088*f5c631daSSadaf Ebrahimi if (IsNaN(op)) {
6089*f5c631daSSadaf Ebrahimi return FPProcessNaN(op);
6090*f5c631daSSadaf Ebrahimi } else if (op == 0.0) {
6091*f5c631daSSadaf Ebrahimi if (copysign(1.0, op) < 0.0) {
6092*f5c631daSSadaf Ebrahimi return kFP64NegativeInfinity;
6093*f5c631daSSadaf Ebrahimi } else {
6094*f5c631daSSadaf Ebrahimi return kFP64PositiveInfinity;
6095*f5c631daSSadaf Ebrahimi }
6096*f5c631daSSadaf Ebrahimi } else if (copysign(1.0, op) < 0.0) {
6097*f5c631daSSadaf Ebrahimi FPProcessException();
6098*f5c631daSSadaf Ebrahimi return FPDefaultNaN<T>();
6099*f5c631daSSadaf Ebrahimi } else if (IsInf(op)) {
6100*f5c631daSSadaf Ebrahimi return 0.0;
6101*f5c631daSSadaf Ebrahimi } else {
6102*f5c631daSSadaf Ebrahimi uint64_t fraction;
6103*f5c631daSSadaf Ebrahimi int exp, result_exp;
6104*f5c631daSSadaf Ebrahimi
6105*f5c631daSSadaf Ebrahimi if (IsFloat16<T>()) {
6106*f5c631daSSadaf Ebrahimi exp = Float16Exp(op);
6107*f5c631daSSadaf Ebrahimi fraction = Float16Mantissa(op);
6108*f5c631daSSadaf Ebrahimi fraction <<= 42;
6109*f5c631daSSadaf Ebrahimi } else if (IsFloat32<T>()) {
6110*f5c631daSSadaf Ebrahimi exp = FloatExp(op);
6111*f5c631daSSadaf Ebrahimi fraction = FloatMantissa(op);
6112*f5c631daSSadaf Ebrahimi fraction <<= 29;
6113*f5c631daSSadaf Ebrahimi } else {
6114*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsFloat64<T>());
6115*f5c631daSSadaf Ebrahimi exp = DoubleExp(op);
6116*f5c631daSSadaf Ebrahimi fraction = DoubleMantissa(op);
6117*f5c631daSSadaf Ebrahimi }
6118*f5c631daSSadaf Ebrahimi
6119*f5c631daSSadaf Ebrahimi if (exp == 0) {
6120*f5c631daSSadaf Ebrahimi while (Bits(fraction, 51, 51) == 0) {
6121*f5c631daSSadaf Ebrahimi fraction = Bits(fraction, 50, 0) << 1;
6122*f5c631daSSadaf Ebrahimi exp -= 1;
6123*f5c631daSSadaf Ebrahimi }
6124*f5c631daSSadaf Ebrahimi fraction = Bits(fraction, 50, 0) << 1;
6125*f5c631daSSadaf Ebrahimi }
6126*f5c631daSSadaf Ebrahimi
6127*f5c631daSSadaf Ebrahimi double scaled;
6128*f5c631daSSadaf Ebrahimi if (Bits(exp, 0, 0) == 0) {
6129*f5c631daSSadaf Ebrahimi scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
6130*f5c631daSSadaf Ebrahimi } else {
6131*f5c631daSSadaf Ebrahimi scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
6132*f5c631daSSadaf Ebrahimi }
6133*f5c631daSSadaf Ebrahimi
6134*f5c631daSSadaf Ebrahimi if (IsFloat16<T>()) {
6135*f5c631daSSadaf Ebrahimi result_exp = (44 - exp) / 2;
6136*f5c631daSSadaf Ebrahimi } else if (IsFloat32<T>()) {
6137*f5c631daSSadaf Ebrahimi result_exp = (380 - exp) / 2;
6138*f5c631daSSadaf Ebrahimi } else {
6139*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsFloat64<T>());
6140*f5c631daSSadaf Ebrahimi result_exp = (3068 - exp) / 2;
6141*f5c631daSSadaf Ebrahimi }
6142*f5c631daSSadaf Ebrahimi
6143*f5c631daSSadaf Ebrahimi uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));
6144*f5c631daSSadaf Ebrahimi
6145*f5c631daSSadaf Ebrahimi if (IsFloat16<T>()) {
6146*f5c631daSSadaf Ebrahimi uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
6147*f5c631daSSadaf Ebrahimi uint16_t est_bits = static_cast<uint16_t>(Bits(estimate, 51, 42));
6148*f5c631daSSadaf Ebrahimi return Float16Pack(0, exp_bits, est_bits);
6149*f5c631daSSadaf Ebrahimi } else if (IsFloat32<T>()) {
6150*f5c631daSSadaf Ebrahimi uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
6151*f5c631daSSadaf Ebrahimi uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
6152*f5c631daSSadaf Ebrahimi return FloatPack(0, exp_bits, est_bits);
6153*f5c631daSSadaf Ebrahimi } else {
6154*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsFloat64<T>());
6155*f5c631daSSadaf Ebrahimi return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
6156*f5c631daSSadaf Ebrahimi }
6157*f5c631daSSadaf Ebrahimi }
6158*f5c631daSSadaf Ebrahimi }
6159*f5c631daSSadaf Ebrahimi
6160*f5c631daSSadaf Ebrahimi
frsqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6161*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::frsqrte(VectorFormat vform,
6162*f5c631daSSadaf Ebrahimi LogicVRegister dst,
6163*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
6164*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
6165*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6166*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6167*f5c631daSSadaf Ebrahimi SimFloat16 input = src.Float<SimFloat16>(i);
6168*f5c631daSSadaf Ebrahimi dst.SetFloat(vform, i, FPRecipSqrtEstimate<SimFloat16>(input));
6169*f5c631daSSadaf Ebrahimi }
6170*f5c631daSSadaf Ebrahimi } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6171*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6172*f5c631daSSadaf Ebrahimi float input = src.Float<float>(i);
6173*f5c631daSSadaf Ebrahimi dst.SetFloat(vform, i, FPRecipSqrtEstimate<float>(input));
6174*f5c631daSSadaf Ebrahimi }
6175*f5c631daSSadaf Ebrahimi } else {
6176*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6177*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6178*f5c631daSSadaf Ebrahimi double input = src.Float<double>(i);
6179*f5c631daSSadaf Ebrahimi dst.SetFloat(vform, i, FPRecipSqrtEstimate<double>(input));
6180*f5c631daSSadaf Ebrahimi }
6181*f5c631daSSadaf Ebrahimi }
6182*f5c631daSSadaf Ebrahimi return dst;
6183*f5c631daSSadaf Ebrahimi }
6184*f5c631daSSadaf Ebrahimi
6185*f5c631daSSadaf Ebrahimi template <typename T>
FPRecipEstimate(T op,FPRounding rounding)6186*f5c631daSSadaf Ebrahimi T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
6187*f5c631daSSadaf Ebrahimi uint32_t sign;
6188*f5c631daSSadaf Ebrahimi
6189*f5c631daSSadaf Ebrahimi if (IsFloat16<T>()) {
6190*f5c631daSSadaf Ebrahimi sign = Float16Sign(op);
6191*f5c631daSSadaf Ebrahimi } else if (IsFloat32<T>()) {
6192*f5c631daSSadaf Ebrahimi sign = FloatSign(op);
6193*f5c631daSSadaf Ebrahimi } else {
6194*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsFloat64<T>());
6195*f5c631daSSadaf Ebrahimi sign = DoubleSign(op);
6196*f5c631daSSadaf Ebrahimi }
6197*f5c631daSSadaf Ebrahimi
6198*f5c631daSSadaf Ebrahimi if (IsNaN(op)) {
6199*f5c631daSSadaf Ebrahimi return FPProcessNaN(op);
6200*f5c631daSSadaf Ebrahimi } else if (IsInf(op)) {
6201*f5c631daSSadaf Ebrahimi return (sign == 1) ? -0.0 : 0.0;
6202*f5c631daSSadaf Ebrahimi } else if (op == 0.0) {
6203*f5c631daSSadaf Ebrahimi FPProcessException(); // FPExc_DivideByZero exception.
6204*f5c631daSSadaf Ebrahimi return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
6205*f5c631daSSadaf Ebrahimi } else if ((IsFloat16<T>() && (std::fabs(op) < std::pow(2.0, -16.0))) ||
6206*f5c631daSSadaf Ebrahimi (IsFloat32<T>() && (std::fabs(op) < std::pow(2.0, -128.0))) ||
6207*f5c631daSSadaf Ebrahimi (IsFloat64<T>() && (std::fabs(op) < std::pow(2.0, -1024.0)))) {
6208*f5c631daSSadaf Ebrahimi bool overflow_to_inf = false;
6209*f5c631daSSadaf Ebrahimi switch (rounding) {
6210*f5c631daSSadaf Ebrahimi case FPTieEven:
6211*f5c631daSSadaf Ebrahimi overflow_to_inf = true;
6212*f5c631daSSadaf Ebrahimi break;
6213*f5c631daSSadaf Ebrahimi case FPPositiveInfinity:
6214*f5c631daSSadaf Ebrahimi overflow_to_inf = (sign == 0);
6215*f5c631daSSadaf Ebrahimi break;
6216*f5c631daSSadaf Ebrahimi case FPNegativeInfinity:
6217*f5c631daSSadaf Ebrahimi overflow_to_inf = (sign == 1);
6218*f5c631daSSadaf Ebrahimi break;
6219*f5c631daSSadaf Ebrahimi case FPZero:
6220*f5c631daSSadaf Ebrahimi overflow_to_inf = false;
6221*f5c631daSSadaf Ebrahimi break;
6222*f5c631daSSadaf Ebrahimi default:
6223*f5c631daSSadaf Ebrahimi break;
6224*f5c631daSSadaf Ebrahimi }
6225*f5c631daSSadaf Ebrahimi FPProcessException(); // FPExc_Overflow and FPExc_Inexact.
6226*f5c631daSSadaf Ebrahimi if (overflow_to_inf) {
6227*f5c631daSSadaf Ebrahimi return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
6228*f5c631daSSadaf Ebrahimi } else {
6229*f5c631daSSadaf Ebrahimi // Return FPMaxNormal(sign).
6230*f5c631daSSadaf Ebrahimi if (IsFloat16<T>()) {
6231*f5c631daSSadaf Ebrahimi return Float16Pack(sign, 0x1f, 0x3ff);
6232*f5c631daSSadaf Ebrahimi } else if (IsFloat32<T>()) {
6233*f5c631daSSadaf Ebrahimi return FloatPack(sign, 0xfe, 0x07fffff);
6234*f5c631daSSadaf Ebrahimi } else {
6235*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsFloat64<T>());
6236*f5c631daSSadaf Ebrahimi return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
6237*f5c631daSSadaf Ebrahimi }
6238*f5c631daSSadaf Ebrahimi }
6239*f5c631daSSadaf Ebrahimi } else {
6240*f5c631daSSadaf Ebrahimi uint64_t fraction;
6241*f5c631daSSadaf Ebrahimi int exp, result_exp;
6242*f5c631daSSadaf Ebrahimi
6243*f5c631daSSadaf Ebrahimi if (IsFloat16<T>()) {
6244*f5c631daSSadaf Ebrahimi sign = Float16Sign(op);
6245*f5c631daSSadaf Ebrahimi exp = Float16Exp(op);
6246*f5c631daSSadaf Ebrahimi fraction = Float16Mantissa(op);
6247*f5c631daSSadaf Ebrahimi fraction <<= 42;
6248*f5c631daSSadaf Ebrahimi } else if (IsFloat32<T>()) {
6249*f5c631daSSadaf Ebrahimi sign = FloatSign(op);
6250*f5c631daSSadaf Ebrahimi exp = FloatExp(op);
6251*f5c631daSSadaf Ebrahimi fraction = FloatMantissa(op);
6252*f5c631daSSadaf Ebrahimi fraction <<= 29;
6253*f5c631daSSadaf Ebrahimi } else {
6254*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsFloat64<T>());
6255*f5c631daSSadaf Ebrahimi sign = DoubleSign(op);
6256*f5c631daSSadaf Ebrahimi exp = DoubleExp(op);
6257*f5c631daSSadaf Ebrahimi fraction = DoubleMantissa(op);
6258*f5c631daSSadaf Ebrahimi }
6259*f5c631daSSadaf Ebrahimi
6260*f5c631daSSadaf Ebrahimi if (exp == 0) {
6261*f5c631daSSadaf Ebrahimi if (Bits(fraction, 51, 51) == 0) {
6262*f5c631daSSadaf Ebrahimi exp -= 1;
6263*f5c631daSSadaf Ebrahimi fraction = Bits(fraction, 49, 0) << 2;
6264*f5c631daSSadaf Ebrahimi } else {
6265*f5c631daSSadaf Ebrahimi fraction = Bits(fraction, 50, 0) << 1;
6266*f5c631daSSadaf Ebrahimi }
6267*f5c631daSSadaf Ebrahimi }
6268*f5c631daSSadaf Ebrahimi
6269*f5c631daSSadaf Ebrahimi double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
6270*f5c631daSSadaf Ebrahimi
6271*f5c631daSSadaf Ebrahimi if (IsFloat16<T>()) {
6272*f5c631daSSadaf Ebrahimi result_exp = (29 - exp); // In range 29-30 = -1 to 29+1 = 30.
6273*f5c631daSSadaf Ebrahimi } else if (IsFloat32<T>()) {
6274*f5c631daSSadaf Ebrahimi result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254.
6275*f5c631daSSadaf Ebrahimi } else {
6276*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsFloat64<T>());
6277*f5c631daSSadaf Ebrahimi result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046.
6278*f5c631daSSadaf Ebrahimi }
6279*f5c631daSSadaf Ebrahimi
6280*f5c631daSSadaf Ebrahimi double estimate = recip_estimate(scaled);
6281*f5c631daSSadaf Ebrahimi
6282*f5c631daSSadaf Ebrahimi fraction = DoubleMantissa(estimate);
6283*f5c631daSSadaf Ebrahimi if (result_exp == 0) {
6284*f5c631daSSadaf Ebrahimi fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
6285*f5c631daSSadaf Ebrahimi } else if (result_exp == -1) {
6286*f5c631daSSadaf Ebrahimi fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
6287*f5c631daSSadaf Ebrahimi result_exp = 0;
6288*f5c631daSSadaf Ebrahimi }
6289*f5c631daSSadaf Ebrahimi if (IsFloat16<T>()) {
6290*f5c631daSSadaf Ebrahimi uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
6291*f5c631daSSadaf Ebrahimi uint16_t frac_bits = static_cast<uint16_t>(Bits(fraction, 51, 42));
6292*f5c631daSSadaf Ebrahimi return Float16Pack(sign, exp_bits, frac_bits);
6293*f5c631daSSadaf Ebrahimi } else if (IsFloat32<T>()) {
6294*f5c631daSSadaf Ebrahimi uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
6295*f5c631daSSadaf Ebrahimi uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
6296*f5c631daSSadaf Ebrahimi return FloatPack(sign, exp_bits, frac_bits);
6297*f5c631daSSadaf Ebrahimi } else {
6298*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsFloat64<T>());
6299*f5c631daSSadaf Ebrahimi return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
6300*f5c631daSSadaf Ebrahimi }
6301*f5c631daSSadaf Ebrahimi }
6302*f5c631daSSadaf Ebrahimi }
6303*f5c631daSSadaf Ebrahimi
6304*f5c631daSSadaf Ebrahimi
frecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round)6305*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::frecpe(VectorFormat vform,
6306*f5c631daSSadaf Ebrahimi LogicVRegister dst,
6307*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
6308*f5c631daSSadaf Ebrahimi FPRounding round) {
6309*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
6310*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6311*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6312*f5c631daSSadaf Ebrahimi SimFloat16 input = src.Float<SimFloat16>(i);
6313*f5c631daSSadaf Ebrahimi dst.SetFloat(vform, i, FPRecipEstimate<SimFloat16>(input, round));
6314*f5c631daSSadaf Ebrahimi }
6315*f5c631daSSadaf Ebrahimi } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6316*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6317*f5c631daSSadaf Ebrahimi float input = src.Float<float>(i);
6318*f5c631daSSadaf Ebrahimi dst.SetFloat(vform, i, FPRecipEstimate<float>(input, round));
6319*f5c631daSSadaf Ebrahimi }
6320*f5c631daSSadaf Ebrahimi } else {
6321*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6322*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6323*f5c631daSSadaf Ebrahimi double input = src.Float<double>(i);
6324*f5c631daSSadaf Ebrahimi dst.SetFloat(vform, i, FPRecipEstimate<double>(input, round));
6325*f5c631daSSadaf Ebrahimi }
6326*f5c631daSSadaf Ebrahimi }
6327*f5c631daSSadaf Ebrahimi return dst;
6328*f5c631daSSadaf Ebrahimi }
6329*f5c631daSSadaf Ebrahimi
6330*f5c631daSSadaf Ebrahimi
ursqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6331*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::ursqrte(VectorFormat vform,
6332*f5c631daSSadaf Ebrahimi LogicVRegister dst,
6333*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
6334*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
6335*f5c631daSSadaf Ebrahimi uint64_t operand;
6336*f5c631daSSadaf Ebrahimi uint32_t result;
6337*f5c631daSSadaf Ebrahimi double dp_operand, dp_result;
6338*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6339*f5c631daSSadaf Ebrahimi operand = src.Uint(vform, i);
6340*f5c631daSSadaf Ebrahimi if (operand <= 0x3FFFFFFF) {
6341*f5c631daSSadaf Ebrahimi result = 0xFFFFFFFF;
6342*f5c631daSSadaf Ebrahimi } else {
6343*f5c631daSSadaf Ebrahimi dp_operand = operand * std::pow(2.0, -32);
6344*f5c631daSSadaf Ebrahimi dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
6345*f5c631daSSadaf Ebrahimi result = static_cast<uint32_t>(dp_result);
6346*f5c631daSSadaf Ebrahimi }
6347*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result);
6348*f5c631daSSadaf Ebrahimi }
6349*f5c631daSSadaf Ebrahimi return dst;
6350*f5c631daSSadaf Ebrahimi }
6351*f5c631daSSadaf Ebrahimi
6352*f5c631daSSadaf Ebrahimi
6353*f5c631daSSadaf Ebrahimi // Based on reference C function recip_estimate from ARM ARM.
recip_estimate(double a)6354*f5c631daSSadaf Ebrahimi double Simulator::recip_estimate(double a) {
6355*f5c631daSSadaf Ebrahimi int q, s;
6356*f5c631daSSadaf Ebrahimi double r;
6357*f5c631daSSadaf Ebrahimi q = static_cast<int>(a * 512.0);
6358*f5c631daSSadaf Ebrahimi r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
6359*f5c631daSSadaf Ebrahimi s = static_cast<int>(256.0 * r + 0.5);
6360*f5c631daSSadaf Ebrahimi return static_cast<double>(s) / 256.0;
6361*f5c631daSSadaf Ebrahimi }
6362*f5c631daSSadaf Ebrahimi
6363*f5c631daSSadaf Ebrahimi
urecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6364*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::urecpe(VectorFormat vform,
6365*f5c631daSSadaf Ebrahimi LogicVRegister dst,
6366*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
6367*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
6368*f5c631daSSadaf Ebrahimi uint64_t operand;
6369*f5c631daSSadaf Ebrahimi uint32_t result;
6370*f5c631daSSadaf Ebrahimi double dp_operand, dp_result;
6371*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6372*f5c631daSSadaf Ebrahimi operand = src.Uint(vform, i);
6373*f5c631daSSadaf Ebrahimi if (operand <= 0x7FFFFFFF) {
6374*f5c631daSSadaf Ebrahimi result = 0xFFFFFFFF;
6375*f5c631daSSadaf Ebrahimi } else {
6376*f5c631daSSadaf Ebrahimi dp_operand = operand * std::pow(2.0, -32);
6377*f5c631daSSadaf Ebrahimi dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
6378*f5c631daSSadaf Ebrahimi result = static_cast<uint32_t>(dp_result);
6379*f5c631daSSadaf Ebrahimi }
6380*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result);
6381*f5c631daSSadaf Ebrahimi }
6382*f5c631daSSadaf Ebrahimi return dst;
6383*f5c631daSSadaf Ebrahimi }
6384*f5c631daSSadaf Ebrahimi
pfalse(LogicPRegister dst)6385*f5c631daSSadaf Ebrahimi LogicPRegister Simulator::pfalse(LogicPRegister dst) {
6386*f5c631daSSadaf Ebrahimi dst.Clear();
6387*f5c631daSSadaf Ebrahimi return dst;
6388*f5c631daSSadaf Ebrahimi }
6389*f5c631daSSadaf Ebrahimi
pfirst(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)6390*f5c631daSSadaf Ebrahimi LogicPRegister Simulator::pfirst(LogicPRegister dst,
6391*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
6392*f5c631daSSadaf Ebrahimi const LogicPRegister& src) {
6393*f5c631daSSadaf Ebrahimi int first_pg = GetFirstActive(kFormatVnB, pg);
6394*f5c631daSSadaf Ebrahimi VIXL_ASSERT(first_pg < LaneCountFromFormat(kFormatVnB));
6395*f5c631daSSadaf Ebrahimi mov(dst, src);
6396*f5c631daSSadaf Ebrahimi if (first_pg >= 0) dst.SetActive(kFormatVnB, first_pg, true);
6397*f5c631daSSadaf Ebrahimi return dst;
6398*f5c631daSSadaf Ebrahimi }
6399*f5c631daSSadaf Ebrahimi
ptrue(VectorFormat vform,LogicPRegister dst,int pattern)6400*f5c631daSSadaf Ebrahimi LogicPRegister Simulator::ptrue(VectorFormat vform,
6401*f5c631daSSadaf Ebrahimi LogicPRegister dst,
6402*f5c631daSSadaf Ebrahimi int pattern) {
6403*f5c631daSSadaf Ebrahimi int count = GetPredicateConstraintLaneCount(vform, pattern);
6404*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6405*f5c631daSSadaf Ebrahimi dst.SetActive(vform, i, i < count);
6406*f5c631daSSadaf Ebrahimi }
6407*f5c631daSSadaf Ebrahimi return dst;
6408*f5c631daSSadaf Ebrahimi }
6409*f5c631daSSadaf Ebrahimi
pnext(VectorFormat vform,LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)6410*f5c631daSSadaf Ebrahimi LogicPRegister Simulator::pnext(VectorFormat vform,
6411*f5c631daSSadaf Ebrahimi LogicPRegister dst,
6412*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
6413*f5c631daSSadaf Ebrahimi const LogicPRegister& src) {
6414*f5c631daSSadaf Ebrahimi int next = GetLastActive(vform, src) + 1;
6415*f5c631daSSadaf Ebrahimi while (next < LaneCountFromFormat(vform)) {
6416*f5c631daSSadaf Ebrahimi if (pg.IsActive(vform, next)) break;
6417*f5c631daSSadaf Ebrahimi next++;
6418*f5c631daSSadaf Ebrahimi }
6419*f5c631daSSadaf Ebrahimi
6420*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6421*f5c631daSSadaf Ebrahimi dst.SetActive(vform, i, (i == next));
6422*f5c631daSSadaf Ebrahimi }
6423*f5c631daSSadaf Ebrahimi return dst;
6424*f5c631daSSadaf Ebrahimi }
6425*f5c631daSSadaf Ebrahimi
6426*f5c631daSSadaf Ebrahimi template <typename T>
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6427*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::frecpx(VectorFormat vform,
6428*f5c631daSSadaf Ebrahimi LogicVRegister dst,
6429*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
6430*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
6431*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6432*f5c631daSSadaf Ebrahimi T op = src.Float<T>(i);
6433*f5c631daSSadaf Ebrahimi T result;
6434*f5c631daSSadaf Ebrahimi if (IsNaN(op)) {
6435*f5c631daSSadaf Ebrahimi result = FPProcessNaN(op);
6436*f5c631daSSadaf Ebrahimi } else {
6437*f5c631daSSadaf Ebrahimi int exp;
6438*f5c631daSSadaf Ebrahimi uint32_t sign;
6439*f5c631daSSadaf Ebrahimi if (IsFloat16<T>()) {
6440*f5c631daSSadaf Ebrahimi sign = Float16Sign(op);
6441*f5c631daSSadaf Ebrahimi exp = Float16Exp(op);
6442*f5c631daSSadaf Ebrahimi exp = (exp == 0) ? (0x1F - 1) : static_cast<int>(Bits(~exp, 4, 0));
6443*f5c631daSSadaf Ebrahimi result = Float16Pack(sign, exp, 0);
6444*f5c631daSSadaf Ebrahimi } else if (IsFloat32<T>()) {
6445*f5c631daSSadaf Ebrahimi sign = FloatSign(op);
6446*f5c631daSSadaf Ebrahimi exp = FloatExp(op);
6447*f5c631daSSadaf Ebrahimi exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
6448*f5c631daSSadaf Ebrahimi result = FloatPack(sign, exp, 0);
6449*f5c631daSSadaf Ebrahimi } else {
6450*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsFloat64<T>());
6451*f5c631daSSadaf Ebrahimi sign = DoubleSign(op);
6452*f5c631daSSadaf Ebrahimi exp = DoubleExp(op);
6453*f5c631daSSadaf Ebrahimi exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
6454*f5c631daSSadaf Ebrahimi result = DoublePack(sign, exp, 0);
6455*f5c631daSSadaf Ebrahimi }
6456*f5c631daSSadaf Ebrahimi }
6457*f5c631daSSadaf Ebrahimi dst.SetFloat(i, result);
6458*f5c631daSSadaf Ebrahimi }
6459*f5c631daSSadaf Ebrahimi return dst;
6460*f5c631daSSadaf Ebrahimi }
6461*f5c631daSSadaf Ebrahimi
6462*f5c631daSSadaf Ebrahimi
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6463*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::frecpx(VectorFormat vform,
6464*f5c631daSSadaf Ebrahimi LogicVRegister dst,
6465*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
6466*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6467*f5c631daSSadaf Ebrahimi frecpx<SimFloat16>(vform, dst, src);
6468*f5c631daSSadaf Ebrahimi } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6469*f5c631daSSadaf Ebrahimi frecpx<float>(vform, dst, src);
6470*f5c631daSSadaf Ebrahimi } else {
6471*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6472*f5c631daSSadaf Ebrahimi frecpx<double>(vform, dst, src);
6473*f5c631daSSadaf Ebrahimi }
6474*f5c631daSSadaf Ebrahimi return dst;
6475*f5c631daSSadaf Ebrahimi }
6476*f5c631daSSadaf Ebrahimi
flogb(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6477*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::flogb(VectorFormat vform,
6478*f5c631daSSadaf Ebrahimi LogicVRegister dst,
6479*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
6480*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6481*f5c631daSSadaf Ebrahimi double op = 0.0;
6482*f5c631daSSadaf Ebrahimi switch (vform) {
6483*f5c631daSSadaf Ebrahimi case kFormatVnH:
6484*f5c631daSSadaf Ebrahimi op = FPToDouble(src.Float<SimFloat16>(i), kIgnoreDefaultNaN);
6485*f5c631daSSadaf Ebrahimi break;
6486*f5c631daSSadaf Ebrahimi case kFormatVnS:
6487*f5c631daSSadaf Ebrahimi op = src.Float<float>(i);
6488*f5c631daSSadaf Ebrahimi break;
6489*f5c631daSSadaf Ebrahimi case kFormatVnD:
6490*f5c631daSSadaf Ebrahimi op = src.Float<double>(i);
6491*f5c631daSSadaf Ebrahimi break;
6492*f5c631daSSadaf Ebrahimi default:
6493*f5c631daSSadaf Ebrahimi VIXL_UNREACHABLE();
6494*f5c631daSSadaf Ebrahimi }
6495*f5c631daSSadaf Ebrahimi
6496*f5c631daSSadaf Ebrahimi switch (std::fpclassify(op)) {
6497*f5c631daSSadaf Ebrahimi case FP_INFINITE:
6498*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, MaxIntFromFormat(vform));
6499*f5c631daSSadaf Ebrahimi break;
6500*f5c631daSSadaf Ebrahimi case FP_NAN:
6501*f5c631daSSadaf Ebrahimi case FP_ZERO:
6502*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, MinIntFromFormat(vform));
6503*f5c631daSSadaf Ebrahimi break;
6504*f5c631daSSadaf Ebrahimi case FP_SUBNORMAL: {
6505*f5c631daSSadaf Ebrahimi // DoubleMantissa returns the mantissa of its input, leaving 12 zero
6506*f5c631daSSadaf Ebrahimi // bits where the sign and exponent would be. We subtract 12 to
6507*f5c631daSSadaf Ebrahimi // find the number of leading zero bits in the mantissa itself.
6508*f5c631daSSadaf Ebrahimi int64_t mant_zero_count = CountLeadingZeros(DoubleMantissa(op)) - 12;
6509*f5c631daSSadaf Ebrahimi // Log2 of a subnormal is the lowest exponent a normal number can
6510*f5c631daSSadaf Ebrahimi // represent, together with the zeros in the mantissa.
6511*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, -1023 - mant_zero_count);
6512*f5c631daSSadaf Ebrahimi break;
6513*f5c631daSSadaf Ebrahimi }
6514*f5c631daSSadaf Ebrahimi case FP_NORMAL:
6515*f5c631daSSadaf Ebrahimi // Log2 of a normal number is the exponent minus the bias.
6516*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, static_cast<int64_t>(DoubleExp(op)) - 1023);
6517*f5c631daSSadaf Ebrahimi break;
6518*f5c631daSSadaf Ebrahimi }
6519*f5c631daSSadaf Ebrahimi }
6520*f5c631daSSadaf Ebrahimi return dst;
6521*f5c631daSSadaf Ebrahimi }
6522*f5c631daSSadaf Ebrahimi
ftsmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6523*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::ftsmul(VectorFormat vform,
6524*f5c631daSSadaf Ebrahimi LogicVRegister dst,
6525*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
6526*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
6527*f5c631daSSadaf Ebrahimi SimVRegister maybe_neg_src1;
6528*f5c631daSSadaf Ebrahimi
6529*f5c631daSSadaf Ebrahimi // The bottom bit of src2 controls the sign of the result. Use it to
6530*f5c631daSSadaf Ebrahimi // conditionally invert the sign of one `fmul` operand.
6531*f5c631daSSadaf Ebrahimi shl(vform, maybe_neg_src1, src2, LaneSizeInBitsFromFormat(vform) - 1);
6532*f5c631daSSadaf Ebrahimi eor(vform, maybe_neg_src1, maybe_neg_src1, src1);
6533*f5c631daSSadaf Ebrahimi
6534*f5c631daSSadaf Ebrahimi // Multiply src1 by the modified neg_src1, which is potentially its negation.
6535*f5c631daSSadaf Ebrahimi // In the case of NaNs, NaN * -NaN will return the first NaN intact, so src1,
6536*f5c631daSSadaf Ebrahimi // rather than neg_src1, must be the first source argument.
6537*f5c631daSSadaf Ebrahimi fmul(vform, dst, src1, maybe_neg_src1);
6538*f5c631daSSadaf Ebrahimi
6539*f5c631daSSadaf Ebrahimi return dst;
6540*f5c631daSSadaf Ebrahimi }
6541*f5c631daSSadaf Ebrahimi
ftssel(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6542*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::ftssel(VectorFormat vform,
6543*f5c631daSSadaf Ebrahimi LogicVRegister dst,
6544*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
6545*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
6546*f5c631daSSadaf Ebrahimi unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
6547*f5c631daSSadaf Ebrahimi uint64_t sign_bit = UINT64_C(1) << (lane_bits - 1);
6548*f5c631daSSadaf Ebrahimi uint64_t one;
6549*f5c631daSSadaf Ebrahimi
6550*f5c631daSSadaf Ebrahimi if (lane_bits == kHRegSize) {
6551*f5c631daSSadaf Ebrahimi one = Float16ToRawbits(Float16(1.0));
6552*f5c631daSSadaf Ebrahimi } else if (lane_bits == kSRegSize) {
6553*f5c631daSSadaf Ebrahimi one = FloatToRawbits(1.0);
6554*f5c631daSSadaf Ebrahimi } else {
6555*f5c631daSSadaf Ebrahimi VIXL_ASSERT(lane_bits == kDRegSize);
6556*f5c631daSSadaf Ebrahimi one = DoubleToRawbits(1.0);
6557*f5c631daSSadaf Ebrahimi }
6558*f5c631daSSadaf Ebrahimi
6559*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6560*f5c631daSSadaf Ebrahimi // Use integer accessors for this operation, as this is a data manipulation
6561*f5c631daSSadaf Ebrahimi // task requiring no calculation.
6562*f5c631daSSadaf Ebrahimi uint64_t op = src1.Uint(vform, i);
6563*f5c631daSSadaf Ebrahimi
6564*f5c631daSSadaf Ebrahimi // Only the bottom two bits of the src2 register are significant, indicating
6565*f5c631daSSadaf Ebrahimi // the quadrant. Bit 0 controls whether src1 or 1.0 is written to dst. Bit 1
6566*f5c631daSSadaf Ebrahimi // determines the sign of the value written to dst.
6567*f5c631daSSadaf Ebrahimi uint64_t q = src2.Uint(vform, i);
6568*f5c631daSSadaf Ebrahimi if ((q & 1) == 1) op = one;
6569*f5c631daSSadaf Ebrahimi if ((q & 2) == 2) op ^= sign_bit;
6570*f5c631daSSadaf Ebrahimi
6571*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, op);
6572*f5c631daSSadaf Ebrahimi }
6573*f5c631daSSadaf Ebrahimi
6574*f5c631daSSadaf Ebrahimi return dst;
6575*f5c631daSSadaf Ebrahimi }
6576*f5c631daSSadaf Ebrahimi
6577*f5c631daSSadaf Ebrahimi template <typename T>
FTMaddHelper(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,uint64_t coeff_pos,uint64_t coeff_neg)6578*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::FTMaddHelper(VectorFormat vform,
6579*f5c631daSSadaf Ebrahimi LogicVRegister dst,
6580*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
6581*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
6582*f5c631daSSadaf Ebrahimi uint64_t coeff_pos,
6583*f5c631daSSadaf Ebrahimi uint64_t coeff_neg) {
6584*f5c631daSSadaf Ebrahimi SimVRegister zero;
6585*f5c631daSSadaf Ebrahimi dup_immediate(kFormatVnB, zero, 0);
6586*f5c631daSSadaf Ebrahimi
6587*f5c631daSSadaf Ebrahimi SimVRegister cf;
6588*f5c631daSSadaf Ebrahimi SimVRegister cfn;
6589*f5c631daSSadaf Ebrahimi dup_immediate(vform, cf, coeff_pos);
6590*f5c631daSSadaf Ebrahimi dup_immediate(vform, cfn, coeff_neg);
6591*f5c631daSSadaf Ebrahimi
6592*f5c631daSSadaf Ebrahimi // The specification requires testing the top bit of the raw value, rather
6593*f5c631daSSadaf Ebrahimi // than the sign of the floating point number, so use an integer comparison
6594*f5c631daSSadaf Ebrahimi // here.
6595*f5c631daSSadaf Ebrahimi SimPRegister is_neg;
6596*f5c631daSSadaf Ebrahimi SVEIntCompareVectorsHelper(lt,
6597*f5c631daSSadaf Ebrahimi vform,
6598*f5c631daSSadaf Ebrahimi is_neg,
6599*f5c631daSSadaf Ebrahimi GetPTrue(),
6600*f5c631daSSadaf Ebrahimi src2,
6601*f5c631daSSadaf Ebrahimi zero,
6602*f5c631daSSadaf Ebrahimi false,
6603*f5c631daSSadaf Ebrahimi LeaveFlags);
6604*f5c631daSSadaf Ebrahimi mov_merging(vform, cf, is_neg, cfn);
6605*f5c631daSSadaf Ebrahimi
6606*f5c631daSSadaf Ebrahimi SimVRegister temp;
6607*f5c631daSSadaf Ebrahimi fabs_<T>(vform, temp, src2);
6608*f5c631daSSadaf Ebrahimi fmla<T>(vform, cf, cf, src1, temp);
6609*f5c631daSSadaf Ebrahimi mov(vform, dst, cf);
6610*f5c631daSSadaf Ebrahimi return dst;
6611*f5c631daSSadaf Ebrahimi }
6612*f5c631daSSadaf Ebrahimi
6613*f5c631daSSadaf Ebrahimi
ftmad(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,unsigned index)6614*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::ftmad(VectorFormat vform,
6615*f5c631daSSadaf Ebrahimi LogicVRegister dst,
6616*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
6617*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
6618*f5c631daSSadaf Ebrahimi unsigned index) {
6619*f5c631daSSadaf Ebrahimi static const uint64_t ftmad_coeff16[] = {0x3c00,
6620*f5c631daSSadaf Ebrahimi 0xb155,
6621*f5c631daSSadaf Ebrahimi 0x2030,
6622*f5c631daSSadaf Ebrahimi 0x0000,
6623*f5c631daSSadaf Ebrahimi 0x0000,
6624*f5c631daSSadaf Ebrahimi 0x0000,
6625*f5c631daSSadaf Ebrahimi 0x0000,
6626*f5c631daSSadaf Ebrahimi 0x0000,
6627*f5c631daSSadaf Ebrahimi 0x3c00,
6628*f5c631daSSadaf Ebrahimi 0xb800,
6629*f5c631daSSadaf Ebrahimi 0x293a,
6630*f5c631daSSadaf Ebrahimi 0x0000,
6631*f5c631daSSadaf Ebrahimi 0x0000,
6632*f5c631daSSadaf Ebrahimi 0x0000,
6633*f5c631daSSadaf Ebrahimi 0x0000,
6634*f5c631daSSadaf Ebrahimi 0x0000};
6635*f5c631daSSadaf Ebrahimi
6636*f5c631daSSadaf Ebrahimi static const uint64_t ftmad_coeff32[] = {0x3f800000,
6637*f5c631daSSadaf Ebrahimi 0xbe2aaaab,
6638*f5c631daSSadaf Ebrahimi 0x3c088886,
6639*f5c631daSSadaf Ebrahimi 0xb95008b9,
6640*f5c631daSSadaf Ebrahimi 0x36369d6d,
6641*f5c631daSSadaf Ebrahimi 0x00000000,
6642*f5c631daSSadaf Ebrahimi 0x00000000,
6643*f5c631daSSadaf Ebrahimi 0x00000000,
6644*f5c631daSSadaf Ebrahimi 0x3f800000,
6645*f5c631daSSadaf Ebrahimi 0xbf000000,
6646*f5c631daSSadaf Ebrahimi 0x3d2aaaa6,
6647*f5c631daSSadaf Ebrahimi 0xbab60705,
6648*f5c631daSSadaf Ebrahimi 0x37cd37cc,
6649*f5c631daSSadaf Ebrahimi 0x00000000,
6650*f5c631daSSadaf Ebrahimi 0x00000000,
6651*f5c631daSSadaf Ebrahimi 0x00000000};
6652*f5c631daSSadaf Ebrahimi
6653*f5c631daSSadaf Ebrahimi static const uint64_t ftmad_coeff64[] = {0x3ff0000000000000,
6654*f5c631daSSadaf Ebrahimi 0xbfc5555555555543,
6655*f5c631daSSadaf Ebrahimi 0x3f8111111110f30c,
6656*f5c631daSSadaf Ebrahimi 0xbf2a01a019b92fc6,
6657*f5c631daSSadaf Ebrahimi 0x3ec71de351f3d22b,
6658*f5c631daSSadaf Ebrahimi 0xbe5ae5e2b60f7b91,
6659*f5c631daSSadaf Ebrahimi 0x3de5d8408868552f,
6660*f5c631daSSadaf Ebrahimi 0x0000000000000000,
6661*f5c631daSSadaf Ebrahimi 0x3ff0000000000000,
6662*f5c631daSSadaf Ebrahimi 0xbfe0000000000000,
6663*f5c631daSSadaf Ebrahimi 0x3fa5555555555536,
6664*f5c631daSSadaf Ebrahimi 0xbf56c16c16c13a0b,
6665*f5c631daSSadaf Ebrahimi 0x3efa01a019b1e8d8,
6666*f5c631daSSadaf Ebrahimi 0xbe927e4f7282f468,
6667*f5c631daSSadaf Ebrahimi 0x3e21ee96d2641b13,
6668*f5c631daSSadaf Ebrahimi 0xbda8f76380fbb401};
6669*f5c631daSSadaf Ebrahimi VIXL_ASSERT((index + 8) < ArrayLength(ftmad_coeff64));
6670*f5c631daSSadaf Ebrahimi VIXL_ASSERT(ArrayLength(ftmad_coeff16) == ArrayLength(ftmad_coeff64));
6671*f5c631daSSadaf Ebrahimi VIXL_ASSERT(ArrayLength(ftmad_coeff32) == ArrayLength(ftmad_coeff64));
6672*f5c631daSSadaf Ebrahimi
6673*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6674*f5c631daSSadaf Ebrahimi FTMaddHelper<SimFloat16>(vform,
6675*f5c631daSSadaf Ebrahimi dst,
6676*f5c631daSSadaf Ebrahimi src1,
6677*f5c631daSSadaf Ebrahimi src2,
6678*f5c631daSSadaf Ebrahimi ftmad_coeff16[index],
6679*f5c631daSSadaf Ebrahimi ftmad_coeff16[index + 8]);
6680*f5c631daSSadaf Ebrahimi } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6681*f5c631daSSadaf Ebrahimi FTMaddHelper<float>(vform,
6682*f5c631daSSadaf Ebrahimi dst,
6683*f5c631daSSadaf Ebrahimi src1,
6684*f5c631daSSadaf Ebrahimi src2,
6685*f5c631daSSadaf Ebrahimi ftmad_coeff32[index],
6686*f5c631daSSadaf Ebrahimi ftmad_coeff32[index + 8]);
6687*f5c631daSSadaf Ebrahimi } else {
6688*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6689*f5c631daSSadaf Ebrahimi FTMaddHelper<double>(vform,
6690*f5c631daSSadaf Ebrahimi dst,
6691*f5c631daSSadaf Ebrahimi src1,
6692*f5c631daSSadaf Ebrahimi src2,
6693*f5c631daSSadaf Ebrahimi ftmad_coeff64[index],
6694*f5c631daSSadaf Ebrahimi ftmad_coeff64[index + 8]);
6695*f5c631daSSadaf Ebrahimi }
6696*f5c631daSSadaf Ebrahimi return dst;
6697*f5c631daSSadaf Ebrahimi }
6698*f5c631daSSadaf Ebrahimi
fexpa(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6699*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fexpa(VectorFormat vform,
6700*f5c631daSSadaf Ebrahimi LogicVRegister dst,
6701*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
6702*f5c631daSSadaf Ebrahimi static const uint64_t fexpa_coeff16[] = {0x0000, 0x0016, 0x002d, 0x0045,
6703*f5c631daSSadaf Ebrahimi 0x005d, 0x0075, 0x008e, 0x00a8,
6704*f5c631daSSadaf Ebrahimi 0x00c2, 0x00dc, 0x00f8, 0x0114,
6705*f5c631daSSadaf Ebrahimi 0x0130, 0x014d, 0x016b, 0x0189,
6706*f5c631daSSadaf Ebrahimi 0x01a8, 0x01c8, 0x01e8, 0x0209,
6707*f5c631daSSadaf Ebrahimi 0x022b, 0x024e, 0x0271, 0x0295,
6708*f5c631daSSadaf Ebrahimi 0x02ba, 0x02e0, 0x0306, 0x032e,
6709*f5c631daSSadaf Ebrahimi 0x0356, 0x037f, 0x03a9, 0x03d4};
6710*f5c631daSSadaf Ebrahimi
6711*f5c631daSSadaf Ebrahimi static const uint64_t fexpa_coeff32[] =
6712*f5c631daSSadaf Ebrahimi {0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62, 0x08980f,
6713*f5c631daSSadaf Ebrahimi 0x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, 0x11c3d3, 0x135a2b,
6714*f5c631daSSadaf Ebrahimi 0x14f4f0, 0x16942d, 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, 0x1ef532,
6715*f5c631daSSadaf Ebrahimi 0x20b051, 0x227043, 0x243516, 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a,
6716*f5c631daSSadaf Ebrahimi 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, 0x3504f3, 0x36fd92, 0x38fbaf,
6717*f5c631daSSadaf Ebrahimi 0x3aff5b, 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75,
6718*f5c631daSSadaf Ebrahimi 0x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, 0x5744fd,
6719*f5c631daSSadaf Ebrahimi 0x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89, 0x65b907, 0x68396a,
6720*f5c631daSSadaf Ebrahimi 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, 0x75257d, 0x77d0df, 0x7a83b3,
6721*f5c631daSSadaf Ebrahimi 0x7d3e0c};
6722*f5c631daSSadaf Ebrahimi
6723*f5c631daSSadaf Ebrahimi static const uint64_t fexpa_coeff64[] =
6724*f5c631daSSadaf Ebrahimi {0X0000000000000, 0X02c9a3e778061, 0X059b0d3158574, 0X0874518759bc8,
6725*f5c631daSSadaf Ebrahimi 0X0b5586cf9890f, 0X0e3ec32d3d1a2, 0X11301d0125b51, 0X1429aaea92de0,
6726*f5c631daSSadaf Ebrahimi 0X172b83c7d517b, 0X1a35beb6fcb75, 0X1d4873168b9aa, 0X2063b88628cd6,
6727*f5c631daSSadaf Ebrahimi 0X2387a6e756238, 0X26b4565e27cdd, 0X29e9df51fdee1, 0X2d285a6e4030b,
6728*f5c631daSSadaf Ebrahimi 0X306fe0a31b715, 0X33c08b26416ff, 0X371a7373aa9cb, 0X3a7db34e59ff7,
6729*f5c631daSSadaf Ebrahimi 0X3dea64c123422, 0X4160a21f72e2a, 0X44e086061892d, 0X486a2b5c13cd0,
6730*f5c631daSSadaf Ebrahimi 0X4bfdad5362a27, 0X4f9b2769d2ca7, 0X5342b569d4f82, 0X56f4736b527da,
6731*f5c631daSSadaf Ebrahimi 0X5ab07dd485429, 0X5e76f15ad2148, 0X6247eb03a5585, 0X6623882552225,
6732*f5c631daSSadaf Ebrahimi 0X6a09e667f3bcd, 0X6dfb23c651a2f, 0X71f75e8ec5f74, 0X75feb564267c9,
6733*f5c631daSSadaf Ebrahimi 0X7a11473eb0187, 0X7e2f336cf4e62, 0X82589994cce13, 0X868d99b4492ed,
6734*f5c631daSSadaf Ebrahimi 0X8ace5422aa0db, 0X8f1ae99157736, 0X93737b0cdc5e5, 0X97d829fde4e50,
6735*f5c631daSSadaf Ebrahimi 0X9c49182a3f090, 0Xa0c667b5de565, 0Xa5503b23e255d, 0Xa9e6b5579fdbf,
6736*f5c631daSSadaf Ebrahimi 0Xae89f995ad3ad, 0Xb33a2b84f15fb, 0Xb7f76f2fb5e47, 0Xbcc1e904bc1d2,
6737*f5c631daSSadaf Ebrahimi 0Xc199bdd85529c, 0Xc67f12e57d14b, 0Xcb720dcef9069, 0Xd072d4a07897c,
6738*f5c631daSSadaf Ebrahimi 0Xd5818dcfba487, 0Xda9e603db3285, 0Xdfc97337b9b5f, 0Xe502ee78b3ff6,
6739*f5c631daSSadaf Ebrahimi 0Xea4afa2a490da, 0Xefa1bee615a27, 0Xf50765b6e4540, 0Xfa7c1819e90d8};
6740*f5c631daSSadaf Ebrahimi
6741*f5c631daSSadaf Ebrahimi unsigned lane_size = LaneSizeInBitsFromFormat(vform);
6742*f5c631daSSadaf Ebrahimi int index_highbit = 5;
6743*f5c631daSSadaf Ebrahimi int op_highbit, op_shift;
6744*f5c631daSSadaf Ebrahimi const uint64_t* fexpa_coeff;
6745*f5c631daSSadaf Ebrahimi
6746*f5c631daSSadaf Ebrahimi if (lane_size == kHRegSize) {
6747*f5c631daSSadaf Ebrahimi index_highbit = 4;
6748*f5c631daSSadaf Ebrahimi VIXL_ASSERT(ArrayLength(fexpa_coeff16) == (1U << (index_highbit + 1)));
6749*f5c631daSSadaf Ebrahimi fexpa_coeff = fexpa_coeff16;
6750*f5c631daSSadaf Ebrahimi op_highbit = 9;
6751*f5c631daSSadaf Ebrahimi op_shift = 10;
6752*f5c631daSSadaf Ebrahimi } else if (lane_size == kSRegSize) {
6753*f5c631daSSadaf Ebrahimi VIXL_ASSERT(ArrayLength(fexpa_coeff32) == (1U << (index_highbit + 1)));
6754*f5c631daSSadaf Ebrahimi fexpa_coeff = fexpa_coeff32;
6755*f5c631daSSadaf Ebrahimi op_highbit = 13;
6756*f5c631daSSadaf Ebrahimi op_shift = 23;
6757*f5c631daSSadaf Ebrahimi } else {
6758*f5c631daSSadaf Ebrahimi VIXL_ASSERT(lane_size == kDRegSize);
6759*f5c631daSSadaf Ebrahimi VIXL_ASSERT(ArrayLength(fexpa_coeff64) == (1U << (index_highbit + 1)));
6760*f5c631daSSadaf Ebrahimi fexpa_coeff = fexpa_coeff64;
6761*f5c631daSSadaf Ebrahimi op_highbit = 16;
6762*f5c631daSSadaf Ebrahimi op_shift = 52;
6763*f5c631daSSadaf Ebrahimi }
6764*f5c631daSSadaf Ebrahimi
6765*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6766*f5c631daSSadaf Ebrahimi uint64_t op = src.Uint(vform, i);
6767*f5c631daSSadaf Ebrahimi uint64_t result = fexpa_coeff[Bits(op, index_highbit, 0)];
6768*f5c631daSSadaf Ebrahimi result |= (Bits(op, op_highbit, index_highbit + 1) << op_shift);
6769*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result);
6770*f5c631daSSadaf Ebrahimi }
6771*f5c631daSSadaf Ebrahimi return dst;
6772*f5c631daSSadaf Ebrahimi }
6773*f5c631daSSadaf Ebrahimi
6774*f5c631daSSadaf Ebrahimi template <typename T>
fscale(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6775*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fscale(VectorFormat vform,
6776*f5c631daSSadaf Ebrahimi LogicVRegister dst,
6777*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
6778*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
6779*f5c631daSSadaf Ebrahimi T two = T(2.0);
6780*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6781*f5c631daSSadaf Ebrahimi T src1_val = src1.Float<T>(i);
6782*f5c631daSSadaf Ebrahimi if (!IsNaN(src1_val)) {
6783*f5c631daSSadaf Ebrahimi int64_t scale = src2.Int(vform, i);
6784*f5c631daSSadaf Ebrahimi // TODO: this is a low-performance implementation, but it's simple and
6785*f5c631daSSadaf Ebrahimi // less likely to be buggy. Consider replacing it with something faster.
6786*f5c631daSSadaf Ebrahimi
6787*f5c631daSSadaf Ebrahimi // Scales outside of these bounds become infinity or zero, so there's no
6788*f5c631daSSadaf Ebrahimi // point iterating further.
6789*f5c631daSSadaf Ebrahimi scale = std::min<int64_t>(std::max<int64_t>(scale, -2048), 2048);
6790*f5c631daSSadaf Ebrahimi
6791*f5c631daSSadaf Ebrahimi // Compute src1_val * 2 ^ scale. If scale is positive, multiply by two and
6792*f5c631daSSadaf Ebrahimi // decrement scale until it's zero.
6793*f5c631daSSadaf Ebrahimi while (scale-- > 0) {
6794*f5c631daSSadaf Ebrahimi src1_val = FPMul(src1_val, two);
6795*f5c631daSSadaf Ebrahimi }
6796*f5c631daSSadaf Ebrahimi
6797*f5c631daSSadaf Ebrahimi // If scale is negative, divide by two and increment scale until it's
6798*f5c631daSSadaf Ebrahimi // zero. Initially, scale is (src2 - 1), so we pre-increment.
6799*f5c631daSSadaf Ebrahimi while (++scale < 0) {
6800*f5c631daSSadaf Ebrahimi src1_val = FPDiv(src1_val, two);
6801*f5c631daSSadaf Ebrahimi }
6802*f5c631daSSadaf Ebrahimi }
6803*f5c631daSSadaf Ebrahimi dst.SetFloat<T>(i, src1_val);
6804*f5c631daSSadaf Ebrahimi }
6805*f5c631daSSadaf Ebrahimi return dst;
6806*f5c631daSSadaf Ebrahimi }
6807*f5c631daSSadaf Ebrahimi
fscale(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6808*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fscale(VectorFormat vform,
6809*f5c631daSSadaf Ebrahimi LogicVRegister dst,
6810*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
6811*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
6812*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6813*f5c631daSSadaf Ebrahimi fscale<SimFloat16>(vform, dst, src1, src2);
6814*f5c631daSSadaf Ebrahimi } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6815*f5c631daSSadaf Ebrahimi fscale<float>(vform, dst, src1, src2);
6816*f5c631daSSadaf Ebrahimi } else {
6817*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6818*f5c631daSSadaf Ebrahimi fscale<double>(vform, dst, src1, src2);
6819*f5c631daSSadaf Ebrahimi }
6820*f5c631daSSadaf Ebrahimi return dst;
6821*f5c631daSSadaf Ebrahimi }
6822*f5c631daSSadaf Ebrahimi
scvtf(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)6823*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::scvtf(VectorFormat vform,
6824*f5c631daSSadaf Ebrahimi unsigned dst_data_size_in_bits,
6825*f5c631daSSadaf Ebrahimi unsigned src_data_size_in_bits,
6826*f5c631daSSadaf Ebrahimi LogicVRegister dst,
6827*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
6828*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
6829*f5c631daSSadaf Ebrahimi FPRounding round,
6830*f5c631daSSadaf Ebrahimi int fbits) {
6831*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6832*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6833*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
6834*f5c631daSSadaf Ebrahimi
6835*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6836*f5c631daSSadaf Ebrahimi if (!pg.IsActive(vform, i)) continue;
6837*f5c631daSSadaf Ebrahimi
6838*f5c631daSSadaf Ebrahimi int64_t value = ExtractSignedBitfield64(src_data_size_in_bits - 1,
6839*f5c631daSSadaf Ebrahimi 0,
6840*f5c631daSSadaf Ebrahimi src.Uint(vform, i));
6841*f5c631daSSadaf Ebrahimi
6842*f5c631daSSadaf Ebrahimi switch (dst_data_size_in_bits) {
6843*f5c631daSSadaf Ebrahimi case kHRegSize: {
6844*f5c631daSSadaf Ebrahimi SimFloat16 result = FixedToFloat16(value, fbits, round);
6845*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, Float16ToRawbits(result));
6846*f5c631daSSadaf Ebrahimi break;
6847*f5c631daSSadaf Ebrahimi }
6848*f5c631daSSadaf Ebrahimi case kSRegSize: {
6849*f5c631daSSadaf Ebrahimi float result = FixedToFloat(value, fbits, round);
6850*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, FloatToRawbits(result));
6851*f5c631daSSadaf Ebrahimi break;
6852*f5c631daSSadaf Ebrahimi }
6853*f5c631daSSadaf Ebrahimi case kDRegSize: {
6854*f5c631daSSadaf Ebrahimi double result = FixedToDouble(value, fbits, round);
6855*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, DoubleToRawbits(result));
6856*f5c631daSSadaf Ebrahimi break;
6857*f5c631daSSadaf Ebrahimi }
6858*f5c631daSSadaf Ebrahimi default:
6859*f5c631daSSadaf Ebrahimi VIXL_UNIMPLEMENTED();
6860*f5c631daSSadaf Ebrahimi break;
6861*f5c631daSSadaf Ebrahimi }
6862*f5c631daSSadaf Ebrahimi }
6863*f5c631daSSadaf Ebrahimi
6864*f5c631daSSadaf Ebrahimi return dst;
6865*f5c631daSSadaf Ebrahimi }
6866*f5c631daSSadaf Ebrahimi
scvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)6867*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::scvtf(VectorFormat vform,
6868*f5c631daSSadaf Ebrahimi LogicVRegister dst,
6869*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
6870*f5c631daSSadaf Ebrahimi int fbits,
6871*f5c631daSSadaf Ebrahimi FPRounding round) {
6872*f5c631daSSadaf Ebrahimi return scvtf(vform,
6873*f5c631daSSadaf Ebrahimi LaneSizeInBitsFromFormat(vform),
6874*f5c631daSSadaf Ebrahimi LaneSizeInBitsFromFormat(vform),
6875*f5c631daSSadaf Ebrahimi dst,
6876*f5c631daSSadaf Ebrahimi GetPTrue(),
6877*f5c631daSSadaf Ebrahimi src,
6878*f5c631daSSadaf Ebrahimi round,
6879*f5c631daSSadaf Ebrahimi fbits);
6880*f5c631daSSadaf Ebrahimi }
6881*f5c631daSSadaf Ebrahimi
ucvtf(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)6882*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::ucvtf(VectorFormat vform,
6883*f5c631daSSadaf Ebrahimi unsigned dst_data_size_in_bits,
6884*f5c631daSSadaf Ebrahimi unsigned src_data_size_in_bits,
6885*f5c631daSSadaf Ebrahimi LogicVRegister dst,
6886*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
6887*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
6888*f5c631daSSadaf Ebrahimi FPRounding round,
6889*f5c631daSSadaf Ebrahimi int fbits) {
6890*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6891*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6892*f5c631daSSadaf Ebrahimi dst.ClearForWrite(vform);
6893*f5c631daSSadaf Ebrahimi
6894*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6895*f5c631daSSadaf Ebrahimi if (!pg.IsActive(vform, i)) continue;
6896*f5c631daSSadaf Ebrahimi
6897*f5c631daSSadaf Ebrahimi uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
6898*f5c631daSSadaf Ebrahimi 0,
6899*f5c631daSSadaf Ebrahimi src.Uint(vform, i));
6900*f5c631daSSadaf Ebrahimi
6901*f5c631daSSadaf Ebrahimi switch (dst_data_size_in_bits) {
6902*f5c631daSSadaf Ebrahimi case kHRegSize: {
6903*f5c631daSSadaf Ebrahimi SimFloat16 result = UFixedToFloat16(value, fbits, round);
6904*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, Float16ToRawbits(result));
6905*f5c631daSSadaf Ebrahimi break;
6906*f5c631daSSadaf Ebrahimi }
6907*f5c631daSSadaf Ebrahimi case kSRegSize: {
6908*f5c631daSSadaf Ebrahimi float result = UFixedToFloat(value, fbits, round);
6909*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, FloatToRawbits(result));
6910*f5c631daSSadaf Ebrahimi break;
6911*f5c631daSSadaf Ebrahimi }
6912*f5c631daSSadaf Ebrahimi case kDRegSize: {
6913*f5c631daSSadaf Ebrahimi double result = UFixedToDouble(value, fbits, round);
6914*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, DoubleToRawbits(result));
6915*f5c631daSSadaf Ebrahimi break;
6916*f5c631daSSadaf Ebrahimi }
6917*f5c631daSSadaf Ebrahimi default:
6918*f5c631daSSadaf Ebrahimi VIXL_UNIMPLEMENTED();
6919*f5c631daSSadaf Ebrahimi break;
6920*f5c631daSSadaf Ebrahimi }
6921*f5c631daSSadaf Ebrahimi }
6922*f5c631daSSadaf Ebrahimi
6923*f5c631daSSadaf Ebrahimi return dst;
6924*f5c631daSSadaf Ebrahimi }
6925*f5c631daSSadaf Ebrahimi
ucvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)6926*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::ucvtf(VectorFormat vform,
6927*f5c631daSSadaf Ebrahimi LogicVRegister dst,
6928*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
6929*f5c631daSSadaf Ebrahimi int fbits,
6930*f5c631daSSadaf Ebrahimi FPRounding round) {
6931*f5c631daSSadaf Ebrahimi return ucvtf(vform,
6932*f5c631daSSadaf Ebrahimi LaneSizeInBitsFromFormat(vform),
6933*f5c631daSSadaf Ebrahimi LaneSizeInBitsFromFormat(vform),
6934*f5c631daSSadaf Ebrahimi dst,
6935*f5c631daSSadaf Ebrahimi GetPTrue(),
6936*f5c631daSSadaf Ebrahimi src,
6937*f5c631daSSadaf Ebrahimi round,
6938*f5c631daSSadaf Ebrahimi fbits);
6939*f5c631daSSadaf Ebrahimi }
6940*f5c631daSSadaf Ebrahimi
unpk(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,UnpackType unpack_type,ExtendType extend_type)6941*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::unpk(VectorFormat vform,
6942*f5c631daSSadaf Ebrahimi LogicVRegister dst,
6943*f5c631daSSadaf Ebrahimi const LogicVRegister& src,
6944*f5c631daSSadaf Ebrahimi UnpackType unpack_type,
6945*f5c631daSSadaf Ebrahimi ExtendType extend_type) {
6946*f5c631daSSadaf Ebrahimi VectorFormat vform_half = VectorFormatHalfWidth(vform);
6947*f5c631daSSadaf Ebrahimi const int lane_count = LaneCountFromFormat(vform);
6948*f5c631daSSadaf Ebrahimi const int src_start_lane = (unpack_type == kLoHalf) ? 0 : lane_count;
6949*f5c631daSSadaf Ebrahimi
6950*f5c631daSSadaf Ebrahimi switch (extend_type) {
6951*f5c631daSSadaf Ebrahimi case kSignedExtend: {
6952*f5c631daSSadaf Ebrahimi int64_t result[kZRegMaxSizeInBytes];
6953*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
6954*f5c631daSSadaf Ebrahimi result[i] = src.Int(vform_half, i + src_start_lane);
6955*f5c631daSSadaf Ebrahimi }
6956*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
6957*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, result[i]);
6958*f5c631daSSadaf Ebrahimi }
6959*f5c631daSSadaf Ebrahimi break;
6960*f5c631daSSadaf Ebrahimi }
6961*f5c631daSSadaf Ebrahimi case kUnsignedExtend: {
6962*f5c631daSSadaf Ebrahimi uint64_t result[kZRegMaxSizeInBytes];
6963*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
6964*f5c631daSSadaf Ebrahimi result[i] = src.Uint(vform_half, i + src_start_lane);
6965*f5c631daSSadaf Ebrahimi }
6966*f5c631daSSadaf Ebrahimi for (int i = 0; i < lane_count; ++i) {
6967*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, result[i]);
6968*f5c631daSSadaf Ebrahimi }
6969*f5c631daSSadaf Ebrahimi break;
6970*f5c631daSSadaf Ebrahimi }
6971*f5c631daSSadaf Ebrahimi default:
6972*f5c631daSSadaf Ebrahimi VIXL_UNREACHABLE();
6973*f5c631daSSadaf Ebrahimi }
6974*f5c631daSSadaf Ebrahimi return dst;
6975*f5c631daSSadaf Ebrahimi }
6976*f5c631daSSadaf Ebrahimi
SVEIntCompareVectorsHelper(Condition cond,VectorFormat vform,LogicPRegister dst,const LogicPRegister & mask,const LogicVRegister & src1,const LogicVRegister & src2,bool is_wide_elements,FlagsUpdate flags)6977*f5c631daSSadaf Ebrahimi LogicPRegister Simulator::SVEIntCompareVectorsHelper(Condition cond,
6978*f5c631daSSadaf Ebrahimi VectorFormat vform,
6979*f5c631daSSadaf Ebrahimi LogicPRegister dst,
6980*f5c631daSSadaf Ebrahimi const LogicPRegister& mask,
6981*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
6982*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
6983*f5c631daSSadaf Ebrahimi bool is_wide_elements,
6984*f5c631daSSadaf Ebrahimi FlagsUpdate flags) {
6985*f5c631daSSadaf Ebrahimi for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
6986*f5c631daSSadaf Ebrahimi bool result = false;
6987*f5c631daSSadaf Ebrahimi if (mask.IsActive(vform, lane)) {
6988*f5c631daSSadaf Ebrahimi int64_t op1 = 0xbadbeef;
6989*f5c631daSSadaf Ebrahimi int64_t op2 = 0xbadbeef;
6990*f5c631daSSadaf Ebrahimi int d_lane = (lane * LaneSizeInBitsFromFormat(vform)) / kDRegSize;
6991*f5c631daSSadaf Ebrahimi switch (cond) {
6992*f5c631daSSadaf Ebrahimi case eq:
6993*f5c631daSSadaf Ebrahimi case ge:
6994*f5c631daSSadaf Ebrahimi case gt:
6995*f5c631daSSadaf Ebrahimi case lt:
6996*f5c631daSSadaf Ebrahimi case le:
6997*f5c631daSSadaf Ebrahimi case ne:
6998*f5c631daSSadaf Ebrahimi op1 = src1.Int(vform, lane);
6999*f5c631daSSadaf Ebrahimi op2 = is_wide_elements ? src2.Int(kFormatVnD, d_lane)
7000*f5c631daSSadaf Ebrahimi : src2.Int(vform, lane);
7001*f5c631daSSadaf Ebrahimi break;
7002*f5c631daSSadaf Ebrahimi case hi:
7003*f5c631daSSadaf Ebrahimi case hs:
7004*f5c631daSSadaf Ebrahimi case ls:
7005*f5c631daSSadaf Ebrahimi case lo:
7006*f5c631daSSadaf Ebrahimi op1 = src1.Uint(vform, lane);
7007*f5c631daSSadaf Ebrahimi op2 = is_wide_elements ? src2.Uint(kFormatVnD, d_lane)
7008*f5c631daSSadaf Ebrahimi : src2.Uint(vform, lane);
7009*f5c631daSSadaf Ebrahimi break;
7010*f5c631daSSadaf Ebrahimi default:
7011*f5c631daSSadaf Ebrahimi VIXL_UNREACHABLE();
7012*f5c631daSSadaf Ebrahimi }
7013*f5c631daSSadaf Ebrahimi
7014*f5c631daSSadaf Ebrahimi switch (cond) {
7015*f5c631daSSadaf Ebrahimi case eq:
7016*f5c631daSSadaf Ebrahimi result = (op1 == op2);
7017*f5c631daSSadaf Ebrahimi break;
7018*f5c631daSSadaf Ebrahimi case ne:
7019*f5c631daSSadaf Ebrahimi result = (op1 != op2);
7020*f5c631daSSadaf Ebrahimi break;
7021*f5c631daSSadaf Ebrahimi case ge:
7022*f5c631daSSadaf Ebrahimi result = (op1 >= op2);
7023*f5c631daSSadaf Ebrahimi break;
7024*f5c631daSSadaf Ebrahimi case gt:
7025*f5c631daSSadaf Ebrahimi result = (op1 > op2);
7026*f5c631daSSadaf Ebrahimi break;
7027*f5c631daSSadaf Ebrahimi case le:
7028*f5c631daSSadaf Ebrahimi result = (op1 <= op2);
7029*f5c631daSSadaf Ebrahimi break;
7030*f5c631daSSadaf Ebrahimi case lt:
7031*f5c631daSSadaf Ebrahimi result = (op1 < op2);
7032*f5c631daSSadaf Ebrahimi break;
7033*f5c631daSSadaf Ebrahimi case hs:
7034*f5c631daSSadaf Ebrahimi result = (static_cast<uint64_t>(op1) >= static_cast<uint64_t>(op2));
7035*f5c631daSSadaf Ebrahimi break;
7036*f5c631daSSadaf Ebrahimi case hi:
7037*f5c631daSSadaf Ebrahimi result = (static_cast<uint64_t>(op1) > static_cast<uint64_t>(op2));
7038*f5c631daSSadaf Ebrahimi break;
7039*f5c631daSSadaf Ebrahimi case ls:
7040*f5c631daSSadaf Ebrahimi result = (static_cast<uint64_t>(op1) <= static_cast<uint64_t>(op2));
7041*f5c631daSSadaf Ebrahimi break;
7042*f5c631daSSadaf Ebrahimi case lo:
7043*f5c631daSSadaf Ebrahimi result = (static_cast<uint64_t>(op1) < static_cast<uint64_t>(op2));
7044*f5c631daSSadaf Ebrahimi break;
7045*f5c631daSSadaf Ebrahimi default:
7046*f5c631daSSadaf Ebrahimi VIXL_UNREACHABLE();
7047*f5c631daSSadaf Ebrahimi }
7048*f5c631daSSadaf Ebrahimi }
7049*f5c631daSSadaf Ebrahimi dst.SetActive(vform, lane, result);
7050*f5c631daSSadaf Ebrahimi }
7051*f5c631daSSadaf Ebrahimi
7052*f5c631daSSadaf Ebrahimi if (flags == SetFlags) PredTest(vform, mask, dst);
7053*f5c631daSSadaf Ebrahimi
7054*f5c631daSSadaf Ebrahimi return dst;
7055*f5c631daSSadaf Ebrahimi }
7056*f5c631daSSadaf Ebrahimi
SVEBitwiseShiftHelper(Shift shift_op,VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_wide_elements)7057*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::SVEBitwiseShiftHelper(Shift shift_op,
7058*f5c631daSSadaf Ebrahimi VectorFormat vform,
7059*f5c631daSSadaf Ebrahimi LogicVRegister dst,
7060*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
7061*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
7062*f5c631daSSadaf Ebrahimi bool is_wide_elements) {
7063*f5c631daSSadaf Ebrahimi unsigned lane_size = LaneSizeInBitsFromFormat(vform);
7064*f5c631daSSadaf Ebrahimi VectorFormat shift_vform = is_wide_elements ? kFormatVnD : vform;
7065*f5c631daSSadaf Ebrahimi
7066*f5c631daSSadaf Ebrahimi for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
7067*f5c631daSSadaf Ebrahimi int shift_src_lane = lane;
7068*f5c631daSSadaf Ebrahimi if (is_wide_elements) {
7069*f5c631daSSadaf Ebrahimi // If the shift amount comes from wide elements, select the D-sized lane
7070*f5c631daSSadaf Ebrahimi // which occupies the corresponding lanes of the value to be shifted.
7071*f5c631daSSadaf Ebrahimi shift_src_lane = (lane * lane_size) / kDRegSize;
7072*f5c631daSSadaf Ebrahimi }
7073*f5c631daSSadaf Ebrahimi uint64_t shift_amount = src2.Uint(shift_vform, shift_src_lane);
7074*f5c631daSSadaf Ebrahimi
7075*f5c631daSSadaf Ebrahimi // Saturate shift_amount to the size of the lane that will be shifted.
7076*f5c631daSSadaf Ebrahimi if (shift_amount > lane_size) shift_amount = lane_size;
7077*f5c631daSSadaf Ebrahimi
7078*f5c631daSSadaf Ebrahimi uint64_t value = src1.Uint(vform, lane);
7079*f5c631daSSadaf Ebrahimi int64_t result = ShiftOperand(lane_size,
7080*f5c631daSSadaf Ebrahimi value,
7081*f5c631daSSadaf Ebrahimi shift_op,
7082*f5c631daSSadaf Ebrahimi static_cast<unsigned>(shift_amount));
7083*f5c631daSSadaf Ebrahimi dst.SetUint(vform, lane, result);
7084*f5c631daSSadaf Ebrahimi }
7085*f5c631daSSadaf Ebrahimi
7086*f5c631daSSadaf Ebrahimi return dst;
7087*f5c631daSSadaf Ebrahimi }
7088*f5c631daSSadaf Ebrahimi
asrd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int shift)7089*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::asrd(VectorFormat vform,
7090*f5c631daSSadaf Ebrahimi LogicVRegister dst,
7091*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
7092*f5c631daSSadaf Ebrahimi int shift) {
7093*f5c631daSSadaf Ebrahimi VIXL_ASSERT((shift > 0) && (static_cast<unsigned>(shift) <=
7094*f5c631daSSadaf Ebrahimi LaneSizeInBitsFromFormat(vform)));
7095*f5c631daSSadaf Ebrahimi
7096*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7097*f5c631daSSadaf Ebrahimi int64_t value = src1.Int(vform, i);
7098*f5c631daSSadaf Ebrahimi if (shift <= 63) {
7099*f5c631daSSadaf Ebrahimi if (value < 0) {
7100*f5c631daSSadaf Ebrahimi // The max possible mask is 0x7fff'ffff'ffff'ffff, which can be safely
7101*f5c631daSSadaf Ebrahimi // cast to int64_t, and cannot cause signed overflow in the result.
7102*f5c631daSSadaf Ebrahimi value = value + GetUintMask(shift);
7103*f5c631daSSadaf Ebrahimi }
7104*f5c631daSSadaf Ebrahimi value = ShiftOperand(kDRegSize, value, ASR, shift);
7105*f5c631daSSadaf Ebrahimi } else {
7106*f5c631daSSadaf Ebrahimi value = 0;
7107*f5c631daSSadaf Ebrahimi }
7108*f5c631daSSadaf Ebrahimi dst.SetInt(vform, i, value);
7109*f5c631daSSadaf Ebrahimi }
7110*f5c631daSSadaf Ebrahimi return dst;
7111*f5c631daSSadaf Ebrahimi }
7112*f5c631daSSadaf Ebrahimi
SVEBitwiseLogicalUnpredicatedHelper(LogicalOp logical_op,VectorFormat vform,LogicVRegister zd,const LogicVRegister & zn,const LogicVRegister & zm)7113*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper(
7114*f5c631daSSadaf Ebrahimi LogicalOp logical_op,
7115*f5c631daSSadaf Ebrahimi VectorFormat vform,
7116*f5c631daSSadaf Ebrahimi LogicVRegister zd,
7117*f5c631daSSadaf Ebrahimi const LogicVRegister& zn,
7118*f5c631daSSadaf Ebrahimi const LogicVRegister& zm) {
7119*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsSVEFormat(vform));
7120*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7121*f5c631daSSadaf Ebrahimi uint64_t op1 = zn.Uint(vform, i);
7122*f5c631daSSadaf Ebrahimi uint64_t op2 = zm.Uint(vform, i);
7123*f5c631daSSadaf Ebrahimi uint64_t result = 0;
7124*f5c631daSSadaf Ebrahimi switch (logical_op) {
7125*f5c631daSSadaf Ebrahimi case AND:
7126*f5c631daSSadaf Ebrahimi result = op1 & op2;
7127*f5c631daSSadaf Ebrahimi break;
7128*f5c631daSSadaf Ebrahimi case BIC:
7129*f5c631daSSadaf Ebrahimi result = op1 & ~op2;
7130*f5c631daSSadaf Ebrahimi break;
7131*f5c631daSSadaf Ebrahimi case EOR:
7132*f5c631daSSadaf Ebrahimi result = op1 ^ op2;
7133*f5c631daSSadaf Ebrahimi break;
7134*f5c631daSSadaf Ebrahimi case ORR:
7135*f5c631daSSadaf Ebrahimi result = op1 | op2;
7136*f5c631daSSadaf Ebrahimi break;
7137*f5c631daSSadaf Ebrahimi default:
7138*f5c631daSSadaf Ebrahimi VIXL_UNIMPLEMENTED();
7139*f5c631daSSadaf Ebrahimi }
7140*f5c631daSSadaf Ebrahimi zd.SetUint(vform, i, result);
7141*f5c631daSSadaf Ebrahimi }
7142*f5c631daSSadaf Ebrahimi
7143*f5c631daSSadaf Ebrahimi return zd;
7144*f5c631daSSadaf Ebrahimi }
7145*f5c631daSSadaf Ebrahimi
SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,LogicPRegister pd,const LogicPRegister & pn,const LogicPRegister & pm)7146*f5c631daSSadaf Ebrahimi LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,
7147*f5c631daSSadaf Ebrahimi LogicPRegister pd,
7148*f5c631daSSadaf Ebrahimi const LogicPRegister& pn,
7149*f5c631daSSadaf Ebrahimi const LogicPRegister& pm) {
7150*f5c631daSSadaf Ebrahimi for (int i = 0; i < pn.GetChunkCount(); i++) {
7151*f5c631daSSadaf Ebrahimi LogicPRegister::ChunkType op1 = pn.GetChunk(i);
7152*f5c631daSSadaf Ebrahimi LogicPRegister::ChunkType op2 = pm.GetChunk(i);
7153*f5c631daSSadaf Ebrahimi LogicPRegister::ChunkType result = 0;
7154*f5c631daSSadaf Ebrahimi switch (op) {
7155*f5c631daSSadaf Ebrahimi case ANDS_p_p_pp_z:
7156*f5c631daSSadaf Ebrahimi case AND_p_p_pp_z:
7157*f5c631daSSadaf Ebrahimi result = op1 & op2;
7158*f5c631daSSadaf Ebrahimi break;
7159*f5c631daSSadaf Ebrahimi case BICS_p_p_pp_z:
7160*f5c631daSSadaf Ebrahimi case BIC_p_p_pp_z:
7161*f5c631daSSadaf Ebrahimi result = op1 & ~op2;
7162*f5c631daSSadaf Ebrahimi break;
7163*f5c631daSSadaf Ebrahimi case EORS_p_p_pp_z:
7164*f5c631daSSadaf Ebrahimi case EOR_p_p_pp_z:
7165*f5c631daSSadaf Ebrahimi result = op1 ^ op2;
7166*f5c631daSSadaf Ebrahimi break;
7167*f5c631daSSadaf Ebrahimi case NANDS_p_p_pp_z:
7168*f5c631daSSadaf Ebrahimi case NAND_p_p_pp_z:
7169*f5c631daSSadaf Ebrahimi result = ~(op1 & op2);
7170*f5c631daSSadaf Ebrahimi break;
7171*f5c631daSSadaf Ebrahimi case NORS_p_p_pp_z:
7172*f5c631daSSadaf Ebrahimi case NOR_p_p_pp_z:
7173*f5c631daSSadaf Ebrahimi result = ~(op1 | op2);
7174*f5c631daSSadaf Ebrahimi break;
7175*f5c631daSSadaf Ebrahimi case ORNS_p_p_pp_z:
7176*f5c631daSSadaf Ebrahimi case ORN_p_p_pp_z:
7177*f5c631daSSadaf Ebrahimi result = op1 | ~op2;
7178*f5c631daSSadaf Ebrahimi break;
7179*f5c631daSSadaf Ebrahimi case ORRS_p_p_pp_z:
7180*f5c631daSSadaf Ebrahimi case ORR_p_p_pp_z:
7181*f5c631daSSadaf Ebrahimi result = op1 | op2;
7182*f5c631daSSadaf Ebrahimi break;
7183*f5c631daSSadaf Ebrahimi default:
7184*f5c631daSSadaf Ebrahimi VIXL_UNIMPLEMENTED();
7185*f5c631daSSadaf Ebrahimi }
7186*f5c631daSSadaf Ebrahimi pd.SetChunk(i, result);
7187*f5c631daSSadaf Ebrahimi }
7188*f5c631daSSadaf Ebrahimi return pd;
7189*f5c631daSSadaf Ebrahimi }
7190*f5c631daSSadaf Ebrahimi
SVEBitwiseImmHelper(SVEBitwiseLogicalWithImm_UnpredicatedOp op,VectorFormat vform,LogicVRegister zd,uint64_t imm)7191*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::SVEBitwiseImmHelper(
7192*f5c631daSSadaf Ebrahimi SVEBitwiseLogicalWithImm_UnpredicatedOp op,
7193*f5c631daSSadaf Ebrahimi VectorFormat vform,
7194*f5c631daSSadaf Ebrahimi LogicVRegister zd,
7195*f5c631daSSadaf Ebrahimi uint64_t imm) {
7196*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7197*f5c631daSSadaf Ebrahimi uint64_t op1 = zd.Uint(vform, i);
7198*f5c631daSSadaf Ebrahimi uint64_t result = 0;
7199*f5c631daSSadaf Ebrahimi switch (op) {
7200*f5c631daSSadaf Ebrahimi case AND_z_zi:
7201*f5c631daSSadaf Ebrahimi result = op1 & imm;
7202*f5c631daSSadaf Ebrahimi break;
7203*f5c631daSSadaf Ebrahimi case EOR_z_zi:
7204*f5c631daSSadaf Ebrahimi result = op1 ^ imm;
7205*f5c631daSSadaf Ebrahimi break;
7206*f5c631daSSadaf Ebrahimi case ORR_z_zi:
7207*f5c631daSSadaf Ebrahimi result = op1 | imm;
7208*f5c631daSSadaf Ebrahimi break;
7209*f5c631daSSadaf Ebrahimi default:
7210*f5c631daSSadaf Ebrahimi VIXL_UNIMPLEMENTED();
7211*f5c631daSSadaf Ebrahimi }
7212*f5c631daSSadaf Ebrahimi zd.SetUint(vform, i, result);
7213*f5c631daSSadaf Ebrahimi }
7214*f5c631daSSadaf Ebrahimi
7215*f5c631daSSadaf Ebrahimi return zd;
7216*f5c631daSSadaf Ebrahimi }
7217*f5c631daSSadaf Ebrahimi
SVEStructuredStoreHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr)7218*f5c631daSSadaf Ebrahimi void Simulator::SVEStructuredStoreHelper(VectorFormat vform,
7219*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
7220*f5c631daSSadaf Ebrahimi unsigned zt_code,
7221*f5c631daSSadaf Ebrahimi const LogicSVEAddressVector& addr) {
7222*f5c631daSSadaf Ebrahimi VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7223*f5c631daSSadaf Ebrahimi
7224*f5c631daSSadaf Ebrahimi int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7225*f5c631daSSadaf Ebrahimi int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7226*f5c631daSSadaf Ebrahimi int msize_in_bytes = addr.GetMsizeInBytes();
7227*f5c631daSSadaf Ebrahimi int reg_count = addr.GetRegCount();
7228*f5c631daSSadaf Ebrahimi
7229*f5c631daSSadaf Ebrahimi VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7230*f5c631daSSadaf Ebrahimi VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7231*f5c631daSSadaf Ebrahimi
7232*f5c631daSSadaf Ebrahimi unsigned zt_codes[4] = {zt_code,
7233*f5c631daSSadaf Ebrahimi (zt_code + 1) % kNumberOfZRegisters,
7234*f5c631daSSadaf Ebrahimi (zt_code + 2) % kNumberOfZRegisters,
7235*f5c631daSSadaf Ebrahimi (zt_code + 3) % kNumberOfZRegisters};
7236*f5c631daSSadaf Ebrahimi
7237*f5c631daSSadaf Ebrahimi LogicVRegister zt[4] = {
7238*f5c631daSSadaf Ebrahimi ReadVRegister(zt_codes[0]),
7239*f5c631daSSadaf Ebrahimi ReadVRegister(zt_codes[1]),
7240*f5c631daSSadaf Ebrahimi ReadVRegister(zt_codes[2]),
7241*f5c631daSSadaf Ebrahimi ReadVRegister(zt_codes[3]),
7242*f5c631daSSadaf Ebrahimi };
7243*f5c631daSSadaf Ebrahimi
7244*f5c631daSSadaf Ebrahimi // For unpacked forms (e.g. `st1b { z0.h }, ...`, the upper parts of the lanes
7245*f5c631daSSadaf Ebrahimi // are ignored, so read the source register using the VectorFormat that
7246*f5c631daSSadaf Ebrahimi // corresponds with the storage format, and multiply the index accordingly.
7247*f5c631daSSadaf Ebrahimi VectorFormat unpack_vform =
7248*f5c631daSSadaf Ebrahimi SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
7249*f5c631daSSadaf Ebrahimi int unpack_shift = esize_in_bytes_log2 - msize_in_bytes_log2;
7250*f5c631daSSadaf Ebrahimi
7251*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7252*f5c631daSSadaf Ebrahimi if (!pg.IsActive(vform, i)) continue;
7253*f5c631daSSadaf Ebrahimi
7254*f5c631daSSadaf Ebrahimi for (int r = 0; r < reg_count; r++) {
7255*f5c631daSSadaf Ebrahimi uint64_t element_address = addr.GetElementAddress(i, r);
7256*f5c631daSSadaf Ebrahimi StoreLane(zt[r], unpack_vform, i << unpack_shift, element_address);
7257*f5c631daSSadaf Ebrahimi }
7258*f5c631daSSadaf Ebrahimi }
7259*f5c631daSSadaf Ebrahimi
7260*f5c631daSSadaf Ebrahimi if (ShouldTraceWrites()) {
7261*f5c631daSSadaf Ebrahimi PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7262*f5c631daSSadaf Ebrahimi if (esize_in_bytes_log2 == msize_in_bytes_log2) {
7263*f5c631daSSadaf Ebrahimi // Use an FP format where it's likely that we're accessing FP data.
7264*f5c631daSSadaf Ebrahimi format = GetPrintRegisterFormatTryFP(format);
7265*f5c631daSSadaf Ebrahimi }
7266*f5c631daSSadaf Ebrahimi // Stores don't represent a change to the source register's value, so only
7267*f5c631daSSadaf Ebrahimi // print the relevant part of the value.
7268*f5c631daSSadaf Ebrahimi format = GetPrintRegPartial(format);
7269*f5c631daSSadaf Ebrahimi
7270*f5c631daSSadaf Ebrahimi PrintZStructAccess(zt_code,
7271*f5c631daSSadaf Ebrahimi reg_count,
7272*f5c631daSSadaf Ebrahimi pg,
7273*f5c631daSSadaf Ebrahimi format,
7274*f5c631daSSadaf Ebrahimi msize_in_bytes,
7275*f5c631daSSadaf Ebrahimi "->",
7276*f5c631daSSadaf Ebrahimi addr);
7277*f5c631daSSadaf Ebrahimi }
7278*f5c631daSSadaf Ebrahimi }
7279*f5c631daSSadaf Ebrahimi
SVEStructuredLoadHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr,bool is_signed)7280*f5c631daSSadaf Ebrahimi void Simulator::SVEStructuredLoadHelper(VectorFormat vform,
7281*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
7282*f5c631daSSadaf Ebrahimi unsigned zt_code,
7283*f5c631daSSadaf Ebrahimi const LogicSVEAddressVector& addr,
7284*f5c631daSSadaf Ebrahimi bool is_signed) {
7285*f5c631daSSadaf Ebrahimi int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7286*f5c631daSSadaf Ebrahimi int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7287*f5c631daSSadaf Ebrahimi int msize_in_bytes = addr.GetMsizeInBytes();
7288*f5c631daSSadaf Ebrahimi int reg_count = addr.GetRegCount();
7289*f5c631daSSadaf Ebrahimi
7290*f5c631daSSadaf Ebrahimi VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7291*f5c631daSSadaf Ebrahimi VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7292*f5c631daSSadaf Ebrahimi VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7293*f5c631daSSadaf Ebrahimi
7294*f5c631daSSadaf Ebrahimi unsigned zt_codes[4] = {zt_code,
7295*f5c631daSSadaf Ebrahimi (zt_code + 1) % kNumberOfZRegisters,
7296*f5c631daSSadaf Ebrahimi (zt_code + 2) % kNumberOfZRegisters,
7297*f5c631daSSadaf Ebrahimi (zt_code + 3) % kNumberOfZRegisters};
7298*f5c631daSSadaf Ebrahimi LogicVRegister zt[4] = {
7299*f5c631daSSadaf Ebrahimi ReadVRegister(zt_codes[0]),
7300*f5c631daSSadaf Ebrahimi ReadVRegister(zt_codes[1]),
7301*f5c631daSSadaf Ebrahimi ReadVRegister(zt_codes[2]),
7302*f5c631daSSadaf Ebrahimi ReadVRegister(zt_codes[3]),
7303*f5c631daSSadaf Ebrahimi };
7304*f5c631daSSadaf Ebrahimi
7305*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7306*f5c631daSSadaf Ebrahimi for (int r = 0; r < reg_count; r++) {
7307*f5c631daSSadaf Ebrahimi uint64_t element_address = addr.GetElementAddress(i, r);
7308*f5c631daSSadaf Ebrahimi
7309*f5c631daSSadaf Ebrahimi if (!pg.IsActive(vform, i)) {
7310*f5c631daSSadaf Ebrahimi zt[r].SetUint(vform, i, 0);
7311*f5c631daSSadaf Ebrahimi continue;
7312*f5c631daSSadaf Ebrahimi }
7313*f5c631daSSadaf Ebrahimi
7314*f5c631daSSadaf Ebrahimi if (is_signed) {
7315*f5c631daSSadaf Ebrahimi LoadIntToLane(zt[r], vform, msize_in_bytes, i, element_address);
7316*f5c631daSSadaf Ebrahimi } else {
7317*f5c631daSSadaf Ebrahimi LoadUintToLane(zt[r], vform, msize_in_bytes, i, element_address);
7318*f5c631daSSadaf Ebrahimi }
7319*f5c631daSSadaf Ebrahimi }
7320*f5c631daSSadaf Ebrahimi }
7321*f5c631daSSadaf Ebrahimi
7322*f5c631daSSadaf Ebrahimi if (ShouldTraceVRegs()) {
7323*f5c631daSSadaf Ebrahimi PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7324*f5c631daSSadaf Ebrahimi if ((esize_in_bytes_log2 == msize_in_bytes_log2) && !is_signed) {
7325*f5c631daSSadaf Ebrahimi // Use an FP format where it's likely that we're accessing FP data.
7326*f5c631daSSadaf Ebrahimi format = GetPrintRegisterFormatTryFP(format);
7327*f5c631daSSadaf Ebrahimi }
7328*f5c631daSSadaf Ebrahimi PrintZStructAccess(zt_code,
7329*f5c631daSSadaf Ebrahimi reg_count,
7330*f5c631daSSadaf Ebrahimi pg,
7331*f5c631daSSadaf Ebrahimi format,
7332*f5c631daSSadaf Ebrahimi msize_in_bytes,
7333*f5c631daSSadaf Ebrahimi "<-",
7334*f5c631daSSadaf Ebrahimi addr);
7335*f5c631daSSadaf Ebrahimi }
7336*f5c631daSSadaf Ebrahimi }
7337*f5c631daSSadaf Ebrahimi
brka(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn)7338*f5c631daSSadaf Ebrahimi LogicPRegister Simulator::brka(LogicPRegister pd,
7339*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
7340*f5c631daSSadaf Ebrahimi const LogicPRegister& pn) {
7341*f5c631daSSadaf Ebrahimi bool break_ = false;
7342*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7343*f5c631daSSadaf Ebrahimi if (pg.IsActive(kFormatVnB, i)) {
7344*f5c631daSSadaf Ebrahimi pd.SetActive(kFormatVnB, i, !break_);
7345*f5c631daSSadaf Ebrahimi break_ |= pn.IsActive(kFormatVnB, i);
7346*f5c631daSSadaf Ebrahimi }
7347*f5c631daSSadaf Ebrahimi }
7348*f5c631daSSadaf Ebrahimi
7349*f5c631daSSadaf Ebrahimi return pd;
7350*f5c631daSSadaf Ebrahimi }
7351*f5c631daSSadaf Ebrahimi
brkb(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn)7352*f5c631daSSadaf Ebrahimi LogicPRegister Simulator::brkb(LogicPRegister pd,
7353*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
7354*f5c631daSSadaf Ebrahimi const LogicPRegister& pn) {
7355*f5c631daSSadaf Ebrahimi bool break_ = false;
7356*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7357*f5c631daSSadaf Ebrahimi if (pg.IsActive(kFormatVnB, i)) {
7358*f5c631daSSadaf Ebrahimi break_ |= pn.IsActive(kFormatVnB, i);
7359*f5c631daSSadaf Ebrahimi pd.SetActive(kFormatVnB, i, !break_);
7360*f5c631daSSadaf Ebrahimi }
7361*f5c631daSSadaf Ebrahimi }
7362*f5c631daSSadaf Ebrahimi
7363*f5c631daSSadaf Ebrahimi return pd;
7364*f5c631daSSadaf Ebrahimi }
7365*f5c631daSSadaf Ebrahimi
brkn(LogicPRegister pdm,const LogicPRegister & pg,const LogicPRegister & pn)7366*f5c631daSSadaf Ebrahimi LogicPRegister Simulator::brkn(LogicPRegister pdm,
7367*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
7368*f5c631daSSadaf Ebrahimi const LogicPRegister& pn) {
7369*f5c631daSSadaf Ebrahimi if (!IsLastActive(kFormatVnB, pg, pn)) {
7370*f5c631daSSadaf Ebrahimi pfalse(pdm);
7371*f5c631daSSadaf Ebrahimi }
7372*f5c631daSSadaf Ebrahimi return pdm;
7373*f5c631daSSadaf Ebrahimi }
7374*f5c631daSSadaf Ebrahimi
brkpa(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn,const LogicPRegister & pm)7375*f5c631daSSadaf Ebrahimi LogicPRegister Simulator::brkpa(LogicPRegister pd,
7376*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
7377*f5c631daSSadaf Ebrahimi const LogicPRegister& pn,
7378*f5c631daSSadaf Ebrahimi const LogicPRegister& pm) {
7379*f5c631daSSadaf Ebrahimi bool last_active = IsLastActive(kFormatVnB, pg, pn);
7380*f5c631daSSadaf Ebrahimi
7381*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7382*f5c631daSSadaf Ebrahimi bool active = false;
7383*f5c631daSSadaf Ebrahimi if (pg.IsActive(kFormatVnB, i)) {
7384*f5c631daSSadaf Ebrahimi active = last_active;
7385*f5c631daSSadaf Ebrahimi last_active = last_active && !pm.IsActive(kFormatVnB, i);
7386*f5c631daSSadaf Ebrahimi }
7387*f5c631daSSadaf Ebrahimi pd.SetActive(kFormatVnB, i, active);
7388*f5c631daSSadaf Ebrahimi }
7389*f5c631daSSadaf Ebrahimi
7390*f5c631daSSadaf Ebrahimi return pd;
7391*f5c631daSSadaf Ebrahimi }
7392*f5c631daSSadaf Ebrahimi
brkpb(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn,const LogicPRegister & pm)7393*f5c631daSSadaf Ebrahimi LogicPRegister Simulator::brkpb(LogicPRegister pd,
7394*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
7395*f5c631daSSadaf Ebrahimi const LogicPRegister& pn,
7396*f5c631daSSadaf Ebrahimi const LogicPRegister& pm) {
7397*f5c631daSSadaf Ebrahimi bool last_active = IsLastActive(kFormatVnB, pg, pn);
7398*f5c631daSSadaf Ebrahimi
7399*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7400*f5c631daSSadaf Ebrahimi bool active = false;
7401*f5c631daSSadaf Ebrahimi if (pg.IsActive(kFormatVnB, i)) {
7402*f5c631daSSadaf Ebrahimi last_active = last_active && !pm.IsActive(kFormatVnB, i);
7403*f5c631daSSadaf Ebrahimi active = last_active;
7404*f5c631daSSadaf Ebrahimi }
7405*f5c631daSSadaf Ebrahimi pd.SetActive(kFormatVnB, i, active);
7406*f5c631daSSadaf Ebrahimi }
7407*f5c631daSSadaf Ebrahimi
7408*f5c631daSSadaf Ebrahimi return pd;
7409*f5c631daSSadaf Ebrahimi }
7410*f5c631daSSadaf Ebrahimi
SVEFaultTolerantLoadHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr,SVEFaultTolerantLoadType type,bool is_signed)7411*f5c631daSSadaf Ebrahimi void Simulator::SVEFaultTolerantLoadHelper(VectorFormat vform,
7412*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
7413*f5c631daSSadaf Ebrahimi unsigned zt_code,
7414*f5c631daSSadaf Ebrahimi const LogicSVEAddressVector& addr,
7415*f5c631daSSadaf Ebrahimi SVEFaultTolerantLoadType type,
7416*f5c631daSSadaf Ebrahimi bool is_signed) {
7417*f5c631daSSadaf Ebrahimi int esize_in_bytes = LaneSizeInBytesFromFormat(vform);
7418*f5c631daSSadaf Ebrahimi int msize_in_bits = addr.GetMsizeInBits();
7419*f5c631daSSadaf Ebrahimi int msize_in_bytes = addr.GetMsizeInBytes();
7420*f5c631daSSadaf Ebrahimi
7421*f5c631daSSadaf Ebrahimi VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7422*f5c631daSSadaf Ebrahimi VIXL_ASSERT(esize_in_bytes >= msize_in_bytes);
7423*f5c631daSSadaf Ebrahimi VIXL_ASSERT(addr.GetRegCount() == 1);
7424*f5c631daSSadaf Ebrahimi
7425*f5c631daSSadaf Ebrahimi LogicVRegister zt = ReadVRegister(zt_code);
7426*f5c631daSSadaf Ebrahimi LogicPRegister ffr = ReadFFR();
7427*f5c631daSSadaf Ebrahimi
7428*f5c631daSSadaf Ebrahimi // Non-faulting loads are allowed to fail arbitrarily. To stress user
7429*f5c631daSSadaf Ebrahimi // code, fail a random element in roughly one in eight full-vector loads.
7430*f5c631daSSadaf Ebrahimi uint32_t rnd = static_cast<uint32_t>(jrand48(rand_state_));
7431*f5c631daSSadaf Ebrahimi int fake_fault_at_lane = rnd % (LaneCountFromFormat(vform) * 8);
7432*f5c631daSSadaf Ebrahimi
7433*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7434*f5c631daSSadaf Ebrahimi uint64_t value = 0;
7435*f5c631daSSadaf Ebrahimi
7436*f5c631daSSadaf Ebrahimi if (pg.IsActive(vform, i)) {
7437*f5c631daSSadaf Ebrahimi uint64_t element_address = addr.GetElementAddress(i, 0);
7438*f5c631daSSadaf Ebrahimi
7439*f5c631daSSadaf Ebrahimi if (type == kSVEFirstFaultLoad) {
7440*f5c631daSSadaf Ebrahimi // First-faulting loads always load the first active element, regardless
7441*f5c631daSSadaf Ebrahimi // of FFR. The result will be discarded if its FFR lane is inactive, but
7442*f5c631daSSadaf Ebrahimi // it could still generate a fault.
7443*f5c631daSSadaf Ebrahimi value = MemReadUint(msize_in_bytes, element_address);
7444*f5c631daSSadaf Ebrahimi // All subsequent elements have non-fault semantics.
7445*f5c631daSSadaf Ebrahimi type = kSVENonFaultLoad;
7446*f5c631daSSadaf Ebrahimi
7447*f5c631daSSadaf Ebrahimi } else if (ffr.IsActive(vform, i)) {
7448*f5c631daSSadaf Ebrahimi // Simulation of fault-tolerant loads relies on system calls, and is
7449*f5c631daSSadaf Ebrahimi // likely to be relatively slow, so we only actually perform the load if
7450*f5c631daSSadaf Ebrahimi // its FFR lane is active.
7451*f5c631daSSadaf Ebrahimi
7452*f5c631daSSadaf Ebrahimi bool can_read = (i < fake_fault_at_lane) &&
7453*f5c631daSSadaf Ebrahimi CanReadMemory(element_address, msize_in_bytes);
7454*f5c631daSSadaf Ebrahimi if (can_read) {
7455*f5c631daSSadaf Ebrahimi value = MemReadUint(msize_in_bytes, element_address);
7456*f5c631daSSadaf Ebrahimi } else {
7457*f5c631daSSadaf Ebrahimi // Propagate the fault to the end of FFR.
7458*f5c631daSSadaf Ebrahimi for (int j = i; j < LaneCountFromFormat(vform); j++) {
7459*f5c631daSSadaf Ebrahimi ffr.SetActive(vform, j, false);
7460*f5c631daSSadaf Ebrahimi }
7461*f5c631daSSadaf Ebrahimi }
7462*f5c631daSSadaf Ebrahimi }
7463*f5c631daSSadaf Ebrahimi }
7464*f5c631daSSadaf Ebrahimi
7465*f5c631daSSadaf Ebrahimi // The architecture permits a few possible results for inactive FFR lanes
7466*f5c631daSSadaf Ebrahimi // (including those caused by a fault in this instruction). We choose to
7467*f5c631daSSadaf Ebrahimi // leave the register value unchanged (like merging predication) because
7468*f5c631daSSadaf Ebrahimi // no other input to this instruction can have the same behaviour.
7469*f5c631daSSadaf Ebrahimi //
7470*f5c631daSSadaf Ebrahimi // Note that this behaviour takes precedence over pg's zeroing predication.
7471*f5c631daSSadaf Ebrahimi
7472*f5c631daSSadaf Ebrahimi if (ffr.IsActive(vform, i)) {
7473*f5c631daSSadaf Ebrahimi int msb = msize_in_bits - 1;
7474*f5c631daSSadaf Ebrahimi if (is_signed) {
7475*f5c631daSSadaf Ebrahimi zt.SetInt(vform, i, ExtractSignedBitfield64(msb, 0, value));
7476*f5c631daSSadaf Ebrahimi } else {
7477*f5c631daSSadaf Ebrahimi zt.SetUint(vform, i, ExtractUnsignedBitfield64(msb, 0, value));
7478*f5c631daSSadaf Ebrahimi }
7479*f5c631daSSadaf Ebrahimi }
7480*f5c631daSSadaf Ebrahimi }
7481*f5c631daSSadaf Ebrahimi
7482*f5c631daSSadaf Ebrahimi if (ShouldTraceVRegs()) {
7483*f5c631daSSadaf Ebrahimi PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7484*f5c631daSSadaf Ebrahimi if ((esize_in_bytes == msize_in_bytes) && !is_signed) {
7485*f5c631daSSadaf Ebrahimi // Use an FP format where it's likely that we're accessing FP data.
7486*f5c631daSSadaf Ebrahimi format = GetPrintRegisterFormatTryFP(format);
7487*f5c631daSSadaf Ebrahimi }
7488*f5c631daSSadaf Ebrahimi // Log accessed lanes that are active in both pg and ffr. PrintZStructAccess
7489*f5c631daSSadaf Ebrahimi // expects a single mask, so combine the two predicates.
7490*f5c631daSSadaf Ebrahimi SimPRegister mask;
7491*f5c631daSSadaf Ebrahimi SVEPredicateLogicalHelper(AND_p_p_pp_z, mask, pg, ffr);
7492*f5c631daSSadaf Ebrahimi PrintZStructAccess(zt_code, 1, mask, format, msize_in_bytes, "<-", addr);
7493*f5c631daSSadaf Ebrahimi }
7494*f5c631daSSadaf Ebrahimi }
7495*f5c631daSSadaf Ebrahimi
SVEGatherLoadScalarPlusVectorHelper(const Instruction * instr,VectorFormat vform,SVEOffsetModifier mod)7496*f5c631daSSadaf Ebrahimi void Simulator::SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr,
7497*f5c631daSSadaf Ebrahimi VectorFormat vform,
7498*f5c631daSSadaf Ebrahimi SVEOffsetModifier mod) {
7499*f5c631daSSadaf Ebrahimi bool is_signed = instr->ExtractBit(14) == 0;
7500*f5c631daSSadaf Ebrahimi bool is_ff = instr->ExtractBit(13) == 1;
7501*f5c631daSSadaf Ebrahimi // Note that these instructions don't use the Dtype encoding.
7502*f5c631daSSadaf Ebrahimi int msize_in_bytes_log2 = instr->ExtractBits(24, 23);
7503*f5c631daSSadaf Ebrahimi int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
7504*f5c631daSSadaf Ebrahimi uint64_t base = ReadXRegister(instr->GetRn());
7505*f5c631daSSadaf Ebrahimi LogicSVEAddressVector addr(base,
7506*f5c631daSSadaf Ebrahimi &ReadVRegister(instr->GetRm()),
7507*f5c631daSSadaf Ebrahimi vform,
7508*f5c631daSSadaf Ebrahimi mod,
7509*f5c631daSSadaf Ebrahimi scale);
7510*f5c631daSSadaf Ebrahimi addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
7511*f5c631daSSadaf Ebrahimi if (is_ff) {
7512*f5c631daSSadaf Ebrahimi SVEFaultTolerantLoadHelper(vform,
7513*f5c631daSSadaf Ebrahimi ReadPRegister(instr->GetPgLow8()),
7514*f5c631daSSadaf Ebrahimi instr->GetRt(),
7515*f5c631daSSadaf Ebrahimi addr,
7516*f5c631daSSadaf Ebrahimi kSVEFirstFaultLoad,
7517*f5c631daSSadaf Ebrahimi is_signed);
7518*f5c631daSSadaf Ebrahimi } else {
7519*f5c631daSSadaf Ebrahimi SVEStructuredLoadHelper(vform,
7520*f5c631daSSadaf Ebrahimi ReadPRegister(instr->GetPgLow8()),
7521*f5c631daSSadaf Ebrahimi instr->GetRt(),
7522*f5c631daSSadaf Ebrahimi addr,
7523*f5c631daSSadaf Ebrahimi is_signed);
7524*f5c631daSSadaf Ebrahimi }
7525*f5c631daSSadaf Ebrahimi }
7526*f5c631daSSadaf Ebrahimi
GetFirstActive(VectorFormat vform,const LogicPRegister & pg) const7527*f5c631daSSadaf Ebrahimi int Simulator::GetFirstActive(VectorFormat vform,
7528*f5c631daSSadaf Ebrahimi const LogicPRegister& pg) const {
7529*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7530*f5c631daSSadaf Ebrahimi if (pg.IsActive(vform, i)) return i;
7531*f5c631daSSadaf Ebrahimi }
7532*f5c631daSSadaf Ebrahimi return -1;
7533*f5c631daSSadaf Ebrahimi }
7534*f5c631daSSadaf Ebrahimi
GetLastActive(VectorFormat vform,const LogicPRegister & pg) const7535*f5c631daSSadaf Ebrahimi int Simulator::GetLastActive(VectorFormat vform,
7536*f5c631daSSadaf Ebrahimi const LogicPRegister& pg) const {
7537*f5c631daSSadaf Ebrahimi for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
7538*f5c631daSSadaf Ebrahimi if (pg.IsActive(vform, i)) return i;
7539*f5c631daSSadaf Ebrahimi }
7540*f5c631daSSadaf Ebrahimi return -1;
7541*f5c631daSSadaf Ebrahimi }
7542*f5c631daSSadaf Ebrahimi
CountActiveLanes(VectorFormat vform,const LogicPRegister & pg) const7543*f5c631daSSadaf Ebrahimi int Simulator::CountActiveLanes(VectorFormat vform,
7544*f5c631daSSadaf Ebrahimi const LogicPRegister& pg) const {
7545*f5c631daSSadaf Ebrahimi int count = 0;
7546*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7547*f5c631daSSadaf Ebrahimi count += pg.IsActive(vform, i) ? 1 : 0;
7548*f5c631daSSadaf Ebrahimi }
7549*f5c631daSSadaf Ebrahimi return count;
7550*f5c631daSSadaf Ebrahimi }
7551*f5c631daSSadaf Ebrahimi
CountActiveAndTrueLanes(VectorFormat vform,const LogicPRegister & pg,const LogicPRegister & pn) const7552*f5c631daSSadaf Ebrahimi int Simulator::CountActiveAndTrueLanes(VectorFormat vform,
7553*f5c631daSSadaf Ebrahimi const LogicPRegister& pg,
7554*f5c631daSSadaf Ebrahimi const LogicPRegister& pn) const {
7555*f5c631daSSadaf Ebrahimi int count = 0;
7556*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7557*f5c631daSSadaf Ebrahimi count += (pg.IsActive(vform, i) && pn.IsActive(vform, i)) ? 1 : 0;
7558*f5c631daSSadaf Ebrahimi }
7559*f5c631daSSadaf Ebrahimi return count;
7560*f5c631daSSadaf Ebrahimi }
7561*f5c631daSSadaf Ebrahimi
GetPredicateConstraintLaneCount(VectorFormat vform,int pattern) const7562*f5c631daSSadaf Ebrahimi int Simulator::GetPredicateConstraintLaneCount(VectorFormat vform,
7563*f5c631daSSadaf Ebrahimi int pattern) const {
7564*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsSVEFormat(vform));
7565*f5c631daSSadaf Ebrahimi int all = LaneCountFromFormat(vform);
7566*f5c631daSSadaf Ebrahimi VIXL_ASSERT(all > 0);
7567*f5c631daSSadaf Ebrahimi
7568*f5c631daSSadaf Ebrahimi switch (pattern) {
7569*f5c631daSSadaf Ebrahimi case SVE_VL1:
7570*f5c631daSSadaf Ebrahimi case SVE_VL2:
7571*f5c631daSSadaf Ebrahimi case SVE_VL3:
7572*f5c631daSSadaf Ebrahimi case SVE_VL4:
7573*f5c631daSSadaf Ebrahimi case SVE_VL5:
7574*f5c631daSSadaf Ebrahimi case SVE_VL6:
7575*f5c631daSSadaf Ebrahimi case SVE_VL7:
7576*f5c631daSSadaf Ebrahimi case SVE_VL8:
7577*f5c631daSSadaf Ebrahimi // VL1-VL8 are encoded directly.
7578*f5c631daSSadaf Ebrahimi VIXL_STATIC_ASSERT(SVE_VL1 == 1);
7579*f5c631daSSadaf Ebrahimi VIXL_STATIC_ASSERT(SVE_VL8 == 8);
7580*f5c631daSSadaf Ebrahimi return (pattern <= all) ? pattern : 0;
7581*f5c631daSSadaf Ebrahimi case SVE_VL16:
7582*f5c631daSSadaf Ebrahimi case SVE_VL32:
7583*f5c631daSSadaf Ebrahimi case SVE_VL64:
7584*f5c631daSSadaf Ebrahimi case SVE_VL128:
7585*f5c631daSSadaf Ebrahimi case SVE_VL256: {
7586*f5c631daSSadaf Ebrahimi // VL16-VL256 are encoded as log2(N) + c.
7587*f5c631daSSadaf Ebrahimi int min = 16 << (pattern - SVE_VL16);
7588*f5c631daSSadaf Ebrahimi return (min <= all) ? min : 0;
7589*f5c631daSSadaf Ebrahimi }
7590*f5c631daSSadaf Ebrahimi // Special cases.
7591*f5c631daSSadaf Ebrahimi case SVE_POW2:
7592*f5c631daSSadaf Ebrahimi return 1 << HighestSetBitPosition(all);
7593*f5c631daSSadaf Ebrahimi case SVE_MUL4:
7594*f5c631daSSadaf Ebrahimi return all - (all % 4);
7595*f5c631daSSadaf Ebrahimi case SVE_MUL3:
7596*f5c631daSSadaf Ebrahimi return all - (all % 3);
7597*f5c631daSSadaf Ebrahimi case SVE_ALL:
7598*f5c631daSSadaf Ebrahimi return all;
7599*f5c631daSSadaf Ebrahimi }
7600*f5c631daSSadaf Ebrahimi // Unnamed cases architecturally return 0.
7601*f5c631daSSadaf Ebrahimi return 0;
7602*f5c631daSSadaf Ebrahimi }
7603*f5c631daSSadaf Ebrahimi
match(VectorFormat vform,LogicPRegister dst,const LogicVRegister & haystack,const LogicVRegister & needles,bool negate_match)7604*f5c631daSSadaf Ebrahimi LogicPRegister Simulator::match(VectorFormat vform,
7605*f5c631daSSadaf Ebrahimi LogicPRegister dst,
7606*f5c631daSSadaf Ebrahimi const LogicVRegister& haystack,
7607*f5c631daSSadaf Ebrahimi const LogicVRegister& needles,
7608*f5c631daSSadaf Ebrahimi bool negate_match) {
7609*f5c631daSSadaf Ebrahimi SimVRegister ztemp;
7610*f5c631daSSadaf Ebrahimi SimPRegister ptemp;
7611*f5c631daSSadaf Ebrahimi
7612*f5c631daSSadaf Ebrahimi pfalse(dst);
7613*f5c631daSSadaf Ebrahimi int lanes_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
7614*f5c631daSSadaf Ebrahimi for (int i = 0; i < lanes_per_segment; i++) {
7615*f5c631daSSadaf Ebrahimi dup_elements_to_segments(vform, ztemp, needles, i);
7616*f5c631daSSadaf Ebrahimi SVEIntCompareVectorsHelper(eq,
7617*f5c631daSSadaf Ebrahimi vform,
7618*f5c631daSSadaf Ebrahimi ptemp,
7619*f5c631daSSadaf Ebrahimi GetPTrue(),
7620*f5c631daSSadaf Ebrahimi haystack,
7621*f5c631daSSadaf Ebrahimi ztemp,
7622*f5c631daSSadaf Ebrahimi false,
7623*f5c631daSSadaf Ebrahimi LeaveFlags);
7624*f5c631daSSadaf Ebrahimi SVEPredicateLogicalHelper(ORR_p_p_pp_z, dst, dst, ptemp);
7625*f5c631daSSadaf Ebrahimi }
7626*f5c631daSSadaf Ebrahimi if (negate_match) {
7627*f5c631daSSadaf Ebrahimi ptrue(vform, ptemp, SVE_ALL);
7628*f5c631daSSadaf Ebrahimi SVEPredicateLogicalHelper(EOR_p_p_pp_z, dst, dst, ptemp);
7629*f5c631daSSadaf Ebrahimi }
7630*f5c631daSSadaf Ebrahimi return dst;
7631*f5c631daSSadaf Ebrahimi }
7632*f5c631daSSadaf Ebrahimi
GetStructAddress(int lane) const7633*f5c631daSSadaf Ebrahimi uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const {
7634*f5c631daSSadaf Ebrahimi if (IsContiguous()) {
7635*f5c631daSSadaf Ebrahimi return base_ + (lane * GetRegCount()) * GetMsizeInBytes();
7636*f5c631daSSadaf Ebrahimi }
7637*f5c631daSSadaf Ebrahimi
7638*f5c631daSSadaf Ebrahimi VIXL_ASSERT(IsScatterGather());
7639*f5c631daSSadaf Ebrahimi VIXL_ASSERT(vector_ != NULL);
7640*f5c631daSSadaf Ebrahimi
7641*f5c631daSSadaf Ebrahimi // For scatter-gather accesses, we need to extract the offset from vector_,
7642*f5c631daSSadaf Ebrahimi // and apply modifiers.
7643*f5c631daSSadaf Ebrahimi
7644*f5c631daSSadaf Ebrahimi uint64_t offset = 0;
7645*f5c631daSSadaf Ebrahimi switch (vector_form_) {
7646*f5c631daSSadaf Ebrahimi case kFormatVnS:
7647*f5c631daSSadaf Ebrahimi offset = vector_->GetLane<uint32_t>(lane);
7648*f5c631daSSadaf Ebrahimi break;
7649*f5c631daSSadaf Ebrahimi case kFormatVnD:
7650*f5c631daSSadaf Ebrahimi offset = vector_->GetLane<uint64_t>(lane);
7651*f5c631daSSadaf Ebrahimi break;
7652*f5c631daSSadaf Ebrahimi default:
7653*f5c631daSSadaf Ebrahimi VIXL_UNIMPLEMENTED();
7654*f5c631daSSadaf Ebrahimi break;
7655*f5c631daSSadaf Ebrahimi }
7656*f5c631daSSadaf Ebrahimi
7657*f5c631daSSadaf Ebrahimi switch (vector_mod_) {
7658*f5c631daSSadaf Ebrahimi case SVE_MUL_VL:
7659*f5c631daSSadaf Ebrahimi VIXL_UNIMPLEMENTED();
7660*f5c631daSSadaf Ebrahimi break;
7661*f5c631daSSadaf Ebrahimi case SVE_LSL:
7662*f5c631daSSadaf Ebrahimi // We apply the shift below. There's nothing to do here.
7663*f5c631daSSadaf Ebrahimi break;
7664*f5c631daSSadaf Ebrahimi case NO_SVE_OFFSET_MODIFIER:
7665*f5c631daSSadaf Ebrahimi VIXL_ASSERT(vector_shift_ == 0);
7666*f5c631daSSadaf Ebrahimi break;
7667*f5c631daSSadaf Ebrahimi case SVE_UXTW:
7668*f5c631daSSadaf Ebrahimi offset = ExtractUnsignedBitfield64(kWRegSize - 1, 0, offset);
7669*f5c631daSSadaf Ebrahimi break;
7670*f5c631daSSadaf Ebrahimi case SVE_SXTW:
7671*f5c631daSSadaf Ebrahimi offset = ExtractSignedBitfield64(kWRegSize - 1, 0, offset);
7672*f5c631daSSadaf Ebrahimi break;
7673*f5c631daSSadaf Ebrahimi }
7674*f5c631daSSadaf Ebrahimi
7675*f5c631daSSadaf Ebrahimi return base_ + (offset << vector_shift_);
7676*f5c631daSSadaf Ebrahimi }
7677*f5c631daSSadaf Ebrahimi
pack_odd_elements(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)7678*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::pack_odd_elements(VectorFormat vform,
7679*f5c631daSSadaf Ebrahimi LogicVRegister dst,
7680*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
7681*f5c631daSSadaf Ebrahimi SimVRegister zero;
7682*f5c631daSSadaf Ebrahimi zero.Clear();
7683*f5c631daSSadaf Ebrahimi return uzp2(vform, dst, src, zero);
7684*f5c631daSSadaf Ebrahimi }
7685*f5c631daSSadaf Ebrahimi
pack_even_elements(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)7686*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::pack_even_elements(VectorFormat vform,
7687*f5c631daSSadaf Ebrahimi LogicVRegister dst,
7688*f5c631daSSadaf Ebrahimi const LogicVRegister& src) {
7689*f5c631daSSadaf Ebrahimi SimVRegister zero;
7690*f5c631daSSadaf Ebrahimi zero.Clear();
7691*f5c631daSSadaf Ebrahimi return uzp1(vform, dst, src, zero);
7692*f5c631daSSadaf Ebrahimi }
7693*f5c631daSSadaf Ebrahimi
adcl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool top)7694*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::adcl(VectorFormat vform,
7695*f5c631daSSadaf Ebrahimi LogicVRegister dst,
7696*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
7697*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
7698*f5c631daSSadaf Ebrahimi bool top) {
7699*f5c631daSSadaf Ebrahimi unsigned reg_size = LaneSizeInBitsFromFormat(vform);
7700*f5c631daSSadaf Ebrahimi VIXL_ASSERT((reg_size == kSRegSize) || (reg_size == kDRegSize));
7701*f5c631daSSadaf Ebrahimi
7702*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
7703*f5c631daSSadaf Ebrahimi uint64_t left = src1.Uint(vform, i + (top ? 1 : 0));
7704*f5c631daSSadaf Ebrahimi uint64_t right = dst.Uint(vform, i);
7705*f5c631daSSadaf Ebrahimi unsigned carry_in = src2.Uint(vform, i + 1) & 1;
7706*f5c631daSSadaf Ebrahimi std::pair<uint64_t, uint8_t> val_and_flags =
7707*f5c631daSSadaf Ebrahimi AddWithCarry(reg_size, left, right, carry_in);
7708*f5c631daSSadaf Ebrahimi
7709*f5c631daSSadaf Ebrahimi // Set even lanes to the result of the addition.
7710*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i, val_and_flags.first);
7711*f5c631daSSadaf Ebrahimi
7712*f5c631daSSadaf Ebrahimi // Set odd lanes to the carry flag from the addition.
7713*f5c631daSSadaf Ebrahimi uint64_t carry_out = (val_and_flags.second >> 1) & 1;
7714*f5c631daSSadaf Ebrahimi dst.SetUint(vform, i + 1, carry_out);
7715*f5c631daSSadaf Ebrahimi }
7716*f5c631daSSadaf Ebrahimi return dst;
7717*f5c631daSSadaf Ebrahimi }
7718*f5c631daSSadaf Ebrahimi
7719*f5c631daSSadaf Ebrahimi // Multiply the 2x8 8-bit matrix in src1 by the 8x2 8-bit matrix in src2, add
7720*f5c631daSSadaf Ebrahimi // the 2x2 32-bit result to the matrix in srcdst, and write back to srcdst.
7721*f5c631daSSadaf Ebrahimi //
7722*f5c631daSSadaf Ebrahimi // Matrices of the form:
7723*f5c631daSSadaf Ebrahimi //
7724*f5c631daSSadaf Ebrahimi // src1 = ( a b c d e f g h ) src2 = ( A B )
7725*f5c631daSSadaf Ebrahimi // ( i j k l m n o p ) ( C D )
7726*f5c631daSSadaf Ebrahimi // ( E F )
7727*f5c631daSSadaf Ebrahimi // ( G H )
7728*f5c631daSSadaf Ebrahimi // ( I J )
7729*f5c631daSSadaf Ebrahimi // ( K L )
7730*f5c631daSSadaf Ebrahimi // ( M N )
7731*f5c631daSSadaf Ebrahimi // ( O P )
7732*f5c631daSSadaf Ebrahimi //
7733*f5c631daSSadaf Ebrahimi // Are stored in the input vector registers as:
7734*f5c631daSSadaf Ebrahimi //
7735*f5c631daSSadaf Ebrahimi // 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
7736*f5c631daSSadaf Ebrahimi // src1 = [ p | o | n | m | l | k | j | i | h | g | f | e | d | c | b | a ]
7737*f5c631daSSadaf Ebrahimi // src2 = [ P | N | L | J | H | F | D | B | O | M | K | I | G | E | C | A ]
7738*f5c631daSSadaf Ebrahimi //
matmul(VectorFormat vform_dst,LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2,bool src1_signed,bool src2_signed)7739*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::matmul(VectorFormat vform_dst,
7740*f5c631daSSadaf Ebrahimi LogicVRegister srcdst,
7741*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
7742*f5c631daSSadaf Ebrahimi const LogicVRegister& src2,
7743*f5c631daSSadaf Ebrahimi bool src1_signed,
7744*f5c631daSSadaf Ebrahimi bool src2_signed) {
7745*f5c631daSSadaf Ebrahimi // Two destination forms are supported: Q register containing four S-sized
7746*f5c631daSSadaf Ebrahimi // elements (4S) and Z register containing n S-sized elements (VnS).
7747*f5c631daSSadaf Ebrahimi VIXL_ASSERT((vform_dst == kFormat4S) || (vform_dst == kFormatVnS));
7748*f5c631daSSadaf Ebrahimi VectorFormat vform_src = kFormatVnB;
7749*f5c631daSSadaf Ebrahimi int b_per_segment = kQRegSize / kBRegSize;
7750*f5c631daSSadaf Ebrahimi int s_per_segment = kQRegSize / kSRegSize;
7751*f5c631daSSadaf Ebrahimi int64_t result[kZRegMaxSizeInBytes / kSRegSizeInBytes] = {};
7752*f5c631daSSadaf Ebrahimi int segment_count = LaneCountFromFormat(vform_dst) / 4;
7753*f5c631daSSadaf Ebrahimi for (int seg = 0; seg < segment_count; seg++) {
7754*f5c631daSSadaf Ebrahimi for (int i = 0; i < 2; i++) {
7755*f5c631daSSadaf Ebrahimi for (int j = 0; j < 2; j++) {
7756*f5c631daSSadaf Ebrahimi int dstidx = (2 * i) + j + (seg * s_per_segment);
7757*f5c631daSSadaf Ebrahimi int64_t sum = srcdst.Int(vform_dst, dstidx);
7758*f5c631daSSadaf Ebrahimi for (int k = 0; k < 8; k++) {
7759*f5c631daSSadaf Ebrahimi int idx1 = (8 * i) + k + (seg * b_per_segment);
7760*f5c631daSSadaf Ebrahimi int idx2 = (8 * j) + k + (seg * b_per_segment);
7761*f5c631daSSadaf Ebrahimi int64_t e1 = src1_signed ? src1.Int(vform_src, idx1)
7762*f5c631daSSadaf Ebrahimi : src1.Uint(vform_src, idx1);
7763*f5c631daSSadaf Ebrahimi int64_t e2 = src2_signed ? src2.Int(vform_src, idx2)
7764*f5c631daSSadaf Ebrahimi : src2.Uint(vform_src, idx2);
7765*f5c631daSSadaf Ebrahimi sum += e1 * e2;
7766*f5c631daSSadaf Ebrahimi }
7767*f5c631daSSadaf Ebrahimi result[dstidx] = sum;
7768*f5c631daSSadaf Ebrahimi }
7769*f5c631daSSadaf Ebrahimi }
7770*f5c631daSSadaf Ebrahimi }
7771*f5c631daSSadaf Ebrahimi srcdst.SetIntArray(vform_dst, result);
7772*f5c631daSSadaf Ebrahimi return srcdst;
7773*f5c631daSSadaf Ebrahimi }
7774*f5c631daSSadaf Ebrahimi
7775*f5c631daSSadaf Ebrahimi // Multiply the 2x2 FP matrix in src1 by the 2x2 FP matrix in src2, add the 2x2
7776*f5c631daSSadaf Ebrahimi // result to the matrix in srcdst, and write back to srcdst.
7777*f5c631daSSadaf Ebrahimi //
7778*f5c631daSSadaf Ebrahimi // Matrices of the form:
7779*f5c631daSSadaf Ebrahimi //
7780*f5c631daSSadaf Ebrahimi // src1 = ( a b ) src2 = ( A B )
7781*f5c631daSSadaf Ebrahimi // ( c d ) ( C D )
7782*f5c631daSSadaf Ebrahimi //
7783*f5c631daSSadaf Ebrahimi // Are stored in the input vector registers as:
7784*f5c631daSSadaf Ebrahimi //
7785*f5c631daSSadaf Ebrahimi // 3 2 1 0
7786*f5c631daSSadaf Ebrahimi // src1 = [ d | c | b | a ]
7787*f5c631daSSadaf Ebrahimi // src2 = [ D | B | C | A ]
7788*f5c631daSSadaf Ebrahimi //
7789*f5c631daSSadaf Ebrahimi template <typename T>
fmatmul(VectorFormat vform,LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2)7790*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fmatmul(VectorFormat vform,
7791*f5c631daSSadaf Ebrahimi LogicVRegister srcdst,
7792*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
7793*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
7794*f5c631daSSadaf Ebrahimi T result[kZRegMaxSizeInBytes / sizeof(T)];
7795*f5c631daSSadaf Ebrahimi int T_per_segment = 4;
7796*f5c631daSSadaf Ebrahimi int segment_count = GetVectorLengthInBytes() / (T_per_segment * sizeof(T));
7797*f5c631daSSadaf Ebrahimi for (int seg = 0; seg < segment_count; seg++) {
7798*f5c631daSSadaf Ebrahimi int segoff = seg * T_per_segment;
7799*f5c631daSSadaf Ebrahimi for (int i = 0; i < 2; i++) {
7800*f5c631daSSadaf Ebrahimi for (int j = 0; j < 2; j++) {
7801*f5c631daSSadaf Ebrahimi T prod0 = FPMulNaNs(src1.Float<T>(2 * i + 0 + segoff),
7802*f5c631daSSadaf Ebrahimi src2.Float<T>(2 * j + 0 + segoff));
7803*f5c631daSSadaf Ebrahimi T prod1 = FPMulNaNs(src1.Float<T>(2 * i + 1 + segoff),
7804*f5c631daSSadaf Ebrahimi src2.Float<T>(2 * j + 1 + segoff));
7805*f5c631daSSadaf Ebrahimi T sum = FPAdd(srcdst.Float<T>(2 * i + j + segoff), prod0);
7806*f5c631daSSadaf Ebrahimi result[2 * i + j + segoff] = FPAdd(sum, prod1);
7807*f5c631daSSadaf Ebrahimi }
7808*f5c631daSSadaf Ebrahimi }
7809*f5c631daSSadaf Ebrahimi }
7810*f5c631daSSadaf Ebrahimi for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7811*f5c631daSSadaf Ebrahimi // Elements outside a multiple of 4T are set to zero. This happens only
7812*f5c631daSSadaf Ebrahimi // for double precision operations, when the VL is a multiple of 128 bits,
7813*f5c631daSSadaf Ebrahimi // but not a mutiple of 256 bits.
7814*f5c631daSSadaf Ebrahimi T value = (i < (T_per_segment * segment_count)) ? result[i] : 0;
7815*f5c631daSSadaf Ebrahimi srcdst.SetFloat<T>(vform, i, value);
7816*f5c631daSSadaf Ebrahimi }
7817*f5c631daSSadaf Ebrahimi return srcdst;
7818*f5c631daSSadaf Ebrahimi }
7819*f5c631daSSadaf Ebrahimi
fmatmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)7820*f5c631daSSadaf Ebrahimi LogicVRegister Simulator::fmatmul(VectorFormat vform,
7821*f5c631daSSadaf Ebrahimi LogicVRegister dst,
7822*f5c631daSSadaf Ebrahimi const LogicVRegister& src1,
7823*f5c631daSSadaf Ebrahimi const LogicVRegister& src2) {
7824*f5c631daSSadaf Ebrahimi if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
7825*f5c631daSSadaf Ebrahimi fmatmul<float>(vform, dst, src1, src2);
7826*f5c631daSSadaf Ebrahimi } else {
7827*f5c631daSSadaf Ebrahimi VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
7828*f5c631daSSadaf Ebrahimi fmatmul<double>(vform, dst, src1, src2);
7829*f5c631daSSadaf Ebrahimi }
7830*f5c631daSSadaf Ebrahimi return dst;
7831*f5c631daSSadaf Ebrahimi }
7832*f5c631daSSadaf Ebrahimi
7833*f5c631daSSadaf Ebrahimi } // namespace aarch64
7834*f5c631daSSadaf Ebrahimi } // namespace vixl
7835*f5c631daSSadaf Ebrahimi
7836*f5c631daSSadaf Ebrahimi #endif // VIXL_INCLUDE_SIMULATOR_AARCH64
7837