// xref: /aosp_15_r20/external/vixl/examples/aarch32/mandelbrot.cc (revision f5c631da2f1efdd72b5fd1e20510e4042af13d77)
// Copyright 2017, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27*f5c631daSSadaf Ebrahimi #include "examples.h"
28*f5c631daSSadaf Ebrahimi 
29*f5c631daSSadaf Ebrahimi using namespace vixl;
30*f5c631daSSadaf Ebrahimi using namespace vixl::aarch32;
31*f5c631daSSadaf Ebrahimi 
32*f5c631daSSadaf Ebrahimi #define __ masm->
33*f5c631daSSadaf Ebrahimi 
GenerateMandelBrot(MacroAssembler * masm)34*f5c631daSSadaf Ebrahimi void GenerateMandelBrot(MacroAssembler* masm) {
35*f5c631daSSadaf Ebrahimi   const QRegister kCReal = q0;
36*f5c631daSSadaf Ebrahimi   const QRegister kCImag = q1;
37*f5c631daSSadaf Ebrahimi 
38*f5c631daSSadaf Ebrahimi   const QRegister kCRealStep = q13;
39*f5c631daSSadaf Ebrahimi   const QRegister kCImagStep = q14;
40*f5c631daSSadaf Ebrahimi 
41*f5c631daSSadaf Ebrahimi   const QRegister kModSqLimit = q15;
42*f5c631daSSadaf Ebrahimi 
43*f5c631daSSadaf Ebrahimi   // Save register values.
44*f5c631daSSadaf Ebrahimi   __ Push(RegisterList(r4, r5, r6));
45*f5c631daSSadaf Ebrahimi 
46*f5c631daSSadaf Ebrahimi   __ Vmov(F32, kCRealStep, 0.125);
47*f5c631daSSadaf Ebrahimi   __ Vmov(F32, kCImagStep, 0.0625);
48*f5c631daSSadaf Ebrahimi 
49*f5c631daSSadaf Ebrahimi   const Register kZero = r2;
50*f5c631daSSadaf Ebrahimi   __ Mov(kZero, 0);
51*f5c631daSSadaf Ebrahimi 
52*f5c631daSSadaf Ebrahimi   const DRegister kStars = d6;
53*f5c631daSSadaf Ebrahimi   const DRegister kSpaces = d7;
54*f5c631daSSadaf Ebrahimi   // Output characters - packed 4 characters into 32 bits.
55*f5c631daSSadaf Ebrahimi   __ Vmov(I8, kStars, '*');
56*f5c631daSSadaf Ebrahimi   __ Vmov(I8, kSpaces, ' ');
57*f5c631daSSadaf Ebrahimi 
58*f5c631daSSadaf Ebrahimi   const DRegisterLane kNegTwo = DRegisterLane(d7, 1);
59*f5c631daSSadaf Ebrahimi   __ Vmov(s15, -2.0);
60*f5c631daSSadaf Ebrahimi 
61*f5c631daSSadaf Ebrahimi   // Imaginary part of c.
62*f5c631daSSadaf Ebrahimi   __ Vdup(Untyped32, kCImag, kNegTwo);
63*f5c631daSSadaf Ebrahimi 
64*f5c631daSSadaf Ebrahimi   // Max modulus squared.
65*f5c631daSSadaf Ebrahimi   __ Vmov(F32, kModSqLimit, 4.0);
66*f5c631daSSadaf Ebrahimi 
67*f5c631daSSadaf Ebrahimi   // Height of output in characters.
68*f5c631daSSadaf Ebrahimi   __ Mov(r4, 64);
69*f5c631daSSadaf Ebrahimi 
70*f5c631daSSadaf Ebrahimi   // String length will be 129, so need 132 bytes of space.
71*f5c631daSSadaf Ebrahimi   const uint32_t kStringLength = 132;
72*f5c631daSSadaf Ebrahimi 
73*f5c631daSSadaf Ebrahimi   // Make space for our string.
74*f5c631daSSadaf Ebrahimi   __ Sub(sp, sp, kStringLength);
75*f5c631daSSadaf Ebrahimi 
76*f5c631daSSadaf Ebrahimi   // Set up a starting pointer for the string.
77*f5c631daSSadaf Ebrahimi   const Register kStringPtr = r6;
78*f5c631daSSadaf Ebrahimi   __ Mov(kStringPtr, sp);
79*f5c631daSSadaf Ebrahimi 
80*f5c631daSSadaf Ebrahimi   // Loop over imaginary values of c from -2 to 2, taking
81*f5c631daSSadaf Ebrahimi   // 64 equally spaced values in the range.
82*f5c631daSSadaf Ebrahimi   {
83*f5c631daSSadaf Ebrahimi     Label c_imag_loop;
84*f5c631daSSadaf Ebrahimi 
85*f5c631daSSadaf Ebrahimi     __ Bind(&c_imag_loop);
86*f5c631daSSadaf Ebrahimi 
87*f5c631daSSadaf Ebrahimi     // Real part of c.
88*f5c631daSSadaf Ebrahimi     // Store 4 equally spaced values in q0 (kCReal) to use SIMD.
89*f5c631daSSadaf Ebrahimi     __ Vmov(s0, -2.0);
90*f5c631daSSadaf Ebrahimi     __ Vmov(s1, -1.96875);
91*f5c631daSSadaf Ebrahimi     __ Vmov(s2, -1.9375);
92*f5c631daSSadaf Ebrahimi     __ Vmov(s3, -1.90625);
93*f5c631daSSadaf Ebrahimi 
94*f5c631daSSadaf Ebrahimi     // Width of output in terms of sets of 4 characters - twice that
95*f5c631daSSadaf Ebrahimi     // of height to compensate for ratio of character height to width.
96*f5c631daSSadaf Ebrahimi     __ Mov(r5, 32);
97*f5c631daSSadaf Ebrahimi 
98*f5c631daSSadaf Ebrahimi     const Register kWriteCursor = r3;
99*f5c631daSSadaf Ebrahimi     // Set a cursor ready to write the next line.
100*f5c631daSSadaf Ebrahimi     __ Mov(kWriteCursor, kStringPtr);
101*f5c631daSSadaf Ebrahimi 
102*f5c631daSSadaf Ebrahimi     // Loop over real values of c from -2 to 2, processing
103*f5c631daSSadaf Ebrahimi     // 4 different values simultaneously using SIMD.
104*f5c631daSSadaf Ebrahimi     {
105*f5c631daSSadaf Ebrahimi       const QRegister kFlags = q2;
106*f5c631daSSadaf Ebrahimi       const DRegister kLowerFlags = d4;
107*f5c631daSSadaf Ebrahimi 
108*f5c631daSSadaf Ebrahimi       Label c_real_loop;
109*f5c631daSSadaf Ebrahimi       __ Bind(&c_real_loop);
110*f5c631daSSadaf Ebrahimi 
111*f5c631daSSadaf Ebrahimi       // Get number of iterations.
112*f5c631daSSadaf Ebrahimi       __ Add(r1, r0, 1);
113*f5c631daSSadaf Ebrahimi 
114*f5c631daSSadaf Ebrahimi       // Perform the iterations of z(n+1) = zn^2 + c using SIMD.
115*f5c631daSSadaf Ebrahimi       // If the result is that c is in the set, the element of
116*f5c631daSSadaf Ebrahimi       // kFlags will be 0, else ~0.
117*f5c631daSSadaf Ebrahimi       {
118*f5c631daSSadaf Ebrahimi         const QRegister kZReal = q8;
119*f5c631daSSadaf Ebrahimi         const QRegister kZImag = q9;
120*f5c631daSSadaf Ebrahimi 
121*f5c631daSSadaf Ebrahimi         // Real part of z.
122*f5c631daSSadaf Ebrahimi         __ Vmov(F32, kZReal, 0.0);
123*f5c631daSSadaf Ebrahimi 
124*f5c631daSSadaf Ebrahimi         // Imaginary part of z.
125*f5c631daSSadaf Ebrahimi         __ Vmov(F32, kZImag, 0.0);
126*f5c631daSSadaf Ebrahimi 
127*f5c631daSSadaf Ebrahimi         __ Vmov(F32, kFlags, 0.0);
128*f5c631daSSadaf Ebrahimi 
129*f5c631daSSadaf Ebrahimi         Label iterative_formula_start, iterative_formula_end;
130*f5c631daSSadaf Ebrahimi         __ Bind(&iterative_formula_start);
131*f5c631daSSadaf Ebrahimi         __ Subs(r1, r1, 1);
132*f5c631daSSadaf Ebrahimi         __ B(le, &iterative_formula_end);
133*f5c631daSSadaf Ebrahimi 
134*f5c631daSSadaf Ebrahimi         // z(n+1) = zn^2 + c.
135*f5c631daSSadaf Ebrahimi         // re(z(n+1)) = re(c) + re(zn)^2 - im(zn)^2.
136*f5c631daSSadaf Ebrahimi         // im(z(n+1)) = im(c) + 2 * re(zn) * im(zn)
137*f5c631daSSadaf Ebrahimi 
138*f5c631daSSadaf Ebrahimi         __ Vmul(F32, q10, kZReal, kZImag);  // re(zn) * im(zn)
139*f5c631daSSadaf Ebrahimi 
140*f5c631daSSadaf Ebrahimi         __ Vmul(F32, kZReal, kZReal, kZReal);  // re(zn)^2
141*f5c631daSSadaf Ebrahimi         __ Vadd(F32, kZReal, kCReal, kZReal);  // re(c) + re(zn)^2
142*f5c631daSSadaf Ebrahimi         __ Vmls(F32, kZReal, kZImag, kZImag);  // re(c) + re(zn)^2 - im(zn)^2
143*f5c631daSSadaf Ebrahimi 
144*f5c631daSSadaf Ebrahimi         __ Vmov(F32, kZImag, kCImag);        // im(c)
145*f5c631daSSadaf Ebrahimi         __ Vmls(F32, kZImag, q10, kNegTwo);  // im(c) + 2 * re(zn) * im(zn)
146*f5c631daSSadaf Ebrahimi 
147*f5c631daSSadaf Ebrahimi         __ Vmul(F32, q10, kZReal, kZReal);    // re(z(n+1))^2
148*f5c631daSSadaf Ebrahimi         __ Vmla(F32, q10, kZImag, kZImag);    // re(z(n+1))^2 + im(z(n+1))^2
149*f5c631daSSadaf Ebrahimi         __ Vcgt(F32, q10, q10, kModSqLimit);  // |z(n+1)|^2 > 4 ? ~0 : 0
150*f5c631daSSadaf Ebrahimi         __ Vorr(F32, kFlags, kFlags, q10);    // (~0/0) | above result
151*f5c631daSSadaf Ebrahimi 
152*f5c631daSSadaf Ebrahimi         __ B(&iterative_formula_start);
153*f5c631daSSadaf Ebrahimi         __ Bind(&iterative_formula_end);
154*f5c631daSSadaf Ebrahimi       }
155*f5c631daSSadaf Ebrahimi 
156*f5c631daSSadaf Ebrahimi       // Narrow twice so that each mask is 8 bits, packed into
157*f5c631daSSadaf Ebrahimi       // a single 32 bit register s4.
158*f5c631daSSadaf Ebrahimi       // kLowerFlags is the lower half of kFlags, so the second narrow will
159*f5c631daSSadaf Ebrahimi       // be working on the results of the first to halve the size of each
160*f5c631daSSadaf Ebrahimi       // representation again.
161*f5c631daSSadaf Ebrahimi       __ Vmovn(I32, kLowerFlags, kFlags);
162*f5c631daSSadaf Ebrahimi       __ Vmovn(I16, kLowerFlags, kFlags);
163*f5c631daSSadaf Ebrahimi 
164*f5c631daSSadaf Ebrahimi       // '*' if in set, ' ' if not.
165*f5c631daSSadaf Ebrahimi       __ Vbsl(Untyped32, kLowerFlags, kSpaces, kStars);
166*f5c631daSSadaf Ebrahimi 
167*f5c631daSSadaf Ebrahimi       // Add this to the string.
168*f5c631daSSadaf Ebrahimi       __ Vst1(Untyped32,
169*f5c631daSSadaf Ebrahimi               NeonRegisterList(kLowerFlags, 0),
170*f5c631daSSadaf Ebrahimi               AlignedMemOperand(kWriteCursor, k32BitAlign, PostIndex));
171*f5c631daSSadaf Ebrahimi 
172*f5c631daSSadaf Ebrahimi       // Increase real part of c.
173*f5c631daSSadaf Ebrahimi       __ Vadd(F32, kCReal, kCReal, kCRealStep);
174*f5c631daSSadaf Ebrahimi 
175*f5c631daSSadaf Ebrahimi       __ Subs(r5, r5, 1);
176*f5c631daSSadaf Ebrahimi       __ B(ne, &c_real_loop);
177*f5c631daSSadaf Ebrahimi     }
178*f5c631daSSadaf Ebrahimi 
179*f5c631daSSadaf Ebrahimi     // Put terminating character.
180*f5c631daSSadaf Ebrahimi     __ Strb(kZero, MemOperand(kWriteCursor));
181*f5c631daSSadaf Ebrahimi 
182*f5c631daSSadaf Ebrahimi     // Print the string.
183*f5c631daSSadaf Ebrahimi     __ Printf("%s\n", kStringPtr);
184*f5c631daSSadaf Ebrahimi 
185*f5c631daSSadaf Ebrahimi     // Increase imaginary part of c.
186*f5c631daSSadaf Ebrahimi     __ Vadd(F32, kCImag, kCImag, kCImagStep);
187*f5c631daSSadaf Ebrahimi 
188*f5c631daSSadaf Ebrahimi     __ Subs(r4, r4, 1);
189*f5c631daSSadaf Ebrahimi     __ B(ne, &c_imag_loop);
190*f5c631daSSadaf Ebrahimi   }
191*f5c631daSSadaf Ebrahimi   // Restore stack pointer.
192*f5c631daSSadaf Ebrahimi   __ Add(sp, sp, kStringLength);
193*f5c631daSSadaf Ebrahimi   // Restore register values.
194*f5c631daSSadaf Ebrahimi   __ Pop(RegisterList(r4, r5, r6));
195*f5c631daSSadaf Ebrahimi   __ Bx(lr);
196*f5c631daSSadaf Ebrahimi }
197*f5c631daSSadaf Ebrahimi 
198*f5c631daSSadaf Ebrahimi #ifndef TEST_EXAMPLES
main()199*f5c631daSSadaf Ebrahimi int main() {
200*f5c631daSSadaf Ebrahimi   MacroAssembler masm;
201*f5c631daSSadaf Ebrahimi   // Generate the code for the example function.
202*f5c631daSSadaf Ebrahimi   Label mandelbrot;
203*f5c631daSSadaf Ebrahimi   masm.Bind(&mandelbrot);
204*f5c631daSSadaf Ebrahimi   GenerateMandelBrot(&masm);
205*f5c631daSSadaf Ebrahimi   masm.FinalizeCode();
206*f5c631daSSadaf Ebrahimi #ifdef VIXL_INCLUDE_SIMULATOR_AARCH32
207*f5c631daSSadaf Ebrahimi   // There is no simulator defined for VIXL AArch32.
208*f5c631daSSadaf Ebrahimi   printf("This example cannot be simulated\n");
209*f5c631daSSadaf Ebrahimi #else
210*f5c631daSSadaf Ebrahimi   byte* code = masm.GetBuffer()->GetStartAddress<byte*>();
211*f5c631daSSadaf Ebrahimi   uint32_t code_size = masm.GetSizeOfCodeGenerated();
212*f5c631daSSadaf Ebrahimi   ExecutableMemory memory(code, code_size);
213*f5c631daSSadaf Ebrahimi   // Run the example function.
214*f5c631daSSadaf Ebrahimi   double (*mandelbrot_func)(uint32_t) =
215*f5c631daSSadaf Ebrahimi       memory.GetEntryPoint<double (*)(uint32_t)>(mandelbrot,
216*f5c631daSSadaf Ebrahimi                                                  masm.GetInstructionSetInUse());
217*f5c631daSSadaf Ebrahimi   uint32_t iterations = 1000;
218*f5c631daSSadaf Ebrahimi   (*mandelbrot_func)(iterations);
219*f5c631daSSadaf Ebrahimi #endif
220*f5c631daSSadaf Ebrahimi   return 0;
221*f5c631daSSadaf Ebrahimi }
222*f5c631daSSadaf Ebrahimi #endif  // TEST_EXAMPLES
223