// xref: /aosp_15_r20/external/vixl/examples/aarch64/sve-strlen.cc (revision f5c631da2f1efdd72b5fd1e20510e4042af13d77)
// Copyright 2020, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27*f5c631daSSadaf Ebrahimi #include "examples.h"
28*f5c631daSSadaf Ebrahimi 
29*f5c631daSSadaf Ebrahimi using namespace vixl;
30*f5c631daSSadaf Ebrahimi using namespace vixl::aarch64;
31*f5c631daSSadaf Ebrahimi 
32*f5c631daSSadaf Ebrahimi #define __ masm->
33*f5c631daSSadaf Ebrahimi 
34*f5c631daSSadaf Ebrahimi // size_t sve_strlen(const char* str);
GenerateSVEStrlen(MacroAssembler * masm)35*f5c631daSSadaf Ebrahimi void GenerateSVEStrlen(MacroAssembler* masm) {
36*f5c631daSSadaf Ebrahimi   // We will accumulate the length as we load each chunk.
37*f5c631daSSadaf Ebrahimi   Register len = x1;
38*f5c631daSSadaf Ebrahimi   __ Mov(len, 0);
39*f5c631daSSadaf Ebrahimi 
40*f5c631daSSadaf Ebrahimi   // We want to load as much as we can on each iteration, so set up an all-true
41*f5c631daSSadaf Ebrahimi   // predicate for that purpose.
42*f5c631daSSadaf Ebrahimi   PRegister all_true = p0;
43*f5c631daSSadaf Ebrahimi   __ Ptrue(all_true.VnB());
44*f5c631daSSadaf Ebrahimi 
45*f5c631daSSadaf Ebrahimi   Label loop;
46*f5c631daSSadaf Ebrahimi   __ Bind(&loop);
47*f5c631daSSadaf Ebrahimi   // FFR is cumulative, so reset it to all-true for each iteration.
48*f5c631daSSadaf Ebrahimi   __ Setffr();
49*f5c631daSSadaf Ebrahimi 
50*f5c631daSSadaf Ebrahimi   // Load as many characters as we can from &str[len]. We have to use a NF or FF
51*f5c631daSSadaf Ebrahimi   // load, because we don't know how long the string is. An FF load is a good
52*f5c631daSSadaf Ebrahimi   // choice, because we know that we will see at least a NULL termination, even
53*f5c631daSSadaf Ebrahimi   // for an empty string.
54*f5c631daSSadaf Ebrahimi   __ Ldff1b(z0.VnB(), all_true.Zeroing(), SVEMemOperand(x0, len));
55*f5c631daSSadaf Ebrahimi   // For example, if str = "Test string.", and we load every byte:
56*f5c631daSSadaf Ebrahimi   //   z0.b:      \0 . g n i r t s   t s e T
57*f5c631daSSadaf Ebrahimi 
58*f5c631daSSadaf Ebrahimi   // FFR now represents the number of bytes that we actually loaded, so use it
59*f5c631daSSadaf Ebrahimi   // to predicate the data processing instructions.
60*f5c631daSSadaf Ebrahimi   __ Rdffr(p1.VnB());
61*f5c631daSSadaf Ebrahimi 
62*f5c631daSSadaf Ebrahimi   // Find the NULL termination (if there is one), and set the flags.
63*f5c631daSSadaf Ebrahimi   __ Cmpeq(p2.VnB(), p1.Zeroing(), z0.VnB(), 0);
64*f5c631daSSadaf Ebrahimi   //   p2.b:       1 0 0 0 0 0 0 0 0 0 0 0 0
65*f5c631daSSadaf Ebrahimi 
66*f5c631daSSadaf Ebrahimi   // Activate every lane up to (but not including) the NULL termination. If we
67*f5c631daSSadaf Ebrahimi   // found no NULL termination, this activates every lane, and indicates that we
68*f5c631daSSadaf Ebrahimi   // have to load another vector of characters. Lanes activated in this way
69*f5c631daSSadaf Ebrahimi   // represent string characters that we need to count.
70*f5c631daSSadaf Ebrahimi   __ Brkb(p1.VnB(), p1.Zeroing(), p2.VnB());
71*f5c631daSSadaf Ebrahimi   //   p1.b: 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1
72*f5c631daSSadaf Ebrahimi 
73*f5c631daSSadaf Ebrahimi   // Count the active lanes, and add them to the length count.
74*f5c631daSSadaf Ebrahimi   __ Incp(len, p1.VnB());
75*f5c631daSSadaf Ebrahimi 
76*f5c631daSSadaf Ebrahimi   // Loop until `cmpeq` finds a NULL termination.
77*f5c631daSSadaf Ebrahimi   __ B(sve_none, &loop);
78*f5c631daSSadaf Ebrahimi 
79*f5c631daSSadaf Ebrahimi   // Put the length in the AAPCS64 return register.
80*f5c631daSSadaf Ebrahimi   __ Mov(x0, len);
81*f5c631daSSadaf Ebrahimi   __ Ret();
82*f5c631daSSadaf Ebrahimi }
83*f5c631daSSadaf Ebrahimi 
#ifndef TEST_EXAMPLES
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
// Standalone driver: assembles the sve_strlen routine, runs it on the VIXL
// simulator against a fixed 42-character string, and prints the length that
// the routine leaves in x0.
int main(void) {
  MacroAssembler masm;
  Decoder decoder;
  Simulator simulator(&decoder);

  // We're running on the simulator, so we can assume that SVE is present.
  masm.GetCPUFeatures()->Combine(CPUFeatures::kSVE);

  // Generate the code for the example function. The label marks its entry
  // point so that the simulator can be started there.
  Label sve_strlen;
  masm.Bind(&sve_strlen);
  GenerateSVEStrlen(&masm);
  masm.FinalizeCode();

  const char* example_input = "This is a string of exactly 42 characters.";
  VIXL_ASSERT(strlen(example_input) == 42);

  // Pass the string pointer in x0 (the routine's only argument), then run the
  // generated code from its entry label.
  simulator.ResetState();
  simulator.WriteXRegister(0, reinterpret_cast<uintptr_t>(example_input));
  simulator.RunFrom(masm.GetLabelAddress<Instruction*>(&sve_strlen));

  // The result comes back in x0. Cast explicitly so that the argument type
  // matches the PRIu64 conversion.
  printf("strlen(\"%s\") == %" PRIu64 "\n",
         example_input,
         static_cast<uint64_t>(simulator.ReadXRegister(0)));

  return 0;
}
#else
// Without the simulator there is nothing to test.
int main(void) { return 0; }
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
#endif  // TEST_EXAMPLES