// Copyright 2020, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27*f5c631daSSadaf Ebrahimi #include "examples.h"
28*f5c631daSSadaf Ebrahimi
29*f5c631daSSadaf Ebrahimi using namespace vixl;
30*f5c631daSSadaf Ebrahimi using namespace vixl::aarch64;
31*f5c631daSSadaf Ebrahimi
32*f5c631daSSadaf Ebrahimi #define __ masm->
33*f5c631daSSadaf Ebrahimi
34*f5c631daSSadaf Ebrahimi // size_t sve_strlen(const char* str);
GenerateSVEStrlen(MacroAssembler * masm)35*f5c631daSSadaf Ebrahimi void GenerateSVEStrlen(MacroAssembler* masm) {
36*f5c631daSSadaf Ebrahimi // We will accumulate the length as we load each chunk.
37*f5c631daSSadaf Ebrahimi Register len = x1;
38*f5c631daSSadaf Ebrahimi __ Mov(len, 0);
39*f5c631daSSadaf Ebrahimi
40*f5c631daSSadaf Ebrahimi // We want to load as much as we can on each iteration, so set up an all-true
41*f5c631daSSadaf Ebrahimi // predicate for that purpose.
42*f5c631daSSadaf Ebrahimi PRegister all_true = p0;
43*f5c631daSSadaf Ebrahimi __ Ptrue(all_true.VnB());
44*f5c631daSSadaf Ebrahimi
45*f5c631daSSadaf Ebrahimi Label loop;
46*f5c631daSSadaf Ebrahimi __ Bind(&loop);
47*f5c631daSSadaf Ebrahimi // FFR is cumulative, so reset it to all-true for each iteration.
48*f5c631daSSadaf Ebrahimi __ Setffr();
49*f5c631daSSadaf Ebrahimi
50*f5c631daSSadaf Ebrahimi // Load as many characters as we can from &str[len]. We have to use a NF or FF
51*f5c631daSSadaf Ebrahimi // load, because we don't know how long the string is. An FF load is a good
52*f5c631daSSadaf Ebrahimi // choice, because we know that we will see at least a NULL termination, even
53*f5c631daSSadaf Ebrahimi // for an empty string.
54*f5c631daSSadaf Ebrahimi __ Ldff1b(z0.VnB(), all_true.Zeroing(), SVEMemOperand(x0, len));
55*f5c631daSSadaf Ebrahimi // For example, if str = "Test string.", and we load every byte:
56*f5c631daSSadaf Ebrahimi // z0.b: \0 . g n i r t s t s e T
57*f5c631daSSadaf Ebrahimi
58*f5c631daSSadaf Ebrahimi // FFR now represents the number of bytes that we actually loaded, so use it
59*f5c631daSSadaf Ebrahimi // to predicate the data processing instructions.
60*f5c631daSSadaf Ebrahimi __ Rdffr(p1.VnB());
61*f5c631daSSadaf Ebrahimi
62*f5c631daSSadaf Ebrahimi // Find the NULL termination (if there is one), and set the flags.
63*f5c631daSSadaf Ebrahimi __ Cmpeq(p2.VnB(), p1.Zeroing(), z0.VnB(), 0);
64*f5c631daSSadaf Ebrahimi // p2.b: 1 0 0 0 0 0 0 0 0 0 0 0 0
65*f5c631daSSadaf Ebrahimi
66*f5c631daSSadaf Ebrahimi // Activate every lane up to (but not including) the NULL termination. If we
67*f5c631daSSadaf Ebrahimi // found no NULL termination, this activates every lane, and indicates that we
68*f5c631daSSadaf Ebrahimi // have to load another vector of characters. Lanes activated in this way
69*f5c631daSSadaf Ebrahimi // represent string characters that we need to count.
70*f5c631daSSadaf Ebrahimi __ Brkb(p1.VnB(), p1.Zeroing(), p2.VnB());
71*f5c631daSSadaf Ebrahimi // p1.b: 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1
72*f5c631daSSadaf Ebrahimi
73*f5c631daSSadaf Ebrahimi // Count the active lanes, and add them to the length count.
74*f5c631daSSadaf Ebrahimi __ Incp(len, p1.VnB());
75*f5c631daSSadaf Ebrahimi
76*f5c631daSSadaf Ebrahimi // Loop until `cmpeq` finds a NULL termination.
77*f5c631daSSadaf Ebrahimi __ B(sve_none, &loop);
78*f5c631daSSadaf Ebrahimi
79*f5c631daSSadaf Ebrahimi // Put the length in the AAPCS64 return register.
80*f5c631daSSadaf Ebrahimi __ Mov(x0, len);
81*f5c631daSSadaf Ebrahimi __ Ret();
82*f5c631daSSadaf Ebrahimi }
83*f5c631daSSadaf Ebrahimi
#ifndef TEST_EXAMPLES
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
// Standalone driver: assemble the example, run it on the simulator with a
// fixed input string, and print the length it computed. Always returns 0.
int main(void) {
  MacroAssembler masm;
  Decoder decoder;
  Simulator simulator(&decoder);

  // We're running on the simulator, so we can assume that SVE is present.
  masm.GetCPUFeatures()->Combine(CPUFeatures::kSVE);

  // Generate the code for the example function. The label is bound just before
  // the generated code so that its address can be used as the entry point.
  Label sve_strlen;
  masm.Bind(&sve_strlen);
  GenerateSVEStrlen(&masm);
  masm.FinalizeCode();

  const char* example_input = "This is a string of exactly 42 characters.";
  VIXL_ASSERT(strlen(example_input) == 42);

  // Pass the string pointer in x0, the register the generated code reads its
  // argument from, then run from the bound label.
  simulator.ResetState();
  simulator.WriteXRegister(0, reinterpret_cast<uintptr_t>(example_input));
  simulator.RunFrom(masm.GetLabelAddress<Instruction*>(&sve_strlen));

  // The result is read back from x0. PRIu64 expects an unsigned 64-bit
  // argument, so convert explicitly to keep the format and argument in
  // agreement.
  printf("strlen(\"%s\") == %" PRIu64 "\n",
         example_input,
         static_cast<uint64_t>(simulator.ReadXRegister(0)));

  return 0;
}
#else
// Without the simulator there is nothing to test.
int main(void) { return 0; }
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
#endif  // TEST_EXAMPLES