xref: /aosp_15_r20/external/arm-optimized-routines/string/arm/strlen-armv6t2.S (revision 412f47f9e737e10ed5cc46ec6a8d7fa2264f8a14)
1*412f47f9SXin Li/*
2*412f47f9SXin Li * strlen - calculate the length of a string
3*412f47f9SXin Li *
4*412f47f9SXin Li * Copyright (c) 2010-2022, Arm Limited.
5*412f47f9SXin Li * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6*412f47f9SXin Li */
7*412f47f9SXin Li
8*412f47f9SXin Li#if __ARM_ARCH >= 6 && __ARM_ARCH_ISA_THUMB == 2
9*412f47f9SXin Li
10*412f47f9SXin Li/*
11*412f47f9SXin Li   Assumes:
12*412f47f9SXin Li   ARMv6T2, AArch32
13*412f47f9SXin Li
14*412f47f9SXin Li */
15*412f47f9SXin Li
16*412f47f9SXin Li#include "asmdefs.h"
17*412f47f9SXin Li
/* Endian-dependent shift mnemonics.  S2HI shifts a register toward the
   bytes that sit at higher memory addresses and S2LO toward the bytes at
   lower addresses: on big-endian targets (__ARMEB__) the lowest-addressed
   byte is the most significant one, so the directions are swapped
   relative to little-endian.  S2HI is used by the misaligned-start path
   of the function below to build the mask covering the bytes that
   precede the string; S2LO is not referenced in the code visible here.  */
18*412f47f9SXin Li#ifdef __ARMEB__
19*412f47f9SXin Li#define S2LO		lsl
20*412f47f9SXin Li#define S2HI		lsr
21*412f47f9SXin Li#else
22*412f47f9SXin Li#define S2LO		lsr
23*412f47f9SXin Li#define S2HI		lsl
24*412f47f9SXin Li#endif
25*412f47f9SXin Li
26*412f47f9SXin Li/* Ensure the .cantunwind directive is prepended to .fnend.
27*412f47f9SXin Li   Leaf functions cannot throw exceptions - EHABI only supports
28*412f47f9SXin Li   synchronous exceptions.  */
/* NOTE(review): IS_LEAF is defined here, after "asmdefs.h" has already
   been included.  If that header tests IS_LEAF with a preprocessor
   conditional evaluated at inclusion time (rather than inside a macro
   that is only expanded at END), this definition comes too late to have
   any effect -- confirm against asmdefs.h, which is not visible here.  */
29*412f47f9SXin Li#define IS_LEAF
30*412f47f9SXin Li
31*412f47f9SXin Li	/* This code requires Thumb.  */
	/* Unified syntax is selected as well: the IT blocks and the
	   flag-preserving (no "s" suffix) data-processing instructions
	   in the function below are written in that syntax.  */
32*412f47f9SXin Li	.thumb
33*412f47f9SXin Li	.syntax unified
34*412f47f9SXin Li
35*412f47f9SXin Li/* Parameters and result.  */
/* Both names map to r0: once the aligned read pointer has been derived
   from srcin, r0 is reused as the running length.  */
36*412f47f9SXin Li#define srcin		r0
37*412f47f9SXin Li#define result		r0
38*412f47f9SXin Li
39*412f47f9SXin Li/* Internal variables.  */
/* r4 and r5 are the registers named in the "prologue 4 5" and
   "epilogue 4 5" invocations of the function below -- presumably so that
   they are saved and restored around the body; confirm in asmdefs.h.  */
40*412f47f9SXin Li#define src		r1		/* 8-byte aligned read pointer.  */
41*412f47f9SXin Li#define data1a		r2		/* Word at [addr] of each ldrd, then its NUL mask.  */
42*412f47f9SXin Li#define data1b		r3		/* Word at [addr + 4] of each ldrd, then combined mask.  */
43*412f47f9SXin Li#define const_m1	r12		/* Set to 0xffffffff by mvn.  */
44*412f47f9SXin Li#define const_0		r4		/* Set to 0 before the scan starts.  */
45*412f47f9SXin Li#define tmp1		r4		/* Overlaps const_0  */
46*412f47f9SXin Li#define tmp2		r5		/* Shift count, then mask, on the misaligned path.  */
47*412f47f9SXin Li
/* __strlen_armv6t2
   In:   r0 (srcin)  = address of a NUL-terminated string.
   Out:  r0 (result) = number of bytes that precede the first NUL.

   The string is scanned in naturally aligned 8-byte blocks fetched with
   ldrd, four blocks (32 bytes) per loop iteration.  For each 32-bit word,
   uadd8 adds 0xff to every byte lane; the lane carries out, and so sets
   its GE flag, exactly when the byte is non-zero.  sel then converts the
   GE flags into a mask that holds 0xff in the lanes that were NUL and
   0x00 in the others.  For the second word of a block, sel substitutes
   the first word's mask in the non-NUL lanes, so data1b on its own is
   non-zero iff either word of the block contains a NUL.

   While scanning, result is the offset (relative to srcin) of the block
   currently being tested.  It is negative for the first block when the
   string does not start on an 8-byte boundary.

   prologue/epilogue are macros from asmdefs.h, which is not shown here;
   presumably they save/restore r4 and r5 (plus ip when HAVE_PAC_LEAF is
   set) and epilogue performs the return -- confirm.  */
48*412f47f9SXin LiENTRY (__strlen_armv6t2)
49*412f47f9SXin Li	prologue 4 5 push_ip=HAVE_PAC_LEAF
50*412f47f9SXin Li	pld	[srcin, #0]
	/* src = srcin rounded down to an 8-byte boundary.  */
51*412f47f9SXin Li	bic	src, srcin, #7
52*412f47f9SXin Li	mvn	const_m1, #0
	/* tmp1 = srcin & 7; Z is set when srcin is already 8-byte aligned.
	   The pld that follows does not alter the flags tested by bne.w.  */
53*412f47f9SXin Li	ands	tmp1, srcin, #7		/* (8 - bytes) to alignment.  */
54*412f47f9SXin Li	pld	[src, #32]
55*412f47f9SXin Li	bne.w	L(misaligned8)
56*412f47f9SXin Li	mov	const_0, #0
	/* r0 stops being srcin here.  It is pre-biased by -8 because the
	   loop adds 8 before testing each block, so the first block is
	   tested with result == 0.  */
57*412f47f9SXin Li	mov	result, #-8
58*412f47f9SXin LiL(loop_aligned):
59*412f47f9SXin Li	/* Bytes 0-7.  */
60*412f47f9SXin Li	ldrd	data1a, data1b, [src]
61*412f47f9SXin Li	pld	[src, #64]
62*412f47f9SXin Li	add	result, result, #8
	/* The misaligned path joins here with data1a/data1b already loaded
	   and masked and with result already set, skipping the add above.  */
63*412f47f9SXin LiL(start_realigned):
64*412f47f9SXin Li	uadd8	data1a, data1a, const_m1	/* GE<0:3> set for non-zero bytes.  */
65*412f47f9SXin Li	sel	data1a, const_0, const_m1	/* Select based on GE<0:3>.  */
66*412f47f9SXin Li	uadd8	data1b, data1b, const_m1
67*412f47f9SXin Li	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
68*412f47f9SXin Li	cbnz	data1b, L(null_found)
69*412f47f9SXin Li
70*412f47f9SXin Li	/* Bytes 8-15.  */
71*412f47f9SXin Li	ldrd	data1a, data1b, [src, #8]
72*412f47f9SXin Li	uadd8	data1a, data1a, const_m1	/* GE<0:3> set for non-zero bytes.  */
73*412f47f9SXin Li	add	result, result, #8
74*412f47f9SXin Li	sel	data1a, const_0, const_m1	/* Select based on GE<0:3>.  */
75*412f47f9SXin Li	uadd8	data1b, data1b, const_m1
76*412f47f9SXin Li	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
77*412f47f9SXin Li	cbnz	data1b, L(null_found)
78*412f47f9SXin Li
79*412f47f9SXin Li	/* Bytes 16-23.  */
80*412f47f9SXin Li	ldrd	data1a, data1b, [src, #16]
81*412f47f9SXin Li	uadd8	data1a, data1a, const_m1	/* GE<0:3> set for non-zero bytes.  */
82*412f47f9SXin Li	add	result, result, #8
83*412f47f9SXin Li	sel	data1a, const_0, const_m1	/* Select based on GE<0:3>.  */
84*412f47f9SXin Li	uadd8	data1b, data1b, const_m1
85*412f47f9SXin Li	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
86*412f47f9SXin Li	cbnz	data1b, L(null_found)
87*412f47f9SXin Li
88*412f47f9SXin Li	/* Bytes 24-31.  */
89*412f47f9SXin Li	ldrd	data1a, data1b, [src, #24]
	/* Advance src once per 32-byte group, after the last load that
	   uses the old value, so the next iteration reads from [src].  */
90*412f47f9SXin Li	add	src, src, #32
91*412f47f9SXin Li	uadd8	data1a, data1a, const_m1	/* GE<0:3> set for non-zero bytes.  */
92*412f47f9SXin Li	add	result, result, #8
93*412f47f9SXin Li	sel	data1a, const_0, const_m1	/* Select based on GE<0:3>.  */
94*412f47f9SXin Li	uadd8	data1b, data1b, const_m1
95*412f47f9SXin Li	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
	/* Loop while no NUL has been seen; otherwise fall through into
	   L(null_found).  cmp/beq is used for this backward branch
	   (cbz/cbnz only encode forward branches).  */
96*412f47f9SXin Li	cmp	data1b, #0
97*412f47f9SXin Li	beq	L(loop_aligned)
98*412f47f9SXin Li
	/* Here result is the offset of the 8-byte block that contains the
	   NUL, data1a is the NUL mask of its first word and data1b is the
	   combined mask described in the function header.  */
99*412f47f9SXin LiL(null_found):
	/* Save the current CFI state so that it can be reinstated for the
	   code placed after the epilogue.  */
100*412f47f9SXin Li	.cfi_remember_state
	/* data1a == 0 means the first word holds no NUL: account for its
	   4 bytes and continue with the second word's mask, which in that
	   case is exactly data1b.  */
101*412f47f9SXin Li	cmp	data1a, #0
102*412f47f9SXin Li	itt	eq
103*412f47f9SXin Li	addeq	result, result, #4
104*412f47f9SXin Li	moveq	data1a, data1b
	/* On little-endian, byte-reverse the mask so that the lowest-
	   addressed byte becomes the most significant one; big-endian
	   words already have that layout.  */
105*412f47f9SXin Li#ifndef __ARMEB__
106*412f47f9SXin Li	rev	data1a, data1a
107*412f47f9SXin Li#endif
	/* The leading zero bits of the mask are 8 per non-NUL byte that
	   precedes the first NUL in the word.  */
108*412f47f9SXin Li	clz	data1a, data1a
109*412f47f9SXin Li	add	result, result, data1a, lsr #3	/* Bits -> Bytes.  */
110*412f47f9SXin Li	epilogue 4 5 push_ip=HAVE_PAC_LEAF
111*412f47f9SXin Li
	/* Entered from the bne.w above when srcin is not 8-byte aligned:
	   tmp1 = srcin & 7 (non-zero) and src is the aligned-down address.  */
112*412f47f9SXin LiL(misaligned8):
113*412f47f9SXin Li	.cfi_restore_state
	/* Load the aligned block that contains the first string byte.  */
114*412f47f9SXin Li	ldrd	data1a, data1b, [src]
	/* tmp2 = number of bytes that precede the string inside the word
	   in which it starts.  */
115*412f47f9SXin Li	and	tmp2, tmp1, #3
	/* result = -tmp1, so that the position found inside this block
	   yields a length counted from srcin rather than from src.  */
116*412f47f9SXin Li	rsb	result, tmp1, #0
117*412f47f9SXin Li	lsl	tmp2, tmp2, #3			/* Bytes -> bits.  */
	/* Z is clear (ne) when the string starts in the second word.  The
	   instructions between here and the itt below are written without
	   the "s" suffix, so these flags are still valid there.  */
118*412f47f9SXin Li	tst	tmp1, #4
119*412f47f9SXin Li	pld	[src, #64]
	/* tmp2 = all-ones shifted away from the bytes that precede the
	   string, i.e. zero exactly in those byte lanes.  */
120*412f47f9SXin Li	S2HI	tmp2, const_m1, tmp2
	/* data1a |= ~tmp2: force the bytes before the string to 0xff so
	   that they can never be mistaken for a NUL.  */
121*412f47f9SXin Li	orn	data1a, data1a, tmp2
	/* If the string starts in the second word, apply the same masking
	   to data1b instead and treat the whole first word as non-NUL.  */
122*412f47f9SXin Li	itt	ne
123*412f47f9SXin Li	ornne	data1b, data1b, tmp2
124*412f47f9SXin Li	movne	data1a, const_m1
	/* tmp1 shares r4 with const_0 and is dead from here on; set up the
	   zero constant needed by the sel instructions.  */
125*412f47f9SXin Li	mov	const_0, #0
126*412f47f9SXin Li	b	L(start_realigned)
127*412f47f9SXin Li
128*412f47f9SXin LiEND (__strlen_armv6t2)
129*412f47f9SXin Li
130*412f47f9SXin Li#endif /* __ARM_ARCH >= 6 && __ARM_ARCH_ISA_THUMB == 2  */
131