1// Copyright 2022 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#include "go_asm.h"
6#include "textflag.h"
7
// func Compare(a, b []byte) int
//
// ABIInternal entry: a byte slice occupies three registers
// (base, len, cap). Shuffle b's base/len down into the registers
// compare<> expects, dropping the unused capacity words, then
// tail-jump to the shared body. Result comes back in X10.
TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
	// X10 = a_base
	// X11 = a_len
	// X12 = a_cap (unused)
	// X13 = b_base (want in X12)
	// X14 = b_len (want in X13)
	// X15 = b_cap (unused)
	MOV	X13, X12
	MOV	X14, X13
	JMP	compare<>(SB)
18
// func cmpstring(a, b string) int
//
// Strings are (base, len) pairs, so the ABIInternal argument
// registers already match what compare<> expects; tail-jump
// straight to the shared body. Result comes back in X10.
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
	// X10 = a_base
	// X11 = a_len
	// X12 = b_base
	// X13 = b_len
	JMP	compare<>(SB)
25
// compare is the shared body of Compare and cmpstring.
//
// On entry:
// X10 points to start of a
// X11 length of a
// X12 points to start of b
// X13 length of b
//
// On exit the result is in X10: -1 if a < b, 0 if a == b, +1 if a > b.
//
// NOTE(review): an earlier comment here also described a non-regabi
// convention (result address passed in X14); both callers in this file
// are <ABIInternal>, so only the register-result form applies — confirm
// before relying on X14 being meaningful.
TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
	// Same base pointer: the common prefix is trivially equal, so the
	// result depends only on the lengths.
	BEQ	X10, X12, cmp_len

	MOV	X11, X5
	BGE	X13, X5, use_a_len // X5 = min(len(a), len(b))
	MOV	X13, X5
use_a_len:
	BEQZ	X5, cmp_len	// empty common prefix: compare lengths only

	// Inputs shorter than 32 bytes are not worth aligning; go straight
	// to the small unaligned loops.
	MOV	$32, X6
	BLT	X5, X6, check8_unaligned

	// Check alignment - if alignment differs we have to do one byte at a time.
	AND	$7, X10, X7
	AND	$7, X12, X8
	BNE	X7, X8, check8_unaligned
	BEQZ	X7, compare32	// both already 8-byte aligned

	// Check one byte at a time until we reach 8 byte alignment.
	SUB	X7, X0, X7	// X0 is the zero register: X7 = -(addr & 7)
	ADD	$8, X7, X7	// X7 = 8 - (addr & 7) = bytes until 8-byte alignment
	SUB	X7, X5, X5	// deduct the alignment bytes from the shared count
align:
	SUB	$1, X7
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp	// mismatch: resolve order from this byte pair
	ADD	$1, X10
	ADD	$1, X12
	BNEZ	X7, align

check32:
	// X6 contains $32
	BLT	X5, X6, compare16
compare32:
	// Main loop: 32 bytes per iteration as four 8-byte word loads.
	// Loads are issued in pairs before the compares to overlap latency.
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BNE	X15, X16, cmp8a	// words differ: locate the differing byte
	BNE	X17, X18, cmp8b
	MOV	16(X10), X15
	MOV	16(X12), X16
	MOV	24(X10), X17
	MOV	24(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$32, X10
	ADD	$32, X12
	SUB	$32, X5
	BGE	X5, X6, compare32	// at least 32 bytes left: loop again
	BEQZ	X5, cmp_len	// consumed exactly: compare lengths

check16:
	MOV	$16, X6
	BLT	X5, X6, check8_unaligned
compare16:
	// One 16-byte step for a 16..31-byte tail, then fall through to
	// the unaligned byte loops for whatever remains.
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$16, X10
	ADD	$16, X12
	SUB	$16, X5
	BEQZ	X5, cmp_len

check8_unaligned:
	MOV	$8, X6
	BLT	X5, X6, check4_unaligned
compare8_unaligned:
	// Byte-at-a-time 8-byte chunks for unaligned (or mixed-alignment)
	// inputs. All sixteen loads are issued before any compare so the
	// loads can overlap; each pair gets its own register.
	MOVBU	0(X10), X8
	MOVBU	1(X10), X15
	MOVBU	2(X10), X17
	MOVBU	3(X10), X19
	MOVBU	4(X10), X21
	MOVBU	5(X10), X23
	MOVBU	6(X10), X25
	MOVBU	7(X10), X29
	MOVBU	0(X12), X9
	MOVBU	1(X12), X16
	MOVBU	2(X12), X18
	MOVBU	3(X12), X20
	MOVBU	4(X12), X22
	MOVBU	5(X12), X24
	MOVBU	6(X12), X28
	MOVBU	7(X12), X30
	BNE	X8, X9, cmp1a
	BNE	X15, X16, cmp1b
	BNE	X17, X18, cmp1c
	BNE	X19, X20, cmp1d
	BNE	X21, X22, cmp1e
	BNE	X23, X24, cmp1f
	BNE	X25, X28, cmp1g
	BNE	X29, X30, cmp1h
	ADD	$8, X10
	ADD	$8, X12
	SUB	$8, X5
	BGE	X5, X6, compare8_unaligned	// X6 still holds $8
	BEQZ	X5, cmp_len

check4_unaligned:
	MOV	$4, X6
	BLT	X5, X6, compare1
compare4_unaligned:
	// Same scheme as compare8_unaligned, four bytes at a time.
	MOVBU	0(X10), X8
	MOVBU	1(X10), X15
	MOVBU	2(X10), X17
	MOVBU	3(X10), X19
	MOVBU	0(X12), X9
	MOVBU	1(X12), X16
	MOVBU	2(X12), X18
	MOVBU	3(X12), X20
	BNE	X8, X9, cmp1a
	BNE	X15, X16, cmp1b
	BNE	X17, X18, cmp1c
	BNE	X19, X20, cmp1d
	ADD	$4, X10
	ADD	$4, X12
	SUB	$4, X5
	BGE	X5, X6, compare4_unaligned	// X6 still holds $4

compare1:
	// Final 0..3 bytes, one at a time.
	BEQZ	X5, cmp_len
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	SUB	$1, X5
	JMP	compare1

	// Compare 8 bytes of memory in X15/X16 that are known to differ.
cmp8a:
	MOV	X15, X17
	MOV	X16, X18
	// Fall through: the differing words are now in X17/X18.

	// Compare 8 bytes of memory in X17/X18 that are known to differ.
cmp8b:
	MOV	$0xff, X19	// mask selecting one byte of each word
cmp8_loop:
	// Scan from the least significant byte — the lowest memory address
	// on little-endian RISC-V — until the first differing byte. The
	// masked values keep the same shift, so their unsigned order equals
	// the byte order. Terminates because the words are known to differ.
	AND	X17, X19, X8
	AND	X18, X19, X9
	BNE	X8, X9, cmp
	SLLI	$8, X19	// advance the mask to the next byte
	JMP	cmp8_loop

	// Byte-pair mismatch exits: each computes the unsigned
	// less-than/greater-than flags for its register pair, then joins
	// the common return path at cmp_ret.
cmp1a:
	SLTU	X9, X8, X5
	SLTU	X8, X9, X6
	JMP	cmp_ret
cmp1b:
	SLTU	X16, X15, X5
	SLTU	X15, X16, X6
	JMP	cmp_ret
cmp1c:
	SLTU	X18, X17, X5
	SLTU	X17, X18, X6
	JMP	cmp_ret
cmp1d:
	SLTU	X20, X19, X5
	SLTU	X19, X20, X6
	JMP	cmp_ret
cmp1e:
	SLTU	X22, X21, X5
	SLTU	X21, X22, X6
	JMP	cmp_ret
cmp1f:
	SLTU	X24, X23, X5
	SLTU	X23, X24, X6
	JMP	cmp_ret
cmp1g:
	SLTU	X28, X25, X5
	SLTU	X25, X28, X6
	JMP	cmp_ret
cmp1h:
	SLTU	X30, X29, X5
	SLTU	X29, X30, X6
	JMP	cmp_ret

cmp_len:
	// Entire common prefix is equal: order is decided by the lengths.
	MOV	X11, X8
	MOV	X13, X9
cmp:
	// Generic unsigned compare of X8 vs X9 (bytes, masked words, or
	// lengths — all compare correctly unsigned).
	SLTU	X9, X8, X5	// X5 = 1 if X8 < X9, else 0
	SLTU	X8, X9, X6	// X6 = 1 if X8 > X9, else 0
cmp_ret:
	SUB	X5, X6, X10	// X10 = (X8>X9) - (X8<X9), i.e. -1, 0 or +1
	RET
223