1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#include "go_asm.h"
6#include "textflag.h"
7
// Index is the byte-slice entry point: a is the slice being searched and
// b is the slice to search for. It only unpacks its arguments into the
// registers indexbody<> expects and branches there; indexbody<> writes the
// result through R5 and returns straight to Index's caller.
// Caller must confirm availability of vx facility before calling.
TEXT ·Index(SB),NOSPLIT|NOFRAME,$0-56
	MOVD	$ret+48(FP), R5   // R5=&ret
	MOVD	a_base+0(FP), R1  // R1=&s[0]
	MOVD	a_len+8(FP), R2   // R2=len(s)
	MOVD	b_base+24(FP), R3 // R3=&sep[0]
	MOVD	b_len+32(FP), R4  // R4=len(sep)
	BR	indexbody<>(SB)
14
// IndexString is the string entry point: a is the string being searched
// and b is the string to search for. It only unpacks its arguments into
// the registers indexbody<> expects and branches there; indexbody<> writes
// the result through R5 and returns straight to IndexString's caller.
// Caller must confirm availability of vx facility before calling.
TEXT ·IndexString(SB),NOSPLIT|NOFRAME,$0-40
	MOVD	$ret+32(FP), R5   // R5=&ret
	MOVD	a_base+0(FP), R1  // R1=&s[0]
	MOVD	a_len+8(FP), R2   // R2=len(s)
	MOVD	b_base+16(FP), R3 // R3=&sep[0]
	MOVD	b_len+24(FP), R4  // R4=len(sep)
	BR	indexbody<>(SB)
21
// indexbody<> is the search routine shared by ·Index and ·IndexString.
// It stores the byte offset of the first occurrence of sep in s to the
// int addressed by R5, or -1 if sep does not occur in s.
//
// s: string we are searching
// sep: string to search for
// R1=&s[0], R2=len(s)
// R3=&sep[0], R4=len(sep)
// R5=&ret (int)
//
// After the setup code:
//   R2 = &s[len(s)-len(sep)], the last address a match can start at
//   R4 = len(sep)-1
//   R7 = address of the candidate currently being examined
//   V0 = the first (up to 16) bytes of sep
//
// An empty sep is reported as not found (-1). len(sep) > 64 is not
// implemented; see index65plus.
// Caller must confirm availability of vx facility before calling.
TEXT indexbody<>(SB),NOSPLIT|NOFRAME,$0
	CMPBGT	R4, R2, notfound // sep longer than s: no match possible
	ADD	R1, R2
	SUB	R4, R2 // R2=&s[len(s)-len(sep)] (last valid index)
	CMPBEQ	R4, $0, notfound // empty sep: report not found
	SUB	$1, R4 // R4=len(sep)-1 for use as VLL index
	VLL	R4, (R3), V0 // contains first 16 bytes of sep
	MOVD	R1, R7 // R7=&s[0], first candidate
index2plus:
	// len(sep) == 2: test 16 candidate positions per iteration.
	CMPBNE	R4, $1, index3plus
	MOVD	$15(R7), R9
	CMPBGE	R9, R2, index2to16 // near the end of s: use the one-at-a-time loop
	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
	VONE	V16
	VREPH	$0, V0, V1         // first halfword of sep in every halfword
index2loop:
	VL	0(R7), V2          // 16 bytes, even indices
	VL	1(R7), V4          // 16 bytes, odd indices
	VCEQH	V1, V2, V5         // compare even indices
	VCEQH	V1, V4, V6         // compare odd indices
	VSEL	V5, V6, V31, V7    // merge even and odd indices
	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
	BLT	foundV17
	MOVD	$16(R7), R7        // R7+=16
	ADD	$15, R7, R9
	CMPBLE	R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search)
	CMPBLE	R7, R2, index2to16 // finish the remaining candidates one at a time
	BR	notfound

index3plus:
	// len(sep) == 3: test 16 candidate positions per iteration.
	CMPBNE	R4, $2, index4plus
	ADD	$15, R7, R9
	CMPBGE	R9, R2, index2to16 // near the end of s: use the one-at-a-time loop
	MOVD	$1, R0             // VLL index: load 2 bytes
	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
	VONE	V16
	VREPH	$0, V0, V1         // first halfword of sep in every halfword
	VREPB	$2, V0, V8         // third byte of sep in every byte
index3loop:
	VL	(R7), V2           // load 16-bytes into V2
	VLL	R0, 16(R7), V3     // load 2-bytes into V3
	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
	VCEQH	V1, V2, V5         // compare 2-byte even indices
	VCEQH	V1, V4, V6         // compare 2-byte odd indices
	VCEQB	V8, V9, V10        // compare last bytes
	VSEL	V5, V6, V31, V7    // merge even and odd indices
	VN	V7, V10, V7        // AND indices with last byte
	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
	BLT	foundV17
	MOVD	$16(R7), R7        // R7+=16
	ADD	$15, R7, R9
	CMPBLE	R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search)
	CMPBLE	R7, R2, index2to16 // finish the remaining candidates one at a time
	BR	notfound

index4plus:
	// len(sep) == 4: test 16 candidate positions per iteration.
	CMPBNE	R4, $3, index5plus
	ADD	$15, R7, R9
	CMPBGE	R9, R2, index2to16 // near the end of s: use the one-at-a-time loop
	MOVD	$2, R0             // VLL index: load 3 bytes
	VGBM	$0x8888, V29       // 0xff000000ff000000...
	VGBM	$0x2222, V30       // 0x0000ff000000ff00...
	VGBM	$0xcccc, V31       // 0xffff0000ffff0000...
	VONE	V16
	VREPF	$0, V0, V1         // first word of sep in every word
index4loop:
	VL	(R7), V2           // load 16-bytes into V2
	VLL	R0, 16(R7), V3     // load 3-bytes into V3
	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
	VSLDB	$3, V2, V3, V10    // V10=(V2:V3)<<3
	VCEQF	V1, V2, V5         // compare index 0, 4, ...
	VCEQF	V1, V4, V6         // compare index 1, 5, ...
	VCEQF	V1, V9, V11        // compare index 2, 6, ...
	VCEQF	V1, V10, V12       // compare index 3, 7, ...
	VSEL	V5, V6, V29, V13   // merge index 0, 1, 4, 5, ...
	VSEL	V11, V12, V30, V14 // merge index 2, 3, 6, 7, ...
	VSEL	V13, V14, V31, V7  // final merge
	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
	BLT	foundV17
	MOVD	$16(R7), R7        // R7+=16
	ADD	$15, R7, R9
	CMPBLE	R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search)
	CMPBLE	R7, R2, index2to16 // finish the remaining candidates one at a time
	BR	notfound

index5plus:
	CMPBGT	R4, $15, index17plus
index2to16:
	// len(sep) <= 16 (also the tail of the 2/3/4-byte loops above):
	// compare one candidate at a time against V0.
	CMPBGT	R7, R2, notfound
	MOVD	$1(R7), R8
	CMPBGT	R8, R2, index2to16tail // only one candidate left
index2to16loop:
	// unrolled 2x
	VLL	R4, (R7), V1       // candidate at R7
	VLL	R4, 1(R7), V2      // candidate at R7+1
	VCEQGS	V0, V1, V3
	BEQ	found
	MOVD	$1(R7), R7
	VCEQGS	V0, V2, V4
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLT	R7, R2, index2to16loop // at least two candidates remain
	CMPBGT	R7, R2, notfound       // none remain
index2to16tail:
	// exactly one candidate remains (R7 == R2)
	VLL	R4, (R7), V1
	VCEQGS	V0, V1, V2
	BEQ	found
	BR	notfound

index17plus:
	// 17 <= len(sep) <= 32: V0 plus a partial second vector V1.
	CMPBGT	R4, $31, index33plus
	SUB	$16, R4, R0        // VLL index for the bytes beyond the first 16
	VLL	R0, 16(R3), V1
	VONE	V7
index17to32loop:
	VL	(R7), V2
	VLL	R0, 16(R7), V3
	VCEQG	V0, V2, V4
	VCEQG	V1, V3, V5
	VN	V4, V5, V6         // combine the per-vector compare results
	VCEQGS	V6, V7, V8         // compare combined result against all ones
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLE	R7, R2, index17to32loop
	BR	notfound

index33plus:
	// 33 <= len(sep) <= 48: V0, V1 plus a partial third vector V2.
	CMPBGT	R4, $47, index49plus
	SUB	$32, R4, R0        // VLL index for the bytes beyond the first 32
	VL	16(R3), V1
	VLL	R0, 32(R3), V2
	VONE	V11
index33to48loop:
	VL	(R7), V3
	VL	16(R7), V4
	VLL	R0, 32(R7), V5
	VCEQG	V0, V3, V6
	VCEQG	V1, V4, V7
	VCEQG	V2, V5, V8
	VN	V6, V7, V9
	VN	V8, V9, V10        // combine the per-vector compare results
	VCEQGS	V10, V11, V12      // compare combined result against all ones
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLE	R7, R2, index33to48loop
	BR	notfound

index49plus:
	// 49 <= len(sep) <= 64: V0, V1, V2 plus a partial fourth vector V3.
	CMPBGT	R4, $63, index65plus
	SUB	$48, R4, R0        // VLL index for the bytes beyond the first 48
	VL	16(R3), V1
	VL	32(R3), V2
	VLL	R0, 48(R3), V3
	VONE	V15
index49to64loop:
	VL	(R7), V4
	VL	16(R7), V5
	VL	32(R7), V6
	VLL	R0, 48(R7), V7
	VCEQG	V0, V4, V8
	VCEQG	V1, V5, V9
	VCEQG	V2, V6, V10
	VCEQG	V3, V7, V11
	VN	V8, V9, V12
	VN	V10, V11, V13
	VN	V12, V13, V14      // combine the per-vector compare results
	VCEQGS	V14, V15, V16      // compare combined result against all ones
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLE	R7, R2, index49to64loop
	// fall through: every candidate examined without a match
notfound:
	MOVD	$-1, (R5)
	RET

index65plus:
	// not implemented
	// NOTE(review): the store through R0 is presumably meant to fault;
	// confirm callers never pass len(sep) > 64.
	MOVD	$0, (R0)
	RET

foundV17: // index is in doubleword V17[0]
	VLGVG	$0, V17, R8
	ADD	R8, R7             // R7=address of the match
found:
	SUB	R1, R7             // convert the address in R7 to an offset into s
	MOVD	R7, (R5)
	RET
217