1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package strings implements simple functions to manipulate UTF-8 encoded strings.
6//
7// For information about UTF-8 strings in Go, see https://blog.golang.org/strings.
8package strings
9
10import (
11	"internal/bytealg"
12	"internal/stringslite"
13	"unicode"
14	"unicode/utf8"
15)
16
17const maxInt = int(^uint(0) >> 1)
18
19// explode splits s into a slice of UTF-8 strings,
20// one string per Unicode character up to a maximum of n (n < 0 means no limit).
21// Invalid UTF-8 bytes are sliced individually.
22func explode(s string, n int) []string {
23	l := utf8.RuneCountInString(s)
24	if n < 0 || n > l {
25		n = l
26	}
27	a := make([]string, n)
28	for i := 0; i < n-1; i++ {
29		_, size := utf8.DecodeRuneInString(s)
30		a[i] = s[:size]
31		s = s[size:]
32	}
33	if n > 0 {
34		a[n-1] = s
35	}
36	return a
37}
38
39// Count counts the number of non-overlapping instances of substr in s.
40// If substr is an empty string, Count returns 1 + the number of Unicode code points in s.
41func Count(s, substr string) int {
42	// special case
43	if len(substr) == 0 {
44		return utf8.RuneCountInString(s) + 1
45	}
46	if len(substr) == 1 {
47		return bytealg.CountString(s, substr[0])
48	}
49	n := 0
50	for {
51		i := Index(s, substr)
52		if i == -1 {
53			return n
54		}
55		n++
56		s = s[i+len(substr):]
57	}
58}
59
60// Contains reports whether substr is within s.
61func Contains(s, substr string) bool {
62	return Index(s, substr) >= 0
63}
64
65// ContainsAny reports whether any Unicode code points in chars are within s.
66func ContainsAny(s, chars string) bool {
67	return IndexAny(s, chars) >= 0
68}
69
70// ContainsRune reports whether the Unicode code point r is within s.
71func ContainsRune(s string, r rune) bool {
72	return IndexRune(s, r) >= 0
73}
74
75// ContainsFunc reports whether any Unicode code points r within s satisfy f(r).
76func ContainsFunc(s string, f func(rune) bool) bool {
77	return IndexFunc(s, f) >= 0
78}
79
80// LastIndex returns the index of the last instance of substr in s, or -1 if substr is not present in s.
81func LastIndex(s, substr string) int {
82	n := len(substr)
83	switch {
84	case n == 0:
85		return len(s)
86	case n == 1:
87		return bytealg.LastIndexByteString(s, substr[0])
88	case n == len(s):
89		if substr == s {
90			return 0
91		}
92		return -1
93	case n > len(s):
94		return -1
95	}
96	// Rabin-Karp search from the end of the string
97	hashss, pow := bytealg.HashStrRev(substr)
98	last := len(s) - n
99	var h uint32
100	for i := len(s) - 1; i >= last; i-- {
101		h = h*bytealg.PrimeRK + uint32(s[i])
102	}
103	if h == hashss && s[last:] == substr {
104		return last
105	}
106	for i := last - 1; i >= 0; i-- {
107		h *= bytealg.PrimeRK
108		h += uint32(s[i])
109		h -= pow * uint32(s[i+n])
110		if h == hashss && s[i:i+n] == substr {
111			return i
112		}
113	}
114	return -1
115}
116
117// IndexByte returns the index of the first instance of c in s, or -1 if c is not present in s.
118func IndexByte(s string, c byte) int {
119	return stringslite.IndexByte(s, c)
120}
121
122// IndexRune returns the index of the first instance of the Unicode code point
123// r, or -1 if rune is not present in s.
124// If r is [utf8.RuneError], it returns the first instance of any
125// invalid UTF-8 byte sequence.
126func IndexRune(s string, r rune) int {
127	switch {
128	case 0 <= r && r < utf8.RuneSelf:
129		return IndexByte(s, byte(r))
130	case r == utf8.RuneError:
131		for i, r := range s {
132			if r == utf8.RuneError {
133				return i
134			}
135		}
136		return -1
137	case !utf8.ValidRune(r):
138		return -1
139	default:
140		return Index(s, string(r))
141	}
142}
143
144// IndexAny returns the index of the first instance of any Unicode code point
145// from chars in s, or -1 if no Unicode code point from chars is present in s.
146func IndexAny(s, chars string) int {
147	if chars == "" {
148		// Avoid scanning all of s.
149		return -1
150	}
151	if len(chars) == 1 {
152		// Avoid scanning all of s.
153		r := rune(chars[0])
154		if r >= utf8.RuneSelf {
155			r = utf8.RuneError
156		}
157		return IndexRune(s, r)
158	}
159	if len(s) > 8 {
160		if as, isASCII := makeASCIISet(chars); isASCII {
161			for i := 0; i < len(s); i++ {
162				if as.contains(s[i]) {
163					return i
164				}
165			}
166			return -1
167		}
168	}
169	for i, c := range s {
170		if IndexRune(chars, c) >= 0 {
171			return i
172		}
173	}
174	return -1
175}
176
177// LastIndexAny returns the index of the last instance of any Unicode code
178// point from chars in s, or -1 if no Unicode code point from chars is
179// present in s.
180func LastIndexAny(s, chars string) int {
181	if chars == "" {
182		// Avoid scanning all of s.
183		return -1
184	}
185	if len(s) == 1 {
186		rc := rune(s[0])
187		if rc >= utf8.RuneSelf {
188			rc = utf8.RuneError
189		}
190		if IndexRune(chars, rc) >= 0 {
191			return 0
192		}
193		return -1
194	}
195	if len(s) > 8 {
196		if as, isASCII := makeASCIISet(chars); isASCII {
197			for i := len(s) - 1; i >= 0; i-- {
198				if as.contains(s[i]) {
199					return i
200				}
201			}
202			return -1
203		}
204	}
205	if len(chars) == 1 {
206		rc := rune(chars[0])
207		if rc >= utf8.RuneSelf {
208			rc = utf8.RuneError
209		}
210		for i := len(s); i > 0; {
211			r, size := utf8.DecodeLastRuneInString(s[:i])
212			i -= size
213			if rc == r {
214				return i
215			}
216		}
217		return -1
218	}
219	for i := len(s); i > 0; {
220		r, size := utf8.DecodeLastRuneInString(s[:i])
221		i -= size
222		if IndexRune(chars, r) >= 0 {
223			return i
224		}
225	}
226	return -1
227}
228
229// LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s.
230func LastIndexByte(s string, c byte) int {
231	return bytealg.LastIndexByteString(s, c)
232}
233
234// Generic split: splits after each instance of sep,
235// including sepSave bytes of sep in the subarrays.
236func genSplit(s, sep string, sepSave, n int) []string {
237	if n == 0 {
238		return nil
239	}
240	if sep == "" {
241		return explode(s, n)
242	}
243	if n < 0 {
244		n = Count(s, sep) + 1
245	}
246
247	if n > len(s)+1 {
248		n = len(s) + 1
249	}
250	a := make([]string, n)
251	n--
252	i := 0
253	for i < n {
254		m := Index(s, sep)
255		if m < 0 {
256			break
257		}
258		a[i] = s[:m+sepSave]
259		s = s[m+len(sep):]
260		i++
261	}
262	a[i] = s
263	return a[:i+1]
264}
265
266// SplitN slices s into substrings separated by sep and returns a slice of
267// the substrings between those separators.
268//
269// The count determines the number of substrings to return:
270//   - n > 0: at most n substrings; the last substring will be the unsplit remainder;
271//   - n == 0: the result is nil (zero substrings);
272//   - n < 0: all substrings.
273//
274// Edge cases for s and sep (for example, empty strings) are handled
275// as described in the documentation for [Split].
276//
277// To split around the first instance of a separator, see [Cut].
278func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) }
279
280// SplitAfterN slices s into substrings after each instance of sep and
281// returns a slice of those substrings.
282//
283// The count determines the number of substrings to return:
284//   - n > 0: at most n substrings; the last substring will be the unsplit remainder;
285//   - n == 0: the result is nil (zero substrings);
286//   - n < 0: all substrings.
287//
288// Edge cases for s and sep (for example, empty strings) are handled
289// as described in the documentation for [SplitAfter].
290func SplitAfterN(s, sep string, n int) []string {
291	return genSplit(s, sep, len(sep), n)
292}
293
294// Split slices s into all substrings separated by sep and returns a slice of
295// the substrings between those separators.
296//
297// If s does not contain sep and sep is not empty, Split returns a
298// slice of length 1 whose only element is s.
299//
300// If sep is empty, Split splits after each UTF-8 sequence. If both s
301// and sep are empty, Split returns an empty slice.
302//
303// It is equivalent to [SplitN] with a count of -1.
304//
305// To split around the first instance of a separator, see [Cut].
306func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) }
307
308// SplitAfter slices s into all substrings after each instance of sep and
309// returns a slice of those substrings.
310//
311// If s does not contain sep and sep is not empty, SplitAfter returns
312// a slice of length 1 whose only element is s.
313//
314// If sep is empty, SplitAfter splits after each UTF-8 sequence. If
315// both s and sep are empty, SplitAfter returns an empty slice.
316//
317// It is equivalent to [SplitAfterN] with a count of -1.
318func SplitAfter(s, sep string) []string {
319	return genSplit(s, sep, len(sep), -1)
320}
321
322var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
323
324// Fields splits the string s around each instance of one or more consecutive white space
325// characters, as defined by [unicode.IsSpace], returning a slice of substrings of s or an
326// empty slice if s contains only white space.
327func Fields(s string) []string {
328	// First count the fields.
329	// This is an exact count if s is ASCII, otherwise it is an approximation.
330	n := 0
331	wasSpace := 1
332	// setBits is used to track which bits are set in the bytes of s.
333	setBits := uint8(0)
334	for i := 0; i < len(s); i++ {
335		r := s[i]
336		setBits |= r
337		isSpace := int(asciiSpace[r])
338		n += wasSpace & ^isSpace
339		wasSpace = isSpace
340	}
341
342	if setBits >= utf8.RuneSelf {
343		// Some runes in the input string are not ASCII.
344		return FieldsFunc(s, unicode.IsSpace)
345	}
346	// ASCII fast path
347	a := make([]string, n)
348	na := 0
349	fieldStart := 0
350	i := 0
351	// Skip spaces in the front of the input.
352	for i < len(s) && asciiSpace[s[i]] != 0 {
353		i++
354	}
355	fieldStart = i
356	for i < len(s) {
357		if asciiSpace[s[i]] == 0 {
358			i++
359			continue
360		}
361		a[na] = s[fieldStart:i]
362		na++
363		i++
364		// Skip spaces in between fields.
365		for i < len(s) && asciiSpace[s[i]] != 0 {
366			i++
367		}
368		fieldStart = i
369	}
370	if fieldStart < len(s) { // Last field might end at EOF.
371		a[na] = s[fieldStart:]
372	}
373	return a
374}
375
376// FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c)
377// and returns an array of slices of s. If all code points in s satisfy f(c) or the
378// string is empty, an empty slice is returned.
379//
380// FieldsFunc makes no guarantees about the order in which it calls f(c)
381// and assumes that f always returns the same value for a given c.
382func FieldsFunc(s string, f func(rune) bool) []string {
383	// A span is used to record a slice of s of the form s[start:end].
384	// The start index is inclusive and the end index is exclusive.
385	type span struct {
386		start int
387		end   int
388	}
389	spans := make([]span, 0, 32)
390
391	// Find the field start and end indices.
392	// Doing this in a separate pass (rather than slicing the string s
393	// and collecting the result substrings right away) is significantly
394	// more efficient, possibly due to cache effects.
395	start := -1 // valid span start if >= 0
396	for end, rune := range s {
397		if f(rune) {
398			if start >= 0 {
399				spans = append(spans, span{start, end})
400				// Set start to a negative value.
401				// Note: using -1 here consistently and reproducibly
402				// slows down this code by a several percent on amd64.
403				start = ^start
404			}
405		} else {
406			if start < 0 {
407				start = end
408			}
409		}
410	}
411
412	// Last field might end at EOF.
413	if start >= 0 {
414		spans = append(spans, span{start, len(s)})
415	}
416
417	// Create strings from recorded field indices.
418	a := make([]string, len(spans))
419	for i, span := range spans {
420		a[i] = s[span.start:span.end]
421	}
422
423	return a
424}
425
426// Join concatenates the elements of its first argument to create a single string. The separator
427// string sep is placed between elements in the resulting string.
428func Join(elems []string, sep string) string {
429	switch len(elems) {
430	case 0:
431		return ""
432	case 1:
433		return elems[0]
434	}
435
436	var n int
437	if len(sep) > 0 {
438		if len(sep) >= maxInt/(len(elems)-1) {
439			panic("strings: Join output length overflow")
440		}
441		n += len(sep) * (len(elems) - 1)
442	}
443	for _, elem := range elems {
444		if len(elem) > maxInt-n {
445			panic("strings: Join output length overflow")
446		}
447		n += len(elem)
448	}
449
450	var b Builder
451	b.Grow(n)
452	b.WriteString(elems[0])
453	for _, s := range elems[1:] {
454		b.WriteString(sep)
455		b.WriteString(s)
456	}
457	return b.String()
458}
459
460// HasPrefix reports whether the string s begins with prefix.
461func HasPrefix(s, prefix string) bool {
462	return stringslite.HasPrefix(s, prefix)
463}
464
465// HasSuffix reports whether the string s ends with suffix.
466func HasSuffix(s, suffix string) bool {
467	return stringslite.HasSuffix(s, suffix)
468}
469
470// Map returns a copy of the string s with all its characters modified
471// according to the mapping function. If mapping returns a negative value, the character is
472// dropped from the string with no replacement.
473func Map(mapping func(rune) rune, s string) string {
474	// In the worst case, the string can grow when mapped, making
475	// things unpleasant. But it's so rare we barge in assuming it's
476	// fine. It could also shrink but that falls out naturally.
477
478	// The output buffer b is initialized on demand, the first
479	// time a character differs.
480	var b Builder
481
482	for i, c := range s {
483		r := mapping(c)
484		if r == c && c != utf8.RuneError {
485			continue
486		}
487
488		var width int
489		if c == utf8.RuneError {
490			c, width = utf8.DecodeRuneInString(s[i:])
491			if width != 1 && r == c {
492				continue
493			}
494		} else {
495			width = utf8.RuneLen(c)
496		}
497
498		b.Grow(len(s) + utf8.UTFMax)
499		b.WriteString(s[:i])
500		if r >= 0 {
501			b.WriteRune(r)
502		}
503
504		s = s[i+width:]
505		break
506	}
507
508	// Fast path for unchanged input
509	if b.Cap() == 0 { // didn't call b.Grow above
510		return s
511	}
512
513	for _, c := range s {
514		r := mapping(c)
515
516		if r >= 0 {
517			// common case
518			// Due to inlining, it is more performant to determine if WriteByte should be
519			// invoked rather than always call WriteRune
520			if r < utf8.RuneSelf {
521				b.WriteByte(byte(r))
522			} else {
523				// r is not an ASCII rune.
524				b.WriteRune(r)
525			}
526		}
527	}
528
529	return b.String()
530}
531
532// According to static analysis, spaces, dashes, zeros, equals, and tabs
533// are the most commonly repeated string literal,
534// often used for display on fixed-width terminal windows.
535// Pre-declare constants for these for O(1) repetition in the common-case.
536const (
537	repeatedSpaces = "" +
538		"                                                                " +
539		"                                                                "
540	repeatedDashes = "" +
541		"----------------------------------------------------------------" +
542		"----------------------------------------------------------------"
543	repeatedZeroes = "" +
544		"0000000000000000000000000000000000000000000000000000000000000000"
545	repeatedEquals = "" +
546		"================================================================" +
547		"================================================================"
548	repeatedTabs = "" +
549		"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" +
550		"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
551)
552
553// Repeat returns a new string consisting of count copies of the string s.
554//
555// It panics if count is negative or if the result of (len(s) * count)
556// overflows.
557func Repeat(s string, count int) string {
558	switch count {
559	case 0:
560		return ""
561	case 1:
562		return s
563	}
564
565	// Since we cannot return an error on overflow,
566	// we should panic if the repeat will generate an overflow.
567	// See golang.org/issue/16237.
568	if count < 0 {
569		panic("strings: negative Repeat count")
570	}
571	if len(s) > maxInt/count {
572		panic("strings: Repeat output length overflow")
573	}
574	n := len(s) * count
575
576	if len(s) == 0 {
577		return ""
578	}
579
580	// Optimize for commonly repeated strings of relatively short length.
581	switch s[0] {
582	case ' ', '-', '0', '=', '\t':
583		switch {
584		case n <= len(repeatedSpaces) && HasPrefix(repeatedSpaces, s):
585			return repeatedSpaces[:n]
586		case n <= len(repeatedDashes) && HasPrefix(repeatedDashes, s):
587			return repeatedDashes[:n]
588		case n <= len(repeatedZeroes) && HasPrefix(repeatedZeroes, s):
589			return repeatedZeroes[:n]
590		case n <= len(repeatedEquals) && HasPrefix(repeatedEquals, s):
591			return repeatedEquals[:n]
592		case n <= len(repeatedTabs) && HasPrefix(repeatedTabs, s):
593			return repeatedTabs[:n]
594		}
595	}
596
597	// Past a certain chunk size it is counterproductive to use
598	// larger chunks as the source of the write, as when the source
599	// is too large we are basically just thrashing the CPU D-cache.
600	// So if the result length is larger than an empirically-found
601	// limit (8KB), we stop growing the source string once the limit
602	// is reached and keep reusing the same source string - that
603	// should therefore be always resident in the L1 cache - until we
604	// have completed the construction of the result.
605	// This yields significant speedups (up to +100%) in cases where
606	// the result length is large (roughly, over L2 cache size).
607	const chunkLimit = 8 * 1024
608	chunkMax := n
609	if n > chunkLimit {
610		chunkMax = chunkLimit / len(s) * len(s)
611		if chunkMax == 0 {
612			chunkMax = len(s)
613		}
614	}
615
616	var b Builder
617	b.Grow(n)
618	b.WriteString(s)
619	for b.Len() < n {
620		chunk := n - b.Len()
621		if chunk > b.Len() {
622			chunk = b.Len()
623		}
624		if chunk > chunkMax {
625			chunk = chunkMax
626		}
627		b.WriteString(b.String()[:chunk])
628	}
629	return b.String()
630}
631
632// ToUpper returns s with all Unicode letters mapped to their upper case.
633func ToUpper(s string) string {
634	isASCII, hasLower := true, false
635	for i := 0; i < len(s); i++ {
636		c := s[i]
637		if c >= utf8.RuneSelf {
638			isASCII = false
639			break
640		}
641		hasLower = hasLower || ('a' <= c && c <= 'z')
642	}
643
644	if isASCII { // optimize for ASCII-only strings.
645		if !hasLower {
646			return s
647		}
648		var (
649			b   Builder
650			pos int
651		)
652		b.Grow(len(s))
653		for i := 0; i < len(s); i++ {
654			c := s[i]
655			if 'a' <= c && c <= 'z' {
656				c -= 'a' - 'A'
657				if pos < i {
658					b.WriteString(s[pos:i])
659				}
660				b.WriteByte(c)
661				pos = i + 1
662			}
663		}
664		if pos < len(s) {
665			b.WriteString(s[pos:])
666		}
667		return b.String()
668	}
669	return Map(unicode.ToUpper, s)
670}
671
672// ToLower returns s with all Unicode letters mapped to their lower case.
673func ToLower(s string) string {
674	isASCII, hasUpper := true, false
675	for i := 0; i < len(s); i++ {
676		c := s[i]
677		if c >= utf8.RuneSelf {
678			isASCII = false
679			break
680		}
681		hasUpper = hasUpper || ('A' <= c && c <= 'Z')
682	}
683
684	if isASCII { // optimize for ASCII-only strings.
685		if !hasUpper {
686			return s
687		}
688		var (
689			b   Builder
690			pos int
691		)
692		b.Grow(len(s))
693		for i := 0; i < len(s); i++ {
694			c := s[i]
695			if 'A' <= c && c <= 'Z' {
696				c += 'a' - 'A'
697				if pos < i {
698					b.WriteString(s[pos:i])
699				}
700				b.WriteByte(c)
701				pos = i + 1
702			}
703		}
704		if pos < len(s) {
705			b.WriteString(s[pos:])
706		}
707		return b.String()
708	}
709	return Map(unicode.ToLower, s)
710}
711
712// ToTitle returns a copy of the string s with all Unicode letters mapped to
713// their Unicode title case.
714func ToTitle(s string) string { return Map(unicode.ToTitle, s) }
715
716// ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their
717// upper case using the case mapping specified by c.
718func ToUpperSpecial(c unicode.SpecialCase, s string) string {
719	return Map(c.ToUpper, s)
720}
721
722// ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their
723// lower case using the case mapping specified by c.
724func ToLowerSpecial(c unicode.SpecialCase, s string) string {
725	return Map(c.ToLower, s)
726}
727
728// ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their
729// Unicode title case, giving priority to the special casing rules.
730func ToTitleSpecial(c unicode.SpecialCase, s string) string {
731	return Map(c.ToTitle, s)
732}
733
734// ToValidUTF8 returns a copy of the string s with each run of invalid UTF-8 byte sequences
735// replaced by the replacement string, which may be empty.
736func ToValidUTF8(s, replacement string) string {
737	var b Builder
738
739	for i, c := range s {
740		if c != utf8.RuneError {
741			continue
742		}
743
744		_, wid := utf8.DecodeRuneInString(s[i:])
745		if wid == 1 {
746			b.Grow(len(s) + len(replacement))
747			b.WriteString(s[:i])
748			s = s[i:]
749			break
750		}
751	}
752
753	// Fast path for unchanged input
754	if b.Cap() == 0 { // didn't call b.Grow above
755		return s
756	}
757
758	invalid := false // previous byte was from an invalid UTF-8 sequence
759	for i := 0; i < len(s); {
760		c := s[i]
761		if c < utf8.RuneSelf {
762			i++
763			invalid = false
764			b.WriteByte(c)
765			continue
766		}
767		_, wid := utf8.DecodeRuneInString(s[i:])
768		if wid == 1 {
769			i++
770			if !invalid {
771				invalid = true
772				b.WriteString(replacement)
773			}
774			continue
775		}
776		invalid = false
777		b.WriteString(s[i : i+wid])
778		i += wid
779	}
780
781	return b.String()
782}
783
784// isSeparator reports whether the rune could mark a word boundary.
785// TODO: update when package unicode captures more of the properties.
786func isSeparator(r rune) bool {
787	// ASCII alphanumerics and underscore are not separators
788	if r <= 0x7F {
789		switch {
790		case '0' <= r && r <= '9':
791			return false
792		case 'a' <= r && r <= 'z':
793			return false
794		case 'A' <= r && r <= 'Z':
795			return false
796		case r == '_':
797			return false
798		}
799		return true
800	}
801	// Letters and digits are not separators
802	if unicode.IsLetter(r) || unicode.IsDigit(r) {
803		return false
804	}
805	// Otherwise, all we can do for now is treat spaces as separators.
806	return unicode.IsSpace(r)
807}
808
809// Title returns a copy of the string s with all Unicode letters that begin words
810// mapped to their Unicode title case.
811//
812// Deprecated: The rule Title uses for word boundaries does not handle Unicode
813// punctuation properly. Use golang.org/x/text/cases instead.
814func Title(s string) string {
815	// Use a closure here to remember state.
816	// Hackish but effective. Depends on Map scanning in order and calling
817	// the closure once per rune.
818	prev := ' '
819	return Map(
820		func(r rune) rune {
821			if isSeparator(prev) {
822				prev = r
823				return unicode.ToTitle(r)
824			}
825			prev = r
826			return r
827		},
828		s)
829}
830
831// TrimLeftFunc returns a slice of the string s with all leading
832// Unicode code points c satisfying f(c) removed.
833func TrimLeftFunc(s string, f func(rune) bool) string {
834	i := indexFunc(s, f, false)
835	if i == -1 {
836		return ""
837	}
838	return s[i:]
839}
840
841// TrimRightFunc returns a slice of the string s with all trailing
842// Unicode code points c satisfying f(c) removed.
843func TrimRightFunc(s string, f func(rune) bool) string {
844	i := lastIndexFunc(s, f, false)
845	if i >= 0 && s[i] >= utf8.RuneSelf {
846		_, wid := utf8.DecodeRuneInString(s[i:])
847		i += wid
848	} else {
849		i++
850	}
851	return s[0:i]
852}
853
854// TrimFunc returns a slice of the string s with all leading
855// and trailing Unicode code points c satisfying f(c) removed.
856func TrimFunc(s string, f func(rune) bool) string {
857	return TrimRightFunc(TrimLeftFunc(s, f), f)
858}
859
860// IndexFunc returns the index into s of the first Unicode
861// code point satisfying f(c), or -1 if none do.
862func IndexFunc(s string, f func(rune) bool) int {
863	return indexFunc(s, f, true)
864}
865
866// LastIndexFunc returns the index into s of the last
867// Unicode code point satisfying f(c), or -1 if none do.
868func LastIndexFunc(s string, f func(rune) bool) int {
869	return lastIndexFunc(s, f, true)
870}
871
872// indexFunc is the same as IndexFunc except that if
873// truth==false, the sense of the predicate function is
874// inverted.
875func indexFunc(s string, f func(rune) bool, truth bool) int {
876	for i, r := range s {
877		if f(r) == truth {
878			return i
879		}
880	}
881	return -1
882}
883
884// lastIndexFunc is the same as LastIndexFunc except that if
885// truth==false, the sense of the predicate function is
886// inverted.
887func lastIndexFunc(s string, f func(rune) bool, truth bool) int {
888	for i := len(s); i > 0; {
889		r, size := utf8.DecodeLastRuneInString(s[0:i])
890		i -= size
891		if f(r) == truth {
892			return i
893		}
894	}
895	return -1
896}
897
898// asciiSet is a 32-byte value, where each bit represents the presence of a
899// given ASCII character in the set. The 128-bits of the lower 16 bytes,
900// starting with the least-significant bit of the lowest word to the
901// most-significant bit of the highest word, map to the full range of all
902// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
903// ensuring that any non-ASCII character will be reported as not in the set.
904// This allocates a total of 32 bytes even though the upper half
905// is unused to avoid bounds checks in asciiSet.contains.
906type asciiSet [8]uint32
907
908// makeASCIISet creates a set of ASCII characters and reports whether all
909// characters in chars are ASCII.
910func makeASCIISet(chars string) (as asciiSet, ok bool) {
911	for i := 0; i < len(chars); i++ {
912		c := chars[i]
913		if c >= utf8.RuneSelf {
914			return as, false
915		}
916		as[c/32] |= 1 << (c % 32)
917	}
918	return as, true
919}
920
921// contains reports whether c is inside the set.
922func (as *asciiSet) contains(c byte) bool {
923	return (as[c/32] & (1 << (c % 32))) != 0
924}
925
926// Trim returns a slice of the string s with all leading and
927// trailing Unicode code points contained in cutset removed.
928func Trim(s, cutset string) string {
929	if s == "" || cutset == "" {
930		return s
931	}
932	if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
933		return trimLeftByte(trimRightByte(s, cutset[0]), cutset[0])
934	}
935	if as, ok := makeASCIISet(cutset); ok {
936		return trimLeftASCII(trimRightASCII(s, &as), &as)
937	}
938	return trimLeftUnicode(trimRightUnicode(s, cutset), cutset)
939}
940
941// TrimLeft returns a slice of the string s with all leading
942// Unicode code points contained in cutset removed.
943//
944// To remove a prefix, use [TrimPrefix] instead.
945func TrimLeft(s, cutset string) string {
946	if s == "" || cutset == "" {
947		return s
948	}
949	if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
950		return trimLeftByte(s, cutset[0])
951	}
952	if as, ok := makeASCIISet(cutset); ok {
953		return trimLeftASCII(s, &as)
954	}
955	return trimLeftUnicode(s, cutset)
956}
957
958func trimLeftByte(s string, c byte) string {
959	for len(s) > 0 && s[0] == c {
960		s = s[1:]
961	}
962	return s
963}
964
965func trimLeftASCII(s string, as *asciiSet) string {
966	for len(s) > 0 {
967		if !as.contains(s[0]) {
968			break
969		}
970		s = s[1:]
971	}
972	return s
973}
974
975func trimLeftUnicode(s, cutset string) string {
976	for len(s) > 0 {
977		r, n := rune(s[0]), 1
978		if r >= utf8.RuneSelf {
979			r, n = utf8.DecodeRuneInString(s)
980		}
981		if !ContainsRune(cutset, r) {
982			break
983		}
984		s = s[n:]
985	}
986	return s
987}
988
989// TrimRight returns a slice of the string s, with all trailing
990// Unicode code points contained in cutset removed.
991//
992// To remove a suffix, use [TrimSuffix] instead.
993func TrimRight(s, cutset string) string {
994	if s == "" || cutset == "" {
995		return s
996	}
997	if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
998		return trimRightByte(s, cutset[0])
999	}
1000	if as, ok := makeASCIISet(cutset); ok {
1001		return trimRightASCII(s, &as)
1002	}
1003	return trimRightUnicode(s, cutset)
1004}
1005
1006func trimRightByte(s string, c byte) string {
1007	for len(s) > 0 && s[len(s)-1] == c {
1008		s = s[:len(s)-1]
1009	}
1010	return s
1011}
1012
1013func trimRightASCII(s string, as *asciiSet) string {
1014	for len(s) > 0 {
1015		if !as.contains(s[len(s)-1]) {
1016			break
1017		}
1018		s = s[:len(s)-1]
1019	}
1020	return s
1021}
1022
1023func trimRightUnicode(s, cutset string) string {
1024	for len(s) > 0 {
1025		r, n := rune(s[len(s)-1]), 1
1026		if r >= utf8.RuneSelf {
1027			r, n = utf8.DecodeLastRuneInString(s)
1028		}
1029		if !ContainsRune(cutset, r) {
1030			break
1031		}
1032		s = s[:len(s)-n]
1033	}
1034	return s
1035}
1036
1037// TrimSpace returns a slice of the string s, with all leading
1038// and trailing white space removed, as defined by Unicode.
1039func TrimSpace(s string) string {
1040	// Fast path for ASCII: look for the first ASCII non-space byte
1041	start := 0
1042	for ; start < len(s); start++ {
1043		c := s[start]
1044		if c >= utf8.RuneSelf {
1045			// If we run into a non-ASCII byte, fall back to the
1046			// slower unicode-aware method on the remaining bytes
1047			return TrimFunc(s[start:], unicode.IsSpace)
1048		}
1049		if asciiSpace[c] == 0 {
1050			break
1051		}
1052	}
1053
1054	// Now look for the first ASCII non-space byte from the end
1055	stop := len(s)
1056	for ; stop > start; stop-- {
1057		c := s[stop-1]
1058		if c >= utf8.RuneSelf {
1059			// start has been already trimmed above, should trim end only
1060			return TrimRightFunc(s[start:stop], unicode.IsSpace)
1061		}
1062		if asciiSpace[c] == 0 {
1063			break
1064		}
1065	}
1066
1067	// At this point s[start:stop] starts and ends with an ASCII
1068	// non-space bytes, so we're done. Non-ASCII cases have already
1069	// been handled above.
1070	return s[start:stop]
1071}
1072
1073// TrimPrefix returns s without the provided leading prefix string.
1074// If s doesn't start with prefix, s is returned unchanged.
1075func TrimPrefix(s, prefix string) string {
1076	return stringslite.TrimPrefix(s, prefix)
1077}
1078
1079// TrimSuffix returns s without the provided trailing suffix string.
1080// If s doesn't end with suffix, s is returned unchanged.
1081func TrimSuffix(s, suffix string) string {
1082	return stringslite.TrimSuffix(s, suffix)
1083}
1084
1085// Replace returns a copy of the string s with the first n
1086// non-overlapping instances of old replaced by new.
1087// If old is empty, it matches at the beginning of the string
1088// and after each UTF-8 sequence, yielding up to k+1 replacements
1089// for a k-rune string.
1090// If n < 0, there is no limit on the number of replacements.
1091func Replace(s, old, new string, n int) string {
1092	if old == new || n == 0 {
1093		return s // avoid allocation
1094	}
1095
1096	// Compute number of replacements.
1097	if m := Count(s, old); m == 0 {
1098		return s // avoid allocation
1099	} else if n < 0 || m < n {
1100		n = m
1101	}
1102
1103	// Apply replacements to buffer.
1104	var b Builder
1105	b.Grow(len(s) + n*(len(new)-len(old)))
1106	start := 0
1107	for i := 0; i < n; i++ {
1108		j := start
1109		if len(old) == 0 {
1110			if i > 0 {
1111				_, wid := utf8.DecodeRuneInString(s[start:])
1112				j += wid
1113			}
1114		} else {
1115			j += Index(s[start:], old)
1116		}
1117		b.WriteString(s[start:j])
1118		b.WriteString(new)
1119		start = j + len(old)
1120	}
1121	b.WriteString(s[start:])
1122	return b.String()
1123}
1124
1125// ReplaceAll returns a copy of the string s with all
1126// non-overlapping instances of old replaced by new.
1127// If old is empty, it matches at the beginning of the string
1128// and after each UTF-8 sequence, yielding up to k+1 replacements
1129// for a k-rune string.
1130func ReplaceAll(s, old, new string) string {
1131	return Replace(s, old, new, -1)
1132}
1133
1134// EqualFold reports whether s and t, interpreted as UTF-8 strings,
1135// are equal under simple Unicode case-folding, which is a more general
1136// form of case-insensitivity.
1137func EqualFold(s, t string) bool {
1138	// ASCII fast path
1139	i := 0
1140	for ; i < len(s) && i < len(t); i++ {
1141		sr := s[i]
1142		tr := t[i]
1143		if sr|tr >= utf8.RuneSelf {
1144			goto hasUnicode
1145		}
1146
1147		// Easy case.
1148		if tr == sr {
1149			continue
1150		}
1151
1152		// Make sr < tr to simplify what follows.
1153		if tr < sr {
1154			tr, sr = sr, tr
1155		}
1156		// ASCII only, sr/tr must be upper/lower case
1157		if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
1158			continue
1159		}
1160		return false
1161	}
1162	// Check if we've exhausted both strings.
1163	return len(s) == len(t)
1164
1165hasUnicode:
1166	s = s[i:]
1167	t = t[i:]
1168	for _, sr := range s {
1169		// If t is exhausted the strings are not equal.
1170		if len(t) == 0 {
1171			return false
1172		}
1173
1174		// Extract first rune from second string.
1175		var tr rune
1176		if t[0] < utf8.RuneSelf {
1177			tr, t = rune(t[0]), t[1:]
1178		} else {
1179			r, size := utf8.DecodeRuneInString(t)
1180			tr, t = r, t[size:]
1181		}
1182
1183		// If they match, keep going; if not, return false.
1184
1185		// Easy case.
1186		if tr == sr {
1187			continue
1188		}
1189
1190		// Make sr < tr to simplify what follows.
1191		if tr < sr {
1192			tr, sr = sr, tr
1193		}
1194		// Fast check for ASCII.
1195		if tr < utf8.RuneSelf {
1196			// ASCII only, sr/tr must be upper/lower case
1197			if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
1198				continue
1199			}
1200			return false
1201		}
1202
1203		// General case. SimpleFold(x) returns the next equivalent rune > x
1204		// or wraps around to smaller values.
1205		r := unicode.SimpleFold(sr)
1206		for r != sr && r < tr {
1207			r = unicode.SimpleFold(r)
1208		}
1209		if r == tr {
1210			continue
1211		}
1212		return false
1213	}
1214
1215	// First string is empty, so check if the second one is also empty.
1216	return len(t) == 0
1217}
1218
1219// Index returns the index of the first instance of substr in s, or -1 if substr is not present in s.
1220func Index(s, substr string) int {
1221	return stringslite.Index(s, substr)
1222}
1223
1224// Cut slices s around the first instance of sep,
1225// returning the text before and after sep.
1226// The found result reports whether sep appears in s.
1227// If sep does not appear in s, cut returns s, "", false.
1228func Cut(s, sep string) (before, after string, found bool) {
1229	return stringslite.Cut(s, sep)
1230}
1231
1232// CutPrefix returns s without the provided leading prefix string
1233// and reports whether it found the prefix.
1234// If s doesn't start with prefix, CutPrefix returns s, false.
1235// If prefix is the empty string, CutPrefix returns s, true.
1236func CutPrefix(s, prefix string) (after string, found bool) {
1237	return stringslite.CutPrefix(s, prefix)
1238}
1239
1240// CutSuffix returns s without the provided ending suffix string
1241// and reports whether it found the suffix.
1242// If s doesn't end with suffix, CutSuffix returns s, false.
1243// If suffix is the empty string, CutSuffix returns s, true.
1244func CutSuffix(s, suffix string) (before string, found bool) {
1245	return stringslite.CutSuffix(s, suffix)
1246}
1247