xref: /aosp_15_r20/external/starlark-go/internal/spell/spell.go (revision 4947cdc739c985f6d86941e22894f5cefe7c9e9a)
1*4947cdc7SCole Faust// Package spell file defines a simple spelling checker for use in attribute errors
2*4947cdc7SCole Faust// such as "no such field .foo; did you mean .food?".
3*4947cdc7SCole Faustpackage spell
4*4947cdc7SCole Faust
5*4947cdc7SCole Faustimport (
6*4947cdc7SCole Faust	"strings"
7*4947cdc7SCole Faust	"unicode"
8*4947cdc7SCole Faust)
9*4947cdc7SCole Faust
10*4947cdc7SCole Faust// Nearest returns the element of candidates
11*4947cdc7SCole Faust// nearest to x using the Levenshtein metric,
12*4947cdc7SCole Faust// or "" if none were promising.
13*4947cdc7SCole Faustfunc Nearest(x string, candidates []string) string {
14*4947cdc7SCole Faust	// Ignore underscores and case when matching.
15*4947cdc7SCole Faust	fold := func(s string) string {
16*4947cdc7SCole Faust		return strings.Map(func(r rune) rune {
17*4947cdc7SCole Faust			if r == '_' {
18*4947cdc7SCole Faust				return -1
19*4947cdc7SCole Faust			}
20*4947cdc7SCole Faust			return unicode.ToLower(r)
21*4947cdc7SCole Faust		}, s)
22*4947cdc7SCole Faust	}
23*4947cdc7SCole Faust
24*4947cdc7SCole Faust	x = fold(x)
25*4947cdc7SCole Faust
26*4947cdc7SCole Faust	var best string
27*4947cdc7SCole Faust	bestD := (len(x) + 1) / 2 // allow up to 50% typos
28*4947cdc7SCole Faust	for _, c := range candidates {
29*4947cdc7SCole Faust		d := levenshtein(x, fold(c), bestD)
30*4947cdc7SCole Faust		if d < bestD {
31*4947cdc7SCole Faust			bestD = d
32*4947cdc7SCole Faust			best = c
33*4947cdc7SCole Faust		}
34*4947cdc7SCole Faust	}
35*4947cdc7SCole Faust	return best
36*4947cdc7SCole Faust}
37*4947cdc7SCole Faust
38*4947cdc7SCole Faust// levenshtein returns the non-negative Levenshtein edit distance
39*4947cdc7SCole Faust// between the byte strings x and y.
40*4947cdc7SCole Faust//
41*4947cdc7SCole Faust// If the computed distance exceeds max,
42*4947cdc7SCole Faust// the function may return early with an approximate value > max.
43*4947cdc7SCole Faustfunc levenshtein(x, y string, max int) int {
44*4947cdc7SCole Faust	// This implementation is derived from one by Laurent Le Brun in
45*4947cdc7SCole Faust	// Bazel that uses the single-row space efficiency trick
46*4947cdc7SCole Faust	// described at bitbucket.org/clearer/iosifovich.
47*4947cdc7SCole Faust
48*4947cdc7SCole Faust	// Let x be the shorter string.
49*4947cdc7SCole Faust	if len(x) > len(y) {
50*4947cdc7SCole Faust		x, y = y, x
51*4947cdc7SCole Faust	}
52*4947cdc7SCole Faust
53*4947cdc7SCole Faust	// Remove common prefix.
54*4947cdc7SCole Faust	for i := 0; i < len(x); i++ {
55*4947cdc7SCole Faust		if x[i] != y[i] {
56*4947cdc7SCole Faust			x = x[i:]
57*4947cdc7SCole Faust			y = y[i:]
58*4947cdc7SCole Faust			break
59*4947cdc7SCole Faust		}
60*4947cdc7SCole Faust	}
61*4947cdc7SCole Faust	if x == "" {
62*4947cdc7SCole Faust		return len(y)
63*4947cdc7SCole Faust	}
64*4947cdc7SCole Faust
65*4947cdc7SCole Faust	if d := abs(len(x) - len(y)); d > max {
66*4947cdc7SCole Faust		return d // excessive length divergence
67*4947cdc7SCole Faust	}
68*4947cdc7SCole Faust
69*4947cdc7SCole Faust	row := make([]int, len(y)+1)
70*4947cdc7SCole Faust	for i := range row {
71*4947cdc7SCole Faust		row[i] = i
72*4947cdc7SCole Faust	}
73*4947cdc7SCole Faust
74*4947cdc7SCole Faust	for i := 1; i <= len(x); i++ {
75*4947cdc7SCole Faust		row[0] = i
76*4947cdc7SCole Faust		best := i
77*4947cdc7SCole Faust		prev := i - 1
78*4947cdc7SCole Faust		for j := 1; j <= len(y); j++ {
79*4947cdc7SCole Faust			a := prev + b2i(x[i-1] != y[j-1]) // substitution
80*4947cdc7SCole Faust			b := 1 + row[j-1]                 // deletion
81*4947cdc7SCole Faust			c := 1 + row[j]                   // insertion
82*4947cdc7SCole Faust			k := min(a, min(b, c))
83*4947cdc7SCole Faust			prev, row[j] = row[j], k
84*4947cdc7SCole Faust			best = min(best, k)
85*4947cdc7SCole Faust		}
86*4947cdc7SCole Faust		if best > max {
87*4947cdc7SCole Faust			return best
88*4947cdc7SCole Faust		}
89*4947cdc7SCole Faust	}
90*4947cdc7SCole Faust	return row[len(y)]
91*4947cdc7SCole Faust}
92*4947cdc7SCole Faust
93*4947cdc7SCole Faustfunc b2i(b bool) int {
94*4947cdc7SCole Faust	if b {
95*4947cdc7SCole Faust		return 1
96*4947cdc7SCole Faust	} else {
97*4947cdc7SCole Faust		return 0
98*4947cdc7SCole Faust	}
99*4947cdc7SCole Faust}
100*4947cdc7SCole Faust
101*4947cdc7SCole Faustfunc min(x, y int) int {
102*4947cdc7SCole Faust	if x < y {
103*4947cdc7SCole Faust		return x
104*4947cdc7SCole Faust	} else {
105*4947cdc7SCole Faust		return y
106*4947cdc7SCole Faust	}
107*4947cdc7SCole Faust}
108*4947cdc7SCole Faust
109*4947cdc7SCole Faustfunc abs(x int) int {
110*4947cdc7SCole Faust	if x >= 0 {
111*4947cdc7SCole Faust		return x
112*4947cdc7SCole Faust	} else {
113*4947cdc7SCole Faust		return -x
114*4947cdc7SCole Faust	}
115*4947cdc7SCole Faust}
116