1*4947cdc7SCole Faust// Package spell file defines a simple spelling checker for use in attribute errors 2*4947cdc7SCole Faust// such as "no such field .foo; did you mean .food?". 3*4947cdc7SCole Faustpackage spell 4*4947cdc7SCole Faust 5*4947cdc7SCole Faustimport ( 6*4947cdc7SCole Faust "strings" 7*4947cdc7SCole Faust "unicode" 8*4947cdc7SCole Faust) 9*4947cdc7SCole Faust 10*4947cdc7SCole Faust// Nearest returns the element of candidates 11*4947cdc7SCole Faust// nearest to x using the Levenshtein metric, 12*4947cdc7SCole Faust// or "" if none were promising. 13*4947cdc7SCole Faustfunc Nearest(x string, candidates []string) string { 14*4947cdc7SCole Faust // Ignore underscores and case when matching. 15*4947cdc7SCole Faust fold := func(s string) string { 16*4947cdc7SCole Faust return strings.Map(func(r rune) rune { 17*4947cdc7SCole Faust if r == '_' { 18*4947cdc7SCole Faust return -1 19*4947cdc7SCole Faust } 20*4947cdc7SCole Faust return unicode.ToLower(r) 21*4947cdc7SCole Faust }, s) 22*4947cdc7SCole Faust } 23*4947cdc7SCole Faust 24*4947cdc7SCole Faust x = fold(x) 25*4947cdc7SCole Faust 26*4947cdc7SCole Faust var best string 27*4947cdc7SCole Faust bestD := (len(x) + 1) / 2 // allow up to 50% typos 28*4947cdc7SCole Faust for _, c := range candidates { 29*4947cdc7SCole Faust d := levenshtein(x, fold(c), bestD) 30*4947cdc7SCole Faust if d < bestD { 31*4947cdc7SCole Faust bestD = d 32*4947cdc7SCole Faust best = c 33*4947cdc7SCole Faust } 34*4947cdc7SCole Faust } 35*4947cdc7SCole Faust return best 36*4947cdc7SCole Faust} 37*4947cdc7SCole Faust 38*4947cdc7SCole Faust// levenshtein returns the non-negative Levenshtein edit distance 39*4947cdc7SCole Faust// between the byte strings x and y. 40*4947cdc7SCole Faust// 41*4947cdc7SCole Faust// If the computed distance exceeds max, 42*4947cdc7SCole Faust// the function may return early with an approximate value > max. 43*4947cdc7SCole Faustfunc levenshtein(x, y string, max int) int { 44*4947cdc7SCole Faust // This implementation is derived from one by Laurent Le Brun in 45*4947cdc7SCole Faust // Bazel that uses the single-row space efficiency trick 46*4947cdc7SCole Faust // described at bitbucket.org/clearer/iosifovich. 47*4947cdc7SCole Faust 48*4947cdc7SCole Faust // Let x be the shorter string. 49*4947cdc7SCole Faust if len(x) > len(y) { 50*4947cdc7SCole Faust x, y = y, x 51*4947cdc7SCole Faust } 52*4947cdc7SCole Faust 53*4947cdc7SCole Faust // Remove common prefix. 54*4947cdc7SCole Faust for i := 0; i < len(x); i++ { 55*4947cdc7SCole Faust if x[i] != y[i] { 56*4947cdc7SCole Faust x = x[i:] 57*4947cdc7SCole Faust y = y[i:] 58*4947cdc7SCole Faust break 59*4947cdc7SCole Faust } 60*4947cdc7SCole Faust } 61*4947cdc7SCole Faust if x == "" { 62*4947cdc7SCole Faust return len(y) 63*4947cdc7SCole Faust } 64*4947cdc7SCole Faust 65*4947cdc7SCole Faust if d := abs(len(x) - len(y)); d > max { 66*4947cdc7SCole Faust return d // excessive length divergence 67*4947cdc7SCole Faust } 68*4947cdc7SCole Faust 69*4947cdc7SCole Faust row := make([]int, len(y)+1) 70*4947cdc7SCole Faust for i := range row { 71*4947cdc7SCole Faust row[i] = i 72*4947cdc7SCole Faust } 73*4947cdc7SCole Faust 74*4947cdc7SCole Faust for i := 1; i <= len(x); i++ { 75*4947cdc7SCole Faust row[0] = i 76*4947cdc7SCole Faust best := i 77*4947cdc7SCole Faust prev := i - 1 78*4947cdc7SCole Faust for j := 1; j <= len(y); j++ { 79*4947cdc7SCole Faust a := prev + b2i(x[i-1] != y[j-1]) // substitution 80*4947cdc7SCole Faust b := 1 + row[j-1] // deletion 81*4947cdc7SCole Faust c := 1 + row[j] // insertion 82*4947cdc7SCole Faust k := min(a, min(b, c)) 83*4947cdc7SCole Faust prev, row[j] = row[j], k 84*4947cdc7SCole Faust best = min(best, k) 85*4947cdc7SCole Faust } 86*4947cdc7SCole Faust if best > max { 87*4947cdc7SCole Faust return best 88*4947cdc7SCole Faust } 89*4947cdc7SCole Faust } 90*4947cdc7SCole Faust return row[len(y)] 91*4947cdc7SCole Faust} 92*4947cdc7SCole Faust 93*4947cdc7SCole Faustfunc b2i(b bool) int { 94*4947cdc7SCole Faust if b { 95*4947cdc7SCole Faust return 1 96*4947cdc7SCole Faust } else { 97*4947cdc7SCole Faust return 0 98*4947cdc7SCole Faust } 99*4947cdc7SCole Faust} 100*4947cdc7SCole Faust 101*4947cdc7SCole Faustfunc min(x, y int) int { 102*4947cdc7SCole Faust if x < y { 103*4947cdc7SCole Faust return x 104*4947cdc7SCole Faust } else { 105*4947cdc7SCole Faust return y 106*4947cdc7SCole Faust } 107*4947cdc7SCole Faust} 108*4947cdc7SCole Faust 109*4947cdc7SCole Faustfunc abs(x int) int { 110*4947cdc7SCole Faust if x >= 0 { 111*4947cdc7SCole Faust return x 112*4947cdc7SCole Faust } else { 113*4947cdc7SCole Faust return -x 114*4947cdc7SCole Faust } 115*4947cdc7SCole Faust} 116