Lines Matching +full:unknown +full:- +full:key

7 //     http://www.apache.org/licenses/LICENSE-2.0
39 // for _, unknown := range unknownTexts {
40 // m := sc.NearestMatch(unknown.Text)
42 // unknown.Name, m.Name, m.Confidence)
57 "github.com/sergi/go-diff/diffmatchpatch"
113 key string member
120 // for key, an error is returned.
121 func (c *Classifier) AddValue(key, value string) error {
124 if _, ok := c.values[key]; ok {
125 return fmt.Errorf("value already registered with key %q", key)
128 c.values[key] = &knownValue{
129 key: key,
139 func (c *Classifier) AddPrecomputedValue(key, value string, set *searchset.SearchSet) error {
142 if _, ok := c.values[key]; ok {
143 return fmt.Errorf("value already registered with key %q", key)
146 c.values[key] = &knownValue{
147 key: key,
167 Offset int // The offset into the unknown string at which the match was made
168 Extent int // The length from the offset into the unknown string
171 // Matches is a list of Match-es. This is here mainly so that the list can be
178 if math.Abs(m[j].Confidence-m[i].Confidence) < math.SmallestNonzeroFloat64 {
226 // the unknown string and a confidence percentage is returned indicating how
241 // unknown string. This differs from "NearestMatch" in that it looks only at
242 // those areas within the unknown string that are likely to match. A list of
281 // nearestMatch returns a Queue of values that the unknown string may be. The
284 func (c *Classifier) nearestMatch(unknown string) *pq.Queue {
290 unknown = c.normalize(unknown)
291 if len(unknown) == 0 {
298 dr := diffRatio(unknown, v.normalizedValue)
302 if unknown == v.normalizedValue {
304 pq.Push(&Match{Name: v.key, Confidence: 1.0, Offset: 0, Extent: len(unknown)})
314 classifyString := func(name, unknown, known string) {
317 diffs := dmp.DiffMain(unknown, known, true)
319 confidence := confidencePercentage(len(unknown), len(known), distance)
322 pq.Push(&Match{Name: name, Confidence: confidence, Offset: 0, Extent: len(unknown)})
329 go classifyString(known.value.key, unknown, known.value.normalizedValue)
335 // matcher finds all potential matches of "known" in "unknown". The results are
338 unknown *searchset.SearchSet member
347 func newMatcher(unknown string, threshold float64) *matcher {
349 unknown: searchset.New(unknown, searchset.DefaultGranularity),
350 normUnknown: unknown,
359 // the unknown text. The resulting matches can then be filtered to determine which
363 if all := known.reValue.FindAllStringIndex(m.normUnknown, -1); all != nil {
367 for i, tok := range m.unknown.Tokens {
370 } else if tok.Offset >= a[len(a)-1]-len(tok.Text) {
385 mrs = searchset.FindPotentialMatches(known.set, m.unknown)
396 start, end := mr.TargetRange(m.unknown)
400 m.queue.Push(&Match{Name: known.key, Confidence: conf, Offset: start, Extent: end - start})
416 // multipleMatch returns a Queue of values that might be within the unknown
419 func (c *Classifier) multipleMatch(unknown string) *pq.Queue {
420 normUnknown := c.normalize(unknown)
441 c.values[known.key].set = k
452 // levDist runs the Levenshtein Distance algorithm on the known and unknown
454 func levDist(unknown, known string) float64 { argument
455 if len(known) == 0 || len(unknown) == 0 {
456 …log.Printf("Zero-sized texts in Levenshtein Distance algorithm: known==%d, unknown==%d", len(known…
461 // text and the unknown text.
462 diffs := dmp.DiffMain(unknown, known, false)
468 return confidencePercentage(unknownTextLength(unknown, diffs), len(known), distance)
471 // unknownTextLength returns the length of the unknown text based on the diff range.
472 func unknownTextLength(unknown string, diffs []diffmatchpatch.Diff) int {
473 last := len(diffs) - 1
474 for ; last >= 0; last-- {
521 return 1.0 - float64(distance)/float64(max(ulen, klen))