classifier.go - OpenGrok cross reference for /aosp_15_r20/external/licenseclassifier/stringclassifier/classifier.go

Lines Matching +full:unknown +full:- +full:key
7 //     http://www.apache.org/licenses/LICENSE-2.0
39 //	  for _, unknown := range unknownTexts {
40 //	    m := sc.NearestMatch(unknown.Text)
42 //	      unknown.Name, m.Name, m.Confidence)
57 	"github.com/sergi/go-diff/diffmatchpatch"
113 	key             string  member
120 // for key, an error is returned.
121 func (c *Classifier) AddValue(key, value string) error {
124 	if _, ok := c.values[key]; ok {
125 		return fmt.Errorf("value already registered with key %q", key)
128 	c.values[key] = &knownValue{
129 		key:             key,
139 func (c *Classifier) AddPrecomputedValue(key, value string, set *searchset.SearchSet) error {
142 	if _, ok := c.values[key]; ok {
143 		return fmt.Errorf("value already registered with key %q", key)
146 	c.values[key] = &knownValue{
147 		key:             key,
167 	Offset     int     // The offset into the unknown string the match was made
168 	Extent     int     // The length from the offset into the unknown string
171 // Matches is a list of Match-es. This is here mainly so that the list can be
178 	if math.Abs(m[j].Confidence-m[i].Confidence) < math.SmallestNonzeroFloat64 {
226 // the unknown string and a confidence percentage is returned indicating how
241 // unknown string. This differs from "NearestMatch" in that it looks only at
242 // those areas within the unknown string that are likely to match. A list of
281 // nearestMatch returns a Queue of values that the unknown string may be. The
284 func (c *Classifier) nearestMatch(unknown string) *pq.Queue {
290 	unknown = c.normalize(unknown)
291 	if len(unknown) == 0 {
298 		dr := diffRatio(unknown, v.normalizedValue)
302 		if unknown == v.normalizedValue {
304 			pq.Push(&Match{Name: v.key, Confidence: 1.0, Offset: 0, Extent: len(unknown)})
314 	classifyString := func(name, unknown, known string) {
317 		diffs := dmp.DiffMain(unknown, known, true)
319 		confidence := confidencePercentage(len(unknown), len(known), distance)
322 			pq.Push(&Match{Name: name, Confidence: confidence, Offset: 0, Extent: len(unknown)})
329 		go classifyString(known.value.key, unknown, known.value.normalizedValue)
335 // matcher finds all potential matches of "known" in "unknown". The results are
338 	unknown     *searchset.SearchSet  member
347 func newMatcher(unknown string, threshold float64) *matcher {
349 		unknown:     searchset.New(unknown, searchset.DefaultGranularity),
350 		normUnknown: unknown,
359 // the unknown text. The resulting matches can then be filtered to determine which
363 	if all := known.reValue.FindAllStringIndex(m.normUnknown, -1); all != nil {
367 			for i, tok := range m.unknown.Tokens {
370 				} else if tok.Offset >= a[len(a)-1]-len(tok.Text) {
385 		mrs = searchset.FindPotentialMatches(known.set, m.unknown)
396 			start, end := mr.TargetRange(m.unknown)
400 				m.queue.Push(&Match{Name: known.key, Confidence: conf, Offset: start, Extent: end - start})
416 // multipleMatch returns a Queue of values that might be within the unknown
419 func (c *Classifier) multipleMatch(unknown string) *pq.Queue {
420 	normUnknown := c.normalize(unknown)
441 				c.values[known.key].set = k
452 // levDist runs the Levenshtein Distance algorithm on the known and unknown
454 func levDist(unknown, known string) float64 {  argument
455 	if len(known) == 0 || len(unknown) == 0 {
456 …log.Printf("Zero-sized texts in Levenshtein Distance algorithm: known==%d, unknown==%d", len(known…
461 	// text and the unknown text.
462 	diffs := dmp.DiffMain(unknown, known, false)
468 	return confidencePercentage(unknownTextLength(unknown, diffs), len(known), distance)
471 // unknownTextLength returns the length of the unknown text based on the diff range.
472 func unknownTextLength(unknown string, diffs []diffmatchpatch.Diff) int {
473 	last := len(diffs) - 1
474 	for ; last >= 0; last-- {
521 	return 1.0 - float64(distance)/float64(max(ulen, klen))