xref: /aosp_15_r20/external/licenseclassifier/v2/frequencies_test.go (revision 46c4c49da23cae783fa41bf46525a6505638499a)
1*46c4c49dSIbrahim Kanouche// Copyright 2020 Google Inc.
2*46c4c49dSIbrahim Kanouche//
3*46c4c49dSIbrahim Kanouche// Licensed under the Apache License, Version 2.0 (the "License");
4*46c4c49dSIbrahim Kanouche// you may not use this file except in compliance with the License.
5*46c4c49dSIbrahim Kanouche// You may obtain a copy of the License at
6*46c4c49dSIbrahim Kanouche//
7*46c4c49dSIbrahim Kanouche//     http://www.apache.org/licenses/LICENSE-2.0
8*46c4c49dSIbrahim Kanouche//
9*46c4c49dSIbrahim Kanouche// Unless required by applicable law or agreed to in writing, software
10*46c4c49dSIbrahim Kanouche// distributed under the License is distributed on an "AS IS" BASIS,
11*46c4c49dSIbrahim Kanouche// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*46c4c49dSIbrahim Kanouche// See the License for the specific language governing permissions and
13*46c4c49dSIbrahim Kanouche// limitations under the License.
14*46c4c49dSIbrahim Kanouche
15*46c4c49dSIbrahim Kanouchepackage classifier
16*46c4c49dSIbrahim Kanouche
17*46c4c49dSIbrahim Kanoucheimport "testing"
18*46c4c49dSIbrahim Kanouche
19*46c4c49dSIbrahim Kanouchefunc TestTokenSimilarity(t *testing.T) {
20*46c4c49dSIbrahim Kanouche	tests := []struct {
21*46c4c49dSIbrahim Kanouche		name string
22*46c4c49dSIbrahim Kanouche		a, b string
23*46c4c49dSIbrahim Kanouche		sim  float64
24*46c4c49dSIbrahim Kanouche	}{
25*46c4c49dSIbrahim Kanouche		{
26*46c4c49dSIbrahim Kanouche			name: "identical match",
27*46c4c49dSIbrahim Kanouche			a:    "this text is the same in both scenarios",
28*46c4c49dSIbrahim Kanouche			b:    "this text is the same in both scenarios",
29*46c4c49dSIbrahim Kanouche			sim:  1.0,
30*46c4c49dSIbrahim Kanouche		},
31*46c4c49dSIbrahim Kanouche		{
32*46c4c49dSIbrahim Kanouche			name: "no match",
33*46c4c49dSIbrahim Kanouche			a:    "this text is the same in both scenarios",
34*46c4c49dSIbrahim Kanouche			b:    "completely different stuff here",
35*46c4c49dSIbrahim Kanouche			sim:  0.0,
36*46c4c49dSIbrahim Kanouche		},
37*46c4c49dSIbrahim Kanouche		{
38*46c4c49dSIbrahim Kanouche			name: "half match",
39*46c4c49dSIbrahim Kanouche			a:    "this text is one sample sentence",
40*46c4c49dSIbrahim Kanouche			b:    "that text is some different sample",
41*46c4c49dSIbrahim Kanouche			sim:  0.5,
42*46c4c49dSIbrahim Kanouche		},
43*46c4c49dSIbrahim Kanouche	}
44*46c4c49dSIbrahim Kanouche
45*46c4c49dSIbrahim Kanouche	for _, test := range tests {
46*46c4c49dSIbrahim Kanouche		t.Run(test.name, func(t *testing.T) {
47*46c4c49dSIbrahim Kanouche			c := NewClassifier(.8) // This value doesn't affect the test.
48*46c4c49dSIbrahim Kanouche			c.AddContent("", "b", "", []byte(test.b))
49*46c4c49dSIbrahim Kanouche			a := c.createTargetIndexedDocument([]byte(test.a))
50*46c4c49dSIbrahim Kanouche			if actual := a.tokenSimilarity(c.getIndexedDocument("", "b", "")); actual != test.sim {
51*46c4c49dSIbrahim Kanouche				t.Errorf("got %v want %v", actual, test.sim)
52*46c4c49dSIbrahim Kanouche			}
53*46c4c49dSIbrahim Kanouche		})
54*46c4c49dSIbrahim Kanouche	}
55*46c4c49dSIbrahim Kanouche}
56