1*46c4c49dSIbrahim Kanouche// Copyright 2020 Google Inc. 2*46c4c49dSIbrahim Kanouche// 3*46c4c49dSIbrahim Kanouche// Licensed under the Apache License, Version 2.0 (the "License"); 4*46c4c49dSIbrahim Kanouche// you may not use this file except in compliance with the License. 5*46c4c49dSIbrahim Kanouche// You may obtain a copy of the License at 6*46c4c49dSIbrahim Kanouche// 7*46c4c49dSIbrahim Kanouche// http://www.apache.org/licenses/LICENSE-2.0 8*46c4c49dSIbrahim Kanouche// 9*46c4c49dSIbrahim Kanouche// Unless required by applicable law or agreed to in writing, software 10*46c4c49dSIbrahim Kanouche// distributed under the License is distributed on an "AS IS" BASIS, 11*46c4c49dSIbrahim Kanouche// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12*46c4c49dSIbrahim Kanouche// See the License for the specific language governing permissions and 13*46c4c49dSIbrahim Kanouche// limitations under the License. 14*46c4c49dSIbrahim Kanouche 15*46c4c49dSIbrahim Kanouchepackage classifier 16*46c4c49dSIbrahim Kanouche 17*46c4c49dSIbrahim Kanoucheimport "testing" 18*46c4c49dSIbrahim Kanouche 19*46c4c49dSIbrahim Kanouchefunc TestTokenSimilarity(t *testing.T) { 20*46c4c49dSIbrahim Kanouche tests := []struct { 21*46c4c49dSIbrahim Kanouche name string 22*46c4c49dSIbrahim Kanouche a, b string 23*46c4c49dSIbrahim Kanouche sim float64 24*46c4c49dSIbrahim Kanouche }{ 25*46c4c49dSIbrahim Kanouche { 26*46c4c49dSIbrahim Kanouche name: "identical match", 27*46c4c49dSIbrahim Kanouche a: "this text is the same in both scenarios", 28*46c4c49dSIbrahim Kanouche b: "this text is the same in both scenarios", 29*46c4c49dSIbrahim Kanouche sim: 1.0, 30*46c4c49dSIbrahim Kanouche }, 31*46c4c49dSIbrahim Kanouche { 32*46c4c49dSIbrahim Kanouche name: "no match", 33*46c4c49dSIbrahim Kanouche a: "this text is the same in both scenarios", 34*46c4c49dSIbrahim Kanouche b: "completely different stuff here", 35*46c4c49dSIbrahim Kanouche sim: 0.0, 36*46c4c49dSIbrahim Kanouche }, 37*46c4c49dSIbrahim Kanouche { 38*46c4c49dSIbrahim Kanouche name: "half match", 39*46c4c49dSIbrahim Kanouche a: "this text is one sample sentence", 40*46c4c49dSIbrahim Kanouche b: "that text is some different sample", 41*46c4c49dSIbrahim Kanouche sim: 0.5, 42*46c4c49dSIbrahim Kanouche }, 43*46c4c49dSIbrahim Kanouche } 44*46c4c49dSIbrahim Kanouche 45*46c4c49dSIbrahim Kanouche for _, test := range tests { 46*46c4c49dSIbrahim Kanouche t.Run(test.name, func(t *testing.T) { 47*46c4c49dSIbrahim Kanouche c := NewClassifier(.8) // This value doesn't affect the test. 48*46c4c49dSIbrahim Kanouche c.AddContent("", "b", "", []byte(test.b)) 49*46c4c49dSIbrahim Kanouche a := c.createTargetIndexedDocument([]byte(test.a)) 50*46c4c49dSIbrahim Kanouche if actual := a.tokenSimilarity(c.getIndexedDocument("", "b", "")); actual != test.sim { 51*46c4c49dSIbrahim Kanouche t.Errorf("got %v want %v", actual, test.sim) 52*46c4c49dSIbrahim Kanouche } 53*46c4c49dSIbrahim Kanouche }) 54*46c4c49dSIbrahim Kanouche } 55*46c4c49dSIbrahim Kanouche} 56