1// Copyright 2020 Google Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package classifier 16 17import "testing" 18 19func TestTokenSimilarity(t *testing.T) { 20 tests := []struct { 21 name string 22 a, b string 23 sim float64 24 }{ 25 { 26 name: "identical match", 27 a: "this text is the same in both scenarios", 28 b: "this text is the same in both scenarios", 29 sim: 1.0, 30 }, 31 { 32 name: "no match", 33 a: "this text is the same in both scenarios", 34 b: "completely different stuff here", 35 sim: 0.0, 36 }, 37 { 38 name: "half match", 39 a: "this text is one sample sentence", 40 b: "that text is some different sample", 41 sim: 0.5, 42 }, 43 } 44 45 for _, test := range tests { 46 t.Run(test.name, func(t *testing.T) { 47 c := NewClassifier(.8) // This value doesn't affect the test. 48 c.AddContent("", "b", "", []byte(test.b)) 49 a := c.createTargetIndexedDocument([]byte(test.a)) 50 if actual := a.tokenSimilarity(c.getIndexedDocument("", "b", "")); actual != test.sim { 51 t.Errorf("got %v want %v", actual, test.sim) 52 } 53 }) 54 } 55} 56