xref: /aosp_15_r20/external/licenseclassifier/v2/diff_test.go (revision 46c4c49da23cae783fa41bf46525a6505638499a)
1// Copyright 2020 Google Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package classifier
16
17import (
18	"testing"
19
20	"github.com/google/go-cmp/cmp"
21	"github.com/sergi/go-diff/diffmatchpatch"
22)
23
// Sample texts shared by the tests in this file.
var (
	// gettysburg is the reference text for the Gettysburg-based diff cases.
	gettysburg = `Four score and seven years ago our fathers brought forth
on this continent, a new nation, conceived in Liberty, and dedicated to the
proposition that all men are created equal.`

	// modifiedGettysburg is gettysburg with the "a new nation" phrase reworded
	// and the line breaks shifted.
	modifiedGettysburg = `Four score and seven years ago our fathers brought forth
on this continent, a nation that was new and improved, conceived in Liberty, and
dedicated to the proposition that all men are created equal.`

	// extra is unrelated text that tests prepend and/or append to gettysburg.
	extra = `In the current state of affairs`

	// declaration is a longer passage used for the identical-text diff case.
	declaration = `When in the Course of human events, it becomes necessary
for one people to dissolve the political bands which have connected them with
another, and to assume among the powers of the earth, the separate and equal
station to which the Laws of Nature and of Nature's God entitle them, a decent
respect to the opinions of mankind requires that they should declare the causes
which impel them to the separation.`

	// loremipsum is the reference text for the lorem diff case.
	loremipsum = `Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla
varius enim mattis, rhoncus lectus id, aliquet sem. Phasellus eget ex in dolor
feugiat ultricies. Etiam interdum sit amet nisl in placerat.  Sed vitae enim
vulputate, tempus leo commodo, accumsan nulla.`

	// lessModifiedLorem is loremipsum with "sit" changed to "sot" and the word
	// "sem" removed.
	lessModifiedLorem = `Lorem ipsum dolor sot amet, consectetur adipiscing elit. Nulla
varius enim mattis, rhoncus lectus id, aliquet. Phasellus eget ex in dolor
feugiat ultricies. Etiam interdum sit amet nisl in placerat.  Sed vitae enim
vulputate, tempus leo commodo, accumsan nulla.`
)
49
50func TestTextLength(t *testing.T) {
51	tests := []struct {
52		name     string
53		diffs    []diffmatchpatch.Diff
54		expected int
55	}{
56		{
57			name:     "empty diff",
58			diffs:    nil,
59			expected: 0,
60		},
61		{
62			name: "deletion diff",
63			diffs: []diffmatchpatch.Diff{
64				{
65					Type: diffmatchpatch.DiffDelete,
66					Text: "deleted text",
67				},
68			},
69			expected: 2,
70		},
71	}
72
73	for _, test := range tests {
74		t.Run(test.name, func(t *testing.T) {
75			if got := textLength(test.diffs); got != test.expected {
76				t.Errorf("got %d, want %d", got, test.expected)
77			}
78		})
79	}
80}
81
82func TestWordLen(t *testing.T) {
83	tests := []struct {
84		in       string
85		expected int
86	}{
87		{
88			in:       "short string",
89			expected: 2,
90		},
91		{
92			in:       "",
93			expected: 0,
94		},
95		{
96			in:       "word",
97			expected: 1,
98		},
99	}
100
101	for _, test := range tests {
102		t.Run(test.in, func(t *testing.T) {
103			if got := wordLen(test.in); got != test.expected {
104				t.Errorf("got %d, want %d", got, test.expected)
105			}
106		})
107	}
108}
109
110func TestDiffing(t *testing.T) {
111	tests := []struct {
112		name           string
113		unknown, known string
114		start, end     int
115		diffs          []diffmatchpatch.Diff
116	}{
117		{
118			name:    "identical",
119			unknown: declaration,
120			known:   declaration,
121			start:   0,
122			end:     1,
123			diffs: []diffmatchpatch.Diff{
124				{
125					Type: diffmatchpatch.DiffEqual,
126					Text: `when in the course of human events it becomes necessary for one people to dissolve the political bands which have connected them with another and to assume among the powers of the earth the separate and equal station to which the laws of nature and of natures god entitle them a decent respect to the opinions of mankind requires that they should declare the causes which impel them to the separation`,
127				},
128			},
129		},
130		{
131			name:    "lorem",
132			unknown: lessModifiedLorem,
133			known:   loremipsum,
134			start:   0,
135			end:     6,
136			diffs: []diffmatchpatch.Diff{
137				{
138					Type: diffmatchpatch.DiffEqual,
139					Text: "lorem ipsum dolor",
140				},
141				{
142					Type: diffmatchpatch.DiffDelete,
143					Text: "UNKNOWN",
144				},
145				{
146					Type: diffmatchpatch.DiffInsert,
147					Text: "sit",
148				},
149				{
150					Type: diffmatchpatch.DiffEqual,
151					Text: "amet consectetur adipiscing elit nulla varius enim mattis rhoncus lectus id aliquet",
152				},
153				{
154					Type: diffmatchpatch.DiffInsert,
155					Text: "sem",
156				},
157				{
158					Type: diffmatchpatch.DiffEqual,
159					Text: "phasellus eget ex in dolor feugiat ultricies etiam interdum sit amet nisl in placerat sed vitae enim vulputate tempus leo commodo accumsan nulla",
160				},
161			},
162		},
163		{
164			name:    "whole diff retained",
165			unknown: modifiedGettysburg,
166			known:   gettysburg,
167			start:   0,
168			end:     6,
169			diffs: []diffmatchpatch.Diff{
170				{
171					Type: diffmatchpatch.DiffEqual,
172					Text: "four score and seven years ago our fathers brought forth on this continent a",
173				},
174				{
175					Type: diffmatchpatch.DiffDelete,
176					Text: "nation that UNKNOWN",
177				},
178				{
179					Type: diffmatchpatch.DiffEqual,
180					Text: "new",
181				},
182				{
183					Type: diffmatchpatch.DiffDelete,
184					Text: "and UNKNOWN",
185				},
186				{
187					Type: diffmatchpatch.DiffInsert,
188					Text: "nation",
189				},
190				{
191					Type: diffmatchpatch.DiffEqual,
192					Text: "conceived in liberty and dedicated to the proposition that all men are created equal",
193				},
194			},
195		},
196		{
197			name:    "extra at beginning",
198			unknown: extra + " " + gettysburg,
199			known:   gettysburg,
200			start:   1,
201			end:     2,
202			diffs: []diffmatchpatch.Diff{
203				{
204					Type: diffmatchpatch.DiffDelete,
205					Text: "in the UNKNOWN UNKNOWN UNKNOWN UNKNOWN",
206				},
207				{
208					Type: diffmatchpatch.DiffEqual,
209					Text: "four score and seven years ago our fathers brought forth on this continent a new nation conceived in liberty and dedicated to the proposition that all men are created equal",
210				},
211			},
212		},
213		{
214			name:    "extra at end",
215			unknown: gettysburg + " " + extra,
216			known:   gettysburg,
217			start:   0,
218			end:     1,
219			diffs: []diffmatchpatch.Diff{
220				{
221					Type: diffmatchpatch.DiffEqual,
222					Text: "four score and seven years ago our fathers brought forth on this continent a new nation conceived in liberty and dedicated to the proposition that all men are created equal",
223				},
224				{
225					Type: diffmatchpatch.DiffDelete,
226					Text: "in the UNKNOWN UNKNOWN UNKNOWN UNKNOWN",
227				},
228			},
229		},
230		{
231			name:    "extra at both ends",
232			unknown: extra + " " + gettysburg + " " + extra,
233			known:   gettysburg,
234			start:   1,
235			end:     2,
236			diffs: []diffmatchpatch.Diff{
237				{
238					Type: diffmatchpatch.DiffDelete,
239					Text: "in the UNKNOWN UNKNOWN UNKNOWN UNKNOWN",
240				},
241				{
242					Type: diffmatchpatch.DiffEqual,
243					Text: "four score and seven years ago our fathers brought forth on this continent a new nation conceived in liberty and dedicated to the proposition that all men are created equal",
244				},
245				{
246					Type: diffmatchpatch.DiffDelete,
247					Text: "in the UNKNOWN UNKNOWN UNKNOWN UNKNOWN",
248				},
249			},
250		},
251		{
252			name:    "completely different",
253			unknown: "this",
254			known:   "that",
255			start:   1,
256			end:     2,
257			diffs: []diffmatchpatch.Diff{
258				{
259					Type: diffmatchpatch.DiffDelete,
260					Text: "UNKNOWN",
261				},
262				{
263					Type: diffmatchpatch.DiffInsert,
264					Text: "that",
265				},
266			},
267		},
268	}
269
270	for _, test := range tests {
271		t.Run(test.name, func(t *testing.T) {
272			c := NewClassifier(.8)
273			c.AddContent("", "known", "", []byte(test.known))
274			kd := c.getIndexedDocument("", "known", "")
275			ud := c.createTargetIndexedDocument([]byte(test.unknown))
276			diffs := docDiff("known", ud, 0, ud.size(), kd, 0, kd.size())
277			start, end := diffRange(kd.normalized(), diffs)
278			if start != test.start {
279				t.Errorf("start: got %d want %d", start, test.start)
280			}
281			if end != test.end {
282				t.Errorf("end: got %d want %d", end, test.end)
283			}
284			if !cmp.Equal(diffs, test.diffs) {
285				t.Errorf(cmp.Diff(diffs, test.diffs))
286			}
287		})
288	}
289}
290