xref: /aosp_15_r20/external/licenseclassifier/commentparser/comment_parser_test.go (revision 46c4c49da23cae783fa41bf46525a6505638499a)
1// Copyright 2017 Google Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//	http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14package commentparser
15
16import (
17	"fmt"
18	"reflect"
19	"testing"
20
21	"github.com/google/go-cmp/cmp"
22	"github.com/google/licenseclassifier/commentparser/language"
23)
24
const (
	// singleLineText is the comment body used by the one-line comment
	// fixtures in this file.
	singleLineText = "single line text"

	// multilineText is a three-line comment body. Every line, including
	// the last one, is terminated by a newline.
	multilineText = "first line of text\n" +
		"second line of text\n" +
		"third line of text\n"
)
32
33func TestCommentParser_Lex(t *testing.T) {
34	tests := []struct {
35		description string
36		lang        language.Language
37		source      string
38		want        Comments
39	}{
40		{
41			description: "BCPL Single Line Comments",
42			lang:        language.Go,
43			source:      fmt.Sprintf("//%s\n", singleLineText),
44			want: []*Comment{
45				{
46					StartLine: 1,
47					EndLine:   1,
48					Text:      singleLineText,
49				},
50			},
51		},
52		{
53			description: "Go Comment With Multiline String",
54			lang:        language.Go,
55			source:      fmt.Sprintf("var a = `A\nmultiline\\x20\nstring`\n//%s\n", singleLineText),
56			want: []*Comment{
57				{
58					StartLine: 4,
59					EndLine:   4,
60					Text:      singleLineText,
61				},
62			},
63		},
64		{
65			description: "Python Multiline String",
66			lang:        language.Python,
67			source:      fmt.Sprintf("#%s\n\n\n\nx = '''this is a multiline\nstring'''", singleLineText),
68			want: []*Comment{
69				{
70					StartLine: 1,
71					EndLine:   1,
72					Text:      singleLineText,
73				},
74			},
75		},
76		{
77			description: "Python module-level Docstring #1",
78			lang:        language.Python,
79			source:      fmt.Sprintf("'''%s'''\nimport foo", multilineText),
80			want: []*Comment{
81				{
82					StartLine: 1,
83					EndLine:   4,
84					Text:      multilineText,
85				},
86			},
87		},
88		{
89			description: "Python module-level Docstring #2",
90			lang:        language.Python,
91			source:      fmt.Sprintf("#!/usr/bin/python\n'''%s'''\nimport foo", multilineText),
92			want: []*Comment{
93				{
94					StartLine: 1,
95					EndLine:   1,
96					Text:      "!/usr/bin/python",
97				},
98				{
99					StartLine: 2,
100					EndLine:   5,
101					Text:      multilineText,
102				},
103			},
104		},
105		{
106			// Only include docstrings that start at the beginning of a line
107			description: "Python module-level Docstring #3",
108			lang:        language.Python,
109			source:      "'''zero1'''\n '''one'''\n  '''two'''\n'''zero2'''",
110			want: []*Comment{
111				{
112					StartLine: 1,
113					EndLine:   1,
114					Text:      "zero1",
115				},
116				{
117					StartLine: 4,
118					EndLine:   4,
119					Text:      "zero2",
120				},
121			},
122		},
123		{
124			description: "TR Command String",
125			lang:        language.Python,
126			source: fmt.Sprintf(`#%s
127AUTH= \
128| tr '"\n' \
129| base64 -w
130`, singleLineText),
131			want: []*Comment{
132				{
133					StartLine: 1,
134					EndLine:   1,
135					Text:      singleLineText,
136				},
137			},
138		},
139		{
140			description: "Lisp Single Line Comments",
141			lang:        language.Clojure,
142			source:      fmt.Sprintf(";%s\n", singleLineText),
143			want: []*Comment{
144				{
145					StartLine: 1,
146					EndLine:   1,
147					Text:      singleLineText,
148				},
149			},
150		},
151		{
152			description: "Shell Single Line Comments",
153			lang:        language.Shell,
154			source:      fmt.Sprintf("#%s\n", singleLineText),
155			want: []*Comment{
156				{
157					StartLine: 1,
158					EndLine:   1,
159					Text:      singleLineText,
160				},
161			},
162		},
163		{
164			description: "BCPL Multiline Comments",
165			lang:        language.C,
166			source:      fmt.Sprintf("/*%s*/\n", multilineText),
167			want: []*Comment{
168				{
169					StartLine: 1,
170					EndLine:   4,
171					Text:      multilineText,
172				},
173			},
174		},
175		{
176			description: "BCPL Multiline Comments no terminating newline",
177			lang:        language.C,
178			source:      fmt.Sprintf("/*%s*/", multilineText),
179			want: []*Comment{
180				{
181					StartLine: 1,
182					EndLine:   4,
183					Text:      multilineText,
184				},
185			},
186		},
187		{
188			description: "Nested Multiline Comments",
189			lang:        language.Swift,
190			source:      "/*a /*\n  nested\n*/\n  comment\n*/\n",
191			want: []*Comment{
192				{
193					StartLine: 1,
194					EndLine:   5,
195					Text:      "a /*\n  nested\n*/\n  comment\n",
196				},
197			},
198		},
199		{
200			description: "Ruby Multiline Comments",
201			lang:        language.Ruby,
202			source:      fmt.Sprintf("=begin\n%s=end\n", multilineText),
203			want: []*Comment{
204				{
205					StartLine: 1,
206					EndLine:   5,
207					Text:      "\n" + multilineText,
208				},
209			},
210		},
211		{
212			description: "Multiple Single Line Comments",
213			lang:        language.Shell,
214			source: `# First line
215# Second line
216# Third line
217`,
218			want: []*Comment{
219				{
220					StartLine: 1,
221					EndLine:   1,
222					Text:      " First line",
223				},
224				{
225					StartLine: 2,
226					EndLine:   2,
227					Text:      " Second line",
228				},
229				{
230					StartLine: 3,
231					EndLine:   3,
232					Text:      " Third line",
233				},
234			},
235		},
236		{
237			description: "Mixed Multiline / Single Line Comments",
238			lang:        language.C,
239			source: `/*
240 * The first multiline line.
241 * The second multiline line.
242 */
243 // The first single line comment.
244 // The second single line comment.
245`,
246			want: []*Comment{
247				{
248					StartLine: 1,
249					EndLine:   4,
250					Text: `
251 * The first multiline line.
252 * The second multiline line.
253 `,
254				},
255				{
256					StartLine: 5,
257					EndLine:   5,
258					Text:      " The first single line comment.",
259				},
260				{
261					StartLine: 6,
262					EndLine:   6,
263					Text:      " The second single line comment.",
264				},
265			},
266		},
267		{
268			description: "Mixed Multiline / Single Line Comments",
269			lang:        language.C,
270			source: `/*
271 * The first multiline line.
272 * The second multiline line.
273 */
274 // The first single line comment.
275 // The second single line comment.
276`,
277			want: []*Comment{
278				{
279					StartLine: 1,
280					EndLine:   4,
281					Text: `
282 * The first multiline line.
283 * The second multiline line.
284 `,
285				},
286				{
287					StartLine: 5,
288					EndLine:   5,
289					Text:      " The first single line comment.",
290				},
291				{
292					StartLine: 6,
293					EndLine:   6,
294					Text:      " The second single line comment.",
295				},
296			},
297		},
298		{
299			description: "HTML-like comments and quotes",
300			lang:        language.HTML,
301			source: `# This is an important topic
302I don't want to go on all day here! <-- notice the quote in there!
303<!-- Well, maybe I do... -->
304`,
305			want: []*Comment{
306				{
307					StartLine: 3,
308					EndLine:   3,
309					Text:      " Well, maybe I do... ",
310				},
311			},
312		},
313		{
314			description: "JavaScript regex",
315			lang:        language.JavaScript,
316			source: `var re = /hello"world/;
317// the comment
318`,
319			want: []*Comment{
320				{
321					StartLine: 2,
322					EndLine:   2,
323					Text:      " the comment",
324				},
325			},
326		},
327		{
328			description: "Perl regex",
329			lang:        language.Perl,
330			source: `if (/hello"world/) {
331  # the comment
332  print "Yo!"
333}
334`,
335			want: []*Comment{
336				{
337					StartLine: 2,
338					EndLine:   2,
339					Text:      " the comment",
340				},
341			},
342		},
343		{
344			description: "SQL using MySQL-style comments",
345			lang:        language.SQL,
346			source: `/*
347 * The first multiline line.
348 * The second multiline line.
349 */
350 # The first single line comment.
351 # The second single line comment.
352`,
353			want: []*Comment{
354				{
355					StartLine: 1,
356					EndLine:   4,
357					Text: `
358 * The first multiline line.
359 * The second multiline line.
360 `,
361				},
362				{
363					StartLine: 5,
364					EndLine:   5,
365					Text:      " The first single line comment.",
366				},
367				{
368					StartLine: 6,
369					EndLine:   6,
370					Text:      " The second single line comment.",
371				},
372			},
373		},
374		{
375			description: "SQL using MySQL-style comments",
376			lang:        language.SQL,
377			source: `-- The first single line comment.
378/*
379 * The first multiline line.
380 * The second multiline line.
381 */
382 -- The second single line comment.
383`,
384			want: []*Comment{
385				{
386					StartLine: 1,
387					EndLine:   1,
388					Text:      " The first single line comment.",
389				},
390				{
391					StartLine: 2,
392					EndLine:   5,
393					Text: `
394 * The first multiline line.
395 * The second multiline line.
396 `,
397				},
398				{
399					StartLine: 6,
400					EndLine:   6,
401					Text:      " The second single line comment.",
402				},
403			},
404		},
405		{
406			description: "Matlab language - Single Line Comments",
407			lang:        language.ObjectiveC, // Matlab has same extension as Objective-C.
408			source: `% Copyright 2017 Yoyodyne Inc.
409
410clear;
411close all;
412`,
413			want: []*Comment{
414				{
415					StartLine: 1,
416					EndLine:   1,
417					Text:      " Copyright 2017 Yoyodyne Inc.",
418				},
419			},
420		},
421		{
422			description: "Matlab language - Multi-Line Comments",
423			lang:        language.ObjectiveC, // Matlab has same extension as Objective-C.
424			source: `%{ Multiline comment start.
425  Second line of multiline comment.
426%}
427
428clear;
429close all;
430`,
431			want: []*Comment{
432				{
433					StartLine: 1,
434					EndLine:   3,
435					Text: ` Multiline comment start.
436  Second line of multiline comment.
437`,
438				},
439			},
440		},
441	}
442
443	for _, tt := range tests {
444		got := Parse([]byte(tt.source), tt.lang)
445		if !cmp.Equal(got, tt.want) {
446			t.Errorf("Mismatch(%q) = %+v, want %+v, diff=%v", tt.description, got, tt.want, cmp.Diff(got, tt.want))
447		}
448	}
449}
450
451func TestCommentParser_ChunkIterator(t *testing.T) {
452	tests := []struct {
453		description string
454		comments    Comments
455		want        []Comments
456	}{
457		{
458			description: "Empty Comments",
459			comments:    Comments{},
460			want:        nil,
461		},
462		{
463			description: "Single Line Comment Chunk",
464			comments: Comments{
465				{StartLine: 1, EndLine: 1, Text: "Block 1 line 1"},
466				{StartLine: 2, EndLine: 2, Text: "Block 1 line 2"},
467			},
468			want: []Comments{{
469				{StartLine: 1, EndLine: 1, Text: "Block 1 line 1"},
470				{StartLine: 2, EndLine: 2, Text: "Block 1 line 2"},
471			}},
472		},
473		{
474			description: "Multiline Comment Chunk",
475			comments: Comments{{
476				StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3",
477			}},
478			want: []Comments{{{
479				StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3",
480			}}},
481		},
482		{
483			description: "Multiple Single Line Comment Chunks",
484			comments: Comments{
485				{StartLine: 1, EndLine: 1, Text: "Block 1 line 1"},
486				{StartLine: 2, EndLine: 2, Text: "Block 1 line 2"},
487				{StartLine: 4, EndLine: 4, Text: "Block 2 line 1"},
488				{StartLine: 5, EndLine: 5, Text: "Block 2 line 2"},
489			},
490			want: []Comments{
491				{
492					{StartLine: 1, EndLine: 1, Text: "Block 1 line 1"},
493					{StartLine: 2, EndLine: 2, Text: "Block 1 line 2"},
494				},
495				{
496					{StartLine: 4, EndLine: 4, Text: "Block 2 line 1"},
497					{StartLine: 5, EndLine: 5, Text: "Block 2 line 2"},
498				},
499			},
500		},
501		{
502			description: "Multiline Comment Chunk",
503			comments: Comments{
504				{StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"},
505				{StartLine: 4, EndLine: 6, Text: "Multiline 1\n2\n3"},
506			},
507			want: []Comments{
508				{{StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"}},
509				{{StartLine: 4, EndLine: 6, Text: "Multiline 1\n2\n3"}},
510			},
511		},
512		{
513			description: "Multiline and Single Line Comment Chunks",
514			comments: Comments{
515				{StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"},
516				{StartLine: 4, EndLine: 4, Text: "Block 2 line 1"},
517				{StartLine: 5, EndLine: 5, Text: "Block 2 line 2"},
518			},
519			want: []Comments{
520				{
521					{StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"},
522				},
523				{
524					{StartLine: 4, EndLine: 4, Text: "Block 2 line 1"},
525					{StartLine: 5, EndLine: 5, Text: "Block 2 line 2"},
526				},
527			},
528		},
529		{
530			description: "Mixed Multiline / Single Line Comments",
531			comments: []*Comment{
532				{StartLine: 1, EndLine: 1, Text: " The first single line comment."},
533				{StartLine: 2, EndLine: 2, Text: " The second single line comment."},
534				{StartLine: 4, EndLine: 7, Text: "\n * The first multiline line.\n * The second multiline line.\n"},
535			},
536			want: []Comments{
537				{
538					{StartLine: 1, EndLine: 1, Text: " The first single line comment."},
539					{StartLine: 2, EndLine: 2, Text: " The second single line comment."},
540				},
541				{
542					{StartLine: 4, EndLine: 7, Text: "\n * The first multiline line.\n * The second multiline line.\n"},
543				},
544			},
545		},
546	}
547
548	for _, tt := range tests {
549		i := 0
550		for got := range tt.comments.ChunkIterator() {
551			if i >= len(tt.want) {
552				t.Errorf("Mismatch(%q) more comment chunks than expected = %v, want %v",
553					tt.description, i+1, len(tt.want))
554				break
555			}
556			if !reflect.DeepEqual(got, tt.want[i]) {
557				t.Errorf("Mismatch(%q) = %+v, want %+v", tt.description, got, tt.want[i])
558			}
559			i++
560		}
561		if i != len(tt.want) {
562			t.Errorf("Mismatch(%q) not enough comment chunks = %v, want %v",
563				tt.description, i, len(tt.want))
564		}
565	}
566}
567