1// Copyright 2017 Google Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14package commentparser 15 16import ( 17 "fmt" 18 "reflect" 19 "testing" 20 21 "github.com/google/go-cmp/cmp" 22 "github.com/google/licenseclassifier/commentparser/language" 23) 24 25const ( 26 singleLineText = "single line text" 27 multilineText = `first line of text 28second line of text 29third line of text 30` 31) 32 33func TestCommentParser_Lex(t *testing.T) { 34 tests := []struct { 35 description string 36 lang language.Language 37 source string 38 want Comments 39 }{ 40 { 41 description: "BCPL Single Line Comments", 42 lang: language.Go, 43 source: fmt.Sprintf("//%s\n", singleLineText), 44 want: []*Comment{ 45 { 46 StartLine: 1, 47 EndLine: 1, 48 Text: singleLineText, 49 }, 50 }, 51 }, 52 { 53 description: "Go Comment With Multiline String", 54 lang: language.Go, 55 source: fmt.Sprintf("var a = `A\nmultiline\\x20\nstring`\n//%s\n", singleLineText), 56 want: []*Comment{ 57 { 58 StartLine: 4, 59 EndLine: 4, 60 Text: singleLineText, 61 }, 62 }, 63 }, 64 { 65 description: "Python Multiline String", 66 lang: language.Python, 67 source: fmt.Sprintf("#%s\n\n\n\nx = '''this is a multiline\nstring'''", singleLineText), 68 want: []*Comment{ 69 { 70 StartLine: 1, 71 EndLine: 1, 72 Text: singleLineText, 73 }, 74 }, 75 }, 76 { 77 description: "Python module-level Docstring #1", 78 lang: language.Python, 79 source: fmt.Sprintf("'''%s'''\nimport foo", multilineText), 80 want: []*Comment{ 81 { 82 StartLine: 1, 83 EndLine: 4, 84 Text: multilineText, 85 }, 86 }, 87 }, 88 { 89 description: "Python module-level Docstring #2", 90 lang: language.Python, 91 source: fmt.Sprintf("#!/usr/bin/python\n'''%s'''\nimport foo", multilineText), 92 want: []*Comment{ 93 { 94 StartLine: 1, 95 EndLine: 1, 96 Text: "!/usr/bin/python", 97 }, 98 { 99 StartLine: 2, 100 EndLine: 5, 101 Text: multilineText, 102 }, 103 }, 104 }, 105 { 106 // Only include docstrings that start at the beginning of a line 107 description: "Python module-level Docstring #3", 108 lang: language.Python, 109 source: "'''zero1'''\n '''one'''\n '''two'''\n'''zero2'''", 110 want: []*Comment{ 111 { 112 StartLine: 1, 113 EndLine: 1, 114 Text: "zero1", 115 }, 116 { 117 StartLine: 4, 118 EndLine: 4, 119 Text: "zero2", 120 }, 121 }, 122 }, 123 { 124 description: "TR Command String", 125 lang: language.Python, 126 source: fmt.Sprintf(`#%s 127AUTH= \ 128| tr '"\n' \ 129| base64 -w 130`, singleLineText), 131 want: []*Comment{ 132 { 133 StartLine: 1, 134 EndLine: 1, 135 Text: singleLineText, 136 }, 137 }, 138 }, 139 { 140 description: "Lisp Single Line Comments", 141 lang: language.Clojure, 142 source: fmt.Sprintf(";%s\n", singleLineText), 143 want: []*Comment{ 144 { 145 StartLine: 1, 146 EndLine: 1, 147 Text: singleLineText, 148 }, 149 }, 150 }, 151 { 152 description: "Shell Single Line Comments", 153 lang: language.Shell, 154 source: fmt.Sprintf("#%s\n", singleLineText), 155 want: []*Comment{ 156 { 157 StartLine: 1, 158 EndLine: 1, 159 Text: singleLineText, 160 }, 161 }, 162 }, 163 { 164 description: "BCPL Multiline Comments", 165 lang: language.C, 166 source: fmt.Sprintf("/*%s*/\n", multilineText), 167 want: []*Comment{ 168 { 169 StartLine: 1, 170 EndLine: 4, 171 Text: multilineText, 172 }, 173 }, 174 }, 175 { 176 description: "BCPL Multiline Comments no terminating newline", 177 lang: language.C, 178 source: fmt.Sprintf("/*%s*/", multilineText), 179 want: []*Comment{ 180 { 181 StartLine: 1, 182 EndLine: 4, 183 Text: multilineText, 184 }, 185 }, 186 }, 187 { 188 description: "Nested Multiline Comments", 189 lang: language.Swift, 190 source: "/*a /*\n nested\n*/\n comment\n*/\n", 191 want: []*Comment{ 192 { 193 StartLine: 1, 194 EndLine: 5, 195 Text: "a /*\n nested\n*/\n comment\n", 196 }, 197 }, 198 }, 199 { 200 description: "Ruby Multiline Comments", 201 lang: language.Ruby, 202 source: fmt.Sprintf("=begin\n%s=end\n", multilineText), 203 want: []*Comment{ 204 { 205 StartLine: 1, 206 EndLine: 5, 207 Text: "\n" + multilineText, 208 }, 209 }, 210 }, 211 { 212 description: "Multiple Single Line Comments", 213 lang: language.Shell, 214 source: `# First line 215# Second line 216# Third line 217`, 218 want: []*Comment{ 219 { 220 StartLine: 1, 221 EndLine: 1, 222 Text: " First line", 223 }, 224 { 225 StartLine: 2, 226 EndLine: 2, 227 Text: " Second line", 228 }, 229 { 230 StartLine: 3, 231 EndLine: 3, 232 Text: " Third line", 233 }, 234 }, 235 }, 236 { 237 description: "Mixed Multiline / Single Line Comments", 238 lang: language.C, 239 source: `/* 240 * The first multiline line. 241 * The second multiline line. 242 */ 243 // The first single line comment. 244 // The second single line comment. 245`, 246 want: []*Comment{ 247 { 248 StartLine: 1, 249 EndLine: 4, 250 Text: ` 251 * The first multiline line. 252 * The second multiline line. 253 `, 254 }, 255 { 256 StartLine: 5, 257 EndLine: 5, 258 Text: " The first single line comment.", 259 }, 260 { 261 StartLine: 6, 262 EndLine: 6, 263 Text: " The second single line comment.", 264 }, 265 }, 266 }, 267 { 268 description: "Mixed Multiline / Single Line Comments", 269 lang: language.C, 270 source: `/* 271 * The first multiline line. 272 * The second multiline line. 273 */ 274 // The first single line comment. 275 // The second single line comment. 276`, 277 want: []*Comment{ 278 { 279 StartLine: 1, 280 EndLine: 4, 281 Text: ` 282 * The first multiline line. 283 * The second multiline line. 284 `, 285 }, 286 { 287 StartLine: 5, 288 EndLine: 5, 289 Text: " The first single line comment.", 290 }, 291 { 292 StartLine: 6, 293 EndLine: 6, 294 Text: " The second single line comment.", 295 }, 296 }, 297 }, 298 { 299 description: "HTML-like comments and quotes", 300 lang: language.HTML, 301 source: `# This is an important topic 302I don't want to go on all day here! <-- notice the quote in there! 303<!-- Well, maybe I do... --> 304`, 305 want: []*Comment{ 306 { 307 StartLine: 3, 308 EndLine: 3, 309 Text: " Well, maybe I do... ", 310 }, 311 }, 312 }, 313 { 314 description: "JavaScript regex", 315 lang: language.JavaScript, 316 source: `var re = /hello"world/; 317// the comment 318`, 319 want: []*Comment{ 320 { 321 StartLine: 2, 322 EndLine: 2, 323 Text: " the comment", 324 }, 325 }, 326 }, 327 { 328 description: "Perl regex", 329 lang: language.Perl, 330 source: `if (/hello"world/) { 331 # the comment 332 print "Yo!" 333} 334`, 335 want: []*Comment{ 336 { 337 StartLine: 2, 338 EndLine: 2, 339 Text: " the comment", 340 }, 341 }, 342 }, 343 { 344 description: "SQL using MySQL-style comments", 345 lang: language.SQL, 346 source: `/* 347 * The first multiline line. 348 * The second multiline line. 349 */ 350 # The first single line comment. 351 # The second single line comment. 352`, 353 want: []*Comment{ 354 { 355 StartLine: 1, 356 EndLine: 4, 357 Text: ` 358 * The first multiline line. 359 * The second multiline line. 360 `, 361 }, 362 { 363 StartLine: 5, 364 EndLine: 5, 365 Text: " The first single line comment.", 366 }, 367 { 368 StartLine: 6, 369 EndLine: 6, 370 Text: " The second single line comment.", 371 }, 372 }, 373 }, 374 { 375 description: "SQL using MySQL-style comments", 376 lang: language.SQL, 377 source: `-- The first single line comment. 378/* 379 * The first multiline line. 380 * The second multiline line. 381 */ 382 -- The second single line comment. 383`, 384 want: []*Comment{ 385 { 386 StartLine: 1, 387 EndLine: 1, 388 Text: " The first single line comment.", 389 }, 390 { 391 StartLine: 2, 392 EndLine: 5, 393 Text: ` 394 * The first multiline line. 395 * The second multiline line. 396 `, 397 }, 398 { 399 StartLine: 6, 400 EndLine: 6, 401 Text: " The second single line comment.", 402 }, 403 }, 404 }, 405 { 406 description: "Matlab language - Single Line Comments", 407 lang: language.ObjectiveC, // Matlab has same extension as Objective-C. 408 source: `% Copyright 2017 Yoyodyne Inc. 409 410clear; 411close all; 412`, 413 want: []*Comment{ 414 { 415 StartLine: 1, 416 EndLine: 1, 417 Text: " Copyright 2017 Yoyodyne Inc.", 418 }, 419 }, 420 }, 421 { 422 description: "Matlab language - Multi-Line Comments", 423 lang: language.ObjectiveC, // Matlab has same extension as Objective-C. 424 source: `%{ Multiline comment start. 425 Second line of multiline comment. 426%} 427 428clear; 429close all; 430`, 431 want: []*Comment{ 432 { 433 StartLine: 1, 434 EndLine: 3, 435 Text: ` Multiline comment start. 436 Second line of multiline comment. 437`, 438 }, 439 }, 440 }, 441 } 442 443 for _, tt := range tests { 444 got := Parse([]byte(tt.source), tt.lang) 445 if !cmp.Equal(got, tt.want) { 446 t.Errorf("Mismatch(%q) = %+v, want %+v, diff=%v", tt.description, got, tt.want, cmp.Diff(got, tt.want)) 447 } 448 } 449} 450 451func TestCommentParser_ChunkIterator(t *testing.T) { 452 tests := []struct { 453 description string 454 comments Comments 455 want []Comments 456 }{ 457 { 458 description: "Empty Comments", 459 comments: Comments{}, 460 want: nil, 461 }, 462 { 463 description: "Single Line Comment Chunk", 464 comments: Comments{ 465 {StartLine: 1, EndLine: 1, Text: "Block 1 line 1"}, 466 {StartLine: 2, EndLine: 2, Text: "Block 1 line 2"}, 467 }, 468 want: []Comments{{ 469 {StartLine: 1, EndLine: 1, Text: "Block 1 line 1"}, 470 {StartLine: 2, EndLine: 2, Text: "Block 1 line 2"}, 471 }}, 472 }, 473 { 474 description: "Multiline Comment Chunk", 475 comments: Comments{{ 476 StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3", 477 }}, 478 want: []Comments{{{ 479 StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3", 480 }}}, 481 }, 482 { 483 description: "Multiple Single Line Comment Chunks", 484 comments: Comments{ 485 {StartLine: 1, EndLine: 1, Text: "Block 1 line 1"}, 486 {StartLine: 2, EndLine: 2, Text: "Block 1 line 2"}, 487 {StartLine: 4, EndLine: 4, Text: "Block 2 line 1"}, 488 {StartLine: 5, EndLine: 5, Text: "Block 2 line 2"}, 489 }, 490 want: []Comments{ 491 { 492 {StartLine: 1, EndLine: 1, Text: "Block 1 line 1"}, 493 {StartLine: 2, EndLine: 2, Text: "Block 1 line 2"}, 494 }, 495 { 496 {StartLine: 4, EndLine: 4, Text: "Block 2 line 1"}, 497 {StartLine: 5, EndLine: 5, Text: "Block 2 line 2"}, 498 }, 499 }, 500 }, 501 { 502 description: "Multiline Comment Chunk", 503 comments: Comments{ 504 {StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"}, 505 {StartLine: 4, EndLine: 6, Text: "Multiline 1\n2\n3"}, 506 }, 507 want: []Comments{ 508 {{StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"}}, 509 {{StartLine: 4, EndLine: 6, Text: "Multiline 1\n2\n3"}}, 510 }, 511 }, 512 { 513 description: "Multiline and Single Line Comment Chunks", 514 comments: Comments{ 515 {StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"}, 516 {StartLine: 4, EndLine: 4, Text: "Block 2 line 1"}, 517 {StartLine: 5, EndLine: 5, Text: "Block 2 line 2"}, 518 }, 519 want: []Comments{ 520 { 521 {StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"}, 522 }, 523 { 524 {StartLine: 4, EndLine: 4, Text: "Block 2 line 1"}, 525 {StartLine: 5, EndLine: 5, Text: "Block 2 line 2"}, 526 }, 527 }, 528 }, 529 { 530 description: "Mixed Multiline / Single Line Comments", 531 comments: []*Comment{ 532 {StartLine: 1, EndLine: 1, Text: " The first single line comment."}, 533 {StartLine: 2, EndLine: 2, Text: " The second single line comment."}, 534 {StartLine: 4, EndLine: 7, Text: "\n * The first multiline line.\n * The second multiline line.\n"}, 535 }, 536 want: []Comments{ 537 { 538 {StartLine: 1, EndLine: 1, Text: " The first single line comment."}, 539 {StartLine: 2, EndLine: 2, Text: " The second single line comment."}, 540 }, 541 { 542 {StartLine: 4, EndLine: 7, Text: "\n * The first multiline line.\n * The second multiline line.\n"}, 543 }, 544 }, 545 }, 546 } 547 548 for _, tt := range tests { 549 i := 0 550 for got := range tt.comments.ChunkIterator() { 551 if i >= len(tt.want) { 552 t.Errorf("Mismatch(%q) more comment chunks than expected = %v, want %v", 553 tt.description, i+1, len(tt.want)) 554 break 555 } 556 if !reflect.DeepEqual(got, tt.want[i]) { 557 t.Errorf("Mismatch(%q) = %+v, want %+v", tt.description, got, tt.want[i]) 558 } 559 i++ 560 } 561 if i != len(tt.want) { 562 t.Errorf("Mismatch(%q) not enough comment chunks = %v, want %v", 563 tt.description, i, len(tt.want)) 564 } 565 } 566} 567