1 /* 2 * Portions Copyright (c) Meta Platforms, Inc. and affiliates. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /* 18 * Copyright (c) Tor Norbye. 19 * 20 * Licensed under the Apache License, Version 2.0 (the "License"); 21 * you may not use this file except in compliance with the License. 22 * You may obtain a copy of the License at 23 * 24 * http://www.apache.org/licenses/LICENSE-2.0 25 * 26 * Unless required by applicable law or agreed to in writing, software 27 * distributed under the License is distributed on an "AS IS" BASIS, 28 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 29 * See the License for the specific language governing permissions and 30 * limitations under the License. 31 */ 32 33 package com.facebook.ktfmt.kdoc 34 35 import kotlin.math.min 36 37 class Paragraph(private val task: FormattingTask) { 38 private val options: KDocFormattingOptions 39 get() = task.options 40 41 var content = StringBuilder() 42 val text 43 get() = content.toString() 44 45 var prev: Paragraph? = null 46 var next: Paragraph? = null 47 48 /** If true, this paragraph should be preceded by a blank line. */ 49 var separate = false 50 51 /** 52 * If true, this paragraph is a continuation of the previous paragraph (so should be indented with 53 * the hanging indent, including line 1) 54 */ 55 var continuation = false 56 57 /** 58 * Whether this paragraph is allowed to be empty. Paragraphs are normally merged if this is not 59 * set. This allows the line breaker to call [ParagraphListBuilder.newParagraph] repeatedly 60 * without introducing more than one new paragraph. But for preformatted text we do want to be 61 * able to express repeated blank lines. 62 */ 63 var allowEmpty = false 64 65 /** Is this paragraph preformatted? */ 66 var preformatted = false 67 68 /** Is this paragraph a block paragraph? If so, it must start on its own line. */ 69 var block = false 70 71 /** Is this paragraph specifying a kdoc tag like @param? */ 72 var doc = false 73 74 /** 75 * Is this line quoted? (In the future make this an int such that we can support additional 76 * levels.) 77 */ 78 var quoted = false 79 80 /** Is this line part of a table? */ 81 var table = false 82 83 /** Is this a separator line? */ 84 var separator = false 85 86 /** Should this paragraph use a hanging indent? (Implies [block] as well). */ 87 var hanging = false 88 set(value) { 89 block = true 90 field = value 91 } 92 93 var originalIndent = 0 94 95 // The indent to use for all lines in the paragraph. 96 var indent = "" 97 98 // The indent to use for all lines in the paragraph if [hanging] is true, 99 // or the second and subsequent lines if [hanging] is false 100 var hangingIndent = "" 101 isEmptynull102 fun isEmpty(): Boolean { 103 return content.isEmpty() 104 } 105 cleanupnull106 fun cleanup() { 107 val original = text 108 109 if (preformatted) { 110 return 111 } 112 113 var s = original 114 if (options.convertMarkup) { 115 s = convertMarkup(text) 116 } 117 if (!options.allowParamBrackets) { 118 s = rewriteParams(s) 119 } 120 121 if (s != original) { 122 content.clear() 123 content.append(s) 124 } 125 } 126 rewriteParamsnull127 private fun rewriteParams(s: String): String { 128 var start = 0 129 val length = s.length 130 while (start < length && s[start].isWhitespace()) { 131 start++ 132 } 133 if (s.startsWith("@param", start)) { 134 start += "@param".length 135 while (start < length && s[start].isWhitespace()) { 136 start++ 137 } 138 if (start < length && s[start++] == '[') { 139 while (start < length && s[start].isWhitespace()) { 140 start++ 141 } 142 var end = start 143 while (end < length && s[end].isJavaIdentifierPart()) { 144 end++ 145 } 146 if (end > start) { 147 val name = s.substring(start, end) 148 while (end < length && s[end].isWhitespace()) { 149 end++ 150 } 151 if (end < length && s[end++] == ']') { 152 while (end < length && s[end].isWhitespace()) { 153 end++ 154 } 155 return "@param $name ${s.substring(end)}" 156 } 157 } 158 } 159 } 160 161 return s 162 } 163 convertMarkupnull164 private fun convertMarkup(s: String): String { 165 // Whether the tag starts with a capital letter and needs to be cleaned, e.g. `@See` -> `@see`. 166 // (isKDocTag only allows the first letter to be capitalized.) 167 val convertKDocTag = s.isKDocTag() && s[1].isUpperCase() 168 169 if (!convertKDocTag && s.none { it == '<' || it == '&' || it == '{' }) { 170 return s 171 } 172 173 val sb = StringBuilder(s.length) 174 var i = 0 175 val n = s.length 176 177 if (convertKDocTag) { 178 sb.append('@').append(s[1].lowercaseChar()) 179 i += 2 180 } 181 182 var code = false 183 var brackets = 0 184 while (i < n) { 185 val c = s[i++] 186 if (c == '\\') { 187 sb.append(c) 188 if (i < n - 1) { 189 sb.append(s[i++]) 190 } 191 continue 192 } else if (c == '`') { 193 code = !code 194 sb.append(c) 195 continue 196 } else if (c == '[') { 197 brackets++ 198 sb.append(c) 199 continue 200 } else if (c == ']') { 201 brackets-- 202 sb.append(c) 203 continue 204 } else if (code || brackets > 0) { 205 sb.append(c) 206 continue 207 } else if (c == '<') { 208 if (s.startsWith("b>", i, false) || s.startsWith("/b>", i, false)) { 209 // "<b>" or </b> -> "**" 210 sb.append('*').append('*') 211 if (s[i] == '/') i++ 212 i += 2 213 continue 214 } 215 if (s.startsWith("i>", i, false) || s.startsWith("/i>", i, false)) { 216 // "<i>" or </i> -> "*" 217 sb.append('*') 218 if (s[i] == '/') i++ 219 i += 2 220 continue 221 } 222 if (s.startsWith("em>", i, false) || s.startsWith("/em>", i, false)) { 223 // "<em>" or </em> -> "_" 224 sb.append('_') 225 if (s[i] == '/') i++ 226 i += 3 227 continue 228 } 229 // (We don't convert <pre> here because those tags appear in paragraphs 230 // marked preformatted, and preformatted paragraphs are never passed to 231 // convertTags) 232 } else if (c == '&') { 233 if (s.startsWith("lt;", i, true)) { // "<" -> "<" 234 sb.append('<') 235 i += 3 236 continue 237 } 238 if (s.startsWith("gt;", i, true)) { // ">" -> ">" 239 sb.append('>') 240 i += 3 241 continue 242 } 243 } else if (c == '{') { 244 if (s.startsWith("@param", i, true)) { 245 val curr = i + 6 246 var end = s.indexOf('}', curr) 247 if (end == -1) { 248 end = n 249 } 250 sb.append('[') 251 sb.append(s.substring(curr, end).trim()) 252 sb.append(']') 253 i = end + 1 254 continue 255 } else if (s.startsWith("@link", i, true) 256 // @linkplain is similar to @link, but kdoc does *not* render a [symbol] 257 // into a {@linkplain} in HTML, so converting these would change the output. 258 && !s.startsWith("@linkplain", i, true)) { 259 // {@link} or {@linkplain} 260 sb.append('[') 261 var curr = i + 5 262 while (curr < n) { 263 val ch = s[curr++] 264 if (ch.isWhitespace()) { 265 break 266 } 267 if (ch == '}') { 268 curr-- 269 break 270 } 271 } 272 var skip = false 273 while (curr < n) { 274 val ch = s[curr] 275 if (ch == '}') { 276 sb.append(']') 277 curr++ 278 break 279 } else if (ch == '(') { 280 skip = true 281 } else if (!skip) { 282 if (ch == '#') { 283 if (!sb.endsWith('[')) { 284 sb.append('.') 285 } 286 } else { 287 sb.append(ch) 288 } 289 } 290 curr++ 291 } 292 i = curr 293 continue 294 } 295 } 296 sb.append(c) 297 } 298 299 return sb.toString() 300 } 301 reflownull302 fun reflow(firstLineMaxWidth: Int, maxLineWidth: Int): List<String> { 303 val lineWidth = maxLineWidth - getIndentSize(indent, options) 304 val hangingIndentSize = getIndentSize(hangingIndent, options) - if (quoted) 2 else 0 // "> " 305 if (text.length < (firstLineMaxWidth - hangingIndentSize)) { 306 return listOf(text.collapseSpaces()) 307 } 308 // Split text into words 309 val words: List<String> = computeWords() 310 311 // See divide & conquer algorithm listed here: https://xxyxyz.org/line-breaking/ 312 if (words.size == 1) { 313 return listOf(words[0]) 314 } 315 316 if (firstLineMaxWidth < maxLineWidth) { 317 // We have ragged text. We'll just greedily place the first 318 // words on the first line, and then optimize the rest. 319 val line = StringBuilder() 320 val firstLineWidth = firstLineMaxWidth - getIndentSize(indent, options) 321 for (i in words.indices) { 322 val word = words[i] 323 if (line.isEmpty()) { 324 if (word.length + task.type.lineOverhead() > firstLineMaxWidth) { 325 // can't fit anything on the first line: just flow to 326 // full width and caller will need to insert comment on 327 // the next line. 328 return reflow(words, lineWidth, hangingIndentSize) 329 } 330 line.append(word) 331 } else if (line.length + word.length + 1 <= firstLineWidth) { 332 line.append(' ') 333 line.append(word) 334 } else { 335 // Break the rest 336 val remainingWords = words.subList(i, words.size) 337 val reflownRemaining = reflow(remainingWords, lineWidth, hangingIndentSize) 338 return listOf(line.toString()) + reflownRemaining 339 } 340 } 341 // We fit everything on the first line 342 return listOf(line.toString()) 343 } 344 345 return reflow(words, lineWidth, hangingIndentSize) 346 } 347 reflownull348 private fun reflow(words: List<String>, lineWidth: Int, hangingIndentSize: Int): List<String> { 349 if (options.alternate || 350 !options.optimal || 351 hanging && hangingIndentSize > 0 || 352 // An unbreakable long word may make other lines shorter and won't look good 353 words.any { it.length > lineWidth }) { 354 // Switch to greedy if explicitly turned on, and for hanging indent 355 // paragraphs, since the current implementation doesn't have support 356 // for a different maximum length on the first line from the rest 357 // and there were various cases where this ended up with bad results. 358 // This is typically used in list items (and kdoc sections) which tend 359 // to be short -- and for 2-3 lines the gains of optimal line breaking 360 // isn't worth the cases where we have really unbalanced looking text 361 return reflowGreedy(lineWidth, options, words) 362 } 363 364 val lines = reflowOptimal(lineWidth - hangingIndentSize, words) 365 if (lines.size <= 2) { 366 // Just 2 lines? We prefer long+short instead of half+half. 367 return reflowGreedy(lineWidth, options, words) 368 } else { 369 // We could just return [lines] here, but the straightforward algorithm 370 // doesn't do a great job with short paragraphs where the last line is 371 // short; it over-corrects and shortens everything else in order to balance 372 // out the last line. 373 374 val maxLine: (String) -> Int = { 375 // Ignore lines that are unbreakable 376 if (it.indexOf(' ') == -1) { 377 0 378 } else { 379 it.length 380 } 381 } 382 val longestLine = lines.maxOf(maxLine) 383 var lastWord = words.size - 1 384 while (lastWord > 0) { 385 // We can afford to do this because we're only repeating it for a single 386 // line's worth of words and because comments tend to be relatively short 387 // anyway 388 val newLines = reflowOptimal(lineWidth - hangingIndentSize, words.subList(0, lastWord)) 389 if (newLines.size < lines.size) { 390 val newLongestLine = newLines.maxOf(maxLine) 391 if (newLongestLine > longestLine && 392 newLines.subList(0, newLines.size - 1).any { it.length > longestLine }) { 393 return newLines + 394 reflowGreedy( 395 lineWidth - hangingIndentSize, options, words.subList(lastWord, words.size)) 396 } 397 break 398 } 399 lastWord-- 400 } 401 402 return lines 403 } 404 } 405 406 /** 407 * Returns true if it's okay to break at the current word. 408 * 409 * We need to check for this, because a word can have a different meaning at the beginning of a 410 * line than in the middle somewhere, so if it just so happens to be at the break boundary, we 411 * need to make sure we don't make it the first word on the next line since that would change the 412 * documentation. 413 */ canBreakAtnull414 private fun canBreakAt(prev: String, word: String): Boolean { 415 // Can we start a new line with this without interpreting it in a special 416 // way? 417 418 if (word.startsWith("#") || 419 word.startsWith("```") || 420 word.isDirectiveMarker() || 421 word.startsWith("@") || // interpreted as a tag 422 word.isTodo() || 423 word.startsWith(">")) { 424 return false 425 } 426 427 if (prev == "@sample") { 428 return false // https://github.com/facebook/ktfmt/issues/310 429 } 430 431 if (!word.first().isLetter()) { 432 val wordWithSpace = "$word " // for regex matching in below checks 433 if (wordWithSpace.isListItem() && !word.equals("<li>", true) || wordWithSpace.isQuoted()) { 434 return false 435 } 436 } 437 438 return true 439 } 440 441 /** 442 * Split [text] up into individual "words"; in the case where some words are not allowed to span 443 * lines, it will combine these into single word. For example, if we have a sentence which ends 444 * with a number, e.g. "the sum is 5.", we want to make sure "5." is never placed at the beginning 445 * of a new line (which would turn it into a list item), so for this we'll compute the word list 446 * "the", "sum", "is 5.". 447 */ computeWordsnull448 fun computeWords(): List<String> { 449 val words = text.split(Regex("\\s+")).filter { it.isNotBlank() }.map { it.trim() } 450 if (words.size == 1) { 451 return words 452 } 453 454 if (task.type != CommentType.KDOC) { 455 // In block comments and line comments we feel free to break anywhere 456 // between words; there isn't a special meaning assigned to certain words 457 // if they appear first on a line like there is in KDoc/Markdown. 458 return words 459 } 460 461 // See if any of the words should never be broken up. We do that for list 462 // separators and a few others. We never want to put "1." at the beginning 463 // of a line as an overflow. 464 465 val combined = ArrayList<String>(words.size) 466 467 var from = 0 468 val end = words.size 469 while (from < end) { 470 val start = 471 if (from == 0 && (quoted || hanging && !text.isKDocTag())) { 472 from + 2 473 } else { 474 from + 1 475 } 476 var to = words.size 477 for (i in start until words.size) { 478 val next = words[i] 479 if (next.startsWith("[") && !next.startsWith("[[")) { 480 // find end 481 var j = -1 482 for (k in i until words.size) { 483 if (']' in words[k]) { 484 j = k 485 break 486 } 487 } 488 if (j != -1) { 489 // combine everything in the string; we can't break link text or @sample tags 490 if (start == from + 1 && canBreakAt(words[start - 1], words[start])) { 491 combined.add(words[from]) 492 from = start 493 } 494 // Maybe not break; what if the next word isn't okay? 495 to = j + 1 496 if (to == words.size || canBreakAt(words[to - 1], words[to])) { 497 break 498 } 499 } // else: unterminated [, ignore 500 } else if (canBreakAt(words[i - 1], next)) { 501 to = i 502 break 503 } 504 } 505 506 if (to == from + 1) { 507 combined.add(words[from]) 508 } else if (to > from) { 509 combined.add(words.subList(from, to).joinToString(" ")) 510 } 511 from = to 512 } 513 514 return combined 515 } 516 517 private data class Quadruple(val i0: Int, val j0: Int, val i1: Int, val j1: Int) 518 reflowOptimalnull519 private fun reflowOptimal(maxLineWidth: Int, words: List<String>): List<String> { 520 val count = words.size 521 val lines = ArrayList<String>() 522 523 val offsets = ArrayList<Int>() 524 offsets.add(0) 525 526 for (boxWidth in words.map { it.length }.toList()) { 527 offsets.add(offsets.last() + min(boxWidth, maxLineWidth)) 528 } 529 530 val big = 10 shl 20 531 val minimum = IntArray(count + 1) { big } 532 val breaks = IntArray(count + 1) 533 minimum[0] = 0 534 535 fun cost(i: Int, j: Int): Int { 536 val width = offsets[j] - offsets[i] + j - i - 1 537 return if (width <= maxLineWidth) { 538 val squared = (maxLineWidth - width) * (maxLineWidth - width) 539 minimum[i] + squared 540 } else { 541 big 542 } 543 } 544 545 fun search(pi0: Int, pj0: Int, pi1: Int, pj1: Int) { 546 val stack = ArrayDeque<Quadruple>() 547 stack.add(Quadruple(pi0, pj0, pi1, pj1)) 548 549 while (stack.isNotEmpty()) { 550 val (i0, j0, i1, j1) = stack.removeLast() 551 if (j0 < j1) { 552 val j = (j0 + j1) / 2 553 554 for (i in i0 until i1) { 555 val c = cost(i, j) 556 if (c <= minimum[j]) { 557 minimum[j] = c 558 breaks[j] = i 559 } 560 } 561 stack.add(Quadruple(breaks[j], j + 1, i1, j1)) 562 stack.add(Quadruple(i0, j0, breaks[j] + 1, j)) 563 } 564 } 565 } 566 567 var n = count + 1 568 var i = 0 569 var offset = 0 570 571 while (true) { 572 val r = min(n, 1 shl (i + 1)) 573 val edge = (1 shl i) + offset 574 search(0 + offset, edge, edge, r + offset) 575 val x = minimum[r - 1 + offset] 576 var flag = true 577 for (j in (1 shl i) until (r - 1)) { 578 val y = cost(j + offset, r - 1 + offset) 579 if (y <= x) { 580 n -= j 581 i = 0 582 offset += j 583 flag = false 584 break 585 } 586 } 587 if (flag) { 588 if (r == n) break 589 i++ 590 } 591 } 592 593 var j = count 594 while (j > 0) { 595 i = breaks[j] 596 val sb = StringBuilder() 597 for (w in i until j) { 598 sb.append(words[w]) 599 if (w < j - 1) { 600 sb.append(' ') 601 } 602 } 603 lines.add(sb.toString()) 604 j = i 605 } 606 607 lines.reverse() 608 return lines 609 } 610 reflowGreedynull611 private fun reflowGreedy( 612 lineWidth: Int, 613 options: KDocFormattingOptions, 614 words: List<String> 615 ): List<String> { 616 // Greedy implementation 617 618 var width = lineWidth 619 if (options.hangingIndent > 0 && hanging && continuation) { 620 width -= getIndentSize(hangingIndent, options) 621 } 622 623 val lines = mutableListOf<String>() 624 var column = 0 625 val sb = StringBuilder() 626 for (word in words) { 627 when { 628 sb.isEmpty() -> { 629 sb.append(word) 630 column += word.length 631 } 632 column + word.length + 1 <= width -> { 633 sb.append(' ').append(word) 634 column += word.length + 1 635 } 636 else -> { 637 width = lineWidth 638 if (options.hangingIndent > 0 && hanging) { 639 width -= getIndentSize(hangingIndent, options) 640 } 641 lines.add(sb.toString()) 642 sb.setLength(0) 643 sb.append(word) 644 column = sb.length 645 } 646 } 647 } 648 if (sb.isNotEmpty()) { 649 lines.add(sb.toString()) 650 } 651 return lines 652 } 653 toStringnull654 override fun toString(): String { 655 return "$content, separate=$separate, block=$block, hanging=$hanging, preformatted=$preformatted, quoted=$quoted, continuation=$continuation, allowempty=$allowEmpty, separator=$separator" 656 } 657 } 658