xref: /aosp_15_r20/external/ktfmt/core/src/main/java/com/facebook/ktfmt/kdoc/Paragraph.kt (revision 5be3f65c8cf0e6db0a7e312df5006e8e93cdf9ec)
1 /*
2  * Portions Copyright (c) Meta Platforms, Inc. and affiliates.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * Copyright (c) Tor Norbye.
19  *
20  * Licensed under the Apache License, Version 2.0 (the "License");
21  * you may not use this file except in compliance with the License.
22  * You may obtain a copy of the License at
23  *
24  *     http://www.apache.org/licenses/LICENSE-2.0
25  *
26  * Unless required by applicable law or agreed to in writing, software
27  * distributed under the License is distributed on an "AS IS" BASIS,
28  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
29  * See the License for the specific language governing permissions and
30  * limitations under the License.
31  */
32 
33 package com.facebook.ktfmt.kdoc
34 
35 import kotlin.math.min
36 
37 class Paragraph(private val task: FormattingTask) {
38   private val options: KDocFormattingOptions
39     get() = task.options
40 
41   var content = StringBuilder()
42   val text
43     get() = content.toString()
44 
45   var prev: Paragraph? = null
46   var next: Paragraph? = null
47 
48   /** If true, this paragraph should be preceded by a blank line. */
49   var separate = false
50 
51   /**
52    * If true, this paragraph is a continuation of the previous paragraph (so should be indented with
53    * the hanging indent, including line 1)
54    */
55   var continuation = false
56 
57   /**
58    * Whether this paragraph is allowed to be empty. Paragraphs are normally merged if this is not
59    * set. This allows the line breaker to call [ParagraphListBuilder.newParagraph] repeatedly
60    * without introducing more than one new paragraph. But for preformatted text we do want to be
61    * able to express repeated blank lines.
62    */
63   var allowEmpty = false
64 
65   /** Is this paragraph preformatted? */
66   var preformatted = false
67 
68   /** Is this paragraph a block paragraph? If so, it must start on its own line. */
69   var block = false
70 
71   /** Is this paragraph specifying a kdoc tag like @param? */
72   var doc = false
73 
74   /**
75    * Is this line quoted? (In the future make this an int such that we can support additional
76    * levels.)
77    */
78   var quoted = false
79 
80   /** Is this line part of a table? */
81   var table = false
82 
83   /** Is this a separator line? */
84   var separator = false
85 
86   /** Should this paragraph use a hanging indent? (Implies [block] as well). */
87   var hanging = false
88     set(value) {
89       block = true
90       field = value
91     }
92 
93   var originalIndent = 0
94 
95   // The indent to use for all lines in the paragraph.
96   var indent = ""
97 
98   // The indent to use for all lines in the paragraph if [hanging] is true,
99   // or the second and subsequent lines if [hanging] is false
100   var hangingIndent = ""
101 
isEmptynull102   fun isEmpty(): Boolean {
103     return content.isEmpty()
104   }
105 
cleanupnull106   fun cleanup() {
107     val original = text
108 
109     if (preformatted) {
110       return
111     }
112 
113     var s = original
114     if (options.convertMarkup) {
115       s = convertMarkup(text)
116     }
117     if (!options.allowParamBrackets) {
118       s = rewriteParams(s)
119     }
120 
121     if (s != original) {
122       content.clear()
123       content.append(s)
124     }
125   }
126 
rewriteParamsnull127   private fun rewriteParams(s: String): String {
128     var start = 0
129     val length = s.length
130     while (start < length && s[start].isWhitespace()) {
131       start++
132     }
133     if (s.startsWith("@param", start)) {
134       start += "@param".length
135       while (start < length && s[start].isWhitespace()) {
136         start++
137       }
138       if (start < length && s[start++] == '[') {
139         while (start < length && s[start].isWhitespace()) {
140           start++
141         }
142         var end = start
143         while (end < length && s[end].isJavaIdentifierPart()) {
144           end++
145         }
146         if (end > start) {
147           val name = s.substring(start, end)
148           while (end < length && s[end].isWhitespace()) {
149             end++
150           }
151           if (end < length && s[end++] == ']') {
152             while (end < length && s[end].isWhitespace()) {
153               end++
154             }
155             return "@param $name ${s.substring(end)}"
156           }
157         }
158       }
159     }
160 
161     return s
162   }
163 
convertMarkupnull164   private fun convertMarkup(s: String): String {
165     // Whether the tag starts with a capital letter and needs to be cleaned, e.g. `@See` -> `@see`.
166     // (isKDocTag only allows the first letter to be capitalized.)
167     val convertKDocTag = s.isKDocTag() && s[1].isUpperCase()
168 
169     if (!convertKDocTag && s.none { it == '<' || it == '&' || it == '{' }) {
170       return s
171     }
172 
173     val sb = StringBuilder(s.length)
174     var i = 0
175     val n = s.length
176 
177     if (convertKDocTag) {
178       sb.append('@').append(s[1].lowercaseChar())
179       i += 2
180     }
181 
182     var code = false
183     var brackets = 0
184     while (i < n) {
185       val c = s[i++]
186       if (c == '\\') {
187         sb.append(c)
188         if (i < n - 1) {
189           sb.append(s[i++])
190         }
191         continue
192       } else if (c == '`') {
193         code = !code
194         sb.append(c)
195         continue
196       } else if (c == '[') {
197         brackets++
198         sb.append(c)
199         continue
200       } else if (c == ']') {
201         brackets--
202         sb.append(c)
203         continue
204       } else if (code || brackets > 0) {
205         sb.append(c)
206         continue
207       } else if (c == '<') {
208         if (s.startsWith("b>", i, false) || s.startsWith("/b>", i, false)) {
209           // "<b>" or </b> -> "**"
210           sb.append('*').append('*')
211           if (s[i] == '/') i++
212           i += 2
213           continue
214         }
215         if (s.startsWith("i>", i, false) || s.startsWith("/i>", i, false)) {
216           // "<i>" or </i> -> "*"
217           sb.append('*')
218           if (s[i] == '/') i++
219           i += 2
220           continue
221         }
222         if (s.startsWith("em>", i, false) || s.startsWith("/em>", i, false)) {
223           // "<em>" or </em> -> "_"
224           sb.append('_')
225           if (s[i] == '/') i++
226           i += 3
227           continue
228         }
229         // (We don't convert <pre> here because those tags appear in paragraphs
230         // marked preformatted, and preformatted paragraphs are never passed to
231         // convertTags)
232       } else if (c == '&') {
233         if (s.startsWith("lt;", i, true)) { // "&lt;" -> "<"
234           sb.append('<')
235           i += 3
236           continue
237         }
238         if (s.startsWith("gt;", i, true)) { // "&gt;" -> ">"
239           sb.append('>')
240           i += 3
241           continue
242         }
243       } else if (c == '{') {
244         if (s.startsWith("@param", i, true)) {
245           val curr = i + 6
246           var end = s.indexOf('}', curr)
247           if (end == -1) {
248             end = n
249           }
250           sb.append('[')
251           sb.append(s.substring(curr, end).trim())
252           sb.append(']')
253           i = end + 1
254           continue
255         } else if (s.startsWith("@link", i, true)
256         // @linkplain is similar to @link, but kdoc does *not* render a [symbol]
257         // into a {@linkplain} in HTML, so converting these would change the output.
258         && !s.startsWith("@linkplain", i, true)) {
259           // {@link} or {@linkplain}
260           sb.append('[')
261           var curr = i + 5
262           while (curr < n) {
263             val ch = s[curr++]
264             if (ch.isWhitespace()) {
265               break
266             }
267             if (ch == '}') {
268               curr--
269               break
270             }
271           }
272           var skip = false
273           while (curr < n) {
274             val ch = s[curr]
275             if (ch == '}') {
276               sb.append(']')
277               curr++
278               break
279             } else if (ch == '(') {
280               skip = true
281             } else if (!skip) {
282               if (ch == '#') {
283                 if (!sb.endsWith('[')) {
284                   sb.append('.')
285                 }
286               } else {
287                 sb.append(ch)
288               }
289             }
290             curr++
291           }
292           i = curr
293           continue
294         }
295       }
296       sb.append(c)
297     }
298 
299     return sb.toString()
300   }
301 
reflownull302   fun reflow(firstLineMaxWidth: Int, maxLineWidth: Int): List<String> {
303     val lineWidth = maxLineWidth - getIndentSize(indent, options)
304     val hangingIndentSize = getIndentSize(hangingIndent, options) - if (quoted) 2 else 0 // "> "
305     if (text.length < (firstLineMaxWidth - hangingIndentSize)) {
306       return listOf(text.collapseSpaces())
307     }
308     // Split text into words
309     val words: List<String> = computeWords()
310 
311     // See divide & conquer algorithm listed here: https://xxyxyz.org/line-breaking/
312     if (words.size == 1) {
313       return listOf(words[0])
314     }
315 
316     if (firstLineMaxWidth < maxLineWidth) {
317       // We have ragged text. We'll just greedily place the first
318       // words on the first line, and then optimize the rest.
319       val line = StringBuilder()
320       val firstLineWidth = firstLineMaxWidth - getIndentSize(indent, options)
321       for (i in words.indices) {
322         val word = words[i]
323         if (line.isEmpty()) {
324           if (word.length + task.type.lineOverhead() > firstLineMaxWidth) {
325             // can't fit anything on the first line: just flow to
326             // full width and caller will need to insert comment on
327             // the next line.
328             return reflow(words, lineWidth, hangingIndentSize)
329           }
330           line.append(word)
331         } else if (line.length + word.length + 1 <= firstLineWidth) {
332           line.append(' ')
333           line.append(word)
334         } else {
335           // Break the rest
336           val remainingWords = words.subList(i, words.size)
337           val reflownRemaining = reflow(remainingWords, lineWidth, hangingIndentSize)
338           return listOf(line.toString()) + reflownRemaining
339         }
340       }
341       // We fit everything on the first line
342       return listOf(line.toString())
343     }
344 
345     return reflow(words, lineWidth, hangingIndentSize)
346   }
347 
reflownull348   private fun reflow(words: List<String>, lineWidth: Int, hangingIndentSize: Int): List<String> {
349     if (options.alternate ||
350         !options.optimal ||
351         hanging && hangingIndentSize > 0 ||
352         // An unbreakable long word may make other lines shorter and won't look good
353         words.any { it.length > lineWidth }) {
354       // Switch to greedy if explicitly turned on, and for hanging indent
355       // paragraphs, since the current implementation doesn't have support
356       // for a different maximum length on the first line from the rest
357       // and there were various cases where this ended up with bad results.
358       // This is typically used in list items (and kdoc sections) which tend
359       // to be short -- and for 2-3 lines the gains of optimal line breaking
360       // isn't worth the cases where we have really unbalanced looking text
361       return reflowGreedy(lineWidth, options, words)
362     }
363 
364     val lines = reflowOptimal(lineWidth - hangingIndentSize, words)
365     if (lines.size <= 2) {
366       // Just 2 lines? We prefer long+short instead of half+half.
367       return reflowGreedy(lineWidth, options, words)
368     } else {
369       // We could just return [lines] here, but the straightforward algorithm
370       // doesn't do a great job with short paragraphs where the last line is
371       // short; it over-corrects and shortens everything else in order to balance
372       // out the last line.
373 
374       val maxLine: (String) -> Int = {
375         // Ignore lines that are unbreakable
376         if (it.indexOf(' ') == -1) {
377           0
378         } else {
379           it.length
380         }
381       }
382       val longestLine = lines.maxOf(maxLine)
383       var lastWord = words.size - 1
384       while (lastWord > 0) {
385         // We can afford to do this because we're only repeating it for a single
386         // line's worth of words and because comments tend to be relatively short
387         // anyway
388         val newLines = reflowOptimal(lineWidth - hangingIndentSize, words.subList(0, lastWord))
389         if (newLines.size < lines.size) {
390           val newLongestLine = newLines.maxOf(maxLine)
391           if (newLongestLine > longestLine &&
392               newLines.subList(0, newLines.size - 1).any { it.length > longestLine }) {
393             return newLines +
394                 reflowGreedy(
395                     lineWidth - hangingIndentSize, options, words.subList(lastWord, words.size))
396           }
397           break
398         }
399         lastWord--
400       }
401 
402       return lines
403     }
404   }
405 
406   /**
407    * Returns true if it's okay to break at the current word.
408    *
409    * We need to check for this, because a word can have a different meaning at the beginning of a
410    * line than in the middle somewhere, so if it just so happens to be at the break boundary, we
411    * need to make sure we don't make it the first word on the next line since that would change the
412    * documentation.
413    */
canBreakAtnull414   private fun canBreakAt(prev: String, word: String): Boolean {
415     // Can we start a new line with this without interpreting it in a special
416     // way?
417 
418     if (word.startsWith("#") ||
419         word.startsWith("```") ||
420         word.isDirectiveMarker() ||
421         word.startsWith("@") || // interpreted as a tag
422         word.isTodo() ||
423         word.startsWith(">")) {
424       return false
425     }
426 
427     if (prev == "@sample") {
428       return false // https://github.com/facebook/ktfmt/issues/310
429     }
430 
431     if (!word.first().isLetter()) {
432       val wordWithSpace = "$word " // for regex matching in below checks
433       if (wordWithSpace.isListItem() && !word.equals("<li>", true) || wordWithSpace.isQuoted()) {
434         return false
435       }
436     }
437 
438     return true
439   }
440 
441   /**
442    * Split [text] up into individual "words"; in the case where some words are not allowed to span
443    * lines, it will combine these into single word. For example, if we have a sentence which ends
444    * with a number, e.g. "the sum is 5.", we want to make sure "5." is never placed at the beginning
445    * of a new line (which would turn it into a list item), so for this we'll compute the word list
446    * "the", "sum", "is 5.".
447    */
computeWordsnull448   fun computeWords(): List<String> {
449     val words = text.split(Regex("\\s+")).filter { it.isNotBlank() }.map { it.trim() }
450     if (words.size == 1) {
451       return words
452     }
453 
454     if (task.type != CommentType.KDOC) {
455       // In block comments and line comments we feel free to break anywhere
456       // between words; there isn't a special meaning assigned to certain words
457       // if they appear first on a line like there is in KDoc/Markdown.
458       return words
459     }
460 
461     // See if any of the words should never be broken up. We do that for list
462     // separators and a few others. We never want to put "1." at the beginning
463     // of a line as an overflow.
464 
465     val combined = ArrayList<String>(words.size)
466 
467     var from = 0
468     val end = words.size
469     while (from < end) {
470       val start =
471           if (from == 0 && (quoted || hanging && !text.isKDocTag())) {
472             from + 2
473           } else {
474             from + 1
475           }
476       var to = words.size
477       for (i in start until words.size) {
478         val next = words[i]
479         if (next.startsWith("[") && !next.startsWith("[[")) {
480           // find end
481           var j = -1
482           for (k in i until words.size) {
483             if (']' in words[k]) {
484               j = k
485               break
486             }
487           }
488           if (j != -1) {
489             // combine everything in the string; we can't break link text or @sample tags
490             if (start == from + 1 && canBreakAt(words[start - 1], words[start])) {
491               combined.add(words[from])
492               from = start
493             }
494             // Maybe not break; what if the next word isn't okay?
495             to = j + 1
496             if (to == words.size || canBreakAt(words[to - 1], words[to])) {
497               break
498             }
499           } // else: unterminated [, ignore
500         } else if (canBreakAt(words[i - 1], next)) {
501           to = i
502           break
503         }
504       }
505 
506       if (to == from + 1) {
507         combined.add(words[from])
508       } else if (to > from) {
509         combined.add(words.subList(from, to).joinToString(" "))
510       }
511       from = to
512     }
513 
514     return combined
515   }
516 
517   private data class Quadruple(val i0: Int, val j0: Int, val i1: Int, val j1: Int)
518 
reflowOptimalnull519   private fun reflowOptimal(maxLineWidth: Int, words: List<String>): List<String> {
520     val count = words.size
521     val lines = ArrayList<String>()
522 
523     val offsets = ArrayList<Int>()
524     offsets.add(0)
525 
526     for (boxWidth in words.map { it.length }.toList()) {
527       offsets.add(offsets.last() + min(boxWidth, maxLineWidth))
528     }
529 
530     val big = 10 shl 20
531     val minimum = IntArray(count + 1) { big }
532     val breaks = IntArray(count + 1)
533     minimum[0] = 0
534 
535     fun cost(i: Int, j: Int): Int {
536       val width = offsets[j] - offsets[i] + j - i - 1
537       return if (width <= maxLineWidth) {
538         val squared = (maxLineWidth - width) * (maxLineWidth - width)
539         minimum[i] + squared
540       } else {
541         big
542       }
543     }
544 
545     fun search(pi0: Int, pj0: Int, pi1: Int, pj1: Int) {
546       val stack = ArrayDeque<Quadruple>()
547       stack.add(Quadruple(pi0, pj0, pi1, pj1))
548 
549       while (stack.isNotEmpty()) {
550         val (i0, j0, i1, j1) = stack.removeLast()
551         if (j0 < j1) {
552           val j = (j0 + j1) / 2
553 
554           for (i in i0 until i1) {
555             val c = cost(i, j)
556             if (c <= minimum[j]) {
557               minimum[j] = c
558               breaks[j] = i
559             }
560           }
561           stack.add(Quadruple(breaks[j], j + 1, i1, j1))
562           stack.add(Quadruple(i0, j0, breaks[j] + 1, j))
563         }
564       }
565     }
566 
567     var n = count + 1
568     var i = 0
569     var offset = 0
570 
571     while (true) {
572       val r = min(n, 1 shl (i + 1))
573       val edge = (1 shl i) + offset
574       search(0 + offset, edge, edge, r + offset)
575       val x = minimum[r - 1 + offset]
576       var flag = true
577       for (j in (1 shl i) until (r - 1)) {
578         val y = cost(j + offset, r - 1 + offset)
579         if (y <= x) {
580           n -= j
581           i = 0
582           offset += j
583           flag = false
584           break
585         }
586       }
587       if (flag) {
588         if (r == n) break
589         i++
590       }
591     }
592 
593     var j = count
594     while (j > 0) {
595       i = breaks[j]
596       val sb = StringBuilder()
597       for (w in i until j) {
598         sb.append(words[w])
599         if (w < j - 1) {
600           sb.append(' ')
601         }
602       }
603       lines.add(sb.toString())
604       j = i
605     }
606 
607     lines.reverse()
608     return lines
609   }
610 
reflowGreedynull611   private fun reflowGreedy(
612       lineWidth: Int,
613       options: KDocFormattingOptions,
614       words: List<String>
615   ): List<String> {
616     // Greedy implementation
617 
618     var width = lineWidth
619     if (options.hangingIndent > 0 && hanging && continuation) {
620       width -= getIndentSize(hangingIndent, options)
621     }
622 
623     val lines = mutableListOf<String>()
624     var column = 0
625     val sb = StringBuilder()
626     for (word in words) {
627       when {
628         sb.isEmpty() -> {
629           sb.append(word)
630           column += word.length
631         }
632         column + word.length + 1 <= width -> {
633           sb.append(' ').append(word)
634           column += word.length + 1
635         }
636         else -> {
637           width = lineWidth
638           if (options.hangingIndent > 0 && hanging) {
639             width -= getIndentSize(hangingIndent, options)
640           }
641           lines.add(sb.toString())
642           sb.setLength(0)
643           sb.append(word)
644           column = sb.length
645         }
646       }
647     }
648     if (sb.isNotEmpty()) {
649       lines.add(sb.toString())
650     }
651     return lines
652   }
653 
toStringnull654   override fun toString(): String {
655     return "$content, separate=$separate, block=$block, hanging=$hanging, preformatted=$preformatted, quoted=$quoted, continuation=$continuation, allowempty=$allowEmpty, separator=$separator"
656   }
657 }
658