xref: /aosp_15_r20/external/ktfmt/core/src/main/java/com/facebook/ktfmt/format/Tokenizer.kt (revision 5be3f65c8cf0e6db0a7e312df5006e8e93cdf9ec)
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.facebook.ktfmt.format
18 
19 import java.util.regex.Pattern
20 import org.jetbrains.kotlin.com.intellij.psi.PsiComment
21 import org.jetbrains.kotlin.com.intellij.psi.PsiElement
22 import org.jetbrains.kotlin.com.intellij.psi.PsiWhiteSpace
23 import org.jetbrains.kotlin.com.intellij.psi.impl.source.tree.LeafPsiElement
24 import org.jetbrains.kotlin.lexer.KtTokens
25 import org.jetbrains.kotlin.psi.KtFile
26 import org.jetbrains.kotlin.psi.KtStringTemplateExpression
27 import org.jetbrains.kotlin.psi.KtTreeVisitorVoid
28 import org.jetbrains.kotlin.psi.psiUtil.endOffset
29 import org.jetbrains.kotlin.psi.psiUtil.startOffset
30 
/**
 * Visitor that flattens a Kotlin PSI tree into an ordered list of [KotlinTok]s.
 *
 * The Kotlin parse tree keeps whitespace and comments (unlike Javac), so they are turned into Toks
 * as well.
 *
 * <p>The google-java-format infra expects each newline to be its own Tok, separate from runs of
 * spaces, whereas Kotlin hands them over as a single whitespace element. Whitespace elements are
 * therefore scanned with a regex (Java's \R linebreak matcher, or a run of spaces) and one Tok is
 * emitted per match. 'split' et al. are not used because the newlines themselves need Toks too.
 *
 * <p>Note: characters inside a whitespace element that match neither alternative of the regex (for
 * example tabs) do not produce a Tok.
 *
 * <p>Every Tok is created with `column = 0` and `kind = KtTokens.EOF`.
 *
 * @property file the parsed file this tokenizer is associated with (not read by this class itself)
 */
class Tokenizer(private val fileText: String, val file: KtFile) : KtTreeVisitorVoid() {

  /** Toks collected so far, in the order the elements were visited. */
  val toks: MutableList<KotlinTok> = mutableListOf()

  /**
   * Index given to the next comment, string-template or non-whitespace leaf Tok. Whitespace Toks
   * do not consume an index (they are created with index -1).
   */
  var index: Int = 0
    private set

  /**
   * Emits the Tok(s) for [element], if any, and then hands over to the superclass.
   *
   * Comments and string templates each become exactly one Tok and the superclass is NOT invoked
   * for them. Every other element is passed on to `super.visitElement` after its own Toks (if it is
   * a leaf) have been added.
   */
  override fun visitElement(element: PsiElement) {
    val begin = element.startOffset
    val finish = element.endOffset
    val psiText = element.text
    // Slice of the raw file contents covered by this element.
    val sourceSlice = fileText.substring(begin, finish)

    if (element is PsiComment) {
      appendIndexedTok(sourceSlice, psiText, begin, isToken = false)
      return
    }

    if (element is KtStringTemplateExpression) {
      // The whole template is a single Tok; trailing whitespace inside it is replaced by a
      // tombstone in the Tok's original text.
      val tombstoned = WhitespaceTombstones.replaceTrailingWhitespaceWithTombstone(sourceSlice)
      appendIndexedTok(tombstoned, psiText, begin, isToken = true)
      return
    }

    if (element is LeafPsiElement) {
      if (element is PsiWhiteSpace) {
        appendWhitespaceToks(psiText, begin)
      } else {
        appendIndexedTok(sourceSlice, psiText, begin, isToken = true)
      }
    }

    super.visitElement(element)
  }

  /** Adds one Tok carrying the current [index], then advances [index] by one. */
  private fun appendIndexedTok(
      originalText: String,
      text: String,
      position: Int,
      isToken: Boolean,
  ) {
    toks +=
        KotlinTok(
            index = index,
            originalText = originalText,
            text = text,
            position = position,
            column = 0,
            isToken = isToken,
            kind = KtTokens.EOF,
        )
    index++
  }

  /**
   * Splits the text of a whitespace element into one Tok per linebreak and one Tok per run of
   * spaces. [elementStart] is the element's offset in the file; it is used to translate match
   * offsets into file positions. These Toks are non-tokens with index -1.
   */
  private fun appendWhitespaceToks(whitespace: String, elementStart: Int) {
    val pieces = WHITESPACE_NEWLINE_REGEX.matcher(whitespace)
    while (pieces.find()) {
      val pieceText = pieces.group()
      val pieceStart = elementStart + pieces.start()
      val pieceEnd = elementStart + pieces.end()
      toks +=
          KotlinTok(
              index = -1,
              originalText = fileText.substring(pieceStart, pieceEnd),
              text = pieceText,
              position = pieceStart,
              column = 0,
              isToken = false,
              kind = KtTokens.EOF,
          )
    }
  }

  companion object {
    /** Matches either a single linebreak sequence (\R) or a maximal run of space characters. */
    private val WHITESPACE_NEWLINE_REGEX: Pattern = Pattern.compile("\\R|( )+")
  }
}
126