/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.facebook.ktfmt.format

import java.util.regex.Pattern
import org.jetbrains.kotlin.com.intellij.psi.PsiComment
import org.jetbrains.kotlin.com.intellij.psi.PsiElement
import org.jetbrains.kotlin.com.intellij.psi.PsiWhiteSpace
import org.jetbrains.kotlin.com.intellij.psi.impl.source.tree.LeafPsiElement
import org.jetbrains.kotlin.lexer.KtTokens
import org.jetbrains.kotlin.psi.KtFile
import org.jetbrains.kotlin.psi.KtStringTemplateExpression
import org.jetbrains.kotlin.psi.KtTreeVisitorVoid
import org.jetbrains.kotlin.psi.psiUtil.endOffset
import org.jetbrains.kotlin.psi.psiUtil.startOffset

/**
 * Tokenizer traverses a Kotlin parse tree (which blessedly contains whitespaces and comments,
 * unlike Javac) and constructs a list of 'Tok's.
 *
 * The google-java-format infra expects newline Toks to be separate from maximal-whitespace Toks,
 * but Kotlin emits them together. So, we split them using Java's \R regex matcher. We don't use
 * 'split' et al. because we want Toks for the newlines themselves.
 *
 * How each visited element is turned into Toks:
 * - a [PsiComment] becomes one Tok with `isToken = false`;
 * - a [KtStringTemplateExpression] becomes one Tok with `isToken = true`, whose `originalText` is
 *   first passed through `WhitespaceTombstones.replaceTrailingWhitespaceWithTombstone`;
 * - a [PsiWhiteSpace] leaf becomes one Tok per line break and one Tok per run of spaces, each with
 *   `index = -1` and `isToken = false`;
 * - any other [LeafPsiElement] becomes one Tok with `isToken = true`;
 * - every other element produces no Tok of its own and is only handed to `super.visitElement`.
 *
 * For comments and string templates the visitor returns without delegating to
 * `super.visitElement`, so (assuming the base class is what recurses into children) nothing below
 * them is tokenized separately.
 *
 * Every Tok is created with `column = 0` and `kind = KtTokens.EOF`.
 *
 * @param fileText the text the `originalText` of each Tok is sliced from, using the PSI offsets of
 *   the visited element; presumably the same text [file] was parsed from — confirm with callers.
 * @property file stored as given; this class does not read it itself.
 */
class Tokenizer(private val fileText: String, val file: KtFile) : KtTreeVisitorVoid() {

  companion object {
    /**
     * Matches either a single line break (`\R`) or a run of one or more space characters.
     *
     * NOTE(review): whitespace characters that are neither (a tab, for instance) match nothing and
     * therefore produce no Tok at all — confirm that this is intended.
     */
    private val WHITESPACE_NEWLINE_REGEX: Pattern = Pattern.compile("\\R|( )+")
  }

  /** Toks produced so far, in the order their elements were visited. */
  val toks: MutableList<KotlinTok> = mutableListOf()

  /**
   * Index given to the next comment, string-template or token Tok. It is incremented after each
   * such Tok is added; whitespace Toks carry `-1` instead and do not advance it.
   */
  var index: Int = 0
    private set

  /**
   * Appends the Tok(s) for [element] to [toks] and, unless [element] is a comment or a string
   * template, continues with `super.visitElement`.
   *
   * The element's text is read only in the branches that actually build a Tok. Composite elements
   * never need it, and asking a composite PSI element for its text means assembling the text of its
   * entire subtree.
   */
  override fun visitElement(element: PsiElement) {
    when (element) {
      is PsiComment -> {
        addIndexedTok(element, originalText = sourceTextOf(element), isToken = false)
        // A comment is a single Tok; do not continue into super.visitElement.
        return
      }
      is KtStringTemplateExpression -> {
        addIndexedTok(
            element,
            originalText =
                WhitespaceTombstones.replaceTrailingWhitespaceWithTombstone(sourceTextOf(element)),
            isToken = true,
        )
        // The whole template is a single Tok; do not continue into super.visitElement.
        return
      }
      is LeafPsiElement -> {
        if (element is PsiWhiteSpace) {
          addWhitespaceToks(element)
        } else {
          addIndexedTok(element, originalText = sourceTextOf(element), isToken = true)
        }
      }
    }
    super.visitElement(element)
  }

  /** Returns the slice of [fileText] covered by [element]'s start and end offsets. */
  private fun sourceTextOf(element: PsiElement): String =
      fileText.substring(element.startOffset, element.endOffset)

  /**
   * Adds one Tok for [element] carrying the current [index], then advances [index] by one.
   *
   * @param originalText value stored as the Tok's `originalText`.
   * @param isToken value stored as the Tok's `isToken`.
   */
  private fun addIndexedTok(element: PsiElement, originalText: String, isToken: Boolean) {
    toks.add(
        KotlinTok(
            index = index,
            originalText = originalText,
            text = element.text,
            position = element.startOffset,
            column = 0,
            isToken = isToken,
            kind = KtTokens.EOF,
        ),
    )
    index++
  }

  /**
   * Splits [whitespace] with [WHITESPACE_NEWLINE_REGEX] and adds one Tok per match. These Toks use
   * `index = -1` and leave [index] untouched.
   */
  private fun addWhitespaceToks(whitespace: PsiElement) {
    val elementStart = whitespace.startOffset
    val matcher = WHITESPACE_NEWLINE_REGEX.matcher(whitespace.text)
    while (matcher.find()) {
      // Matcher offsets are relative to the element's text; shift them to file offsets.
      val pieceStart = elementStart + matcher.start()
      val pieceEnd = elementStart + matcher.end()
      toks.add(
          KotlinTok(
              index = -1,
              originalText = fileText.substring(pieceStart, pieceEnd),
              text = matcher.group(),
              position = pieceStart,
              column = 0,
              isToken = false,
              kind = KtTokens.EOF,
          ),
      )
    }
  }
}