// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package language contains methods and information about the different
// programming languages the comment parser supports.
package language

import (
	"path/filepath"
	"strings"
)

// Language is the programming language we're grabbing the comments from.
type Language int

// Languages we can retrieve comments from.
28*46c4c49dSIbrahim Kanoucheconst ( 29*46c4c49dSIbrahim Kanouche Unknown Language = iota 30*46c4c49dSIbrahim Kanouche AppleScript 31*46c4c49dSIbrahim Kanouche Assembly 32*46c4c49dSIbrahim Kanouche BLIF // Berkley Logic Interface Format 33*46c4c49dSIbrahim Kanouche Batch 34*46c4c49dSIbrahim Kanouche C 35*46c4c49dSIbrahim Kanouche Clif 36*46c4c49dSIbrahim Kanouche Clojure 37*46c4c49dSIbrahim Kanouche CMake 38*46c4c49dSIbrahim Kanouche CSharp 39*46c4c49dSIbrahim Kanouche Dart 40*46c4c49dSIbrahim Kanouche EDIF // Electronic Design Interchange Format 41*46c4c49dSIbrahim Kanouche Elixir 42*46c4c49dSIbrahim Kanouche Flex 43*46c4c49dSIbrahim Kanouche Fortran 44*46c4c49dSIbrahim Kanouche GLSLF // OpenGL Shading Language 45*46c4c49dSIbrahim Kanouche Go 46*46c4c49dSIbrahim Kanouche HTML 47*46c4c49dSIbrahim Kanouche Haskell 48*46c4c49dSIbrahim Kanouche Java 49*46c4c49dSIbrahim Kanouche JavaScript 50*46c4c49dSIbrahim Kanouche Kotlin 51*46c4c49dSIbrahim Kanouche LEF // Library Exchange Format 52*46c4c49dSIbrahim Kanouche Lisp 53*46c4c49dSIbrahim Kanouche Markdown 54*46c4c49dSIbrahim Kanouche Matlab 55*46c4c49dSIbrahim Kanouche MySQL 56*46c4c49dSIbrahim Kanouche NinjaBuild 57*46c4c49dSIbrahim Kanouche ObjectiveC 58*46c4c49dSIbrahim Kanouche Perl 59*46c4c49dSIbrahim Kanouche Python 60*46c4c49dSIbrahim Kanouche R 61*46c4c49dSIbrahim Kanouche Ruby 62*46c4c49dSIbrahim Kanouche Rust 63*46c4c49dSIbrahim Kanouche SDC // Synopsis Design Constraint 64*46c4c49dSIbrahim Kanouche SDF // Standard Delay Format 65*46c4c49dSIbrahim Kanouche SPEF // Standard Parasitics Exchange Format 66*46c4c49dSIbrahim Kanouche SQL 67*46c4c49dSIbrahim Kanouche SWIG 68*46c4c49dSIbrahim Kanouche Shader 69*46c4c49dSIbrahim Kanouche Shell 70*46c4c49dSIbrahim Kanouche Swift 71*46c4c49dSIbrahim Kanouche SystemVerilog 72*46c4c49dSIbrahim Kanouche TCL 73*46c4c49dSIbrahim Kanouche TypeScript 74*46c4c49dSIbrahim Kanouche Verilog 75*46c4c49dSIbrahim Kanouche XDC // Xilinx Design Constraint files 76*46c4c49dSIbrahim 
Kanouche Yacc 77*46c4c49dSIbrahim Kanouche Yaml 78*46c4c49dSIbrahim Kanouche) 79*46c4c49dSIbrahim Kanouche 80*46c4c49dSIbrahim Kanouche// style is the comment styles that a language uses. 81*46c4c49dSIbrahim Kanouchetype style int 82*46c4c49dSIbrahim Kanouche 83*46c4c49dSIbrahim Kanouche// Comment styles. 84*46c4c49dSIbrahim Kanoucheconst ( 85*46c4c49dSIbrahim Kanouche unknown style = iota 86*46c4c49dSIbrahim Kanouche applescript // -- ... and (* ... *) 87*46c4c49dSIbrahim Kanouche batch // @REM 88*46c4c49dSIbrahim Kanouche bcpl // // ... and /* ... */ 89*46c4c49dSIbrahim Kanouche cmake // # ... and #[[ ... ]] 90*46c4c49dSIbrahim Kanouche fortran // ! ... 91*46c4c49dSIbrahim Kanouche hash // # ... 92*46c4c49dSIbrahim Kanouche haskell // -- ... and {- ... -} 93*46c4c49dSIbrahim Kanouche html // <!-- ... --> 94*46c4c49dSIbrahim Kanouche lisp // ;; ... 95*46c4c49dSIbrahim Kanouche matlab // % ... 96*46c4c49dSIbrahim Kanouche mysql // # ... and /* ... */ 97*46c4c49dSIbrahim Kanouche ruby // # ... and =begin ... =end 98*46c4c49dSIbrahim Kanouche shell // # ... and %{ ... %} 99*46c4c49dSIbrahim Kanouche sql // -- ... and /* ... */ 100*46c4c49dSIbrahim Kanouche) 101*46c4c49dSIbrahim Kanouche 102*46c4c49dSIbrahim Kanouche// ClassifyLanguage determines what language the source code was written in. It 103*46c4c49dSIbrahim Kanouche// does this by looking at the file's extension. 104*46c4c49dSIbrahim Kanouchefunc ClassifyLanguage(filename string) Language { 105*46c4c49dSIbrahim Kanouche ext := strings.ToLower(filepath.Ext(filename)) 106*46c4c49dSIbrahim Kanouche if len(ext) == 0 || ext[0] != '.' { 107*46c4c49dSIbrahim Kanouche return Unknown 108*46c4c49dSIbrahim Kanouche } 109*46c4c49dSIbrahim Kanouche 110*46c4c49dSIbrahim Kanouche switch ext[1:] { // Skip the '.'. 
111*46c4c49dSIbrahim Kanouche case "applescript": 112*46c4c49dSIbrahim Kanouche return AppleScript 113*46c4c49dSIbrahim Kanouche case "bat": 114*46c4c49dSIbrahim Kanouche return Batch 115*46c4c49dSIbrahim Kanouche case "blif", "eblif": 116*46c4c49dSIbrahim Kanouche return BLIF 117*46c4c49dSIbrahim Kanouche case "c", "cc", "cpp", "c++", "h", "hh", "hpp": 118*46c4c49dSIbrahim Kanouche return C 119*46c4c49dSIbrahim Kanouche case "clif": 120*46c4c49dSIbrahim Kanouche return Clif 121*46c4c49dSIbrahim Kanouche case "cmake": 122*46c4c49dSIbrahim Kanouche return CMake 123*46c4c49dSIbrahim Kanouche case "cs": 124*46c4c49dSIbrahim Kanouche return CSharp 125*46c4c49dSIbrahim Kanouche case "dart": 126*46c4c49dSIbrahim Kanouche return Dart 127*46c4c49dSIbrahim Kanouche case "ex", "exs": 128*46c4c49dSIbrahim Kanouche return Elixir 129*46c4c49dSIbrahim Kanouche case "f", "f90", "f95": 130*46c4c49dSIbrahim Kanouche return Fortran 131*46c4c49dSIbrahim Kanouche case "glslf": 132*46c4c49dSIbrahim Kanouche return GLSLF 133*46c4c49dSIbrahim Kanouche case "go": 134*46c4c49dSIbrahim Kanouche return Go 135*46c4c49dSIbrahim Kanouche case "hs": 136*46c4c49dSIbrahim Kanouche return Haskell 137*46c4c49dSIbrahim Kanouche case "html", "htm", "ng", "sgml": 138*46c4c49dSIbrahim Kanouche return HTML 139*46c4c49dSIbrahim Kanouche case "java": 140*46c4c49dSIbrahim Kanouche return Java 141*46c4c49dSIbrahim Kanouche case "js": 142*46c4c49dSIbrahim Kanouche return JavaScript 143*46c4c49dSIbrahim Kanouche case "kt": 144*46c4c49dSIbrahim Kanouche return Kotlin 145*46c4c49dSIbrahim Kanouche case "l": 146*46c4c49dSIbrahim Kanouche return Flex 147*46c4c49dSIbrahim Kanouche case "lef": 148*46c4c49dSIbrahim Kanouche return LEF 149*46c4c49dSIbrahim Kanouche case "lisp", "el", "clj": 150*46c4c49dSIbrahim Kanouche return Lisp 151*46c4c49dSIbrahim Kanouche case "m", "mm": 152*46c4c49dSIbrahim Kanouche return ObjectiveC 153*46c4c49dSIbrahim Kanouche case "md": 154*46c4c49dSIbrahim Kanouche return Markdown 
155*46c4c49dSIbrahim Kanouche case "gn": 156*46c4c49dSIbrahim Kanouche return NinjaBuild 157*46c4c49dSIbrahim Kanouche case "pl", "pm": 158*46c4c49dSIbrahim Kanouche return Perl 159*46c4c49dSIbrahim Kanouche case "py", "pi": 160*46c4c49dSIbrahim Kanouche return Python 161*46c4c49dSIbrahim Kanouche case "r": 162*46c4c49dSIbrahim Kanouche return R 163*46c4c49dSIbrahim Kanouche case "rb": 164*46c4c49dSIbrahim Kanouche return Ruby 165*46c4c49dSIbrahim Kanouche case "rs": 166*46c4c49dSIbrahim Kanouche return Rust 167*46c4c49dSIbrahim Kanouche case "s": 168*46c4c49dSIbrahim Kanouche return Assembly 169*46c4c49dSIbrahim Kanouche case "sdf": 170*46c4c49dSIbrahim Kanouche return SDF 171*46c4c49dSIbrahim Kanouche case "sh": 172*46c4c49dSIbrahim Kanouche return Shell 173*46c4c49dSIbrahim Kanouche case "shader": 174*46c4c49dSIbrahim Kanouche return Shader 175*46c4c49dSIbrahim Kanouche case "sql": 176*46c4c49dSIbrahim Kanouche return SQL 177*46c4c49dSIbrahim Kanouche case "swift": 178*46c4c49dSIbrahim Kanouche return Swift 179*46c4c49dSIbrahim Kanouche case "swig": 180*46c4c49dSIbrahim Kanouche return SWIG 181*46c4c49dSIbrahim Kanouche case "sv", "svh": 182*46c4c49dSIbrahim Kanouche return SystemVerilog 183*46c4c49dSIbrahim Kanouche case "tcl", "sdc", "xdc": 184*46c4c49dSIbrahim Kanouche return TCL 185*46c4c49dSIbrahim Kanouche case "ts", "tsx": 186*46c4c49dSIbrahim Kanouche return TypeScript 187*46c4c49dSIbrahim Kanouche case "v", "vh": 188*46c4c49dSIbrahim Kanouche return Verilog 189*46c4c49dSIbrahim Kanouche case "y": 190*46c4c49dSIbrahim Kanouche return Yacc 191*46c4c49dSIbrahim Kanouche case "yaml": 192*46c4c49dSIbrahim Kanouche return Yaml 193*46c4c49dSIbrahim Kanouche } 194*46c4c49dSIbrahim Kanouche return Unknown 195*46c4c49dSIbrahim Kanouche} 196*46c4c49dSIbrahim Kanouche 197*46c4c49dSIbrahim Kanouche// commentStyle returns the language's comment style. 
198*46c4c49dSIbrahim Kanouchefunc (lang Language) commentStyle() style { 199*46c4c49dSIbrahim Kanouche switch lang { 200*46c4c49dSIbrahim Kanouche case Assembly, C, CSharp, Dart, Flex, GLSLF, Go, Java, JavaScript, Kotlin, ObjectiveC, Rust, Shader, Swift, SWIG, TypeScript, Yacc, Verilog, SystemVerilog, SDF, SPEF: 201*46c4c49dSIbrahim Kanouche return bcpl 202*46c4c49dSIbrahim Kanouche case Batch: 203*46c4c49dSIbrahim Kanouche return batch 204*46c4c49dSIbrahim Kanouche case BLIF, TCL: 205*46c4c49dSIbrahim Kanouche return hash 206*46c4c49dSIbrahim Kanouche case CMake: 207*46c4c49dSIbrahim Kanouche return cmake 208*46c4c49dSIbrahim Kanouche case Fortran: 209*46c4c49dSIbrahim Kanouche return fortran 210*46c4c49dSIbrahim Kanouche case Haskell: 211*46c4c49dSIbrahim Kanouche return haskell 212*46c4c49dSIbrahim Kanouche case HTML, Markdown: 213*46c4c49dSIbrahim Kanouche return html 214*46c4c49dSIbrahim Kanouche case Clojure, Lisp: 215*46c4c49dSIbrahim Kanouche return lisp 216*46c4c49dSIbrahim Kanouche case Ruby: 217*46c4c49dSIbrahim Kanouche return ruby 218*46c4c49dSIbrahim Kanouche case Clif, Elixir, NinjaBuild, Perl, Python, R, Shell, Yaml: 219*46c4c49dSIbrahim Kanouche return shell 220*46c4c49dSIbrahim Kanouche case Matlab: 221*46c4c49dSIbrahim Kanouche return matlab 222*46c4c49dSIbrahim Kanouche case MySQL: 223*46c4c49dSIbrahim Kanouche return mysql 224*46c4c49dSIbrahim Kanouche case SQL: 225*46c4c49dSIbrahim Kanouche return sql 226*46c4c49dSIbrahim Kanouche } 227*46c4c49dSIbrahim Kanouche return unknown 228*46c4c49dSIbrahim Kanouche} 229*46c4c49dSIbrahim Kanouche 230*46c4c49dSIbrahim Kanouche// SingleLineCommentStart returns the starting string of a single line comment 231*46c4c49dSIbrahim Kanouche// for the given language. There is no equivalent "End" method, because it's 232*46c4c49dSIbrahim Kanouche// the end of line. 
233*46c4c49dSIbrahim Kanouchefunc (lang Language) SingleLineCommentStart() string { 234*46c4c49dSIbrahim Kanouche switch lang.commentStyle() { 235*46c4c49dSIbrahim Kanouche case applescript, haskell, sql: 236*46c4c49dSIbrahim Kanouche return "--" 237*46c4c49dSIbrahim Kanouche case batch: 238*46c4c49dSIbrahim Kanouche return "@REM" 239*46c4c49dSIbrahim Kanouche case bcpl: 240*46c4c49dSIbrahim Kanouche return "//" 241*46c4c49dSIbrahim Kanouche case fortran: 242*46c4c49dSIbrahim Kanouche return "!" 243*46c4c49dSIbrahim Kanouche case lisp: 244*46c4c49dSIbrahim Kanouche return ";" 245*46c4c49dSIbrahim Kanouche case matlab: 246*46c4c49dSIbrahim Kanouche return "%" 247*46c4c49dSIbrahim Kanouche case shell, ruby, cmake, mysql, hash: 248*46c4c49dSIbrahim Kanouche return "#" 249*46c4c49dSIbrahim Kanouche } 250*46c4c49dSIbrahim Kanouche return "" 251*46c4c49dSIbrahim Kanouche} 252*46c4c49dSIbrahim Kanouche 253*46c4c49dSIbrahim Kanouche// MultilineCommentStart returns the starting string of a multiline comment for 254*46c4c49dSIbrahim Kanouche// the given language. 
255*46c4c49dSIbrahim Kanouchefunc (lang Language) MultilineCommentStart() string { 256*46c4c49dSIbrahim Kanouche switch lang.commentStyle() { 257*46c4c49dSIbrahim Kanouche case applescript: 258*46c4c49dSIbrahim Kanouche return "(*" 259*46c4c49dSIbrahim Kanouche case bcpl, mysql: 260*46c4c49dSIbrahim Kanouche if lang != Rust { 261*46c4c49dSIbrahim Kanouche return "/*" 262*46c4c49dSIbrahim Kanouche } 263*46c4c49dSIbrahim Kanouche case cmake: 264*46c4c49dSIbrahim Kanouche return "#[[" 265*46c4c49dSIbrahim Kanouche case haskell: 266*46c4c49dSIbrahim Kanouche return "{-" 267*46c4c49dSIbrahim Kanouche case html: 268*46c4c49dSIbrahim Kanouche return "<!--" 269*46c4c49dSIbrahim Kanouche case matlab: 270*46c4c49dSIbrahim Kanouche return "%{" 271*46c4c49dSIbrahim Kanouche case ruby: 272*46c4c49dSIbrahim Kanouche return "=begin" 273*46c4c49dSIbrahim Kanouche } 274*46c4c49dSIbrahim Kanouche return "" 275*46c4c49dSIbrahim Kanouche} 276*46c4c49dSIbrahim Kanouche 277*46c4c49dSIbrahim Kanouche// MultilineCommentEnd returns the ending string of a multiline comment for the 278*46c4c49dSIbrahim Kanouche// given language. 
279*46c4c49dSIbrahim Kanouchefunc (lang Language) MultilineCommentEnd() string { 280*46c4c49dSIbrahim Kanouche switch lang.commentStyle() { 281*46c4c49dSIbrahim Kanouche case applescript: 282*46c4c49dSIbrahim Kanouche return "*)" 283*46c4c49dSIbrahim Kanouche case bcpl, mysql: 284*46c4c49dSIbrahim Kanouche if lang != Rust { 285*46c4c49dSIbrahim Kanouche return "*/" 286*46c4c49dSIbrahim Kanouche } 287*46c4c49dSIbrahim Kanouche case cmake: 288*46c4c49dSIbrahim Kanouche return "]]" 289*46c4c49dSIbrahim Kanouche case haskell: 290*46c4c49dSIbrahim Kanouche return "-}" 291*46c4c49dSIbrahim Kanouche case html: 292*46c4c49dSIbrahim Kanouche return "-->" 293*46c4c49dSIbrahim Kanouche case matlab: 294*46c4c49dSIbrahim Kanouche return "%}" 295*46c4c49dSIbrahim Kanouche case ruby: 296*46c4c49dSIbrahim Kanouche return "=end" 297*46c4c49dSIbrahim Kanouche } 298*46c4c49dSIbrahim Kanouche return "" 299*46c4c49dSIbrahim Kanouche} 300*46c4c49dSIbrahim Kanouche 301*46c4c49dSIbrahim Kanouche// QuoteCharacter returns 'true' if the character is considered the beginning 302*46c4c49dSIbrahim Kanouche// of a string in the given language. The second return value is true if the 303*46c4c49dSIbrahim Kanouche// string allows for escaping. 304*46c4c49dSIbrahim Kanouchefunc (lang Language) QuoteCharacter(quote rune) (ok bool, escape bool) { 305*46c4c49dSIbrahim Kanouche switch quote { 306*46c4c49dSIbrahim Kanouche case '"', '\'': 307*46c4c49dSIbrahim Kanouche return true, true 308*46c4c49dSIbrahim Kanouche case '`': 309*46c4c49dSIbrahim Kanouche if lang == Go { 310*46c4c49dSIbrahim Kanouche return true, false 311*46c4c49dSIbrahim Kanouche } 312*46c4c49dSIbrahim Kanouche } 313*46c4c49dSIbrahim Kanouche return false, false 314*46c4c49dSIbrahim Kanouche} 315*46c4c49dSIbrahim Kanouche 316*46c4c49dSIbrahim Kanouche// NestedComments returns true if the language allows for nested multiline comments. 
317*46c4c49dSIbrahim Kanouchefunc (lang Language) NestedComments() bool { 318*46c4c49dSIbrahim Kanouche return lang == Swift 319*46c4c49dSIbrahim Kanouche} 320