1// Copyright 2017 Google Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15// Package language contains methods and information about the different 16// programming languages the comment parser supports. 17package language 18 19import ( 20 "path/filepath" 21 "strings" 22) 23 24// Language is the progamming language we're grabbing the comments from. 25type Language int 26 27// Languages we can retrieve comments from. 28const ( 29 Unknown Language = iota 30 AppleScript 31 Assembly 32 BLIF // Berkley Logic Interface Format 33 Batch 34 C 35 Clif 36 Clojure 37 CMake 38 CSharp 39 Dart 40 EDIF // Electronic Design Interchange Format 41 Elixir 42 Flex 43 Fortran 44 GLSLF // OpenGL Shading Language 45 Go 46 HTML 47 Haskell 48 Java 49 JavaScript 50 Kotlin 51 LEF // Library Exchange Format 52 Lisp 53 Markdown 54 Matlab 55 MySQL 56 NinjaBuild 57 ObjectiveC 58 Perl 59 Python 60 R 61 Ruby 62 Rust 63 SDC // Synopsis Design Constraint 64 SDF // Standard Delay Format 65 SPEF // Standard Parasitics Exchange Format 66 SQL 67 SWIG 68 Shader 69 Shell 70 Swift 71 SystemVerilog 72 TCL 73 TypeScript 74 Verilog 75 XDC // Xilinx Design Constraint files 76 Yacc 77 Yaml 78) 79 80// style is the comment styles that a language uses. 81type style int 82 83// Comment styles. 84const ( 85 unknown style = iota 86 applescript // -- ... and (* ... *) 87 batch // @REM 88 bcpl // // ... and /* ... */ 89 cmake // # ... and #[[ ... ]] 90 fortran // ! ... 91 hash // # ... 92 haskell // -- ... and {- ... -} 93 html // <!-- ... --> 94 lisp // ;; ... 95 matlab // % ... 96 mysql // # ... and /* ... */ 97 ruby // # ... and =begin ... =end 98 shell // # ... and %{ ... %} 99 sql // -- ... and /* ... */ 100) 101 102// ClassifyLanguage determines what language the source code was written in. It 103// does this by looking at the file's extension. 104func ClassifyLanguage(filename string) Language { 105 ext := strings.ToLower(filepath.Ext(filename)) 106 if len(ext) == 0 || ext[0] != '.' { 107 return Unknown 108 } 109 110 switch ext[1:] { // Skip the '.'. 111 case "applescript": 112 return AppleScript 113 case "bat": 114 return Batch 115 case "blif", "eblif": 116 return BLIF 117 case "c", "cc", "cpp", "c++", "h", "hh", "hpp": 118 return C 119 case "clif": 120 return Clif 121 case "cmake": 122 return CMake 123 case "cs": 124 return CSharp 125 case "dart": 126 return Dart 127 case "ex", "exs": 128 return Elixir 129 case "f", "f90", "f95": 130 return Fortran 131 case "glslf": 132 return GLSLF 133 case "go": 134 return Go 135 case "hs": 136 return Haskell 137 case "html", "htm", "ng", "sgml": 138 return HTML 139 case "java": 140 return Java 141 case "js": 142 return JavaScript 143 case "kt": 144 return Kotlin 145 case "l": 146 return Flex 147 case "lef": 148 return LEF 149 case "lisp", "el", "clj": 150 return Lisp 151 case "m", "mm": 152 return ObjectiveC 153 case "md": 154 return Markdown 155 case "gn": 156 return NinjaBuild 157 case "pl", "pm": 158 return Perl 159 case "py", "pi": 160 return Python 161 case "r": 162 return R 163 case "rb": 164 return Ruby 165 case "rs": 166 return Rust 167 case "s": 168 return Assembly 169 case "sdf": 170 return SDF 171 case "sh": 172 return Shell 173 case "shader": 174 return Shader 175 case "sql": 176 return SQL 177 case "swift": 178 return Swift 179 case "swig": 180 return SWIG 181 case "sv", "svh": 182 return SystemVerilog 183 case "tcl", "sdc", "xdc": 184 return TCL 185 case "ts", "tsx": 186 return TypeScript 187 case "v", "vh": 188 return Verilog 189 case "y": 190 return Yacc 191 case "yaml": 192 return Yaml 193 } 194 return Unknown 195} 196 197// commentStyle returns the language's comment style. 198func (lang Language) commentStyle() style { 199 switch lang { 200 case Assembly, C, CSharp, Dart, Flex, GLSLF, Go, Java, JavaScript, Kotlin, ObjectiveC, Rust, Shader, Swift, SWIG, TypeScript, Yacc, Verilog, SystemVerilog, SDF, SPEF: 201 return bcpl 202 case Batch: 203 return batch 204 case BLIF, TCL: 205 return hash 206 case CMake: 207 return cmake 208 case Fortran: 209 return fortran 210 case Haskell: 211 return haskell 212 case HTML, Markdown: 213 return html 214 case Clojure, Lisp: 215 return lisp 216 case Ruby: 217 return ruby 218 case Clif, Elixir, NinjaBuild, Perl, Python, R, Shell, Yaml: 219 return shell 220 case Matlab: 221 return matlab 222 case MySQL: 223 return mysql 224 case SQL: 225 return sql 226 } 227 return unknown 228} 229 230// SingleLineCommentStart returns the starting string of a single line comment 231// for the given language. There is no equivalent "End" method, because it's 232// the end of line. 233func (lang Language) SingleLineCommentStart() string { 234 switch lang.commentStyle() { 235 case applescript, haskell, sql: 236 return "--" 237 case batch: 238 return "@REM" 239 case bcpl: 240 return "//" 241 case fortran: 242 return "!" 243 case lisp: 244 return ";" 245 case matlab: 246 return "%" 247 case shell, ruby, cmake, mysql, hash: 248 return "#" 249 } 250 return "" 251} 252 253// MultilineCommentStart returns the starting string of a multiline comment for 254// the given language. 255func (lang Language) MultilineCommentStart() string { 256 switch lang.commentStyle() { 257 case applescript: 258 return "(*" 259 case bcpl, mysql: 260 if lang != Rust { 261 return "/*" 262 } 263 case cmake: 264 return "#[[" 265 case haskell: 266 return "{-" 267 case html: 268 return "<!--" 269 case matlab: 270 return "%{" 271 case ruby: 272 return "=begin" 273 } 274 return "" 275} 276 277// MultilineCommentEnd returns the ending string of a multiline comment for the 278// given language. 279func (lang Language) MultilineCommentEnd() string { 280 switch lang.commentStyle() { 281 case applescript: 282 return "*)" 283 case bcpl, mysql: 284 if lang != Rust { 285 return "*/" 286 } 287 case cmake: 288 return "]]" 289 case haskell: 290 return "-}" 291 case html: 292 return "-->" 293 case matlab: 294 return "%}" 295 case ruby: 296 return "=end" 297 } 298 return "" 299} 300 301// QuoteCharacter returns 'true' if the character is considered the beginning 302// of a string in the given language. The second return value is true if the 303// string allows for escaping. 304func (lang Language) QuoteCharacter(quote rune) (ok bool, escape bool) { 305 switch quote { 306 case '"', '\'': 307 return true, true 308 case '`': 309 if lang == Go { 310 return true, false 311 } 312 } 313 return false, false 314} 315 316// NestedComments returns true if the language allows for nested multiline comments. 317func (lang Language) NestedComments() bool { 318 return lang == Swift 319} 320