/*
 [The "BSD license"]
 Copyright (c) 2010 Kyle Yetter
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.antlr.codegen;

import org.antlr.Tool;
import org.antlr.tool.Grammar;
import org.stringtemplate.v4.AttributeRenderer;
import org.stringtemplate.v4.ST;
import org.stringtemplate.v4.STGroup;

import java.io.IOException;
import java.util.*;

/** Code generation target that emits Ruby recognizers.
 *
 *  Besides the usual {@link Target} overrides, this class registers a
 *  {@link RubyRenderer} on the template group so that templates can
 *  reformat strings (snake_case, CamelCase, quoted Ruby strings, ...)
 *  through named formats.
 */
public class RubyTarget extends Target
{
    /** A set of ruby keywords which are used to escape labels and method names
     *  which will cause parse errors in the ruby source
     */
    public static final Set<String> rubyKeywords =
        new HashSet<String>( Arrays.asList(
            "alias",    "END",    "retry",
            "and",      "ensure", "return",
            "BEGIN",    "false",  "self",
            "begin",    "for",    "super",
            "break",    "if",     "then",
            "case",     "in",     "true",
            "class",    "module", "undef",
            "def",      "next",   "unless",
            "defined?", "nil",    "until",
            "do",       "not",    "when",
            "else",     "or",     "while",
            "elsif",    "redo",   "yield",
            "end",      "rescue"
        ) );

    /** Maps a combined grammar's name to its "all" action map so the
     *  implicit lexer generated afterwards can pick the same actions up;
     *  see {@link #genRecognizerFile}.
     */
    public static Map<String, Map<String, Object>> sharedActionBlocks = new HashMap<String, Map<String, Object>>();

    /** Attribute renderer applied to every String template attribute.
     *  The format name given in the template selects one of the private
     *  conversion methods below.
     */
    public class RubyRenderer implements AttributeRenderer
    {
        /** Escape sequence (or the literal character) to emit inside a
         *  double-quoted Ruby string for each char value 0..255.
         */
        protected String[] rubyCharValueEscape = new String[256];

        /** Builds the escape table used by the "rubyString" format. */
        public RubyRenderer() {
            // control characters: two-digit hex escapes
            for ( int i = 0; i < 16; i++ ) {
                rubyCharValueEscape[ i ] = "\\x0" + Integer.toHexString( i );
            }
            for ( int i = 16; i < 32; i++ ) {
                rubyCharValueEscape[ i ] = "\\x" + Integer.toHexString( i );
            }
            // printable ASCII is emitted as-is
            for ( char i = 32; i < 127; i++ ) {
                rubyCharValueEscape[ i ] = Character.toString( i );
            }
            // DEL and the upper half of the 8-bit range: hex escapes
            for ( int i = 127; i < 256; i++ ) {
                rubyCharValueEscape[ i ] = "\\x" + Integer.toHexString( i );
            }

            // characters with a dedicated escape override the defaults above
            rubyCharValueEscape['\n'] = "\\n";
            rubyCharValueEscape['\r'] = "\\r";
            rubyCharValueEscape['\t'] = "\\t";
            rubyCharValueEscape['\b'] = "\\b";
            rubyCharValueEscape['\f'] = "\\f";
            rubyCharValueEscape['\\'] = "\\\\";
            rubyCharValueEscape['"'] = "\\\"";
            // '#' starts interpolation ("#{...}", "#$x", "#@x") inside a
            // double-quoted Ruby string; "\#" is always a plain '#'
            rubyCharValueEscape['#'] = "\\#";
        }

        /** Renders {@code o} according to {@code formatName}.
         *
         *  Supported format names: snakecase, camelcase, subcamelcase,
         *  constant, platform, lexerRule, constantPath, rubyString, label
         *  and symbol.
         *
         *  @param o          the attribute value; its toString() is formatted
         *  @param formatName the format requested by the template, or null
         *                    to return {@code o.toString()} unchanged
         *  @param locale     ignored; generated identifiers are
         *                    locale-independent
         *  @return the formatted text; an empty input string is returned
         *          unchanged whatever the format name is
         *  @throws IllegalArgumentException if the input is non-empty and
         *          the format name is not one of the supported names
         */
        @Override
        public String toString( Object o, String formatName, Locale locale ) {
            if ( formatName == null ) {
                return o.toString();
            }

            String idString = o.toString();

            if ( idString.length() == 0 ) return idString;

            if ( formatName.equals( "snakecase" ) ) {
                return snakecase( idString );
            } else if ( formatName.equals( "camelcase" ) ) {
                return camelcase( idString );
            } else if ( formatName.equals( "subcamelcase" ) ) {
                return subcamelcase( idString );
            } else if ( formatName.equals( "constant" ) ) {
                return constantcase( idString );
            } else if ( formatName.equals( "platform" ) ) {
                return platform( idString );
            } else if ( formatName.equals( "lexerRule" ) ) {
                return lexerRule( idString );
            } else if ( formatName.equals( "constantPath" ) ) {
                return constantPath( idString );
            } else if ( formatName.equals( "rubyString" ) ) {
                return rubyString( idString );
            } else if ( formatName.equals( "label" ) ) {
                return label( idString );
            } else if ( formatName.equals( "symbol" ) ) {
                return symbol( idString );
            } else {
                throw new IllegalArgumentException( "Unsupported format name: " + formatName );
            }
        }

        /** Takes an input string, which is presumed to contain a word
         *  that may be camelcased, and converts it to snake_case
         *  underscore style.
         *
         *  algorithm --
         *    iterate through the string with a sliding window 3 chars wide
         *
         *  example -- aGUIWhatNot
         *    c   c+1 c+2  action
         *    a   G        << 'a' << '_'  // a lower-upper word edge
         *    G   U   I    << 'g'
         *    U   I   W    << 'u'
         *    I   W   h    << 'i' << '_'  // the last character in an acronym run of uppers
         *    W   h        << 'w'
         *    ... and so on
         *
         *  Whitespace is dropped, and an underscore is also inserted at
         *  letter/digit and letter/whitespace boundaries.
         */
        private String snakecase( String value ) {
            int length = value.length();
            if ( length == 0 ) return value;
            // Locale.ROOT keeps identifiers stable under locales with special
            // casing rules (e.g. Turkish dotted/dotless i)
            if ( length == 1 ) return value.toLowerCase( Locale.ROOT );

            StringBuilder buffer = new StringBuilder();
            int cliff = length - 1;   // index of the last character

            for ( int i = 0; i < cliff; i++ ) {
                char cur = value.charAt( i );
                char next = value.charAt( i + 1 );

                if ( Character.isLetter( cur ) ) {
                    buffer.append( Character.toLowerCase( cur ) );

                    if ( Character.isDigit( next ) || Character.isWhitespace( next ) ) {
                        buffer.append( '_' );
                    } else if ( Character.isLowerCase( cur ) && Character.isUpperCase( next ) ) {
                        // at camelcase word edge
                        buffer.append( '_' );
                    } else if ( ( i < cliff - 1 ) && Character.isUpperCase( cur ) && Character.isUpperCase( next ) ) {
                        // cur is part of an acronym
                        char peek = value.charAt( i + 2 );
                        if ( Character.isLowerCase( peek ) ) {
                            /* if next is the start of word (indicated when peek is lowercase)
                               then the acronym must be completed by appending an underscore */
                            buffer.append( '_' );
                        }
                    }
                } else if ( Character.isDigit( cur ) ) {
                    buffer.append( cur );
                    if ( Character.isLetter( next ) ) {
                        buffer.append( '_' );
                    }
                } else if ( !Character.isWhitespace( cur ) ) {
                    // punctuation and other symbols pass through; whitespace is dropped
                    buffer.append( cur );
                }
            }

            // the last character has no successor, so it is handled outside the loop
            char last = value.charAt( cliff );
            if ( !Character.isWhitespace( last ) ) {
                buffer.append( Character.toLowerCase( last ) );
            }

            return buffer.toString();
        }

        /** Converts the value to SNAKE_CASE in all capitals. */
        private String constantcase( String value ) {
            return snakecase( value ).toUpperCase( Locale.ROOT );
        }

        /** Wraps the value in double underscores: {@code name} becomes {@code __name__}. */
        private String platform( String value ) {
            return ( "__" + value + "__" );
        }

        /** Renders the value as a Ruby symbol literal: {@code :name} when
         *  the value looks like a plain identifier (optionally ending in
         *  ?, ! or =), and the {@code %s(...)} form otherwise.
         */
        private String symbol( String value ) {
            if ( value.matches( "[a-zA-Z_]\\w*[\\?\\!\\=]?" ) ) {
                return ( ":" + value );
            } else {
                return ( "%s(" + value + ")" );
            }
        }

        /** Names the Ruby method for a lexer rule: the snake_case rule name
         *  followed by "!"; the rule named "Tokens" maps to "token!".
         */
        private String lexerRule( String value ) {
            if ( value.equals( "Tokens" ) ) {
                return "token!";
            } else {
                return ( snakecase( value ) + "!" );
            }
        }

        /** Replaces every "." in the value with "::". */
        private String constantPath( String value ) {
            return value.replaceAll( "\\.", "::" );
        }

        /** Renders the value as a double-quoted Ruby string literal.
         *
         *  Code points below 256 are translated through
         *  {@link #rubyCharValueEscape}. Anything above that range is
         *  written as a {@code \\u{hex}} escape (understood by Ruby 1.9
         *  and later) instead of overrunning the 256-entry table.
         */
        private String rubyString( String value ) {
            int len = value.length();
            StringBuilder buffer = new StringBuilder( len + 2 );

            buffer.append( '"' );
            for ( int i = 0; i < len; ) {
                // walk by code point so that a surrogate pair becomes one escape
                int codePoint = value.codePointAt( i );
                if ( codePoint < rubyCharValueEscape.length ) {
                    buffer.append( rubyCharValueEscape[ codePoint ] );
                } else {
                    buffer.append( "\\u{" )
                          .append( Integer.toHexString( codePoint ) )
                          .append( '}' );
                }
                i += Character.charCount( codePoint );
            }
            buffer.append( '"' );
            return buffer.toString();
        }

        /** Converts the value to CamelCase.
         *
         *  Whitespace and underscores are dropped and mark a word edge;
         *  digits are kept and also mark a word edge. The first letter
         *  after an edge is upper-cased; all other characters are copied
         *  unchanged.
         */
        private String camelcase( String value ) {
            int length = value.length();
            if ( length == 0 ) return value;
            if ( length == 1 ) return value.toUpperCase( Locale.ROOT );

            StringBuilder buffer = new StringBuilder();
            boolean atEdge = true;

            for ( int i = 0; i < length; i++ ) {
                char cur = value.charAt( i );

                if ( Character.isWhitespace( cur ) || cur == '_' ) {
                    atEdge = true;
                    continue;
                }
                if ( Character.isDigit( cur ) ) {
                    buffer.append( cur );
                    atEdge = true;
                    continue;
                }

                if ( atEdge ) {
                    buffer.append( Character.toUpperCase( cur ) );
                    // a non-letter does not consume the edge; the next letter is still capitalized
                    if ( Character.isLetter( cur ) ) atEdge = false;
                } else {
                    buffer.append( cur );
                }
            }

            return buffer.toString();
        }

        /** Makes a label safe to use as a Ruby identifier.
         *
         *  Ruby keywords and names starting with an upper-case letter are
         *  wrapped as {@code __name__}; "FILE" and "LINE" are special-cased
         *  to "_FILE_" and "_LINE_"; anything else is returned unchanged.
         *  The value must not be empty (toString() filters empty input out).
         */
        private String label( String value ) {
            if ( rubyKeywords.contains( value ) ) {
                return platform( value );
            }
            // wrapping these two the usual way would produce Ruby's own
            // __FILE__ / __LINE__, so they get a single underscore instead
            if ( value.equals( "FILE" ) ) {
                return "_FILE_";
            }
            if ( value.equals( "LINE" ) ) {
                return "_LINE_";
            }
            if ( Character.isUpperCase( value.charAt( 0 ) ) ) {
                return platform( value );
            }
            return value;
        }

        /** Converts the value to camelCase with a lower-case first character. */
        private String subcamelcase( String value ) {
            String camel = camelcase( value );
            if ( camel.length() == 0 ) {
                return camel;
            }
            return Character.toLowerCase( camel.charAt( 0 ) ) + camel.substring( 1 );
        }
    }

    /** Shares "all"-scoped actions between a combined grammar and its
     *  implicit lexer, registers the {@link RubyRenderer} for String
     *  attributes, and writes the recognizer file.
     *
     *  @throws IOException propagated from {@code generator.write}
     */
    @Override
    protected void genRecognizerFile(
        Tool tool,
        CodeGenerator generator,
        Grammar grammar,
        ST outputFileST
    ) throws IOException
    {
        /*
            Below is an experimental attempt at providing a few named action blocks
            that are printed in both lexer and parser files from combined grammars.
            ANTLR appears to first generate a parser, then generate an independent lexer,
            and then generate code from that. It keeps the combo/parser grammar object
            and the lexer grammar object, as well as their respective code generator and
            target instances, completely independent. So, while a bit hack-ish, this is
            a solution that should work without having to modify Terence Parr's
            core tool code.

            - sharedActionBlocks is a class variable containing a hash map
            - if this method is called with a combo grammar, and the action map
              in the grammar contains an entry for the named scope "all",
              add an entry to sharedActionBlocks mapping the grammar name to
              the "all" action map.
            - if this method is called with an `implicit lexer'
              (one that's extracted from a combo grammar), check to see if
              there's an entry in sharedActionBlocks for the lexer's grammar name.
            - if there is an action map entry, place it in the lexer's action map
            - the recognizerFile template has code to place the
              "all" actions appropriately

            problems:
              - This solution assumes that the parser will be generated
                before the lexer. If that changes at some point, this will
                not work.
              - I have not investigated how this works with delegation yet

            Kyle Yetter - March 25, 2010
        */

        if ( grammar.type == Grammar.COMBINED ) {
            Map<String, Map<String, Object>> actions = grammar.getActions();
            if ( actions.containsKey( "all" ) ) {
                sharedActionBlocks.put( grammar.name, actions.get( "all" ) );
            }
        } else if ( grammar.implicitLexer ) {
            if ( sharedActionBlocks.containsKey( grammar.name ) ) {
                Map<String, Map<String, Object>> actions = grammar.getActions();
                actions.put( "all", sharedActionBlocks.get( grammar.name ) );
            }
        }

        STGroup group = generator.getTemplates();
        // a class literal needs no reflective lookup and cannot fail at runtime
        group.registerRenderer( String.class, new RubyRenderer() );

        String fileName =
            generator.getRecognizerFileName( grammar.name, grammar.type );
        generator.write( outputFileST, fileName );
    }

    /** Converts a quoted ANTLR character literal into a hexadecimal
     *  integer literal ("0x..") holding the character's code.
     *
     *  Handles single characters and the escapes \\, \", \', \n, \r, \t,
     *  \b, \f and \\uXXXX. Anything else is reported on standard output
     *  and rendered as "0x0".
     */
    @Override
    public String getTargetCharLiteralFromANTLRCharLiteral(
        CodeGenerator generator,
        String literal
    )
    {
        int code_point = 0;
        // strip the surrounding quotes
        literal = literal.substring( 1, literal.length() - 1 );

        if ( literal.charAt( 0 ) == '\\' ) {
            switch ( literal.charAt( 1 ) ) {
                case '\\':
                case '"':
                case '\'':
                    code_point = literal.codePointAt( 1 );
                    break;
                case 'n':
                    code_point = 10;
                    break;
                case 'r':
                    code_point = 13;
                    break;
                case 't':
                    code_point = 9;
                    break;
                case 'b':
                    code_point = 8;
                    break;
                case 'f':
                    code_point = 12;
                    break;
                case 'u':    // Assume unnnn
                    code_point = Integer.parseInt( literal.substring( 2 ), 16 );
                    break;
                default:
                    System.out.println( "1: hey you didn't account for this: \"" + literal + "\"" );
                    break;
            }
        } else if ( literal.length() == 1 ) {
            code_point = literal.codePointAt( 0 );
        } else {
            System.out.println( "2: hey you didn't account for this: \"" + literal + "\"" );
        }

        return ( "0x" + Integer.toHexString( code_point ) );
    }

    /** Returns the largest character value the generated lexer handles. */
    @Override
    public int getMaxCharValue( CodeGenerator generator )
    {
        // Versions before 1.9 do not support unicode
        return 0xFF;
    }

    /** Returns the label used for a token type: the token's display name,
     *  or, when that name is a quoted literal, the name computed for the
     *  literal by the grammar.
     */
    @Override
    public String getTokenTypeAsTargetLabel( CodeGenerator generator, int ttype )
    {
        String name = generator.grammar.getTokenDisplayName( ttype );
        // If name is a literal, return the token type instead
        if ( name.charAt( 0 ) == '\'' ) {
            return generator.grammar.computeTokenNameFromLiteral( ttype, name );
        }
        return name;
    }

    /** Tells whether {@code scope} is an accepted action scope.
     *
     *  "all", "token", "module" and "overrides" are accepted for every
     *  grammar type; "lexer", "parser" and "treeparser" are accepted only
     *  for the matching grammar type (a combined grammar accepts both
     *  "parser" and "lexer").
     */
    @Override
    public boolean isValidActionScope( int grammarType, String scope ) {
        if ( scope.equals( "all" )
             || scope.equals( "token" )
             || scope.equals( "module" )
             || scope.equals( "overrides" ) ) {
            return true;
        }

        switch ( grammarType ) {
            case Grammar.LEXER:
                return scope.equals( "lexer" );
            case Grammar.PARSER:
                return scope.equals( "parser" );
            case Grammar.COMBINED:
                return scope.equals( "parser" ) || scope.equals( "lexer" );
            case Grammar.TREE_PARSER:
                return scope.equals( "treeparser" );
            default:
                return false;
        }
    }

    /** Encodes a character value as its decimal string; the value 65535
     *  is written as -1.
     */
    @Override
    public String encodeIntAsCharEscape( final int v ) {
        // NOTE(review): 65535 is presumably the 16-bit form of the -1 EOF marker -- confirm
        final int intValue = ( v == 65535 ) ? -1 : v;
        return String.valueOf( intValue );
    }
}