xref: /aosp_15_r20/external/antlr/tool/src/main/java/org/antlr/codegen/RubyTarget.java (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1 /*
2  [The "BSD license"]
3  Copyright (c) 2010 Kyle Yetter
4  All rights reserved.
5 
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions
8  are met:
9  1. Redistributions of source code must retain the above copyright
10     notice, this list of conditions and the following disclaimer.
11  2. Redistributions in binary form must reproduce the above copyright
12     notice, this list of conditions and the following disclaimer in the
13     documentation and/or other materials provided with the distribution.
14  3. The name of the author may not be used to endorse or promote products
15     derived from this software without specific prior written permission.
16 
17  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 package org.antlr.codegen;
30 
31 import org.antlr.Tool;
32 import org.antlr.tool.Grammar;
33 import org.stringtemplate.v4.AttributeRenderer;
34 import org.stringtemplate.v4.ST;
35 import org.stringtemplate.v4.STGroup;
36 
37 import java.io.IOException;
38 import java.util.*;
39 
40 public class RubyTarget extends Target
41 {
42     /** A set of ruby keywords which are used to escape labels and method names
43      *  which will cause parse errors in the ruby source
44      */
45     public static final Set<String> rubyKeywords =
46     new HashSet<String>() {
47         {
48         	add( "alias" );     add( "END" );     add( "retry" );
49         	add( "and" );       add( "ensure" );  add( "return" );
50         	add( "BEGIN" );     add( "false" );   add( "self" );
51         	add( "begin" );     add( "for" );     add( "super" );
52         	add( "break" );     add( "if" );      add( "then" );
53         	add( "case" );      add( "in" );      add( "true" );
54         	add( "class" );     add( "module" );  add( "undef" );
55         	add( "def" );       add( "next" );    add( "unless" );
56         	add( "defined?" );  add( "nil" );     add( "until" );
57         	add( "do" );        add( "not" );     add( "when" );
58         	add( "else" );      add( "or" );      add( "while" );
59         	add( "elsif" );     add( "redo" );    add( "yield" );
60         	add( "end" );       add( "rescue" );
61         }
62     };
63 
64     public static Map<String, Map<String, Object>> sharedActionBlocks = new HashMap<String, Map<String, Object>>();
65 
66     public class RubyRenderer implements AttributeRenderer
67     {
68     	protected String[] rubyCharValueEscape = new String[256];
69 
RubyRenderer()70     	public RubyRenderer() {
71     		for ( int i = 0; i < 16; i++ ) {
72     			rubyCharValueEscape[ i ] = "\\x0" + Integer.toHexString( i );
73     		}
74     		for ( int i = 16; i < 32; i++ ) {
75     			rubyCharValueEscape[ i ] = "\\x" + Integer.toHexString( i );
76     		}
77     		for ( char i = 32; i < 127; i++ ) {
78     			rubyCharValueEscape[ i ] = Character.toString( i );
79     		}
80     		for ( int i = 127; i < 256; i++ ) {
81     			rubyCharValueEscape[ i ] = "\\x" + Integer.toHexString( i );
82     		}
83 
84     		rubyCharValueEscape['\n'] = "\\n";
85     		rubyCharValueEscape['\r'] = "\\r";
86     		rubyCharValueEscape['\t'] = "\\t";
87     		rubyCharValueEscape['\b'] = "\\b";
88     		rubyCharValueEscape['\f'] = "\\f";
89     		rubyCharValueEscape['\\'] = "\\\\";
90     		rubyCharValueEscape['"'] = "\\\"";
91     	}
92 
93 		@Override
toString( Object o, String formatName, Locale locale )94         public String toString( Object o, String formatName, Locale locale ) {
95 			if ( formatName==null ) {
96 				return o.toString();
97 			}
98 
99             String idString = o.toString();
100 
101             if ( idString.length() == 0 ) return idString;
102 
103             if ( formatName.equals( "snakecase" ) ) {
104                 return snakecase( idString );
105             } else if ( formatName.equals( "camelcase" ) ) {
106                 return camelcase( idString );
107             } else if ( formatName.equals( "subcamelcase" ) ) {
108                 return subcamelcase( idString );
109             } else if ( formatName.equals( "constant" ) ) {
110                 return constantcase( idString );
111             } else if ( formatName.equals( "platform" ) ) {
112                 return platform( idString );
113             } else if ( formatName.equals( "lexerRule" ) ) {
114                 return lexerRule( idString );
115             } else if ( formatName.equals( "constantPath" ) ) {
116             	return constantPath( idString );
117             } else if ( formatName.equals( "rubyString" ) ) {
118                 return rubyString( idString );
119             } else if ( formatName.equals( "label" ) ) {
120                 return label( idString );
121             } else if ( formatName.equals( "symbol" ) ) {
122                 return symbol( idString );
123             } else {
124                 throw new IllegalArgumentException( "Unsupported format name" );
125             }
126         }
127 
128         /** given an input string, which is presumed
129          * to contain a word, which may potentially be camelcased,
130          * and convert it to snake_case underscore style.
131          *
132          * algorithm --
133          *   iterate through the string with a sliding window 3 chars wide
134          *
135          * example -- aGUIWhatNot
136          *   c   c+1 c+2  action
137          *   a   G        &lt;&lt; 'a' &lt;&lt; '_'  // a lower-upper word edge
138          *   G   U   I    &lt;&lt; 'g'
139          *   U   I   W    &lt;&lt; 'w'
140          *   I   W   h    &lt;&lt; 'i' &lt;&lt; '_'  // the last character in an acronym run of uppers
141          *   W   h        &lt;&lt; 'w'
142          *   ... and so on
143          */
snakecase( String value )144         private String snakecase( String value ) {
145             StringBuilder output_buffer = new StringBuilder();
146             int l = value.length();
147             int cliff = l - 1;
148             char cur;
149             char next;
150             char peek;
151 
152             if ( value.length() == 0 ) return value;
153             if ( l == 1 ) return value.toLowerCase();
154 
155             for ( int i = 0; i < cliff; i++ ) {
156                 cur  = value.charAt( i );
157                 next = value.charAt( i + 1 );
158 
159                 if ( Character.isLetter( cur ) ) {
160                     output_buffer.append( Character.toLowerCase( cur ) );
161 
162                     if ( Character.isDigit( next ) || Character.isWhitespace( next ) ) {
163                         output_buffer.append( '_' );
164                     } else if ( Character.isLowerCase( cur ) && Character.isUpperCase( next ) ) {
165                         // at camelcase word edge
166                         output_buffer.append( '_' );
167                     } else if ( ( i < cliff - 1 ) && Character.isUpperCase( cur ) && Character.isUpperCase( next ) ) {
168                         // cur is part of an acronym
169 
170                         peek = value.charAt( i + 2 );
171                         if ( Character.isLowerCase( peek ) ) {
172                             /* if next is the start of word (indicated when peek is lowercase)
173                                          then the acronym must be completed by appending an underscore */
174                             output_buffer.append( '_' );
175                         }
176                     }
177                 } else if ( Character.isDigit( cur ) ) {
178                     output_buffer.append( cur );
179                     if ( Character.isLetter( next ) ) {
180                         output_buffer.append( '_' );
181                     }
182                 } else if ( Character.isWhitespace( cur ) ) {
183                     // do nothing
184                 } else {
185                     output_buffer.append( cur );
186                 }
187 
188             }
189 
190             cur  = value.charAt( cliff );
191             if ( ! Character.isWhitespace( cur ) ) {
192                 output_buffer.append( Character.toLowerCase( cur ) );
193             }
194 
195             return output_buffer.toString();
196         }
197 
constantcase( String value )198         private String constantcase( String value ) {
199             return snakecase( value ).toUpperCase();
200         }
201 
platform( String value )202         private String platform( String value ) {
203             return ( "__" + value + "__" );
204         }
205 
symbol( String value )206         private String symbol( String value ) {
207             if ( value.matches( "[a-zA-Z_]\\w*[\\?\\!\\=]?" ) ) {
208                 return ( ":" + value );
209             } else {
210                 return ( "%s(" + value + ")" );
211             }
212         }
213 
lexerRule( String value )214         private String lexerRule( String value ) {
215 					  // System.out.print( "lexerRule( \"" + value + "\") => " );
216             if ( value.equals( "Tokens" ) ) {
217 							  // System.out.println( "\"token!\"" );
218                 return "token!";
219             } else {
220 							  // String result = snakecase( value ) + "!";
221 								// System.out.println( "\"" + result + "\"" );
222                 return ( snakecase( value ) + "!" );
223             }
224         }
225 
constantPath( String value )226         private String constantPath( String value ) {
227             return value.replaceAll( "\\.", "::" );
228         }
229 
rubyString( String value )230         private String rubyString( String value ) {
231         	StringBuilder output_buffer = new StringBuilder();
232         	int len = value.length();
233 
234         	output_buffer.append( '"' );
235         	for ( int i = 0; i < len; i++ ) {
236         		output_buffer.append( rubyCharValueEscape[ value.charAt( i ) ] );
237         	}
238         	output_buffer.append( '"' );
239         	return output_buffer.toString();
240         }
241 
camelcase( String value )242         private String camelcase( String value ) {
243             StringBuilder output_buffer = new StringBuilder();
244             int cliff = value.length();
245             char cur;
246             char next;
247             boolean at_edge = true;
248 
249             if ( value.length() == 0 ) return value;
250             if ( cliff == 1 ) return value.toUpperCase();
251 
252             for ( int i = 0; i < cliff; i++ ) {
253                 cur  = value.charAt( i );
254 
255                 if ( Character.isWhitespace( cur ) ) {
256                     at_edge = true;
257                     continue;
258                 } else if ( cur == '_' ) {
259                     at_edge = true;
260                     continue;
261                 } else if ( Character.isDigit( cur ) ) {
262                     output_buffer.append( cur );
263                     at_edge = true;
264                     continue;
265                 }
266 
267                 if ( at_edge ) {
268                     output_buffer.append( Character.toUpperCase( cur ) );
269                     if ( Character.isLetter( cur ) ) at_edge = false;
270                 } else {
271                     output_buffer.append( cur );
272                 }
273             }
274 
275             return output_buffer.toString();
276         }
277 
label( String value )278         private String label( String value ) {
279             if ( rubyKeywords.contains( value ) ) {
280                 return platform( value );
281             } else if ( Character.isUpperCase( value.charAt( 0 ) ) &&
282                         ( !value.equals( "FILE" ) ) &&
283                         ( !value.equals( "LINE" ) ) ) {
284                 return platform( value );
285             } else if ( value.equals( "FILE" ) ) {
286                 return "_FILE_";
287             } else if ( value.equals( "LINE" ) ) {
288                 return "_LINE_";
289             } else {
290                 return value;
291             }
292         }
293 
subcamelcase( String value )294         private String subcamelcase( String value ) {
295             value = camelcase( value );
296             if ( value.length() == 0 )
297                 return value;
298             Character head = Character.toLowerCase( value.charAt( 0 ) );
299             String tail = value.substring( 1 );
300             return head.toString().concat( tail );
301         }
302     }
303 
304 	@Override
genRecognizerFile( Tool tool, CodeGenerator generator, Grammar grammar, ST outputFileST )305     protected void genRecognizerFile(
306     		Tool tool,
307     		CodeGenerator generator,
308     		Grammar grammar,
309     		ST outputFileST
310     ) throws IOException
311     {
312         /*
313             Below is an experimental attempt at providing a few named action blocks
314             that are printed in both lexer and parser files from combined grammars.
315             ANTLR appears to first generate a parser, then generate an independent lexer,
316             and then generate code from that. It keeps the combo/parser grammar object
317             and the lexer grammar object, as well as their respective code generator and
318             target instances, completely independent. So, while a bit hack-ish, this is
319             a solution that should work without having to modify Terrence Parr's
320             core tool code.
321 
322             - sharedActionBlocks is a class variable containing a hash map
323             - if this method is called with a combo grammar, and the action map
324               in the grammar contains an entry for the named scope "all",
325               add an entry to sharedActionBlocks mapping the grammar name to
326               the "all" action map.
327             - if this method is called with an `implicit lexer'
328               (one that's extracted from a combo grammar), check to see if
329               there's an entry in sharedActionBlocks for the lexer's grammar name.
330             - if there is an action map entry, place it in the lexer's action map
331             - the recognizerFile template has code to place the
332               "all" actions appropriately
333 
334             problems:
335               - This solution assumes that the parser will be generated
336                 before the lexer. If that changes at some point, this will
337                 not work.
338               - I have not investigated how this works with delegation yet
339 
340             Kyle Yetter - March 25, 2010
341         */
342 
343         if ( grammar.type == Grammar.COMBINED ) {
344             Map<String, Map<String, Object>> actions = grammar.getActions();
345             if ( actions.containsKey( "all" ) ) {
346                 sharedActionBlocks.put( grammar.name, actions.get( "all" ) );
347             }
348         } else if ( grammar.implicitLexer ) {
349             if ( sharedActionBlocks.containsKey( grammar.name ) ) {
350                 Map<String, Map<String, Object>> actions = grammar.getActions();
351                 actions.put( "all", sharedActionBlocks.get( grammar.name ) );
352             }
353         }
354 
355         STGroup group = generator.getTemplates();
356         RubyRenderer renderer = new RubyRenderer();
357         try {
358             group.registerRenderer( Class.forName( "java.lang.String" ), renderer );
359         } catch ( ClassNotFoundException e ) {
360             // this shouldn't happen
361             System.err.println( "ClassNotFoundException: " + e.getMessage() );
362             e.printStackTrace( System.err );
363         }
364         String fileName =
365             generator.getRecognizerFileName( grammar.name, grammar.type );
366         generator.write( outputFileST, fileName );
367     }
368 
369 	@Override
getTargetCharLiteralFromANTLRCharLiteral( CodeGenerator generator, String literal )370     public String getTargetCharLiteralFromANTLRCharLiteral(
371         CodeGenerator generator,
372         String literal
373     )
374     {
375         int code_point = 0;
376         literal = literal.substring( 1, literal.length() - 1 );
377 
378         if ( literal.charAt( 0 ) == '\\' ) {
379             switch ( literal.charAt( 1 ) ) {
380                 case    '\\':
381                 case    '"':
382                 case    '\'':
383                     code_point = literal.codePointAt( 1 );
384                     break;
385                 case    'n':
386                     code_point = 10;
387                     break;
388                 case    'r':
389                     code_point = 13;
390                     break;
391                 case    't':
392                     code_point = 9;
393                     break;
394                 case    'b':
395                     code_point = 8;
396                     break;
397                 case    'f':
398                     code_point = 12;
399                     break;
400                 case    'u':    // Assume unnnn
401                     code_point = Integer.parseInt( literal.substring( 2 ), 16 );
402                     break;
403                 default:
404                     System.out.println( "1: hey you didn't account for this: \"" + literal + "\"" );
405                     break;
406             }
407         } else if ( literal.length() == 1 ) {
408             code_point = literal.codePointAt( 0 );
409         } else {
410             System.out.println( "2: hey you didn't account for this: \"" + literal + "\"" );
411         }
412 
413         return ( "0x" + Integer.toHexString( code_point ) );
414     }
415 
416 	@Override
getMaxCharValue( CodeGenerator generator )417     public int getMaxCharValue( CodeGenerator generator )
418     {
419         // Versions before 1.9 do not support unicode
420         return 0xFF;
421     }
422 
423 	@Override
getTokenTypeAsTargetLabel( CodeGenerator generator, int ttype )424     public String getTokenTypeAsTargetLabel( CodeGenerator generator, int ttype )
425     {
426         String name = generator.grammar.getTokenDisplayName( ttype );
427         // If name is a literal, return the token type instead
428         if ( name.charAt( 0 )=='\'' ) {
429             return generator.grammar.computeTokenNameFromLiteral( ttype, name );
430         }
431         return name;
432     }
433 
434 	@Override
isValidActionScope( int grammarType, String scope )435     public boolean isValidActionScope( int grammarType, String scope ) {
436         if ( scope.equals( "all" ) )       {
437             return true;
438         }
439         if ( scope.equals( "token" ) )     {
440             return true;
441         }
442         if ( scope.equals( "module" ) )    {
443             return true;
444         }
445         if ( scope.equals( "overrides" ) ) {
446             return true;
447         }
448 
449         switch ( grammarType ) {
450         case Grammar.LEXER:
451             if ( scope.equals( "lexer" ) ) {
452                 return true;
453             }
454             break;
455         case Grammar.PARSER:
456             if ( scope.equals( "parser" ) ) {
457                 return true;
458             }
459             break;
460         case Grammar.COMBINED:
461             if ( scope.equals( "parser" ) ) {
462                 return true;
463             }
464             if ( scope.equals( "lexer" ) ) {
465                 return true;
466             }
467             break;
468         case Grammar.TREE_PARSER:
469             if ( scope.equals( "treeparser" ) ) {
470                 return true;
471             }
472             break;
473         }
474         return false;
475     }
476 
477 	@Override
encodeIntAsCharEscape( final int v )478     public String encodeIntAsCharEscape( final int v ) {
479         final int intValue;
480 
481         if ( v == 65535 ) {
482             intValue = -1;
483         } else {
484             intValue = v;
485         }
486 
487         return String.valueOf( intValue );
488     }
489 }
490