xref: /aosp_15_r20/external/google-smali/smali/src/main/jflex/smaliLexer.jflex (revision 37f5703ca959d1ce24046e7595880d209e15c133)
1package com.android.tools.smali.smali;
2
3import static java.lang.Math.toIntExact;
4
5import java.io.*;
6import java.util.Stack;
7import org.antlr.runtime.*;
8import com.android.tools.smali.smali.util.*;
9import com.android.tools.smali.util.*;
10import static com.android.tools.smali.smali.smaliParser.*;
11
12%%
13
14%public
15%class smaliFlexLexer
16%implements TokenSource
17%implements LexerErrorInterface
18%type Token
19%unicode
20%line
21%column
22%char
23
%ctorarg int apiLevel
%init{
    // Remember the API level passed to the constructor; processQuotedSimpleNameWithSpaces()
    // compares it against 30.
    this.apiLevel = apiLevel;
%init}
28
29%{
    // Accumulates the text of the state-based token (string, char, class descriptor)
    // currently being lexed; cleared by beginStateBasedToken().
    private StringBuffer sb = new StringBuffer();
    // First error recorded for the current state-based token, or null if there is none.
    private String tokenError = null;
    // Line, column and character offset captured by beginStateBasedToken().
    private int tokenStartLine;
    private int tokenStartCol;
    private int tokenStartChar;

    // Number of InvalidTokens that nextToken() has returned so far.
    private int lexerErrors = 0;

    // File used by getSourceName() when building error headers; may be null.
    private File sourceFile;

    // When true, nextToken() does not print lexer errors (they are still counted).
    private boolean suppressErrors;

    // API level supplied through the constructor argument.
    private int apiLevel;

    // Lexical states pushed by beginStateBasedToken() and popped when the token ends.
    private Stack<Integer> stateStack = new Stack<>();
45
46    public Token nextToken() {
47        try {
48            Token token = yylex();
49            if (token instanceof InvalidToken) {
50                InvalidToken invalidToken = (InvalidToken)token;
51                if (!suppressErrors) {
52                    System.err.println(getErrorHeader(invalidToken) + " Error for input '" +
53                        invalidToken.getText() + "': " + invalidToken.getMessage());
54                }
55                lexerErrors++;
56            }
57            return token;
58        }
59        catch (java.io.IOException e) {
60            System.err.println("shouldn't happen: " + e.getMessage());
61            return newToken(EOF);
62        }
63    }
64
    /** Sets the current line; the value is stored in yyline as {@code line - 1}. */
    public void setLine(int line) {
        this.yyline = line-1;
    }

    /** Sets the current column by writing it directly to yycolumn. */
    public void setColumn(int column) {
        this.yycolumn = column;
    }

    /** Returns the current line, i.e. {@code yyline + 1} (the inverse of setLine). */
    public int getLine() {
        return this.yyline+1;
    }

    /** Returns the current column exactly as held in yycolumn. */
    public int getColumn() {
        return this.yycolumn;
    }
80
    /** Controls whether nextToken() prints invalid-token errors to System.err. */
    public void setSuppressErrors(boolean suppressErrors) {
        this.suppressErrors = suppressErrors;
    }

    /** Sets the file whose path getSourceName() reports in error headers. */
    public void setSourceFile(File sourceFile) {
        this.sourceFile = sourceFile;
    }
88
89    public String getSourceName() {
90        if (sourceFile == null) {
91            return "";
92        }
93        try {
94            return  PathUtil.getRelativeFile(new File("."), sourceFile).getPath();
95        } catch (IOException ex) {
96            return sourceFile.getAbsolutePath();
97        }
98    }
99
    /** Returns how many InvalidTokens nextToken() has produced so far. */
    public int getNumberOfSyntaxErrors() {
        return lexerErrors;
    }
103
104    private Token newToken(int type, String text, boolean hidden) {
105        CommonToken token = new CommonToken(type, text);
106        if (hidden) {
107            token.setChannel(Token.HIDDEN_CHANNEL);
108        }
109        // yychar is long, but antlr CommonToken only takes an int.
110        token.setStartIndex(toIntExact(yychar));
111        token.setStopIndex(stopIndex());
112        token.setLine(getLine());
113        token.setCharPositionInLine(getColumn());
114        return token;
115    }
116
117    private Token newToken(int type, String text) {
118        return newToken(type, text, false);
119    }
120
121    private Token newToken(int type, boolean hidden) {
122        return newToken(type, yytext(), hidden);
123    }
124
125    private Token newToken(int type) {
126        return newToken(type, yytext(), false);
127    }
128
129    private Token invalidToken(String message, String text) {
130        InvalidToken token = new InvalidToken(message, text);
131        // yychar is long, but antlr CommonToken only takes an int.
132        token.setStartIndex(toIntExact(yychar));
133        token.setStopIndex(stopIndex());
134        token.setLine(getLine());
135        token.setCharPositionInLine(getColumn());
136
137        return token;
138    }
139
140    private Token invalidToken(String message) {
141        return invalidToken(message, yytext());
142    }
143
144    private void beginStateBasedToken(int state) {
145        stateStack.push(yystate());
146        yybegin(state);
147        sb.setLength(0);
148        tokenStartLine = getLine();
149        tokenStartCol = getColumn();
150        // yychar is long, but antlr CommonToken only takes an int.
151        tokenStartChar = toIntExact(yychar);
152        tokenError = null;
153    }
154
155    private Token endStateBasedToken(int type) {
156        if (tokenError != null) {
157            return invalidStateBasedToken(tokenError);
158        }
159
160        yybegin(stateStack.pop());
161
162        CommonToken token = new CommonToken(type, sb.toString());
163        token.setStartIndex(tokenStartChar);
164        token.setStopIndex(stopIndex());
165        token.setLine(tokenStartLine);
166        token.setCharPositionInLine(tokenStartCol);
167        return token;
168    }
169
170    private void setStateBasedTokenError(String message) {
171        if (tokenError == null) {
172            tokenError = message;
173        }
174    }
175
176    private Token invalidStateBasedToken(String message) {
177        yybegin(stateStack.pop());
178
179        InvalidToken token = new InvalidToken(message, sb.toString());
180        token.setStartIndex(tokenStartChar);
181        token.setStopIndex(stopIndex());
182        token.setLine(tokenStartLine);
183        token.setCharPositionInLine(tokenStartCol);
184        return token;
185    }
186
187    public String getErrorHeader(InvalidToken token) {
188        return getSourceName()+"["+ token.getLine()+","+token.getCharPositionInLine()+"]";
189    }
190
191    public void reset(CharSequence charSequence, int start, int end, int initialState) {
192        zzReader = BlankReader.INSTANCE;
193        zzBuffer = new char[charSequence.length()];
194        for (int i=0; i<charSequence.length(); i++) {
195            zzBuffer[i] = charSequence.charAt(i);
196        }
197
198        yychar = zzCurrentPos = zzMarkedPos = zzStartRead = start;
199        zzEndRead = end;
200        zzAtBOL = true;
201        zzAtEOF = false;
202        yybegin(initialState);
203    }
204
205    private String processQuotedSimpleName(String text) {
206        // strip backticks
207        return text.substring(1, text.length() - 1);
208    }
209
210    private String processQuotedSimpleNameWithSpaces(String text) {
211        if (apiLevel < 30) {
212            setStateBasedTokenError("spaces in class descriptors and member names are not supported prior to API " +
213                "level 30/dex version 040");
214        }
215        return processQuotedSimpleName(text);
216    }
217
218    private int stopIndex() {
219      // jflex yychar is long, but antlr CommonToken only takes an int for
220      // stopIndex.
221      return toIntExact(yychar + yylength() - 1);
222    }
223%}
224
/* "0x" or "0X" */
HexPrefix = 0 [xX]

HexDigit = [0-9a-fA-F]
/* Exactly four hex digits, as required after "\u" in string and char literals */
HexDigits = [0-9a-fA-F]{4}
/* Zero to three hex digits, i.e. an incomplete "\u" escape */
FewerHexDigits = [0-9a-fA-F]{0,3}

/* Integer1: a lone zero. Integer2: decimal with no leading zero.
   Integer3: a leading zero followed by digits 0-7. Integer4: hexadecimal. */
Integer1 = 0
Integer2 = [1-9] [0-9]*
Integer3 = 0 [0-7]+
Integer4 = {HexPrefix} {HexDigit}+
Integer = {Integer1} | {Integer2} | {Integer3} | {Integer4}

/* Exponents accept an optional '-' sign only; a '+' sign is not matched. */
DecimalExponent = [eE] -? [0-9]+

BinaryExponent = [pP] -? [0-9]+

/*This can either be a floating point number or an identifier*/
FloatOrID1 = -? [0-9]+ {DecimalExponent}
FloatOrID2 = -? {HexPrefix} {HexDigit}+ {BinaryExponent}
/* "infinity" (optionally negated) and "nan", matched case-insensitively */
FloatOrID3 = -? [iI][nN][fF][iI][nN][iI][tT][yY]
FloatOrID4 = [nN][aA][nN]
FloatOrID =  {FloatOrID1} | {FloatOrID2} | {FloatOrID3} | {FloatOrID4}


/*This can only be a float and not an identifier, due to the decimal point*/
Float1 = -? [0-9]+ "." [0-9]* {DecimalExponent}?
Float2 = -? "." [0-9]+ {DecimalExponent}?
/* Hexadecimal forms: the binary exponent is mandatory here */
Float3 = -? {HexPrefix} {HexDigit}+ "." {HexDigit}* {BinaryExponent}
Float4 = -? {HexPrefix} "." {HexDigit}+ {BinaryExponent}
Float =  {Float1} | {Float2} | {Float3} | {Float4}
255
HighSurrogate = [\ud800-\udbff]

LowSurrogate = [\udc00-\udfff]

/* One name character: either a surrogate pair or a single char from the listed ranges */
SimpleNameCharacter = ({HighSurrogate} {LowSurrogate}) | [A-Za-z0-9$\-_\u00a1-\u1fff\u2010-\u2027\u2030-\ud7ff\ue000-\uffef]
UnicodeSpace = [\u0020\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000] /* Zs category */

/* A name is either raw, or enclosed in backticks; only the backtick-quoted form with
   spaces may contain UnicodeSpace characters. */
SimpleNameRaw = {SimpleNameCharacter}+
SimpleNameQuoted = [`] {SimpleNameCharacter}+ [`]
SimpleNameQuotedWithSpaces = [`] ({SimpleNameCharacter} | {UnicodeSpace})+ [`]
SimpleName = {SimpleNameRaw} | {SimpleNameQuoted} | {SimpleNameQuotedWithSpaces}

PrimitiveType = [ZBSCIJFD]

/* 'L', one or more '/'-separated simple names, then ';' */
ClassDescriptor = L ({SimpleName} "/")* {SimpleName} ;

/* One or more '[' characters */
ArrayPrefix = "["+

/* A primitive type, a class descriptor, or either of those behind an array prefix */
Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} | {PrimitiveType})
275
276
/* Lexical states used besides YYINITIAL:
   PARAM_LIST_OR_ID           - re-lexes a run of two or more primitive type letters
   PARAM_LIST                 - re-lexes a run of two or more types
   ARRAY_DESCRIPTOR           - entered after an array prefix to lex the element type
   STRING / CHAR              - bodies of string and character literals
   CLASS_DESCRIPTOR_BEGINNING - the leading 'L' plus first name of a class descriptor
   CLASS_DESCRIPTOR_REMAINING - the following "/name" parts and the closing ';' */
%state PARAM_LIST_OR_ID
%state PARAM_LIST
%state ARRAY_DESCRIPTOR
%state STRING
%state CHAR
%state CLASS_DESCRIPTOR_BEGINNING
%state CLASS_DESCRIPTOR_REMAINING
284
285%%
286
/*Directives*/
/* Each known directive yields its own token type. ".end" and ".restart" that are not
   followed by a recognised keyword are reported as invalid directives. */
<YYINITIAL>
{
    ".class" { return newToken(CLASS_DIRECTIVE); }
    ".super" { return newToken(SUPER_DIRECTIVE); }
    ".implements" { return newToken(IMPLEMENTS_DIRECTIVE); }
    ".source" { return newToken(SOURCE_DIRECTIVE); }
    ".field" { return newToken(FIELD_DIRECTIVE); }
    ".end field" { return newToken(END_FIELD_DIRECTIVE); }
    ".subannotation" { return newToken(SUBANNOTATION_DIRECTIVE); }
    ".end subannotation" { return newToken(END_SUBANNOTATION_DIRECTIVE); }
    ".annotation" { return newToken(ANNOTATION_DIRECTIVE); }
    ".end annotation" { return newToken(END_ANNOTATION_DIRECTIVE); }
    ".enum" { return newToken(ENUM_DIRECTIVE); }
    ".method" { return newToken(METHOD_DIRECTIVE); }
    ".end method" { return newToken(END_METHOD_DIRECTIVE); }
    ".registers" { return newToken(REGISTERS_DIRECTIVE); }
    ".locals" { return newToken(LOCALS_DIRECTIVE); }
    ".array-data" { return newToken(ARRAY_DATA_DIRECTIVE); }
    ".end array-data" { return newToken(END_ARRAY_DATA_DIRECTIVE); }
    ".packed-switch" { return newToken(PACKED_SWITCH_DIRECTIVE); }
    ".end packed-switch" { return newToken(END_PACKED_SWITCH_DIRECTIVE); }
    ".sparse-switch" { return newToken(SPARSE_SWITCH_DIRECTIVE); }
    ".end sparse-switch" { return newToken(END_SPARSE_SWITCH_DIRECTIVE); }
    ".catch" { return newToken(CATCH_DIRECTIVE); }
    ".catchall" { return newToken(CATCHALL_DIRECTIVE); }
    ".line" { return newToken(LINE_DIRECTIVE); }
    ".param" { return newToken(PARAMETER_DIRECTIVE); }
    ".end param" { return newToken(END_PARAMETER_DIRECTIVE); }
    ".local" { return newToken(LOCAL_DIRECTIVE); }
    ".end local" { return newToken(END_LOCAL_DIRECTIVE); }
    ".restart local" { return newToken(RESTART_LOCAL_DIRECTIVE); }
    ".prologue" { return newToken(PROLOGUE_DIRECTIVE); }
    ".epilogue" { return newToken(EPILOGUE_DIRECTIVE); }

    // The keyword class is letters, digits, '-' and '_'. The range must be A-Z: writing
    // A-z would also admit the characters between 'Z' and 'a' ('[', '\', ']', '^', '`').
    ".end" { return invalidToken("Invalid directive"); }
    ".end " [a-zA-Z0-9\-_]+ { return invalidToken("Invalid directive"); }
    ".restart" { return invalidToken("Invalid directive"); }
    ".restart " [a-zA-Z0-9\-_]+ { return invalidToken("Invalid directive"); }
}
327
/*Literals*/
<YYINITIAL> {
    // Integer literals; a trailing l/s/t suffix selects long, short or byte.
    {Integer} { return newToken(POSITIVE_INTEGER_LITERAL); }
    - {Integer} { return newToken(NEGATIVE_INTEGER_LITERAL); }
    -? {Integer} [lL] { return newToken(LONG_LITERAL); }
    -? {Integer} [sS] { return newToken(SHORT_LITERAL); }
    -? {Integer} [tT] { return newToken(BYTE_LITERAL); }

    // Floating point literals; the *_OR_ID token types are used for text that contains
    // no decimal point (see the FloatOrID macro).
    {FloatOrID} [fF] | -? [0-9]+ [fF] { return newToken(FLOAT_LITERAL_OR_ID); }
    {FloatOrID} [dD]? | -? [0-9]+ [dD] { return newToken(DOUBLE_LITERAL_OR_ID); }
    {Float} [fF] { return newToken(FLOAT_LITERAL); }
    {Float} [dD]? { return newToken(DOUBLE_LITERAL); }

    "true"|"false" { return newToken(BOOL_LITERAL); }
    "null" { return newToken(NULL_LITERAL); }

    // An opening quote switches to the STRING / CHAR state; the quote itself is kept in
    // the accumulated token text.
    "\"" { beginStateBasedToken(STRING); sb.append('"'); }

    ' { beginStateBasedToken(CHAR); sb.append('\''); }
}
348
// Emits one PARAM_LIST_OR_ID_PRIMITIVE_TYPE token per primitive type letter. Any other
// character is pushed back and lexing returns to YYINITIAL, as it does at end of input.
<PARAM_LIST_OR_ID> {
    {PrimitiveType} { return newToken(PARAM_LIST_OR_ID_PRIMITIVE_TYPE); }
    [^] { yypushback(1); yybegin(YYINITIAL); }
    <<EOF>> { yybegin(YYINITIAL); }
}
354
// Lexes a sequence of types one at a time. Any other character is pushed back and lexing
// returns to YYINITIAL, as it does at end of input.
<PARAM_LIST> {
    {PrimitiveType} { return newToken(PRIMITIVE_TYPE); }
    {ClassDescriptor} {
        // Push the whole descriptor back and re-lex it piecewise in the class-descriptor states.
        yypushback(yylength());
        beginStateBasedToken(CLASS_DESCRIPTOR_BEGINNING);
        // NOTE(review): yytext() is read after the full pushback, so it is presumably
        // empty and this append adds nothing — confirm against JFlex semantics.
        sb.append(yytext());
    }
    {ArrayPrefix} { return newToken(ARRAY_TYPE_PREFIX); }
    [^] { yypushback(1); yybegin(YYINITIAL);}
    <<EOF>> { yybegin(YYINITIAL);}
}
366
// Consumes the leading 'L' and the first name of a class descriptor, appends them to the
// token text (backticks of quoted names are stripped) and moves on to the remaining parts.
<CLASS_DESCRIPTOR_BEGINNING> {
    "L" {SimpleNameRaw} {
        sb.append(yytext());
        yybegin(CLASS_DESCRIPTOR_REMAINING);
    }
    "L" {SimpleNameQuoted} {
        sb.append("L");
        // substring(1) drops the 'L' so only the quoted name is unquoted.
        sb.append(processQuotedSimpleName(yytext().substring(1)));
        yybegin(CLASS_DESCRIPTOR_REMAINING);
    }
    "L" {SimpleNameQuotedWithSpaces} {
        sb.append("L");
        sb.append(processQuotedSimpleNameWithSpaces(yytext().substring(1)));
        yybegin(CLASS_DESCRIPTOR_REMAINING);
    }
}
383
// Consumes each following "/name" part of a class descriptor; the closing ';' completes
// the CLASS_DESCRIPTOR token.
<CLASS_DESCRIPTOR_REMAINING> {
    "/" {SimpleNameRaw} {
        sb.append(yytext());
    }
    "/" {SimpleNameQuoted} {
        sb.append("/");
        // substring(1) drops the '/' so only the quoted name is unquoted.
        sb.append(processQuotedSimpleName(yytext().substring(1)));
    }
    "/" {SimpleNameQuotedWithSpaces} {
        sb.append("/");
        sb.append(processQuotedSimpleNameWithSpaces(yytext().substring(1)));
    }

    ";" {
        sb.append(yytext());
        return endStateBasedToken(CLASS_DESCRIPTOR);
    }
}
402
// Body of a string literal. Escape sequences are decoded into the token text; an invalid
// escape is kept verbatim and recorded as an error that surfaces when the literal ends.
<STRING> {
    "\""  { sb.append('"'); return endStateBasedToken(STRING_LITERAL); }

    [^\r\n\"\\]+ { sb.append(yytext()); }
    "\\b" { sb.append('\b'); }
    "\\t" { sb.append('\t'); }
    "\\n" { sb.append('\n'); }
    "\\f" { sb.append('\f'); }
    "\\r" { sb.append('\r'); }
    "\\'" { sb.append('\''); }
    "\\\"" { sb.append('"'); }
    "\\\\" { sb.append('\\'); }
    // substring(2,6) is the four hex digits following "\u".
    "\\u" {HexDigits} { sb.append((char)Integer.parseInt(yytext().substring(2,6), 16)); }

    "\\u" {FewerHexDigits} {
        sb.append(yytext());
        setStateBasedTokenError("Invalid \\u sequence. \\u must be followed by 4 hex digits");
    }

    "\\" [^btnfr'\"\\u] {
        sb.append(yytext());
        setStateBasedTokenError("Invalid escape sequence " + yytext());
    }

    // A line break or end of input before the closing quote ends the token as an error.
    [\r\n] { return invalidStateBasedToken("Unterminated string literal"); }
    <<EOF>> { return invalidStateBasedToken("Unterminated string literal"); }
}
430
// Body of a character literal. Escape sequences are decoded into the token text; an
// invalid escape is kept verbatim and recorded as an error.
<CHAR> {
    ' {
        sb.append('\'');
        // The buffer includes both quotes: length 2 means nothing lies between them and a
        // length above 3 means more than one character does.
        if (sb.length() == 2) {
            return invalidStateBasedToken("Empty character literal");
        } else if (sb.length() > 3) {
            return invalidStateBasedToken("Character literal with multiple chars");
        }

        return endStateBasedToken(CHAR_LITERAL);
    }

    [^\r\n'\\]+ { sb.append(yytext()); }
    "\\b" { sb.append('\b'); }
    "\\t" { sb.append('\t'); }
    "\\n" { sb.append('\n'); }
    "\\f" { sb.append('\f'); }
    "\\r" { sb.append('\r'); }
    "\\'" { sb.append('\''); }
    "\\\"" { sb.append('"'); }
    "\\\\" { sb.append('\\'); }
    // substring(2,6) is the four hex digits following "\u".
    "\\u" {HexDigits} { sb.append((char)Integer.parseInt(yytext().substring(2,6), 16)); }

    "\\u" {HexDigit}* {
        sb.append(yytext());
        setStateBasedTokenError("Invalid \\u sequence. \\u must be followed by exactly 4 hex digits");
    }

    "\\" [^btnfr'\"\\u] {
        sb.append(yytext());
        setStateBasedTokenError("Invalid escape sequence " + yytext());
    }

    // A line break or end of input before the closing quote ends the token as an error.
    [\r\n] { return invalidStateBasedToken("Unterminated character literal"); }
    <<EOF>> { return invalidStateBasedToken("Unterminated character literal"); }
}
467
/*Misc*/
<YYINITIAL> {
    // Registers: 'v' or 'p' followed by a decimal number.
    [vp] [0-9]+ { return newToken(REGISTER); }

    "build" | "runtime" | "system" {
        return newToken(ANNOTATION_VISIBILITY);
    }

    "public" | "private" | "protected" | "static" | "final" | "synchronized" | "bridge" | "varargs" | "native" |
    "abstract" | "strictfp" | "synthetic" | "constructor" | "declared-synchronized" | "interface" | "enum" |
    "annotation" | "volatile" | "transient" {
        return newToken(ACCESS_SPEC);
    }

    "whitelist" | "greylist" | "blacklist" | "greylist-max-o" | "greylist-max-p" | "greylist-max-q" | "greylist-max-r" |
    "core-platform-api" | "test-api" {
        return newToken(HIDDENAPI_RESTRICTION);
    }

    "no-error" | "generic-error" | "no-such-class" | "no-such-field" | "no-such-method" | "illegal-class-access" |
    "illegal-field-access" | "illegal-method-access" | "class-change-error" | "instantiation-error" {
        return newToken(VERIFICATION_ERROR_TYPE);
    }

    // Index/offset references: a fixed prefix followed by one or more hex digits.
    "inline@0x" {HexDigit}+ { return newToken(INLINE_INDEX); }
    "vtable@0x" {HexDigit}+ { return newToken(VTABLE_INDEX); }
    "field@0x" {HexDigit}+ { return newToken(FIELD_OFFSET); }

    "static-put" | "static-get" | "instance-put" | "instance-get" {
        return newToken(METHOD_HANDLE_TYPE_FIELD);
    }

    "invoke-instance" | "invoke-constructor" {
        return newToken(METHOD_HANDLE_TYPE_METHOD);
    }

    // A '#' comment runs to the end of the line and is emitted on the hidden channel.
    # [^\r\n]* { return newToken(LINE_COMMENT, true); }
}
506
/*Instructions*/
/* Each group of opcode names maps to one INSTRUCTION_FORMAT* token type. Some token types
   carry an _OR_ID or _ODEX suffix; those names come from the parser's token vocabulary. */
<YYINITIAL> {
    "goto" {
        return newToken(INSTRUCTION_FORMAT10t);
    }

    "return-void" | "nop" {
        return newToken(INSTRUCTION_FORMAT10x);
    }

    "return-void-barrier" | "return-void-no-barrier" {
        return newToken(INSTRUCTION_FORMAT10x_ODEX);
    }

    "const/4" {
        return newToken(INSTRUCTION_FORMAT11n);
    }

    "move-result" | "move-result-wide" | "move-result-object" | "move-exception" | "return" | "return-wide" |
    "return-object" | "monitor-enter" | "monitor-exit" | "throw" {
        return newToken(INSTRUCTION_FORMAT11x);
    }

    "move" | "move-wide" | "move-object" | "array-length" | "neg-int" | "not-int" | "neg-long" | "not-long" |
    "neg-float" | "neg-double" | "int-to-long" | "int-to-float" | "int-to-double" | "long-to-int" | "long-to-float" |
    "long-to-double" | "float-to-int" | "float-to-long" | "float-to-double" | "double-to-int" | "double-to-long" |
    "double-to-float" | "int-to-byte" | "int-to-char" | "int-to-short" {
        return newToken(INSTRUCTION_FORMAT12x_OR_ID);
    }

    "add-int/2addr" | "sub-int/2addr" | "mul-int/2addr" | "div-int/2addr" | "rem-int/2addr" | "and-int/2addr" |
    "or-int/2addr" | "xor-int/2addr" | "shl-int/2addr" | "shr-int/2addr" | "ushr-int/2addr" | "add-long/2addr" |
    "sub-long/2addr" | "mul-long/2addr" | "div-long/2addr" | "rem-long/2addr" | "and-long/2addr" | "or-long/2addr" |
    "xor-long/2addr" | "shl-long/2addr" | "shr-long/2addr" | "ushr-long/2addr" | "add-float/2addr" |
    "sub-float/2addr" | "mul-float/2addr" | "div-float/2addr" | "rem-float/2addr" | "add-double/2addr" |
    "sub-double/2addr" | "mul-double/2addr" | "div-double/2addr" | "rem-double/2addr" {
        return newToken(INSTRUCTION_FORMAT12x);
    }

    "throw-verification-error" {
        return newToken(INSTRUCTION_FORMAT20bc);
    }

    "goto/16" {
        return newToken(INSTRUCTION_FORMAT20t);
    }

    "sget" | "sget-wide" | "sget-object" | "sget-boolean" | "sget-byte" | "sget-char" | "sget-short" | "sput" |
    "sput-wide" | "sput-object" | "sput-boolean" | "sput-byte" | "sput-char" | "sput-short" {
        return newToken(INSTRUCTION_FORMAT21c_FIELD);
    }

    "sget-volatile" | "sget-wide-volatile" | "sget-object-volatile" | "sput-volatile" | "sput-wide-volatile" |
    "sput-object-volatile" {
        return newToken(INSTRUCTION_FORMAT21c_FIELD_ODEX);
    }

    "const-string" {
        return newToken(INSTRUCTION_FORMAT21c_STRING);
    }

    "check-cast" | "new-instance" | "const-class" {
        return newToken(INSTRUCTION_FORMAT21c_TYPE);
    }

    "const-method-handle" {
        return newToken(INSTRUCTION_FORMAT21c_METHOD_HANDLE);
    }

    "const-method-type" {
        return newToken(INSTRUCTION_FORMAT21c_METHOD_TYPE);
    }

    "const/high16" {
        return newToken(INSTRUCTION_FORMAT21ih);
    }

    "const-wide/high16" {
        return newToken(INSTRUCTION_FORMAT21lh);
    }

    "const/16" | "const-wide/16" {
        return newToken(INSTRUCTION_FORMAT21s);
    }

    "if-eqz" | "if-nez" | "if-ltz" | "if-gez" | "if-gtz" | "if-lez" {
        return newToken(INSTRUCTION_FORMAT21t);
    }

    "add-int/lit8" | "rsub-int/lit8" | "mul-int/lit8" | "div-int/lit8" | "rem-int/lit8" | "and-int/lit8" |
    "or-int/lit8" | "xor-int/lit8" | "shl-int/lit8" | "shr-int/lit8" | "ushr-int/lit8" {
        return newToken(INSTRUCTION_FORMAT22b);
    }

    "iget" | "iget-wide" | "iget-object" | "iget-boolean" | "iget-byte" | "iget-char" | "iget-short" | "iput" |
    "iput-wide" | "iput-object" | "iput-boolean" | "iput-byte" | "iput-char" | "iput-short" {
        return newToken(INSTRUCTION_FORMAT22c_FIELD);
    }

    "iget-volatile" | "iget-wide-volatile" | "iget-object-volatile" | "iput-volatile" | "iput-wide-volatile" |
    "iput-object-volatile" {
        return newToken(INSTRUCTION_FORMAT22c_FIELD_ODEX);
    }

    "instance-of" | "new-array" {
        return newToken(INSTRUCTION_FORMAT22c_TYPE);
    }

    "iget-quick" | "iget-wide-quick" | "iget-object-quick" | "iput-quick" | "iput-wide-quick" | "iput-object-quick" |
    "iput-boolean-quick" | "iput-byte-quick" | "iput-char-quick" | "iput-short-quick" {
        return newToken(INSTRUCTION_FORMAT22cs_FIELD);
    }

    "rsub-int" {
        return newToken(INSTRUCTION_FORMAT22s_OR_ID);
    }

    "add-int/lit16" | "mul-int/lit16" | "div-int/lit16" | "rem-int/lit16" | "and-int/lit16" | "or-int/lit16" |
    "xor-int/lit16" {
        return newToken(INSTRUCTION_FORMAT22s);
    }

    "if-eq" | "if-ne" | "if-lt" | "if-ge" | "if-gt" | "if-le" {
        return newToken(INSTRUCTION_FORMAT22t);
    }

    "move/from16" | "move-wide/from16" | "move-object/from16" {
        return newToken(INSTRUCTION_FORMAT22x);
    }

    "cmpl-float" | "cmpg-float" | "cmpl-double" | "cmpg-double" | "cmp-long" | "aget" | "aget-wide" | "aget-object" |
    "aget-boolean" | "aget-byte" | "aget-char" | "aget-short" | "aput" | "aput-wide" | "aput-object" | "aput-boolean" |
    "aput-byte" | "aput-char" | "aput-short" | "add-int" | "sub-int" | "mul-int" | "div-int" | "rem-int" | "and-int" |
    "or-int" | "xor-int" | "shl-int" | "shr-int" | "ushr-int" | "add-long" | "sub-long" | "mul-long" | "div-long" |
    "rem-long" | "and-long" | "or-long" | "xor-long" | "shl-long" | "shr-long" | "ushr-long" | "add-float" |
    "sub-float" | "mul-float" | "div-float" | "rem-float" | "add-double" | "sub-double" | "mul-double" | "div-double" |
    "rem-double" {
        return newToken(INSTRUCTION_FORMAT23x);
    }

    "goto/32" {
        return newToken(INSTRUCTION_FORMAT30t);
    }

    "const-string/jumbo" {
        return newToken(INSTRUCTION_FORMAT31c);
    }

    "const" {
        return newToken(INSTRUCTION_FORMAT31i_OR_ID);
    }

    "const-wide/32" {
        return newToken(INSTRUCTION_FORMAT31i);
    }

    "fill-array-data" | "packed-switch" | "sparse-switch" {
        return newToken(INSTRUCTION_FORMAT31t);
    }

    "move/16" | "move-wide/16" | "move-object/16" {
        return newToken(INSTRUCTION_FORMAT32x);
    }

    "invoke-custom" {
        return newToken(INSTRUCTION_FORMAT35c_CALL_SITE);
    }

    "invoke-virtual" | "invoke-super" {
        return newToken(INSTRUCTION_FORMAT35c_METHOD);
    }

    "invoke-direct" | "invoke-static" | "invoke-interface" {
        return newToken(INSTRUCTION_FORMAT35c_METHOD_OR_METHOD_HANDLE_TYPE);
    }

    "invoke-direct-empty" {
        return newToken(INSTRUCTION_FORMAT35c_METHOD_ODEX);
    }

    "filled-new-array" {
        return newToken(INSTRUCTION_FORMAT35c_TYPE);
    }

    "execute-inline" {
        return newToken(INSTRUCTION_FORMAT35mi_METHOD);
    }

    "invoke-virtual-quick" | "invoke-super-quick" {
        return newToken(INSTRUCTION_FORMAT35ms_METHOD);
    }

    "invoke-custom/range" {
        return newToken(INSTRUCTION_FORMAT3rc_CALL_SITE);
    }

    "invoke-virtual/range" | "invoke-super/range" | "invoke-direct/range" | "invoke-static/range" |
    "invoke-interface/range" {
        return newToken(INSTRUCTION_FORMAT3rc_METHOD);
    }

    "invoke-object-init/range" {
        return newToken(INSTRUCTION_FORMAT3rc_METHOD_ODEX);
    }

    "filled-new-array/range" {
        return newToken(INSTRUCTION_FORMAT3rc_TYPE);
    }

    "execute-inline/range" {
        return newToken(INSTRUCTION_FORMAT3rmi_METHOD);
    }

    "invoke-virtual-quick/range" | "invoke-super-quick/range" {
        return newToken(INSTRUCTION_FORMAT3rms_METHOD);
    }

    "invoke-polymorphic" {
        return newToken(INSTRUCTION_FORMAT45cc_METHOD);
    }

    "invoke-polymorphic/range" {
        return newToken(INSTRUCTION_FORMAT4rcc_METHOD);
    }

    "const-wide" {
        return newToken(INSTRUCTION_FORMAT51l);
    }
}
736
// Lexes the element type that follows an array prefix. Any other character is pushed back
// and lexing returns to YYINITIAL, as it does at end of input.
<ARRAY_DESCRIPTOR> {
    {PrimitiveType} { yybegin(YYINITIAL); return newToken(PRIMITIVE_TYPE); }
    {ClassDescriptor} {
        // Push the whole descriptor back and re-lex it piecewise in the class-descriptor states.
        yypushback(yylength());
        beginStateBasedToken(CLASS_DESCRIPTOR_BEGINNING);
        // NOTE(review): yytext() is read after the full pushback, so it is presumably
        // empty and this append adds nothing — confirm against JFlex semantics.
        sb.append(yytext());
    }
    [^] { yypushback(1); yybegin(YYINITIAL); }
    <<EOF>> { yybegin(YYINITIAL); }
}
747
/*Types*/
<YYINITIAL> {
    {PrimitiveType} { return newToken(PRIMITIVE_TYPE); }
    V { return newToken(VOID_TYPE); }
    {ClassDescriptor} {
        // Push the whole descriptor back and re-lex it piecewise in the class-descriptor states.
        yypushback(yylength());
        beginStateBasedToken(CLASS_DESCRIPTOR_BEGINNING);
    }

    // we have to drop into a separate state so that we don't parse something like
    // "[I->" as "[" followed by "I-" as a SIMPLE_NAME
    {ArrayPrefix} {
      yybegin(ARRAY_DESCRIPTOR);
      return newToken(ARRAY_TYPE_PREFIX);
    }

    {PrimitiveType} {PrimitiveType}+ {
        // go back and re-lex it as a PARAM_LIST_OR_ID
        yypushback(yylength());
        yybegin(PARAM_LIST_OR_ID);
    }

    {Type} {Type}+ {
        // go back and re-lex it as a PARAM_LIST
        yypushback(yylength());
        yybegin(PARAM_LIST);
    }

    // Simple names; backticks are stripped from the quoted forms.
    {SimpleNameRaw} { return newToken(SIMPLE_NAME, yytext()); }
    {SimpleNameQuoted} { return newToken(SIMPLE_NAME, processQuotedSimpleName(yytext())); }
    // NOTE(review): processQuotedSimpleNameWithSpaces() reports its API-level error through
    // setStateBasedTokenError(), but newToken() never reads tokenError, so the error appears
    // to be dropped for SIMPLE_NAME tokens — confirm whether that is intended.
    {SimpleNameQuotedWithSpaces} { return newToken(SIMPLE_NAME, processQuotedSimpleNameWithSpaces(yytext())); }
    "<" {SimpleNameRaw} ">" { return newToken(MEMBER_NAME); }
}
781
/*Symbols/Whitespace/EOF*/
<YYINITIAL> {
    ".." { return newToken(DOTDOT); }
    "->" { return newToken(ARROW); }
    "=" { return newToken(EQUAL); }
    ":" { return newToken(COLON); }
    "," { return newToken(COMMA); }
    "{" { return newToken(OPEN_BRACE); }
    "}" { return newToken(CLOSE_BRACE); }
    "(" { return newToken(OPEN_PAREN); }
    ")" { return newToken(CLOSE_PAREN); }
    "@" { return newToken(AT); }
    // Runs of whitespace become a single token on the hidden channel.
    [\r\n\t ]+ { return newToken(WHITE_SPACE, true); }
    <<EOF>> { return newToken(EOF); }
}
797
/*catch all*/
/* Anything that no earlier rule matched: a '.' (optionally followed by a directive-like
   word) is reported as an invalid directive, any other single character as invalid text. */
<YYINITIAL> {
    // The word class is letters, digits, '-' and '_'. The range must be A-Z: writing A-z
    // would also admit the characters between 'Z' and 'a' ('[', '\', ']', '^', '`').
    "." { return invalidToken("Invalid directive"); }
    "." [a-zA-Z\-_] { return invalidToken("Invalid directive"); }
    "." [a-zA-Z\-_] [a-zA-Z0-9\-_]* { return invalidToken("Invalid directive"); }
    [^] { return invalidToken("Invalid text"); }
}
805