xref: /aosp_15_r20/external/antlr/runtime/ObjC/Framework/BaseRecognizer.m (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1*16467b97STreehugger Robot//
2*16467b97STreehugger Robot//  BaseRecognizer.m
3*16467b97STreehugger Robot//  ANTLR
4*16467b97STreehugger Robot//
5*16467b97STreehugger Robot//  Created by Alan Condit on 6/16/10.
6*16467b97STreehugger Robot// [The "BSD licence"]
7*16467b97STreehugger Robot// Copyright (c) 2010 Alan Condit
8*16467b97STreehugger Robot// All rights reserved.
9*16467b97STreehugger Robot//
10*16467b97STreehugger Robot// Redistribution and use in source and binary forms, with or without
11*16467b97STreehugger Robot// modification, are permitted provided that the following conditions
12*16467b97STreehugger Robot// are met:
13*16467b97STreehugger Robot// 1. Redistributions of source code must retain the above copyright
14*16467b97STreehugger Robot//    notice, this list of conditions and the following disclaimer.
15*16467b97STreehugger Robot// 2. Redistributions in binary form must reproduce the above copyright
16*16467b97STreehugger Robot//    notice, this list of conditions and the following disclaimer in the
17*16467b97STreehugger Robot//    documentation and/or other materials provided with the distribution.
18*16467b97STreehugger Robot// 3. The name of the author may not be used to endorse or promote products
19*16467b97STreehugger Robot//    derived from this software without specific prior written permission.
20*16467b97STreehugger Robot//
21*16467b97STreehugger Robot// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22*16467b97STreehugger Robot// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23*16467b97STreehugger Robot// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24*16467b97STreehugger Robot// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25*16467b97STreehugger Robot// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26*16467b97STreehugger Robot// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27*16467b97STreehugger Robot// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28*16467b97STreehugger Robot// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29*16467b97STreehugger Robot// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30*16467b97STreehugger Robot// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*16467b97STreehugger Robot
32*16467b97STreehugger Robot#import "ACNumber.h"
33*16467b97STreehugger Robot#import "BaseRecognizer.h"
34*16467b97STreehugger Robot#import "HashRule.h"
35*16467b97STreehugger Robot#import "RuleMemo.h"
36*16467b97STreehugger Robot#import "CommonToken.h"
37*16467b97STreehugger Robot#import "Map.h"
38*16467b97STreehugger Robot#import "NoViableAltException.h"
39*16467b97STreehugger Robot
40*16467b97STreehugger Robotextern NSInteger debug;
41*16467b97STreehugger Robot
42*16467b97STreehugger Robot@implementation BaseRecognizer
43*16467b97STreehugger Robot
// Class-wide defaults. The initializers below copy (and retain) the first
// two into each new instance's ivars.
static AMutableArray *_tokenNames = nil;
static NSString *_grammarFileName = nil;
// Created in +initialize.
static NSString *NEXT_TOKEN_RULE_NAME = nil;

// `failed` and `numberOfSyntaxErrors` are intentionally not synthesized;
// the hand-written accessors further down read them from `state` instead.
//@synthesize failed;
//@synthesize numberOfSyntaxErrors;
@synthesize state, grammarFileName, sourceName, tokenNames;
54*16467b97STreehugger Robot
/** One-time class setup: creates the shared NEXT_TOKEN_RULE_NAME string.
 *
 *  The runtime also sends +initialize to this implementation on behalf of
 *  every subclass that does not provide its own, so the body is guarded to
 *  run only for BaseRecognizer itself and only while the constant is unset.
 *  Without the guard each such subclass would create and retain another
 *  string, leaking the previous one.
 */
+ (void) initialize
{
    if ( self != [BaseRecognizer class] || NEXT_TOKEN_RULE_NAME != nil ) {
        return;
    }
    NEXT_TOKEN_RULE_NAME = [[NSString stringWithString:@"nextToken"] retain];
}
60*16467b97STreehugger Robot
/** Factory: allocates a recognizer and runs -init on it.
 *  The result is returned as allocated (not autoreleased), so the caller owns it.
 */
+ (BaseRecognizer *) newBaseRecognizer
{
    BaseRecognizer *recognizer = [[BaseRecognizer alloc] init];
    return recognizer;
}
65*16467b97STreehugger Robot
/** Factory: allocates a recognizer and runs -initWithLen: with aLen.
 *  The result is returned as allocated (not autoreleased), so the caller owns it.
 */
+ (BaseRecognizer *) newBaseRecognizerWithRuleLen:(NSInteger)aLen
{
    BaseRecognizer *recognizer = [[BaseRecognizer alloc] initWithLen:aLen];
    return recognizer;
}
70*16467b97STreehugger Robot
/** Factory: allocates a recognizer and runs -initWithState: with aState.
 *  The result is returned as allocated (not autoreleased), so the caller owns it.
 */
+ (BaseRecognizer *) newBaseRecognizer:(RecognizerSharedState *)aState
{
    BaseRecognizer *recognizer = [[BaseRecognizer alloc] initWithState:aState];
    return recognizer;
}
75*16467b97STreehugger Robot
/** Returns the class-wide token-name table (nil until +setTokenNames: is called). */
+ (AMutableArray *)getTokenNames
{
    AMutableArray *sharedNames = _tokenNames;
    return sharedNames;
}
80*16467b97STreehugger Robot
/** Replaces the class-wide token-name table.
 *  The previous table is released and the new one retained; passing the
 *  table that is already installed changes nothing.
 */
+ (void)setTokenNames:(AMutableArray *)theTokNams
{
    if ( _tokenNames == theTokNams ) {
        return;                 // already installed, ownership is correct
    }
    [_tokenNames release];      // messaging nil is a no-op
    [theTokNams retain];
    _tokenNames = theTokNams;
}
89*16467b97STreehugger Robot
/** Replaces the class-wide grammar file name.
 *
 *  The new name is retained before the old one is released, and the static
 *  is then actually updated. The previous implementation never stored
 *  aFileName and instead sent -retain to the pointer it had just released,
 *  so the setter had no effect and could touch a deallocated object.
 */
+ (void)setGrammarFileName:(NSString *)aFileName
{
    if ( _grammarFileName == aFileName ) {
        return;                     // same object, nothing to do
    }
    [aFileName retain];
    [_grammarFileName release];     // messaging nil is a no-op
    _grammarFileName = aFileName;
}
98*16467b97STreehugger Robot
/** Default initializer: creates a fresh shared state, adopts the class-wide
 *  token names and grammar file name, and puts the state in its start values.
 */
- (id) init
{
    self = [super init];
    if ( self == nil ) {
        return nil;
    }
    if ( state == nil ) {
        // NOTE(review): the factory name starts with `new`, which by Cocoa
        // convention already returns an owned object; the extra -retain may
        // over-retain. Confirm against RecognizerSharedState before changing.
        state = [[RecognizerSharedState newRecognizerSharedState] retain];
    }
    // Adopt the class-wide defaults; sending -retain to nil is a no-op.
    tokenNames = [_tokenNames retain];
    grammarFileName = [_grammarFileName retain];

    state._fsp = -1;
    state.errorRecovery = NO;        // not recovering from an error
    state.lastErrorIndex = -1;
    state.failed = NO;               // no match has failed yet
    state.syntaxErrors = 0;
    state.backtracking = 0;          // backtracking depth
    state.tokenStartCharIndex = -1;
    return self;
}
119*16467b97STreehugger Robot
/** Like -init, but the shared state is created through
 *  +newRecognizerSharedStateWithRuleLen: using aLen.
 */
- (id) initWithLen:(NSInteger)aLen
{
    self = [super init];
    if ( self == nil ) {
        return nil;
    }
    if ( state == nil ) {
        // NOTE(review): `new…` factories conventionally return an owned
        // object, so this extra -retain may over-retain. Confirm before changing.
        state = [[RecognizerSharedState newRecognizerSharedStateWithRuleLen:aLen] retain];
    }
    // Adopt the class-wide defaults; sending -retain to nil is a no-op.
    tokenNames = [_tokenNames retain];
    grammarFileName = [_grammarFileName retain];

    state._fsp = -1;
    state.errorRecovery = NO;        // not recovering from an error
    state.lastErrorIndex = -1;
    state.failed = NO;               // no match has failed yet
    state.syntaxErrors = 0;
    state.backtracking = 0;          // backtracking depth
    state.tokenStartCharIndex = -1;
    return self;
}
140*16467b97STreehugger Robot
/** Initializes the recognizer around an existing shared state.
 *  When aState is nil a new state is created instead. In both cases the
 *  state is retained and then reset to its start values, so any error or
 *  backtracking information already stored in aState is overwritten.
 */
- (id) initWithState:(RecognizerSharedState *)aState
{
    self = [super init];
    if ( self == nil ) {
        return nil;
    }
    state = ( aState != nil ) ? aState : [RecognizerSharedState newRecognizerSharedState];
    [state retain];
    // Adopt the class-wide defaults; sending -retain to nil is a no-op.
    tokenNames = [_tokenNames retain];
    grammarFileName = [_grammarFileName retain];

    state._fsp = -1;
    state.errorRecovery = NO;        // not recovering from an error
    state.lastErrorIndex = -1;
    state.failed = NO;               // no match has failed yet
    state.syntaxErrors = 0;
    state.backtracking = 0;          // backtracking depth
    state.tokenStartCharIndex = -1;
    return self;
}
163*16467b97STreehugger Robot
/** Releases the three retained ivars and then chains to super. */
- (void)dealloc
{
#ifdef DEBUG_DEALLOC
    NSLog( @"called dealloc in BaseRecognizer" );
#endif
    // Sending -release to nil is a no-op, so no nil guards are needed.
    [grammarFileName release];
    [tokenNames release];
    [state release];
    [super dealloc];
}
174*16467b97STreehugger Robot
/** Puts the recognizer back into its initial state without touching the
 *  token source. Grammar writers can extend this to reset custom ivars.
 *  Does nothing when there is no shared state.
 */
- (void) reset
{
    if ( state == nil ) {
        return;
    }
    // Empty the follow stack if it exists and holds anything.
    if ( state.following != nil && [state.following count] ) {
        [state.following removeAllObjects];
    }
    state._fsp = -1;
    state.errorRecovery = NO;        // not recovering from an error
    state.lastErrorIndex = -1;
    state.failed = NO;               // no match has failed yet
    state.syntaxErrors = 0;
    state.backtracking = 0;          // backtracking depth
    state.tokenStartCharIndex = -1;
    // Drop any memoized rule results.
    if ( state.ruleMemo != nil && [state.ruleMemo count] ) {
        [state.ruleMemo removeAllObjects];
    }
}
197*16467b97STreehugger Robot
/** Returns the `failed` flag, read through the shared state's -getFailed. */
- (BOOL) getFailed
{
    BOOL didFail = [state getFailed];
    return didFail;
}
202*16467b97STreehugger Robot
/** Stores the `failed` flag by forwarding it to the shared state's -setFailed:. */
- (void) setFailed:(BOOL)flag
{
    RecognizerSharedState *shared = state;
    [shared setFailed:flag];
}
207*16467b97STreehugger Robot
/** Returns the shared state object this recognizer is currently using. */
- (RecognizerSharedState *) getState
{
    RecognizerSharedState *current = state;
    return current;
}
212*16467b97STreehugger Robot
/** Swaps in a different shared state: the old one is released and the new
 *  one retained. Passing the state that is already installed is a no-op.
 */
- (void) setState:(RecognizerSharedState *) theState
{
    if ( state == theState ) {
        return;
    }
    [state release];        // messaging nil is a no-op
    state = theState;
    [state retain];
}
221*16467b97STreehugger Robot
/** Base implementation has no input stream and returns nil;
 *  inheriting classes must override this.
 */
- (id)input
{
    id noInput = nil;
    return noInput;
}
226*16467b97STreehugger Robot
/** Hook for inheriting classes to override; the base version does nothing. */
- (void)skip
{
    // intentionally empty
}
231*16467b97STreehugger Robot
/** Matches the current input symbol against ttype.
 *
 *  On a match the symbol is consumed, the errorRecovery and failed flags
 *  are cleared, and the symbol is returned. On a mismatch while
 *  backtracking, `failed` is set and the current symbol is returned
 *  unconsumed. Otherwise the result of
 *  -recoverFromMismatchedToken:TokenType:Follow: is returned.
 */
-(id) match:(id<IntStream>)anInput TokenType:(NSInteger)ttype Follow:(ANTLRBitSet *)follow
{
    // Fetch the symbol before consuming, so it can be returned afterwards.
    id currentSymbol = [self getCurrentInputSymbol:anInput];
    if ( [anInput LA:1] != ttype ) {
        if ( state.backtracking > 0 ) {
            state.failed = YES;
            return currentSymbol;
        }
        return [self recoverFromMismatchedToken:anInput TokenType:ttype Follow:follow];
    }
    [anInput consume];
    state.errorRecovery = NO;
    state.failed = NO;
    return currentSymbol;
}
248*16467b97STreehugger Robot
/** Accepts whatever symbol is next: clears the failed and errorRecovery
 *  flags and consumes one symbol from anInput.
 */
-(void) matchAny:(id<IntStream>)anInput
{
    state.failed = NO;
    state.errorRecovery = NO;
    [anInput consume];
}
255*16467b97STreehugger Robot
/** Returns YES when the symbol after the current one (lookahead 2) has type
 *  ttype, which means the current symbol is an extra token that could be deleted.
 */
-(BOOL) mismatchIsUnwantedToken:(id<IntStream>)anInput TokenType:(NSInteger)ttype
{
    BOOL nextIsExpected = ( [anInput LA:2] == ttype );
    return nextIsExpected;
}
260*16467b97STreehugger Robot
/** Decides whether a mismatch looks like a missing token.
 *
 *  Returns YES when the current lookahead symbol is consistent with what may
 *  follow the expected element, so error recovery is free to "insert" the
 *  missing token. Returns NO when follow is nil, because then nothing is
 *  known about what can follow.
 */
-(BOOL) mismatchIsMissingToken:(id<IntStream>)anInput Follow:(ANTLRBitSet *) follow
{
    if ( follow == nil ) {
        // No follow information: the caller can only consume one token and hope.
        return NO;
    }
    if ( [follow member:TokenTypeEOR] ) {
        // The local follow set reaches the end of the rule, so widen it with
        // what can follow this rule in the current invocation context.
        follow = [follow or:[self computeContextSensitiveRuleFOLLOW]];
        if ( state._fsp >= 0 ) {
            // Not the start symbol: EOR itself is no longer meaningful.
            [follow remove:(TokenTypeEOR)];
        }
    }
    // BitSet cannot hold negative numbers such as -1 (EOF), so EOR is left in
    // the follow set to indicate that the end of the start symbol is in the
    // set (EOF can follow).
    return ( [follow member:[anInput LA:1]] || [follow member:TokenTypeEOR] );
}
292*16467b97STreehugger Robot
/** Report a recognition problem.
 *
 *  Sets errorRecovery to mark that the parser is recovering rather than
 *  parsing. While that flag is set no further errors are reported; the
 *  next successful match clears it again. The sequence is:
 *
 * 		1. error occurs
 * 		2. enter recovery mode, report error
 * 		3. consume until token found in resynch set
 * 		4. try to resume parsing
 * 		5. next match() will reset errorRecovery mode
 *
 *  If you override, make sure to update syntaxErrors if you care about that.
 */
-(void) reportError:(RecognitionException *) e
{
    if ( !state.errorRecovery ) {
        // First error since the last successful match: count and display it.
        // Errors seen while already recovering are spurious and are skipped.
        state.syntaxErrors += 1;
        state.errorRecovery = YES;
        [self displayRecognitionError:[self getTokenNames] Exception:e];
    }
}
321*16467b97STreehugger Robot
/** Builds " <header> <message>" for the exception and hands it to
 *  -emitErrorMessage:.
 */
-(void) displayRecognitionError:(AMutableArray *)theTokNams Exception:(RecognitionException *)e
{
    NSString *header  = [self getErrorHeader:e];
    NSString *message = [self getErrorMessage:e TokenNames:theTokNams];
    NSString *fullLine = [NSString stringWithFormat:@" %@ %@", header, message];
    [self emitErrorMessage:fullLine];
}
328*16467b97STreehugger Robot
/** What error message should be generated for the various
 *  exception types?
 *
 *  Not very object-oriented code, but keeping all error message generation
 *  within one method (rather than spread among the exception classes) means
 *  the exception classes need no pointer back to this object, and a message
 *  can be changed without subclassing an exception and persuading ANTLR to
 *  create the subclass instead of the default.
 *
 *  For grammar debugging, override this to add more information such as
 *  the rule invocation stack and, for no viable alts, the decision
 *  description and state.
 *
 *  @param e           the exception to describe
 *  @param theTokNams  token-name table, indexed by the expected token type
 *  @return the message text, without the line/column header
 *
 *  Corrections relative to the earlier version:
 *   - NoViableAltException: the decision and state numbers are now printed
 *     under their matching labels (they were swapped) and formatted as long.
 *   - EarlyExitException: the format string now has a %@ placeholder, so
 *     the offending token is actually shown.
 */
- (NSString *)getErrorMessage:(RecognitionException *)e TokenNames:(AMutableArray *)theTokNams
{
    NSString *msg;
    // Subclasses are tested before MismatchedTokenException so that the more
    // specific wording is chosen first.
    if ( [e isKindOfClass:[UnwantedTokenException class]] ) {
        UnwantedTokenException *ute = (UnwantedTokenException *)e;
        NSString *tokenName = @"<unknown>";
        if ( ute.expecting == TokenTypeEOF ) {
            tokenName = @"EOF";
        }
        else {
            tokenName = (NSString *)[theTokNams objectAtIndex:ute.expecting];
        }
        msg = [NSString stringWithFormat:@"extraneous input %@ expecting %@",
               [self getTokenErrorDisplay:[ute getUnexpectedToken]],
               tokenName];
    }
    else if ( [e isKindOfClass:[MissingTokenException class] ] ) {
        MissingTokenException *mte = (MissingTokenException *)e;
        NSString *tokenName = @"<unknown>";
        if ( mte.expecting == TokenTypeEOF ) {
            tokenName = @"EOF";
        }
        else {
            tokenName = [theTokNams objectAtIndex:mte.expecting];
        }
        msg = [NSString stringWithFormat:@"missing %@ at %@", tokenName, [self getTokenErrorDisplay:(e.token)] ];
    }
    else if ( [e isKindOfClass:[MismatchedTokenException class]] ) {
        MismatchedTokenException *mte = (MismatchedTokenException *)e;
        NSString *tokenName = @"<unknown>";
        if ( mte.expecting == TokenTypeEOF ) {
            tokenName = @"EOF";
        }
        else {
            tokenName = [theTokNams objectAtIndex:mte.expecting];
        }
        msg = [NSString stringWithFormat:@"mismatched input %@ expecting %@",[self getTokenErrorDisplay:(e.token)], tokenName];
    }
    else if ( [e isKindOfClass:[MismatchedTreeNodeException class]] ) {
        MismatchedTreeNodeException *mtne = (MismatchedTreeNodeException *)e;
        NSString *tokenName = @"<unknown>";
        if ( mtne.expecting == TokenTypeEOF ) {
            tokenName = @"EOF";
        }
        else {
            tokenName = [theTokNams objectAtIndex:mtne.expecting];
        }
        msg = [NSString stringWithFormat:@"mismatched tree node: %@ expecting %@", mtne.node, tokenName];
    }
    else if ( [e isKindOfClass:[NoViableAltException class]] ) {
        NoViableAltException *nvae = (NoViableAltException *)e;
        // Arguments follow the label order: decision first, then state.
        msg = [NSString stringWithFormat:@"no viable alternative decision:%ld state:%ld at input %@",
               (long)nvae.decisionNumber,
               (long)nvae.stateNumber,
               [self getTokenErrorDisplay:e.token]];
    }
    else if ( [e isKindOfClass:[EarlyExitException class]] ) {
        // for development, the decision number could be added here as well
        msg = [NSString stringWithFormat:@"required (...)+ loop did not match anything at input %@",
               [self getTokenErrorDisplay:e.token]];
    }
    else if ( [e isKindOfClass:[MismatchedSetException class]] ) {
        MismatchedSetException *mse = (MismatchedSetException *)e;
        msg = [NSString stringWithFormat:@"mismatched input %@ expecting set %@",
               [self getTokenErrorDisplay:(e.token)],
               mse.expecting];
    }
#pragma warning NotSet not yet implemented.
    else if ( [e isKindOfClass:[MismatchedNotSetException class] ] ) {
        MismatchedNotSetException *mse = (MismatchedNotSetException *)e;
        msg = [NSString stringWithFormat:@"mismatched input %@ expecting set %@",
               [self getTokenErrorDisplay:(e.token)],
               mse.expecting];
    }
    else if ( [e isKindOfClass:[FailedPredicateException class]] ) {
        FailedPredicateException *fpe = (FailedPredicateException *)e;
        msg = [NSString stringWithFormat:@"rule %@ failed predicate: { %@ }?", fpe.ruleName, fpe.predicate];
    }
    else {
        // Unknown exception type: fall back to its name.
        msg = [NSString stringWithFormat:@"Exception= %@\n", e.name];
    }
    return msg;
}
435*16467b97STreehugger Robot
/** Number of recognition errors counted so far (lexer, parser, tree parser).
 *  Each recognizer keeps its own count, so a parser and its lexer report
 *  separate numbers. Spurious errors seen between an error and the next
 *  valid token match are not included.
 *
 *  See also -reportError:
 */
- (NSInteger) getNumberOfSyntaxErrors
{
    NSInteger errorCount = state.syntaxErrors;
    return errorCount;
}
447*16467b97STreehugger Robot
/** What is the error header, normally line/character position information?
 *
 *  Returns "line <line>:<column>" for the exception. Both numbers are cast
 *  to long and printed with %ld, so the format is correct whether the
 *  properties are declared as int or as a 64-bit NSInteger/NSUInteger
 *  (%d is only valid for int).
 */
- (NSString *)getErrorHeader:(RecognitionException *)e
{
    return [NSString stringWithFormat:@"line %ld:%ld", (long)e.line, (long)e.charPositionInLine];
}
453*16467b97STreehugger Robot
/** How should a token be displayed in an error message? The default
 *  is to display just the text, but during development you might
 *  want a lot more information. Override in that case, rather than
 *  changing your token class.
 *
 *  A token without text is shown as <EOF> or as <type-number>. Newline,
 *  carriage return and tab are shown as the two-character escapes \n, \r
 *  and \t, and the result is wrapped in single quotes.
 *
 *  Corrections relative to the earlier version:
 *   - the numeric token type is formatted with %ld; it was formatted with
 *     %@, which treats the integer as an object pointer;
 *   - the escape replacements emit one backslash. The doubled backslash was
 *     carried over from a Java regex replacement string, where it denotes a
 *     single literal backslash; this replacement is literal.
 */
- (NSString *)getTokenErrorDisplay:(id<Token>)t
{
    NSString *s = t.text;
    if ( s == nil ) {
        if ( t.type == TokenTypeEOF ) {
            s = @"<EOF>";
        }
        else {
            s = [NSString stringWithFormat:@"<%ld>", (long)t.type];
        }
    }
    s = [s stringByReplacingOccurrencesOfString:@"\n" withString:@"\\n"];
    s = [s stringByReplacingOccurrencesOfString:@"\r" withString:@"\\r"];
    s = [s stringByReplacingOccurrencesOfString:@"\t" withString:@"\\t"];
    return [NSString stringWithFormat:@"'%@'", s];
}
478*16467b97STreehugger Robot
/** Sink for finished error messages; the default writes them with NSLog.
 *  Override this method to change where error messages go.
 */
- (void) emitErrorMessage:(NSString *) msg
{
    // msg is passed as an argument, never as the format string
    NSLog(@"%@", msg);
}
485*16467b97STreehugger Robot
/** Recover from an error found on the input stream. This is for
 *  NoViableAlt and mismatched symbol exceptions. With single token
 *  insertion and deletion enabled, this will usually not see mismatched
 *  symbol exceptions, but there can still be a mismatched token that the
 *  match routine could not recover from.
 */
- (void)recover:(id<IntStream>)anInput Exception:(RecognitionException *)re
{
    BOOL stuckAtSameIndex = ( state.lastErrorIndex == anInput.index );
    if ( stuckAtSameIndex ) {
        // A second error at the same index means LT(1) is in the recovery
        // set and nothing was consumed. Consume one token as a failsafe
        // against an infinite loop.
        [anInput consume];
    }
    state.lastErrorIndex = anInput.index;

    ANTLRBitSet *recoverySet = [self computeErrorRecoverySet];
    [self beginResync];
    [self consumeUntilFollow:anInput Follow:recoverySet];
    [self endResync];
}
507*16467b97STreehugger Robot
/** Called by the recovery methods of this class right before they start
 *  consuming tokens to resynchronize.  This implementation is empty;
 *  presumably it exists so subclasses can observe resynchronization --
 *  confirm against the subclasses.
 */
- (void) beginResync
{
    // nothing to do in the base recognizer
}
512*16467b97STreehugger Robot
/** Called by the recovery methods of this class right after they have
 *  finished consuming tokens to resynchronize.  This implementation is
 *  empty; presumably it exists so subclasses can observe the end of
 *  resynchronization -- confirm against the subclasses.
 */
- (void) endResync
{
    // nothing to do in the base recognizer
}
517*16467b97STreehugger Robot
518*16467b97STreehugger Robot/*  Compute the error recovery set for the current rule.  During
519*16467b97STreehugger Robot *  rule invocation, the parser pushes the set of tokens that can
520*16467b97STreehugger Robot *  follow that rule reference on the stack; this amounts to
521*16467b97STreehugger Robot *  computing FIRST of what follows the rule reference in the
522*16467b97STreehugger Robot *  enclosing rule. This local follow set only includes tokens
523*16467b97STreehugger Robot *  from within the rule; i.e., the FIRST computation done by
524*16467b97STreehugger Robot *  ANTLR stops at the end of a rule.
525*16467b97STreehugger Robot *
526*16467b97STreehugger Robot *  EXAMPLE
527*16467b97STreehugger Robot *
528*16467b97STreehugger Robot *  When you find a "no viable alt exception", the input is not
529*16467b97STreehugger Robot *  consistent with any of the alternatives for rule r.  The best
530*16467b97STreehugger Robot *  thing to do is to consume tokens until you see something that
531*16467b97STreehugger Robot *  can legally follow a call to r *or* any rule that called r.
532*16467b97STreehugger Robot *  You don't want the exact set of viable next tokens because the
533*16467b97STreehugger Robot *  input might just be missing a token--you might consume the
534*16467b97STreehugger Robot *  rest of the input looking for one of the missing tokens.
535*16467b97STreehugger Robot *
536*16467b97STreehugger Robot *  Consider grammar:
537*16467b97STreehugger Robot *
538*16467b97STreehugger Robot *  a : '[' b ']'
539*16467b97STreehugger Robot *    | '(' b ')'
540*16467b97STreehugger Robot *    ;
541*16467b97STreehugger Robot *  b : c '^' INT ;
542*16467b97STreehugger Robot *  c : ID
543*16467b97STreehugger Robot *    | INT
544*16467b97STreehugger Robot *    ;
545*16467b97STreehugger Robot *
546*16467b97STreehugger Robot *  At each rule invocation, the set of tokens that could follow
547*16467b97STreehugger Robot *  that rule is pushed on a stack.  Here are the various "local"
548*16467b97STreehugger Robot *  follow sets:
549*16467b97STreehugger Robot *
550*16467b97STreehugger Robot *  FOLLOW(b1_in_a) = FIRST(']') = ']'
551*16467b97STreehugger Robot *  FOLLOW(b2_in_a) = FIRST(')') = ')'
552*16467b97STreehugger Robot *  FOLLOW(c_in_b) = FIRST('^') = '^'
553*16467b97STreehugger Robot *
554*16467b97STreehugger Robot *  Upon erroneous input "[]", the call chain is
555*16467b97STreehugger Robot *
556*16467b97STreehugger Robot *  a -> b -> c
557*16467b97STreehugger Robot *
558*16467b97STreehugger Robot *  and, hence, the follow context stack is:
559*16467b97STreehugger Robot *
560*16467b97STreehugger Robot *  depth  local follow set     after call to rule
561*16467b97STreehugger Robot *    0         <EOF>                    a (from main())
562*16467b97STreehugger Robot *    1          ']'                     b
 *    2          '^'                     c
564*16467b97STreehugger Robot *
565*16467b97STreehugger Robot *  Notice that ')' is not included, because b would have to have
566*16467b97STreehugger Robot *  been called from a different context in rule a for ')' to be
567*16467b97STreehugger Robot *  included.
568*16467b97STreehugger Robot *
569*16467b97STreehugger Robot *  For error recovery, we cannot consider FOLLOW(c)
570*16467b97STreehugger Robot *  (context-sensitive or otherwise).  We need the combined set of
571*16467b97STreehugger Robot *  all context-sensitive FOLLOW sets--the set of all tokens that
572*16467b97STreehugger Robot *  could follow any reference in the call chain.  We need to
573*16467b97STreehugger Robot *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
574*16467b97STreehugger Robot *  we resync'd to that token, we'd consume until EOF.  We need to
575*16467b97STreehugger Robot *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
576*16467b97STreehugger Robot *  In this case, for input "[]", LA(1) is in this set so we would
577*16467b97STreehugger Robot *  not consume anything and after printing an error rule c would
578*16467b97STreehugger Robot *  return normally.  It would not find the required '^' though.
579*16467b97STreehugger Robot *  At this point, it gets a mismatched token error and throws an
580*16467b97STreehugger Robot *  exception (since LA(1) is not in the viable following token
581*16467b97STreehugger Robot *  set).  The rule exception handler tries to recover, but finds
582*16467b97STreehugger Robot *  the same recovery set and doesn't consume anything.  Rule b
583*16467b97STreehugger Robot *  exits normally returning to rule a.  Now it finds the ']' (and
584*16467b97STreehugger Robot *  with the successful match exits errorRecovery mode).
585*16467b97STreehugger Robot *
 *  So, you can see that the parser walks up the call chain looking
587*16467b97STreehugger Robot *  for the token that was a member of the recovery set.
588*16467b97STreehugger Robot *
589*16467b97STreehugger Robot *  Errors are not generated in errorRecovery mode.
590*16467b97STreehugger Robot *
591*16467b97STreehugger Robot *  ANTLR's error recovery mechanism is based upon original ideas:
592*16467b97STreehugger Robot *
593*16467b97STreehugger Robot *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
594*16467b97STreehugger Robot *
595*16467b97STreehugger Robot *  and
596*16467b97STreehugger Robot *
597*16467b97STreehugger Robot *  "A note on error recovery in recursive descent parsers":
598*16467b97STreehugger Robot *  http://portal.acm.org/citation.cfm?id=947902.947905
599*16467b97STreehugger Robot *
600*16467b97STreehugger Robot *  Later, Josef Grosch had some good ideas:
601*16467b97STreehugger Robot *
602*16467b97STreehugger Robot *  "Efficient and Comfortable Error Recovery in Recursive Descent
603*16467b97STreehugger Robot *  Parsers":
604*16467b97STreehugger Robot *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
605*16467b97STreehugger Robot *
606*16467b97STreehugger Robot *  Like Grosch I implemented local FOLLOW sets that are combined
607*16467b97STreehugger Robot *  at run-time upon error to avoid overhead during parsing.
608*16467b97STreehugger Robot */
- (ANTLRBitSet *) computeErrorRecoverySet
{
    // Non-exact combination: the local follow set of every entry on the
    // follow stack is merged, not just those visible through end-of-rule.
    ANTLRBitSet *recoverySet = [self combineFollows:NO];
    return recoverySet;
}
613*16467b97STreehugger Robot
614*16467b97STreehugger Robot/** Compute the context-sensitive FOLLOW set for current rule.
615*16467b97STreehugger Robot *  This is set of token types that can follow a specific rule
616*16467b97STreehugger Robot *  reference given a specific call chain.  You get the set of
617*16467b97STreehugger Robot *  viable tokens that can possibly come next (lookahead depth 1)
618*16467b97STreehugger Robot *  given the current call chain.  Contrast this with the
619*16467b97STreehugger Robot *  definition of plain FOLLOW for rule r:
620*16467b97STreehugger Robot *
621*16467b97STreehugger Robot *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
622*16467b97STreehugger Robot *
623*16467b97STreehugger Robot *  where x in T* and alpha, beta in V*; T is set of terminals and
624*16467b97STreehugger Robot *  V is the set of terminals and nonterminals.  In other words,
625*16467b97STreehugger Robot *  FOLLOW(r) is the set of all tokens that can possibly follow
626*16467b97STreehugger Robot *  references to r in *any* sentential form (context).  At
627*16467b97STreehugger Robot *  runtime, however, we know precisely which context applies as
628*16467b97STreehugger Robot *  we have the call chain.  We may compute the exact (rather
629*16467b97STreehugger Robot *  than covering superset) set of following tokens.
630*16467b97STreehugger Robot *
631*16467b97STreehugger Robot *  For example, consider grammar:
632*16467b97STreehugger Robot *
633*16467b97STreehugger Robot *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
634*16467b97STreehugger Robot *       | "return" expr '.'
635*16467b97STreehugger Robot *       ;
636*16467b97STreehugger Robot *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
637*16467b97STreehugger Robot *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
638*16467b97STreehugger Robot *       | '(' expr ')'
639*16467b97STreehugger Robot *       ;
640*16467b97STreehugger Robot *
641*16467b97STreehugger Robot *  The FOLLOW sets are all inclusive whereas context-sensitive
642*16467b97STreehugger Robot *  FOLLOW sets are precisely what could follow a rule reference.
 *  For input "i=(3);", here is the derivation:
644*16467b97STreehugger Robot *
645*16467b97STreehugger Robot *  stat => ID '=' expr ';'
646*16467b97STreehugger Robot *       => ID '=' atom ('+' atom)* ';'
647*16467b97STreehugger Robot *       => ID '=' '(' expr ')' ('+' atom)* ';'
648*16467b97STreehugger Robot *       => ID '=' '(' atom ')' ('+' atom)* ';'
649*16467b97STreehugger Robot *       => ID '=' '(' INT ')' ('+' atom)* ';'
650*16467b97STreehugger Robot *       => ID '=' '(' INT ')' ';'
651*16467b97STreehugger Robot *
652*16467b97STreehugger Robot *  At the "3" token, you'd have a call chain of
653*16467b97STreehugger Robot *
654*16467b97STreehugger Robot *    stat -> expr -> atom -> expr -> atom
655*16467b97STreehugger Robot *
656*16467b97STreehugger Robot *  What can follow that specific nested ref to atom?  Exactly ')'
657*16467b97STreehugger Robot *  as you can see by looking at the derivation of this specific
658*16467b97STreehugger Robot *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
659*16467b97STreehugger Robot *
660*16467b97STreehugger Robot *  You want the exact viable token set when recovering from a
661*16467b97STreehugger Robot *  token mismatch.  Upon token mismatch, if LA(1) is member of
662*16467b97STreehugger Robot *  the viable next token set, then you know there is most likely
663*16467b97STreehugger Robot *  a missing token in the input stream.  "Insert" one by just not
664*16467b97STreehugger Robot *  throwing an exception.
665*16467b97STreehugger Robot */
- (ANTLRBitSet *)computeContextSensitiveRuleFOLLOW
{
    // Exact combination: merging of enclosing follow sets stops at the
    // first local follow set that cannot see the end of its rule.
    ANTLRBitSet *viableSet = [self combineFollows:YES];
    return viableSet;
}
670*16467b97STreehugger Robot
/** Merge the local follow sets stored on the follow stack into one set.
 *
 *  The stack (state.following) is walked from index state._fsp, the
 *  innermost rule invocation, down to index 0, OR-ing each local follow
 *  set into the result.
 *
 *  @param exact  NO:  every set on the stack is merged.
 *                YES: a set is merged, and the walk then continues to
 *                     the next outer set only if the set just merged
 *                     contains TokenTypeEOR (the end of that rule is
 *                     visible).  The first set without TokenTypeEOR is
 *                     still merged but ends the walk.  TokenTypeEOR is
 *                     removed from the result after each merge except
 *                     at index 0, so it survives only when the walk
 *                     reaches the bottom of the stack.
 *  @return a newly created bit set holding the combined follow tokens;
 *          empty if the stack pointer is negative.
 */
- (ANTLRBitSet *)combineFollows:(BOOL) exact
{
    // NOTE(review): +newBitSet followed by -retain looks like one retain
    // too many under manual reference counting.  Left unchanged because
    // the ownership expectations of callers are not visible here --
    // confirm before removing.
    ANTLRBitSet *followSet = [[ANTLRBitSet newBitSet] retain];

    // Index with NSInteger so the loop variable has the same width as
    // the stack pointer instead of silently narrowing it to int.
    for (NSInteger i = state._fsp; i >= 0; i--) {
        ANTLRBitSet *localFollowSet = (ANTLRBitSet *)[state.following objectAtIndex:i];
        [followSet orInPlace:localFollowSet];
        if ( !exact ) {
            continue;   // non-exact mode merges everything on the stack
        }
        // can we see end of rule?
        if ( ![localFollowSet member:TokenTypeEOR] ) {
            break;      // can't see end of rule, quit
        }
        // Only leave EOR in set if at the bottom of the stack (start
        // rule); this lets us know if we have to include
        // follow(start rule); i.e., EOF
        if ( i > 0 ) {
            [followSet remove:TokenTypeEOR];
        }
    }
    return followSet;
}
702*16467b97STreehugger Robot
703*16467b97STreehugger Robot/** Attempt to recover from a single missing or extra token.
704*16467b97STreehugger Robot *
705*16467b97STreehugger Robot *  EXTRA TOKEN
706*16467b97STreehugger Robot *
 *  LA(1) is not what we are looking for.  If LA(2) has the right token,
 *  however, then assume LA(1) is some extra spurious token.  Delete it
 *  and consume LA(2) as if we were doing a normal match(), which
 *  advances the input.
711*16467b97STreehugger Robot *
712*16467b97STreehugger Robot *  MISSING TOKEN
713*16467b97STreehugger Robot *
714*16467b97STreehugger Robot *  If current token is consistent with what could come after
715*16467b97STreehugger Robot *  ttype then it is ok to "insert" the missing token, else throw
 *  exception.  For example, input "i=(3;" is clearly missing the
717*16467b97STreehugger Robot *  ')'.  When the parser returns from the nested call to expr, it
718*16467b97STreehugger Robot *  will have call chain:
719*16467b97STreehugger Robot *
720*16467b97STreehugger Robot *    stat -> expr -> atom
721*16467b97STreehugger Robot *
722*16467b97STreehugger Robot *  and it will be trying to match the ')' at this point in the
723*16467b97STreehugger Robot *  derivation:
724*16467b97STreehugger Robot *
725*16467b97STreehugger Robot *       => ID '=' '(' INT ')' ('+' atom)* ';'
726*16467b97STreehugger Robot *                          ^
727*16467b97STreehugger Robot *  match() will see that ';' doesn't match ')' and report a
728*16467b97STreehugger Robot *  mismatched token error.  To recover, it sees that LA(1)==';'
729*16467b97STreehugger Robot *  is in the set of tokens that can follow the ')' token
730*16467b97STreehugger Robot *  reference in rule atom.  It can assume that you forgot the ')'.
731*16467b97STreehugger Robot */
- (id<Token>)recoverFromMismatchedToken:(id<IntStream>)anInput
                       TokenType:(NSInteger)ttype
                          Follow:(ANTLRBitSet *)follow
{
    // --- single token deletion -------------------------------------
    // If the token after the current one is what we want, treat the
    // current token as spurious and "delete" it.
    if ( [self mismatchIsUnwantedToken:anInput TokenType:ttype] ) {
        // The exception is created before anything is consumed.
        RecognitionException *unwanted = [UnwantedTokenException newException:ttype Stream:anInput];
        [self beginResync];
        [anInput consume];              // simply delete extra token
        [self endResync];
        // report after consuming so AW sees the token in the exception
        [self reportError:unwanted];
        // we want to return the token we're actually matching
        id wantedSymbol = [self getCurrentInputSymbol:anInput];
        [anInput consume];              // move past ttype token as if all were ok
        return wantedSymbol;
    }

    // --- single token insertion ------------------------------------
    // Deletion did not apply; see whether pretending the expected token
    // was present lets parsing continue.
    if ( [self mismatchIsMissingToken:anInput Follow:follow] ) {
        // No exception object exists yet at this point, so nil is what
        // getMissingSymbol receives for its Exception argument.
        id<Token> conjured = [self getMissingSymbol:anInput Exception:nil TokenType:ttype Follow:follow];
        RecognitionException *missing = [MissingTokenException newException:ttype Stream:anInput With:conjured];
        // report after inserting so AW sees the token in the exception
        [self reportError:missing];
        return conjured;
    }

    // --- give up ----------------------------------------------------
    // even that didn't work; must throw the exception
    RecognitionException *mismatch = [MismatchedTokenException newException:ttype Stream:anInput];
    @throw mismatch;
}
765*16467b97STreehugger Robot
/** Recovery for a mismatch against a token set (marked "not currently
 *  used" by the original author).
 *
 *  If the mismatch looks like a missing token, the error is reported and
 *  whatever getMissingSymbol produces for TokenTypeInvalid is returned;
 *  otherwise the exception passed in is re-thrown.
 *
 *  @param anInput  the input stream
 *  @param e        the exception describing the mismatch; reported or thrown
 *  @param follow   follow set handed to the missing-token check
 *  @return the conjured symbol when recovery by insertion applies
 */
-(id) recoverFromMismatchedSet:(id<IntStream>)anInput
                     Exception:(RecognitionException *)e
                        Follow:(ANTLRBitSet *) follow
{
    if ( ![self mismatchIsMissingToken:anInput Follow:follow] ) {
        // TODO do single token deletion like above for Token mismatch
        @throw e;
    }
    [self reportError:e];
    // we don't know how to conjure up a token for sets yet
    id conjured = [self getMissingSymbol:anInput Exception:e TokenType:TokenTypeInvalid Follow:follow];
    return conjured;
}
780*16467b97STreehugger Robot
781*16467b97STreehugger Robot/** Match needs to return the current input symbol, which gets put
782*16467b97STreehugger Robot *  into the label for the associated token ref; e.g., x=ID.  Token
783*16467b97STreehugger Robot *  and tree parsers need to return different objects. Rather than test
784*16467b97STreehugger Robot *  for input stream type or change the IntStream interface, I use
785*16467b97STreehugger Robot *  a simple method to ask the recognizer to tell me what the current
786*16467b97STreehugger Robot *  input symbol is.
787*16467b97STreehugger Robot *
788*16467b97STreehugger Robot *  This is ignored for lexers.
789*16467b97STreehugger Robot */
- (id) getCurrentInputSymbol:(id<IntStream>)anInput
{
    // The base recognizer does not know what kind of symbol the stream
    // carries, so it reports none; anInput is not consulted here.
    return nil;
}
794*16467b97STreehugger Robot
795*16467b97STreehugger Robot/** Conjure up a missing token during error recovery.
796*16467b97STreehugger Robot *
797*16467b97STreehugger Robot *  The recognizer attempts to recover from single missing
798*16467b97STreehugger Robot *  symbols. But, actions might refer to that missing symbol.
799*16467b97STreehugger Robot *  For example, x=ID {f($x);}. The action clearly assumes
800*16467b97STreehugger Robot *  that there has been an identifier matched previously and that
801*16467b97STreehugger Robot *  $x points at that token. If that token is missing, but
802*16467b97STreehugger Robot *  the next token in the stream is what we want we assume that
803*16467b97STreehugger Robot *  this token is missing and we keep going. Because we
804*16467b97STreehugger Robot *  have to return some token to replace the missing token,
805*16467b97STreehugger Robot *  we have to conjure one up. This method gives the user control
806*16467b97STreehugger Robot *  over the tokens returned for missing tokens. Mostly,
807*16467b97STreehugger Robot *  you will want to create something special for identifier
808*16467b97STreehugger Robot *  tokens. For literals such as '{' and ',', the default
809*16467b97STreehugger Robot *  action in the parser or tree parser works. It simply creates
810*16467b97STreehugger Robot *  a CommonToken of the appropriate type. The text will be the token.
811*16467b97STreehugger Robot *  If you change what tokens must be created by the lexer,
812*16467b97STreehugger Robot *  override this method to create the appropriate tokens.
813*16467b97STreehugger Robot */
- (id)getMissingSymbol:(id<IntStream>)anInput
             Exception:(RecognitionException *)e
             TokenType:(NSInteger)expectedTokenType
                Follow:(ANTLRBitSet *)follow
{
    // The base recognizer conjures nothing: none of the arguments are
    // inspected and callers receive nil as the "inserted" symbol.
    return nil;
}
821*16467b97STreehugger Robot
822*16467b97STreehugger Robot
/** Consume tokens until LA(1) is the given token type or EOF.
 *  Nothing is consumed if LA(1) already satisfies that condition.
 */
-(void) consumeUntilTType:(id<IntStream>)anInput TokenType:(NSInteger)tokenType
{
    for (int lookahead = [anInput LA:1];
         lookahead != TokenTypeEOF && lookahead != tokenType;
         lookahead = [anInput LA:1]) {
        [anInput consume];
    }
}
832*16467b97STreehugger Robot
/** Consume tokens until LA(1) is a member of the given token set or
 *  EOF.  Nothing is consumed if LA(1) already satisfies that condition.
 */
-(void) consumeUntilFollow:(id<IntStream>)anInput Follow:(ANTLRBitSet *)set
{
    while ( YES ) {
        int lookahead = [anInput LA:1];
        // EOF is tested first so the set is never asked about it
        if ( lookahead == TokenTypeEOF || [set member:lookahead] ) {
            break;
        }
        [anInput consume];
    }
}
844*16467b97STreehugger Robot
/** Push a rule's follow set onto the follow stack held in `state`.
 *
 *  state._fsp is the index of the current top entry in state.following.
 *  When the slot above the top does not exist yet, the set is appended
 *  to the array (and additionally sent -retain); when it does exist,
 *  that slot is overwritten.  Either way state._fsp ends up one higher.
 */
- (void)pushFollow:(ANTLRBitSet *)fset
{
    NSInteger slot = state._fsp + 1;

    if ( slot < [state.following count] ) {
        // a slot is already allocated above the current top: reuse it
        state._fsp = slot;
        [state.following replaceObjectAtIndex:slot withObject:fset];
        return;
    }

    // stack storage is exhausted: grow it by one entry
    [state.following addObject:fset];
    [fset retain];
    state._fsp = slot;
}
860*16467b97STreehugger Robot
/** Pop the top follow set off the follow stack held in `state`.
 *
 *  @return the set found at index state._fsp, after decrementing
 *          state._fsp and removing the last element of state.following;
 *          nil (with a log message) if the stack pointer is negative or
 *          the array is empty.
 */
- (ANTLRBitSet *)popFollow
{
    if ( state._fsp < 0 || [state.following count] == 0 ) {
        NSLog( @"Attempted to pop a follow when none exists on the stack\n" );
        return nil;
    }

    // The stack pointer is lowered before the element is fetched.
    NSInteger topIndex = state._fsp;
    state._fsp = topIndex - 1;
    ANTLRBitSet *popped = [state.following objectAtIndex:topIndex];
    [state.following removeLastObject];
    return popped;
}
875*16467b97STreehugger Robot
/** Return the list of rule names in this parser instance leading up to
 *  the call of this method.  Override it if you want more detail, such
 *  as the file/line at which a rule is invoked.
 *
 *  This is very useful for error messages and for context-sensitive
 *  error recovery.
 *
 *  The work is delegated to getRuleInvocationStack:Recognizer:, which
 *  receives a freshly created RecognitionException and this object's
 *  class name.
 */
- (AMutableArray *)getRuleInvocationStack
{
    NSString *parserClassName = [[self className] retain];
    id freshException = [RecognitionException newException];
    return [self getRuleInvocationStack:freshException Recognizer:parserClassName];
}
889*16467b97STreehugger Robot
/** A more general version of getRuleInvocationStack where you can pass
 *  in, for example, a RecognitionException to get its rule stack trace.
 *
 *  The entries of [e callStackSymbols] are visited from the last element
 *  to the first.  An entry is kept only if it
 *    - does not start with "org.antlr.runtime.",
 *    - is not equal to NEXT_TOKEN_RULE_NAME, and
 *    - is equal to recognizerClassName.
 *
 *  NOTE(review): this filter is a line-by-line port of the Java code
 *  preserved under DONTUSEYET below, which inspects the class name and
 *  method name of each stack frame.  Here whole callStackSymbols strings
 *  are compared instead -- confirm that this selects the intended
 *  entries.
 *
 *  TODO: move to a utility class or something; weird having lexer call this
 *
 *  @param e                    exception whose call stack is examined
 *  @param recognizerClassName  class name that kept entries must equal
 *  @return an array of the entries that passed the filter (possibly empty)
 */
- (AMutableArray *)getRuleInvocationStack:(RecognitionException *)e
                                Recognizer:(NSString *)recognizerClassName
{
    // NOTE(review): the explicit -retain hands the caller an extra
    // reference; left unchanged because caller ownership expectations
    // are not visible here -- confirm.
    AMutableArray *rules = [[AMutableArray arrayWithCapacity:20] retain];

    // Obtained from a plain getter (not alloc/new/copy), so this method
    // does not own the array and must not release it.
    NSArray *stack = [e callStackSymbols];

    // -count is unsigned: convert before subtracting so an empty or nil
    // stack yields -1 and the loop body is skipped.
    for (NSInteger i = (NSInteger)[stack count] - 1; i >= 0; i--) {
        NSString *t = [stack objectAtIndex:i];
        // NSLog(@"stack %d = %@\n", i, t);
        // -hasPrefix: gives a real yes/no answer; the previous
        // -commonPrefixWithString:options: returned a string object, so
        // the test was true for every entry and nothing was ever kept.
        if ( [t hasPrefix:@"org.antlr.runtime."] ) {
            continue; // skip support code such as this method
        }
        if ( [t isEqualTo:NEXT_TOKEN_RULE_NAME] ) {
            continue;
        }
        if ( ![t isEqualTo:recognizerClassName] ) {
            continue; // must not be part of this parser
        }
        [rules addObject:t];
    }
#ifdef DONTUSEYET
    StackTraceElement[] stack = e.getStackTrace();
    int i = 0;
    for (i=stack.length-1; i>=0; i--) {
        StackTraceElement t = stack[i];
        if ( [t getClassName().startsWith("org.antlr.runtime.") ) {
            continue; // skip support code such as this method
        }
              if ( [[t getMethodName] equals:NEXT_TOKEN_RULE_NAME] ) {
            continue;
        }
              if ( ![[t getClassName] equals:recognizerClassName] ) {
            continue; // must not be part of this parser
        }
              [rules addObject:[t getMethodName]];
    }
#endif
    return rules;
}
942*16467b97STreehugger Robot
/** Current backtracking level, as stored in the shared `state` object. */
- (NSInteger) getBacktrackingLevel
{
    NSInteger level = [state getBacktracking];
    return level;
}
947*16467b97STreehugger Robot
/** Store a new backtracking level in the shared `state` object.
 *
 *  @param level  the value forwarded to the state's setBacktracking:
 */
- (void) setBacktrackingLevel:(NSInteger)level
{
    [state setBacktracking:level];
}
952*16467b97STreehugger Robot
/** Token names (such as ID) used when printing tokens during debugging
 *  and error reporting.  Generated parsers are expected to override
 *  this so that it yields their own table of token names.
 *
 *  @return whatever `tokenNames` currently refers to
 */
- (NSArray *)getTokenNames
{
    return tokenNames;
}
961*16467b97STreehugger Robot
/** Name of the grammar file, wanted for debugging and other purposes.
 *  ANTLR is expected to generate an implementation of this method.
 *
 *  @return whatever `grammarFileName` currently refers to
 */
- (NSString *)getGrammarFileName
{
    return grammarFileName;
}
969*16467b97STreehugger Robot
/** Name of the input source.  The base recognizer has none to offer and
 *  always answers nil.
 */
- (NSString *)getSourceName
{
    return nil;
}
974*16467b97STreehugger Robot
/** A convenience method, used most often with template rewrites, that
 *  converts a list of tokens into a list of their text strings.
 *
 *  @param tokens  the tokens to convert; each element is sent -text
 *  @return nil if tokens is nil; otherwise a new array holding the
 *          -text result of every element, in the same order
 */
- (AMutableArray *)toStrings:(AMutableArray *)tokens
{
    if ( tokens == nil ) {
        return nil;
    }
    AMutableArray *strings = [AMutableArray arrayWithCapacity:[tokens count]];
    // The counter that used to be incremented here was never read, so
    // plain fast enumeration is all that is needed.
    for (id token in tokens) {
        [strings addObject:[token text]];
    }
    return strings;
}
991*16467b97STreehugger Robot
/** Look up the memoized result of a rule at a given start index.
 *
 *  Answers ANTLR_MEMO_RULE_UNKNOWN when nothing has been recorded for the
 *  rule at ruleStartIndex.  Otherwise answers the recorded stop value,
 *  i.e. where the rule stopped parsing (the index of the last token the
 *  rule matched).
 *
 *  If the rule has no memo table yet, an empty one (length 17) is created
 *  and stored in state.ruleMemo as a side effect.
 *
 *  @param ruleIndex      index of the rule in state.ruleMemo.
 *  @param ruleStartIndex input index at which the rule started.
 */
- (NSInteger)getRuleMemoization:(NSInteger)ruleIndex StartIndex:(NSInteger)ruleStartIndex
{
    HashRule *ruleTable = [state.ruleMemo objectAtIndex:ruleIndex];
    if ( ruleTable == nil ) {
        // First query for this rule: install a fresh table before looking up.
        ruleTable = [HashRule newHashRuleWithLen:17];
        [state.ruleMemo insertObject:ruleTable atIndex:ruleIndex];
    }
    ACNumber *memoizedStop = [ruleTable getRuleMemoStopIndex:ruleStartIndex];
    return ( memoizedStop != nil ) ? [memoizedStop integerValue] : ANTLR_MEMO_RULE_UNKNOWN;
}
1016*16467b97STreehugger Robot
/** Has this rule already parsed input at the current index of the input
 *  stream?
 *
 *  Side effects:
 *   - if the memo records a failure (ANTLR_MEMO_RULE_FAILED),
 *     state.failed is set to YES;
 *   - if the memo records a successful parse, the stream is advanced to
 *     one past the stop token matched last time.
 *
 *  @param anInput   stream whose current index is used as the start index.
 *  @param ruleIndex index of the rule being queried.
 *  @return NO when nothing is memoized for this rule at this position
 *          (ANTLR_MEMO_RULE_UNKNOWN); YES otherwise.
 */
- (BOOL)alreadyParsedRule:(id<IntStream>)anInput RuleIndex:(NSInteger)ruleIndex
{
    NSInteger aStopIndex = [self getRuleMemoization:ruleIndex StartIndex:anInput.index];
    if ( aStopIndex == ANTLR_MEMO_RULE_UNKNOWN ) {
        // Rule not yet encountered at this position.
        return NO;
    }
    if ( aStopIndex == ANTLR_MEMO_RULE_FAILED ) {
        // NSInteger is 64 bits wide on LP64 targets, so log it as a long.
        if (debug) NSLog(@"rule %ld will never succeed\n", (long)ruleIndex);
        state.failed = YES;
    }
    else {
        if (debug) NSLog(@"seen rule %ld before; skipping ahead to %ld failed = %@\n",
                         (long)ruleIndex, (long)(aStopIndex+1), state.failed?@"YES":@"NO");
        [anInput seek:(aStopIndex+1)]; // jump to one past stop token
    }
    return YES;
}
1043*16467b97STreehugger Robot
/** Record whether or not this rule parsed the input at this position
 *  successfully.
 *
 *  The value stored is ANTLR_MEMO_RULE_FAILED when state.failed is set,
 *  otherwise the index just before the stream's current index.  Nothing
 *  is recorded when the memo array is nil, when ruleIndex is not below
 *  the memo array's length, or when the rule has no entry in the array.
 *
 *  @param anInput        stream whose current index marks the end of the rule.
 *  @param ruleIndex      index of the rule in state.ruleMemo.
 *  @param ruleStartIndex input index at which the rule started.
 */
- (void)memoize:(id<IntStream>)anInput
      RuleIndex:(NSInteger)ruleIndex
     StartIndex:(NSInteger)ruleStartIndex
{
    RuleStack *aRuleStack;
    NSInteger stopTokenIndex;

    aRuleStack = state.ruleMemo;
    stopTokenIndex = (state.failed ? ANTLR_MEMO_RULE_FAILED : (anInput.index-1));
    if ( aRuleStack == nil ) {
        if (debug) NSLog(@"!!!!!!!!! memo array is nil for %@", [self getGrammarFileName]);
        return;
    }
    if ( ruleIndex >= [aRuleStack length] ) {
        // Cast to long so the arguments match the format on 64-bit targets.
        if (debug) NSLog(@"!!!!!!!!! memo size is %ld, but rule index is %ld",
                         (long)[state.ruleMemo length], (long)ruleIndex);
        return;
    }
    if ( [aRuleStack objectAtIndex:ruleIndex] != nil ) {
        [aRuleStack putHashRuleAtRuleIndex:ruleIndex StartIndex:ruleStartIndex StopIndex:stopTokenIndex];
    }
    return;
}
1069*16467b97STreehugger Robot
/** Return how many rule/input-index pairs are memoized in total.
 *  TODO: this includes synpreds. :(
 *
 *  @return the sum of -count over every non-nil entry of state.ruleMemo,
 *          or 0 when there is no memo array.
 */
- (NSInteger)getRuleMemoizationCacheSize
{
    RuleStack *aRuleStack = state.ruleMemo;
    if ( aRuleStack == nil ) {
        return 0;
    }

    // Accumulate in NSInteger (the return type) rather than a 32-bit int.
    NSInteger total = 0;
    for (NSUInteger i = 0; i < [aRuleStack length]; i++) {
        HashRule *aHashRule = [aRuleStack objectAtIndex:i];
        if ( aHashRule != nil ) {
            total += [aHashRule count]; // how many input indexes are recorded?
        }
    }
    return total;
}
1088*16467b97STreehugger Robot
1089*16467b97STreehugger Robot#pragma warning Have to fix traceIn and traceOut.
/** Log entry into a rule.
 *
 *  @param ruleName    name of the rule being entered.
 *  @param ruleIndex   index of the rule (not used by this implementation).
 *  @param inputSymbol current input symbol, logged with %@.
 */
- (void)traceIn:(NSString *)ruleName Index:(NSInteger)ruleIndex Object:(id)inputSymbol
{
    NSLog(@"enter %@ %@", ruleName, inputSymbol);
    if ( state.backtracking > 0 ) {
        // backtracking is a nesting level, not a BOOL: comparing it with YES
        // would report "NO" for any level above 1, so test for non-zero.
        NSLog(@" backtracking=%s", (state.backtracking ? "YES" : "NO"));
    }
    NSLog(@"\n");
}
1098*16467b97STreehugger Robot
/** Log exit from a rule and, while backtracking, whether it failed.
 *
 *  @param ruleName    name of the rule being left.
 *  @param ruleIndex   index of the rule (not used by this implementation).
 *  @param inputSymbol current input symbol, logged with %@.
 */
- (void)traceOut:(NSString *)ruleName Index:(NSInteger)ruleIndex Object:(id)inputSymbol
{
    NSLog(@"exit %@ -- %@", ruleName, inputSymbol);
    if ( state.backtracking > 0 ) {
        const char *backtrackingText = "NO";
        if ( state.backtracking ) {
            backtrackingText = "YES";
        }
        const char *outcomeText = "succeeded";
        if ( state.failed ) {
            outcomeText = "failed";
        }
        NSLog(@" backtracking=%s %s", backtrackingText, outcomeText);
    }
    NSLog(@"\n");
}
1107*16467b97STreehugger Robot
1108*16467b97STreehugger Robot
/** Call a syntactic predicate method through its selector, so arbitrary
 *  synpreds can be supported.
 *
 *  The backtracking level is raised for the duration of the call, the
 *  input is marked beforehand and rewound afterwards, and state.failed is
 *  reset to NO before returning.
 *
 *  @param synpredFragment selector of the predicate method, sent to self.
 *  @return YES when state.failed was NO after running the fragment.
 */
- (BOOL) evaluateSyntacticPredicate:(SEL)synpredFragment // stream:(id<IntStream>)input
{
    id<IntStream> input;

    state.backtracking++;
    // input = state.token.input;
    input = self.input;
    // Keep the marker at full NSInteger width so it cannot be truncated
    // before it is handed back to -rewind:.
    NSInteger start = [input mark];
    @try {
        [self performSelector:synpredFragment];
    }
    @catch (RecognitionException *re) {
        NSLog(@"impossible synpred: %@", re.name);
    }
    BOOL success = (state.failed == NO);
    [input rewind:start];
    state.backtracking--;
    state.failed = NO;
    return success;
}
1130*16467b97STreehugger Robot
1131*16467b97STreehugger Robot@end
1132*16467b97STreehugger Robot
1133