//
// BaseRecognizer.m
// ANTLR
//
// Created by Alan Condit on 6/16/10.
// [The "BSD licence"]
// Copyright (c) 2010 Alan Condit
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#import "ACNumber.h"
#import "BaseRecognizer.h"
#import "HashRule.h"
#import "RuleMemo.h"
#import "CommonToken.h"
#import "Map.h"
#import "NoViableAltException.h"

extern NSInteger debug;

@implementation BaseRecognizer

// Class-wide defaults; every new instance copies (and retains) the first two
// in its initializer.
static AMutableArray *_tokenNames;
static NSString *_grammarFileName;
static NSString *NEXT_TOKEN_RULE_NAME;

@synthesize state;
@synthesize grammarFileName;
//@synthesize failed;
@synthesize sourceName;
//@synthesize numberOfSyntaxErrors;
@synthesize tokenNames;

/** Sets up the class-level constant NEXT_TOKEN_RULE_NAME. */
+ (void) initialize
{
    // +initialize is also sent on behalf of subclasses that do not override
    // it, so make sure the constant is only set up once.
    if ( self != [BaseRecognizer class] ) {
        return;
    }
    NEXT_TOKEN_RULE_NAME = [@"nextToken" retain];
}

/** Returns a new recognizer created with -init; the caller owns the result. */
+ (BaseRecognizer *) newBaseRecognizer
{
    return [[BaseRecognizer alloc] init];
}

/** Returns a new recognizer created with -initWithLen:; the caller owns the result. */
+ (BaseRecognizer *) newBaseRecognizerWithRuleLen:(NSInteger)aLen
{
    return [[BaseRecognizer alloc] initWithLen:aLen];
}

/** Returns a new recognizer created with -initWithState:; the caller owns the result. */
+ (BaseRecognizer *) newBaseRecognizer:(RecognizerSharedState *)aState
{
    return [[BaseRecognizer alloc] initWithState:aState];
}

/** Returns the class-wide token name table (may be nil if never set). */
+ (AMutableArray *)getTokenNames
{
    return _tokenNames;
}

/** Replaces the class-wide token name table, retaining the new table and
 *  releasing the previous one.
 */
+ (void)setTokenNames:(AMutableArray *)theTokNams
{
    if ( _tokenNames == theTokNams ) {
        return;
    }
    [theTokNams retain];
    [_tokenNames release];
    _tokenNames = theTokNams;
}

/** Replaces the class-wide grammar file name, retaining the new name and
 *  releasing the previous one.
 *
 *  The previous implementation released the old name, retained it again and
 *  never stored aFileName, so the setter had no effect and could message a
 *  deallocated string.
 */
+ (void)setGrammarFileName:(NSString *)aFileName
{
    if ( _grammarFileName == aFileName ) {
        return;
    }
    [aFileName retain];
    [_grammarFileName release];
    _grammarFileName = aFileName;
}
/* Puts the bookkeeping fields of a shared state object into their
 * start-of-parse values.  Shared by the three initializers and -reset.
 */
static void BaseRecognizerPrimeState(RecognizerSharedState *sharedState)
{
    sharedState._fsp = -1;
    sharedState.errorRecovery = NO;        // are we recovering?
    sharedState.lastErrorIndex = -1;
    sharedState.failed = NO;               // indicate that some match failed
    sharedState.syntaxErrors = 0;
    sharedState.backtracking = 0;          // the level of backtracking
    sharedState.tokenStartCharIndex = -1;
}

/** Creates a recognizer with a fresh shared state and the class-wide
 *  token names / grammar file name.
 */
- (id) init
{
    self = [super init];
    if ( self == nil ) {
        return nil;
    }
    if ( state == nil ) {
        // NOTE(review): if newRecognizerSharedState already hands back an owned
        // object (as the "new" prefix suggests), this retain over-retains -- confirm.
        state = [[RecognizerSharedState newRecognizerSharedState] retain];
    }
    tokenNames = [_tokenNames retain];             // messaging nil is a no-op
    grammarFileName = [_grammarFileName retain];
    BaseRecognizerPrimeState(state);
    return self;
}

/** Same as -init, but the shared state is created with
 *  newRecognizerSharedStateWithRuleLen:aLen.
 */
- (id) initWithLen:(NSInteger)aLen
{
    self = [super init];
    if ( self == nil ) {
        return nil;
    }
    if ( state == nil ) {
        // NOTE(review): same possible over-retain as in -init -- confirm.
        state = [[RecognizerSharedState newRecognizerSharedStateWithRuleLen:aLen] retain];
    }
    tokenNames = [_tokenNames retain];
    grammarFileName = [_grammarFileName retain];
    BaseRecognizerPrimeState(state);
    return self;
}

/** Creates a recognizer around aState; a nil aState is replaced by a fresh
 *  shared state.  Note that the bookkeeping fields of aState are reset.
 */
- (id) initWithState:(RecognizerSharedState *)aState
{
    self = [super init];
    if ( self == nil ) {
        return nil;
    }
    state = ( aState != nil ) ? aState : [RecognizerSharedState newRecognizerSharedState];
    [state retain];
    tokenNames = [_tokenNames retain];
    grammarFileName = [_grammarFileName retain];
    BaseRecognizerPrimeState(state);
    return self;
}

/** Gives up ownership of the grammar file name, token names and shared state. */
- (void)dealloc
{
#ifdef DEBUG_DEALLOC
    NSLog( @"called dealloc in BaseRecognizer" );
#endif
    [grammarFileName release];
    [tokenNames release];
    [state release];
    [super dealloc];
}

// reset the recognizer to the initial state. does not touch the token source!
// this can be extended by the grammar writer to reset custom ivars
- (void) reset
{
    if ( state == nil ) {
        return;
    }
    id followStack = state.following;
    if ( followStack != nil && [followStack count] ) {
        [followStack removeAllObjects];
    }
    BaseRecognizerPrimeState(state);
    id memoTable = state.ruleMemo;
    if ( memoTable != nil && [memoTable count] ) {
        [memoTable removeAllObjects];
    }
}

/** Forwards to the shared state's failed flag. */
- (BOOL) getFailed
{
    return [state getFailed];
}

/** Forwards to the shared state's failed flag. */
- (void) setFailed:(BOOL)flag
{
    [state setFailed:flag];
}

/** Returns the shared state object without transferring ownership. */
- (RecognizerSharedState *) getState
{
    return state;
}

/** Swaps in a different shared state: the old one is released, the new one retained. */
- (void) setState:(RecognizerSharedState *) theState
{
    if ( state == theState ) {
        return;
    }
    [state release];
    state = [theState retain];
}

/** Must be overridden in the inheriting class; the base class has no input. */
- (id)input
{
    return nil;
}

/** Override in the inheriting class; does nothing here. */
- (void)skip
{
}

/** Match the current input symbol against ttype.
 *
 *  On a match the symbol is consumed and the error flags are cleared.
 *  On a mismatch while backtracking only the failed flag is set; otherwise
 *  recovery is delegated to recoverFromMismatchedToken:TokenType:Follow:.
 *  Returns the current input symbol, or whatever recovery produced.
 */
-(id) match:(id<IntStream>)anInput TokenType:(NSInteger)ttype Follow:(ANTLRBitSet *)follow
{
    id currentSymbol = [self getCurrentInputSymbol:anInput];
    if ( [anInput LA:1] != ttype ) {
        if ( state.backtracking > 0 ) {
            state.failed = YES;
            return currentSymbol;
        }
        return [self recoverFromMismatchedToken:anInput TokenType:ttype Follow:follow];
    }
    [anInput consume];
    state.errorRecovery = NO;
    state.failed = NO;
    return currentSymbol;
}

/** Accept whatever symbol is next: clear the error flags and consume it. */
-(void) matchAny:(id<IntStream>)anInput
{
    state.errorRecovery = NO;
    state.failed = NO;
    [anInput consume];
}

/** YES when the symbol after the current one has type ttype, i.e. the
 *  current symbol looks like an extra token.
 */
-(BOOL) mismatchIsUnwantedToken:(id<IntStream>)anInput TokenType:(NSInteger)ttype
{
    return [anInput LA:2] == ttype;
}

/** YES when the current symbol is consistent with what may follow the
 *  element being matched, so that error recovery is free to "insert" the
 *  missing token.
 */
-(BOOL) mismatchIsMissingToken:(id<IntStream>)anInput Follow:(ANTLRBitSet *) follow
{
    if ( follow == nil ) {
        // we have no information about the follow; we can only consume
        // a single token and hope for the best
        return NO;
    }
    // compute what can follow this grammar element reference
    if ( [follow member:TokenTypeEOR] ) {
        ANTLRBitSet *ruleFollow = [self computeContextSensitiveRuleFOLLOW];
        follow = [follow or:ruleFollow];
        if ( state._fsp >= 0 ) {
            // remove EOR if we're not the start symbol
            [follow remove:(TokenTypeEOR)];
        }
    }
    // BitSet cannot handle negative numbers like -1 (EOF) so EOR is left
    // in the follow set to indicate that the end of the start symbol is
    // in the set (EOF can follow).
    return [follow member:[anInput LA:1]] || [follow member:TokenTypeEOR];
}

/** Report a recognition problem.
*
 * This method sets errorRecovery to indicate the parser is recovering
 * not parsing.  Once in recovery mode, no errors are generated.
 * To get out of recovery mode, the parser must successfully match
 * a token (after a resync).  So it will go:
 *
 *      1. error occurs
 *      2. enter recovery mode, report error
 *      3. consume until token found in resynch set
 *      4. try to resume parsing
 *      5. next match() will reset errorRecovery mode
 *
 * If you override, make sure to update syntaxErrors if you care about that.
 */
-(void) reportError:(RecognitionException *) e
{
    // Only the first error since the last successful match is reported and
    // counted; anything after that is considered spurious.
    if ( !state.errorRecovery ) {
        state.syntaxErrors++;
        state.errorRecovery = YES;

        [self displayRecognitionError:[self getTokenNames] Exception:e];
    }
}

/** Builds the header and the message for e and passes the combined text
 *  to emitErrorMessage:.
 */
-(void) displayRecognitionError:(AMutableArray *)theTokNams Exception:(RecognitionException *)e
{
    NSString *header = [self getErrorHeader:e];
    NSString *detail = [self getErrorMessage:e TokenNames:theTokNams];
    NSString *report = [NSString stringWithFormat:@" %@ %@", header, detail];
    [self emitErrorMessage:report];
}

/** What error message should be generated for the various
 * exception types?
 *
 * Not very object-oriented code, but I like having all error message
 * generation within one method rather than spread among all of the
 * exception classes. This also makes it much easier for the exception
 * handling because the exception classes do not have to have pointers back
 * to this object to access utility routines and so on. Also, changing
 * the message for an exception type would be difficult because you
 * would have to subclass the exception, but then somehow get ANTLR
 * to make those kinds of exception objects instead of the default.
* This looks weird, but trust me--it makes the most sense in terms
 * of flexibility.
 *
 * For grammar debugging, you will want to override this to add
 * more information such as the stack frame with
 * getRuleInvocationStack(e, this.getClass().getName()) and,
 * for no viable alts, the decision description and state etc...
 *
 * Override this to change the message generated for one or more
 * exception types.
 *
 * The order of the class tests matters: the more specific exception
 * classes are tested before MismatchedTokenException.
 */
- (NSString *)getErrorMessage:(RecognitionException *)e TokenNames:(AMutableArray *)theTokNams
{
    NSString *msg;
    if ( [e isKindOfClass:[UnwantedTokenException class]] ) {
        UnwantedTokenException *ute = (UnwantedTokenException *)e;
        NSString *tokenName = [self expectedTokenName:ute.expecting TokenNames:theTokNams];
        msg = [NSString stringWithFormat:@"extraneous input %@ expecting %@",
               [self getTokenErrorDisplay:[ute getUnexpectedToken]], tokenName];
    }
    else if ( [e isKindOfClass:[MissingTokenException class]] ) {
        MissingTokenException *mte = (MissingTokenException *)e;
        NSString *tokenName = [self expectedTokenName:mte.expecting TokenNames:theTokNams];
        msg = [NSString stringWithFormat:@"missing %@ at %@",
               tokenName, [self getTokenErrorDisplay:(e.token)]];
    }
    else if ( [e isKindOfClass:[MismatchedTokenException class]] ) {
        MismatchedTokenException *mte = (MismatchedTokenException *)e;
        NSString *tokenName = [self expectedTokenName:mte.expecting TokenNames:theTokNams];
        msg = [NSString stringWithFormat:@"mismatched input %@ expecting %@",
               [self getTokenErrorDisplay:(e.token)], tokenName];
    }
    else if ( [e isKindOfClass:[MismatchedTreeNodeException class]] ) {
        MismatchedTreeNodeException *mtne = (MismatchedTreeNodeException *)e;
        NSString *tokenName = [self expectedTokenName:mtne.expecting TokenNames:theTokNams];
        msg = [NSString stringWithFormat:@"mismatched tree node: %@ expecting %@",
               mtne.node, tokenName];
    }
    else if ( [e isKindOfClass:[NoViableAltException class]] ) {
        // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
        NoViableAltException *nvae = (NoViableAltException *)e;
        // The arguments follow the order of the labels in the format string
        // (they used to be swapped) and are cast to long to match %ld.
        msg = [NSString stringWithFormat:@"no viable alternative decision:%ld state:%ld at input %@",
               (long)nvae.decisionNumber, (long)nvae.stateNumber,
               [self getTokenErrorDisplay:e.token]];
    }
    else if ( [e isKindOfClass:[EarlyExitException class]] ) {
        // for development, can add "(decision="+eee.decisionNumber+")"
        // The %@ was missing, so the offending token never made it into the message.
        msg = [NSString stringWithFormat:@"required (...)+ loop did not match anything at input %@",
               [self getTokenErrorDisplay:e.token]];
    }
    else if ( [e isKindOfClass:[MismatchedSetException class]] ) {
        MismatchedSetException *mse = (MismatchedSetException *)e;
        msg = [NSString stringWithFormat:@"mismatched input %@ expecting set %@",
               [self getTokenErrorDisplay:(e.token)],
               mse.expecting];
    }
#pragma warning NotSet not yet implemented.
    else if ( [e isKindOfClass:[MismatchedNotSetException class]] ) {
        MismatchedNotSetException *mse = (MismatchedNotSetException *)e;
        msg = [NSString stringWithFormat:@"mismatched input %@ expecting set %@",
               [self getTokenErrorDisplay:(e.token)],
               mse.expecting];
    }
    else if ( [e isKindOfClass:[FailedPredicateException class]] ) {
        FailedPredicateException *fpe = (FailedPredicateException *)e;
        msg = [NSString stringWithFormat:@"rule %@ failed predicate: { %@ }?",
               fpe.ruleName, fpe.predicate];
    }
    else {
        msg = [NSString stringWithFormat:@"Exception= %@\n", e.name];
    }
    return msg;
}

/** Private helper for getErrorMessage:TokenNames:.
 *
 *  Maps an expected token type to a printable name: "EOF" for TokenTypeEOF,
 *  otherwise the entry of theTokNams at that index.  Falls back to
 *  "<unknown>" when the type is negative or the lookup yields nil (for
 *  example because theTokNams itself is nil).
 *  NOTE(review): an index past the end of theTokNams is still passed to
 *  objectAtIndex: unchanged -- confirm how AMutableArray handles that.
 */
- (NSString *)expectedTokenName:(NSInteger)ttype TokenNames:(AMutableArray *)theTokNams
{
    if ( ttype == TokenTypeEOF ) {
        return @"EOF";
    }
    if ( ttype < 0 ) {
        return @"<unknown>";
    }
    NSString *tokenName = (NSString *)[theTokNams objectAtIndex:ttype];
    return ( tokenName != nil ) ? tokenName : @"<unknown>";
}

/** Get number of recognition errors (lexer, parser, tree parser).  Each
 * recognizer tracks its own number.  So parser and lexer each have
 * separate count.  Does not count the spurious errors found between
 * an error and next valid token match
 *
 * See also reportError()
 */
- (NSInteger) getNumberOfSyntaxErrors
{
    return state.syntaxErrors;
}

/** What is the error header, normally line/character position information?
*/
- (NSString *)getErrorHeader:(RecognitionException *)e
{
    // Cast to long so the arguments match %ld whatever integer width the
    // exception uses for its position fields.
    return [NSString stringWithFormat:@"line %ld:%ld", (long)e.line, (long)e.charPositionInLine];
}

/** How should a token be displayed in an error message? The default
 * is to display just the text, but during development you might
 * want to have a lot of information spit out.  Override in that case
 * to use t.toString() (which, for CommonToken, dumps everything about
 * the token). This is better than forcing you to override a method in
 * your token objects because you don't have to go modify your lexer
 * so that it creates a new token type.
 *
 * Returns the token text wrapped in single quotes, with newline, carriage
 * return and tab shown as the escapes \n, \r and \t.  A token without text
 * is shown as <EOF> or as its numeric type in angle brackets.
 */
- (NSString *)getTokenErrorDisplay:(id<Token>)t
{
    NSString *s = t.text;
    if ( s == nil ) {
        if ( t.type == TokenTypeEOF ) {
            s = @"<EOF>";
        }
        else {
            // t.type is an integer; formatting it with %@ would treat the
            // number as an object pointer.
            s = [NSString stringWithFormat:@"<%ld>", (long)t.type];
        }
    }
    // The replacement is taken literally here (no regex escaping as in the
    // Java original), so a single escaped backslash is what produces "\n".
    s = [s stringByReplacingOccurrencesOfString:@"\n" withString:@"\\n"];
    s = [s stringByReplacingOccurrencesOfString:@"\r" withString:@"\\r"];
    s = [s stringByReplacingOccurrencesOfString:@"\t" withString:@"\\t"];
    return [NSString stringWithFormat:@"\'%@\'", s];
}

/** Override this
method to change where error messages go */
- (void) emitErrorMessage:(NSString *) msg
{
    NSLog(@"%@", msg);
}

/** Recover from an error found on the input stream.  This is
 * for NoViableAlt and mismatched symbol exceptions.  If you enable
 * single token insertion and deletion, this will usually not
 * handle mismatched symbol exceptions but there could be a mismatched
 * token that the match() routine could not recover from.
 */
- (void)recover:(id<IntStream>)anInput Exception:(RecognitionException *)re
{
    // A second error at the same input index means the previous recovery
    // consumed nothing (LT(1) was already in the recovery set).  Consume one
    // symbol as a failsafe against an infinite loop.
    BOOL stuckAtSameIndex = ( state.lastErrorIndex == anInput.index );
    if ( stuckAtSameIndex ) {
        [anInput consume];
    }
    state.lastErrorIndex = anInput.index;

    ANTLRBitSet *resyncSet = [self computeErrorRecoverySet];
    [self beginResync];
    [self consumeUntilFollow:anInput Follow:resyncSet];
    [self endResync];
}

/** Called by recover:Exception: right before it starts consuming symbols; does nothing here. */
- (void) beginResync
{
}

/** Called by recover:Exception: right after it has finished consuming symbols; does nothing here. */
- (void) endResync
{
}

/* Compute the error recovery set for the current rule.  During
 * rule invocation, the parser pushes the set of tokens that can
 * follow that rule reference on the stack; this amounts to
 * computing FIRST of what follows the rule reference in the
 * enclosing rule. This local follow set only includes tokens
 * from within the rule; i.e., the FIRST computation done by
 * ANTLR stops at the end of a rule.
 *
 * EXAMPLE
 *
 * When you find a "no viable alt exception", the input is not
 * consistent with any of the alternatives for rule r.  The best
 * thing to do is to consume tokens until you see something that
 * can legally follow a call to r *or* any rule that called r.
 *  You don't want the exact set of viable next tokens because the
 *  input might just be missing a token--you might consume the
 *  rest of the input looking for one of the missing tokens.
 *
 *  Consider grammar:
 *
 *  a : '[' b ']'
 *    | '(' b ')'
 *    ;
 *  b : c '^' INT ;
 *  c : ID
 *    | INT
 *    ;
 *
 *  At each rule invocation, the set of tokens that could follow
 *  that rule is pushed on a stack.  Here are the various "local"
 *  follow sets:
 *
 *  FOLLOW(b1_in_a) = FIRST(']') = ']'
 *  FOLLOW(b2_in_a) = FIRST(')') = ')'
 *  FOLLOW(c_in_b) = FIRST('^') = '^'
 *
 *  Upon erroneous input "[]", the call chain is
 *
 *  a -> b -> c
 *
 *  and, hence, the follow context stack is:
 *
 *  depth  local follow set     after call to rule
 *    0         <EOF>                    a (from main())
 *    1          ']'                     b
 *    2          '^'                     c
 *
 *  Notice that ')' is not included, because b would have to have
 *  been called from a different context in rule a for ')' to be
 *  included.
 *
 *  For error recovery, we cannot consider FOLLOW(c)
 *  (context-sensitive or otherwise).  We need the combined set of
 *  all context-sensitive FOLLOW sets--the set of all tokens that
 *  could follow any reference in the call chain.  We need to
 *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
 *  we resync'd to that token, we'd consume until EOF.  We need to
 *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
 *  In this case, for input "[]", LA(1) is in this set so we would
 *  not consume anything and after printing an error rule c would
 *  return normally.  It would not find the required '^' though.
 *  At this point, it gets a mismatched token error and throws an
 *  exception (since LA(1) is not in the viable following token
 *  set).  The rule exception handler tries to recover, but finds
 *  the same recovery set and doesn't consume anything.  Rule b
 *  exits normally returning to rule a.  Now it finds the ']' (and
 *  with the successful match exits errorRecovery mode).
 *
 *  So, you can see that the parser walks up the call chain looking
 *  for the token that was a member of the recovery set.
 *
 *  Errors are not generated in errorRecovery mode.
590*16467b97STreehugger Robot * 591*16467b97STreehugger Robot * ANTLR's error recovery mechanism is based upon original ideas: 592*16467b97STreehugger Robot * 593*16467b97STreehugger Robot * "Algorithms + Data Structures = Programs" by Niklaus Wirth 594*16467b97STreehugger Robot * 595*16467b97STreehugger Robot * and 596*16467b97STreehugger Robot * 597*16467b97STreehugger Robot * "A note on error recovery in recursive descent parsers": 598*16467b97STreehugger Robot * http://portal.acm.org/citation.cfm?id=947902.947905 599*16467b97STreehugger Robot * 600*16467b97STreehugger Robot * Later, Josef Grosch had some good ideas: 601*16467b97STreehugger Robot * 602*16467b97STreehugger Robot * "Efficient and Comfortable Error Recovery in Recursive Descent 603*16467b97STreehugger Robot * Parsers": 604*16467b97STreehugger Robot * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip 605*16467b97STreehugger Robot * 606*16467b97STreehugger Robot * Like Grosch I implemented local FOLLOW sets that are combined 607*16467b97STreehugger Robot * at run-time upon error to avoid overhead during parsing. 608*16467b97STreehugger Robot */ 609*16467b97STreehugger Robot- (ANTLRBitSet *) computeErrorRecoverySet 610*16467b97STreehugger Robot{ 611*16467b97STreehugger Robot return [self combineFollows:NO]; 612*16467b97STreehugger Robot} 613*16467b97STreehugger Robot 614*16467b97STreehugger Robot/** Compute the context-sensitive FOLLOW set for current rule. 615*16467b97STreehugger Robot * This is set of token types that can follow a specific rule 616*16467b97STreehugger Robot * reference given a specific call chain. You get the set of 617*16467b97STreehugger Robot * viable tokens that can possibly come next (lookahead depth 1) 618*16467b97STreehugger Robot * given the current call chain. 
 *  Contrast this with the
 *  definition of plain FOLLOW for rule r:
 *
 *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
 *
 *  where x in T* and alpha, beta in V*; T is set of terminals and
 *  V is the set of terminals and nonterminals.  In other words,
 *  FOLLOW(r) is the set of all tokens that can possibly follow
 *  references to r in *any* sentential form (context).  At
 *  runtime, however, we know precisely which context applies as
 *  we have the call chain.  We may compute the exact (rather
 *  than covering superset) set of following tokens.
 *
 *  For example, consider grammar:
 *
 *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
 *       | "return" expr '.'
 *       ;
 *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
 *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
 *       | '(' expr ')'
 *       ;
 *
 *  The FOLLOW sets are all inclusive whereas context-sensitive
 *  FOLLOW sets are precisely what could follow a rule reference.
643*16467b97STreehugger Robot * For input input "i=(3);", here is the derivation: 644*16467b97STreehugger Robot * 645*16467b97STreehugger Robot * stat => ID '=' expr ';' 646*16467b97STreehugger Robot * => ID '=' atom ('+' atom)* ';' 647*16467b97STreehugger Robot * => ID '=' '(' expr ')' ('+' atom)* ';' 648*16467b97STreehugger Robot * => ID '=' '(' atom ')' ('+' atom)* ';' 649*16467b97STreehugger Robot * => ID '=' '(' INT ')' ('+' atom)* ';' 650*16467b97STreehugger Robot * => ID '=' '(' INT ')' ';' 651*16467b97STreehugger Robot * 652*16467b97STreehugger Robot * At the "3" token, you'd have a call chain of 653*16467b97STreehugger Robot * 654*16467b97STreehugger Robot * stat -> expr -> atom -> expr -> atom 655*16467b97STreehugger Robot * 656*16467b97STreehugger Robot * What can follow that specific nested ref to atom? Exactly ')' 657*16467b97STreehugger Robot * as you can see by looking at the derivation of this specific 658*16467b97STreehugger Robot * input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}. 659*16467b97STreehugger Robot * 660*16467b97STreehugger Robot * You want the exact viable token set when recovering from a 661*16467b97STreehugger Robot * token mismatch. Upon token mismatch, if LA(1) is member of 662*16467b97STreehugger Robot * the viable next token set, then you know there is most likely 663*16467b97STreehugger Robot * a missing token in the input stream. "Insert" one by just not 664*16467b97STreehugger Robot * throwing an exception. 665*16467b97STreehugger Robot */ 666*16467b97STreehugger Robot- (ANTLRBitSet *)computeContextSensitiveRuleFOLLOW 667*16467b97STreehugger Robot{ 668*16467b97STreehugger Robot return [self combineFollows:YES]; 669*16467b97STreehugger Robot} 670*16467b97STreehugger Robot 671*16467b97STreehugger Robot// what is exact? it seems to only add sets from above on stack 672*16467b97STreehugger Robot// if EOR is in set i. When it sees a set w/o EOR, it stops adding. 
// Why would we ever want them all?  Maybe no viable alt instead of
// mismatched token?
/** Union the local follow sets on the follow stack, walking from the
 *  top of the stack (state._fsp) down to index 0.
 *
 *  @param exact When NO, every local follow set on the stack is OR'ed
 *               into the result.  When YES, the walk stops right after
 *               the first set that does not contain TokenTypeEOR, and
 *               TokenTypeEOR is removed from the result whenever it was
 *               seen at an index greater than 0, so it can only survive
 *               from the bottom-most (start rule) entry.
 *  @return A new bit set holding the combined follow tokens.
 *
 *  NOTE(review): the result is created with +newBitSet and then sent an
 *  extra -retain; recover:Exception: in this file never releases it.
 *  Ownership is left untouched here -- confirm against +newBitSet before
 *  rebalancing.
 */
- (ANTLRBitSet *)combineFollows:(BOOL) exact
{
    NSInteger top = state._fsp;
    ANTLRBitSet *followSet = [[ANTLRBitSet newBitSet] retain];
    // NSInteger index: the original `int` silently narrowed `top`.
    for (NSInteger depth = top; depth >= 0; depth--) {
        ANTLRBitSet *localFollowSet = (ANTLRBitSet *)[state.following objectAtIndex:depth];
        [followSet orInPlace:localFollowSet];
        if ( !exact ) {
            continue;   // inexact mode keeps every set on the stack
        }
        // can we see end of rule?
        if ( ![localFollowSet member:TokenTypeEOR] ) {
            break;      // can't see end of rule, quit
        }
        // Only leave EOR in set if at top (start rule); this lets
        // us know if have to include follow(start rule); i.e., EOF
        if ( depth > 0 ) {
            [followSet remove:TokenTypeEOR];
        }
    }
    return followSet;
}

/** Attempt to recover from a single missing or extra token.
 *
 *  EXTRA TOKEN
 *
 *  LA(1) is not what we are looking for.
 *  If LA(2) has the right token,
 *  however, then assume LA(1) is some extra spurious token.  Delete it
 *  and consume LA(2) as if we were doing a normal match(), which
 *  advances the input.
 *
 *  MISSING TOKEN
 *
 *  If current token is consistent with what could come after
 *  ttype then it is ok to "insert" the missing token, else throw
 *  exception.  For example, input "i=(3;" is clearly missing the
 *  ')'.  When the parser returns from the nested call to expr, it
 *  will have call chain:
 *
 *    stat -> expr -> atom
 *
 *  and it will be trying to match the ')' at this point in the
 *  derivation:
 *
 *       => ID '=' '(' INT ')' ('+' atom)* ';'
 *                          ^
 *  match() will see that ';' doesn't match ')' and report a
 *  mismatched token error.  To recover, it sees that LA(1)==';'
 *  is in the set of tokens that can follow the ')' token
 *  reference in rule atom.  It can assume that you forgot the ')'.
731*16467b97STreehugger Robot */ 732*16467b97STreehugger Robot- (id<Token>)recoverFromMismatchedToken:(id<IntStream>)anInput 733*16467b97STreehugger Robot TokenType:(NSInteger)ttype 734*16467b97STreehugger Robot Follow:(ANTLRBitSet *)follow 735*16467b97STreehugger Robot{ 736*16467b97STreehugger Robot RecognitionException *e = nil; 737*16467b97STreehugger Robot // if next token is what we are looking for then "delete" this token 738*16467b97STreehugger Robot if ( [self mismatchIsUnwantedToken:anInput TokenType:ttype] ) { 739*16467b97STreehugger Robot e = [UnwantedTokenException newException:ttype Stream:anInput]; 740*16467b97STreehugger Robot /* 741*16467b97STreehugger Robot System.err.println("recoverFromMismatchedToken deleting "+ 742*16467b97STreehugger Robot ((TokenStream)input).LT(1)+ 743*16467b97STreehugger Robot " since "+((TokenStream)input).LT(2)+" is what we want"); 744*16467b97STreehugger Robot */ 745*16467b97STreehugger Robot [self beginResync]; 746*16467b97STreehugger Robot [anInput consume]; // simply delete extra token 747*16467b97STreehugger Robot [self endResync]; 748*16467b97STreehugger Robot [self reportError:e]; // report after consuming so AW sees the token in the exception 749*16467b97STreehugger Robot // we want to return the token we're actually matching 750*16467b97STreehugger Robot id matchedSymbol = [self getCurrentInputSymbol:anInput]; 751*16467b97STreehugger Robot [anInput consume]; // move past ttype token as if all were ok 752*16467b97STreehugger Robot return matchedSymbol; 753*16467b97STreehugger Robot } 754*16467b97STreehugger Robot // can't recover with single token deletion, try insertion 755*16467b97STreehugger Robot if ( [self mismatchIsMissingToken:anInput Follow:follow] ) { 756*16467b97STreehugger Robot id<Token> inserted = [self getMissingSymbol:anInput Exception:e TokenType:ttype Follow:follow]; 757*16467b97STreehugger Robot e = [MissingTokenException newException:ttype Stream:anInput With:inserted]; 758*16467b97STreehugger 
Robot [self reportError:e]; // report after inserting so AW sees the token in the exception 759*16467b97STreehugger Robot return inserted; 760*16467b97STreehugger Robot } 761*16467b97STreehugger Robot // even that didn't work; must throw the exception 762*16467b97STreehugger Robot e = [MismatchedTokenException newException:ttype Stream:anInput]; 763*16467b97STreehugger Robot @throw e; 764*16467b97STreehugger Robot} 765*16467b97STreehugger Robot 766*16467b97STreehugger Robot/** Not currently used */ 767*16467b97STreehugger Robot-(id) recoverFromMismatchedSet:(id<IntStream>)anInput 768*16467b97STreehugger Robot Exception:(RecognitionException *)e 769*16467b97STreehugger Robot Follow:(ANTLRBitSet *) follow 770*16467b97STreehugger Robot{ 771*16467b97STreehugger Robot if ( [self mismatchIsMissingToken:anInput Follow:follow] ) { 772*16467b97STreehugger Robot // System.out.println("missing token"); 773*16467b97STreehugger Robot [self reportError:e]; 774*16467b97STreehugger Robot // we don't know how to conjure up a token for sets yet 775*16467b97STreehugger Robot return [self getMissingSymbol:anInput Exception:e TokenType:TokenTypeInvalid Follow:follow]; 776*16467b97STreehugger Robot } 777*16467b97STreehugger Robot // TODO do single token deletion like above for Token mismatch 778*16467b97STreehugger Robot @throw e; 779*16467b97STreehugger Robot} 780*16467b97STreehugger Robot 781*16467b97STreehugger Robot/** Match needs to return the current input symbol, which gets put 782*16467b97STreehugger Robot * into the label for the associated token ref; e.g., x=ID. Token 783*16467b97STreehugger Robot * and tree parsers need to return different objects. Rather than test 784*16467b97STreehugger Robot * for input stream type or change the IntStream interface, I use 785*16467b97STreehugger Robot * a simple method to ask the recognizer to tell me what the current 786*16467b97STreehugger Robot * input symbol is. 
787*16467b97STreehugger Robot * 788*16467b97STreehugger Robot * This is ignored for lexers. 789*16467b97STreehugger Robot */ 790*16467b97STreehugger Robot- (id) getCurrentInputSymbol:(id<IntStream>)anInput 791*16467b97STreehugger Robot{ 792*16467b97STreehugger Robot return nil; 793*16467b97STreehugger Robot} 794*16467b97STreehugger Robot 795*16467b97STreehugger Robot/** Conjure up a missing token during error recovery. 796*16467b97STreehugger Robot * 797*16467b97STreehugger Robot * The recognizer attempts to recover from single missing 798*16467b97STreehugger Robot * symbols. But, actions might refer to that missing symbol. 799*16467b97STreehugger Robot * For example, x=ID {f($x);}. The action clearly assumes 800*16467b97STreehugger Robot * that there has been an identifier matched previously and that 801*16467b97STreehugger Robot * $x points at that token. If that token is missing, but 802*16467b97STreehugger Robot * the next token in the stream is what we want we assume that 803*16467b97STreehugger Robot * this token is missing and we keep going. Because we 804*16467b97STreehugger Robot * have to return some token to replace the missing token, 805*16467b97STreehugger Robot * we have to conjure one up. This method gives the user control 806*16467b97STreehugger Robot * over the tokens returned for missing tokens. Mostly, 807*16467b97STreehugger Robot * you will want to create something special for identifier 808*16467b97STreehugger Robot * tokens. For literals such as '{' and ',', the default 809*16467b97STreehugger Robot * action in the parser or tree parser works. It simply creates 810*16467b97STreehugger Robot * a CommonToken of the appropriate type. The text will be the token. 811*16467b97STreehugger Robot * If you change what tokens must be created by the lexer, 812*16467b97STreehugger Robot * override this method to create the appropriate tokens. 
813*16467b97STreehugger Robot */ 814*16467b97STreehugger Robot- (id)getMissingSymbol:(id<IntStream>)anInput 815*16467b97STreehugger Robot Exception:(RecognitionException *)e 816*16467b97STreehugger Robot TokenType:(NSInteger)expectedTokenType 817*16467b97STreehugger Robot Follow:(ANTLRBitSet *)follow 818*16467b97STreehugger Robot{ 819*16467b97STreehugger Robot return nil; 820*16467b97STreehugger Robot} 821*16467b97STreehugger Robot 822*16467b97STreehugger Robot 823*16467b97STreehugger Robot-(void) consumeUntilTType:(id<IntStream>)anInput TokenType:(NSInteger)tokenType 824*16467b97STreehugger Robot{ 825*16467b97STreehugger Robot //System.out.println("consumeUntil "+tokenType); 826*16467b97STreehugger Robot int ttype = [anInput LA:1]; 827*16467b97STreehugger Robot while (ttype != TokenTypeEOF && ttype != tokenType) { 828*16467b97STreehugger Robot [anInput consume]; 829*16467b97STreehugger Robot ttype = [anInput LA:1]; 830*16467b97STreehugger Robot } 831*16467b97STreehugger Robot} 832*16467b97STreehugger Robot 833*16467b97STreehugger Robot/** Consume tokens until one matches the given token set */ 834*16467b97STreehugger Robot-(void) consumeUntilFollow:(id<IntStream>)anInput Follow:(ANTLRBitSet *)set 835*16467b97STreehugger Robot{ 836*16467b97STreehugger Robot //System.out.println("consumeUntil("+set.toString(getTokenNames())+")"); 837*16467b97STreehugger Robot int ttype = [anInput LA:1]; 838*16467b97STreehugger Robot while (ttype != TokenTypeEOF && ![set member:ttype] ) { 839*16467b97STreehugger Robot //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]); 840*16467b97STreehugger Robot [anInput consume]; 841*16467b97STreehugger Robot ttype = [anInput LA:1]; 842*16467b97STreehugger Robot } 843*16467b97STreehugger Robot} 844*16467b97STreehugger Robot 845*16467b97STreehugger Robot/** Push a rule's follow set using our own hardcoded stack */ 846*16467b97STreehugger Robot- (void)pushFollow:(ANTLRBitSet *)fset 847*16467b97STreehugger Robot{ 
848*16467b97STreehugger Robot if ( (state._fsp +1) >= [state.following count] ) { 849*16467b97STreehugger Robot // AMutableArray *f = [AMutableArray arrayWithCapacity:[[state.following] count]*2]; 850*16467b97STreehugger Robot // System.arraycopy(state.following, 0, f, 0, state.following.length); 851*16467b97STreehugger Robot // state.following = f; 852*16467b97STreehugger Robot [state.following addObject:fset]; 853*16467b97STreehugger Robot [fset retain]; 854*16467b97STreehugger Robot state._fsp++; 855*16467b97STreehugger Robot } 856*16467b97STreehugger Robot else { 857*16467b97STreehugger Robot [state.following replaceObjectAtIndex:++state._fsp withObject:fset]; 858*16467b97STreehugger Robot } 859*16467b97STreehugger Robot} 860*16467b97STreehugger Robot 861*16467b97STreehugger Robot- (ANTLRBitSet *)popFollow 862*16467b97STreehugger Robot{ 863*16467b97STreehugger Robot ANTLRBitSet *fset; 864*16467b97STreehugger Robot 865*16467b97STreehugger Robot if ( state._fsp >= 0 && [state.following count] > 0 ) { 866*16467b97STreehugger Robot fset = [state.following objectAtIndex:state._fsp--]; 867*16467b97STreehugger Robot [state.following removeLastObject]; 868*16467b97STreehugger Robot return fset; 869*16467b97STreehugger Robot } 870*16467b97STreehugger Robot else { 871*16467b97STreehugger Robot NSLog( @"Attempted to pop a follow when none exists on the stack\n" ); 872*16467b97STreehugger Robot } 873*16467b97STreehugger Robot return nil; 874*16467b97STreehugger Robot} 875*16467b97STreehugger Robot 876*16467b97STreehugger Robot/** Return List<String> of the rules in your parser instance 877*16467b97STreehugger Robot * leading up to a call to this method. You could override if 878*16467b97STreehugger Robot * you want more details such as the file/line info of where 879*16467b97STreehugger Robot * in the parser java code a rule is invoked. 
880*16467b97STreehugger Robot * 881*16467b97STreehugger Robot * This is very useful for error messages and for context-sensitive 882*16467b97STreehugger Robot * error recovery. 883*16467b97STreehugger Robot */ 884*16467b97STreehugger Robot- (AMutableArray *)getRuleInvocationStack 885*16467b97STreehugger Robot{ 886*16467b97STreehugger Robot NSString *parserClassName = [[self className] retain]; 887*16467b97STreehugger Robot return [self getRuleInvocationStack:[RecognitionException newException] Recognizer:parserClassName]; 888*16467b97STreehugger Robot} 889*16467b97STreehugger Robot 890*16467b97STreehugger Robot/** A more general version of getRuleInvocationStack where you can 891*16467b97STreehugger Robot * pass in, for example, a RecognitionException to get it's rule 892*16467b97STreehugger Robot * stack trace. This routine is shared with all recognizers, hence, 893*16467b97STreehugger Robot * static. 894*16467b97STreehugger Robot * 895*16467b97STreehugger Robot * TODO: move to a utility class or something; weird having lexer call this 896*16467b97STreehugger Robot */ 897*16467b97STreehugger Robot- (AMutableArray *)getRuleInvocationStack:(RecognitionException *)e 898*16467b97STreehugger Robot Recognizer:(NSString *)recognizerClassName 899*16467b97STreehugger Robot{ 900*16467b97STreehugger Robot // char *name; 901*16467b97STreehugger Robot AMutableArray *rules = [[AMutableArray arrayWithCapacity:20] retain]; 902*16467b97STreehugger Robot NSArray *stack = [e callStackSymbols]; 903*16467b97STreehugger Robot int i = 0; 904*16467b97STreehugger Robot for (i = [stack count]-1; i >= 0; i--) { 905*16467b97STreehugger Robot NSString *t = [stack objectAtIndex:i]; 906*16467b97STreehugger Robot // NSLog(@"stack %d = %@\n", i, t); 907*16467b97STreehugger Robot if ( [t commonPrefixWithString:@"org.antlr.runtime." 
options:NSLiteralSearch] ) { 908*16467b97STreehugger Robot // id aClass = objc_getClass( [t UTF8String] ); 909*16467b97STreehugger Robot continue; // skip support code such as this method 910*16467b97STreehugger Robot } 911*16467b97STreehugger Robot if ( [t isEqualTo:NEXT_TOKEN_RULE_NAME] ) { 912*16467b97STreehugger Robot // name = sel_getName(method_getName(method)); 913*16467b97STreehugger Robot // NSString *aMethod = [NSString stringWithFormat:@"%s", name]; 914*16467b97STreehugger Robot continue; 915*16467b97STreehugger Robot } 916*16467b97STreehugger Robot if ( ![t isEqualTo:recognizerClassName] ) { 917*16467b97STreehugger Robot // name = class_getName( [t UTF8String] ); 918*16467b97STreehugger Robot continue; // must not be part of this parser 919*16467b97STreehugger Robot } 920*16467b97STreehugger Robot [rules addObject:t]; 921*16467b97STreehugger Robot } 922*16467b97STreehugger Robot#ifdef DONTUSEYET 923*16467b97STreehugger Robot StackTraceElement[] stack = e.getStackTrace(); 924*16467b97STreehugger Robot int i = 0; 925*16467b97STreehugger Robot for (i=stack.length-1; i>=0; i--) { 926*16467b97STreehugger Robot StackTraceElement t = stack[i]; 927*16467b97STreehugger Robot if ( [t getClassName().startsWith("org.antlr.runtime.") ) { 928*16467b97STreehugger Robot continue; // skip support code such as this method 929*16467b97STreehugger Robot } 930*16467b97STreehugger Robot if ( [[t getMethodName] equals:NEXT_TOKEN_RULE_NAME] ) { 931*16467b97STreehugger Robot continue; 932*16467b97STreehugger Robot } 933*16467b97STreehugger Robot if ( ![[t getClassName] equals:recognizerClassName] ) { 934*16467b97STreehugger Robot continue; // must not be part of this parser 935*16467b97STreehugger Robot } 936*16467b97STreehugger Robot [rules addObject:[t getMethodName]]; 937*16467b97STreehugger Robot } 938*16467b97STreehugger Robot#endif 939*16467b97STreehugger Robot [stack release]; 940*16467b97STreehugger Robot return rules; 941*16467b97STreehugger Robot} 
/** Answer the backtracking level reported by the shared state. */
- (NSInteger) getBacktrackingLevel
{
    NSInteger level = [state getBacktracking];
    return level;
}

/** Hand the given backtracking level to the shared state. */
- (void) setBacktrackingLevel:(NSInteger)level
{
    [state setBacktracking:level];
}

/** Used to print out token names like ID during debugging and
 *  error reporting.  The generated parsers implement a method
 *  that overrides this to point to their String[] tokenNames.
 */
- (NSArray *)getTokenNames
{
    return self->tokenNames;
}

/** For debugging and other purposes, might want the grammar name.
 *  Have ANTLR generate an implementation for this method.
 */
- (NSString *)getGrammarFileName
{
    return self->grammarFileName;
}

/** This implementation has no source name to report and answers nil. */
- (NSString *)getSourceName
{
    NSString *noName = nil;
    return noName;
}

/** A convenience method for use most often with template rewrites.
976*16467b97STreehugger Robot * Convert a List<Token> to List<String> 977*16467b97STreehugger Robot */ 978*16467b97STreehugger Robot- (AMutableArray *)toStrings:(AMutableArray *)tokens 979*16467b97STreehugger Robot{ 980*16467b97STreehugger Robot if ( tokens == nil ) 981*16467b97STreehugger Robot return nil; 982*16467b97STreehugger Robot AMutableArray *strings = [AMutableArray arrayWithCapacity:[tokens count]]; 983*16467b97STreehugger Robot id object; 984*16467b97STreehugger Robot NSInteger i = 0; 985*16467b97STreehugger Robot for (object in tokens) { 986*16467b97STreehugger Robot [strings addObject:[object text]]; 987*16467b97STreehugger Robot i++; 988*16467b97STreehugger Robot } 989*16467b97STreehugger Robot return strings; 990*16467b97STreehugger Robot} 991*16467b97STreehugger Robot 992*16467b97STreehugger Robot/** Given a rule number and a start token index number, return 993*16467b97STreehugger Robot * ANTLR_MEMO_RULE_UNKNOWN if the rule has not parsed input starting from 994*16467b97STreehugger Robot * start index. If this rule has parsed input starting from the 995*16467b97STreehugger Robot * start index before, then return where the rule stopped parsing. 996*16467b97STreehugger Robot * It returns the index of the last token matched by the rule. 997*16467b97STreehugger Robot * 998*16467b97STreehugger Robot * For now we use a hashtable and just the slow Object-based one. 999*16467b97STreehugger Robot * Later, we can make a special one for ints and also one that 1000*16467b97STreehugger Robot * tosses out data after we commit past input position i. 
1001*16467b97STreehugger Robot */ 1002*16467b97STreehugger Robot- (NSInteger)getRuleMemoization:(NSInteger)ruleIndex StartIndex:(NSInteger)ruleStartIndex 1003*16467b97STreehugger Robot{ 1004*16467b97STreehugger Robot ACNumber *stopIndexI; 1005*16467b97STreehugger Robot HashRule *aHashRule; 1006*16467b97STreehugger Robot if ( (aHashRule = [state.ruleMemo objectAtIndex:ruleIndex]) == nil ) { 1007*16467b97STreehugger Robot aHashRule = [HashRule newHashRuleWithLen:17]; 1008*16467b97STreehugger Robot [state.ruleMemo insertObject:aHashRule atIndex:ruleIndex]; 1009*16467b97STreehugger Robot } 1010*16467b97STreehugger Robot stopIndexI = [aHashRule getRuleMemoStopIndex:ruleStartIndex]; 1011*16467b97STreehugger Robot if ( stopIndexI == nil ) { 1012*16467b97STreehugger Robot return ANTLR_MEMO_RULE_UNKNOWN; 1013*16467b97STreehugger Robot } 1014*16467b97STreehugger Robot return [stopIndexI integerValue]; 1015*16467b97STreehugger Robot} 1016*16467b97STreehugger Robot 1017*16467b97STreehugger Robot/** Has this rule already parsed input at the current index in the 1018*16467b97STreehugger Robot * input stream? Return the stop token index or MEMO_RULE_UNKNOWN. 1019*16467b97STreehugger Robot * If we attempted but failed to parse properly before, return 1020*16467b97STreehugger Robot * MEMO_RULE_FAILED. 1021*16467b97STreehugger Robot * 1022*16467b97STreehugger Robot * This method has a side-effect: if we have seen this input for 1023*16467b97STreehugger Robot * this rule and successfully parsed before, then seek ahead to 1024*16467b97STreehugger Robot * 1 past the stop token matched for this rule last time. 
*/
// Returns NO when getRuleMemoization reports ANTLR_MEMO_RULE_UNKNOWN for
// ruleIndex at anInput's current index.  Otherwise returns YES after either
// setting state.failed (memoized failure) or seeking anInput to one past the
// memoized stop index (memoized success).
- (BOOL)alreadyParsedRule:(id<IntStream>)anInput RuleIndex:(NSInteger)ruleIndex
{
    NSInteger aStopIndex = [self getRuleMemoization:ruleIndex StartIndex:anInput.index];
    if ( aStopIndex == ANTLR_MEMO_RULE_UNKNOWN ) {
        // NSLog(@"rule %ld not yet encountered\n", (long)ruleIndex);
        return NO;
    }
    if ( aStopIndex == ANTLR_MEMO_RULE_FAILED ) {
        // NSInteger is wider than int on 64-bit targets, so log it as a long.
        if (debug) NSLog(@"rule %ld will never succeed\n", (long)ruleIndex);
        state.failed = YES;
    }
    else {
        if (debug) NSLog(@"seen rule %ld before; skipping ahead to %ld failed = %@\n", (long)ruleIndex, (long)(aStopIndex+1), state.failed?@"YES":@"NO");
        [anInput seek:(aStopIndex+1)]; // jump to one past stop token
    }
    return YES;
}

/** Record whether or not this rule parsed the input at this position
 *  successfully.  The entry is written through the RuleStack held in
 *  state.ruleMemo: ANTLR_MEMO_RULE_FAILED is stored when state.failed is
 *  set, otherwise the index just before anInput's current index.
 *
 *  Nothing is recorded when the memo array is nil, when ruleIndex is not
 *  below the array's length, or when the slot for ruleIndex is empty.
 */
- (void)memoize:(id<IntStream>)anInput
      RuleIndex:(NSInteger)ruleIndex
     StartIndex:(NSInteger)ruleStartIndex
{
    RuleStack *aRuleStack;
    NSInteger stopTokenIndex;

    aRuleStack = state.ruleMemo;
    stopTokenIndex = (state.failed ? ANTLR_MEMO_RULE_FAILED : (anInput.index-1));
    if ( aRuleStack == nil ) {
        if (debug) NSLog(@"!!!!!!!!! memo array is nil for %@", [self getGrammarFileName]);
        return;
    }
    if ( ruleIndex >= [aRuleStack length] ) {
        // Cast both values so the format specifiers match on 32- and 64-bit builds.
        if (debug) NSLog(@"!!!!!!!!! memo size is %ld, but rule index is %ld", (long)[aRuleStack length], (long)ruleIndex);
        return;
    }
    if ( [aRuleStack objectAtIndex:ruleIndex] != nil ) {
        [aRuleStack putHashRuleAtRuleIndex:ruleIndex StartIndex:ruleStartIndex StopIndex:stopTokenIndex];
    }
    return;
}

/** return how many rule/input-index pairs there are in total.
 *  TODO: this includes synpreds. :(
 *
 *  Sums -count over every non-nil HashRule in state.ruleMemo; returns 0
 *  when the memo array is nil.
 */
- (NSInteger)getRuleMemoizationCacheSize
{
    RuleStack *aRuleStack;
    HashRule *aHashRule;

    // Accumulate in NSInteger (the declared return type) rather than int so
    // the running total is not narrowed on 64-bit targets.
    NSInteger aCnt = 0;
    aRuleStack = state.ruleMemo;
    for (NSUInteger i = 0; aRuleStack != nil && i < [aRuleStack length]; i++) {
        aHashRule = [aRuleStack objectAtIndex:i];
        if ( aHashRule != nil ) {
            aCnt += [aHashRule count];  // how many input indexes are recorded?
        }
    }
    return aCnt;
}

#pragma warning Have to fix traceIn and traceOut.
// Logs entry into ruleName together with inputSymbol; while backtracking
// (state.backtracking > 0) an extra line is logged.  ruleIndex is not used.
- (void)traceIn:(NSString *)ruleName Index:(NSInteger)ruleIndex Object:(id)inputSymbol
{
    NSLog(@"enter %@ %@", ruleName, inputSymbol);
    if ( state.backtracking > 0 ) {
        // state.backtracking is a nesting counter; comparing it to YES reported
        // "NO" at depth 2 and deeper.  Test for non-zero, as traceOut does.
        NSLog(@" backtracking=%s", (state.backtracking ? "YES" : "NO"));
    }
    NSLog(@"\n");
}

// Logs exit from ruleName together with inputSymbol; while backtracking
// (state.backtracking > 0) the outcome from state.failed is logged as well.
// ruleIndex is not used.
- (void)traceOut:(NSString *)ruleName Index:(NSInteger)ruleIndex Object:(id)inputSymbol
{
    NSLog(@"exit %@ -- %@", ruleName, inputSymbol);
    if ( state.backtracking > 0 ) {
        NSLog(@" backtracking=%s %s", state.backtracking?"YES":"NO", state.failed ? "failed":"succeeded");
    }
    NSLog(@"\n");
}


// call a syntactic predicate method using its selector. this way we can support arbitrary synpreds.
// Runs the syntactic-predicate fragment identified by synpredFragment in
// backtracking mode and reports whether it matched.
//
// Sequence: bump state.backtracking, mark self.input, invoke the selector on
// self, capture success as "state.failed is NO", rewind the input to the mark,
// then restore state.backtracking and clear state.failed.  A
// RecognitionException raised by the fragment is logged and otherwise ignored;
// other exception types are not caught here.
- (BOOL) evaluateSyntacticPredicate:(SEL)synpredFragment // stream:(id<IntStream>)input
{
    id<IntStream> input;

    state.backtracking++;
    // input = state.token.input;
    input = self.input;
    // Hold the marker in NSInteger rather than int so it is not narrowed on
    // 64-bit targets.  NOTE(review): assumes -mark/-rewind: use an
    // NSInteger-sized marker -- confirm against the IntStream protocol.
    NSInteger start = [input mark];
    @try {
        [self performSelector:synpredFragment];
    }
    @catch (RecognitionException *re) {
        NSLog(@"impossible synpred: %@", re.name);
    }
    BOOL success = (state.failed == NO);
    [input rewind:start];
    state.backtracking--;
    state.failed = NO;
    return success;
}

@end