xref: /aosp_15_r20/external/antlr/tool/src/main/java/org/antlr/tool/GrammarSanity.java (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1 /*
2  * [The "BSD license"]
3  *  Copyright (c) 2010 Terence Parr
4  *  All rights reserved.
5  *
6  *  Redistribution and use in source and binary forms, with or without
7  *  modification, are permitted provided that the following conditions
8  *  are met:
9  *  1. Redistributions of source code must retain the above copyright
10  *      notice, this list of conditions and the following disclaimer.
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *  3. The name of the author may not be used to endorse or promote products
15  *      derived from this software without specific prior written permission.
16  *
17  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 package org.antlr.tool;
29 
30 import org.antlr.analysis.NFAState;
31 import org.antlr.analysis.RuleClosureTransition;
32 import org.antlr.analysis.Transition;
33 import org.antlr.grammar.v3.ANTLRParser;
34 import org.antlr.runtime.tree.Tree;
35 
36 import java.util.ArrayList;
37 import java.util.HashSet;
38 import java.util.List;
39 import java.util.Set;
40 
41 /** Factor out routines that check sanity of rules, alts, grammars, etc.. */
42 public class GrammarSanity {
43 	/** The checkForLeftRecursion method needs to track what rules it has
44 	 *  visited to track infinite recursion.
45 	 */
46 	protected Set<Rule> visitedDuringRecursionCheck = null;
47 
48 	protected Grammar grammar;
GrammarSanity(Grammar grammar)49 	public GrammarSanity(Grammar grammar) {
50 		this.grammar = grammar;
51 	}
52 
53 	/** Check all rules for infinite left recursion before analysis. Return list
54 	 *  of troublesome rule cycles.  This method has two side-effects: it notifies
55 	 *  the error manager that we have problems and it sets the list of
56 	 *  recursive rules that we should ignore during analysis.
57 	 */
checkAllRulesForLeftRecursion()58 	public List<Set<Rule>> checkAllRulesForLeftRecursion() {
59 		grammar.buildNFA(); // make sure we have NFAs
60 		grammar.leftRecursiveRules = new HashSet<Rule>();
61 		List<Set<Rule>> listOfRecursiveCycles = new ArrayList<Set<Rule>>();
62 		for (int i = 0; i < grammar.composite.ruleIndexToRuleList.size(); i++) {
63 			Rule r = grammar.composite.ruleIndexToRuleList.elementAt(i);
64 			if ( r!=null ) {
65 				visitedDuringRecursionCheck = new HashSet<Rule>();
66 				visitedDuringRecursionCheck.add(r);
67 				Set<NFAState> visitedStates = new HashSet<NFAState>();
68 				traceStatesLookingForLeftRecursion(r.startState,
69 												   visitedStates,
70 												   listOfRecursiveCycles);
71 			}
72 		}
73 		if ( listOfRecursiveCycles.size()>0 ) {
74 			ErrorManager.leftRecursionCycles(listOfRecursiveCycles);
75 		}
76 		return listOfRecursiveCycles;
77 	}
78 
79 	/** From state s, look for any transition to a rule that is currently
80 	 *  being traced.  When tracing r, visitedDuringRecursionCheck has r
81 	 *  initially.  If you reach an accept state, return but notify the
82 	 *  invoking rule that it is nullable, which implies that invoking
83 	 *  rule must look at follow transition for that invoking state.
84 	 *  The visitedStates tracks visited states within a single rule so
85 	 *  we can avoid epsilon-loop-induced infinite recursion here.  Keep
86 	 *  filling the cycles in listOfRecursiveCycles and also, as a
87 	 *  side-effect, set leftRecursiveRules.
88 	 */
traceStatesLookingForLeftRecursion(NFAState s, Set<NFAState> visitedStates, List<Set<Rule>> listOfRecursiveCycles)89 	protected boolean traceStatesLookingForLeftRecursion(NFAState s,
90 														 Set<NFAState> visitedStates,
91 														 List<Set<Rule>> listOfRecursiveCycles)
92 	{
93 		if ( s.isAcceptState() ) {
94 			// this rule must be nullable!
95 			// At least one epsilon edge reached accept state
96 			return true;
97 		}
98 		if ( visitedStates.contains(s) ) {
99 			// within same rule, we've hit same state; quit looping
100 			return false;
101 		}
102 		visitedStates.add(s);
103 		boolean stateReachesAcceptState = false;
104 		Transition t0 = s.transition[0];
105 		if ( t0 instanceof RuleClosureTransition ) {
106 			RuleClosureTransition refTrans = (RuleClosureTransition)t0;
107 			Rule refRuleDef = refTrans.rule;
108 			//String targetRuleName = ((NFAState)t0.target).getEnclosingRule();
109 			if ( visitedDuringRecursionCheck.contains(refRuleDef) ) {
110 				// record left-recursive rule, but don't go back in
111 				grammar.leftRecursiveRules.add(refRuleDef);
112 				/*
113 				System.out.println("already visited "+refRuleDef+", calling from "+
114 								   s.enclosingRule);
115 								   */
116 				addRulesToCycle(refRuleDef,
117 								s.enclosingRule,
118 								listOfRecursiveCycles);
119 			}
120 			else {
121 				// must visit if not already visited; send new visitedStates set
122 				visitedDuringRecursionCheck.add(refRuleDef);
123 				boolean callReachedAcceptState =
124 					traceStatesLookingForLeftRecursion((NFAState)t0.target,
125 													   new HashSet<NFAState>(),
126 													   listOfRecursiveCycles);
127 				// we're back from visiting that rule
128 				visitedDuringRecursionCheck.remove(refRuleDef);
129 				// must keep going in this rule then
130 				if ( callReachedAcceptState ) {
131 					NFAState followingState =
132 						((RuleClosureTransition) t0).followState;
133 					stateReachesAcceptState |=
134 						traceStatesLookingForLeftRecursion(followingState,
135 														   visitedStates,
136 														   listOfRecursiveCycles);
137 				}
138 			}
139 		}
140 		else if ( t0.label.isEpsilon() || t0.label.isSemanticPredicate() ) {
141 			stateReachesAcceptState |=
142 				traceStatesLookingForLeftRecursion((NFAState)t0.target, visitedStates, listOfRecursiveCycles);
143 		}
144 		// else it has a labeled edge
145 
146 		// now do the other transition if it exists
147 		Transition t1 = s.transition[1];
148 		if ( t1!=null ) {
149 			stateReachesAcceptState |=
150 				traceStatesLookingForLeftRecursion((NFAState)t1.target,
151 												   visitedStates,
152 												   listOfRecursiveCycles);
153 		}
154 		return stateReachesAcceptState;
155 	}
156 
157 	/** enclosingRuleName calls targetRuleName, find the cycle containing
158 	 *  the target and add the caller.  Find the cycle containing the caller
159 	 *  and add the target.  If no cycles contain either, then create a new
160 	 *  cycle.  listOfRecursiveCycles is List&lt;Set&lt;String&gt;&gt; that holds a list
161 	 *  of cycles (sets of rule names).
162 	 */
addRulesToCycle(Rule targetRule, Rule enclosingRule, List<Set<Rule>> listOfRecursiveCycles)163 	protected void addRulesToCycle(Rule targetRule,
164 								   Rule enclosingRule,
165 								   List<Set<Rule>> listOfRecursiveCycles)
166 	{
167 		boolean foundCycle = false;
168 		for (int i = 0; i < listOfRecursiveCycles.size(); i++) {
169 			Set<Rule> rulesInCycle = listOfRecursiveCycles.get(i);
170 			// ensure both rules are in same cycle
171 			if ( rulesInCycle.contains(targetRule) ) {
172 				rulesInCycle.add(enclosingRule);
173 				foundCycle = true;
174 			}
175 			if ( rulesInCycle.contains(enclosingRule) ) {
176 				rulesInCycle.add(targetRule);
177 				foundCycle = true;
178 			}
179 		}
180 		if ( !foundCycle ) {
181 			Set<Rule> cycle = new HashSet<Rule>();
182 			cycle.add(targetRule);
183 			cycle.add(enclosingRule);
184 			listOfRecursiveCycles.add(cycle);
185 		}
186 	}
187 
checkRuleReference(GrammarAST scopeAST, GrammarAST refAST, GrammarAST argsAST, String currentRuleName)188 	public void checkRuleReference(GrammarAST scopeAST,
189 								   GrammarAST refAST,
190 								   GrammarAST argsAST,
191 								   String currentRuleName)
192 	{
193 		Rule r = grammar.getRule(refAST.getText());
194 		if ( refAST.getType()==ANTLRParser.RULE_REF ) {
195 			if ( argsAST!=null ) {
196 				// rule[args]; ref has args
197                 if ( r!=null && r.argActionAST==null ) {
198 					// but rule def has no args
199 					ErrorManager.grammarError(
200 						ErrorManager.MSG_RULE_HAS_NO_ARGS,
201 						grammar,
202 						argsAST.getToken(),
203 						r.name);
204 				}
205 			}
206 			else {
207 				// rule ref has no args
208 				if ( r!=null && r.argActionAST!=null ) {
209 					// but rule def has args
210 					ErrorManager.grammarError(
211 						ErrorManager.MSG_MISSING_RULE_ARGS,
212 						grammar,
213 						refAST.getToken(),
214 						r.name);
215 				}
216 			}
217 		}
218 		else if ( refAST.getType()==ANTLRParser.TOKEN_REF ) {
219 			if ( grammar.type!=Grammar.LEXER ) {
220 				if ( argsAST!=null ) {
221 					// args on a token ref not in a lexer rule
222 					ErrorManager.grammarError(
223 						ErrorManager.MSG_ARGS_ON_TOKEN_REF,
224 						grammar,
225 						refAST.getToken(),
226 						refAST.getText());
227 				}
228 				return; // ignore token refs in nonlexers
229 			}
230 			if ( argsAST!=null ) {
231 				// tokenRef[args]; ref has args
232 				if ( r!=null && r.argActionAST==null ) {
233 					// but token rule def has no args
234 					ErrorManager.grammarError(
235 						ErrorManager.MSG_RULE_HAS_NO_ARGS,
236 						grammar,
237 						argsAST.getToken(),
238 						r.name);
239 				}
240 			}
241 			else {
242 				// token ref has no args
243 				if ( r!=null && r.argActionAST!=null ) {
244 					// but token rule def has args
245 					ErrorManager.grammarError(
246 						ErrorManager.MSG_MISSING_RULE_ARGS,
247 						grammar,
248 						refAST.getToken(),
249 						r.name);
250 				}
251 			}
252 		}
253 	}
254 
255 	/** Rules in tree grammar that use -&gt; rewrites and are spitting out
256 	 *  templates via output=template and then use rewrite=true must only
257 	 *  use -&gt; on alts that are simple nodes or trees or single rule refs
258 	 *  that match either nodes or trees.  The altAST is the ALT node
259 	 *  for an ALT.  Verify that its first child is simple.  Must be either
260 	 *  ( ALT ^( A B ) &lt;end-of-alt&gt; ) or ( ALT A &lt;end-of-alt&gt; ) or
261 	 *  other element.
262 	 *
263 	 *  Ignore predicates in front and labels.
264 	 */
ensureAltIsSimpleNodeOrTree(GrammarAST altAST, GrammarAST elementAST, int outerAltNum)265 	public void ensureAltIsSimpleNodeOrTree(GrammarAST altAST,
266 											GrammarAST elementAST,
267 											int outerAltNum)
268 	{
269 		if ( isValidSimpleElementNode(elementAST) ) {
270 			GrammarAST next = elementAST.getNextSibling();
271 			if ( !isNextNonActionElementEOA(next)) {
272 				ErrorManager.grammarWarning(ErrorManager.MSG_REWRITE_FOR_MULTI_ELEMENT_ALT,
273 											grammar,
274 											next.token,
275 											outerAltNum);
276 			}
277 			return;
278 		}
279 		switch ( elementAST.getType() ) {
280 			case ANTLRParser.ASSIGN :		// labels ok on non-rule refs
281 			case ANTLRParser.PLUS_ASSIGN :
282 				if ( isValidSimpleElementNode(elementAST.getChild(1)) ) {
283 					return;
284 				}
285 				break;
286 			case ANTLRParser.ACTION :		// skip past actions
287 			case ANTLRParser.SEMPRED :
288 			case ANTLRParser.SYN_SEMPRED :
289 			case ANTLRParser.BACKTRACK_SEMPRED :
290 			case ANTLRParser.GATED_SEMPRED :
291 				ensureAltIsSimpleNodeOrTree(altAST,
292 											elementAST.getNextSibling(),
293 											outerAltNum);
294 				return;
295 		}
296 		ErrorManager.grammarWarning(ErrorManager.MSG_REWRITE_FOR_MULTI_ELEMENT_ALT,
297 									grammar,
298 									elementAST.token,
299 									outerAltNum);
300 	}
301 
isValidSimpleElementNode(Tree t)302 	protected boolean isValidSimpleElementNode(Tree t) {
303 		switch ( t.getType() ) {
304 			case ANTLRParser.TREE_BEGIN :
305 			case ANTLRParser.TOKEN_REF :
306 			case ANTLRParser.CHAR_LITERAL :
307 			case ANTLRParser.STRING_LITERAL :
308 			case ANTLRParser.WILDCARD :
309 				return true;
310 			default :
311 				return false;
312 		}
313 	}
314 
isNextNonActionElementEOA(GrammarAST t)315 	protected boolean isNextNonActionElementEOA(GrammarAST t) {
316 		while ( t.getType()==ANTLRParser.ACTION ||
317 				t.getType()==ANTLRParser.SEMPRED )
318 		{
319 			t = t.getNextSibling();
320 		}
321 		if ( t.getType()==ANTLRParser.EOA ) {
322 			return true;
323 		}
324 		return false;
325 	}
326 }
327