#!/usr/bin/env python
# A tool to parse ASTMatchers.h and update the documentation in
# ../LibASTMatchersReference.html automatically. Run from the
# directory in which this file is located to update the docs.

import collections
import re
try:
    import urllib2
except ImportError:
    # urllib2 does not exist on Python 3; urllib.request provides the same
    # urlopen() entry point, so bind it to the name the rest of the file uses.
    import urllib.request as urllib2

# Path of the header to parse, relative to the directory of this script.
MATCHERS_FILE = '../../include/clang/ASTMatchers/ASTMatchers.h'

# Each matcher is documented in one row of the form:
#   result | name | argA
# The subsequent row contains the documentation and is hidden by default,
# becoming visible via javascript when the user clicks the matcher name.
TD_TEMPLATE = """
<tr><td>%(result)s</td><td class="name" onclick="toggle('%(id)s')"><a name="%(id)sAnchor">%(name)s</a></td><td>%(args)s</td></tr>
<tr><td colspan="4" class="doc" id="%(id)s"><pre>%(comment)s</pre></td></tr>
"""

# We categorize the matchers into these three categories in the reference:
node_matchers = {}
narrowing_matchers = {}
traversal_matchers = {}

# We output multiple rows per matcher if the matcher can be used on multiple
# node types. Thus, we need a new id per row to control the documentation
# pop-up. ids[name] keeps track of those ids.
ids = collections.defaultdict(int)

# Cache for doxygen urls we have already verified.
# Maps a probed doxygen URL to True/False depending on whether fetching it
# succeeded, so that each URL is requested at most once per run.
doxygen_probes = {}


def esc(text):
    """Escape any html in the given text.

    After escaping, every 'Matcher<Name>' whose doxygen class page can be
    fetched is turned into a link to that page; other occurrences are left
    as escaped text.
    """
    # '&' must be handled first so the entities produced below are not
    # escaped a second time.
    text = re.sub(r'&', '&amp;', text)
    text = re.sub(r'<', '&lt;', text)
    text = re.sub(r'>', '&gt;', text)

    def link_if_exists(m):
        name = m.group(1)
        url = 'http://clang.llvm.org/doxygen/classclang_1_1%s.html' % name
        if url not in doxygen_probes:
            try:
                print('Probing %s...' % url)
                urllib2.urlopen(url)
                doxygen_probes[url] = True
            except Exception:
                # Best effort: any failure just means "do not link". Unlike a
                # bare except this still lets Ctrl-C / SystemExit through.
                doxygen_probes[url] = False
        if doxygen_probes[url]:
            return r'Matcher&lt;<a href="%s">%s</a>&gt;' % (url, name)
        return m.group(0)

    # The text is already escaped at this point, so the angle brackets of
    # 'Matcher<...>' appear as entities.
    return re.sub(r'Matcher&lt;([^\*&]+)&gt;', link_if_exists, text)


def extract_result_types(comment):
    """Extracts a list of result types from the given comment.

    We allow annotations in the comment of the matcher to specify what
    nodes a matcher can match on. Those comments have the form:
      Usable as: Any Matcher | (Matcher<T1>[, Matcher<t2>[, ...]])

    Returns ['*'] in case of 'Any Matcher', or the listed types, collected
    from the end of the comment backwards (so ['T2', 'T1'] for the example
    above). Returns the empty list for a bare trailing 'Usable as:' and
    None if no 'Usable as' specification could be parsed.
    """
    if re.search(r'Usable as: Any Matcher[\s\n]*$', comment, re.S):
        return ['*']
    result_types = []
    while True:
        # Peel one trailing 'Matcher<T>' off the end of the comment.
        m = re.match(r'^(.*)Matcher<([^>]+)>\s*,?[\s\n]*$', comment, re.S)
        if not m:
            if re.search(r'Usable as:\s*$', comment):
                return result_types
            return None
        result_types.append(m.group(2))
        comment = m.group(1)


def strip_doxygen(comment):
    r"""Returns the given comment without \-escaped words."""
    # If there is only a doxygen keyword in the line, delete the whole line.
    comment = re.sub(r'^\\[^\s]+\n', r'', comment, flags=re.M)

    # If there is a doxygen \see command, change the \see prefix into "See also:".
    # FIXME: it would be better to turn this into a link to the target instead.
    comment = re.sub(r'\\see', r'See also:', comment)

    # Delete the doxygen command and the following whitespace.
    comment = re.sub(r'\\[^\s]+\s+', r'', comment)
    return comment


def unify_arguments(args):
    """Gets rid of anything the user doesn't care about in the argument list."""
    args = re.sub(r'internal::', r'', args)
    args = re.sub(r'const\s+(.*)&', r'\1 ', args)
    args = re.sub(r'&', r' ', args)
    # A bare template parameter 'M' / 'M<digit>' stands for any matcher.
    args = re.sub(r'(^|\s)M\d?(\s)', r'\1Matcher<*>\2', args)
    return args


def add_matcher(result_type, name, args, comment, is_dyncast=False):
    """Adds a matcher to one of our categories.

    Renders one TD_TEMPLATE row for the matcher and stores it in
    node_matchers (when is_dyncast is set), narrowing_matchers or
    traversal_matchers. The 'id' matcher is skipped.
    """
    if name == 'id':
        # FIXME: Figure out whether we want to support the 'id' matcher.
        return
    matcher_id = '%s%d' % (name, ids[name])
    ids[name] += 1
    args = unify_arguments(args)
    matcher_html = TD_TEMPLATE % {
        'result': esc('Matcher<%s>' % result_type),
        'name': name,
        'args': esc(args),
        'comment': esc(strip_doxygen(comment)),
        'id': matcher_id,
    }
    if is_dyncast:
        node_matchers[result_type + name] = matcher_html
    # Use a heuristic to figure out whether a matcher is a narrowing or
    # traversal matcher. By default, matchers that take other matchers as
    # arguments (and are not node matchers) do traversal. We specifically
    # exclude known narrowing matchers that also take other matchers as
    # arguments.
    elif ('Matcher<' not in args or
          name in ['allOf', 'anyOf', 'anything', 'unless']):
        narrowing_matchers[result_type + name + esc(args)] = matcher_html
    else:
        traversal_matchers[result_type + name + esc(args)] = matcher_html


def _join_macro_args(args):
    """Renders (type, name, type, name, ...) captures as 'type name, ...'."""
    return ', '.join('%s %s' % (args[i], args[i + 1])
                     for i in range(0, len(args), 2) if args[i])


def _check_macro_arity(n, declaration):
    """Raises unless the macro's arity suffix is empty or '2'."""
    if n not in ['', '2']:
        raise Exception('Cannot parse "%s"' % declaration)


def act_on_decl(declaration, comment, allowed_types):
    """Parse the matcher out of the given declaration and comment.

    If 'allowed_types' is set, it contains a list of node types the matcher
    can match on, as extracted from the static type asserts in the matcher
    definition.

    Each recognised form is registered through add_matcher(); a blank
    declaration is ignored and an unrecognised one is reported on stdout.
    Raises Exception when a macro declaration cannot be parsed or when the
    documented result types disagree with the declared ones.
    """
    if not declaration.strip():
        return

    # Node matchers are defined by writing:
    #   VariadicDynCastAllOfMatcher<ResultType, ArgumentType> name;
    m = re.match(r""".*Variadic(?:DynCast)?AllOfMatcher\s*<
                     \s*([^\s,]+)\s*(?:,
                     \s*([^\s>]+)\s*)?>
                     \s*([^\s;]+)\s*;\s*$""", declaration, flags=re.X)
    if m:
        result, inner, name = m.groups()
        if not inner:
            inner = result
        add_matcher(result, name, 'Matcher<%s>...' % inner,
                    comment, is_dyncast=True)
        return

    # Parse the various matcher definition macros.
    m = re.match(r""".*AST_TYPE_MATCHER\(
                     \s*([^\s,]+\s*),
                     \s*([^\s,]+\s*)
                   \)\s*;\s*$""", declaration, flags=re.X)
    if m:
        inner, name = m.groups()
        add_matcher('Type', name, 'Matcher<%s>...' % inner,
                    comment, is_dyncast=True)
        # FIXME: re-enable once we have implemented casting on the TypeLoc
        # hierarchy.
        # add_matcher('TypeLoc', '%sLoc' % name, 'Matcher<%sLoc>...' % inner,
        #             comment, is_dyncast=True)
        return

    m = re.match(r""".*AST_TYPE(LOC)?_TRAVERSE_MATCHER\(
                     \s*([^\s,]+\s*),
                     \s*(?:[^\s,]+\s*),
                     \s*AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\)
                   \)\s*;\s*$""", declaration, flags=re.X)
    if m:
        loc, name, results = m.groups()[0:3]
        result_types = [r.strip() for r in results.split(',')]

        comment_result_types = extract_result_types(comment)
        if (comment_result_types and
                sorted(result_types) != sorted(comment_result_types)):
            raise Exception('Inconsistent documentation for: %s' % name)
        for result_type in result_types:
            add_matcher(result_type, name, 'Matcher<Type>', comment)
            if loc:
                add_matcher('%sLoc' % result_type, '%sLoc' % name,
                            'Matcher<TypeLoc>', comment)
        return

    m = re.match(r"""^\s*AST_POLYMORPHIC_MATCHER(_P)?(.?)(?:_OVERLOAD)?\(
                        \s*([^\s,]+)\s*,
                        \s*AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\)
                     (?:,\s*([^\s,]+)\s*
                        ,\s*([^\s,]+)\s*)?
                     (?:,\s*([^\s,]+)\s*
                        ,\s*([^\s,]+)\s*)?
                     (?:,\s*\d+\s*)?
                    \)\s*{\s*$""", declaration, flags=re.X)
    if m:
        _, n, name, results = m.groups()[0:4]
        args = m.groups()[4:]
        result_types = [r.strip() for r in results.split(',')]
        if allowed_types and allowed_types != result_types:
            raise Exception('Inconsistent documentation for: %s' % name)
        _check_macro_arity(n, declaration)
        args = _join_macro_args(args)
        for result_type in result_types:
            add_matcher(result_type, name, args, comment)
        return

    m = re.match(r"""^\s*AST_MATCHER_FUNCTION(_P)?(.?)(?:_OVERLOAD)?\(
                     (?:\s*([^\s,]+)\s*,)?
                        \s*([^\s,]+)\s*
                     (?:,\s*([^\s,]+)\s*
                        ,\s*([^\s,]+)\s*)?
                     (?:,\s*([^\s,]+)\s*
                        ,\s*([^\s,]+)\s*)?
                     (?:,\s*\d+\s*)?
                    \)\s*{\s*$""", declaration, flags=re.X)
    if m:
        _, n, result, name = m.groups()[0:4]
        args = m.groups()[4:]
        _check_macro_arity(n, declaration)
        add_matcher(result, name, _join_macro_args(args), comment)
        return

    m = re.match(r"""^\s*AST_MATCHER(_P)?(.?)(?:_OVERLOAD)?\(
                     (?:\s*([^\s,]+)\s*,)?
                        \s*([^\s,]+)\s*
                     (?:,\s*([^,]+)\s*
                        ,\s*([^\s,]+)\s*)?
                     (?:,\s*([^\s,]+)\s*
                        ,\s*([^\s,]+)\s*)?
                     (?:,\s*\d+\s*)?
                    \)\s*{\s*$""", declaration, flags=re.X)
    if m:
        _, n, result, name = m.groups()[0:4]
        args = m.groups()[4:]
        if not result:
            # No explicit result type in the macro: fall back to the types
            # collected from the definition body.
            if not allowed_types:
                raise Exception(
                    'Did not find allowed result types for: %s' % name)
            result_types = allowed_types
        else:
            result_types = [result]
        _check_macro_arity(n, declaration)
        args = _join_macro_args(args)
        for result_type in result_types:
            add_matcher(result_type, name, args, comment)
        return

    # Parse ArgumentAdapting matchers.
    m = re.match(
        r"""^.*ArgumentAdaptingMatcherFunc<.*>\s*(?:LLVM_ATTRIBUTE_UNUSED\s*)
            ([a-zA-Z]*)\s*=\s*{};$""",
        declaration, flags=re.X)
    if m:
        name = m.groups()[0]
        add_matcher('*', name, 'Matcher<*>', comment)
        return

    # Parse Variadic functions.
    m = re.match(
        r"""^.*internal::VariadicFunction\s*<\s*([^,]+),\s*([^,]+),\s*[^>]+>\s*
            ([a-zA-Z]*)\s*=\s*{.*};$""",
        declaration, flags=re.X)
    if m:
        result, arg, name = m.groups()[:3]
        add_matcher(result, name, '%s, ..., %s' % (arg, arg), comment)
        return

    # Parse Variadic operator matchers.
    m = re.match(
        r"""^.*VariadicOperatorMatcherFunc\s*<\s*([^,]+),\s*([^\s>]+)\s*>\s*
            ([a-zA-Z]*)\s*=\s*{.*};$""",
        declaration, flags=re.X)
    if m:
        _, max_args, name = m.groups()[:3]
        if max_args == '1':
            add_matcher('*', name, 'Matcher<*>', comment)
            return
        elif max_args == 'UINT_MAX':
            add_matcher('*', name, 'Matcher<*>, ..., Matcher<*>', comment)
            return
        # Any other maximum falls through to the generic parser below.

    # Parse free standing matcher functions, like:
    #   Matcher<ResultType> Name(Matcher<ArgumentType> InnerMatcher) {
    m = re.match(r"""^\s*(.*)\s+
                     ([^\s\(]+)\s*\(
                     (.*)
                     \)\s*{""", declaration, re.X)
    if not m:
        print('*** Unparsable: "' + declaration + '" ***')
        return

    result, name, args = m.groups()
    args = ', '.join(part.strip() for part in args.split(','))
    m = re.match(r'.*\s+internal::(Bindable)?Matcher<([^>]+)>$', result)
    if m:
        result_types = [m.group(2)]
    else:
        result_types = extract_result_types(comment)
    if not result_types:
        if not comment:
            # Only overloads don't have their own doxygen comments; ignore those.
            print('Ignoring "%s"' % name)
        else:
            print('Cannot determine result type for "%s"' % name)
        return
    for result_type in result_types:
        add_matcher(result_type, name, args, comment)


def sort_table(matcher_type, matcher_map):
    """Returns the sorted html table for the given row map.

    The rows are emitted in sorted key order, each followed by a newline,
    between the START/END marker comments for 'matcher_type'.
    """
    table = ''.join(matcher_map[key] + '\n' for key in sorted(matcher_map))
    return ('<!-- START_%(type)s_MATCHERS -->\n' +
            '%(table)s' +
            '<!--END_%(type)s_MATCHERS -->') % {
                'type': matcher_type,
                'table': table,
            }


# Parse the ast matchers.
# We alternate between two modes:
# body = True: We parse the definition of a matcher. We need
#   to parse the full definition before adding a matcher, as the
#   definition might contain static asserts that specify the result
#   type.
# body = False: We parse the comments and declaration of the matcher.
comment = ''
declaration = ''
allowed_types = []
body = False
# Read the whole header up front so the file handle is closed before parsing.
with open(MATCHERS_FILE) as matchers_file:
    matcher_lines = matchers_file.read().splitlines()

for line in matcher_lines:
    if body:
        if line.strip() and line.startswith('}'):
            # A '}' in the first column ends the matcher definition.
            if declaration:
                act_on_decl(declaration, comment, allowed_types)
                comment = ''
                declaration = ''
                allowed_types = []
            body = False
        else:
            # Collect node types named in static asserts inside the body.
            m = re.search(r'is_base_of<([^,]+), NodeType>', line)
            if m and m.group(1):
                allowed_types += [m.group(1)]
        continue
    if line.lstrip().startswith('/'):
        # Comment line: drop the leading slashes and at most one space.
        comment += re.sub(r'/+\s?', '', line) + '\n'
    else:
        declaration += ' ' + line
        tail = line.rstrip()
        if tail.endswith('{') and not tail.endswith('= {'):
            # The definition body starts here; act once it has been read.
            body = True
        elif not tail or tail.endswith(';'):
            # A blank line or a ';' terminates the declaration.
            act_on_decl(declaration, comment, allowed_types)
            comment = ''
            declaration = ''
            allowed_types = []

node_matcher_table = sort_table('DECL', node_matchers)
narrowing_matcher_table = sort_table('NARROWING', narrowing_matchers)
traversal_matcher_table = sort_table('TRAVERSAL', traversal_matchers)

with open('../LibASTMatchersReference.html') as reference_file:
    reference = reference_file.read()
# The tables are supplied through callables so that re.sub inserts them
# verbatim; a plain replacement string would have any backslash in the
# matcher documentation processed as an escape or group reference.
reference = re.sub(r'<!-- START_DECL_MATCHERS.*END_DECL_MATCHERS -->',
                   lambda _match: node_matcher_table, reference, flags=re.S)
reference = re.sub(r'<!-- START_NARROWING_MATCHERS.*END_NARROWING_MATCHERS -->',
                   lambda _match: narrowing_matcher_table, reference,
                   flags=re.S)
reference = re.sub(r'<!-- START_TRAVERSAL_MATCHERS.*END_TRAVERSAL_MATCHERS -->',
                   lambda _match: traversal_matcher_table, reference,
                   flags=re.S)

with open('../LibASTMatchersReference.html', 'wb') as output:
    output.write(reference)