1#!/usr/bin/env python3 2# 3# This is the API builder, it parses the C sources and build the 4# API formal description in XML. 5# 6# See Copyright for the status of this software. 7# 8# [email protected] 9# 10import os, sys 11import string 12import glob 13 14debug=0 15#debugsym='ignorableWhitespaceSAXFunc' 16debugsym=None 17 18# 19# C parser analysis code 20# 21ignored_files = { 22 "config.h": "generated portability layer", 23 "libxml.h": "internal only", 24 "testModule.c": "test tool", 25 "testapi.c": "generated regression tests", 26 "runtest.c": "regression tests program", 27 "runsuite.c": "regression tests program", 28 "tst.c": "not part of the library", 29 "test.c": "not part of the library", 30 "testdso.c": "test for dynamid shared libraries", 31 "testrecurse.c": "test for entities recursions", 32 "timsort.h": "Internal header only for xpath.c 2.9.0", 33 "nanoftp.h": "empty", 34 "SAX.h": "empty", 35} 36 37ignored_words = { 38 "WINAPI": (0, "Windows keyword"), 39 "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"), 40 "XMLPUBVAR": (0, "Special macro for extern vars for win32"), 41 "XSLTPUBVAR": (0, "Special macro for extern vars for win32"), 42 "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"), 43 "XMLPUBFUN": (0, "Special macro for extern funcs for win32"), 44 "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"), 45 "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"), 46 "XSLTCALL": (0, "Special macro for win32 calls"), 47 "EXSLTCALL": (0, "Special macro for win32 calls"), 48 "__declspec": (3, "Windows keyword"), 49 "__stdcall": (0, "Windows keyword"), 50 "ATTRIBUTE_UNUSED": (0, "macro keyword"), 51 "ATTRIBUTE_DESTRUCTOR": (0, "macro keyword"), 52 "LIBEXSLT_PUBLIC": (0, "macro keyword"), 53 "X_IN_Y": (5, "macro function builder"), 54 "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"), 55 "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"), 56 "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"), 57 "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"), 58 "ATTRIBUTE_NO_SANITIZE": (3, "macro keyword"), 59 "ATTRIBUTE_NO_SANITIZE_INTEGER": (0, "macro keyword"), 60 "XML_DEPRECATED": (0, "macro keyword"), 61 "XML_DEPRECATED_MEMBER": (0, "macro keyword"), 62 "XML_GLOBALS_ALLOC": (0, "macro keyword"), 63 "XML_GLOBALS_ERROR": (0, "macro keyword"), 64 "XML_GLOBALS_IO": (0, "macro keyword"), 65 "XML_GLOBALS_PARSER": (0, "macro keyword"), 66 "XML_GLOBALS_TREE": (0, "macro keyword"), 67 "XML_THREAD_LOCAL": (0, "macro keyword"), 68} 69 70def escape(raw): 71 raw = raw.replace('&', '&') 72 raw = raw.replace('<', '<') 73 raw = raw.replace('>', '>') 74 raw = raw.replace("'", ''') 75 raw = raw.replace('"', '"') 76 return raw 77 78class identifier: 79 def __init__(self, name, header=None, module=None, type=None, lineno = 0, 80 info=None, extra=None, conditionals = None): 81 self.name = name 82 self.header = header 83 self.module = module 84 self.type = type 85 self.info = info 86 self.extra = extra 87 self.lineno = lineno 88 self.static = 0 89 if conditionals == None or len(conditionals) == 0: 90 self.conditionals = None 91 else: 92 self.conditionals = conditionals[:] 93 if self.name == debugsym: 94 print("=> define %s : %s" % (debugsym, (module, type, info, 95 extra, conditionals))) 96 97 def __repr__(self): 98 r = "%s %s:" % (self.type, self.name) 99 if self.static: 100 r = r + " static" 101 if self.module != None: 102 r = r + " from %s" % (self.module) 103 if self.info != None: 104 r = r + " " + repr(self.info) 105 if self.extra != None: 106 r = r + " " + repr(self.extra) 107 if self.conditionals != None: 108 r = r + " " + repr(self.conditionals) 109 return r 110 111 112 def set_header(self, header): 113 self.header = header 114 def set_module(self, module): 115 self.module = module 116 def set_type(self, type): 117 self.type = type 118 def set_info(self, info): 119 self.info = info 120 def set_extra(self, extra): 121 self.extra = extra 122 def set_lineno(self, lineno): 123 self.lineno = lineno 124 def set_static(self, static): 125 self.static = static 126 def set_conditionals(self, conditionals): 127 if conditionals == None or len(conditionals) == 0: 128 self.conditionals = None 129 else: 130 self.conditionals = conditionals[:] 131 132 def get_name(self): 133 return self.name 134 def get_header(self): 135 return self.module 136 def get_module(self): 137 return self.module 138 def get_type(self): 139 return self.type 140 def get_info(self): 141 return self.info 142 def get_lineno(self): 143 return self.lineno 144 def get_extra(self): 145 return self.extra 146 def get_static(self): 147 return self.static 148 def get_conditionals(self): 149 return self.conditionals 150 151 def update(self, header, module, type = None, info = None, extra=None, 152 conditionals=None): 153 if self.name == debugsym: 154 print("=> update %s : %s" % (debugsym, (module, type, info, 155 extra, conditionals))) 156 if header != None and self.header == None: 157 self.set_header(module) 158 if module != None and (self.module == None or self.header == self.module): 159 self.set_module(module) 160 if type != None and self.type == None: 161 self.set_type(type) 162 if info != None: 163 self.set_info(info) 164 if extra != None: 165 self.set_extra(extra) 166 if conditionals != None: 167 self.set_conditionals(conditionals) 168 169class index: 170 def __init__(self, name = "noname"): 171 self.name = name 172 self.identifiers = {} 173 self.functions = {} 174 self.variables = {} 175 self.includes = {} 176 self.structs = {} 177 self.enums = {} 178 self.typedefs = {} 179 self.macros = {} 180 self.references = {} 181 self.info = {} 182 183 def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None): 184 if name[0:2] == '__': 185 return None 186 d = None 187 if name in self.identifiers: 188 d = self.identifiers[name] 189 d.update(header, module, type, info, extra, conditionals) 190 else: 191 d = identifier(name, header, module, type, lineno, info, extra, conditionals) 192 self.identifiers[name] = d 193 194 if d != None and static == 1: 195 d.set_static(1) 196 197 if d != None and name != None and type != None: 198 self.references[name] = d 199 200 if name == debugsym: 201 print("New ref: %s" % (d)) 202 203 return d 204 205 def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None): 206 if name[0:2] == '__': 207 return None 208 d = None 209 if name in self.identifiers: 210 d = self.identifiers[name] 211 d.update(header, module, type, info, extra, conditionals) 212 else: 213 d = identifier(name, header, module, type, lineno, info, extra, conditionals) 214 self.identifiers[name] = d 215 216 if d != None and static == 1: 217 d.set_static(1) 218 219 if d != None and name != None and type != None: 220 if type == "function": 221 self.functions[name] = d 222 elif type == "functype": 223 self.functions[name] = d 224 elif type == "variable": 225 self.variables[name] = d 226 elif type == "include": 227 self.includes[name] = d 228 elif type == "struct": 229 self.structs[name] = d 230 elif type == "enum": 231 self.enums[name] = d 232 elif type == "typedef": 233 self.typedefs[name] = d 234 elif type == "macro": 235 self.macros[name] = d 236 else: 237 print("Unable to register type ", type) 238 239 if name == debugsym: 240 print("New symbol: %s" % (d)) 241 242 return d 243 244 def merge(self, idx): 245 for id in list(idx.functions.keys()): 246 # 247 # macro might be used to override functions or variables 248 # definitions 249 # 250 if id in self.macros: 251 del self.macros[id] 252 if id in self.functions: 253 print("function %s from %s redeclared in %s" % ( 254 id, self.functions[id].header, idx.functions[id].header)) 255 else: 256 self.functions[id] = idx.functions[id] 257 self.identifiers[id] = idx.functions[id] 258 for id in list(idx.variables.keys()): 259 # 260 # macro might be used to override functions or variables 261 # definitions 262 # 263 if id in self.macros: 264 del self.macros[id] 265 if id in self.variables: 266 print("variable %s from %s redeclared in %s" % ( 267 id, self.variables[id].header, idx.variables[id].header)) 268 else: 269 self.variables[id] = idx.variables[id] 270 self.identifiers[id] = idx.variables[id] 271 for id in list(idx.structs.keys()): 272 if id in self.structs: 273 print("struct %s from %s redeclared in %s" % ( 274 id, self.structs[id].header, idx.structs[id].header)) 275 else: 276 self.structs[id] = idx.structs[id] 277 self.identifiers[id] = idx.structs[id] 278 for id in list(idx.typedefs.keys()): 279 if id in self.typedefs: 280 print("typedef %s from %s redeclared in %s" % ( 281 id, self.typedefs[id].header, idx.typedefs[id].header)) 282 else: 283 self.typedefs[id] = idx.typedefs[id] 284 self.identifiers[id] = idx.typedefs[id] 285 for id in list(idx.macros.keys()): 286 # 287 # macro might be used to override functions or variables 288 # definitions 289 # 290 if id in self.variables: 291 continue 292 if id in self.functions: 293 continue 294 if id in self.enums: 295 continue 296 if id in self.macros and id != 'XML_OP': 297 print("macro %s from %s redeclared in %s" % ( 298 id, self.macros[id].header, idx.macros[id].header)) 299 else: 300 self.macros[id] = idx.macros[id] 301 self.identifiers[id] = idx.macros[id] 302 for id in list(idx.enums.keys()): 303 if id in self.enums: 304 print("enum %s from %s redeclared in %s" % ( 305 id, self.enums[id].header, idx.enums[id].header)) 306 else: 307 self.enums[id] = idx.enums[id] 308 self.identifiers[id] = idx.enums[id] 309 310 def merge_public(self, idx): 311 for id in list(idx.functions.keys()): 312 if id in self.functions: 313 # check that function condition agrees with header 314 if idx.functions[id].conditionals != \ 315 self.functions[id].conditionals: 316 print("Header condition differs from Function for %s:" \ 317 % id) 318 print(" H: %s" % self.functions[id].conditionals) 319 print(" C: %s" % idx.functions[id].conditionals) 320 up = idx.functions[id] 321 self.functions[id].update(None, up.module, up.type, up.info, up.extra) 322 # else: 323 # print "Function %s from %s is not declared in headers" % ( 324 # id, idx.functions[id].module) 325 326 for id in list(idx.variables.keys()): 327 if id in self.variables: 328 # check that variable condition agrees with header 329 # TODO: produces many false positives 330 #if idx.variables[id].conditionals != \ 331 # self.variables[id].conditionals: 332 # print("Header condition differs from Variable for %s:" \ 333 # % id) 334 # print(" H: %s" % self.variables[id].conditionals) 335 # print(" C: %s" % idx.variables[id].conditionals) 336 up = idx.variables[id] 337 self.variables[id].update(None, up.module, up.type, up.info, up.extra) 338 339 def analyze_dict(self, type, dict): 340 count = 0 341 public = 0 342 for name in list(dict.keys()): 343 id = dict[name] 344 count = count + 1 345 if id.static == 0: 346 public = public + 1 347 if count != public: 348 print(" %d %s , %d public" % (count, type, public)) 349 elif count != 0: 350 print(" %d public %s" % (count, type)) 351 352 353 def analyze(self): 354 self.analyze_dict("functions", self.functions) 355 self.analyze_dict("variables", self.variables) 356 self.analyze_dict("structs", self.structs) 357 self.analyze_dict("typedefs", self.typedefs) 358 self.analyze_dict("macros", self.macros) 359 360class CLexer: 361 """A lexer for the C language, tokenize the input by reading and 362 analyzing it line by line""" 363 def __init__(self, input): 364 self.input = input 365 self.tokens = [] 366 self.line = "" 367 self.lineno = 0 368 369 def getline(self): 370 line = '' 371 while line == '': 372 line = self.input.readline() 373 if not line: 374 return None 375 self.lineno = self.lineno + 1 376 line = line.lstrip() 377 line = line.rstrip() 378 if line == '': 379 continue 380 while line[-1] == '\\': 381 line = line[:-1] 382 n = self.input.readline() 383 self.lineno = self.lineno + 1 384 n = n.lstrip() 385 n = n.rstrip() 386 if not n: 387 break 388 else: 389 line = line + n 390 return line 391 392 def getlineno(self): 393 return self.lineno 394 395 def push(self, token): 396 self.tokens.insert(0, token); 397 398 def debug(self): 399 print("Last token: ", self.last) 400 print("Token queue: ", self.tokens) 401 print("Line %d end: " % (self.lineno), self.line) 402 403 def token(self): 404 while self.tokens == []: 405 if self.line == "": 406 line = self.getline() 407 else: 408 line = self.line 409 self.line = "" 410 if line == None: 411 return None 412 413 if line[0] == '#': 414 self.tokens = list(map((lambda x: ('preproc', x)), 415 line.split())) 416 break; 417 l = len(line) 418 if line[0] == '"' or line[0] == "'": 419 end = line[0] 420 line = line[1:] 421 found = 0 422 tok = "" 423 while found == 0: 424 i = 0 425 l = len(line) 426 while i < l: 427 if line[i] == end: 428 self.line = line[i+1:] 429 line = line[:i] 430 l = i 431 found = 1 432 break 433 if line[i] == '\\': 434 i = i + 1 435 i = i + 1 436 tok = tok + line 437 if found == 0: 438 line = self.getline() 439 if line == None: 440 return None 441 self.last = ('string', tok) 442 return self.last 443 444 if l >= 2 and line[0] == '/' and line[1] == '*': 445 line = line[2:] 446 found = 0 447 tok = "" 448 while found == 0: 449 i = 0 450 l = len(line) 451 while i < l: 452 if line[i] == '*' and i+1 < l and line[i+1] == '/': 453 self.line = line[i+2:] 454 line = line[:i-1] 455 l = i 456 found = 1 457 break 458 i = i + 1 459 if tok != "": 460 tok = tok + "\n" 461 tok = tok + line 462 if found == 0: 463 line = self.getline() 464 if line == None: 465 return None 466 self.last = ('comment', tok) 467 return self.last 468 if l >= 2 and line[0] == '/' and line[1] == '/': 469 line = line[2:] 470 self.last = ('comment', line) 471 return self.last 472 i = 0 473 while i < l: 474 if line[i] == '/' and i+1 < l and line[i+1] == '/': 475 self.line = line[i:] 476 line = line[:i] 477 break 478 if line[i] == '/' and i+1 < l and line[i+1] == '*': 479 self.line = line[i:] 480 line = line[:i] 481 break 482 if line[i] == '"' or line[i] == "'": 483 self.line = line[i:] 484 line = line[:i] 485 break 486 i = i + 1 487 l = len(line) 488 i = 0 489 while i < l: 490 if line[i] == ' ' or line[i] == '\t': 491 i = i + 1 492 continue 493 o = ord(line[i]) 494 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 495 (o >= 48 and o <= 57): 496 s = i 497 while i < l: 498 o = ord(line[i]) 499 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 500 (o >= 48 and o <= 57) or \ 501 (" \t(){}:;,+-*/%&!|[]=><".find(line[i])) == -1: 502 i = i + 1 503 else: 504 break 505 self.tokens.append(('name', line[s:i])) 506 continue 507 if "(){}:;,[]".find(line[i]) != -1: 508# if line[i] == '(' or line[i] == ')' or line[i] == '{' or \ 509# line[i] == '}' or line[i] == ':' or line[i] == ';' or \ 510# line[i] == ',' or line[i] == '[' or line[i] == ']': 511 self.tokens.append(('sep', line[i])) 512 i = i + 1 513 continue 514 if "+-*><=/%&!|.".find(line[i]) != -1: 515# if line[i] == '+' or line[i] == '-' or line[i] == '*' or \ 516# line[i] == '>' or line[i] == '<' or line[i] == '=' or \ 517# line[i] == '/' or line[i] == '%' or line[i] == '&' or \ 518# line[i] == '!' or line[i] == '|' or line[i] == '.': 519 if line[i] == '.' and i + 2 < l and \ 520 line[i+1] == '.' and line[i+2] == '.': 521 self.tokens.append(('name', '...')) 522 i = i + 3 523 continue 524 525 j = i + 1 526 if j < l and ( 527 "+-*><=/%&!|".find(line[j]) != -1): 528# line[j] == '+' or line[j] == '-' or line[j] == '*' or \ 529# line[j] == '>' or line[j] == '<' or line[j] == '=' or \ 530# line[j] == '/' or line[j] == '%' or line[j] == '&' or \ 531# line[j] == '!' or line[j] == '|'): 532 self.tokens.append(('op', line[i:j+1])) 533 i = j + 1 534 else: 535 self.tokens.append(('op', line[i])) 536 i = i + 1 537 continue 538 s = i 539 while i < l: 540 o = ord(line[i]) 541 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 542 (o >= 48 and o <= 57) or ( 543 " \t(){}:;,+-*/%&!|[]=><".find(line[i]) == -1): 544# line[i] != ' ' and line[i] != '\t' and 545# line[i] != '(' and line[i] != ')' and 546# line[i] != '{' and line[i] != '}' and 547# line[i] != ':' and line[i] != ';' and 548# line[i] != ',' and line[i] != '+' and 549# line[i] != '-' and line[i] != '*' and 550# line[i] != '/' and line[i] != '%' and 551# line[i] != '&' and line[i] != '!' and 552# line[i] != '|' and line[i] != '[' and 553# line[i] != ']' and line[i] != '=' and 554# line[i] != '*' and line[i] != '>' and 555# line[i] != '<'): 556 i = i + 1 557 else: 558 break 559 self.tokens.append(('name', line[s:i])) 560 561 tok = self.tokens[0] 562 self.tokens = self.tokens[1:] 563 self.last = tok 564 return tok 565 566class CParser: 567 """The C module parser""" 568 def __init__(self, filename, idx = None): 569 self.filename = filename 570 if len(filename) > 2 and filename[-2:] == '.h': 571 self.is_header = 1 572 else: 573 self.is_header = 0 574 self.input = open(filename) 575 self.lexer = CLexer(self.input) 576 if idx == None: 577 self.index = index() 578 else: 579 self.index = idx 580 self.top_comment = "" 581 self.last_comment = "" 582 self.comment = None 583 self.collect_ref = 0 584 self.doc_disable = 0 585 self.conditionals = [] 586 self.defines = [] 587 588 def collect_references(self): 589 self.collect_ref = 1 590 591 def disable(self): 592 self.doc_disable = 1 593 594 def enable(self): 595 self.doc_disable = 0 596 597 def lineno(self): 598 return self.lexer.getlineno() 599 600 def index_add(self, name, module, static, type, info=None, extra = None): 601 if self.doc_disable: 602 return 603 if self.is_header == 1: 604 self.index.add(name, module, module, static, type, self.lineno(), 605 info, extra, self.conditionals) 606 else: 607 self.index.add(name, None, module, static, type, self.lineno(), 608 info, extra, self.conditionals) 609 610 def index_add_ref(self, name, module, static, type, info=None, 611 extra = None): 612 if self.is_header == 1: 613 self.index.add_ref(name, module, module, static, type, 614 self.lineno(), info, extra, self.conditionals) 615 else: 616 self.index.add_ref(name, None, module, static, type, self.lineno(), 617 info, extra, self.conditionals) 618 619 def warning(self, msg): 620 if self.doc_disable: 621 return 622 print(msg) 623 624 def error(self, msg, token=-1): 625 if self.doc_disable: 626 return 627 628 print("Parse Error: " + msg) 629 if token != -1: 630 print("Got token ", token) 631 self.lexer.debug() 632 sys.exit(1) 633 634 def debug(self, msg, token=-1): 635 print("Debug: " + msg) 636 if token != -1: 637 print("Got token ", token) 638 self.lexer.debug() 639 640 def parseTopComment(self, comment): 641 res = {} 642 lines = comment.split("\n") 643 item = None 644 for line in lines: 645 while line != "" and (line[0] == ' ' or line[0] == '\t'): 646 line = line[1:] 647 while line != "" and line[0] == '*': 648 line = line[1:] 649 while line != "" and (line[0] == ' ' or line[0] == '\t'): 650 line = line[1:] 651 try: 652 (it, line) = line.split(":", 1) 653 item = it 654 while line != "" and (line[0] == ' ' or line[0] == '\t'): 655 line = line[1:] 656 if item in res: 657 res[item] = res[item] + " " + line 658 else: 659 res[item] = line 660 except: 661 if item != None: 662 if item in res: 663 res[item] = res[item] + " " + line 664 else: 665 res[item] = line 666 self.index.info = res 667 668 def parseComment(self, token): 669 if self.top_comment == "": 670 self.top_comment = token[1] 671 if self.comment == None or token[1][0] == '*': 672 self.comment = token[1]; 673 else: 674 self.comment = self.comment + token[1] 675 token = self.lexer.token() 676 677 if self.comment.find("DOC_DISABLE") != -1: 678 self.disable() 679 680 if self.comment.find("DOC_ENABLE") != -1: 681 self.enable() 682 683 return token 684 685 # 686 # Parse a simple comment block for typedefs or global variables 687 # 688 def parseSimpleComment(self, name, quiet = False): 689 if name[0:2] == '__': 690 quiet = 1 691 692 args = [] 693 desc = "" 694 695 if self.comment == None: 696 if not quiet: 697 self.warning("Missing comment for %s" % (name)) 698 return(None) 699 if self.comment[0] != '*': 700 if not quiet: 701 self.warning("Missing * in comment for %s" % (name)) 702 return(None) 703 lines = self.comment.split('\n') 704 if lines[0] == '*': 705 del lines[0] 706 if lines[0] != "* %s:" % (name): 707 if not quiet: 708 self.warning("Misformatted comment for %s" % (name)) 709 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) 710 return(None) 711 del lines[0] 712 while len(lines) > 0 and lines[0] == '*': 713 del lines[0] 714 desc = "" 715 while len(lines) > 0: 716 l = lines[0] 717 while len(l) > 0 and l[0] == '*': 718 l = l[1:] 719 l = l.strip() 720 desc = desc + " " + l 721 del lines[0] 722 723 desc = desc.strip() 724 725 if quiet == 0: 726 if desc == "": 727 self.warning("Comment for %s lacks description" % (name)) 728 729 return(desc) 730 # 731 # Parse a comment block associate to a macro 732 # 733 def parseMacroComment(self, name, quiet = 0): 734 if name[0:2] == '__': 735 quiet = 1 736 737 args = [] 738 desc = "" 739 740 if self.comment == None: 741 if not quiet: 742 self.warning("Missing comment for macro %s" % (name)) 743 return((args, desc)) 744 if self.comment[0] != '*': 745 if not quiet: 746 self.warning("Missing * in macro comment for %s" % (name)) 747 return((args, desc)) 748 lines = self.comment.split('\n') 749 if lines[0] == '*': 750 del lines[0] 751 if lines[0] != "* %s:" % (name): 752 if not quiet: 753 self.warning("Misformatted macro comment for %s" % (name)) 754 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) 755 return((args, desc)) 756 del lines[0] 757 while lines[0] == '*': 758 del lines[0] 759 while len(lines) > 0 and lines[0][0:3] == '* @': 760 l = lines[0][3:] 761 try: 762 (arg, desc) = l.split(':', 1) 763 desc=desc.strip() 764 arg=arg.strip() 765 except: 766 if not quiet: 767 self.warning("Misformatted macro comment for %s" % (name)) 768 self.warning(" problem with '%s'" % (lines[0])) 769 del lines[0] 770 continue 771 del lines[0] 772 l = lines[0].strip() 773 while len(l) > 2 and l[0:3] != '* @': 774 while l[0] == '*': 775 l = l[1:] 776 desc = desc + ' ' + l.strip() 777 del lines[0] 778 if len(lines) == 0: 779 break 780 l = lines[0] 781 args.append((arg, desc)) 782 while len(lines) > 0 and lines[0] == '*': 783 del lines[0] 784 desc = "" 785 while len(lines) > 0: 786 l = lines[0] 787 while len(l) > 0 and l[0] == '*': 788 l = l[1:] 789 l = l.strip() 790 desc = desc + " " + l 791 del lines[0] 792 793 desc = desc.strip() 794 795 if quiet == 0: 796 if desc == "": 797 self.warning("Macro comment for %s lack description of the macro" % (name)) 798 799 return((args, desc)) 800 801 # 802 # Parse a comment block and merge the information found in the 803 # parameters descriptions, finally returns a block as complete 804 # as possible 805 # 806 def mergeFunctionComment(self, name, description, quiet = 0): 807 if name == 'main': 808 quiet = 1 809 if name[0:2] == '__': 810 quiet = 1 811 812 (ret, args) = description 813 desc = "" 814 retdesc = "" 815 816 if self.comment == None: 817 if not quiet: 818 self.warning("Missing comment for function %s" % (name)) 819 return(((ret[0], retdesc), args, desc)) 820 if self.comment[0] != '*': 821 if not quiet: 822 self.warning("Missing * in function comment for %s" % (name)) 823 return(((ret[0], retdesc), args, desc)) 824 lines = self.comment.split('\n') 825 if lines[0] == '*': 826 del lines[0] 827 if lines[0] != "* %s:" % (name): 828 if not quiet: 829 self.warning("Misformatted function comment for %s" % (name)) 830 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) 831 return(((ret[0], retdesc), args, desc)) 832 del lines[0] 833 while lines[0] == '*': 834 del lines[0] 835 nbargs = len(args) 836 while len(lines) > 0 and lines[0][0:3] == '* @': 837 l = lines[0][3:] 838 try: 839 (arg, desc) = l.split(':', 1) 840 desc=desc.strip() 841 arg=arg.strip() 842 except: 843 if not quiet: 844 self.warning("Misformatted function comment for %s" % (name)) 845 self.warning(" problem with '%s'" % (lines[0])) 846 del lines[0] 847 continue 848 del lines[0] 849 l = lines[0].strip() 850 while len(l) > 2 and l[0:3] != '* @': 851 while l[0] == '*': 852 l = l[1:] 853 desc = desc + ' ' + l.strip() 854 del lines[0] 855 if len(lines) == 0: 856 break 857 l = lines[0] 858 i = 0 859 while i < nbargs: 860 if args[i][1] == arg: 861 args[i] = (args[i][0], arg, desc) 862 break; 863 i = i + 1 864 if i >= nbargs: 865 if not quiet: 866 self.warning("Unable to find arg %s from function comment for %s" % ( 867 arg, name)) 868 while len(lines) > 0 and lines[0] == '*': 869 del lines[0] 870 desc = "" 871 while len(lines) > 0: 872 l = lines[0] 873 while len(l) > 0 and l[0] == '*': 874 l = l[1:] 875 l = l.strip() 876 if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return": 877 try: 878 l = l.split(' ', 1)[1] 879 except: 880 l = "" 881 retdesc = l.strip() 882 del lines[0] 883 while len(lines) > 0: 884 l = lines[0] 885 while len(l) > 0 and l[0] == '*': 886 l = l[1:] 887 l = l.strip() 888 retdesc = retdesc + " " + l 889 del lines[0] 890 else: 891 desc = desc + " " + l 892 del lines[0] 893 894 retdesc = retdesc.strip() 895 desc = desc.strip() 896 897 if quiet == 0: 898 # 899 # report missing comments 900 # 901 i = 0 902 while i < nbargs: 903 if args[i][2] == None and args[i][0] != "void" and \ 904 ((args[i][1] != None) or (args[i][1] == '')): 905 self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1])) 906 i = i + 1 907 if retdesc == "" and ret[0] != "void": 908 self.warning("Function comment for %s lacks description of return value" % (name)) 909 if desc == "" and retdesc == "": 910 self.warning("Function comment for %s lacks description of the function" % (name)) 911 912 return(((ret[0], retdesc), args, desc)) 913 914 def parsePreproc(self, token): 915 if debug: 916 print("=> preproc ", token, self.lexer.tokens) 917 name = token[1] 918 if name == "#include": 919 token = self.lexer.token() 920 if token == None: 921 return None 922 if token[0] == 'preproc': 923 self.index_add(token[1], self.filename, not self.is_header, 924 "include") 925 return self.lexer.token() 926 return token 927 if name == "#define": 928 token = self.lexer.token() 929 if token == None: 930 return None 931 if token[0] == 'preproc': 932 # TODO macros with arguments 933 name = token[1] 934 lst = [] 935 token = self.lexer.token() 936 while token != None and token[0] == 'preproc' and \ 937 token[1][0] != '#': 938 lst.append(token[1]) 939 token = self.lexer.token() 940 try: 941 name = name.split('(') [0] 942 except: 943 pass 944 info = self.parseMacroComment(name, True) 945 self.index_add(name, self.filename, not self.is_header, 946 "macro", info) 947 return token 948 949 # 950 # Processing of conditionals modified by Bill 1/1/05 951 # 952 # We process conditionals (i.e. tokens from #ifdef, #ifndef, 953 # #if, #else and #endif) for headers and mainline code, 954 # store the ones from the header in libxml2-api.xml, and later 955 # (in the routine merge_public) verify that the two (header and 956 # mainline code) agree. 957 # 958 # There is a small problem with processing the headers. Some of 959 # the variables are not concerned with enabling / disabling of 960 # library functions (e.g. '__XML_PARSER_H__'), and we don't want 961 # them to be included in libxml2-api.xml, or involved in 962 # the check between the header and the mainline code. To 963 # accomplish this, we ignore any conditional which doesn't include 964 # the string 'ENABLED' 965 # 966 if name == "#ifdef": 967 apstr = self.lexer.tokens[0][1] 968 try: 969 self.defines.append(apstr) 970 if apstr.find('ENABLED') != -1: 971 self.conditionals.append("defined(%s)" % apstr) 972 except: 973 pass 974 elif name == "#ifndef": 975 apstr = self.lexer.tokens[0][1] 976 try: 977 self.defines.append(apstr) 978 if apstr.find('ENABLED') != -1: 979 self.conditionals.append("!defined(%s)" % apstr) 980 except: 981 pass 982 elif name == "#if": 983 apstr = "" 984 for tok in self.lexer.tokens: 985 if apstr != "": 986 apstr = apstr + " " 987 apstr = apstr + tok[1] 988 try: 989 self.defines.append(apstr) 990 if apstr.find('ENABLED') != -1: 991 self.conditionals.append(apstr) 992 except: 993 pass 994 elif name == "#else": 995 if self.conditionals != [] and \ 996 self.defines[-1].find('ENABLED') != -1: 997 self.conditionals[-1] = "!(%s)" % self.conditionals[-1] 998 elif name == "#endif": 999 if self.conditionals != [] and \ 1000 self.defines[-1].find('ENABLED') != -1: 1001 self.conditionals = self.conditionals[:-1] 1002 self.defines = self.defines[:-1] 1003 token = self.lexer.token() 1004 while token != None and token[0] == 'preproc' and \ 1005 token[1][0] != '#': 1006 token = self.lexer.token() 1007 return token 1008 1009 # 1010 # token acquisition on top of the lexer, it handle internally 1011 # preprocessor and comments since they are logically not part of 1012 # the program structure. 1013 # 1014 def token(self): 1015 global ignored_words 1016 1017 token = self.lexer.token() 1018 while token != None: 1019 if token[0] == 'comment': 1020 token = self.parseComment(token) 1021 continue 1022 elif token[0] == 'preproc': 1023 token = self.parsePreproc(token) 1024 continue 1025 elif token[0] == "name" and token[1] == "__const": 1026 token = ("name", "const") 1027 return token 1028 elif token[0] == "name" and token[1] == "__attribute": 1029 token = self.lexer.token() 1030 while token != None and token[1] != ";": 1031 token = self.lexer.token() 1032 return token 1033 elif token[0] == "name" and token[1] in ignored_words: 1034 (n, info) = ignored_words[token[1]] 1035 i = 0 1036 while i < n: 1037 token = self.lexer.token() 1038 i = i + 1 1039 token = self.lexer.token() 1040 continue 1041 else: 1042 if debug: 1043 print("=> ", token) 1044 return token 1045 return None 1046 1047 # 1048 # Parse a typedef, it records the type and its name. 1049 # 1050 def parseTypedef(self, token): 1051 if token == None: 1052 return None 1053 token = self.parseType(token) 1054 if token == None: 1055 self.error("parsing typedef") 1056 return None 1057 base_type = self.type 1058 type = base_type 1059 #self.debug("end typedef type", token) 1060 while token != None: 1061 if token[0] == "name": 1062 name = token[1] 1063 signature = self.signature 1064 if signature != None: 1065 type = type.split('(')[0] 1066 d = self.mergeFunctionComment(name, 1067 ((type, None), signature), 1) 1068 self.index_add(name, self.filename, not self.is_header, 1069 "functype", d) 1070 else: 1071 if base_type == "struct": 1072 self.index_add(name, self.filename, not self.is_header, 1073 "struct", type) 1074 base_type = "struct " + name 1075 else: 1076 # TODO report missing or misformatted comments 1077 info = self.parseSimpleComment(name, True) 1078 self.index_add(name, self.filename, not self.is_header, 1079 "typedef", type, info) 1080 token = self.token() 1081 else: 1082 self.error("parsing typedef: expecting a name") 1083 return token 1084 #self.debug("end typedef", token) 1085 if token != None and token[0] == 'sep' and token[1] == ',': 1086 type = base_type 1087 token = self.token() 1088 while token != None and token[0] == "op": 1089 type = type + token[1] 1090 token = self.token() 1091 elif token != None and token[0] == 'sep' and token[1] == ';': 1092 break; 1093 elif token != None and token[0] == 'name': 1094 type = base_type 1095 continue; 1096 else: 1097 self.error("parsing typedef: expecting ';'", token) 1098 return token 1099 token = self.token() 1100 return token 1101 1102 # 1103 # Parse a C code block, used for functions it parse till 1104 # the balancing } included 1105 # 1106 def parseBlock(self, token): 1107 while token != None: 1108 if token[0] == "sep" and token[1] == "{": 1109 token = self.token() 1110 token = self.parseBlock(token) 1111 elif token[0] == "sep" and token[1] == "}": 1112 token = self.token() 1113 return token 1114 else: 1115 if self.collect_ref == 1: 1116 oldtok = token 1117 token = self.token() 1118 if oldtok[0] == "name" and oldtok[1][0:3] == "xml": 1119 if token[0] == "sep" and token[1] == "(": 1120 self.index_add_ref(oldtok[1], self.filename, 1121 0, "function") 1122 token = self.token() 1123 elif token[0] == "name": 1124 token = self.token() 1125 if token[0] == "sep" and (token[1] == ";" or 1126 token[1] == "," or token[1] == "="): 1127 self.index_add_ref(oldtok[1], self.filename, 1128 0, "type") 1129 elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_": 1130 self.index_add_ref(oldtok[1], self.filename, 1131 0, "typedef") 1132 elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_": 1133 self.index_add_ref(oldtok[1], self.filename, 1134 0, "typedef") 1135 1136 else: 1137 token = self.token() 1138 return token 1139 1140 # 1141 # Parse a C struct definition till the balancing } 1142 # 1143 def parseStruct(self, token): 1144 fields = [] 1145 #self.debug("start parseStruct", token) 1146 while token != None: 1147 if token[0] == "sep" and token[1] == "{": 1148 token = self.token() 1149 token = self.parseTypeBlock(token) 1150 elif token[0] == "sep" and token[1] == "}": 1151 self.struct_fields = fields 1152 #self.debug("end parseStruct", token) 1153 #print fields 1154 token = self.token() 1155 return token 1156 else: 1157 base_type = self.type 1158 #self.debug("before parseType", token) 1159 token = self.parseType(token) 1160 #self.debug("after parseType", token) 1161 if token != None and token[0] == "name": 1162 fname = token[1] 1163 token = self.token() 1164 if token[0] == "sep" and token[1] == ";": 1165 token = self.token() 1166 fields.append((self.type, fname)) 1167 else: 1168 self.error("parseStruct: expecting ;", token) 1169 elif token != None and token[0] == "sep" and token[1] == "{": 1170 token = self.token() 1171 token = self.parseTypeBlock(token) 1172 if token != None and token[0] == "name": 1173 token = self.token() 1174 if token != None and token[0] == "sep" and token[1] == ";": 1175 token = self.token() 1176 else: 1177 self.error("parseStruct: expecting ;", token) 1178 else: 1179 self.error("parseStruct: name", token) 1180 token = self.token() 1181 self.type = base_type; 1182 self.struct_fields = fields 1183 #self.debug("end parseStruct", token) 1184 #print fields 1185 return token 1186 1187 # 1188 # Parse a C enum block, parse till the balancing } 1189 # 1190 def parseEnumBlock(self, token): 1191 self.enums = [] 1192 name = None 1193 self.comment = None 1194 comment = "" 1195 value = "0" 1196 while token != None: 1197 if token[0] == "sep" and token[1] == "{": 1198 token = self.token() 1199 token = self.parseTypeBlock(token) 1200 elif token[0] == "sep" and token[1] == "}": 1201 if name != None: 1202 if self.comment != None: 1203 comment = self.comment 1204 self.comment = None 1205 self.enums.append((name, value, comment)) 1206 token = self.token() 1207 return token 1208 elif token[0] == "name": 1209 if name != None: 1210 if self.comment != None: 1211 comment = self.comment.strip() 1212 self.comment = None 1213 self.enums.append((name, value, comment)) 1214 name = token[1] 1215 comment = "" 1216 token = self.token() 1217 if token[0] == "op" and token[1][0] == "=": 1218 value = "" 1219 if len(token[1]) > 1: 1220 value = token[1][1:] 1221 token = self.token() 1222 while token[0] != "sep" or (token[1] != ',' and 1223 token[1] != '}'): 1224 value = value + token[1] 1225 token = self.token() 1226 else: 1227 try: 1228 value = "%d" % (int(value) + 1) 1229 except: 1230 self.warning("Failed to compute value of enum %s" % (name)) 1231 value="" 1232 if token[0] == "sep" and token[1] == ",": 1233 token = self.token() 1234 else: 1235 token = self.token() 1236 return token 1237 1238 # 1239 # Parse a C definition block, used for structs it parse till 1240 # the balancing } 1241 # 1242 def parseTypeBlock(self, token): 1243 while token != None: 1244 if token[0] == "sep" and token[1] == "{": 1245 token = self.token() 1246 token = self.parseTypeBlock(token) 1247 elif token[0] == "sep" and token[1] == "}": 1248 token = self.token() 1249 return token 1250 else: 1251 token = self.token() 1252 return token 1253 1254 # 1255 # Parse a type: the fact that the type name can either occur after 1256 # the definition or within the definition makes it a little harder 1257 # if inside, the name token is pushed back before returning 1258 # 1259 def parseType(self, token): 1260 self.type = "" 1261 self.struct_fields = [] 1262 self.signature = None 1263 if token == None: 1264 return token 1265 1266 have_sign = 0 1267 done = 0 1268 1269 while token[0] == "name" and ( 1270 token[1] == "const" or \ 1271 token[1] == "unsigned" or \ 1272 token[1] == "signed"): 1273 if token[1] == "unsigned" or token[1] == "signed": 1274 have_sign = 1 1275 if self.type == "": 1276 self.type = token[1] 1277 else: 1278 self.type = self.type + " " + token[1] 1279 token = self.token() 1280 1281 if token[0] == "name" and token[1] in ("char", "short", "int", "long"): 1282 if self.type == "": 1283 self.type = token[1] 1284 else: 1285 self.type = self.type + " " + token[1] 1286 1287 elif have_sign: 1288 done = 1 1289 1290 elif token[0] == "name" and token[1] == "struct": 1291 if self.type == "": 1292 self.type = token[1] 1293 else: 1294 self.type = self.type + " " + token[1] 1295 token = self.token() 1296 nametok = None 1297 if token[0] == "name": 1298 nametok = token 1299 token = self.token() 1300 if token != None and token[0] == "sep" and token[1] == "{": 1301 token = self.token() 1302 token = self.parseStruct(token) 1303 elif token != None and token[0] == "op" and token[1] == "*": 1304 self.type = self.type + " " + nametok[1] + " *" 1305 token = self.token() 1306 while token != None and token[0] == "op" and token[1] == "*": 1307 self.type = self.type + " *" 1308 token = self.token() 1309 if token[0] == "name": 1310 nametok = token 1311 token = self.token() 1312 else: 1313 self.error("struct : expecting name", token) 1314 return token 1315 elif token != None and token[0] == "name" and nametok != None: 1316 self.type = self.type + " " + nametok[1] 1317 return token 1318 1319 if nametok != None: 1320 self.lexer.push(token) 1321 token = nametok 1322 return token 1323 1324 elif token[0] == "name" and token[1] == "enum": 1325 if self.type == "": 1326 self.type = token[1] 1327 else: 1328 self.type = self.type + " " + token[1] 1329 self.enums = [] 1330 token = self.token() 1331 if token != None and token[0] == "sep" and token[1] == "{": 1332 token = self.token() 1333 token = self.parseEnumBlock(token) 1334 else: 1335 self.error("parsing enum: expecting '{'", token) 1336 enum_type = None 1337 if token != None and token[0] != "name": 1338 self.lexer.push(token) 1339 token = ("name", "enum") 1340 else: 1341 enum_type = token[1] 1342 for enum in self.enums: 1343 self.index_add(enum[0], self.filename, 1344 not self.is_header, "enum", 1345 (enum[1], enum[2], enum_type)) 1346 return token 1347 1348 elif token[0] == "name": 1349 if self.type == "": 1350 self.type = token[1] 1351 else: 1352 self.type = self.type + " " + token[1] 1353 else: 1354 self.error("parsing type %s: expecting a name" % (self.type), 1355 token) 1356 return token 1357 if not done: 1358 token = self.token() 1359 while token != None and (token[0] == "op" or 1360 token[0] == "name" and token[1] == "const"): 1361 self.type = self.type + " " + token[1] 1362 token = self.token() 1363 1364 # 1365 # if there is a parenthesis here, this means a function type 1366 # 1367 if token != None and token[0] == "sep" and token[1] == '(': 1368 self.type = self.type + token[1] 1369 token = self.token() 1370 while token != None and token[0] == "op" and token[1] == '*': 1371 self.type = self.type + token[1] 1372 token = self.token() 1373 if token == None or token[0] != "name" : 1374 self.error("parsing function type, name expected", token); 1375 return token 1376 self.type = self.type + token[1] 1377 nametok = token 1378 token = self.token() 1379 if token != None and token[0] == "sep" and token[1] == ')': 1380 self.type = self.type + token[1] 1381 token = self.token() 1382 if token != None and token[0] == "sep" and token[1] == '(': 1383 token = self.token() 1384 type = self.type; 1385 token = self.parseSignature(token); 1386 self.type = type; 1387 else: 1388 self.error("parsing function type, '(' expected", token); 1389 return token 1390 else: 1391 self.error("parsing function type, ')' expected", token); 1392 return token 1393 self.lexer.push(token) 1394 token = nametok 1395 return token 1396 1397 # 1398 # do some lookahead for arrays 1399 # 1400 if token != None and token[0] == "name": 1401 nametok = token 1402 token = self.token() 1403 if token != None and token[0] == "sep" and token[1] == '[': 1404 self.type = self.type + nametok[1] 1405 while token != None and token[0] == "sep" and token[1] == '[': 1406 self.type = self.type + token[1] 1407 token = self.token() 1408 while token != None and token[0] != 'sep' and \ 1409 token[1] != ']' and token[1] != ';': 1410 self.type = self.type + token[1] 1411 token = self.token() 1412 if token != None and token[0] == 'sep' and token[1] == ']': 1413 self.type = self.type + token[1] 1414 token = self.token() 1415 else: 1416 self.error("parsing array type, ']' expected", token); 1417 return token 1418 elif token != None and token[0] == "sep" and token[1] == ':': 1419 # remove :12 in case it's a limited int size 1420 token = self.token() 1421 token = self.token() 1422 self.lexer.push(token) 1423 token = nametok 1424 1425 return token 1426 1427 # 1428 # Parse a signature: '(' has been parsed and we scan the type definition 1429 # up to the ')' included 1430 def parseSignature(self, token): 1431 signature = [] 1432 if token != None and token[0] == "sep" and token[1] == ')': 1433 self.signature = [] 1434 token = self.token() 1435 return token 1436 while token != None: 1437 token = self.parseType(token) 1438 if token != None and token[0] == "name": 1439 signature.append((self.type, token[1], None)) 1440 token = self.token() 1441 elif token != None and token[0] == "sep" and token[1] == ',': 1442 token = self.token() 1443 continue 1444 elif token != None and token[0] == "sep" and token[1] == ')': 1445 # only the type was provided 1446 if self.type == "...": 1447 signature.append((self.type, "...", None)) 1448 else: 1449 signature.append((self.type, None, None)) 1450 if token != None and token[0] == "sep": 1451 if token[1] == ',': 1452 token = self.token() 1453 continue 1454 elif token[1] == ')': 1455 token = self.token() 1456 break 1457 self.signature = signature 1458 return token 1459 1460 # 1461 # Parse a global definition, be it a type, variable or function 1462 # the extern "C" blocks are a bit nasty and require it to recurse. 1463 # 1464 def parseGlobal(self, token): 1465 static = 0 1466 if token[1] == 'extern': 1467 token = self.token() 1468 if token == None: 1469 return token 1470 if token[0] == 'string': 1471 if token[1] == 'C': 1472 token = self.token() 1473 if token == None: 1474 return token 1475 if token[0] == 'sep' and token[1] == "{": 1476 token = self.token() 1477# print 'Entering extern "C line ', self.lineno() 1478 while token != None and (token[0] != 'sep' or 1479 token[1] != "}"): 1480 if token[0] == 'name': 1481 token = self.parseGlobal(token) 1482 else: 1483 self.error( 1484 "token %s %s unexpected at the top level" % ( 1485 token[0], token[1])) 1486 token = self.parseGlobal(token) 1487# print 'Exiting extern "C" line', self.lineno() 1488 token = self.token() 1489 return token 1490 else: 1491 return token 1492 elif token[1] == 'static': 1493 static = 1 1494 token = self.token() 1495 if token == None or token[0] != 'name': 1496 return token 1497 1498 if token[1] == 'typedef': 1499 token = self.token() 1500 return self.parseTypedef(token) 1501 else: 1502 token = self.parseType(token) 1503 type_orig = self.type 1504 if token == None or token[0] != "name": 1505 return token 1506 type = type_orig 1507 self.name = token[1] 1508 token = self.token() 1509 while token != None and (token[0] == "sep" or token[0] == "op"): 1510 if token[0] == "sep": 1511 if token[1] == "[": 1512 type = type + token[1] 1513 token = self.token() 1514 while token != None and (token[0] != "sep" or \ 1515 token[1] != ";"): 1516 type = type + token[1] 1517 token = self.token() 1518 1519 if token != None and token[0] == "op" and token[1] == "=": 1520 # 1521 # Skip the initialization of the variable 1522 # 1523 token = self.token() 1524 if token[0] == 'sep' and token[1] == '{': 1525 token = self.token() 1526 token = self.parseBlock(token) 1527 else: 1528 while token != None and (token[0] != "sep" or \ 1529 (token[1] != ';' and token[1] != ',')): 1530 token = self.token() 1531 if token == None or token[0] != "sep" or (token[1] != ';' and 1532 token[1] != ','): 1533 self.error("missing ';' or ',' after value") 1534 1535 if token != None and token[0] == "sep": 1536 if token[1] == ";": 1537 if type == "struct": 1538 self.index_add(self.name, self.filename, 1539 not self.is_header, "struct", self.struct_fields) 1540 else: 1541 info = self.parseSimpleComment(self.name, True) 1542 self.index_add(self.name, self.filename, 1543 not self.is_header, "variable", type, info) 1544 self.comment = None 1545 token = self.token() 1546 break 1547 elif token[1] == "(": 1548 token = self.token() 1549 token = self.parseSignature(token) 1550 if token == None: 1551 return None 1552 if token[0] == "sep" and token[1] == ";": 1553 d = self.mergeFunctionComment(self.name, 1554 ((type, None), self.signature), 1) 1555 self.index_add(self.name, self.filename, static, 1556 "function", d) 1557 self.comment = None 1558 token = self.token() 1559 elif token[0] == "sep" and token[1] == "{": 1560 d = self.mergeFunctionComment(self.name, 1561 ((type, None), self.signature), static) 1562 self.index_add(self.name, self.filename, static, 1563 "function", d) 1564 self.comment = None 1565 token = self.token() 1566 token = self.parseBlock(token); 1567 elif token[1] == ',': 1568 self.index_add(self.name, self.filename, static, 1569 "variable", type) 1570 self.comment = None 1571 type = type_orig 1572 token = self.token() 1573 while token != None and token[0] == "sep": 1574 type = type + token[1] 1575 token = self.token() 1576 if token != None and token[0] == "name": 1577 self.name = token[1] 1578 token = self.token() 1579 else: 1580 break 1581 1582 return token 1583 1584 def parse(self): 1585 self.warning("Parsing %s" % (self.filename)) 1586 token = self.token() 1587 while token != None: 1588 if token[0] == 'name': 1589 token = self.parseGlobal(token) 1590 else: 1591 self.error("token %s %s unexpected at the top level" % ( 1592 token[0], token[1])) 1593 token = self.parseGlobal(token) 1594 return 1595 self.parseTopComment(self.top_comment) 1596 return self.index 1597 1598 1599class docBuilder: 1600 """A documentation builder""" 1601 def __init__(self, name, directories=['.'], excludes=[]): 1602 self.name = name 1603 self.directories = directories 1604 self.excludes = excludes + list(ignored_files.keys()) 1605 self.modules = {} 1606 self.headers = {} 1607 self.idx = index() 1608 self.index = {} 1609 if name == 'libxml2': 1610 self.basename = 'libxml' 1611 else: 1612 self.basename = name 1613 1614 def analyze(self): 1615 print("Project %s : %d headers, %d modules" % (self.name, len(list(self.headers.keys())), len(list(self.modules.keys())))) 1616 self.idx.analyze() 1617 1618 def scanHeaders(self): 1619 for header in list(self.headers.keys()): 1620 parser = CParser(header) 1621 idx = parser.parse() 1622 self.headers[header] = idx; 1623 self.idx.merge(idx) 1624 1625 def scanModules(self): 1626 for module in list(self.modules.keys()): 1627 parser = CParser(module) 1628 idx = parser.parse() 1629 # idx.analyze() 1630 self.modules[module] = idx 1631 self.idx.merge_public(idx) 1632 1633 def scan(self): 1634 for directory in self.directories: 1635 files = glob.glob(directory + "/*.c") 1636 for file in files: 1637 skip = 0 1638 for excl in self.excludes: 1639 if file.find(excl) != -1: 1640 print("Skipping %s" % file) 1641 skip = 1 1642 break 1643 if skip == 0: 1644 self.modules[file] = None; 1645 files = glob.glob(directory + "/*.h") 1646 for file in files: 1647 skip = 0 1648 for excl in self.excludes: 1649 if file.find(excl) != -1: 1650 print("Skipping %s" % file) 1651 skip = 1 1652 break 1653 if skip == 0: 1654 self.headers[file] = None; 1655 self.scanHeaders() 1656 self.scanModules() 1657 1658 def modulename_file(self, file): 1659 module = os.path.basename(file) 1660 if module[-2:] == '.h': 1661 module = module[:-2] 1662 elif module[-2:] == '.c': 1663 module = module[:-2] 1664 return module 1665 1666 def serialize_enum(self, output, name): 1667 id = self.idx.enums[name] 1668 output.write(" <enum name='%s' file='%s'" % (name, 1669 self.modulename_file(id.header))) 1670 if id.info != None: 1671 info = id.info 1672 if info[0] != None and info[0] != '': 1673 try: 1674 val = eval(info[0]) 1675 except: 1676 val = info[0] 1677 output.write(" value='%s'" % (val)); 1678 if info[2] != None and info[2] != '': 1679 output.write(" type='%s'" % info[2]); 1680 if info[1] != None and info[1] != '': 1681 output.write(" info='%s'" % escape(info[1])); 1682 output.write("/>\n") 1683 1684 def serialize_macro(self, output, name): 1685 id = self.idx.macros[name] 1686 output.write(" <macro name='%s' file='%s'>\n" % (name, 1687 self.modulename_file(id.header))) 1688 if id.info != None: 1689 try: 1690 (args, desc) = id.info 1691 if desc != None and desc != "": 1692 output.write(" <info>%s</info>\n" % (escape(desc))) 1693 for arg in args: 1694 (name, desc) = arg 1695 if desc != None and desc != "": 1696 output.write(" <arg name='%s' info='%s'/>\n" % ( 1697 name, escape(desc))) 1698 else: 1699 output.write(" <arg name='%s'/>\n" % (name)) 1700 except: 1701 pass 1702 output.write(" </macro>\n") 1703 1704 def serialize_typedef(self, output, name): 1705 id = self.idx.typedefs[name] 1706 if id.info[0:7] == 'struct ': 1707 output.write(" <struct name='%s' file='%s' type='%s'" % ( 1708 name, self.modulename_file(id.header), id.info)) 1709 name = id.info[7:] 1710 if name in self.idx.structs and ( \ 1711 type(self.idx.structs[name].info) == type(()) or 1712 type(self.idx.structs[name].info) == type([])): 1713 output.write(">\n"); 1714 try: 1715 for field in self.idx.structs[name].info: 1716 output.write(" <field name='%s' type='%s'/>\n" % (field[1] , field[0])) 1717 except: 1718 print("Failed to serialize struct %s" % (name)) 1719 output.write(" </struct>\n") 1720 else: 1721 output.write("/>\n"); 1722 else : 1723 output.write(" <typedef name='%s' file='%s' type='%s'" % ( 1724 name, self.modulename_file(id.header), id.info)) 1725 try: 1726 desc = id.extra 1727 if desc != None and desc != "": 1728 output.write(">\n <info>%s</info>\n" % (escape(desc))) 1729 output.write(" </typedef>\n") 1730 else: 1731 output.write("/>\n") 1732 except: 1733 output.write("/>\n") 1734 1735 def serialize_variable(self, output, name): 1736 id = self.idx.variables[name] 1737 if id.info != None: 1738 output.write(" <variable name='%s' file='%s' type='%s'" % ( 1739 name, self.modulename_file(id.header), id.info)) 1740 else: 1741 output.write(" <variable name='%s' file='%s'" % ( 1742 name, self.modulename_file(id.header))) 1743 desc = id.extra 1744 if desc != None and desc != "": 1745 output.write(">\n <info>%s</info>\n" % (escape(desc))) 1746 output.write(" </variable>\n") 1747 else: 1748 output.write("/>\n") 1749 1750 def serialize_function(self, output, name): 1751 id = self.idx.functions[name] 1752 if name == debugsym: 1753 print("=>", id) 1754 1755 output.write(" <%s name='%s' file='%s' module='%s'>\n" % (id.type, 1756 name, self.modulename_file(id.header), 1757 self.modulename_file(id.module))) 1758 # 1759 # Processing of conditionals modified by Bill 1/1/05 1760 # 1761 if id.conditionals != None: 1762 apstr = "" 1763 for cond in id.conditionals: 1764 if apstr != "": 1765 apstr = apstr + " && " 1766 apstr = apstr + cond 1767 output.write(" <cond>%s</cond>\n"% (apstr)); 1768 try: 1769 (ret, params, desc) = id.info 1770 if (desc == None or desc == '') and \ 1771 name[0:9] != "xmlThrDef" and name != "xmlDllMain" and \ 1772 ret[1] == '': 1773 print("%s %s from %s has no description" % (id.type, name, 1774 self.modulename_file(id.module))) 1775 1776 output.write(" <info>%s</info>\n" % (escape(desc))) 1777 if ret[0] != None: 1778 if ret[0] == "void": 1779 output.write(" <return type='void'/>\n") 1780 else: 1781 output.write(" <return type='%s' info='%s'/>\n" % ( 1782 ret[0], escape(ret[1]))) 1783 for param in params: 1784 if param[0] == 'void': 1785 continue 1786 if param[2] == None: 1787 output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0])) 1788 else: 1789 output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2]))) 1790 except: 1791 print("Failed to save function %s info: " % name, repr(id.info)) 1792 output.write(" </%s>\n" % (id.type)) 1793 1794 def serialize_exports(self, output, file): 1795 module = self.modulename_file(file) 1796 output.write(" <file name='%s'>\n" % (module)) 1797 dict = self.headers[file] 1798 if dict.info != None: 1799 for data in ('Summary', 'Description', 'Author'): 1800 try: 1801 output.write(" <%s>%s</%s>\n" % ( 1802 data.lower(), 1803 escape(dict.info[data]), 1804 data.lower())) 1805 except: 1806 if data != 'Author': 1807 print("Header %s lacks a %s description" % (module, data)) 1808 if 'Description' in dict.info: 1809 desc = dict.info['Description'] 1810 if desc.find("DEPRECATED") != -1: 1811 output.write(" <deprecated/>\n") 1812 1813 ids = list(dict.macros.keys()) 1814 ids.sort() 1815 for id in ids: 1816 # Macros are sometime used to masquerade other types. 1817 if id in dict.functions: 1818 continue 1819 if id in dict.variables: 1820 continue 1821 if id in dict.typedefs: 1822 continue 1823 if id in dict.structs: 1824 continue 1825 if id in dict.enums: 1826 continue 1827 output.write(" <exports symbol='%s' type='macro'/>\n" % (id)) 1828 ids = list(dict.enums.keys()) 1829 ids.sort() 1830 for id in ids: 1831 output.write(" <exports symbol='%s' type='enum'/>\n" % (id)) 1832 ids = list(dict.typedefs.keys()) 1833 ids.sort() 1834 for id in ids: 1835 output.write(" <exports symbol='%s' type='typedef'/>\n" % (id)) 1836 ids = list(dict.structs.keys()) 1837 ids.sort() 1838 for id in ids: 1839 output.write(" <exports symbol='%s' type='struct'/>\n" % (id)) 1840 ids = list(dict.variables.keys()) 1841 ids.sort() 1842 for id in ids: 1843 output.write(" <exports symbol='%s' type='variable'/>\n" % (id)) 1844 ids = list(dict.functions.keys()) 1845 ids.sort() 1846 for id in ids: 1847 output.write(" <exports symbol='%s' type='function'/>\n" % (id)) 1848 output.write(" </file>\n") 1849 1850 def serialize(self): 1851 filename = "%s-api.xml" % self.name 1852 print("Saving XML description %s" % (filename)) 1853 output = open(filename, "w") 1854 output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n') 1855 output.write("<api name='%s'>\n" % self.name) 1856 output.write(" <files>\n") 1857 headers = list(self.headers.keys()) 1858 headers.sort() 1859 for file in headers: 1860 self.serialize_exports(output, file) 1861 output.write(" </files>\n") 1862 output.write(" <symbols>\n") 1863 macros = list(self.idx.macros.keys()) 1864 macros.sort() 1865 for macro in macros: 1866 self.serialize_macro(output, macro) 1867 enums = list(self.idx.enums.keys()) 1868 enums.sort() 1869 for enum in enums: 1870 self.serialize_enum(output, enum) 1871 typedefs = list(self.idx.typedefs.keys()) 1872 typedefs.sort() 1873 for typedef in typedefs: 1874 self.serialize_typedef(output, typedef) 1875 variables = list(self.idx.variables.keys()) 1876 variables.sort() 1877 for variable in variables: 1878 self.serialize_variable(output, variable) 1879 functions = list(self.idx.functions.keys()) 1880 functions.sort() 1881 for function in functions: 1882 self.serialize_function(output, function) 1883 output.write(" </symbols>\n") 1884 output.write("</api>\n") 1885 output.close() 1886 1887 1888def rebuild(): 1889 builder = None 1890 if glob.glob("parser.c") != [] : 1891 print("Rebuilding API description for libxml2") 1892 builder = docBuilder("libxml2", [".", "."], 1893 ["tst.c"]) 1894 elif glob.glob("../parser.c") != [] : 1895 print("Rebuilding API description for libxml2") 1896 builder = docBuilder("libxml2", ["..", "../include/libxml"], 1897 ["tst.c"]) 1898 elif glob.glob("../libxslt/transform.c") != [] : 1899 print("Rebuilding API description for libxslt") 1900 builder = docBuilder("libxslt", ["../libxslt"], 1901 ["win32config.h", "libxslt.h", "tst.c"]) 1902 else: 1903 print("rebuild() failed, unable to guess the module") 1904 return None 1905 builder.scan() 1906 builder.analyze() 1907 builder.serialize() 1908 if glob.glob("../libexslt/exslt.c") != [] : 1909 extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"]) 1910 extra.scan() 1911 extra.analyze() 1912 extra.serialize() 1913 return builder 1914 1915# 1916# for debugging the parser 1917# 1918def parse(filename): 1919 parser = CParser(filename) 1920 idx = parser.parse() 1921 return idx 1922 1923if __name__ == "__main__": 1924 if len(sys.argv) > 1: 1925 debug = 1 1926 parse(sys.argv[1]) 1927 else: 1928 rebuild() 1929