xref: /aosp_15_r20/external/libxml2/doc/apibuild.py (revision 7c5688314b92172186c154356a6374bf7684c3ca)
1#!/usr/bin/env python3
2#
3# This is the API builder, it parses the C sources and build the
4# API formal description in XML.
5#
6# See Copyright for the status of this software.
7#
8# [email protected]
9#
10import os, sys
11import string
12import glob
13
# General debugging switch.
# NOTE(review): no reader of this flag is visible in this part of the file.
debug=0
#debugsym='ignorableWhitespaceSAXFunc'
# Name of one symbol to trace.  The identifier and index classes print a
# line whenever a symbol with this exact name is defined, updated or
# registered.  None disables the tracing.
debugsym=None
17
18#
19# C parser analysis code
20#
# Maps a file name to a short, human readable reason for ignoring it.
# NOTE(review): presumably consulted by the source scanner further down
# in the file -- confirm against the code that reads this table.
ignored_files = {
  "config.h": "generated portability layer",
  "libxml.h": "internal only",
  "testModule.c": "test tool",
  "testapi.c": "generated regression tests",
  "runtest.c": "regression tests program",
  "runsuite.c": "regression tests program",
  "tst.c": "not part of the library",
  "test.c": "not part of the library",
  "testdso.c": "test for dynamid shared libraries",
  "testrecurse.c": "test for entities recursions",
  "timsort.h": "Internal header only for xpath.c 2.9.0",
  "nanoftp.h": "empty",
  "SAX.h": "empty",
}
36
# Maps a C word (macro or compiler keyword) to a pair
# (integer, description).
# NOTE(review): the integer looks like the number of tokens following the
# word that must be skipped as well (0 for bare keywords, 3 or 5 for
# macros taking arguments) -- confirm against the code that reads this
# table, which is not part of this section.
ignored_words = {
  "WINAPI": (0, "Windows keyword"),
  "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
  "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
  "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XSLTCALL": (0, "Special macro for win32 calls"),
  "EXSLTCALL": (0, "Special macro for win32 calls"),
  "__declspec": (3, "Windows keyword"),
  "__stdcall": (0, "Windows keyword"),
  "ATTRIBUTE_UNUSED": (0, "macro keyword"),
  "ATTRIBUTE_DESTRUCTOR": (0, "macro keyword"),
  "LIBEXSLT_PUBLIC": (0, "macro keyword"),
  "X_IN_Y": (5, "macro function builder"),
  "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"),
  "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"),
  "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
  "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"),
  "ATTRIBUTE_NO_SANITIZE": (3, "macro keyword"),
  "ATTRIBUTE_NO_SANITIZE_INTEGER": (0, "macro keyword"),
  "XML_DEPRECATED": (0, "macro keyword"),
  "XML_DEPRECATED_MEMBER": (0, "macro keyword"),
  "XML_GLOBALS_ALLOC": (0, "macro keyword"),
  "XML_GLOBALS_ERROR": (0, "macro keyword"),
  "XML_GLOBALS_IO": (0, "macro keyword"),
  "XML_GLOBALS_PARSER": (0, "macro keyword"),
  "XML_GLOBALS_TREE": (0, "macro keyword"),
  "XML_THREAD_LOCAL": (0, "macro keyword"),
}
69
def escape(raw):
    """Return *raw* with the five XML special characters turned into
    their predefined entities (&amp;, &lt;, &gt;, &apos;, &quot;).

    The ampersand is replaced first, otherwise the ampersands introduced
    by the other replacements would be escaped a second time.
    """
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
77
class identifier:
    """Description of one symbol collected while parsing.

    Attributes:
      name         -- the symbol name
      header       -- value given as the declaring header, or None
      module       -- value given as the defining module, or None
      type         -- kind of symbol as a string, or None
      lineno       -- line number recorded when the symbol was created
      info, extra  -- free-form data attached by the caller
      static       -- 0 by default, set through set_static()
      conditionals -- private copy of the conditionals list, or None when
                      the list given was None or empty
    """
    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
                 info=None, extra=None, conditionals = None):
        self.name = name
        self.header = header
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.lineno = lineno
        self.static = 0
        if conditionals is None or len(conditionals) == 0:
            self.conditionals = None
        else:
            # keep a copy, the caller goes on modifying its own list
            self.conditionals = conditionals[:]
        if self.name == debugsym:
            print("=> define %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % (self.module)
        if self.info is not None:
            r = r + " " +  repr(self.info)
        if self.extra is not None:
            r = r + " " + repr(self.extra)
        if self.conditionals is not None:
            r = r + " " + repr(self.conditionals)
        return r


    def set_header(self, header):
        self.header = header
    def set_module(self, module):
        self.module = module
    def set_type(self, type):
        self.type = type
    def set_info(self, info):
        self.info = info
    def set_extra(self, extra):
        self.extra = extra
    def set_lineno(self, lineno):
        self.lineno = lineno
    def set_static(self, static):
        self.static = static
    def set_conditionals(self, conditionals):
        """Store a copy of conditionals, or None for a None/empty list."""
        if conditionals is None or len(conditionals) == 0:
            self.conditionals = None
        else:
            self.conditionals = conditionals[:]

    def get_name(self):
        return self.name
    def get_header(self):
        # used to return self.module, which made the header unreachable
        return self.header
    def get_module(self):
        return self.module
    def get_type(self):
        return self.type
    def get_info(self):
        return self.info
    def get_lineno(self):
        return self.lineno
    def get_extra(self):
        return self.extra
    def get_static(self):
        return self.static
    def get_conditionals(self):
        return self.conditionals

    def update(self, header, module, type = None, info = None, extra=None,
               conditionals=None):
        """Merge new information into an already known symbol.

        The header is only recorded when none is known yet.  The module
        is recorded when none is known yet or when the stored header and
        module are equal.  The type is only recorded when none is known
        yet.  info, extra and conditionals replace the stored values
        whenever they are not None.
        """
        if self.name == debugsym:
            print("=> update %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))
        if header is not None and self.header is None:
            # used to store `module` here; the callers visible in this
            # file pass the same value for both, so they see no change
            self.set_header(header)
        if module is not None and (self.module is None or self.header == self.module):
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
        if conditionals is not None:
            self.set_conditionals(conditionals)
168
class index:
    """Registry of the symbols found in one or several source files.

    Every symbol is stored in `identifiers`; the per-kind dictionaries
    (`functions`, `variables`, `includes`, `structs`, `enums`,
    `typedefs`, `macros`) and `references` point to the same objects.
    """
    def __init__(self, name = "noname"):
        self.name = name
        self.info = {}
        self.identifiers = {}
        self.references = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}

    def _declare(self, name, header, module, static, type, lineno, info,
                 extra, conditionals):
        # Update the known identifier called name, or create a new one,
        # then flag it static when asked to.
        if name in self.identifiers:
            symbol = self.identifiers[name]
            symbol.update(header, module, type, info, extra, conditionals)
        else:
            symbol = identifier(name, header, module, type, lineno, info,
                                extra, conditionals)
            self.identifiers[name] = symbol
        if static == 1:
            symbol.set_static(1)
        return symbol

    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a reference to name and return its identifier.

        Names starting with two underscores are skipped and None is
        returned for them.
        """
        if name[0:2] == '__':
            return None
        symbol = self._declare(name, header, module, static, type, lineno,
                               info, extra, conditionals)

        if type is not None:
            self.references[name] = symbol

        if name == debugsym:
            print("New ref: %s" % (symbol))

        return symbol

    def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a definition of name and return its identifier.

        The identifier is also filed in the dictionary matching its
        type; an unknown type is reported on standard output.  Names
        starting with two underscores are skipped and None is returned.
        """
        if name[0:2] == '__':
            return None
        symbol = self._declare(name, header, module, static, type, lineno,
                               info, extra, conditionals)

        if type is not None:
            tables = (
                (("function", "functype"), self.functions),
                (("variable",), self.variables),
                (("include",), self.includes),
                (("struct",), self.structs),
                (("enum",), self.enums),
                (("typedef",), self.typedefs),
                (("macro",), self.macros),
            )
            for kinds, table in tables:
                if type in kinds:
                    table[name] = symbol
                    break
            else:
                print("Unable to register type ", type)

        if name == debugsym:
            print("New symbol: %s" % (symbol))

        return symbol

    def _absorb(self, message, mine, theirs, overrides_macro):
        # Copy the entries of theirs missing from mine, report the
        # others using message.  When overrides_macro is set, a macro of
        # the same name is dropped first.
        for key in list(theirs.keys()):
            if overrides_macro and key in self.macros:
                del self.macros[key]
            if key in mine:
                print(message % (key, mine[key].header, theirs[key].header))
            else:
                mine[key] = theirs[key]
                self.identifiers[key] = theirs[key]

    def merge(self, idx):
        """Add the symbols of idx to this index, reporting redeclarations."""
        #
        # macro might be used to override functions or variables
        # definitions
        #
        self._absorb("function %s from %s redeclared in %s",
                     self.functions, idx.functions, True)
        self._absorb("variable %s from %s redeclared in %s",
                     self.variables, idx.variables, True)
        self._absorb("struct %s from %s redeclared in %s",
                     self.structs, idx.structs, False)
        self._absorb("typedef %s from %s redeclared in %s",
                     self.typedefs, idx.typedefs, False)
        for key in list(idx.macros.keys()):
            # a macro never replaces a variable, function or enum
            if key in self.variables or key in self.functions \
               or key in self.enums:
                continue
            if key in self.macros and key != 'XML_OP':
                print("macro %s from %s redeclared in %s" % (
                   key, self.macros[key].header, idx.macros[key].header))
            else:
                self.macros[key] = idx.macros[key]
                self.identifiers[key] = idx.macros[key]
        self._absorb("enum %s from %s redeclared in %s",
                     self.enums, idx.enums, False)

    def merge_public(self, idx):
        """Complete the functions and variables already known here with
        the data found for them in idx; symbols only present in idx are
        left out."""
        for key in list(idx.functions.keys()):
            if key not in self.functions:
                continue
            declared = self.functions[key]
            found = idx.functions[key]
            # check that function condition agrees with header
            if found.conditionals != declared.conditionals:
                print("Header condition differs from Function for %s:" \
                   % key)
                print("  H: %s" % declared.conditionals)
                print("  C: %s" % found.conditionals)
            declared.update(None, found.module, found.type, found.info, found.extra)

        for key in list(idx.variables.keys()):
            if key not in self.variables:
                continue
            # TODO: the condition check done for functions produces many
            # false positives for variables, so it is not done here
            found = idx.variables[key]
            self.variables[key].update(None, found.module, found.type, found.info, found.extra)

    def analyze_dict(self, type, dict):
        """Print how many entries of dict exist and how many are public
        (static == 0); nothing is printed for an empty dictionary."""
        total = len(dict)
        exported = sum(1 for key in list(dict.keys())
                       if dict[key].static == 0)
        if total != exported:
            print("  %d %s , %d public" % (total, type, exported))
        elif total != 0:
            print("  %d public %s" % (total, type))


    def analyze(self):
        """Print the statistics of the main symbol dictionaries."""
        for label, table in (("functions", self.functions),
                             ("variables", self.variables),
                             ("structs", self.structs),
                             ("typedefs", self.typedefs),
                             ("macros", self.macros)):
            self.analyze_dict(label, table)
359
360class CLexer:
361    """A lexer for the C language, tokenize the input by reading and
362       analyzing it line by line"""
363    def __init__(self, input):
364        self.input = input
365        self.tokens = []
366        self.line = ""
367        self.lineno = 0
368
369    def getline(self):
370        line = ''
371        while line == '':
372            line = self.input.readline()
373            if not line:
374                return None
375            self.lineno = self.lineno + 1
376            line = line.lstrip()
377            line = line.rstrip()
378            if line == '':
379                continue
380            while line[-1] == '\\':
381                line = line[:-1]
382                n = self.input.readline()
383                self.lineno = self.lineno + 1
384                n = n.lstrip()
385                n = n.rstrip()
386                if not n:
387                    break
388                else:
389                    line = line + n
390        return line
391
392    def getlineno(self):
393        return self.lineno
394
395    def push(self, token):
396        self.tokens.insert(0, token);
397
398    def debug(self):
399        print("Last token: ", self.last)
400        print("Token queue: ", self.tokens)
401        print("Line %d end: " % (self.lineno), self.line)
402
403    def token(self):
404        while self.tokens == []:
405            if self.line == "":
406                line = self.getline()
407            else:
408                line = self.line
409                self.line = ""
410            if line == None:
411                return None
412
413            if line[0] == '#':
414                self.tokens = list(map((lambda x: ('preproc', x)),
415                                  line.split()))
416                break;
417            l = len(line)
418            if line[0] == '"' or line[0] == "'":
419                end = line[0]
420                line = line[1:]
421                found = 0
422                tok = ""
423                while found == 0:
424                    i = 0
425                    l = len(line)
426                    while i < l:
427                        if line[i] == end:
428                            self.line = line[i+1:]
429                            line = line[:i]
430                            l = i
431                            found = 1
432                            break
433                        if line[i] == '\\':
434                            i = i + 1
435                        i = i + 1
436                    tok = tok + line
437                    if found == 0:
438                        line = self.getline()
439                        if line == None:
440                            return None
441                self.last = ('string', tok)
442                return self.last
443
444            if l >= 2 and line[0] == '/' and line[1] == '*':
445                line = line[2:]
446                found = 0
447                tok = ""
448                while found == 0:
449                    i = 0
450                    l = len(line)
451                    while i < l:
452                        if line[i] == '*' and i+1 < l and line[i+1] == '/':
453                            self.line = line[i+2:]
454                            line = line[:i-1]
455                            l = i
456                            found = 1
457                            break
458                        i = i + 1
459                    if tok != "":
460                        tok = tok + "\n"
461                    tok = tok + line
462                    if found == 0:
463                        line = self.getline()
464                        if line == None:
465                            return None
466                self.last = ('comment', tok)
467                return self.last
468            if l >= 2 and line[0] == '/' and line[1] == '/':
469                line = line[2:]
470                self.last = ('comment', line)
471                return self.last
472            i = 0
473            while i < l:
474                if line[i] == '/' and i+1 < l and line[i+1] == '/':
475                    self.line = line[i:]
476                    line = line[:i]
477                    break
478                if line[i] == '/' and i+1 < l and line[i+1] == '*':
479                    self.line = line[i:]
480                    line = line[:i]
481                    break
482                if line[i] == '"' or line[i] == "'":
483                    self.line = line[i:]
484                    line = line[:i]
485                    break
486                i = i + 1
487            l = len(line)
488            i = 0
489            while i < l:
490                if line[i] == ' ' or line[i] == '\t':
491                    i = i + 1
492                    continue
493                o = ord(line[i])
494                if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
495                   (o >= 48 and o <= 57):
496                    s = i
497                    while i < l:
498                        o = ord(line[i])
499                        if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
500                           (o >= 48 and o <= 57) or \
501			   (" \t(){}:;,+-*/%&!|[]=><".find(line[i])) == -1:
502                            i = i + 1
503                        else:
504                            break
505                    self.tokens.append(('name', line[s:i]))
506                    continue
507                if "(){}:;,[]".find(line[i]) != -1:
508#                 if line[i] == '(' or line[i] == ')' or line[i] == '{' or \
509#                    line[i] == '}' or line[i] == ':' or line[i] == ';' or \
510#                    line[i] == ',' or line[i] == '[' or line[i] == ']':
511                    self.tokens.append(('sep', line[i]))
512                    i = i + 1
513                    continue
514                if "+-*><=/%&!|.".find(line[i]) != -1:
515#                 if line[i] == '+' or line[i] == '-' or line[i] == '*' or \
516#                    line[i] == '>' or line[i] == '<' or line[i] == '=' or \
517#                    line[i] == '/' or line[i] == '%' or line[i] == '&' or \
518#                    line[i] == '!' or line[i] == '|' or line[i] == '.':
519                    if line[i] == '.' and  i + 2 < l and \
520                       line[i+1] == '.' and line[i+2] == '.':
521                        self.tokens.append(('name', '...'))
522                        i = i + 3
523                        continue
524
525                    j = i + 1
526                    if j < l and (
527                       "+-*><=/%&!|".find(line[j]) != -1):
528#                        line[j] == '+' or line[j] == '-' or line[j] == '*' or \
529#                        line[j] == '>' or line[j] == '<' or line[j] == '=' or \
530#                        line[j] == '/' or line[j] == '%' or line[j] == '&' or \
531#                        line[j] == '!' or line[j] == '|'):
532                        self.tokens.append(('op', line[i:j+1]))
533                        i = j + 1
534                    else:
535                        self.tokens.append(('op', line[i]))
536                        i = i + 1
537                    continue
538                s = i
539                while i < l:
540                    o = ord(line[i])
541                    if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
542                       (o >= 48 and o <= 57) or (
543                        " \t(){}:;,+-*/%&!|[]=><".find(line[i]) == -1):
544#                         line[i] != ' ' and line[i] != '\t' and
545#                         line[i] != '(' and line[i] != ')' and
546#                         line[i] != '{'  and line[i] != '}' and
547#                         line[i] != ':' and line[i] != ';' and
548#                         line[i] != ',' and line[i] != '+' and
549#                         line[i] != '-' and line[i] != '*' and
550#                         line[i] != '/' and line[i] != '%' and
551#                         line[i] != '&' and line[i] != '!' and
552#                         line[i] != '|' and line[i] != '[' and
553#                         line[i] != ']' and line[i] != '=' and
554#                         line[i] != '*' and line[i] != '>' and
555#                         line[i] != '<'):
556                        i = i + 1
557                    else:
558                        break
559                self.tokens.append(('name', line[s:i]))
560
561        tok = self.tokens[0]
562        self.tokens = self.tokens[1:]
563        self.last = tok
564        return tok
565
566class CParser:
567    """The C module parser"""
568    def __init__(self, filename, idx = None):
569        self.filename = filename
570        if len(filename) > 2 and filename[-2:] == '.h':
571            self.is_header = 1
572        else:
573            self.is_header = 0
574        self.input = open(filename)
575        self.lexer = CLexer(self.input)
576        if idx == None:
577            self.index = index()
578        else:
579            self.index = idx
580        self.top_comment = ""
581        self.last_comment = ""
582        self.comment = None
583        self.collect_ref = 0
584        self.doc_disable = 0
585        self.conditionals = []
586        self.defines = []
587
    def collect_references(self):
        """Set the collect_ref flag to 1.

        NOTE(review): the code reading this flag is not part of this
        section; presumably it makes the parser record symbol references.
        """
        self.collect_ref = 1
590
    def disable(self):
        """Set doc_disable: while it is set index_add() records nothing
        and warning() and error() stay silent."""
        self.doc_disable = 1
593
    def enable(self):
        """Clear doc_disable, undoing the effect of disable()."""
        self.doc_disable = 0
596
    def lineno(self):
        """Return the current line number reported by the lexer."""
        return self.lexer.getlineno()
599
600    def index_add(self, name, module, static, type, info=None, extra = None):
601        if self.doc_disable:
602            return
603        if self.is_header == 1:
604            self.index.add(name, module, module, static, type, self.lineno(),
605                           info, extra, self.conditionals)
606        else:
607            self.index.add(name, None, module, static, type, self.lineno(),
608                           info, extra, self.conditionals)
609
610    def index_add_ref(self, name, module, static, type, info=None,
611                      extra = None):
612        if self.is_header == 1:
613            self.index.add_ref(name, module, module, static, type,
614                               self.lineno(), info, extra, self.conditionals)
615        else:
616            self.index.add_ref(name, None, module, static, type, self.lineno(),
617                               info, extra, self.conditionals)
618
619    def warning(self, msg):
620        if self.doc_disable:
621            return
622        print(msg)
623
624    def error(self, msg, token=-1):
625        if self.doc_disable:
626            return
627
628        print("Parse Error: " + msg)
629        if token != -1:
630            print("Got token ", token)
631        self.lexer.debug()
632        sys.exit(1)
633
    def debug(self, msg, token=-1):
        """Print a debug message followed by the lexer state.

        token is printed as well unless it is left to -1.  Unlike
        warning() and error() this ignores the doc_disable flag.
        """
        print("Debug: " + msg)
        if token != -1:
            print("Got token ", token)
        self.lexer.debug()
639
640    def parseTopComment(self, comment):
641        res = {}
642        lines = comment.split("\n")
643        item = None
644        for line in lines:
645            while line != "" and (line[0] == ' ' or line[0] == '\t'):
646                line = line[1:]
647            while line != "" and line[0] == '*':
648                line = line[1:]
649            while line != "" and (line[0] == ' ' or line[0] == '\t'):
650                line = line[1:]
651            try:
652                (it, line) = line.split(":", 1)
653                item = it
654                while line != "" and (line[0] == ' ' or line[0] == '\t'):
655                    line = line[1:]
656                if item in res:
657                    res[item] = res[item] + " " + line
658                else:
659                    res[item] = line
660            except:
661                if item != None:
662                    if item in res:
663                        res[item] = res[item] + " " + line
664                    else:
665                        res[item] = line
666        self.index.info = res
667
668    def parseComment(self, token):
669        if self.top_comment == "":
670            self.top_comment = token[1]
671        if self.comment == None or token[1][0] == '*':
672            self.comment = token[1];
673        else:
674            self.comment = self.comment + token[1]
675        token = self.lexer.token()
676
677        if self.comment.find("DOC_DISABLE") != -1:
678            self.disable()
679
680        if self.comment.find("DOC_ENABLE") != -1:
681            self.enable()
682
683        return token
684
685    #
686    # Parse a simple comment block for typedefs or global variables
687    #
688    def parseSimpleComment(self, name, quiet = False):
689        if name[0:2] == '__':
690            quiet = 1
691
692        args = []
693        desc = ""
694
695        if self.comment == None:
696            if not quiet:
697                self.warning("Missing comment for %s" % (name))
698            return(None)
699        if self.comment[0] != '*':
700            if not quiet:
701                self.warning("Missing * in comment for %s" % (name))
702            return(None)
703        lines = self.comment.split('\n')
704        if lines[0] == '*':
705            del lines[0]
706        if lines[0] != "* %s:" % (name):
707            if not quiet:
708                self.warning("Misformatted comment for %s" % (name))
709                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
710            return(None)
711        del lines[0]
712        while len(lines) > 0 and lines[0] == '*':
713            del lines[0]
714        desc = ""
715        while len(lines) > 0:
716            l = lines[0]
717            while len(l) > 0 and l[0] == '*':
718                l = l[1:]
719            l = l.strip()
720            desc = desc + " " + l
721            del lines[0]
722
723        desc = desc.strip()
724
725        if quiet == 0:
726            if desc == "":
727                self.warning("Comment for %s lacks description" % (name))
728
729        return(desc)
730    #
731    # Parse a comment block associate to a macro
732    #
733    def parseMacroComment(self, name, quiet = 0):
734        if name[0:2] == '__':
735            quiet = 1
736
737        args = []
738        desc = ""
739
740        if self.comment == None:
741            if not quiet:
742                self.warning("Missing comment for macro %s" % (name))
743            return((args, desc))
744        if self.comment[0] != '*':
745            if not quiet:
746                self.warning("Missing * in macro comment for %s" % (name))
747            return((args, desc))
748        lines = self.comment.split('\n')
749        if lines[0] == '*':
750            del lines[0]
751        if lines[0] != "* %s:" % (name):
752            if not quiet:
753                self.warning("Misformatted macro comment for %s" % (name))
754                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
755            return((args, desc))
756        del lines[0]
757        while lines[0] == '*':
758            del lines[0]
759        while len(lines) > 0 and lines[0][0:3] == '* @':
760            l = lines[0][3:]
761            try:
762                (arg, desc) = l.split(':', 1)
763                desc=desc.strip()
764                arg=arg.strip()
765            except:
766                if not quiet:
767                    self.warning("Misformatted macro comment for %s" % (name))
768                    self.warning("  problem with '%s'" % (lines[0]))
769                del lines[0]
770                continue
771            del lines[0]
772            l = lines[0].strip()
773            while len(l) > 2 and l[0:3] != '* @':
774                while l[0] == '*':
775                    l = l[1:]
776                desc = desc + ' ' + l.strip()
777                del lines[0]
778                if len(lines) == 0:
779                    break
780                l = lines[0]
781            args.append((arg, desc))
782        while len(lines) > 0 and lines[0] == '*':
783            del lines[0]
784        desc = ""
785        while len(lines) > 0:
786            l = lines[0]
787            while len(l) > 0 and l[0] == '*':
788                l = l[1:]
789            l = l.strip()
790            desc = desc + " " + l
791            del lines[0]
792
793        desc = desc.strip()
794
795        if quiet == 0:
796            if desc == "":
797                self.warning("Macro comment for %s lack description of the macro" % (name))
798
799        return((args, desc))
800
801     #
802     # Parse a comment block and merge the information found in the
803     # parameters descriptions, finally returns a block as complete
804     # as possible
805     #
806    def mergeFunctionComment(self, name, description, quiet = 0):
807        if name == 'main':
808            quiet = 1
809        if name[0:2] == '__':
810            quiet = 1
811
812        (ret, args) = description
813        desc = ""
814        retdesc = ""
815
816        if self.comment == None:
817            if not quiet:
818                self.warning("Missing comment for function %s" % (name))
819            return(((ret[0], retdesc), args, desc))
820        if self.comment[0] != '*':
821            if not quiet:
822                self.warning("Missing * in function comment for %s" % (name))
823            return(((ret[0], retdesc), args, desc))
824        lines = self.comment.split('\n')
825        if lines[0] == '*':
826            del lines[0]
827        if lines[0] != "* %s:" % (name):
828            if not quiet:
829                self.warning("Misformatted function comment for %s" % (name))
830                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
831            return(((ret[0], retdesc), args, desc))
832        del lines[0]
833        while lines[0] == '*':
834            del lines[0]
835        nbargs = len(args)
836        while len(lines) > 0 and lines[0][0:3] == '* @':
837            l = lines[0][3:]
838            try:
839                (arg, desc) = l.split(':', 1)
840                desc=desc.strip()
841                arg=arg.strip()
842            except:
843                if not quiet:
844                    self.warning("Misformatted function comment for %s" % (name))
845                    self.warning("  problem with '%s'" % (lines[0]))
846                del lines[0]
847                continue
848            del lines[0]
849            l = lines[0].strip()
850            while len(l) > 2 and l[0:3] != '* @':
851                while l[0] == '*':
852                    l = l[1:]
853                desc = desc + ' ' + l.strip()
854                del lines[0]
855                if len(lines) == 0:
856                    break
857                l = lines[0]
858            i = 0
859            while i < nbargs:
860                if args[i][1] == arg:
861                    args[i] = (args[i][0], arg, desc)
862                    break;
863                i = i + 1
864            if i >= nbargs:
865                if not quiet:
866                    self.warning("Unable to find arg %s from function comment for %s" % (
867                       arg, name))
868        while len(lines) > 0 and lines[0] == '*':
869            del lines[0]
870        desc = ""
871        while len(lines) > 0:
872            l = lines[0]
873            while len(l) > 0 and l[0] == '*':
874                l = l[1:]
875            l = l.strip()
876            if len(l) >= 6 and  l[0:6] == "return" or l[0:6] == "Return":
877                try:
878                    l = l.split(' ', 1)[1]
879                except:
880                    l = ""
881                retdesc = l.strip()
882                del lines[0]
883                while len(lines) > 0:
884                    l = lines[0]
885                    while len(l) > 0 and l[0] == '*':
886                        l = l[1:]
887                    l = l.strip()
888                    retdesc = retdesc + " " + l
889                    del lines[0]
890            else:
891                desc = desc + " " + l
892                del lines[0]
893
894        retdesc = retdesc.strip()
895        desc = desc.strip()
896
897        if quiet == 0:
898             #
899             # report missing comments
900             #
901            i = 0
902            while i < nbargs:
903                if args[i][2] == None and args[i][0] != "void" and \
904                   ((args[i][1] != None) or (args[i][1] == '')):
905                    self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
906                i = i + 1
907            if retdesc == "" and ret[0] != "void":
908                self.warning("Function comment for %s lacks description of return value" % (name))
909            if desc == "" and retdesc == "":
910                self.warning("Function comment for %s lacks description of the function" % (name))
911
912        return(((ret[0], retdesc), args, desc))
913
914    def parsePreproc(self, token):
915        if debug:
916            print("=> preproc ", token, self.lexer.tokens)
917        name = token[1]
918        if name == "#include":
919            token = self.lexer.token()
920            if token == None:
921                return None
922            if token[0] == 'preproc':
923                self.index_add(token[1], self.filename, not self.is_header,
924                                "include")
925                return self.lexer.token()
926            return token
927        if name == "#define":
928            token = self.lexer.token()
929            if token == None:
930                return None
931            if token[0] == 'preproc':
932                 # TODO macros with arguments
933                name = token[1]
934                lst = []
935                token = self.lexer.token()
936                while token != None and token[0] == 'preproc' and \
937                      token[1][0] != '#':
938                    lst.append(token[1])
939                    token = self.lexer.token()
940                try:
941                    name = name.split('(') [0]
942                except:
943                    pass
944                info = self.parseMacroComment(name, True)
945                self.index_add(name, self.filename, not self.is_header,
946                                "macro", info)
947                return token
948
949        #
950        # Processing of conditionals modified by Bill 1/1/05
951        #
952        # We process conditionals (i.e. tokens from #ifdef, #ifndef,
953        # #if, #else and #endif) for headers and mainline code,
954        # store the ones from the header in libxml2-api.xml, and later
955        # (in the routine merge_public) verify that the two (header and
956        # mainline code) agree.
957        #
958        # There is a small problem with processing the headers. Some of
959        # the variables are not concerned with enabling / disabling of
960        # library functions (e.g. '__XML_PARSER_H__'), and we don't want
961        # them to be included in libxml2-api.xml, or involved in
962        # the check between the header and the mainline code.  To
963        # accomplish this, we ignore any conditional which doesn't include
964        # the string 'ENABLED'
965        #
966        if name == "#ifdef":
967            apstr = self.lexer.tokens[0][1]
968            try:
969                self.defines.append(apstr)
970                if apstr.find('ENABLED') != -1:
971                    self.conditionals.append("defined(%s)" % apstr)
972            except:
973                pass
974        elif name == "#ifndef":
975            apstr = self.lexer.tokens[0][1]
976            try:
977                self.defines.append(apstr)
978                if apstr.find('ENABLED') != -1:
979                    self.conditionals.append("!defined(%s)" % apstr)
980            except:
981                pass
982        elif name == "#if":
983            apstr = ""
984            for tok in self.lexer.tokens:
985                if apstr != "":
986                    apstr = apstr + " "
987                apstr = apstr + tok[1]
988            try:
989                self.defines.append(apstr)
990                if apstr.find('ENABLED') != -1:
991                    self.conditionals.append(apstr)
992            except:
993                pass
994        elif name == "#else":
995            if self.conditionals != [] and \
996               self.defines[-1].find('ENABLED') != -1:
997                self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
998        elif name == "#endif":
999            if self.conditionals != [] and \
1000               self.defines[-1].find('ENABLED') != -1:
1001                self.conditionals = self.conditionals[:-1]
1002            self.defines = self.defines[:-1]
1003        token = self.lexer.token()
1004        while token != None and token[0] == 'preproc' and \
1005            token[1][0] != '#':
1006            token = self.lexer.token()
1007        return token
1008
1009     #
1010     # token acquisition on top of the lexer, it handle internally
1011     # preprocessor and comments since they are logically not part of
1012     # the program structure.
1013     #
1014    def token(self):
1015        global ignored_words
1016
1017        token = self.lexer.token()
1018        while token != None:
1019            if token[0] == 'comment':
1020                token = self.parseComment(token)
1021                continue
1022            elif token[0] == 'preproc':
1023                token = self.parsePreproc(token)
1024                continue
1025            elif token[0] == "name" and token[1] == "__const":
1026                token = ("name", "const")
1027                return token
1028            elif token[0] == "name" and token[1] == "__attribute":
1029                token = self.lexer.token()
1030                while token != None and token[1] != ";":
1031                    token = self.lexer.token()
1032                return token
1033            elif token[0] == "name" and token[1] in ignored_words:
1034                (n, info) = ignored_words[token[1]]
1035                i = 0
1036                while i < n:
1037                    token = self.lexer.token()
1038                    i = i + 1
1039                token = self.lexer.token()
1040                continue
1041            else:
1042                if debug:
1043                    print("=> ", token)
1044                return token
1045        return None
1046
1047     #
1048     # Parse a typedef, it records the type and its name.
1049     #
1050    def parseTypedef(self, token):
1051        if token == None:
1052            return None
1053        token = self.parseType(token)
1054        if token == None:
1055            self.error("parsing typedef")
1056            return None
1057        base_type = self.type
1058        type = base_type
1059         #self.debug("end typedef type", token)
1060        while token != None:
1061            if token[0] == "name":
1062                name = token[1]
1063                signature = self.signature
1064                if signature != None:
1065                    type = type.split('(')[0]
1066                    d = self.mergeFunctionComment(name,
1067                            ((type, None), signature), 1)
1068                    self.index_add(name, self.filename, not self.is_header,
1069                                    "functype", d)
1070                else:
1071                    if base_type == "struct":
1072                        self.index_add(name, self.filename, not self.is_header,
1073                                        "struct", type)
1074                        base_type = "struct " + name
1075                    else:
1076                        # TODO report missing or misformatted comments
1077                        info = self.parseSimpleComment(name, True)
1078                        self.index_add(name, self.filename, not self.is_header,
1079                                    "typedef", type, info)
1080                token = self.token()
1081            else:
1082                self.error("parsing typedef: expecting a name")
1083                return token
1084             #self.debug("end typedef", token)
1085            if token != None and token[0] == 'sep' and token[1] == ',':
1086                type = base_type
1087                token = self.token()
1088                while token != None and token[0] == "op":
1089                    type = type + token[1]
1090                    token = self.token()
1091            elif token != None and token[0] == 'sep' and token[1] == ';':
1092                break;
1093            elif token != None and token[0] == 'name':
1094                type = base_type
1095                continue;
1096            else:
1097                self.error("parsing typedef: expecting ';'", token)
1098                return token
1099        token = self.token()
1100        return token
1101
1102     #
1103     # Parse a C code block, used for functions it parse till
1104     # the balancing } included
1105     #
1106    def parseBlock(self, token):
1107        while token != None:
1108            if token[0] == "sep" and token[1] == "{":
1109                token = self.token()
1110                token = self.parseBlock(token)
1111            elif token[0] == "sep" and token[1] == "}":
1112                token = self.token()
1113                return token
1114            else:
1115                if self.collect_ref == 1:
1116                    oldtok = token
1117                    token = self.token()
1118                    if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
1119                        if token[0] == "sep" and token[1] == "(":
1120                            self.index_add_ref(oldtok[1], self.filename,
1121                                                0, "function")
1122                            token = self.token()
1123                        elif token[0] == "name":
1124                            token = self.token()
1125                            if token[0] == "sep" and (token[1] == ";" or
1126                               token[1] == "," or token[1] == "="):
1127                                self.index_add_ref(oldtok[1], self.filename,
1128                                                    0, "type")
1129                    elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
1130                        self.index_add_ref(oldtok[1], self.filename,
1131                                            0, "typedef")
1132                    elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
1133                        self.index_add_ref(oldtok[1], self.filename,
1134                                            0, "typedef")
1135
1136                else:
1137                    token = self.token()
1138        return token
1139
1140     #
1141     # Parse a C struct definition till the balancing }
1142     #
1143    def parseStruct(self, token):
1144        fields = []
1145         #self.debug("start parseStruct", token)
1146        while token != None:
1147            if token[0] == "sep" and token[1] == "{":
1148                token = self.token()
1149                token = self.parseTypeBlock(token)
1150            elif token[0] == "sep" and token[1] == "}":
1151                self.struct_fields = fields
1152                 #self.debug("end parseStruct", token)
1153                 #print fields
1154                token = self.token()
1155                return token
1156            else:
1157                base_type = self.type
1158                 #self.debug("before parseType", token)
1159                token = self.parseType(token)
1160                 #self.debug("after parseType", token)
1161                if token != None and token[0] == "name":
1162                    fname = token[1]
1163                    token = self.token()
1164                    if token[0] == "sep" and token[1] == ";":
1165                        token = self.token()
1166                        fields.append((self.type, fname))
1167                    else:
1168                        self.error("parseStruct: expecting ;", token)
1169                elif token != None and token[0] == "sep" and token[1] == "{":
1170                    token = self.token()
1171                    token = self.parseTypeBlock(token)
1172                    if token != None and token[0] == "name":
1173                        token = self.token()
1174                    if token != None and token[0] == "sep" and token[1] == ";":
1175                        token = self.token()
1176                    else:
1177                        self.error("parseStruct: expecting ;", token)
1178                else:
1179                    self.error("parseStruct: name", token)
1180                    token = self.token()
1181                self.type = base_type;
1182        self.struct_fields = fields
1183         #self.debug("end parseStruct", token)
1184         #print fields
1185        return token
1186
1187     #
1188     # Parse a C enum block, parse till the balancing }
1189     #
1190    def parseEnumBlock(self, token):
1191        self.enums = []
1192        name = None
1193        self.comment = None
1194        comment = ""
1195        value = "0"
1196        while token != None:
1197            if token[0] == "sep" and token[1] == "{":
1198                token = self.token()
1199                token = self.parseTypeBlock(token)
1200            elif token[0] == "sep" and token[1] == "}":
1201                if name != None:
1202                    if self.comment != None:
1203                        comment = self.comment
1204                        self.comment = None
1205                    self.enums.append((name, value, comment))
1206                token = self.token()
1207                return token
1208            elif token[0] == "name":
1209                    if name != None:
1210                        if self.comment != None:
1211                            comment = self.comment.strip()
1212                            self.comment = None
1213                        self.enums.append((name, value, comment))
1214                    name = token[1]
1215                    comment = ""
1216                    token = self.token()
1217                    if token[0] == "op" and token[1][0] == "=":
1218                        value = ""
1219                        if len(token[1]) > 1:
1220                            value = token[1][1:]
1221                        token = self.token()
1222                        while token[0] != "sep" or (token[1] != ',' and
1223                              token[1] != '}'):
1224                            value = value + token[1]
1225                            token = self.token()
1226                    else:
1227                        try:
1228                            value = "%d" % (int(value) + 1)
1229                        except:
1230                            self.warning("Failed to compute value of enum %s" % (name))
1231                            value=""
1232                    if token[0] == "sep" and token[1] == ",":
1233                        token = self.token()
1234            else:
1235                token = self.token()
1236        return token
1237
1238     #
1239     # Parse a C definition block, used for structs it parse till
1240     # the balancing }
1241     #
1242    def parseTypeBlock(self, token):
1243        while token != None:
1244            if token[0] == "sep" and token[1] == "{":
1245                token = self.token()
1246                token = self.parseTypeBlock(token)
1247            elif token[0] == "sep" and token[1] == "}":
1248                token = self.token()
1249                return token
1250            else:
1251                token = self.token()
1252        return token
1253
1254     #
1255     # Parse a type: the fact that the type name can either occur after
1256     #    the definition or within the definition makes it a little harder
1257     #    if inside, the name token is pushed back before returning
1258     #
1259    def parseType(self, token):
1260        self.type = ""
1261        self.struct_fields = []
1262        self.signature = None
1263        if token == None:
1264            return token
1265
1266        have_sign = 0
1267        done = 0
1268
1269        while token[0] == "name" and (
1270              token[1] == "const" or \
1271              token[1] == "unsigned" or \
1272              token[1] == "signed"):
1273            if token[1] == "unsigned" or token[1] == "signed":
1274                have_sign = 1
1275            if self.type == "":
1276                self.type = token[1]
1277            else:
1278                self.type = self.type + " " + token[1]
1279            token = self.token()
1280
1281        if token[0] == "name" and token[1] in ("char", "short", "int", "long"):
1282            if self.type == "":
1283                self.type = token[1]
1284            else:
1285                self.type = self.type + " " + token[1]
1286
1287        elif have_sign:
1288            done = 1
1289
1290        elif token[0] == "name" and token[1] == "struct":
1291            if self.type == "":
1292                self.type = token[1]
1293            else:
1294                self.type = self.type + " " + token[1]
1295            token = self.token()
1296            nametok = None
1297            if token[0] == "name":
1298                nametok = token
1299                token = self.token()
1300            if token != None and token[0] == "sep" and token[1] == "{":
1301                token = self.token()
1302                token = self.parseStruct(token)
1303            elif token != None and token[0] == "op" and token[1] == "*":
1304                self.type = self.type + " " + nametok[1] + " *"
1305                token = self.token()
1306                while token != None and token[0] == "op" and token[1] == "*":
1307                    self.type = self.type + " *"
1308                    token = self.token()
1309                if token[0] == "name":
1310                    nametok = token
1311                    token = self.token()
1312                else:
1313                    self.error("struct : expecting name", token)
1314                    return token
1315            elif token != None and token[0] == "name" and nametok != None:
1316                self.type = self.type + " " + nametok[1]
1317                return token
1318
1319            if nametok != None:
1320                self.lexer.push(token)
1321                token = nametok
1322            return token
1323
1324        elif token[0] == "name" and token[1] == "enum":
1325            if self.type == "":
1326                self.type = token[1]
1327            else:
1328                self.type = self.type + " " + token[1]
1329            self.enums = []
1330            token = self.token()
1331            if token != None and token[0] == "sep" and token[1] == "{":
1332                token = self.token()
1333                token = self.parseEnumBlock(token)
1334            else:
1335                self.error("parsing enum: expecting '{'", token)
1336            enum_type = None
1337            if token != None and token[0] != "name":
1338                self.lexer.push(token)
1339                token = ("name", "enum")
1340            else:
1341                enum_type = token[1]
1342            for enum in self.enums:
1343                self.index_add(enum[0], self.filename,
1344                               not self.is_header, "enum",
1345                               (enum[1], enum[2], enum_type))
1346            return token
1347
1348        elif token[0] == "name":
1349            if self.type == "":
1350                self.type = token[1]
1351            else:
1352                self.type = self.type + " " + token[1]
1353        else:
1354            self.error("parsing type %s: expecting a name" % (self.type),
1355                       token)
1356            return token
1357        if not done:
1358            token = self.token()
1359        while token != None and (token[0] == "op" or
1360              token[0] == "name" and token[1] == "const"):
1361            self.type = self.type + " " + token[1]
1362            token = self.token()
1363
1364         #
1365         # if there is a parenthesis here, this means a function type
1366         #
1367        if token != None and token[0] == "sep" and token[1] == '(':
1368            self.type = self.type + token[1]
1369            token = self.token()
1370            while token != None and token[0] == "op" and token[1] == '*':
1371                self.type = self.type + token[1]
1372                token = self.token()
1373            if token == None or token[0] != "name" :
1374                self.error("parsing function type, name expected", token);
1375                return token
1376            self.type = self.type + token[1]
1377            nametok = token
1378            token = self.token()
1379            if token != None and token[0] == "sep" and token[1] == ')':
1380                self.type = self.type + token[1]
1381                token = self.token()
1382                if token != None and token[0] == "sep" and token[1] == '(':
1383                    token = self.token()
1384                    type = self.type;
1385                    token = self.parseSignature(token);
1386                    self.type = type;
1387                else:
1388                    self.error("parsing function type, '(' expected", token);
1389                    return token
1390            else:
1391                self.error("parsing function type, ')' expected", token);
1392                return token
1393            self.lexer.push(token)
1394            token = nametok
1395            return token
1396
1397         #
1398         # do some lookahead for arrays
1399         #
1400        if token != None and token[0] == "name":
1401            nametok = token
1402            token = self.token()
1403            if token != None and token[0] == "sep" and token[1] == '[':
1404                self.type = self.type + nametok[1]
1405                while token != None and token[0] == "sep" and token[1] == '[':
1406                    self.type = self.type + token[1]
1407                    token = self.token()
1408                    while token != None and token[0] != 'sep' and \
1409                          token[1] != ']' and token[1] != ';':
1410                        self.type = self.type + token[1]
1411                        token = self.token()
1412                if token != None and token[0] == 'sep' and token[1] == ']':
1413                    self.type = self.type + token[1]
1414                    token = self.token()
1415                else:
1416                    self.error("parsing array type, ']' expected", token);
1417                    return token
1418            elif token != None and token[0] == "sep" and token[1] == ':':
1419                 # remove :12 in case it's a limited int size
1420                token = self.token()
1421                token = self.token()
1422            self.lexer.push(token)
1423            token = nametok
1424
1425        return token
1426
1427     #
1428     # Parse a signature: '(' has been parsed and we scan the type definition
1429     #    up to the ')' included
1430    def parseSignature(self, token):
1431        signature = []
1432        if token != None and token[0] == "sep" and token[1] == ')':
1433            self.signature = []
1434            token = self.token()
1435            return token
1436        while token != None:
1437            token = self.parseType(token)
1438            if token != None and token[0] == "name":
1439                signature.append((self.type, token[1], None))
1440                token = self.token()
1441            elif token != None and token[0] == "sep" and token[1] == ',':
1442                token = self.token()
1443                continue
1444            elif token != None and token[0] == "sep" and token[1] == ')':
1445                 # only the type was provided
1446                if self.type == "...":
1447                    signature.append((self.type, "...", None))
1448                else:
1449                    signature.append((self.type, None, None))
1450            if token != None and token[0] == "sep":
1451                if token[1] == ',':
1452                    token = self.token()
1453                    continue
1454                elif token[1] == ')':
1455                    token = self.token()
1456                    break
1457        self.signature = signature
1458        return token
1459
1460     #
1461     # Parse a global definition, be it a type, variable or function
1462     # the extern "C" blocks are a bit nasty and require it to recurse.
1463     #
1464    def parseGlobal(self, token):
1465        static = 0
1466        if token[1] == 'extern':
1467            token = self.token()
1468            if token == None:
1469                return token
1470            if token[0] == 'string':
1471                if token[1] == 'C':
1472                    token = self.token()
1473                    if token == None:
1474                        return token
1475                    if token[0] == 'sep' and token[1] == "{":
1476                        token = self.token()
1477#                         print 'Entering extern "C line ', self.lineno()
1478                        while token != None and (token[0] != 'sep' or
1479                              token[1] != "}"):
1480                            if token[0] == 'name':
1481                                token = self.parseGlobal(token)
1482                            else:
1483                                self.error(
1484                                 "token %s %s unexpected at the top level" % (
1485                                        token[0], token[1]))
1486                                token = self.parseGlobal(token)
1487#                         print 'Exiting extern "C" line', self.lineno()
1488                        token = self.token()
1489                        return token
1490                else:
1491                    return token
1492        elif token[1] == 'static':
1493            static = 1
1494            token = self.token()
1495            if token == None or  token[0] != 'name':
1496                return token
1497
1498        if token[1] == 'typedef':
1499            token = self.token()
1500            return self.parseTypedef(token)
1501        else:
1502            token = self.parseType(token)
1503            type_orig = self.type
1504        if token == None or token[0] != "name":
1505            return token
1506        type = type_orig
1507        self.name = token[1]
1508        token = self.token()
1509        while token != None and (token[0] == "sep" or token[0] == "op"):
1510            if token[0] == "sep":
1511                if token[1] == "[":
1512                    type = type + token[1]
1513                    token = self.token()
1514                    while token != None and (token[0] != "sep" or \
1515                          token[1] != ";"):
1516                        type = type + token[1]
1517                        token = self.token()
1518
1519            if token != None and token[0] == "op" and token[1] == "=":
1520                 #
1521                 # Skip the initialization of the variable
1522                 #
1523                token = self.token()
1524                if token[0] == 'sep' and token[1] == '{':
1525                    token = self.token()
1526                    token = self.parseBlock(token)
1527                else:
1528                    while token != None and (token[0] != "sep" or \
1529                          (token[1] != ';' and token[1] != ',')):
1530                            token = self.token()
1531                if token == None or token[0] != "sep" or (token[1] != ';' and
1532                   token[1] != ','):
1533                    self.error("missing ';' or ',' after value")
1534
1535            if token != None and token[0] == "sep":
1536                if token[1] == ";":
1537                    if type == "struct":
1538                        self.index_add(self.name, self.filename,
1539                             not self.is_header, "struct", self.struct_fields)
1540                    else:
1541                        info = self.parseSimpleComment(self.name, True)
1542                        self.index_add(self.name, self.filename,
1543                             not self.is_header, "variable", type, info)
1544                    self.comment = None
1545                    token = self.token()
1546                    break
1547                elif token[1] == "(":
1548                    token = self.token()
1549                    token = self.parseSignature(token)
1550                    if token == None:
1551                        return None
1552                    if token[0] == "sep" and token[1] == ";":
1553                        d = self.mergeFunctionComment(self.name,
1554                                ((type, None), self.signature), 1)
1555                        self.index_add(self.name, self.filename, static,
1556                                        "function", d)
1557                        self.comment = None
1558                        token = self.token()
1559                    elif token[0] == "sep" and token[1] == "{":
1560                        d = self.mergeFunctionComment(self.name,
1561                                ((type, None), self.signature), static)
1562                        self.index_add(self.name, self.filename, static,
1563                                        "function", d)
1564                        self.comment = None
1565                        token = self.token()
1566                        token = self.parseBlock(token);
1567                elif token[1] == ',':
1568                    self.index_add(self.name, self.filename, static,
1569                                    "variable", type)
1570                    self.comment = None
1571                    type = type_orig
1572                    token = self.token()
1573                    while token != None and token[0] == "sep":
1574                        type = type + token[1]
1575                        token = self.token()
1576                    if token != None and token[0] == "name":
1577                        self.name = token[1]
1578                        token = self.token()
1579                else:
1580                    break
1581
1582        return token
1583
1584    def parse(self):
1585        self.warning("Parsing %s" % (self.filename))
1586        token = self.token()
1587        while token != None:
1588            if token[0] == 'name':
1589                token = self.parseGlobal(token)
1590            else:
1591                self.error("token %s %s unexpected at the top level" % (
1592                       token[0], token[1]))
1593                token = self.parseGlobal(token)
1594                return
1595        self.parseTopComment(self.top_comment)
1596        return self.index
1597
1598
class docBuilder:
    """Collect the parsed indexes of a project's C files and save them as XML.

    A builder is created for one project name and a list of directories.
    scan() globs those directories for ``*.c`` and ``*.h`` files, parses
    every file that is not excluded with CParser and merges the results
    into self.idx.  serialize() then writes ``<name>-api.xml`` into the
    current working directory.
    """

    def __init__(self, name, directories=None, excludes=None):
        """Create a builder for the project called `name`.

        name: project name; also used to build the output file name.
        directories: list of directories globbed by scan(); ['.'] when
            omitted.
        excludes: list of path substrings to skip, in addition to the
            keys of the module-level ignored_files table; empty when
            omitted.
        """
        # None sentinels instead of list literals: a literal default is a
        # single object shared by every builder created without the argument.
        if directories is None:
            directories = ['.']
        if excludes is None:
            excludes = []
        self.name = name
        self.directories = directories
        self.excludes = excludes + list(ignored_files.keys())
        # path -> index returned by CParser.parse() (None until scanned)
        self.modules = {}
        self.headers = {}
        # index accumulating every scanned file
        self.idx = index()
        self.index = {}
        if name == 'libxml2':
            self.basename = 'libxml'
        else:
            self.basename = name

    def analyze(self):
        """Print the header/module counts and run the merged index analysis."""
        print("Project %s : %d headers, %d modules" % (
              self.name, len(self.headers), len(self.modules)))
        self.idx.analyze()

    def scanHeaders(self):
        """Parse every registered header and merge its index into self.idx."""
        for header in list(self.headers.keys()):
            idx = CParser(header).parse()
            self.headers[header] = idx
            self.idx.merge(idx)

    def scanModules(self):
        """Parse every registered module and merge it with merge_public()."""
        for module in list(self.modules.keys()):
            idx = CParser(module).parse()
            # idx.analyze()
            self.modules[module] = idx
            self.idx.merge_public(idx)

    def _collect(self, directory, pattern, registry):
        """Register in `registry` the files of `directory` matching `pattern`.

        A file whose path contains any entry of self.excludes is reported
        on stdout and left out.
        """
        for path in glob.glob(directory + pattern):
            if any(excl in path for excl in self.excludes):
                print("Skipping %s" % path)
            else:
                registry[path] = None

    def scan(self):
        """Find the C sources and headers of every directory, then parse them."""
        for directory in self.directories:
            self._collect(directory, "/*.c", self.modules)
            self._collect(directory, "/*.h", self.headers)
        self.scanHeaders()
        self.scanModules()

    def modulename_file(self, file):
        """Return the base name of `file` without a trailing '.h' or '.c'."""
        module = os.path.basename(file)
        if module[-2:] in ('.h', '.c'):
            module = module[:-2]
        return module

    def serialize_enum(self, output, name):
        """Write the <enum> element for the enum value called `name`.

        The entry's info is indexed as (value, description, type); each
        part is only emitted when it is neither None nor empty.
        """
        entry = self.idx.enums[name]
        output.write("    <enum name='%s' file='%s'" % (
                     name, self.modulename_file(entry.header)))
        info = entry.info
        if info is not None:
            if info[0] is not None and info[0] != '':
                # SECURITY NOTE(review): eval() executes the value text,
                # which presumably originates from the scanned C sources.
                # It is kept so constant expressions such as "1 << 3" are
                # folded, but builtins are removed: that limits what the
                # text can reach and stops a value named like a Python
                # builtin from being replaced by that object's repr.
                # Do not run this on untrusted input.
                try:
                    val = eval(info[0], {"__builtins__": {}}, {})
                except Exception:
                    # Not a Python expression: keep the raw text.
                    val = info[0]
                # One-element tuple so a tuple result cannot break the '%'.
                output.write(" value='%s'" % (val,))
            if info[2] is not None and info[2] != '':
                output.write(" type='%s'" % info[2])
            if info[1] is not None and info[1] != '':
                output.write(" info='%s'" % escape(info[1]))
        output.write("/>\n")

    def serialize_macro(self, output, name):
        """Write the <macro> element for `name`.

        The entry's info is expected to unpack as (args, description),
        args being a sequence of (name, description) pairs.  Anything that
        does not fit is reported on stdout and the element is still closed.
        """
        entry = self.idx.macros[name]
        output.write("    <macro name='%s' file='%s'>\n" % (
                     name, self.modulename_file(entry.header)))
        if entry.info is not None:
            try:
                (args, desc) = entry.info
                if desc is not None and desc != "":
                    output.write("      <info>%s</info>\n" % (escape(desc)))
                for (arg_name, arg_desc) in args:
                    if arg_desc is not None and arg_desc != "":
                        output.write("      <arg name='%s' info='%s'/>\n" % (
                                     arg_name, escape(arg_desc)))
                    else:
                        output.write("      <arg name='%s'/>\n" % (arg_name))
            except Exception:
                # Best effort: keep the document well-formed, but say so
                # instead of hiding the problem.
                print("Failed to serialize macro %s" % (name))
        output.write("    </macro>\n")

    def serialize_typedef(self, output, name):
        """Write the <struct> or <typedef> element for the typedef `name`.

        A typedef whose info starts with 'struct ' becomes a <struct>
        element; its fields are listed when the structure is known to the
        index with a tuple or list of (type, name) entries.  Any other
        typedef becomes a <typedef> element with an optional <info> child.
        """
        entry = self.idx.typedefs[name]
        if entry.info[0:7] == 'struct ':
            output.write("    <struct name='%s' file='%s' type='%s'" % (
                     name, self.modulename_file(entry.header), entry.info))
            struct_name = entry.info[7:]
            if struct_name in self.idx.structs and isinstance(
                    self.idx.structs[struct_name].info, (tuple, list)):
                output.write(">\n")
                try:
                    for field in self.idx.structs[struct_name].info:
                        output.write("      <field name='%s' type='%s'/>\n" % (
                                     field[1], field[0]))
                except Exception:
                    print("Failed to serialize struct %s" % (struct_name))
                output.write("    </struct>\n")
            else:
                output.write("/>\n")
        else:
            output.write("    <typedef name='%s' file='%s' type='%s'" % (
                         name, self.modulename_file(entry.header), entry.info))
            try:
                desc = entry.extra
                if desc is not None and desc != "":
                    output.write(">\n      <info>%s</info>\n" % (escape(desc)))
                    output.write("    </typedef>\n")
                else:
                    output.write("/>\n")
            except Exception:
                # No usable description: emit an empty element.
                output.write("/>\n")

    def serialize_variable(self, output, name):
        """Write the <variable> element for `name`.

        The type attribute is omitted when the entry has no info; an
        <info> child is added when the entry's extra text is non-empty.
        """
        entry = self.idx.variables[name]
        if entry.info is not None:
            output.write("    <variable name='%s' file='%s' type='%s'" % (
                    name, self.modulename_file(entry.header), entry.info))
        else:
            output.write("    <variable name='%s' file='%s'" % (
                    name, self.modulename_file(entry.header)))
        desc = entry.extra
        if desc is not None and desc != "":
            output.write(">\n      <info>%s</info>\n" % (escape(desc)))
            output.write("    </variable>\n")
        else:
            output.write("/>\n")

    def serialize_function(self, output, name):
        """Write the element describing the function-like symbol `name`.

        The element tag is the entry's own type.  The entry's info is
        expected to unpack as (return, params, description), with return
        indexed as (type, description) and each param as
        (type, name, description).  A failure while writing those details
        is reported on stdout and the element is still closed.
        """
        entry = self.idx.functions[name]
        if name == debugsym:
            print("=>", entry)

        output.write("    <%s name='%s' file='%s' module='%s'>\n" % (entry.type,
                     name, self.modulename_file(entry.header),
                     self.modulename_file(entry.module)))
        #
        # Processing of conditionals modified by Bill 1/1/05
        #
        if entry.conditionals is not None:
            apstr = ""
            for cond in entry.conditionals:
                if apstr != "":
                    # The separator is the already-escaped form of '&&'.
                    apstr = apstr + " &amp;&amp; "
                apstr = apstr + cond
            output.write("      <cond>%s</cond>\n"% (apstr))
        try:
            (ret, params, desc) = entry.info
            if (desc is None or desc == '') and \
               name[0:9] != "xmlThrDef" and name != "xmlDllMain" and \
               ret[1] == '':
                print("%s %s from %s has no description" % (entry.type, name,
                       self.modulename_file(entry.module)))

            output.write("      <info>%s</info>\n" % (escape(desc)))
            if ret[0] is not None:
                if ret[0] == "void":
                    output.write("      <return type='void'/>\n")
                else:
                    output.write("      <return type='%s' info='%s'/>\n" % (
                             ret[0], escape(ret[1])))
            for param in params:
                if param[0] == 'void':
                    continue
                if param[2] is None:
                    output.write("      <arg name='%s' type='%s' info=''/>\n" % (
                                 param[1], param[0]))
                else:
                    output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (
                                 param[1], param[0], escape(param[2])))
        except Exception:
            print("Failed to save function %s info: " % name, repr(entry.info))
        output.write("    </%s>\n" % (entry.type))

    def serialize_exports(self, output, file):
        """Write the <file> element listing the symbols found in header `file`.

        The Summary, Description and Author entries of the header's info
        come first (a missing Summary or Description is reported on
        stdout), then one <exports> line per symbol, sorted by name within
        each kind.
        """
        module = self.modulename_file(file)
        output.write("    <file name='%s'>\n" % (module))
        header_idx = self.headers[file]
        if header_idx.info is not None:
            for data in ('Summary', 'Description', 'Author'):
                try:
                    output.write("     <%s>%s</%s>\n" % (
                                 data.lower(),
                                 escape(header_idx.info[data]),
                                 data.lower()))
                except Exception:
                    if data != 'Author':
                        print("Header %s lacks a %s description" % (module, data))
            if 'Description' in header_idx.info:
                desc = header_idx.info['Description']
                if desc.find("DEPRECATED") != -1:
                    output.write("     <deprecated/>\n")

        # Macros are sometime used to masquerade other types: a macro that
        # is also known under another kind is only listed under that kind.
        other_kinds = (header_idx.functions, header_idx.variables,
                       header_idx.typedefs, header_idx.structs,
                       header_idx.enums)
        for symbol in sorted(header_idx.macros.keys()):
            if any(symbol in table for table in other_kinds):
                continue
            output.write("     <exports symbol='%s' type='macro'/>\n" % (symbol))
        for kind, table in (('enum', header_idx.enums),
                            ('typedef', header_idx.typedefs),
                            ('struct', header_idx.structs),
                            ('variable', header_idx.variables),
                            ('function', header_idx.functions)):
            for symbol in sorted(table.keys()):
                output.write("     <exports symbol='%s' type='%s'/>\n" % (
                             symbol, kind))
        output.write("    </file>\n")

    def serialize(self):
        """Write the whole API description to '<name>-api.xml'.

        The file is created in the current working directory.  It is
        encoded as ISO-8859-1, matching the XML declaration written on its
        first line; characters outside that charset are stored as numeric
        character references.
        """
        filename = "%s-api.xml" % self.name
        print("Saving XML description %s" % (filename))
        # 'with' closes the file even when a serializer raises.
        with open(filename, "w", encoding="ISO-8859-1",
                  errors="xmlcharrefreplace") as output:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<api name='%s'>\n" % self.name)
            output.write("  <files>\n")
            for file in sorted(self.headers.keys()):
                self.serialize_exports(output, file)
            output.write("  </files>\n")
            output.write("  <symbols>\n")
            for symbols, emit in ((self.idx.macros, self.serialize_macro),
                                  (self.idx.enums, self.serialize_enum),
                                  (self.idx.typedefs, self.serialize_typedef),
                                  (self.idx.variables, self.serialize_variable),
                                  (self.idx.functions, self.serialize_function)):
                for symbol in sorted(symbols.keys()):
                    emit(output, symbol)
            output.write("  </symbols>\n")
            output.write("</api>\n")
1886
1887
def rebuild():
    """Guess the project from the files around the current directory and
    regenerate its API description.

    The first marker file that exists selects the project name, the
    directories to scan and the extra excludes.  The builder is then
    scanned, analyzed and serialized.  When ../libexslt/exslt.c exists a
    second description is produced for libexslt.

    Returns the main builder, or None when no marker file is found.
    """
    # (marker file, announcement, project, directories, excludes)
    candidates = (
        ("parser.c",
         "Rebuilding API description for libxml2",
         "libxml2", [".", "."], ["tst.c"]),
        ("../parser.c",
         "Rebuilding API description for libxml2",
         "libxml2", ["..", "../include/libxml"], ["tst.c"]),
        ("../libxslt/transform.c",
         "Rebuilding API description for libxslt",
         "libxslt", ["../libxslt"], ["win32config.h", "libxslt.h", "tst.c"]),
    )
    builder = None
    for marker, announcement, project, dirs, skipped in candidates:
        if glob.glob(marker):
            print(announcement)
            builder = docBuilder(project, dirs, skipped)
            break
    if builder is None:
        print("rebuild() failed, unable to guess the module")
        return None

    builder.scan()
    builder.analyze()
    builder.serialize()

    if glob.glob("../libexslt/exslt.c"):
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder
1914
1915#
1916# for debugging the parser
1917#
def parse(filename):
    """Parse the single file `filename` with CParser and return the result
    of its parse() call."""
    return CParser(filename).parse()
1922
if __name__ == "__main__":
    # Without arguments regenerate the API description; with one, switch
    # debugging on and only parse the file given on the command line.
    if len(sys.argv) <= 1:
        rebuild()
    else:
        debug = 1
        parse(sys.argv[1])
1929