xref: /aosp_15_r20/external/clang/tools/scan-build-py/libscanbuild/compilation.py (revision 67e74705e28f6214e480b399dd47ea732279e315)
1*67e74705SXin Li# -*- coding: utf-8 -*-
2*67e74705SXin Li#                     The LLVM Compiler Infrastructure
3*67e74705SXin Li#
4*67e74705SXin Li# This file is distributed under the University of Illinois Open Source
5*67e74705SXin Li# License. See LICENSE.TXT for details.
""" This module is responsible for parsing a compiler invocation. """
7*67e74705SXin Li
8*67e74705SXin Liimport re
9*67e74705SXin Liimport os
10*67e74705SXin Liimport collections
11*67e74705SXin Li
12*67e74705SXin Li__all__ = ['split_command', 'classify_source', 'compiler_language']
13*67e74705SXin Li
# Ignored compiler options map for compilation database creation.
# The map is used in the `split_command` function (which ignores and
# classifies parameters). Please note that these are not the only
# parameters which might be ignored.
#
# Keys are the option names, values are the number of following arguments
# to skip together with the option.
IGNORED_FLAGS = {
    # compiling only flag, ignored because the creator of compilation
    # database will explicitly set it.
    '-c': 0,
    # dependency file generation flags, ignored because they would cause
    # duplicate entries in the output (the only difference would be these
    # flags). this is an actual finding from users, who suffered longer
    # execution time caused by the duplicates.
    '-MD': 0,
    '-MMD': 0,
    '-MG': 0,
    '-MP': 0,
    '-MF': 1,
    '-MT': 1,
    '-MQ': 1,
    # linker options, ignored because the compilation database will contain
    # compilation commands only. so, the compiler would ignore these flags
    # anyway. the benefit of getting rid of them is to make the output more
    # readable.
    '-static': 0,
    '-shared': 0,
    '-s': 0,
    '-rdynamic': 0,
    '-l': 1,
    '-L': 1,
    '-u': 1,
    '-z': 1,
    '-T': 1,
    '-Xlinker': 1
}
50*67e74705SXin Li
# Known C/C++ compiler executable name patterns. Each pattern is matched
# against the base name of the executable (see `compiler_language`).
COMPILER_PATTERNS = frozenset([
    # plain 'cc' / 'c++', optionally behind the intercept-/analyze- wrappers
    re.compile(r'^(intercept-|analyze-|)c(c|\+\+)$'),
    # 'gcc', 'g++', 'mcc', 'm++' with optional dash separated prefixes
    # and an optional '-<version>' suffix
    re.compile(r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$'),
    # 'clang' / 'clang++' with optional dash separated prefixes and an
    # optional '-<version>' suffix
    re.compile(r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$'),
    # 'llvm-gcc' / 'llvm-g++'
    re.compile(r'^llvm-g(cc|\+\+)$'),
])
58*67e74705SXin Li
59*67e74705SXin Li
# Result type of `split_command`. Created once, instead of on every call.
_Compilation = collections.namedtuple('Compilation',
                                      ['compiler', 'flags', 'files'])


def split_command(command):
    """ Returns a value when the command is a compilation, None otherwise.

    command -- the full command as a list of strings, the first element
               is the executable.

    The value on success is a named tuple with the following attributes:

        files:    list of source files
        flags:    list of compile options
        compiler: string value of 'c' or 'c++'

    None is returned when the executable is not a known C/C++ compiler,
    when the command does not run a compilation pass (e.g. preprocessing
    only), or when no source file was found among the arguments.

    A command which ends with an option that expects a value (e.g. a
    trailing '-MF' or '-I') is tolerated: the missing value is simply
    not there to be skipped or recorded. """

    # quit right now, if the program was not a C/C++ compiler
    language = compiler_language(command)
    if not language:
        return None

    flags = []
    files = []
    # iterate on the compile options
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in {'-E', '-S', '-cc1', '-M', '-MM', '-###'}:
            return None
        # ignore some flags, together with their values
        elif arg in IGNORED_FLAGS:
            for _ in range(IGNORED_FLAGS[arg]):
                # the default prevents StopIteration on a truncated command
                next(args, None)
        # linker options glued together with their value
        elif re.match(r'^-(l|L|Wl,).+', arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {'-D', '-I'}:
            flags.append(arg)
            value = next(args, None)
            if value is not None:
                flags.append(value)
        # parameter which looks source file is taken...
        elif re.match(r'^[^-].+', arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)

    # do extra check on number of source files
    if not files:
        return None
    return _Compilation(compiler=language, flags=flags, files=files)
102*67e74705SXin Li
103*67e74705SXin Li
def classify_source(filename, c_compiler=True):
    """ Return the language from file name extension.

    filename   -- name or path of the file, only the extension of the
                  base name is considered (case sensitive).
    c_compiler -- when False, the ambiguous '.c' and '.i' extensions are
                  reported as C++ instead of C.

    Returns the language name as string (like 'c', 'c++', 'objective-c'
    or 'c-cpp-output'), or None when the extension is not a known source
    file extension. """

    mapping = {
        # these two depend on which kind of compiler is asking
        '.c': 'c' if c_compiler else 'c++',
        '.i': 'c-cpp-output' if c_compiler else 'c++-cpp-output',
        '.ii': 'c++-cpp-output',
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output',
        '.C': 'c++',
        '.cc': 'c++',
        '.CC': 'c++',
        '.cp': 'c++',
        '.cpp': 'c++',
        '.cxx': 'c++',
        '.c++': 'c++',
        '.C++': 'c++',
        '.txx': 'c++'
    }

    __, extension = os.path.splitext(os.path.basename(filename))
    return mapping.get(extension)
128*67e74705SXin Li
129*67e74705SXin Li
def compiler_language(command):
    """ A predicate to decide the command is a compiler call or not.

    command -- the command as a list of strings, the first element is
               the executable (only its base name is considered).

    Returns 'c' or 'c++' when it matches one of COMPILER_PATTERNS. None
    otherwise, which includes the empty command. """

    if not command:
        return None

    executable = os.path.basename(command[0])
    if not any(pattern.match(executable) for pattern in COMPILER_PATTERNS):
        return None

    # a '++' at the end of the name, or right before a '-suffix', means
    # the C++ flavour of the compiler was called
    is_cplusplus = re.match(r'^(.+)(\+\+)(-.+|)$', executable)
    return 'c++' if is_cplusplus else 'c'
142