# -*- coding: utf-8 -*-
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
""" This module is responsible for parsing a compiler invocation. """

import re
import os
import collections

__all__ = ['split_command', 'classify_source', 'compiler_language']

# Ignored compiler options map for compilation database creation.
# The map is used in the `split_command` function (which ignores and
# classifies parameters). Please note that these are not the only parameters
# which might be ignored.
#
# Keys are the option names, values are the number of following arguments
# to skip together with the option.
IGNORED_FLAGS = {
    # compiling only flag, ignored because the creator of compilation
    # database will explicitly set it.
    '-c': 0,
    # dependency-generation options of the preprocessor, ignored because
    # they would cause duplicate entries in the output (the only difference
    # would be these flags). this is an actual finding from users, who
    # suffered longer execution time caused by the duplicates.
    '-MD': 0,
    '-MMD': 0,
    '-MG': 0,
    '-MP': 0,
    '-MF': 1,
    '-MT': 1,
    '-MQ': 1,
    # linker options, ignored because the compilation database will contain
    # compilation commands only. so, the compiler would ignore these flags
    # anyway. the benefit of getting rid of them is to make the output more
    # readable.
    '-static': 0,
    '-shared': 0,
    '-s': 0,
    '-rdynamic': 0,
    '-l': 1,
    '-L': 1,
    '-u': 1,
    '-z': 1,
    '-T': 1,
    '-Xlinker': 1
}

# Known C/C++ compiler executable name patterns
COMPILER_PATTERNS = frozenset([
    re.compile(r'^(intercept-|analyze-|)c(c|\+\+)$'),
    re.compile(r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$'),
    re.compile(r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$'),
    re.compile(r'^llvm-g(cc|\+\+)$'),
])

# Executable names containing '++' (optionally followed by a '-suffix') are
# taken as C++ compilers. Compiled once, used by `compiler_language`.
_CPLUSPLUS_PATTERN = re.compile(r'^(.+)(\+\+)(-.+|)$')

# The value type returned by `split_command`.
Compilation = collections.namedtuple('Compilation',
                                     ['compiler', 'flags', 'files'])


def split_command(command):
    """ Returns a value when the command is a compilation, None otherwise.

    command: the invocation as a list of strings, the executable first.

    The value on success is a named tuple with the following attributes:

        files:    list of source files
        flags:    list of compile options
        compiler: string value of 'c' or 'c++'

    None is returned when the executable is not a known C/C++ compiler,
    when an argument shows that no compilation pass is involved, or when
    no source file was found among the arguments. """

    compiler = compiler_language(command)
    # quit right now, if the program was not a C/C++ compiler
    if not compiler:
        return None

    flags = []
    files = []
    # iterate on the compile options
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in {'-E', '-S', '-cc1', '-M', '-MM', '-###'}:
            return None
        # ignore some flags
        elif arg in IGNORED_FLAGS:
            # skip the operands too. the default value keeps a truncated
            # command line (operand missing at the very end) from raising
            # StopIteration out of this function.
            for _ in range(IGNORED_FLAGS[arg]):
                next(args, None)
        elif re.match(r'^-(l|L|Wl,).+', arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {'-D', '-I'}:
            flags.append(arg)
            value = next(args, None)
            # a trailing '-D' or '-I' has no operand to carry along
            if value is not None:
                flags.append(value)
        # parameter which looks source file is taken...
        elif re.match(r'^[^-].+', arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)
    # do extra check on number of source files
    if not files:
        return None
    return Compilation(compiler=compiler, flags=flags, files=files)


def classify_source(filename, c_compiler=True):
    """ Return the language from file name extension.

    filename:   path of the file, only its extension is looked at.
    c_compiler: when False, the '.c' and '.i' extensions are reported as
                C++ variants instead of C ones.

    Returns the language name as string, or None when the extension is
    not a known source file extension. The lookup is case sensitive. """

    mapping = {
        '.c': 'c' if c_compiler else 'c++',
        '.i': 'c-cpp-output' if c_compiler else 'c++-cpp-output',
        '.ii': 'c++-cpp-output',
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output',
        '.C': 'c++',
        '.cc': 'c++',
        '.CC': 'c++',
        '.cp': 'c++',
        '.cpp': 'c++',
        '.cxx': 'c++',
        '.c++': 'c++',
        '.C++': 'c++',
        '.txx': 'c++'
    }

    __, extension = os.path.splitext(os.path.basename(filename))
    return mapping.get(extension)


def compiler_language(command):
    """ A predicate to decide the command is a compiler call or not.

    command: the invocation as a list of strings, the executable first.
             Only the base name of the executable is examined.

    Returns 'c' or 'c++' when it matches one of COMPILER_PATTERNS.
    None otherwise (also for an empty command). """

    if command:
        executable = os.path.basename(command[0])
        if any(pattern.match(executable) for pattern in COMPILER_PATTERNS):
            return 'c++' if _CPLUSPLUS_PATTERN.match(executable) else 'c'
    return None