# -*- coding: utf-8 -*-
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""This module is responsible for parsing a compiler invocation."""

import re
import os
import collections

__all__ = ["split_command", "classify_source", "compiler_language"]

# Ignored compiler options map for compilation database creation.
# The map is used in the `split_command` function (which ignores and
# classifies parameters). Please note that these are not the only
# parameters which might be ignored.
#
# Keys are the option names, values are the number of following arguments
# to skip together with the option.
IGNORED_FLAGS = {
    # compiling only flag, ignored because the creator of compilation
    # database will explicitly set it.
    "-c": 0,
    # preprocessor dependency-generation flags, ignored because they would
    # cause duplicate entries in the output (the only difference would be
    # these flags). this is an actual finding from users, who suffered
    # longer execution time caused by the duplicates.
    "-MD": 0,
    "-MMD": 0,
    "-MG": 0,
    "-MP": 0,
    "-MF": 1,
    "-MT": 1,
    "-MQ": 1,
    # linker options, ignored because the compilation database will contain
    # compilation commands only. so, the compiler would ignore these flags
    # anyway. the benefit of getting rid of them is to make the output more
    # readable.
    "-static": 0,
    "-shared": 0,
    "-s": 0,
    "-rdynamic": 0,
    "-l": 1,
    "-L": 1,
    "-u": 1,
    "-z": 1,
    "-T": 1,
    "-Xlinker": 1,
}

# Known C/C++ compiler executable name patterns
COMPILER_PATTERNS = frozenset(
    [
        re.compile(r"^(intercept-|analyze-|)c(c|\+\+)$"),
        re.compile(r"^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$"),
        re.compile(r"^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$"),
        re.compile(r"^llvm-g(cc|\+\+)$"),
    ]
)

# The value type returned by `split_command`. Created once at import time
# instead of on every call.
Compilation = collections.namedtuple("Compilation", ["compiler", "flags", "files"])

# Options which mean the invocation is not a plain compilation.
_NON_COMPILATION_FLAGS = frozenset(["-E", "-S", "-cc1", "-M", "-MM", "-###"])

# Options whose value is a separate argument that could look like a file name.
_FLAGS_WITH_VALUE = frozenset(["-D", "-I"])

# Linker related options written together with their value (eg.: `-lm`).
_JOINED_LINKER_FLAG = re.compile(r"^-(l|L|Wl,).+")

# Anything which does not start with a dash might be a file name.
_FILE_CANDIDATE = re.compile(r"^[^-].+")

# Executable names which denote a C++ compiler (eg.: `g++`, `clang++-3.8`).
_CPLUSPLUS = re.compile(r"^(.+)(\+\+)(-.+|)$")


def split_command(command):
    """Returns a value when the command is a compilation, None otherwise.

    :param command: the compiler invocation as a list of strings, where the
                    first element is the executable.
    :return: None when the executable is not a known C/C++ compiler, when a
             non-compilation option (eg.: `-E`) is present, or when no
             source file was found. Otherwise a `Compilation` named tuple
             with the following attributes:

        files:    list of source files
        flags:    list of compile options
        compiler: string value of 'c' or 'c++'"""

    compiler = compiler_language(command)
    # quit right now, if the program was not a C/C++ compiler
    if not compiler:
        return None

    flags = []
    files = []
    # iterate on the compile options
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in _NON_COMPILATION_FLAGS:
            return None
        # ignore some flags
        elif arg in IGNORED_FLAGS:
            # the default value makes a truncated command line (option
            # without its value at the very end) harmless, instead of
            # leaking a StopIteration to the caller.
            for _ in range(IGNORED_FLAGS[arg]):
                next(args, None)
        elif _JOINED_LINKER_FLAG.match(arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in _FLAGS_WITH_VALUE:
            flags.append(arg)
            value = next(args, None)
            # the value is missing when the option is the last argument
            if value is not None:
                flags.append(value)
        # parameter which looks source file is taken...
        elif _FILE_CANDIDATE.match(arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)

    # do extra check on number of source files
    if not files:
        return None
    return Compilation(compiler=compiler, flags=flags, files=files)


def classify_source(filename, c_compiler=True):
    """Return the language from file name extension.

    :param filename: path of the file to classify.
    :param c_compiler: when False, the extensions `.c` and `.i` are
                       classified as C++ instead of C.
    :return: the language name, or None for an unknown extension."""

    mapping = {
        ".c": "c" if c_compiler else "c++",
        ".i": "c-cpp-output" if c_compiler else "c++-cpp-output",
        ".ii": "c++-cpp-output",
        ".m": "objective-c",
        ".mi": "objective-c-cpp-output",
        ".mm": "objective-c++",
        ".mii": "objective-c++-cpp-output",
        ".C": "c++",
        ".cc": "c++",
        ".CC": "c++",
        ".cp": "c++",
        ".cpp": "c++",
        ".cxx": "c++",
        ".c++": "c++",
        ".C++": "c++",
        ".txx": "c++",
    }

    __, extension = os.path.splitext(os.path.basename(filename))
    return mapping.get(extension)


def compiler_language(command):
    """A predicate to decide the command is a compiler call or not.

    :param command: the invocation as a list of strings, where the first
                    element is the executable (only its base name is used).
    :return: 'c' or 'c++' when the executable name matches one of the
             `COMPILER_PATTERNS`. None otherwise (also for empty command)."""

    if command:
        executable = os.path.basename(command[0])
        if any(pattern.match(executable) for pattern in COMPILER_PATTERNS):
            return "c++" if _CPLUSPLUS.match(executable) else "c"
    return None