1# -*- coding: utf-8 -*-
2# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3# See https://llvm.org/LICENSE.txt for license information.
4# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
""" This module is responsible for parsing a compiler invocation. """
6
7import re
8import os
9import collections
10
11__all__ = ["split_command", "classify_source", "compiler_language"]
12
# Compiler options which are left out of a compilation database entry.
# `split_command` looks the arguments up in this map while it filters and
# classifies the command line. (It drops a few more parameters on its own,
# so this is not the complete list.)
#
# The key is the option name, the value is the number of arguments after the
# option which are dropped together with it.
IGNORED_FLAGS = {}

# compile-only flag, dropped because the creator of the compilation database
# will set it explicitly.
IGNORED_FLAGS.update(dict.fromkeys(["-c"], 0))

# preprocessor options of the `-M` family, dropped because they would cause
# duplicate entries in the output (entries differing only in these flags).
# users reported longer execution times caused by such duplicates.
IGNORED_FLAGS.update(dict.fromkeys(["-MD", "-MMD", "-MG", "-MP"], 0))
IGNORED_FLAGS.update(dict.fromkeys(["-MF", "-MT", "-MQ"], 1))

# linker options, dropped because the compilation database contains
# compilation commands only, so the compiler would ignore them anyway.
# leaving them out makes the output more readable.
IGNORED_FLAGS.update(dict.fromkeys(["-static", "-shared", "-s", "-rdynamic"], 0))
IGNORED_FLAGS.update(dict.fromkeys(["-l", "-L", "-u", "-z", "-T", "-Xlinker"], 1))
49
# Executable name patterns of the known C/C++ compilers. The patterns are
# matched against the base name of the program (see `compiler_language`).
COMPILER_PATTERNS = frozenset(
    re.compile(expression)
    for expression in (
        # `cc` and `c++`, bare or prefixed with `intercept-` / `analyze-`
        r"^(intercept-|analyze-|)c(c|\+\+)$",
        # `gcc`, `g++`, `mcc`, `m++` with optional dash separated prefixes
        # and an optional version suffix (like `-4`, `-4.9`, `-4.9.2`)
        r"^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$",
        # `clang` and `clang++` with the same optional prefixes and suffix
        r"^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$",
        # `llvm-gcc` and `llvm-g++`
        r"^llvm-g(cc|\+\+)$",
    )
)
59
60
# Result type of `split_command`. It is created once here, rather than on
# every call, and the function returns instances of it.
_Compilation = collections.namedtuple("Compilation", ["compiler", "flags", "files"])


def split_command(command):
    """Returns a value when the command is a compilation, None otherwise.

    The value on success is a named tuple with the following attributes:

        files:    list of source files
        flags:    list of compile options
        compiler: string value of 'c' or 'c++'

    None is returned when the executable is not recognised as a C/C++
    compiler, when the command does not run the compilation pass (like
    `-E`, `-S`, `-M`), or when no source file was found among the arguments.

    :param command: the command as a list of strings, the executable first.
    """

    compiler = compiler_language(command)
    # quit right now, if the program was not a C/C++ compiler
    if not compiler:
        return None

    flags = []
    files = []
    # iterate on the compile options. an explicit iterator is used, because
    # some options consume the arguments which follow them.
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in {"-E", "-S", "-cc1", "-M", "-MM", "-###"}:
            return None
        # ignore some flags
        elif arg in IGNORED_FLAGS:
            # the default keeps a truncated command (the option is the last
            # word) from escaping this function as StopIteration.
            for _ in range(IGNORED_FLAGS[arg]):
                next(args, None)
        # ignore linker flags which have their value attached
        elif re.match(r"^-(l|L|Wl,).+", arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {"-D", "-I"}:
            flags.append(arg)
            value = next(args, None)
            # the value is missing when the option is the last word
            if value is not None:
                flags.append(value)
        # parameter which looks source file is taken...
        elif re.match(r"^[^-].+", arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)
    # do extra check on number of source files
    if not files:
        return None
    return _Compilation(compiler=compiler, flags=flags, files=files)
102
103
def classify_source(filename, c_compiler=True):
    """Return the language which belongs to the extension of the file name.

    :param filename:    name or path of the file, only the extension of
                        the last path component is looked at.
    :param c_compiler:  decides how the `.c` and `.i` extensions are
                        classified: as C when true, as C++ otherwise.
    :return:            the language name (like 'c', 'c++', 'objective-c',
                        'c++-cpp-output'), or None for unknown extensions.
    """

    # extensions which mean the same language for both kind of compilers.
    # the lookup is case sensitive: `.C` is C++.
    languages = {
        ".ii": "c++-cpp-output",
        ".m": "objective-c",
        ".mi": "objective-c-cpp-output",
        ".mm": "objective-c++",
        ".mii": "objective-c++-cpp-output",
        ".C": "c++",
        ".cc": "c++",
        ".CC": "c++",
        ".cp": "c++",
        ".cpp": "c++",
        ".cxx": "c++",
        ".c++": "c++",
        ".C++": "c++",
        ".txx": "c++",
    }
    # these two extensions follow the language of the compiler
    if c_compiler:
        languages[".c"] = "c"
        languages[".i"] = "c-cpp-output"
    else:
        languages[".c"] = "c++"
        languages[".i"] = "c++-cpp-output"

    suffix = os.path.splitext(os.path.basename(filename))[1]
    return languages.get(suffix)
128
129
def compiler_language(command):
    """A predicate to decide whether the command is a compiler call.

    Only the base name of the first element (the executable) is checked,
    against the entries of `COMPILER_PATTERNS`.

    :param command: the command as a list of strings, the executable first.
    :return:        'c' or 'c++' when the executable is a known compiler,
                    None otherwise (an empty command included).
    """

    if not command:
        return None

    program = os.path.basename(command[0])
    if not any(known.match(program) for known in COMPILER_PATTERNS):
        return None

    # the name says C++ compiler when it has `++` in it, either at the very
    # end or right before a dash separated suffix (like `clang++-3.8`).
    if re.match(r"^(.+)(\+\+)(-.+|)$", program):
        return "c++"
    return "c"
142