| # -*- coding: utf-8 -*- |
| # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| # See https://llvm.org/LICENSE.txt for license information. |
| # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| """ This module is responsible for parsing a compiler invocation. """ |
| |
| import re |
| import os |
| import collections |
| |
| __all__ = ["split_command", "classify_source", "compiler_language"] |
| |
# Compiler options that are dropped when a compilation database entry is
# created. `split_command` looks arguments up in this map while it filters
# and classifies them. This is not the complete list of dropped arguments:
# `split_command` discards a few more by pattern.
#
# Each key is the option as written on the command line; each value is the
# number of following arguments that are dropped together with the option.
IGNORED_FLAGS = dict.fromkeys(
    (
        # compile-only flag: dropped because the creator of the compilation
        # database sets it explicitly.
        "-c",
        # dependency-file generation options: dropped because they would
        # produce duplicate entries in the output (entries differing only in
        # these flags). users reported longer execution times caused by such
        # duplicates.
        "-MD",
        "-MMD",
        "-MG",
        "-MP",
        # linker options: the database holds compilation commands only, so
        # the compiler would ignore these anyway. removing them keeps the
        # output readable.
        "-static",
        "-shared",
        "-s",
        "-rdynamic",
    ),
    0,
)
IGNORED_FLAGS.update(
    dict.fromkeys(
        (
            # dependency-file generation options that take one value.
            "-MF",
            "-MT",
            "-MQ",
            # linker options that take one value.
            "-l",
            "-L",
            "-u",
            "-z",
            "-T",
            "-Xlinker",
        ),
        1,
    )
)
| |
# Regular expressions matching the executable names of known C/C++ compilers.
# They are applied to the base name of the executable, not to its full path.
COMPILER_PATTERNS = frozenset(
    re.compile(expression)
    for expression in (
        # plain `cc` / `c++`, optionally behind an intercept-/analyze- wrapper
        r"^(intercept-|analyze-|)c(c|\+\+)$",
        # gcc-like names, with optional dash-separated prefixes and version
        r"^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$",
        # clang / clang++, with optional dash-separated prefixes and version
        r"^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$",
        # llvm-gcc / llvm-g++
        r"^llvm-g(cc|\+\+)$",
    )
)
| |
| |
def split_command(command):
    """Returns a value when the command is a compilation, None otherwise.

    command: the invocation as a list of strings, executable first.

    The value on success is a 'Compilation' named tuple with the following
    attributes:

        compiler: string value of 'c' or 'c++'
        flags: list of compile options
        files: list of source files

    None is returned when the executable is not recognised as a C/C++
    compiler, when an argument shows that no compilation pass is involved
    (for example '-E' or '-M'), or when no source file is found among the
    arguments."""

    language = compiler_language(command)
    # quit right now, if the program was not a C/C++ compiler
    if not language:
        return None

    # linker options with the value attached to the flag (e.g. '-lm')
    joined_linker_option = re.compile(r"^-(l|L|Wl,).+")
    # anything that does not start with a dash might be a file name
    not_an_option = re.compile(r"^[^-].+")

    flags = []
    files = []
    # an explicit iterator lets a branch consume the values of a flag
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in {"-E", "-S", "-cc1", "-M", "-MM", "-###"}:
            return None
        # ignore some flags, together with the values that follow them
        if arg in IGNORED_FLAGS:
            for _ in range(IGNORED_FLAGS[arg]):
                # the default stops a truncated command line (flag given as
                # the last argument) from leaking StopIteration to the caller
                next(args, None)
        elif joined_linker_option.match(arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {"-D", "-I"}:
            flags.append(arg)
            value = next(args, None)
            # keep the lone flag when its value is missing
            if value is not None:
                flags.append(value)
        # parameter which looks source file is taken...
        elif not_an_option.match(arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)

    # do extra check on number of source files
    if not files:
        return None
    compilation_type = collections.namedtuple(
        "Compilation", ["compiler", "flags", "files"]
    )
    return compilation_type(compiler=language, flags=flags, files=files)
| |
| |
def classify_source(filename, c_compiler=True):
    """Return the language implied by the extension of the file name.

    filename: path or bare name of the file; only the extension of its last
        path component is looked at, and the match is case sensitive.
    c_compiler: when true '.c' and '.i' files are classified as C, otherwise
        as C++.

    Returns the language name as a string, or None when the extension is
    not a known source file extension."""

    suffix = os.path.splitext(os.path.basename(filename))[1]

    # the only two extensions whose language depends on the compiler
    if suffix == ".c":
        return "c" if c_compiler else "c++"
    if suffix == ".i":
        return "c-cpp-output" if c_compiler else "c++-cpp-output"

    languages = {
        ".ii": "c++-cpp-output",
        ".m": "objective-c",
        ".mi": "objective-c-cpp-output",
        ".mm": "objective-c++",
        ".mii": "objective-c++-cpp-output",
        ".C": "c++",
        ".cc": "c++",
        ".CC": "c++",
        ".cp": "c++",
        ".cpp": "c++",
        ".cxx": "c++",
        ".c++": "c++",
        ".C++": "c++",
        ".txx": "c++",
    }
    return languages.get(suffix)
| |
| |
def compiler_language(command):
    """Tell whether the command is a compiler call, and for which language.

    command: the invocation as a list of strings, executable first.

    The base name of the executable is checked against COMPILER_PATTERNS.
    Returns 'c++' when a matching name contains '++' (optionally followed by
    a dash separated suffix), 'c' for any other matching name, and None when
    the command is empty or the name does not match."""

    if not command:
        return None

    name = os.path.basename(command[0])
    if not any(known.match(name) for known in COMPILER_PATTERNS):
        return None

    if re.match(r"^(.+)(\+\+)(-.+|)$", name):
        return "c++"
    return "c"