xref: /aosp_15_r20/external/cronet/third_party/libc++/src/test/libcxx/transitive_includes_to_csv.py (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1#!/usr/bin/env python
2# ===----------------------------------------------------------------------===##
3#
4# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5# See https://llvm.org/LICENSE.txt for license information.
6# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7#
8# ===----------------------------------------------------------------------===##
9
10from dataclasses import dataclass
11from typing import List  # Needed for python 3.8 compatibility.
12import argparse
13import pathlib
14import re
15import sys
16
17
@dataclass
class header:
    """
    A single entry of --trace-includes output: the name (path) of an included
    file together with how deeply nested the inclusion is.
    """

    # Name of the included file, i.e. the text following the leading periods
    # on a trace line. Note that the default is None even though the field is
    # annotated as `str`.
    name: str = None
    # Nesting level of the include (parse_line sets it to the number of leading
    # periods on the trace line). Defaults to -1 when not provided.
    level: int = -1
22
23
def parse_line(line: str) -> header:
    """
    Turn one line of --trace-includes output into a `header`.

    A valid line starts with a run of periods, followed by a single space and
    then the name of the included file. The length of the run of periods is
    the nesting level of the include. The script exits with an error message
    if the line does not have that shape.
    """
    parsed = re.match(r"(\.+) (.+)", line)
    if parsed is None:
        sys.exit(f"Line {line} contains invalid data.")

    # Group 1 is the run of periods, group 2 is the header name.
    dots, path = parsed.groups()
    return header(path, len(dots))
35
36
# On Windows, the path separators can either be forward slash or backslash.
# If it is a backslash, Clang prints it escaped as two consecutive
# backslashes, and they need to be escaped in the RE. (Use a raw string for
# the pattern to avoid needing another level of escaping on the Python string
# literal level.)
#
# The pattern matches a path containing `c++/vN/` (N being one or more
# digits). Its only capture group is whatever follows that directory, i.e. the
# header's path relative to it. Because the leading `.*` is greedy, the last
# such directory in the path is the one that counts.
LIBCXX_HEADER_REGEX = r".*c\+\+(?:/|\\\\)v[0-9]+(?:/|\\\\)(.+)"
43
def is_libcxx_header(header: str) -> bool:
    """
    Tell whether `header` is a libc++ header that is not a C-compatibility header.

    A libc++ header is a file located under a `c++/vN` directory. Among those,
    files ending in `.h` are treated as C compatibility headers and rejected,
    unless they are libc++ detail headers (a path component starting with `__`).
    """
    # Anything outside of the c++/vN directory is not a libc++ header.
    found = re.match(LIBCXX_HEADER_REGEX, header)
    if found is None:
        return False

    # Headers without a `.h` extension are never C compatibility headers.
    inside = found.group(1)
    if not inside.endswith(".h"):
        return True

    # A `.h` file is only kept when it is a libc++ detail header.
    if inside.startswith("__"):
        return True
    return re.search(r"(/|\\\\)__", inside) is not None
61
62
def parse_file(file: pathlib.Path) -> "List[header]":
    """
    Parse a file containing --trace-include output to generate a list of the top-level C++ includes
    contained in it.

    This effectively generates the dependency graph of C++ Standard Library headers of the header
    whose --trace-include it is. In order to get the expected result of --trace-include, the
    -fshow-skipped-includes flag also needs to be passed.

    The returned list contains `header` objects: the top-level libc++ header(s) (nesting level 1)
    and every public libc++ header reached from them either directly or only through libc++ detail
    headers. Exits the script (via parse_line) if a line of the file is malformed.
    """
    # Sentinel nesting level meaning "do not skip anything below the current header".
    transparent = 999

    # `level` is the nesting level of the last recorded public header; anything nested deeper
    # than it is a transitive include and gets skipped. Start out transparent so that a trace
    # whose first libc++ header is not at level 1 does not read an unassigned variable.
    level = transparent

    result = []
    with file.open(encoding="utf-8") as f:
        # Iterate the file lazily instead of materializing all lines up front.
        for line in f:
            entry = parse_line(line)

            # Skip non-libc++ headers
            if not is_libcxx_header(entry.name):
                continue

            # Include top-level headers in the output. There's usually exactly one,
            # except if the compiler is passed a file with `-include`. Top-level
            # headers are transparent, in the sense that we want to go look at
            # transitive includes underneath.
            if entry.level == 1:
                level = transparent
                result.append(entry)
                continue

            # Skip libc++ headers included transitively.
            if entry.level > level:
                continue

            # Detail headers are transparent too: we attribute all includes of public libc++
            # headers under a detail header to the last public libc++ header that included it.
            if entry.name.startswith("__") or re.search(r"(/|\\\\)__", entry.name):
                level = transparent
                continue

            # Add the non-detail libc++ header to the list.
            level = entry.level
            result.append(entry)
    return result
104
105
def create_include_graph(trace_includes: List[pathlib.Path]) -> List[List[str]]:
    """
    Build the include graph described by a collection of --trace-includes files.

    Each file is parsed with parse_file. The result holds one row per file that
    has at least one include: the first element of the row is the top-level
    header of that file, and the remaining elements are the distinct headers it
    includes, in sorted order. All names are relative to the libc++ `c++/vN`
    directory rather than full paths.

    Exits the script with an error message if a file contains no top-level
    libc++ header.
    """

    def relative(path: str) -> str:
        # Get the actual filename relative to libc++'s installation directory
        # instead of the full path.
        return re.match(LIBCXX_HEADER_REGEX, path).group(1)

    result = []
    for file in trace_includes:
        headers = parse_file(file)

        # There should be only one top-level header; use the first one found.
        top_level_path = next((h.name for h in headers if h.level == 1), None)
        if top_level_path is None:
            sys.exit(f"File {file} does not contain a top-level libc++ header.")
        top_level = relative(top_level_path)

        # Remove duplicates in all includes, and sort them so that the
        # resulting rows do not depend on set iteration order.
        includes = sorted({relative(h.name) for h in headers if h.level != 1})

        if includes:
            result.append([top_level] + includes)
    return result
125
126
def print_csv(graph: List[str]) -> None:
    """
    Print the include graph on standard output, one `<header> <include>` pair per line.

    Each row of `graph` is indexed as a sequence: its first element is the
    including header and the remaining elements are the headers it includes.
    The includes of a row are printed in sorted order. The script exits with an
    error message as soon as a header is found to include itself.
    """
    for row in graph:
        header, dependencies = row[0], sorted(row[1:])
        for include in dependencies:
            if include != header:
                print(f"{header} {include}")
                continue
            # A header listed among its own includes is a cycle.
            sys.exit(f"Cycle detected: header {header} includes itself.")
134
135
if __name__ == "__main__":
    # Command-line entry point: read the --trace-includes files named on the
    # command line and print the resulting include graph on standard output.
    description = """Produce a dependency graph of libc++ headers, in CSV format.
This script is normally executed by libcxx/test/libcxx/transitive_includes.gen.py"""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
    )
    parser.add_argument(
        "inputs",
        nargs="+",
        metavar="FILE",
        default=None,
        help="One or more files containing the result of --trace-includes on the headers one wishes to graph.",
    )
    options = parser.parse_args()

    trace_files = map(pathlib.Path, options.inputs)
    print_csv(create_include_graph(trace_files))
152