#!/usr/bin/env python
# ===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===----------------------------------------------------------------------===##
"""Turn --trace-includes output into a list of `header include` pairs."""

from dataclasses import dataclass
from typing import Iterable, List, Optional  # Needed for python 3.8 compatibility.
import argparse
import pathlib
import re
import sys


@dataclass
class header:
    # Path of the header, exactly as it appears in the trace line.
    name: Optional[str] = None
    # Nesting level: the number of periods that precede the path in the trace.
    level: int = -1


def parse_line(line: str) -> header:
    """
    Parse an output line from --trace-includes into a `header`.

    Exits the program with an error message if the line does not look like
    one or more periods, a space, and a path.
    """
    match = re.match(r"(\.+) (.+)", line)
    if not match:
        sys.exit(f"Line {line} contains invalid data.")

    # The number of periods in front of the header name is the nesting level of
    # that header.
    return header(match.group(2), len(match.group(1)))


# On Windows, the path separators can either be forward slash or backslash.
# If it is a backslash, Clang prints it escaped as two consecutive
# backslashes, and they need to be escaped in the RE. (Use a raw string for
# the pattern to avoid needing another level of escaping on the Python string
# literal level.)
LIBCXX_HEADER_REGEX = r".*c\+\+(?:/|\\\\)v[0-9]+(?:/|\\\\)(.+)"

# Nesting level used while "looking through" a transparent header (a top-level
# or detail header). It is assumed to be larger than any real nesting level, so
# that the next public header encountered is never treated as transitive.
_TRANSPARENT_LEVEL = 999


def _is_detail_header(path: str) -> bool:
    """
    Returns whether `path` starts with `__` or has a component starting with `__`.
    """
    return path.startswith("__") or re.search(r"(/|\\\\)__", path) is not None


def _relative_to_libcxx(path: str) -> str:
    """
    Returns the part of `path` that follows the c++/vN directory.

    `path` must match LIBCXX_HEADER_REGEX (i.e. `is_libcxx_header` accepted it
    or it at least lives under c++/vN).
    """
    return re.match(LIBCXX_HEADER_REGEX, path).group(1)


def is_libcxx_header(header: str) -> bool:
    """
    Returns whether a header is a libc++ header, excluding the C-compatibility headers.
    """
    # Only keep files in the c++/vN directory.
    match = re.match(LIBCXX_HEADER_REGEX, header)
    if not match:
        return False

    # Skip C compatibility headers (in particular, make sure not to skip libc++ detail headers).
    relative = match.group(1)
    if relative.endswith(".h") and not _is_detail_header(relative):
        return False

    return True


def parse_file(file: pathlib.Path) -> List[header]:
    """
    Parse a file containing --trace-include output to generate a list of the top-level C++ includes
    contained in it.

    This effectively generates the dependency graph of C++ Standard Library headers of the header
    whose --trace-include it is. In order to get the expected result of --trace-include, the
    -fshow-skipped-includes flag also needs to be passed.

    The returned list contains `header` objects: the top-level libc++ header(s) (level 1) and
    the public libc++ headers that they include directly or through detail headers.
    """
    result = []

    # Start out "transparent". Without this, a trace whose first libc++ header
    # is not at level 1 would read `level` before it was ever assigned. Such a
    # header is recorded, which is also what happens when it follows a
    # top-level header.
    level = _TRANSPARENT_LEVEL

    with file.open(encoding="utf-8") as f:
        for line in f:
            entry = parse_line(line)

            # Skip non-libc++ headers
            if not is_libcxx_header(entry.name):
                continue

            # Include top-level headers in the output. There's usually exactly one,
            # except if the compiler is passed a file with `-include`. Top-level
            # headers are transparent, in the sense that we want to go look at
            # transitive includes underneath.
            if entry.level == 1:
                level = _TRANSPARENT_LEVEL
                result.append(entry)
                continue

            # Skip libc++ headers included transitively.
            if entry.level > level:
                continue

            # Detail headers are transparent too: we attribute all includes of public libc++
            # headers under a detail header to the last public libc++ header that included it.
            if _is_detail_header(entry.name):
                level = _TRANSPARENT_LEVEL
                continue

            # Add the non-detail libc++ header to the list.
            level = entry.level
            result.append(entry)
    return result


def create_include_graph(trace_includes: Iterable[pathlib.Path]) -> List[List[str]]:
    """
    Build one `[top_level, include, include, ...]` row per trace file.

    All names are relative to the c++/vN directory. The includes are
    de-duplicated and sorted. Files whose top-level header has no recorded
    includes produce no row.

    Exits the program with an error message if a trace file contains no
    top-level (level 1) libc++ header.
    """
    result = []
    for file in trace_includes:
        headers = parse_file(file)

        # There should be only one top-level header; if there are several, the
        # first one is used.
        top_level_path = next((h.name for h in headers if h.level == 1), None)
        if top_level_path is None:
            sys.exit(f"No top-level libc++ header found in {file}.")

        # Get actual filenames relative to libc++'s installation directory instead of full paths,
        # and remove duplicates in all includes.
        includes = sorted(
            {_relative_to_libcxx(h.name) for h in headers if h.level != 1}
        )

        if includes:
            result.append([_relative_to_libcxx(top_level_path)] + includes)
    return result


def print_csv(graph: List[List[str]]) -> None:
    """
    Print one `header include` line (space-separated) per edge of `graph`.

    Each row of `graph` is a header followed by the headers it includes.
    Exits the program with an error message if a header lists itself.
    """
    for includes in graph:
        header = includes[0]
        for include in sorted(includes[1:]):
            if header == include:
                sys.exit(f"Cycle detected: header {header} includes itself.")
            print(f"{header} {include}")


def main() -> None:
    """Parse the command line and print the include graph of the given trace files."""
    parser = argparse.ArgumentParser(
        description="""Produce a dependency graph of libc++ headers, in CSV format.
This script is normally executed by libcxx/test/libcxx/transitive_includes.gen.py""",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "inputs",
        default=None,
        metavar="FILE",
        nargs='+',
        help="One or more files containing the result of --trace-includes on the headers one wishes to graph.",
    )
    options = parser.parse_args()

    print_csv(create_include_graph(map(pathlib.Path, options.inputs)))


if __name__ == "__main__":
    main()