xref: /aosp_15_r20/bionic/libc/tools/generate_notice.py (revision 8d67ca893c1523eb926b9080dbe4e2ffd2a27ba1)
1#!/usr/bin/env python3
2# Run with directory arguments from any directory, with no special setup
3# required.
4
5import os
6from pathlib import Path
7import re
8import sys
9from typing import Sequence
10
# When True, warn_verbose() forwards its message to warn(); when False those
# messages are dropped.
VERBOSE = False

# Every distinct copyright notice extracted so far. extract_copyright_at()
# adds to this set and main() prints its contents in sorted order.
copyrights = set()
14
15
def warn(s):
    """Write `s` to stderr as a single "warning: ..." line."""
    message = "warning: %s\n" % s
    sys.stderr.write(message)
18
19
def warn_verbose(s):
    """Pass `s` on to warn(), but only when the VERBOSE flag is set."""
    if not VERBOSE:
        return
    warn(s)
23
24
def is_interesting(path_str: str) -> bool:
    """Return whether the file named by `path_str` should be scanned.

    The path is lower-cased before any comparison, so every check below is
    case-insensitive. A file is skipped when its extension is in the
    skip list, when its name is one of a few known non-source files, or when
    it looks like an editor backup (name ending in "~").
    """
    candidate = Path(path_str.lower())
    skipped_suffixes = (
        ".bp",
        ".map",
        ".md",
        ".mk",
        ".py",
        ".pyc",
        ".swp",
        ".txt",
        ".xml",
    )
    skipped_names = ("notice", "readme", "pylintrc")
    # The "*~" pattern catches the backup files some editors leave behind.
    uninteresting = (
        candidate.suffix in skipped_suffixes
        or candidate.name in skipped_names
        or candidate.match("*~")
    )
    return not uninteresting
46
47
def is_copyright_end(line: str, first_line_was_hash: bool) -> bool:
    """Return whether `line` marks the end of a copyright header.

    Args:
        line: The line being examined.
        first_line_was_hash: Whether the header's copyright line started
            with "#". For such headers an empty line also ends the header.

    Returns:
        True if `line` is empty in a hash-style header, or if it contains
        any of the known terminator markers; False otherwise.
    """
    terminators = (
        " $FreeBSD: ",
        "$Citrus$",
        "$FreeBSD$",
        "*/",
        "From: @(#)",
        # OpenBSD likes to say where stuff originally came from:
        "Original version ID:",
        "\t$Citrus: ",
        "\t$NetBSD: ",
        "\t$OpenBSD: ",
        "\t@(#)",
        "\tcitrus Id: ",
        "\tfrom: @(#)",
        "from OpenBSD:",
    )
    if first_line_was_hash and not line:
        return True
    return any(marker in line for marker in terminators)
73
74
def extract_copyright_at(lines: Sequence[str], i: int) -> int:
    """Extract the copyright header around `lines[i]` into `copyrights`.

    The header is taken to run from its start (found by backing up to the
    line after the nearest preceding line containing "/*", unless the
    copyright line starts with "#") up to, but not including, the first line
    for which is_copyright_end() is true. Comment decoration is stripped and
    the remaining text is added to the module-level `copyrights` set.

    Args:
        lines: All lines of the file being scanned.
        i: Index of the line that mentions the copyright.

    Returns:
        The index at which the caller should resume scanning. It is always
        greater than `i`, so a caller that loops on the return value is
        guaranteed to make progress.
    """
    first = i
    first_line_was_hash = lines[first].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    start = first
    if not first_line_was_hash:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines) and not is_copyright_end(lines[i],
                                                  first_line_was_hash):
        i += 1

    # The copyright line can itself carry a terminator, e.g. a one-line
    # "/* Copyright ... */" comment. Previously that returned the index
    # unchanged, which made the caller re-process the same line forever.
    # Treat such a line as the last line of its header and consume it.
    ends_on_first_line = i == first
    if ends_on_first_line:
        i = first + 1
        if "/*" in lines[first]:
            # The comment opens on this very line; nothing above belongs
            # to the header.
            start = first

    end = i

    # Trim trailing cruft (never past the start of the header).
    cruft = {" *", " * ===================================================="}
    while end > start and lines[end - 1] in cruft:
        end -= 1

    header = list(lines[start:end])
    if ends_on_first_line and header:
        # Drop the comment closer so it does not end up in the notice text.
        header[-1] = header[-1].replace("*/", "")

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in header:
        line = line.replace("\t", "    ")
        line = line.replace("/* ", "")
        line = re.sub(r"^ \* ", "", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in {"#", " *", "**", "-"}:
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail. The bounds checks matter: the
    # header may contain no text at all once the decoration is removed.
    head = 0
    tail = len(clean_lines)
    while head < tail and clean_lines[head] == "":
        head += 1
    while tail > head and clean_lines[tail - 1] == "":
        tail -= 1

    # Only record headers that actually contain some text.
    if head < tail:
        copyrights.add("\n".join(clean_lines[head:tail]))

    return i
130
131
def do_file(path: str) -> None:
    """Scan the file at `path` and record every copyright header in it.

    The file is decoded as UTF-8, falling back to ISO-8859-1 (with a
    warning) when that fails. Files of four lines or fewer are ignored, as
    are files that never mention "Copyright"; the latter produce a warning
    unless they mention "public domain".
    """
    data = Path(path).read_bytes()
    try:
        content = data.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        content = data.decode("iso-8859-1")

    lines = content.split("\n")
    line_count = len(lines)

    if line_count <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn_verbose("ignoring public domain file %s" % path)
        else:
            warn('no copyright notice found in "%s" (%d lines)' %
                 (path, line_count))
        return

    # Walk the lines by hand: extract_copyright_at returns the index to
    # continue from, which may skip over many lines at once.
    i = 0
    while i < line_count:
        current = lines[i]
        if "Copyright" not in current or "@(#) Copyright" in current:
            i += 1
            continue
        i = extract_copyright_at(lines, i)
162
163
def do_dir(arg):
    """Scan every interesting file beneath the directory `arg`.

    Walks the tree with os.walk, skipping any ".git" directory, and calls
    do_file() on each file for which is_interesting() is true. Directories
    and files are visited in sorted order so that warnings come out in the
    same order on every run.
    """
    for directory, sub_directories, filenames in os.walk(arg):
        # os.walk only honors changes made to this list in place, so both
        # the pruning and the sorting must mutate it rather than rebind the
        # name (the previous `sub_directories = sorted(...)` had no effect
        # on the traversal order).
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        sub_directories.sort()

        for filename in sorted(filenames):
            path = os.path.join(directory, filename)
            if is_interesting(path):
                do_file(path)
174
175
def main() -> None:
    """Scan the paths given on the command line and print all notices.

    Each argument is scanned as a directory tree if it is a directory and as
    a single file otherwise; with no arguments the current directory is
    scanned. The collected notices are then printed in sorted order, each
    followed by a blank line, a separator line and another blank line.
    """
    targets = sys.argv[1:] or ["."]

    for target in targets:
        if os.path.isdir(target):
            do_dir(target)
        else:
            do_file(target)

    separator = "-" * 67
    for notice in sorted(copyrights):
        print(notice)
        print()
        print(separator)
        print()
192
193
# Run the scan only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
196