#!/usr/bin/env python3
# Run with directory arguments from any directory, with no special setup
# required.

import os
from pathlib import Path
import re
import sys
from typing import Sequence

VERBOSE = False

# Every distinct copyright notice found so far, as cleaned-up text.
copyrights = set()

# File extensions (lower-case) that never carry a notice we want to collect.
_UNINTERESTING_EXTENSIONS = frozenset({
    ".bp",
    ".map",
    ".md",
    ".mk",
    ".py",
    ".pyc",
    ".swp",
    ".txt",
    ".xml",
})

# Whole file names (lower-case) that are skipped.
_UNINTERESTING_NAMES = frozenset({"notice", "readme", "pylintrc"})

# Substrings that mark the line on which a copyright header stops.
_COPYRIGHT_ENDINGS = (
    " $FreeBSD: ",
    "$Citrus$",
    "$FreeBSD$",
    "*/",
    "From: @(#)",
    # OpenBSD likes to say where stuff originally came from:
    "Original version ID:",
    "\t$Citrus: ",
    "\t$NetBSD: ",
    "\t$OpenBSD: ",
    "\t@(#)",
    "\tcitrus Id: ",
    "\tfrom: @(#)",
    "from OpenBSD:",
)

# Raw lines that are dropped from the tail of a header before cleaning.
_TRAILING_CRUFT = frozenset({
    " *",
    " * ====================================================",
})

# Cleaned lines that are really just "blank" comment lines.
_BLANK_COMMENT_LINES = frozenset({"#", " *", "**", "-"})

_LEADING_STAR_RE = re.compile(r"^ \* ")


def warn(s):
    """Write a warning message to stderr."""
    sys.stderr.write("warning: %s\n" % s)


def warn_verbose(s):
    """Write a warning message to stderr, but only when VERBOSE is set."""
    if VERBOSE:
        warn(s)


def is_interesting(path_str: str) -> bool:
    """Return whether the file at path_str should be scanned for notices.

    The check is case-insensitive. Files with an uninteresting extension,
    NOTICE/README/pylintrc files and editor backup files ("*~") are skipped.
    """
    path = Path(path_str.lower())
    if path.suffix in _UNINTERESTING_EXTENSIONS:
        return False
    if path.name in _UNINTERESTING_NAMES:
        return False
    # Backup files for some editors.
    if path.match("*~"):
        return False
    return True


def is_copyright_end(line: str, first_line_was_hash: bool) -> bool:
    """Return whether line terminates a copyright header.

    For headers written as "#" comments, an empty line ends the header. In
    all cases, a line containing one of the known ending markers does.
    """
    if first_line_was_hash and not line:
        return True
    return any(ending in line for ending in _COPYRIGHT_ENDINGS)


def extract_copyright_at(lines: Sequence[str], i: int) -> int:
    """Extract the copyright header around lines[i] into `copyrights`.

    Args:
        lines: The lines of the file, without line terminators.
        i: Index of a line that mentions a copyright.

    Returns:
        The index of the line that terminated the header, or len(lines) if
        the header ran to the end of the file. This equals the incoming i
        when lines[i] is itself a terminator, so callers must make sure they
        still advance.
    """
    first_line_was_hash = lines[i].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    start = i
    if not first_line_was_hash:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines) and not is_copyright_end(lines[i],
                                                  first_line_was_hash):
        i += 1

    # Trim trailing cruft.
    end = i
    while end > 0 and lines[end - 1] in _TRAILING_CRUFT:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", " ")
        line = line.replace("/* ", "")
        line = _LEADING_STAR_RE.sub("", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in _BLANK_COMMENT_LINES:
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail. The bounds checks matter: the
    # header can be empty (e.g. a one-line "/* Copyright ... */" comment,
    # where the matching line is also the terminating line).
    head = 0
    tail = len(clean_lines)
    while head < tail and clean_lines[head] == "":
        head += 1
    while tail > head and clean_lines[tail - 1] == "":
        tail -= 1
    clean_lines = clean_lines[head:tail]

    if clean_lines:
        copyrights.add("\n".join(clean_lines))
    else:
        warn("ignoring empty copyright notice at line %d" % (start + 1))

    return i


def do_file(path: str) -> None:
    """Scan one file and record every copyright notice found in it.

    The file is decoded as UTF-8, falling back to ISO-8859-1 (with a
    warning) if it is not valid UTF-8. Short files and files without any
    copyright mention are skipped with a warning.
    """
    raw = Path(path).read_bytes()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn_verbose("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' %
             (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to
    # skip.
    i = 0
    while i < len(lines):
        line = lines[i]
        if "Copyright" in line and "@(#) Copyright" not in line:
            # Always move forward: extract_copyright_at returns i unchanged
            # when the matching line is itself a terminator, which would
            # otherwise make this loop spin forever.
            i = max(extract_copyright_at(lines, i), i + 1)
        else:
            i += 1


def do_dir(arg):
    """Recursively scan every interesting file below the directory arg.

    ".git" directories are not entered. Directories and files are visited
    in sorted order so that warnings come out in a stable order.
    """
    for directory, sub_directories, filenames in os.walk(arg):
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        # Sort in place: os.walk only honors changes made to the list object
        # it handed us, not a rebinding of the local name.
        sub_directories.sort()

        for filename in sorted(filenames):
            path = os.path.join(directory, filename)
            if is_interesting(path):
                do_file(path)


def main() -> None:
    """Scan the paths given on the command line and print all notices.

    With no arguments the current directory is scanned. Directory arguments
    are walked recursively; any other argument is scanned as a single file.
    """
    args = sys.argv[1:] or ["."]

    for arg in args:
        if os.path.isdir(arg):
            do_dir(arg)
        else:
            do_file(arg)

    for notice in sorted(copyrights):
        print(notice)
        print()
        print("-" * 67)
        print()


if __name__ == "__main__":
    main()