1#!/usr/bin/env python3
2
3# Copyright 2015 gRPC authors.
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#     http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17import argparse
18import datetime
19import os
20import re
21import subprocess
22import sys
23
# Work from the directory two levels above the one containing sys.argv[0];
# every relative path used below is resolved against it.
_SCRIPT_DIR = os.path.dirname(sys.argv[0])
ROOT = os.path.abspath(os.path.join(_SCRIPT_DIR, os.pardir, os.pardir))
os.chdir(ROOT)
27
# Command-line interface.
#   -o/--output   'details' prints "<reason>: <file>", 'list' prints the file
#                 name only.
#   -s/--skips    also report files that are skipped for having no known type.
#   -a/--ancient  accepted, but not read anywhere in the code visible here.
#   --precommit   list files via `git status` instead of `git ls-tree`.
#   --fix         rewrite files whose header is missing or in the wrong style.
argp = argparse.ArgumentParser(description='copyright checker')
argp.add_argument('-o', '--output', choices=['list', 'details'],
                  default='details')
argp.add_argument('-s', '--skips', action='store_const', const=1, default=0)
argp.add_argument('-a', '--ancient', action='store_const', const=1, default=0)
argp.add_argument('--precommit', action='store_true')
argp.add_argument('--fix', action='store_true')
args = argp.parse_args()
39
# Read the license notice from NOTICE.txt (relative to the working directory)
# as a list of lines without their line terminators.
with open('NOTICE.txt') as notice_file:
    LICENSE_NOTICE = notice_file.read().splitlines()
43
# Regex fragment matching the comment marker (plus surrounding whitespace)
# that precedes each license line, keyed by file extension or, for
# extension-less build files, by base name.
# Entries are grouped by shared pattern; the groups are listed in the order
# the keys should appear in the resulting dict.
_PREFIX_RE_GROUPS = (
    (r'@rem\s*', ('.bat',)),
    (r'\s*(?://|\*)\s*', ('.c', '.cc', '.h')),
    (r'\s*\*\s*', ('.m', '.mm', '.php', '.js')),
    (r'#\s*', ('.py', '.pyx', '.pxd', '.pxi', '.rb', '.sh')),
    (r'//\s*', ('.proto', '.cs')),
    (r'#\s*', ('.mak', '.bazel', '.bzl', 'Makefile', 'Dockerfile', 'BUILD')),
)
LICENSE_PREFIX_RE = {
    key: pattern for pattern, keys in _PREFIX_RE_GROUPS for key in keys
}
72
# Comment syntax used when writing a license header, keyed by file extension
# or base name. Each value is a (header, prefix, footer) tuple: `header` opens
# the comment block, `prefix` starts every license line and `footer` closes the
# block. For javascript multi-line comments that is '/**', ' *' and ' */'.
# header and footer are None for line-comment styles that need neither.
# Entries are grouped by shared style; the groups are listed in the order the
# keys should appear in the resulting dict.
_PREFIX_TEXT_GROUPS = (
    ((None, '@rem', None), ('.bat',)),
    ((None, '//', None), ('.c', '.cc', '.h')),
    (('/**', ' *', ' */'), ('.m', '.mm', '.php', '.js')),
    ((None, '#', None), ('.py', '.pyx', '.pxd', '.pxi', '.rb', '.sh')),
    ((None, '//', None), ('.proto', '.cs')),
    ((None, '#', None),
     ('.mak', '.bazel', '.bzl', 'Makefile', 'Dockerfile', 'BUILD')),
)
LICENSE_PREFIX_TEXT = {
    key: style for style, keys in _PREFIX_TEXT_GROUPS for key in keys
}
103
# Paths (relative to ROOT) that are never checked for a license header.
_EXEMPT = frozenset([
    # Output of the protocol compiler.
    'examples/python/helloworld/helloworld_pb2.py',
    'examples/python/helloworld/helloworld_pb2_grpc.py',
    'examples/python/multiplex/helloworld_pb2.py',
    'examples/python/multiplex/helloworld_pb2_grpc.py',
    'examples/python/multiplex/route_guide_pb2.py',
    'examples/python/multiplex/route_guide_pb2_grpc.py',
    'examples/python/route_guide/route_guide_pb2.py',
    'examples/python/route_guide/route_guide_pb2_grpc.py',

    # Doxygen configuration, generated.
    'tools/doxygen/Doxyfile.php',

    # Older file that originated outside gRPC.
    'src/php/tests/bootstrap.php',
    # census.proto, copied from github.
    'tools/grpcz/census.proto',
    # status.proto, copied from googleapis.
    'src/proto/grpc/status/status.proto',

    # Gradle wrappers used for the Android builds.
    'examples/android/helloworld/gradlew.bat',
    'src/android/test/interop/gradlew.bat',

    # Source emitted by the designer.
    'examples/csharp/HelloworldXamarin/Droid/Resources/Resource.designer.cs',
    'examples/csharp/HelloworldXamarin/iOS/ViewController.designer.cs',

    # BoringSSL generated header. Commit version information sits at the top
    # of the file, so the license info cannot be checked.
    'src/boringssl/boringssl_prefix_symbols.h',
])
137
# Path prefixes under which '.cc' and '.h' files must use the cpp-style ('//')
# license header. Kept as a tuple so it can be passed to str.startswith().
_ENFORCE_CPP_STYLE_COMMENT_PATH_PREFIX = (
    'include/grpc++/',
    'include/grpcpp/',
    'src/core/',
    'src/cpp/',
    'test/core/',
    'test/cpp/',
    'fuzztest/',
)
147
# Matches the copyright line of the notice. The year is either a single year or
# a "first-last" range; "The", the capitalisation of "The"/"Authors" and the
# trailing period are all optional.
RE_YEAR = r'Copyright (?P<first_year>[0-9]+\-)?(?P<last_year>[0-9]+) ([Tt]he )?gRPC [Aa]uthors(\.|)'


def _notice_line_re(line):
    """Return the regex for one notice line.

    The copyright line is matched loosely through RE_YEAR; every other line is
    escaped so that it matches literally.
    """
    if re.search(RE_YEAR, line):
        return RE_YEAR
    return re.escape(line)


# One regex per notice line, shared by all the license patterns below.
_NOTICE_LINE_RES = [_notice_line_re(line) for line in LICENSE_NOTICE]

# Full license regex per file type: each notice line preceded by that file
# type's comment-prefix regex, with the lines joined by a newline match.
RE_LICENSE = {
    key: r'\n'.join(prefix_re + line_re for line_re in _NOTICE_LINE_RES)
    for key, prefix_re in LICENSE_PREFIX_RE.items()
}

# Pieces of a C-style ("/* ... */") license block.
RE_C_STYLE_COMMENT_START = r'^/\*\s*\n'
RE_C_STYLE_COMMENT_OPTIONAL_LINE = r'(?:\s*\*\s*\n)*'
RE_C_STYLE_COMMENT_END = r'\s*\*/'
# Whole C-style block: opener, optional bare '*' lines, the notice with a '*'
# in front of each line, optional bare '*' lines again, then the closer.
RE_C_STYLE_COMMENT_LICENSE = ''.join((
    RE_C_STYLE_COMMENT_START,
    RE_C_STYLE_COMMENT_OPTIONAL_LINE,
    r'\n'.join(r'\s*(?:\*)\s*' + line_re for line_re in _NOTICE_LINE_RES),
    r'\n',
    RE_C_STYLE_COMMENT_OPTIONAL_LINE,
    RE_C_STYLE_COMMENT_END,
))
# Cpp-style block: every notice line preceded by '//'.
RE_CPP_STYLE_COMMENT_LICENSE = r'\n'.join(
    r'\s*(?://)\s*' + line_re for line_re in _NOTICE_LINE_RES)
165
# Current local calendar year, stamped into newly written headers.
YEAR = datetime.date.today().year

# Copyright line substituted for the notice's copyright line when a header is
# generated.
LICENSE_YEAR = 'Copyright {} gRPC authors.'.format(YEAR)
169
170
def join_license_text(header, prefix, footer, notice):
    """Render a license notice as a comment block.

    Args:
        header: Line that opens the comment block (e.g. '/**'), or None/empty
            when the comment style needs no opener.
        prefix: Comment marker put at the start of every notice line.
        footer: Line that closes the comment block (e.g. ' */'), or None/empty
            when the comment style needs no closer.
        notice: Iterable of notice lines, without line terminators.

    Returns:
        The rendered block as one string ending in a newline. A notice line
        that matches RE_YEAR is replaced by LICENSE_YEAR.
    """

    def add_prefix(line):
        # No space between the prefix and an empty line, so the output has no
        # trailing whitespace.
        return prefix + (' ' + line if line else '')

    parts = []
    if header:
        parts.append(header + '\n')
    # Render the notice that was passed in, not the module-level
    # LICENSE_NOTICE, so the `notice` argument is actually honoured.
    parts.append('\n'.join(
        add_prefix(LICENSE_YEAR if re.search(RE_YEAR, line) else line)
        for line in notice))
    parts.append('\n')
    if footer:
        parts.append(footer + '\n')
    return ''.join(parts)
186
187
# Ready-to-insert license header per file type, rendered from LICENSE_NOTICE
# with that file type's (header, prefix, footer) comment syntax.
LICENSE_TEXT = {
    key: join_license_text(header, prefix, footer, LICENSE_NOTICE)
    for key, (header, prefix, footer) in LICENSE_PREFIX_TEXT.items()
}
193
# Shell pipeline that lists the files to check. The literals are raw strings so
# the regex backslashes (\s, \(, \|, \)) reach grep untouched without relying
# on Python keeping unrecognised escape sequences as-is.
if args.precommit:
    # Paths from `git status -z` whose two-letter status is one of M/A/R/C
    # followed by one of M/A/R/C/D or a space.
    FILE_LIST_COMMAND = r"git status -z | grep -Poz '(?<=^[MARC][MARCD ] )[^\s]+'"
else:
    # Every file tracked at HEAD, minus third_party/ and the ares config/build
    # headers.
    FILE_LIST_COMMAND = (r'git ls-tree -r --name-only -r HEAD | '
                         r'grep -v ^third_party/ |'
                         r'grep -v "\(ares_config.h\|ares_build.h\)"')
200
201
def load(name):
    """Return the entire contents of the text file at path `name`."""
    with open(name) as handle:
        contents = handle.read()
    return contents
205
206
def save(name, text):
    """Write `text` to the file at path `name`, replacing what was there."""
    with open(name, 'w') as handle:
        handle.writelines((text,))
210
211
# Sanity check before scanning anything: the Makefile in the working directory
# must itself match the license pattern for Makefiles.
assert re.search(RE_LICENSE['Makefile'], load('Makefile')) is not None
213
214
def log(cond, why, filename):
    """Report `filename` on stdout when `cond` is truthy.

    With --output=details the line reads "<why>: <filename>"; otherwise only
    the file name is printed.
    """
    if cond:
        if args.output != 'details':
            message = filename
        else:
            message = '%s: %s' % (why, filename)
        print(message)
222
223
def write_copyright(license_text, file_text, filename):
    """Write `file_text` to `filename` with `license_text` placed in front.

    If the first line of `file_text` is a shebang ("#!"), it stays on the first
    line and the license goes directly after it. One blank line separates the
    license from the rest of the file.
    """
    first_line = file_text.partition("\n")[0]
    if first_line.startswith("#!"):
        shebang = first_line + "\n"
        remainder = file_text[len(shebang):]
    else:
        shebang = ""
        remainder = file_text

    with open(filename, 'w') as out:
        out.write("".join((shebang, license_text, "\n", remainder)))
234
235
def replace_copyright(license_text, file_text, filename):
    """Swap a C-style license header in `file_text` for `license_text`.

    Args:
        license_text: Replacement header to write in place of the old one.
        file_text: Current contents of the file.
        filename: Path that receives the rewritten contents.

    Returns:
        True if `file_text` matched RE_C_STYLE_COMMENT_LICENSE and the file was
        rewritten; False (nothing written) otherwise.
    """
    # Search the text that was passed in. Reading the module-level `text`
    # instead would only work when the caller happens to keep the same
    # contents in that global.
    m = re.search(RE_C_STYLE_COMMENT_LICENSE, file_text)
    if not m:
        return False
    # Everything up to the end of the match is dropped and replaced by the new
    # header; the remainder of the file is kept verbatim.
    rewritten_text = license_text + file_text[m.end():]
    with open(filename, 'w') as f:
        f.write(rewritten_text)
    return True
244
245
# Scan every listed file and check that it carries the expected license
# header; with --fix, rewrite the files that do not.
ok = True
filename_list = []
try:
    # Precommit mode lists files with `git status -z | grep -z`, whose output
    # is NUL-separated. str.splitlines() does not split on NUL, so NULs are
    # mapped to newlines first; the newline-separated `git ls-tree` listing is
    # unaffected by this.
    filename_list = subprocess.check_output(
        FILE_LIST_COMMAND,
        shell=True).decode().replace('\0', '\n').splitlines()
except subprocess.CalledProcessError:
    # The listing pipeline ends in grep, which exits non-zero when it selects
    # nothing; a failed listing is treated as "nothing to check".
    sys.exit(0)

for filename in filename_list:
    enforce_cpp_style_comment = False
    if filename in _EXEMPT:
        continue
    # Skip check for upb generated code.
    if filename.endswith(('.upb.h', '.upb.c', '.upbdefs.h', '.upbdefs.c')):
        continue
    ext = os.path.splitext(filename)[1]
    base = os.path.basename(filename)
    # Pick the expected pattern and replacement header: the strict cpp-style
    # rule for selected directories first, then by extension, then by base
    # name (Makefile, Dockerfile, BUILD).
    if filename.startswith(_ENFORCE_CPP_STYLE_COMMENT_PATH_PREFIX) and ext in (
            '.cc', '.h'):
        enforce_cpp_style_comment = True
        re_license = RE_CPP_STYLE_COMMENT_LICENSE
        license_text = LICENSE_TEXT[ext]
    elif ext in RE_LICENSE:
        re_license = RE_LICENSE[ext]
        license_text = LICENSE_TEXT[ext]
    elif base in RE_LICENSE:
        re_license = RE_LICENSE[base]
        license_text = LICENSE_TEXT[base]
    else:
        log(args.skips, 'skip', filename)
        continue
    try:
        text = load(filename)
    except (OSError, ValueError):
        # Best effort: entries that cannot be read as text (missing files,
        # directories, undecodable bytes, malformed names) are skipped.
        # KeyboardInterrupt and SystemExit are deliberately not caught.
        continue
    m = re.search(re_license, text)
    if m:
        pass
    elif enforce_cpp_style_comment:
        log(1, 'copyright missing or does not use cpp-style copyright header',
            filename)
        if args.fix:
            # Attempt fix: search for c-style copyright header and replace it
            # with cpp-style copyright header. If that doesn't work
            # (e.g. missing copyright header), write cpp-style copyright header.
            if not replace_copyright(license_text, text, filename):
                write_copyright(license_text, text, filename)
        ok = False
    elif 'DO NOT EDIT' not in text:
        # Files carrying a 'DO NOT EDIT' marker are left alone.
        if args.fix:
            write_copyright(license_text, text, filename)
            log(1, 'copyright missing (fixed)', filename)
        else:
            log(1, 'copyright missing', filename)
        ok = False

if not ok and not args.fix:
    print(
        'You may use following command to automatically fix copyright headers:')
    print('    tools/distrib/check_copyright.py --fix')

# Exit status 1 whenever any file failed the check, even if it was just fixed.
sys.exit(0 if ok else 1)
311