1#!/usr/bin/env python3
2# Copyright 2017 gRPC authors.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#     http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16import argparse
17import collections
18import operator
19import os
20import re
21import subprocess
22
23#
24# Find the root of the git tree
25#
26
# Ask git for the top-level directory of the working tree.  check_output
# raises CalledProcessError if git exits with a non-zero status.
_toplevel_bytes = subprocess.check_output(
    ['git', 'rev-parse', '--show-toplevel'])
git_root = _toplevel_bytes.decode('utf-8').strip()
29
30#
31# Parse command line arguments
32#
33
default_out = os.path.join(git_root, '.github', 'CODEOWNERS')

# The summary must be passed as `description`: the first positional parameter
# of ArgumentParser is `prog`, which would substitute this sentence for the
# program name in the usage line.
argp = argparse.ArgumentParser(description='Generate .github/CODEOWNERS file')
argp.add_argument('--out',
                  '-o',
                  type=str,
                  default=default_out,
                  # %(default)s is expanded by argparse itself, so a literal
                  # '%' in the path cannot corrupt the help format string.
                  help='Output file (default %(default)s)')
args = argp.parse_args()
43
44#
45# Walk git tree to locate all OWNERS files
46#
47
# Collect the path of every file named OWNERS found while walking the tree
# rooted at git_root.
owners_files = []
for dirpath, _subdirs, filenames in os.walk(git_root):
    if 'OWNERS' in filenames:
        owners_files.append(os.path.join(dirpath, 'OWNERS'))
53
54#
55# Parse owners files
56#
57
# Parsed content of one OWNERS file:
#   parent     - True/False inheritance flag as parsed
#   directives - list of Directive tuples, in file order
#   dir        - directory holding the OWNERS file, relative to the git root
Owners = collections.namedtuple('Owners', ['parent', 'directives', 'dir'])
# One ownership line: `who` is the owner, `globs` the list of globs given on
# that line (empty when the line names only an owner).
Directive = collections.namedtuple('Directive', ['who', 'globs'])
60
61
def parse_owners(filename):
    """Parse one OWNERS file into an Owners tuple.

    Blank lines and lines whose first non-blank character is '#' are
    ignored.  The directive `set noparent` clears the parent flag.  Every
    other line has the form `<who> [<glob> ...]`: the first
    whitespace-separated token is the owner and the remaining tokens are the
    globs listed for that owner (an empty list when none are given).

    Args:
        filename: path of the OWNERS file to read.

    Returns:
        Owners with `parent` True unless `set noparent` was seen,
        `directives` a list of Directive in file order, and `dir` the
        directory of `filename` relative to `git_root`.
    """
    with open(filename) as f:
        src = f.read().splitlines()
    parent = True
    directives = []
    for line in src:
        # Split on any run of whitespace so that tab-separated or
        # multiply-spaced fields are tokenised exactly like single-space
        # ones instead of being glued into one bogus owner name.
        tokens = line.split()
        # line := directive | comment
        if not tokens or tokens[0].startswith('#'):
            continue
        if tokens == ['set', 'noparent']:
            parent = False
            continue
        # '*' and bare owner names need no special case: they are simply a
        # `who` with an empty glob list.
        directives.append(Directive(who=tokens[0], globs=tokens[1:]))
    return Owners(parent=parent,
                  directives=directives,
                  dir=os.path.relpath(os.path.dirname(filename), git_root))
91
92
# Parse every OWNERS file, then order the results by their directory.
owners_data = [parse_owners(path) for path in owners_files]
owners_data.sort(key=lambda parsed: parsed.dir)
95
96#
# Replace the boolean `parent` flag of every inheriting Owners tuple with the
# directory of its closest ancestor OWNERS file (None when no ancestor has
# one).  Entries marked `set noparent` keep parent == False.
#

new_owners_data = []
for owners in owners_data:
    if owners.parent:
        best_parent = None
        best_parent_score = None
        for possible_parent in owners_data:
            if possible_parent is owners:
                continue
            rel_parts = os.path.relpath(owners.dir,
                                        possible_parent.dir).split(os.sep)
            # A '..' component ==> we had to walk up from possible_parent to
            # get to owners ==> not a parent.  Whole components are compared
            # so that a name merely containing '..' (e.g. 'foo..bar') does
            # not disqualify a genuine ancestor.
            if os.pardir in rel_parts:
                continue
            # Fewer components between the two directories means a closer
            # ancestor; keep the closest one.
            depth = len(rel_parts)
            if best_parent is None or depth < best_parent_score:
                best_parent = possible_parent
                best_parent_score = depth
        owners = owners._replace(
            parent=best_parent.dir if best_parent is not None else None)
    new_owners_data.append(owners)
owners_data = new_owners_data
124
125#
126# In bottom to top order, process owners data structures to build up
127# a CODEOWNERS file for GitHub
128#
129
130
def full_dir(rules_dir, sub_path):
    """Return `sub_path` qualified by the directory `rules_dir`.

    A `rules_dir` of '.' leaves `sub_path` untouched; any other value is
    joined in front of it with os.path.join.
    """
    if rules_dir == '.':
        return sub_path
    return os.path.join(rules_dir, sub_path)
133
134
135# glob using git
gg_cache = {}


def git_glob(glob):
    """Return the set of files `git ls-files` reports for `glob`.

    Results are memoised in `gg_cache`, so `git ls-files` is invoked at most
    once per distinct glob.

    Args:
        glob: pathspec relative to `git_root`.

    Returns:
        A set of file paths relative to `git_root`.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status.
    """
    if glob in gg_cache:
        return gg_cache[glob]
    # Run from the repository root: `git ls-files` prints paths relative to
    # the current directory, while callers treat the results as relative to
    # the root (they are re-based with os.path.relpath against OWNERS dirs).
    listing = subprocess.check_output(
        ['git', 'ls-files', os.path.join(git_root, glob)],
        cwd=git_root).decode('utf-8')
    r = set(listing.strip().splitlines())
    gg_cache[glob] = r
    return r
149
150
def expand_directives(root, directives):
    """Expand the directives of one OWNERS file into a glob -> owners table.

    A directive without globs applies to '**'.  Globs are processed from the
    one matching the most files to the one matching the fewest.  When a glob
    overlaps an entry added earlier, it inherits that entry's owners, and
    each of its files lying outside the overlap gets an entry of its own
    carrying only the glob's original owners.

    Args:
        root: directory of the OWNERS file, relative to the git root.
        directives: iterable of Directive tuples.

    Returns:
        An OrderedDict mapping glob (or single file path, relative to
        `root`) to a list of owners.  Every value is a distinct list, so
        callers may mutate one entry without affecting another.
    """
    globs = collections.OrderedDict()
    # build a table of glob --> owners
    for directive in directives:
        for glob in directive.globs or ['**']:
            who_list = globs.setdefault(glob, [])
            if directive.who not in who_list:
                who_list.append(directive.who)
    # expand owners for intersecting globs, widest glob first
    sorted_globs = sorted(globs,
                          key=lambda g: len(git_glob(full_dir(root, g))),
                          reverse=True)
    out_globs = collections.OrderedDict()
    for glob_add in sorted_globs:
        who_add = globs[glob_add]
        files_add = git_glob(full_dir(root, glob_add))
        # Snapshot the entries present before glob_add is inserted; entries
        # created while handling glob_add are not compared against it.
        pre_items = list(out_globs.items())
        out_globs[glob_add] = who_add.copy()
        for glob_have, who_have in pre_items:
            files_have = git_glob(full_dir(root, glob_have))
            intersect = files_have.intersection(files_add)
            if intersect:
                for f in sorted(files_add):  # sorted to ensure merge stability
                    if f not in intersect:
                        # Copy per entry: callers append inherited owners to
                        # these lists in place, and a shared list would leak
                        # one file's additions into every other file.
                        out_globs[os.path.relpath(
                            f, start=root)] = who_add.copy()
                for who in who_have:
                    if who not in out_globs[glob_add]:
                        out_globs[glob_add].append(who)
    return out_globs
181
182
def add_parent_to_globs(parent, globs, globs_dir):
    """Merge owners inherited from `parent` and its ancestors into `globs`.

    For every parent glob that shares files with a child glob, the parent's
    owners are appended to the child glob's owner list.  Child files that
    lie outside the shared set get their own entry holding a copy of the
    child glob's owners as they were before the merge.  The function then
    recurses into the parent's own parent.

    Args:
        parent: directory of the parent OWNERS file (as stored in
            Owners.parent); a falsy value means there is nothing to inherit.
        globs: glob -> owners-list mapping of the child; modified in place.
        globs_dir: directory the keys of `globs` are relative to.

    Raises:
        AssertionError: if `parent` matches no entry of `owners_data`.
    """
    if not parent:
        return
    for owners in owners_data:
        if owners.dir != parent:
            continue
        owners_globs = expand_directives(owners.dir, owners.directives)
        for oglob, oglob_who in owners_globs.items():
            files_parent = git_glob(full_dir(owners.dir, oglob))
            # Iterate a snapshot: per-file entries are inserted into `globs`
            # inside the loop.
            for gglob, gglob_who in list(globs.items()):
                files_child = git_glob(full_dir(globs_dir, gglob))
                intersect = files_parent.intersection(files_child)
                if not intersect:
                    continue
                gglob_who_orig = gglob_who.copy()
                for f in sorted(files_child):  # sorted for merge stability
                    if f not in intersect:
                        globs[os.path.relpath(
                            f, start=globs_dir)] = gglob_who_orig.copy()
                for who in oglob_who:
                    if who not in gglob_who:
                        gglob_who.append(who)
        add_parent_to_globs(owners.parent, globs, globs_dir)
        return
    # Raised explicitly rather than via `assert`, which is stripped under
    # `python -O` and would let a missing parent pass silently.
    raise AssertionError('no OWNERS data found for parent directory %r' %
                         (parent,))
207
208
# Work queue of Owners tuples still to be written, and the set of directories
# whose rules have already been emitted.
todo = collections.deque(owners_data)
done = set()
with open(args.out, 'w') as out:
    out.write('# Auto-generated by the tools/mkowners/mkowners.py tool\n')
    out.write('# Uses OWNERS files in different modules throughout the\n')
    out.write('# repository as the source of truth for module ownership.\n')
    # (glob, owners, dir) for every rule emitted so far, in output order
    written_globs = []
    while todo:
        head = todo.popleft()
        if head.parent and head.parent not in done:
            # the parent's rules must be written first; retry this one later
            todo.append(head)
            continue
        globs = expand_directives(head.dir, head.directives)
        add_parent_to_globs(head.parent, globs, head.dir)
        for glob, owners in globs.items():
            files = git_glob(full_dir(head.dir, glob))
            skip = False
            # Scan emitted rules from newest to oldest: CODEOWNERS is order
            # dependent, so the newest overlapping rule decides what
            # currently applies to these files.
            for glob1, owners1, dir1 in reversed(written_globs):
                files1 = git_glob(full_dir(dir1, glob1))
                if files <= files1 and sorted(owners) == sorted(owners1):
                    skip = True  # nothing new in this rule
                    break
                if files & files1:
                    # Some or all of these files are currently assigned
                    # differently by a newer rule; looking further back for
                    # an older rule with equal owners would cause a semantic
                    # change, so this rule has to be written.
                    break
            if not skip:
                out.write('/%s %s\n' %
                          (full_dir(head.dir, glob), ' '.join(owners)))
                written_globs.append((glob, owners, head.dir))
        done.add(head.dir)
242