xref: /aosp_15_r20/external/wayland-protocols/import_snapshot.py (revision 6c119a463dd5c45dd05bbe67429293292dde15ee)
1#!/usr/bin/python3
2# Copyright 2024 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16import argparse
17import datetime
18import logging
19import pathlib
20import re
21import shutil
22import subprocess
23import sys
24
# Short summary of the script; passed to argparse as the parser description
# in main().
DESCRIPTION = (
    'Helper script for importing a snapshot from upstream Wayland protocol '
    'sources.')

# Worked command-line examples; passed to argparse as the help epilog in
# main(), which uses RawDescriptionHelpFormatter so the layout is kept as-is.
INTENDED_USAGE = ('''
Intended Usage:
    # Update the freedesktop.org subdirectory to version 1.32
    # Check https://gitlab.freedesktop.org/wayland/wayland-protocols/-/tags
    # for valid version tags.
    ./import_snapshot.py freedesktop.org 1.32

    # Update the chromium.org subdirectory to the latest
    ./import_snapshot.py chromium.org main
''')
39
40
41class GitRepo:
42    """Issues git commands against a local checkout located at some path."""
43
44    def __init__(self, base: pathlib.PurePath):
45        logging.debug("GitRepo base %s", base)
46        self._base = base
47
48    @property
49    def base(self) -> pathlib.PurePath:
50        """Gets the base path used the repo."""
51        return self._base
52
53    def _git(self,
54             cmd: list[str],
55             capture_output: bool = True,
56             check: bool = True) -> subprocess.CompletedProcess:
57        return subprocess.run(['git', '-C', self._base] + cmd,
58                              capture_output=capture_output,
59                              check=check,
60                              text=True)
61
62    def get_hash_for_version(self, version) -> str:
63        """Gets the hash associated with a |version| tag or branch."""
64        logging.debug("GitRepo.get_hash_for_version version %s", version)
65        return self._git(['show-ref', '--hash',
66                          version]).stdout.splitlines()[0].strip()
67
68    def git_ref_name_for_version(self, version) -> str | None:
69        """Gets the named ref corresponding to |version|, if one exists."""
70        logging.debug("GitRepo.get_ref_name_for_version version %s", version)
71        ref = self._git(['describe', '--all', '--exact-match', version],
72                        check=False).stdout.splitlines()[0].strip()
73        if ref.startswith('tags/'):
74            return ref.removeprefix('tags/')
75        if ref.startswith('heads/'):
76            return ref.removeprefix('heads/')
77        return None
78
79    def get_files(self, version: str,
80                  paths: list[pathlib.PurePath]) -> list[pathlib.Path]:
81        """Gets the list of files under |paths| that are part of the Git tree at |version|."""
82        logging.debug("GitRepo.get_files version %s paths %s", version, paths)
83        stdout = self._git(
84            ['ls-tree', '-r', '--name-only', f'{version}^{{tree}}'] +
85            paths).stdout
86        return list(pathlib.PurePath(path) for path in stdout.splitlines())
87
88    def assert_no_uncommitted_changes(self) -> None:
89        """Asserts that the repo has no uncommited changes."""
90        r = self._git(['diff-files', '--quiet', '--ignore-submodules'],
91                      check=False)
92        if r.returncode:
93            sys.exit('Error: Your tree is dirty')
94
95        r = self._git([
96            'diff-index', '--quiet', '--ignore-submodules', '--cached', 'HEAD'
97        ],
98                      check=False)
99        if r.returncode:
100            sys.exit('Error: You have staged changes')
101
102    def sparse_depth1_clone(self,
103                            url: str,
104                            version: str | None,
105                            paths: list[str],
106                            force_clean: bool = True) -> None:
107        """Performs a sparse clone with depth=1 of a repo.
108
109        A sparse clone limits the clone to a particular set of files, and not
110        all the files available in the repo.
111
112        A depth=1 clone fetches only the most recent version of each file
113        cloned, and not the entire history.
114
115        Together that makes the checkout be faster and take up less space on
116        disk, which is important for large repositories like the Chromium
117        source tree.
118
119        |url| gives the url to the remote repository to clone.
120
121        |version| gives the version to clone. If not specified, 'HEAD' is assumed.
122
123        Paths in |paths| are included in the sparse checkout, which also means
124        all files in the parents directories leading up to those directories are
125        included. if |paths| is an empty list, all files at the root of the
126        repository will be included.
127
128        |force_clean| ensures any existing checkout at |base| is removed.
129        Setting this to False speeds up testing changes to the script when
130        syncing a particular version, as it will only be cloned the first
131        time.
132        """
133        logging.debug(
134            "GitRepo.sparse_depth1_clone url %s version %s paths %s force_clean %s",
135            url, version, paths, force_clean)
136        self._base.parent.mkdir(parents=True, exist_ok=True)
137        if force_clean and self._base.exists():
138            shutil.rmtree(self._base)
139
140        if not self._base.exists():
141            cmd = ['git', 'clone', '--filter=blob:none', '--depth=1']
142            if paths:
143                cmd.extend(['--sparse'])
144            if version is not None and version != 'HEAD':
145                cmd.extend(['-b', version])
146            cmd.extend([url, self._base])
147
148            subprocess.run(cmd, capture_output=False, check=True, text=True)
149
150            if paths:
151                self._git(['sparse-checkout', 'add'] + paths)
152
153    def add(self, path: pathlib.Path) -> None:
154        """Stages a local file |path| in the index."""
155        logging.debug("GitRepo.add path %s", path)
156        self._git(['add', path])
157
158    def commit(self,
159               message: str,
160               allow_empty: bool = False,
161               auto_add: bool = True) -> None:
162        """Commits stages changed using |message|.
163
164        If |allow_empty| is true, an empty commit is allowed.
165        If |auto_add| is true, changed files are added automatically.
166        """
167        logging.debug("GitRepo.commit message %s allow_empty %s auto_add %s",
168                      message, allow_empty, auto_add)
169        cmd = ['commit', '-m', message]
170        if allow_empty:
171            cmd.extend(['--allow-empty'])
172        if auto_add:
173            cmd.extend(['-a'])
174
175        self._git(cmd, capture_output=False)
176
177
class AndroidMetadata:
    """Minimal set of functions for reading and updating METADATA files.

    Officially these files are meant to be read and written using code
    generated from
    //build/soong/compliance/project_metadata_proto/project_metadata.proto,
    but using it would require adding a dependency on Python protocol buffer
    libraries as well as the generated code for the .proto file.

    Instead we use the Python regex library module to parse and rewrite the
    metadata, as we don't need to do anything really complicated.
    """

    # Matches a `url { type: GIT value: "..." }` entry and captures the value.
    _URL_PATTERN = r'url\s*{\s*type:\s*GIT\s*value:\s*"([^"]*)"\s*}'

    # Separators between the repository URL and a "<ref>/<path>" suffix in a
    # browse-style URL, checked in this order.
    _TREE_MARKERS = ('/-/tree/', '/+/')

    def __init__(self, metadata_path: pathlib.Path):
        """Records |metadata_path|; exits the script if it does not exist.

        The file content is read lazily on first use.
        """
        if not metadata_path.exists():
            sys.exit(f'Error: {metadata_path} does not exist')
        self._metadata_path: pathlib.Path = metadata_path
        self._content: str | None = None
        self._url: str | None = None
        self._paths: list[str] | None = None

    def _read_content(self) -> None:
        """Loads the file content into memory, unless already loaded."""
        if self._content is None:
            with open(self._metadata_path, 'rt',
                      encoding='utf-8') as metadata_file:
                self._content = metadata_file.read()

    def _write_content(self) -> None:
        """Writes the in-memory content back to the file, if any was loaded."""
        if self._content is not None:
            with open(self._metadata_path, 'wt',
                      encoding='utf-8') as metadata_file:
                metadata_file.write(self._content)

    @classmethod
    def _split_tree_url(cls, url: str) -> tuple[str, str]:
        """Splits |url| into (repository url, path inside the repository).

        For a URL of the form "<repo><marker><ref>/<path>" the first
        '/'-separated component after the marker is dropped as the ref. The
        path is empty if the URL has no marker, or names only a ref.
        """
        for marker in cls._TREE_MARKERS:
            base_url, found, rest = url.partition(marker)
            if found:
                # NOTE(review): this treats the ref as a single path
                # component, so a ref that itself contains '/' would be split
                # incorrectly.
                _, _, path = rest.partition('/')
                return base_url, path
        return url, ''

    def _read_raw_git_urls(self) -> None:
        """Parses the GIT url entries into a repository URL and sub-paths.

        Exits the script if the entries do not all refer to the same
        repository URL.
        """
        if self._url is not None:
            return

        self._read_content()

        paths: list[str] = []
        for url in re.findall(self._URL_PATTERN, self._content):
            base_url, path = self._split_tree_url(url)

            if self._url and self._url != base_url:
                sys.exit(
                    f'Error: Inconsistent git URLs in {self._metadata_path} ({self._url} vs {base_url})'
                )

            self._url = base_url
            if path:
                paths.append(path)

        self._paths = paths

    @property
    def current_version(self) -> str:
        """Obtains the current version according to the metadata.

        Exits the script if no version entry is found.
        """
        self._read_content()

        match = re.search(r'version: "([^"]*)"', self._content)
        if not match:
            sys.exit(
                f'Error: Unable to determine current version from {self._metadata_path}'
            )
        return match.group(1)

    @property
    def git_url(self) -> str:
        """Obtains the git URL to use from the metadata.

        Exits the script if the metadata has no GIT url entry.
        """
        self._read_raw_git_urls()
        if self._url is None:
            sys.exit(
                f'Error: Unable to determine git URL from {self._metadata_path}'
            )
        return self._url

    @property
    def git_paths(self) -> list[str]:
        """Obtains the child paths to sync from the metadata.

        This can be an empty list if the entire repo should be synced.
        A new list is returned on each access.
        """
        self._read_raw_git_urls()
        return list(self._paths)

    def update_version_and_import_date(self, version: str) -> None:
        """Updates the version and import date in the metadata.

        |version| gives the version string to write.
        The import date is set to the current date.
        The updated content is written back to the file.
        """
        self._read_content()

        now = datetime.datetime.now()
        version_entry = f'version: "{version}"'
        date_entry = (f'last_upgrade_date {{ year: {now.year} month: {now.month} '
                      f'day: {now.day} }}')
        # Function replacements are inserted literally; a plain string would
        # be treated as a template, so a backslash in |version| would be
        # interpreted as an escape or group reference.
        self._content = re.sub(r'version: "[^"]*"', lambda _: version_entry,
                               self._content)
        self._content = re.sub(r'last_upgrade_date {[^}]*}',
                               lambda _: date_entry, self._content)

        self._write_content()
280
281
def must_ignore(path: pathlib.PurePath) -> bool:
    """Checks if |path| should be ignored and not imported.

    Importing these files might conflict with Android metadata. Prints a
    message naming the path when it is ignored.
    """
    # PurePath.match() compares a relative pattern against the trailing
    # components of the path, so each single-component pattern below matches
    # a file of that name at any depth, including the top level. (A '**/NAME'
    # pattern would not match a top-level NAME, as the pattern would have
    # more components than the path.)
    IGNORE_PATTERNS: tuple[str, ...] = (
        'METADATA',
        'MODULE_LICENSE_*',
        'OWNERS',
        'Android.bp',
    )
    ignore = any(path.match(pattern) for pattern in IGNORE_PATTERNS)
    if ignore:
        print(f'Ignoring source {path}')
    return ignore
294
295
def main():
    """Imports an upstream snapshot into one group subdirectory and commits it.

    Steps, in order:
      1. Parse the command line and check the target repo has no pending
         changes.
      2. Clone the requested version of the upstream repo named in the
         group's METADATA file into a '.import' directory next to this script.
      3. Unless disabled, also clone the version currently recorded in
         METADATA and delete local files that exist there but not in the new
         version.
      4. Copy and stage the new files, update METADATA, and commit.
    """
    parser = argparse.ArgumentParser(
        description=DESCRIPTION,
        epilog=INTENDED_USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument('group', help='The subdirectory (group) to update')

    parser.add_argument(
        'version',
        nargs='?',
        default='HEAD',
        help='The official version to import. Uses HEAD by default.')

    parser.add_argument('--loglevel',
                        default='INFO',
                        choices=('DEBUG', 'INFO', 'WARNING', 'ERROR',
                                 'CRITICAL'),
                        help='Logging level.')

    parser.add_argument('--no-force-clean',
                        dest='force_clean',
                        default=True,
                        action='store_false',
                        help='Disables clean fetches of upstream code')

    parser.add_argument(
        '--no-remove-old-files',
        dest='remove_old_files',
        default=True,
        action='store_false',
        help=
        'Disables syncing the previous version to determine what files to remove'
    )

    args: argparse.Namespace = parser.parse_args()

    logging.basicConfig(level=getattr(logging, args.loglevel))

    # The group subdirectories live next to this script.
    base = pathlib.Path(sys.argv[0]).parent.resolve()

    print(
        f'Importing {args.group} Wayland protocols at {args.version} to {args.group}'
    )

    # Fail early with a readable message if the group name is wrong.
    target_group_path = base / args.group
    metadata_path = target_group_path / 'METADATA'
    if not metadata_path.is_file():
        sys.exit(
            f'Error: No METADATA file for group {args.group} at {metadata_path}'
        )

    target_git = GitRepo(base)
    target_git.assert_no_uncommitted_changes()

    meta = AndroidMetadata(metadata_path)
    git_url = meta.git_url
    git_paths = meta.git_paths
    import_root = base / '.import' / args.group

    print(f'Cloning {git_url} [sparse/limited] at {args.version}')
    import_new_git = GitRepo(import_root / args.version)
    import_new_git.sparse_depth1_clone(git_url,
                                       args.version,
                                       git_paths,
                                       force_clean=args.force_clean)
    import_new_hash = import_new_git.get_hash_for_version(args.version)
    import_new_ref_name = import_new_git.git_ref_name_for_version(args.version)
    print(f'Synced "{import_new_hash} ({import_new_ref_name})"')
    import_new_files = import_new_git.get_files(import_new_hash, git_paths)

    if args.remove_old_files:
        # Read before METADATA is rewritten below.
        old_version = meta.current_version
        print(f'Cloning {git_url} [sparse/limited] at prior {old_version}')
        import_old_git = GitRepo(import_root / old_version)
        import_old_git.sparse_depth1_clone(git_url,
                                           old_version,
                                           git_paths,
                                           force_clean=args.force_clean)
        import_old_hash = import_old_git.get_hash_for_version(old_version)
        print(f'Synced "{import_old_hash}"')
        import_old_files = import_old_git.get_files(import_old_hash,
                                                    git_paths)

        # Files present upstream at the old version but gone in the new one.
        # Sorted so the removal order (and debug log) is deterministic.
        files_to_remove = set(import_old_files).difference(import_new_files)
        for path in sorted(files_to_remove):
            if must_ignore(path):
                continue
            old: pathlib.Path = target_group_path / path
            logging.debug("removing old path %s", old)
            old.unlink(missing_ok=True)

    for path in import_new_files:
        if must_ignore(path):
            continue
        src: pathlib.Path = import_new_git.base / path
        dst: pathlib.Path = target_group_path / path
        logging.debug("copying %s to %s", src, dst)
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(src, dst)
        target_git.add(dst)

    # Prefer the tag/branch name; fall back to the hash when there is none.
    new_version = import_new_ref_name or import_new_hash
    meta.update_version_and_import_date(new_version)
    target_git.add(metadata_path)

    message = f'''
Update to {args.group} protocols {new_version}

This imports {import_new_hash} from the upstream repository.

Test: Builds
'''.lstrip()
    # commit() stages changes to tracked files by default (auto_add), which
    # picks up the files unlinked above.
    target_git.commit(message, allow_empty=True)
406
407
# Run the import only when this file is executed as a script.
if __name__ == '__main__':
    main()
410