1#!/usr/bin/python3 2# Copyright 2024 The Android Open Source Project 3# 4# Licensed under the Apache License, Version 2.0 (the "License"); 5# you may not use this file except in compliance with the License. 6# You may obtain a copy of the License at 7# 8# http://www.apache.org/licenses/LICENSE-2.0 9# 10# Unless required by applicable law or agreed to in writing, software 11# distributed under the License is distributed on an "AS IS" BASIS, 12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13# See the License for the specific language governing permissions and 14# limitations under the License. 15 16import argparse 17import datetime 18import logging 19import pathlib 20import re 21import shutil 22import subprocess 23import sys 24 25DESCRIPTION = ( 26 'Helper script for importing a snapshot from upstream Wayland protocol ' 27 'sources.') 28 29INTENDED_USAGE = (''' 30Intended Usage: 31 # Update the freedesktop.org subdirectory to version 1.32 32 # Check https://gitlab.freedesktop.org/wayland/wayland-protocols/-/tags 33 # for valid version tags. 34 ./import_snapshot.py freedesktop.org 1.32 35 36 # Update the chromium.org subdirectory to the latest 37 ./import_snapshot.py chromium.org main 38''') 39 40 41class GitRepo: 42 """Issues git commands against a local checkout located at some path.""" 43 44 def __init__(self, base: pathlib.PurePath): 45 logging.debug("GitRepo base %s", base) 46 self._base = base 47 48 @property 49 def base(self) -> pathlib.PurePath: 50 """Gets the base path used the repo.""" 51 return self._base 52 53 def _git(self, 54 cmd: list[str], 55 capture_output: bool = True, 56 check: bool = True) -> subprocess.CompletedProcess: 57 return subprocess.run(['git', '-C', self._base] + cmd, 58 capture_output=capture_output, 59 check=check, 60 text=True) 61 62 def get_hash_for_version(self, version) -> str: 63 """Gets the hash associated with a |version| tag or branch.""" 64 logging.debug("GitRepo.get_hash_for_version version %s", version) 65 return self._git(['show-ref', '--hash', 66 version]).stdout.splitlines()[0].strip() 67 68 def git_ref_name_for_version(self, version) -> str | None: 69 """Gets the named ref corresponding to |version|, if one exists.""" 70 logging.debug("GitRepo.get_ref_name_for_version version %s", version) 71 ref = self._git(['describe', '--all', '--exact-match', version], 72 check=False).stdout.splitlines()[0].strip() 73 if ref.startswith('tags/'): 74 return ref.removeprefix('tags/') 75 if ref.startswith('heads/'): 76 return ref.removeprefix('heads/') 77 return None 78 79 def get_files(self, version: str, 80 paths: list[pathlib.PurePath]) -> list[pathlib.Path]: 81 """Gets the list of files under |paths| that are part of the Git tree at |version|.""" 82 logging.debug("GitRepo.get_files version %s paths %s", version, paths) 83 stdout = self._git( 84 ['ls-tree', '-r', '--name-only', f'{version}^{{tree}}'] + 85 paths).stdout 86 return list(pathlib.PurePath(path) for path in stdout.splitlines()) 87 88 def assert_no_uncommitted_changes(self) -> None: 89 """Asserts that the repo has no uncommited changes.""" 90 r = self._git(['diff-files', '--quiet', '--ignore-submodules'], 91 check=False) 92 if r.returncode: 93 sys.exit('Error: Your tree is dirty') 94 95 r = self._git([ 96 'diff-index', '--quiet', '--ignore-submodules', '--cached', 'HEAD' 97 ], 98 check=False) 99 if r.returncode: 100 sys.exit('Error: You have staged changes') 101 102 def sparse_depth1_clone(self, 103 url: str, 104 version: str | None, 105 paths: list[str], 106 force_clean: bool = True) -> None: 107 """Performs a sparse clone with depth=1 of a repo. 108 109 A sparse clone limits the clone to a particular set of files, and not 110 all the files available in the repo. 111 112 A depth=1 clone fetches only the most recent version of each file 113 cloned, and not the entire history. 114 115 Together that makes the checkout be faster and take up less space on 116 disk, which is important for large repositories like the Chromium 117 source tree. 118 119 |url| gives the url to the remote repository to clone. 120 121 |version| gives the version to clone. If not specified, 'HEAD' is assumed. 122 123 Paths in |paths| are included in the sparse checkout, which also means 124 all files in the parents directories leading up to those directories are 125 included. if |paths| is an empty list, all files at the root of the 126 repository will be included. 127 128 |force_clean| ensures any existing checkout at |base| is removed. 129 Setting this to False speeds up testing changes to the script when 130 syncing a particular version, as it will only be cloned the first 131 time. 132 """ 133 logging.debug( 134 "GitRepo.sparse_depth1_clone url %s version %s paths %s force_clean %s", 135 url, version, paths, force_clean) 136 self._base.parent.mkdir(parents=True, exist_ok=True) 137 if force_clean and self._base.exists(): 138 shutil.rmtree(self._base) 139 140 if not self._base.exists(): 141 cmd = ['git', 'clone', '--filter=blob:none', '--depth=1'] 142 if paths: 143 cmd.extend(['--sparse']) 144 if version is not None and version != 'HEAD': 145 cmd.extend(['-b', version]) 146 cmd.extend([url, self._base]) 147 148 subprocess.run(cmd, capture_output=False, check=True, text=True) 149 150 if paths: 151 self._git(['sparse-checkout', 'add'] + paths) 152 153 def add(self, path: pathlib.Path) -> None: 154 """Stages a local file |path| in the index.""" 155 logging.debug("GitRepo.add path %s", path) 156 self._git(['add', path]) 157 158 def commit(self, 159 message: str, 160 allow_empty: bool = False, 161 auto_add: bool = True) -> None: 162 """Commits stages changed using |message|. 163 164 If |allow_empty| is true, an empty commit is allowed. 165 If |auto_add| is true, changed files are added automatically. 166 """ 167 logging.debug("GitRepo.commit message %s allow_empty %s auto_add %s", 168 message, allow_empty, auto_add) 169 cmd = ['commit', '-m', message] 170 if allow_empty: 171 cmd.extend(['--allow-empty']) 172 if auto_add: 173 cmd.extend(['-a']) 174 175 self._git(cmd, capture_output=False) 176 177 178class AndroidMetadata: 179 """Minimal set of functions for reading and updating METADATA files. 180 181 Officially these files are meant to be read and written using code 182 generated from 183 //build/soong/compliance/project_metadata_proto/project_metadata.proto, 184 but using it would require adding a dependency on Python protocol buffer 185 libraries as well as the generated code for the .proto file. 186 187 Instead we use the Python regex library module to parse and rewrite the 188 metadata, as we don't need to do anything really complicated. 189 """ 190 191 def __init__(self, metadata_path: pathlib.Path): 192 assert metadata_path.exists() 193 self._metadata_path: pathlib.Path = metadata_path 194 self._content: str | None = None 195 self._url: str | None = None 196 self._paths: list[pathlib.PurePath] | None = None 197 198 def _read_content(self) -> None: 199 if self._content is None: 200 with open(self._metadata_path, 'rt') as metadata_file: 201 self._content = metadata_file.read() 202 203 def _write_content(self) -> None: 204 if self._content is not None: 205 with open(self._metadata_path, 'wt') as metadata_file: 206 metadata_file.write(self._content) 207 208 def _read_raw_git_urls(self) -> None: 209 if self._url is None: 210 self._read_content() 211 212 paths = [] 213 URL_PATTERN = r'url\s*{\s*type:\s*GIT\s*value:\s*"([^"]*)"\s*}' 214 for url in re.findall(URL_PATTERN, self._content): 215 base_url = url 216 path = None 217 218 if '/-/tree/' in url: 219 base_url, path = url.split('/-/tree/') 220 _, path = path.split('/', 1) 221 elif '/+/' in url: 222 base_url, path = url.split('/+/') 223 _, path = path.split('/', 1) 224 225 if self._url and self._url != base_url: 226 sys.exit( 227 f'Error: Inconsistent git URLs in {self._metadata_path} ({self._url} vs {base_url})' 228 ) 229 230 self._url = base_url 231 if path: 232 paths.append(path) 233 234 self._paths = tuple(paths) 235 236 @property 237 def current_version(self) -> str: 238 """Obtains the current version according to the metadata.""" 239 self._read_content() 240 241 match = re.search(r'version: "([^"]*)"', self._content) 242 if not match: 243 sys.exit( 244 f'Error: Unable to determine current version from {self._metadata_path}' 245 ) 246 return match.group(1) 247 248 @property 249 def git_url(self) -> str: 250 """Obtains the git URL to use from the metadata.""" 251 self._read_raw_git_urls() 252 return self._url 253 254 @property 255 def git_paths(self) -> list[pathlib.PurePath]: 256 """Obtains the child paths to sync from the metadata. 257 258 This can be an empty list if the entire repo should be synced. 259 """ 260 self._read_raw_git_urls() 261 return list(self._paths) 262 263 def update_version_and_import_date(self, version: str) -> None: 264 """Updates the version and import date in the metadata. 265 266 |version| gives the version string to write. 267 The import date is set to the current date. 268 """ 269 self._read_content() 270 271 now = datetime.datetime.now() 272 self._content = re.sub(r'version: "[^"]*"', f'version: "{version}"', 273 self._content) 274 self._content = re.sub( 275 r'last_upgrade_date {[^}]*}', 276 (f'last_upgrade_date {{ year: {now.year} month: {now.month} ' 277 f'day: {now.day} }}'), self._content) 278 279 self._write_content() 280 281 282def must_ignore(path: pathlib.PurePath) -> bool: 283 """Checks if |path| should be ignored and not imported, as doing so might conflict with Android metadata..""" 284 IGNORE_PATTERNS: tuple[str] = ( 285 'METADATA', 286 'MODULE_LICENSE_*', 287 '**/OWNERS', 288 '**/Android.bp', 289 ) 290 ignore = any(path.match(pattern) for pattern in IGNORE_PATTERNS) 291 if ignore: 292 print('Ignoring source {path}') 293 return ignore 294 295 296def main(): 297 parser = argparse.ArgumentParser( 298 description=DESCRIPTION, 299 epilog=INTENDED_USAGE, 300 formatter_class=argparse.RawDescriptionHelpFormatter) 301 302 parser.add_argument('group', 303 default=None, 304 help='The subdirectory (group) to update') 305 306 parser.add_argument( 307 'version', 308 nargs='?', 309 default='HEAD', 310 help='The official version to import. Uses HEAD by default.') 311 312 parser.add_argument('--loglevel', 313 default='INFO', 314 choices=('DEBUG', 'INFO', 'WARNING', 'ERROR', 315 'CRITICAL'), 316 help='Logging level.') 317 318 parser.add_argument('--no-force-clean', 319 dest='force_clean', 320 default=True, 321 action='store_false', 322 help='Disables clean fetches of upstream code') 323 324 parser.add_argument( 325 '--no-remove-old-files', 326 dest='remove_old_files', 327 default=True, 328 action='store_false', 329 help= 330 'Disables syncing the previous version to determine what files to remove' 331 ) 332 333 args: argparse.ArgumentParser = parser.parse_args() 334 335 logging.basicConfig(level=getattr(logging, args.loglevel)) 336 337 base = pathlib.Path(sys.argv[0]).parent.resolve().absolute() 338 assert base.exists() 339 340 print( 341 f'Importing {args.group} Wayland protocols at {args.version} to {args.group}' 342 ) 343 344 target_git = GitRepo(base) 345 target_git.assert_no_uncommitted_changes() 346 target_group_path = base / args.group 347 348 meta = AndroidMetadata(target_group_path / 'METADATA') 349 350 print(f'Cloning {meta.git_url} [sparse/limited] at {args.version}') 351 import_new_git = GitRepo(base / '.import' / args.group / (args.version)) 352 import_new_git.sparse_depth1_clone(meta.git_url, 353 args.version, 354 meta.git_paths, 355 force_clean=args.force_clean) 356 import_new_hash = import_new_git.get_hash_for_version(args.version) 357 import_new_ref_name = import_new_git.git_ref_name_for_version(args.version) 358 print(f'Synced "{import_new_hash} ({import_new_ref_name})"') 359 import_new_files = import_new_git.get_files(import_new_hash, 360 meta.git_paths) 361 if args.remove_old_files: 362 print( 363 f'Cloning {meta.git_url} [sparse/limited] at prior {meta.current_version}' 364 ) 365 import_old_git = GitRepo(base / '.import' / args.group / 366 meta.current_version) 367 import_old_git.sparse_depth1_clone(meta.git_url, 368 meta.current_version, 369 meta.git_paths, 370 force_clean=args.force_clean) 371 import_old_hash = import_old_git.get_hash_for_version( 372 meta.current_version) 373 print(f'Synced "{import_old_hash}"') 374 import_old_files = import_old_git.get_files(import_old_hash, 375 meta.git_paths) 376 377 files_to_remove = set(import_old_files).difference(import_new_files) 378 for path in files_to_remove: 379 if must_ignore(path): 380 continue 381 old: pathlib.Path = target_group_path / path 382 logging.debug("removing old path %s", old) 383 old.unlink(missing_ok=True) 384 385 for path in import_new_files: 386 if must_ignore(path): 387 continue 388 src: pathlib.Path = import_new_git.base / path 389 dst: pathlib.Path = target_group_path / path 390 logging.debug("copying %s to %s", src, dst) 391 dst.parent.mkdir(parents=True, exist_ok=True) 392 shutil.copy(src, dst) 393 target_git.add(target_group_path / path) 394 395 meta.update_version_and_import_date(import_new_ref_name or import_new_hash) 396 target_git.add(target_group_path / 'METADATA') 397 398 message = f''' 399Update to {args.group} protocols {import_new_ref_name or import_new_hash} 400 401This imports {import_new_hash} from the upstream repository. 402 403Test: Builds 404'''.lstrip() 405 target_git.commit(message, allow_empty=True) 406 407 408if __name__ == '__main__': 409 main() 410