# Copyright 2023 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Generates BUILD.gn files from rules in Bazel workspace."""

import argparse
import json
import os
import re

from collections import defaultdict, deque
from pathlib import Path, PurePath, PurePosixPath
from typing import (
    Deque,
    IO,
    Iterable,
    Iterator,
    Set,
)

from pw_build.bazel_query import (
    ParseError,
    BazelLabel,
    BazelRule,
    BazelWorkspace,
)
from pw_build.gn_target import GnTarget
from pw_build.gn_writer import GnFile


class BazelToGnConverter:
    """Manages the conversion of Bazel rules into GN targets."""

    def __init__(self, pw_root: Path) -> None:
        """Creates a converter that initially knows only about Pigweed.

        Args:
            pw_root: Path to Pigweed directory, e.g. "$PW_ROOT".
        """
        # Third party module names, keyed by Bazel repo name and by GN build
        # argument respectively.
        self._names_by_repo: dict[str, str] = {}
        self._names_by_build_arg: dict[str, str] = {}
        # Labels discovered as dependencies that have not been loaded yet.
        self._pending: Deque[BazelLabel] = deque()
        # String forms of labels that have already been handed out.
        self._loaded: Set[str] = set()
        # Source directories. Third party entries are keyed by GN build
        # argument (see `parse_args_gn`).
        self._source_dirs: dict[str, PurePath] = {
            'pigweed': pw_root,
        }
        self._workspaces: dict[str, BazelWorkspace] = {
            'pigweed': BazelWorkspace(
                'com_google_pigweed', pw_root, fetch=False
            ),
        }
        # Revision recorded per third party module name by
        # `_get_http_archives`.
        self._revisions: dict[str, str] = {}

    def get_name(
        self,
        label: BazelLabel | None = None,
        repo: str | None = None,
        build_arg: str | None = None,
    ) -> str:
        """Returns the name of a third-party module.

        Exactly one of the "label", "repo" or "build_arg" keyword arguments must
        be provided.

        Args:
            label: Bazel label referring to the third party module.
            repo: Bazel repository of the third party module,
                e.g. "com_google_foo".
            build_arg: GN build argument of the third party module,
                e.g. "$dir_pw_third_party_foo".

        Raises:
            AssertionError: If none, or more than one, of the keyword
                arguments is provided.
            ParseError: If the repo or build argument is not recognized.
        """
        # Raise explicitly rather than using `assert`, so that the argument
        # checks are not stripped when Python runs with `-O`.
        if label:
            if repo:
                raise AssertionError('multiple keyword arguments provided')
            repo = label.repo()
        if repo and build_arg:
            raise AssertionError('multiple keyword arguments provided')
        try:
            if repo:
                return self._names_by_repo[repo]
            if build_arg:
                return self._names_by_build_arg[build_arg]
        except KeyError as e:
            raise ParseError(
                f'unrecognized third party module: "{e.args[0]}"; '
                'does it have a bazel_to_gn.json file?'
            ) from e
        raise AssertionError('no keyword arguments provided')

    def get_source_dir(self, name: str) -> PurePath:
        """Returns the source directory for a third-party module.

        Args:
            name: Name of the third party module.

        Raises:
            KeyError: If no source directory is recorded for the module's GN
                build argument.
        """
        build_arg = self._build_arg(name)
        source_dir = self._source_dirs.get(build_arg)
        if not source_dir:
            raise KeyError(f'GN build argument not set: "{build_arg}"')
        return source_dir

    def parse_args_gn(self, file: IO) -> None:
        """Reads third party build arguments from args.gn.

        Every line containing an assignment of a quoted string to a
        `dir_pw_third_party_*` variable is recorded as a source directory,
        keyed by the variable name prefixed with "$".

        Args:
            file: File-like object to read from.
        """
        build_arg_pat = re.compile(
            r'(dir_pw_third_party_\S*)\s*=\s*"([^"]*)"'
        )
        for line in file:
            match = build_arg_pat.search(line)
            if match:
                build_arg = f'${match.group(1)}'
                source_dir = PurePath(match.group(2))
                self._source_dirs[build_arg] = source_dir

    def load_workspace(self, name: str, bazel_to_gn: IO) -> None:
        """Parses a bazel_to_gn.json file and loads the workspace it describes.

        Recognized fields include:
            repo:     The Bazel name of the repository.
            generate: Disables generating GN if present and set to `False`.
            targets:  A list of Bazel labels to generate GN for.
            options:  A dictionary of mapping Bazel flags to build settings.

        Args:
            name: Name of a third party module.
            bazel_to_gn: A file-like object describing the Bazel workspace.

        Raises:
            KeyError: If the "repo" field is missing, or if GN is to be
                generated and the module's source directory is not set.
        """
        json_data = json.load(bazel_to_gn)
        generate = json_data.get('generate', True)
        # The source directory is only looked up when GN will be generated.
        source_dir = None
        if generate:
            source_dir = self.get_source_dir(name)
        repo = json_data['repo']
        workspace = BazelWorkspace(repo, source_dir)
        workspace.generate = generate
        workspace.targets = json_data.get('targets', [])
        workspace.options = json_data.get('options', {})
        self._names_by_repo[repo] = name
        self._workspaces[name] = workspace

    def get_initial_targets(self, name: str) -> list[BazelLabel]:
        """Returns the labels of the targets listed for a third party module.

        The target strings are also recorded as loaded. They are added to the
        existing set instead of replacing it, so this method can be called for
        several modules in a row without discarding earlier bookkeeping.

        Args:
            name: Name of a previously loaded repo.

        Raises:
            KeyError: If no workspace has been loaded under `name`.
        """
        workspace = self._workspaces[name]
        repo = workspace.repo()
        targets = set(workspace.targets)
        # NOTE(review): the strings recorded here are the targets exactly as
        # written in bazel_to_gn.json, while `pending` compares against
        # `str(label)`. Confirm that the two forms agree.
        self._loaded.update(targets)
        return [BazelLabel(short, repo=repo) for short in targets]

    def pending(self) -> Iterable[BazelLabel]:
        """Yields the queued labels that have not been loaded yet.

        Labels are taken from the front of the queue until it is empty. Each
        yielded label is recorded as loaded; labels already recorded are
        skipped.
        """
        while self._pending:
            label = self._pending.popleft()
            if str(label) in self._loaded:
                continue
            self._loaded.add(str(label))
            yield label

    def load_rules(self, labels: list[BazelLabel]) -> Iterable[BazelRule]:
        """Queries Bazel workspaces and yields the rules for the given labels.

        Labels are grouped by repository and each group is looked up in the
        corresponding workspace. The labels named by the "deps" and
        "implementation_deps" attributes of every rule are collected and, once
        this generator has been fully consumed, appended to the queue of
        pending labels.

        Args:
            labels: The Bazel labels indicating the workspaces and targets.

        Raises:
            AssertionError, ParseError: See `get_name`.
        """
        by_repo: dict[str, list[BazelLabel]] = defaultdict(list)
        deps: Set[str] = set()
        for label in labels:
            by_repo[label.repo()].append(label)
        for repo, labels_for_repo in by_repo.items():
            name = self.get_name(repo=repo)
            workspace = self._workspaces[name]
            for rule in workspace.get_rules(labels_for_repo):
                package = rule.label().package()
                for attr_name in ['deps', 'implementation_deps']:
                    for dep in rule.get_list(attr_name):
                        dep_label = BazelLabel(
                            dep, repo=repo, package=package
                        )
                        deps.add(str(dep_label))
                yield rule
        # Only reached when the caller exhausts the generator.
        self._pending.extend([BazelLabel(dep) for dep in deps])

    def package(self, rule: BazelRule) -> str:
        """Returns the relative path to the BUILD.gn corresponding to a rule.

        The relative path is relative to $dir_pw_third_party, and consists of
        the third party module name and the package portion of the Bazel label.

        Args:
            rule: The rule to get the relative path for.
        """
        label = rule.label()
        name = self.get_name(label=label)
        return f'{name}/{label.package()}'

    def convert_rule(self, rule: BazelRule) -> GnTarget:
        """Creates a GN target from a Bazel rule.

        Only "cc_library" rules are supported. They become
        "<name>_static_library" targets when "linkstatic" is set and
        "<name>_source_set" targets otherwise, with dashes in the module name
        replaced by underscores.

        Args:
            rule: The rule to convert into a GnTarget.

        Raises:
            ParseError: If the rule is of an unsupported kind.
        """
        label = rule.label()
        name = self.get_name(label=label)
        if rule.kind() == 'cc_library':
            if rule.get_bool('linkstatic'):
                target_type = f'{name}_static_library'.replace('-', '_')
            else:
                target_type = f'{name}_source_set'.replace('-', '_')
        else:
            raise ParseError(f'unsupported Bazel kind: {rule.kind()}')
        gn_target = GnTarget(target_type, label.target())
        gn_target.origin = str(label)
        # Maps each GN attribute to the Bazel attribute it is derived from.
        gn_target.attrs = {
            'public': list(self._source_relative(name, rule, 'hdrs')),
            'sources': list(self._source_relative(name, rule, 'srcs')),
            'inputs': list(
                self._source_relative(name, rule, 'additional_linker_inputs')
            ),
            'include_dirs': list(self._source_relative(name, rule, 'includes')),
            'cflags': rule.get_list('copts'),
            'public_defines': rule.get_list('defines'),
            'ldflags': rule.get_list('linkopts'),
            'defines': rule.get_list('local_defines'),
            'public_deps': list(self._build_relative(name, rule, 'deps')),
            'deps': list(
                self._build_relative(name, rule, 'implementation_deps')
            ),
        }

        return gn_target

    def num_loaded(self) -> int:
        """Returns the number of rules loaded thus far."""
        return len(self._loaded)

    def get_workspace_revisions(self) -> Iterable[str]:
        """Yields a "name: revision" line for each generated workspace.

        The Pigweed workspace and workspaces with generation disabled are
        skipped.
        """
        for name, workspace in self._workspaces.items():
            if name == 'pigweed':
                continue
            if workspace.generate:
                yield f'{name:<16}: {workspace.revision()}'

    def update_pw_package(
        self, name: str, lines: Iterator[str]
    ) -> Iterable[str]:
        """Updates the third party package revision in the pw_package module.

        Lines containing a quoted `commit=` value are rewritten to the target
        revision when the workspace reports an older timestamp for the current
        value; all other lines are passed through with trailing whitespace
        removed. A final empty string is yielded so that joining the result
        with newlines ends in a newline.

        Args:
            name: Name of the third party module.
            lines: Contents of the existing pw_package package file.
        """
        workspace = self._workspaces[name]
        if name in self._revisions:
            revision = self._revisions[name]
        else:
            revision = workspace.revision('HEAD')
        commit_pat = re.compile(r'(.*commit=[\'"])([a-z0-9]*)([\'"],.*)')
        for line in lines:
            line = line.rstrip()
            m = commit_pat.match(line)
            if not m:
                yield line
                continue
            current = m.group(2)
            if workspace.timestamp(current) < workspace.timestamp(revision):
                yield f'{m.group(1)}{revision}{m.group(3)}'
            else:
                yield line
        yield ''

    def get_imports(self, gn_target: GnTarget) -> Iterable[str]:
        """Returns the GNI files needed by the given target."""
        for build_arg in gn_target.build_args():
            name = self.get_name(build_arg=build_arg)
            yield f'$dir_pw_third_party/{name}/{name}.gni'

    def update_doc_rst(self, name: str, lines: Iterator[str]) -> Iterable[str]:
        """Replaces the "Version" part of docs.rst with the latest revision.

        This will truncate everything after the "generated section" comment and
        add the comment and version information. If the file does not have the
        comment, the comment and information will be appended to the end of
        the file.

        Args:
            name: Name of the third party module.
            lines: Iterator of lines.
        """
        workspace = self._workspaces[name]
        comment = '.. DO NOT EDIT BELOW THIS LINE. Generated section.'
        # `str.rstrip('.git')` would strip any trailing run of the characters
        # ".", "g", "i" and "t" (e.g. "libgit.git" -> "lib"), so remove the
        # exact suffix instead.
        url = workspace.url().removesuffix('.git')
        revision = workspace.revision()
        short = revision[:8]
        for line in lines:
            line = line.rstrip()
            if line == comment:
                break
            yield line
        yield comment
        yield ''
        yield 'Version'
        yield '======='
        yield f'The update script was last run for revision `{short}`_.'
        yield ''
        yield f'.. _{short}: {url}/tree/{revision}'
        yield ''

    def _build_arg(self, name: str) -> str:
        """Returns the GN build argument for a third party module.

        The mapping from build argument back to module name is recorded the
        first time a build argument is seen.
        """
        build_arg = f'$dir_pw_third_party_{name}'.replace('-', '_')
        if build_arg not in self._names_by_build_arg:
            self._names_by_build_arg[build_arg] = name
        return build_arg

    def _source_relative(
        self, name: str, rule: BazelRule, attr_name: str
    ) -> Iterable[str]:
        """Provides GN paths relative to the third party source directory.

        Yields nothing if the rule does not have the attribute.

        Raises:
            ParseError: If the attribute is neither a "string_list" nor a
                "label_list".
        """
        if not rule.has_attr(attr_name):
            return
        attr_type = rule.attr_type(attr_name)
        build_arg = self._build_arg(name)
        repo = rule.label().repo()
        if attr_type == 'string_list':
            for item in rule.get_list(attr_name):
                yield f'{build_arg}/{item}'
        elif attr_type == 'label_list':
            for item in rule.get_list(attr_name):
                label = BazelLabel(item, repo=repo)
                yield f'{build_arg}/{label.package()}/{label.target()}'
        else:
            raise ParseError(f'unknown attribute type: {attr_type}')

    def _build_relative(
        self, name: str, rule: BazelRule, attr_name: str
    ) -> Iterable[str]:
        """Provides GN labels relative to the directory under //third_party."""
        label = rule.label()
        repo = label.repo()
        # The rule's own package does not change between dependencies.
        package = f'{name}/{label.package()}'
        for other_str in rule.get_list(attr_name):
            other = BazelLabel(other_str, repo=repo, package=label.package())
            other_package = f'{self.get_name(label=other)}/{other.package()}'

            # Abbreviate the label only if it is part of the same repo.
            if label.repo() != other.repo():
                path = PurePosixPath('$dir_pw_third_party', other_package)
            elif other_package == package:
                path = None
            else:
                # Build a relative path: walk up from this package to the
                # common ancestor, then down to the other package.
                path = PurePosixPath(package)
                other_path = PurePosixPath(other_package)
                common = PurePosixPath(
                    *os.path.commonprefix([path.parts, other_path.parts])
                )
                walk_up = PurePosixPath(
                    *(['..'] * (len(path.parts) - len(common.parts)))
                )
                walk_down = other_path.relative_to(common)
                path = PurePosixPath(walk_up, walk_down)

            if not path:
                yield f':{other.target()}'
            elif path.name == other.target():
                # The target name can be omitted when it matches the last
                # component of the path.
                yield f'{path}'
            else:
                yield f'{path}:{other.target()}'

    def _get_http_archives(self) -> dict[str, BazelRule]:
        """Returns a mapping of third party modules to rules.

        The returned rules described the most recently required version of the
        third party module.

        Raises:
            RuntimeError: If a workspace's HEAD is older than the revision
                recorded as required for it.
        """
        # First, examine http_archives in the third_party workspaces.
        http_archives = {}
        for name, workspace in self._workspaces.items():
            if name == 'pigweed':
                continue
            if not workspace.generate:
                continue
            for rule in workspace.get_http_archives():
                repo = rule.label().target()
                if repo not in self._names_by_repo:
                    continue
                other_name = self._names_by_repo[repo]
                other = self._workspaces[other_name]
                if not other.generate:
                    continue
                tag = rule.get_str('strip_prefix').replace(f'{other_name}-', '')
                revision = other.revision(tag)
                timestamp = other.timestamp(revision)
                # Keep this rule only if it is newer than the revision
                # recorded so far for the module.
                if other_name in self._revisions:
                    strictest = other.timestamp(self._revisions[other_name])
                    keep = strictest < timestamp
                else:
                    keep = True
                if keep:
                    http_archives[repo] = rule
                    self._revisions[other_name] = revision

        # Next, compare them to those in the WORKSPACE file.
        pigweed = self._workspaces['pigweed']
        for rule in pigweed.get_http_archives():
            repo = rule.label().target()
            if repo not in self._names_by_repo:
                continue
            name = self._names_by_repo[repo]
            workspace = self._workspaces[name]
            if not workspace.generate:
                continue
            if name not in self._revisions:
                # No revision recorded yet: point the rule at the workspace's
                # HEAD revision.
                old_rev = rule.get_str('strip_prefix').replace(f'{name}-', '')
                new_rev = workspace.revision('HEAD')
                rule.set_attr('strip_prefix', f'{name}-{new_rev}')
                if rule.has_attr('url'):
                    url = rule.get_str('url')
                    rule.set_attr('url', url.replace(old_rev, new_rev))
                if rule.has_attr('urls'):
                    urls = rule.get_list('urls')
                    urls = [url.replace(old_rev, new_rev) for url in urls]
                    rule.set_attr('urls', urls)
                keep = True
            else:
                tag = rule.get_str('strip_prefix').replace(f'{name}-', '')
                new_rev = workspace.revision(tag)
                timestamp = workspace.timestamp(new_rev)
                strictest = workspace.timestamp(self._revisions[name])
                keep = strictest < timestamp
            if keep:
                http_archives[repo] = rule
                self._revisions[name] = new_rev

        # Next, check that the current revisions satisfy the strict revisions.
        for name, workspace in self._workspaces.items():
            if name not in self._revisions:
                continue
            needed = workspace.timestamp(self._revisions[name])
            actual = workspace.timestamp('HEAD')
            if actual < needed:
                raise RuntimeError(f'{name} must be from after {needed}.')

        # Finally, return the mapping.
        return http_archives


def _require_pw_root() -> str:
    """Returns the value of $PW_ROOT, raising RuntimeError if it is unset."""
    pw_root = os.getenv('PW_ROOT')
    if not pw_root:
        raise RuntimeError('PW_ROOT is not set')
    return pw_root


def _parse_args() -> argparse.Namespace:
    """Parses the command line arguments.

    The build directory defaults to "$PW_ROOT/out". A relative build directory
    is resolved against $PW_ROOT.

    Raises:
        RuntimeError: If $PW_ROOT is needed to determine the build directory
            but is not set.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '-b',
        '--build_dir',
        type=PurePath,
        help=('Build output directory, which must contain "args.gn"'),
    )
    parser.add_argument(
        'names',
        type=str,
        nargs='+',
        help=(
            'Third-party dependencies to generate GN for. '
            'Must match a subdirectoy of $PW_ROOT/third_party'
        ),
    )
    args = parser.parse_args()

    if not args.build_dir:
        args.build_dir = PurePath(_require_pw_root(), 'out')
    elif not args.build_dir.is_absolute():
        # The parsed arguments have no `pw_root` attribute, so the root has to
        # come from the environment.
        args.build_dir = PurePath(_require_pw_root(), args.build_dir)

    return args


def _overprint(msg: str) -> None:
    """Prints with a carriage return instead of a newline.

    The message is padded to 80 columns so that it overwrites the previous
    message printed this way.
    """
    print(msg.ljust(80), end='\r', flush=True)


def _bazel_to_gn(args: argparse.Namespace) -> None:
    """Generates BUILD.gn files from rules in Bazel workspace.

    This script is intended to be as unit-testable as possible. As a result,
    most functionality has been pushed into testable methods of
    BazelToGnConverter.

    This method primarily consists of three things:
      1. Print statements to provide feedback to the user.
      2. File operations, to make subroutines more unit testable.
      3. Control flow and loops around the two previous categories.

    Args:
        args: Script arguments. See `_parse_args`.
    """
    build_dir = Path(args.build_dir)
    # The parent of the build directory is used as the Pigweed root.
    b2g = BazelToGnConverter(build_dir.parent)

    args_gn_path = build_dir.joinpath('args.gn')
    print(f'Reading build arguments from {args_gn_path}...')
    with open(args_gn_path, encoding='utf-8') as args_gn:
        b2g.parse_args_gn(args_gn)

    print('Converting Bazel rules and their dependencies to GN targets...')
    third_party_path = Path(build_dir.parent, 'third_party')
    for child in third_party_path.iterdir():
        # Directories without a bazel_to_gn.json file are skipped.
        try:
            if child.is_dir():
                json_path = child.joinpath('bazel_to_gn.json')
                with open(json_path, encoding='utf-8') as file:
                    b2g.load_workspace(child.name, file)
                print(f'Bazel workspace loaded for //third_party/{child.name}')
        except FileNotFoundError:
            pass

    print('Starting from:')
    # Accumulate the initial labels of every requested module. Rebinding a
    # single variable inside the loop would keep only the last module's.
    labels: list[BazelLabel] = []
    for name in args.names:
        try:
            initial = b2g.get_initial_targets(name)
        except KeyError:
            print(f'E: Unable to get initial targets for "{name}".')
            print(f'E: Is "//third_party/{name}/bazel_to_gn.json" missing?')
            return
        print(f' {len(initial)} initial rule(s) in {name}')
        labels.extend(initial)

    # Convert rules in rounds: each round loads the current labels, and the
    # dependencies it discovers become the labels for the next round.
    by_package: dict[str, list[GnTarget]] = defaultdict(list)
    while labels:
        for rule in b2g.load_rules(labels):
            by_package[b2g.package(rule)].append(b2g.convert_rule(rule))
            _overprint(f'[{b2g.num_loaded()}] {rule.label()}')
        labels = list(b2g.pending())
    print(f'[{b2g.num_loaded()}] Conversion complete!'.ljust(80))

    for package, gn_targets in sorted(by_package.items()):
        build_gn_path = third_party_path.joinpath(package, 'BUILD.gn')
        imports = set().union(
            *[b2g.get_imports(gn_target) for gn_target in gn_targets]
        )
        _overprint(f'Writing {build_gn_path}...')
        with GnFile(build_gn_path) as build_gn:
            build_gn.write_file(imports, gn_targets)

    # The first path component of each package is the third party module name.
    names = {package.split('/')[0] for package in by_package}

    for name in names:
        update_path = build_dir.parent.joinpath(
            'pw_package',
            'py',
            'pw_package',
            'packages',
            name.replace('-', '_') + '.py',
        )
        _overprint(f'Updating {update_path}...')
        # Read the whole file before reopening it for writing.
        with open(update_path, 'r', encoding='utf-8') as pkg_file:
            contents = '\n'.join(b2g.update_pw_package(name, pkg_file))
        with open(update_path, 'w', encoding='utf-8') as pkg_file:
            pkg_file.write(contents)
        print(f'Updated {update_path} with current revision.')

    for name in names:
        update_path = third_party_path.joinpath(name, 'docs.rst')
        _overprint(f'Updating {update_path}...')
        with open(update_path, 'r', encoding='utf-8') as docs_rst:
            contents = '\n'.join(b2g.update_doc_rst(name, docs_rst))
        with open(update_path, 'w', encoding='utf-8') as docs_rst:
            docs_rst.write(contents)
        print(f'Updated {update_path} with current revision.')

    print('Done!')

    print(
        'Make sure to update your WORKSPACE file to fetch the following '
        + 'revisions or later:'
    )
    for revision in b2g.get_workspace_revisions():
        print(revision)


if __name__ == '__main__':
    _bazel_to_gn(_parse_args())