#!/usr/bin/env python3
#
# Copyright (C) 2021 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provides useful diff information for build artifacts.

Uses collected build artifacts from two separate build invocations to compare
output artifacts of these builds and/or the commands executed to generate them.

See the directory-level README for information about full usage, including
the collection step: a preparatory step required before invocation of this
tool.

Use `difftool.py --help` for full usage information of this tool.

Example Usage:
  ./difftool.py [left_dir] [right_dir] \
      --left_file [left_output_file] --right_file [right_output_file]

Difftool will compare [left_dir]/[left_output_file] and
[right_dir]/[right_output_file] and provide its best insightful analysis on the
differences between these files. The content and depth of this analysis depends
on the types of these files, and also on Difftool's verbosity mode. Difftool
may also use command data present in the left and right directories as part
of its analysis.
"""

import argparse
import enum
import functools
import json
import os
import pathlib
import re
import subprocess
import sys
from typing import Callable, Optional

import clangcompile
import commands
from collect import COLLECTION_INFO_FILENAME

# Given two files, produces a list of differences.
DiffFunction = Callable[[pathlib.Path, pathlib.Path], list[str]]


@functools.total_ordering
class DiffLevel(enum.Enum):
    """Defines the level of differences that should trigger a failure.

    E.g. when set to WARNING, differences deemed WARNING or SEVERE are taken
    into account while other differences (INFO, FINE etc.) will be ignored.

    Members are ordered by their numeric value, so
    SEVERE < WARNING < INFO < FINE.
    """

    SEVERE = 1
    WARNING = 2
    INFO = 3
    FINE = 4

    def __lt__(self, other):
        if self.__class__ is other.__class__:
            return self.value < other.value
        return NotImplemented


class EnumAction(argparse.Action):
    """Parses command line options into Enum types."""

    def __init__(self, **kwargs):
        # "type" is removed from kwargs before they reach argparse.Action;
        # the enum is kept here and used for the name lookup in __call__.
        enum_type = kwargs.pop("type", None)
        kwargs.setdefault("choices", [e.name for e in enum_type])
        super().__init__(**kwargs)
        self._enum = enum_type

    def __call__(self, parser, namespace, values, option_string=None):
        # Look the member up by name and store it on the namespace.
        setattr(namespace, self.dest, self._enum[values])


class ArtifactType(enum.Enum):
    """Kinds of build artifact; used to select the diff functions to run."""

    AUTO_INFER_FROM_SUFFIX = 0
    CC_OBJECT = 1
    CC_SHARED_LIBRARY = 2
    CC_OBJECT_WITH_DEBUG_SYMBOLS = 3
    OTHER = 99


# Maps the values accepted by --file_type to artifact types.
FILE_TYPE_CHOICES = {
    "auto": ArtifactType.AUTO_INFER_FROM_SUFFIX,
    "object": ArtifactType.CC_OBJECT,
    "object_with_debug_symbols": ArtifactType.CC_OBJECT_WITH_DEBUG_SYMBOLS,
    "shared_library": ArtifactType.CC_SHARED_LIBRARY,
}


def _artifact_type(file_path):
    """Infers the artifact type of file_path from its suffix."""
    ext = file_path.suffix
    if ext in (".o", ".a"):
        return ArtifactType.CC_OBJECT
    if ext == ".so":
        return ArtifactType.CC_SHARED_LIBRARY
    return ArtifactType.OTHER


# TODO(usta) use libdiff
def literal_diff(left_path: pathlib.Path,
                 right_path: pathlib.Path) -> list[str]:
    """Returns the output lines of `diff left_path right_path`.

    An empty list means `diff` reported no differences. If `diff` exits with
    a status other than 0 or 1 and wrote nothing to stdout, its stderr lines
    (or a generic message) are returned instead, so that a failed comparison
    is not mistaken for matching files.
    """
    proc = subprocess.run(
        ["diff", str(left_path), str(right_path)],
        check=False,
        capture_output=True,
        encoding="utf-8",
        # Compared files may contain bytes that are not valid UTF-8.
        errors="replace",
    )
    lines = proc.stdout.splitlines()
    if not lines and proc.returncode not in (0, 1):
        return (proc.stderr.splitlines()
                or [f"diff exited with status {proc.returncode}"])
    return lines


@functools.cache
def _diff_fns(artifact_type: ArtifactType,
              level: DiffLevel) -> list[DiffFunction]:
    """Returns the diff functions to run for the given type and level.

    The result is cached per (artifact_type, level), so callers share the
    returned list and must not mutate it.
    """
    fns = []
    if artifact_type in (ArtifactType.CC_OBJECT,
                         ArtifactType.CC_OBJECT_WITH_DEBUG_SYMBOLS):
        fns.append(clangcompile.nm_differences)
        if level >= DiffLevel.WARNING:
            fns.append(clangcompile.elf_differences)
            # NOTE(review): this file was recovered from a whitespace-mangled
            # copy; the bloaty selection is assumed to be nested under the
            # level check above -- confirm against upstream.
            if artifact_type == ArtifactType.CC_OBJECT_WITH_DEBUG_SYMBOLS:
                fns.append(clangcompile.bloaty_differences_compileunits)
            else:
                fns.append(clangcompile.bloaty_differences)
    else:
        fns.append(literal_diff)
    return fns


def collect_commands_bazel(expr: str, config: str, mnemonic: str, *args):
    """Runs `bazel aquery` on expr and returns the actions with mnemonic.

    Extra positional args are passed through to bazel before the query
    expression. Bazel's stderr is printed. Returns an empty list when the
    JSON output has no "actions" entry.
    """
    bazel_tool_path = pathlib.Path("build/bazel/bin/bazel").resolve().absolute()
    bazel_proc = subprocess.run(
        [
            bazel_tool_path,
            "aquery",
            "--curses=no",
            "--config=bp2build",
            "--output=jsonproto",
            f"--config={config}",
            *args,
            str(expr),
        ],
        capture_output=True,
        encoding="utf-8",
    )
    print(bazel_proc.stderr)
    actions_json = json.loads(bazel_proc.stdout)
    return [
        action for action in actions_json.get("actions", [])
        if action["mnemonic"] == mnemonic
    ]


def collect_commands_ninja(ninja_file_path: pathlib.Path,
                           output_file_path: pathlib.Path,
                           ninja_tool_path: pathlib.Path) -> list[str]:
    """Returns a list of all command lines required to build the file at given
    output_file_path, as described by the ninja file present at
    ninja_file_path.

    Raises subprocess.CalledProcessError if ninja exits with a non-zero
    status.
    """
    result = subprocess.check_output([
        str(ninja_tool_path),
        "-f",
        # collect_commands passes a plain file name here, not a Path.
        str(ninja_file_path),
        "-t",
        "commands",
        str(output_file_path),
    ]).decode("utf-8")
    return result.splitlines()


def collect_commands(ninja_file_path: pathlib.Path,
                     output_file_path: pathlib.Path) -> list[str]:
    """Returns the command lines ninja reports for building output_file_path.

    Ninja is invoked from the directory containing ninja_file_path; the
    previous working directory is restored afterwards, even on failure.
    """
    # Resolve the tool path before changing directory: it is relative to the
    # current working directory.
    ninja_tool_path = pathlib.Path(
        "prebuilts/build-tools/linux-x86/bin/ninja").resolve()
    wd = os.getcwd()
    try:
        os.chdir(ninja_file_path.parent.absolute())
        return collect_commands_ninja(
            ninja_file_path.name,
            output_file_path,
            ninja_tool_path,
        )
    finally:
        os.chdir(wd)


def file_differences(
        left_path: pathlib.Path,
        right_path: pathlib.Path,
        level=DiffLevel.SEVERE,
        file_type=ArtifactType.AUTO_INFER_FROM_SUFFIX) -> list[str]:
    """Returns differences between the two given files.

    Returns the empty list if these files are deemed "similar enough".
    A missing file, or (when the type is inferred) mismatched file types,
    is reported as a difference without running any diff function.
    """
    errors = []
    if not left_path.is_file():
        errors.append(f"{left_path} does not exist")
    if not right_path.is_file():
        errors.append(f"{right_path} does not exist")
    if errors:
        return errors

    if file_type is ArtifactType.AUTO_INFER_FROM_SUFFIX:
        file_type = _artifact_type(left_path)
        right_type = _artifact_type(right_path)
        if file_type != right_type:
            errors.append(f"file types differ: {file_type} and {right_type}")
            return errors

    for fn in _diff_fns(file_type, level):
        errors += fn(left_path, right_path)
    return errors


def parse_collection_info(
        info_file_path: pathlib.Path) -> tuple[pathlib.Path, Optional[str]]:
    """Parses the collection info file at the given path and returns details.

    Returns:
      A (ninja_path, target_file) pair. ninja_path comes from the first line
      of the file; target_file is the second line, or None if that line is
      absent or empty.

    Raises:
      Exception: if the info file does not exist or is empty.
    """
    if not info_file_path.is_file():
        raise Exception(f"Expected file {info_file_path} was not found. "
                        "Did you run collect.py for this directory?")
    info_contents = info_file_path.read_text().splitlines()
    if not info_contents:
        raise Exception(f"Collection info file {info_file_path} is empty. "
                        "Did you run collect.py for this directory?")
    ninja_path = pathlib.Path(info_contents[0])
    target_file = None
    if len(info_contents) > 1 and info_contents[1]:
        target_file = info_contents[1]
    return ninja_path, target_file


# Pattern to parse out env-setting command prefix, for example:
#
# FOO=BAR KEY=VALUE {main_command_args}
env_set_prefix_pattern = re.compile("^(( )*([^ =]+=[^ =]+)( )*)+(.*)$")

# Pattern to parse out command prefixes which cd into the execroot and
# then remove the old output. For example:
#
# cd path/to/execroot && rm old_output && {main_command}
cd_rm_prefix_pattern = re.compile("^cd [^&]* &&( )+rm [^&]* && (.*)$")

# Pattern to parse out any trailing comment suffix. For example:
#
# {main_command} # This comment should be removed.
comment_suffix_pattern = re.compile("(.*) # .*")


def _remove_rbe_tokens(tokens, tool_endings):
    """Drops the tokens that precede the first token naming a known tool.

    Returns tokens starting at the first one that ends with any of
    tool_endings, or None if no token does.
    """
    endings = tuple(tool_endings)
    for index, token in enumerate(tokens):
        if token.endswith(endings):
            return tokens[index:]
    return None


def rich_command_info(raw_command):
    """Returns a command info object describing the raw command string."""
    cmd = raw_command.strip()
    # Remove things unrelated to the core command. Each pattern is applied in
    # order; on a match, the command is replaced by the given capture group.
    for pattern, group in (
            (env_set_prefix_pattern, 5),
            (cd_rm_prefix_pattern, 2),
            (comment_suffix_pattern, 1),
    ):
        m = pattern.fullmatch(cmd)
        if m is not None:
            cmd = m.group(group)

    tokens = cmd.split()
    tokens_without_rbe = _remove_rbe_tokens(tokens, ["clang", "clang++"])
    if tokens_without_rbe:
        tokens = tokens_without_rbe
    tool = tokens[0]
    args = tokens[1:]
    if tool.endswith(("clang", "clang++")):
        # TODO(cparsons): Disambiguate between clang compile and other clang
        # commands.
        return clangcompile.ClangCompileInfo(tool=tool, args=args)
    return commands.CommandInfo(tool=tool, args=args)


def _final_command_info(ninja_path, output_file):
    """Returns command info for the last command ninja lists for output_file.

    Raises:
      RuntimeError: if ninja lists no commands for output_file.
    """
    cmds = collect_commands(ninja_path, output_file)
    if not cmds:
        raise RuntimeError(
            f"ninja reported no commands for {output_file} in {ninja_path}")
    return rich_command_info(cmds[-1])


def main():
    """Parses arguments, diffs the two output files and reports the result.

    Exits with status 1 if differences were found and 0 otherwise.
    """
    parser = argparse.ArgumentParser(description="")
    parser.add_argument(
        "--level",
        action=EnumAction,
        default=DiffLevel.SEVERE,
        type=DiffLevel,
        help="the level of differences to be considered. "
        "Diffs below the specified level are ignored.")
    parser.add_argument(
        "--verbose",
        "-v",
        action=argparse.BooleanOptionalAction,
        default=False,
        help="log verbosely.")
    parser.add_argument(
        "left_dir",
        help="the 'left' directory to compare build outputs "
        "from. This must be the target of an invocation of collect.py.")
    parser.add_argument(
        "--left_file",
        "-l",
        dest="left_file",
        default=None,
        help="the output file (relative to execution root) for "
        "the 'left' build invocation.")
    parser.add_argument(
        "right_dir",
        help="the 'right' directory to compare build outputs "
        "from. This must be the target of an invocation of collect.py.")
    parser.add_argument(
        "--right_file",
        "-r",
        dest="right_file",
        default=None,
        help="the output file (relative to execution root) "
        "for the 'right' build invocation.")
    parser.add_argument(
        "--file_type",
        dest="file_type",
        default="auto",
        choices=list(FILE_TYPE_CHOICES),
        help="the type of file being diffed (overrides automatic "
        "filetype resolution)")
    parser.add_argument(
        "--allow_missing_file",
        action=argparse.BooleanOptionalAction,
        default=False,
        help="allow a missing output file; this is useful to "
        "compare actions even in the absence of an output file.")

    args = parser.parse_args()
    level = args.level
    left_dir = pathlib.Path(args.left_dir)
    right_dir = pathlib.Path(args.right_dir)

    left_diffinfo = left_dir.joinpath(COLLECTION_INFO_FILENAME)
    right_diffinfo = right_dir.joinpath(COLLECTION_INFO_FILENAME)

    left_ninja_name, left_file = parse_collection_info(left_diffinfo)
    right_ninja_name, right_file = parse_collection_info(right_diffinfo)

    # Files given on the command line override those recorded by collect.py.
    if args.left_file:
        left_file = pathlib.Path(args.left_file)
    if args.right_file:
        right_file = pathlib.Path(args.right_file)

    if left_file is None:
        raise Exception("No left file specified. Either run collect.py with a "
                        "target file, or specify --left_file.")
    if right_file is None:
        raise Exception("No right file specified. Either run collect.py with a "
                        "target file, or specify --right_file.")

    left_path = left_dir.joinpath(left_file)
    right_path = right_dir.joinpath(right_file)
    if not args.allow_missing_file:
        if not left_path.is_file():
            raise RuntimeError(f"Expected file {left_path} was not found. ")
        if not right_path.is_file():
            raise RuntimeError(f"Expected file {right_path} was not found. ")

    file_diff_errors = file_differences(left_path, right_path, level,
                                        FILE_TYPE_CHOICES[args.file_type])

    if not file_diff_errors:
        print(f"{left_file} matches\n{right_file}")
        sys.exit(0)

    for err in file_diff_errors:
        print(err)
    if args.verbose:
        left_command_info = _final_command_info(
            left_dir.joinpath(left_ninja_name), left_file)
        right_command_info = _final_command_info(
            right_dir.joinpath(right_ninja_name), right_file)
        print("======== ACTION COMPARISON: ========")
        print("=== LEFT ONLY:\n")
        print(left_command_info.compare(right_command_info))
        print()
        print("=== RIGHT ONLY:\n")
        print(right_command_info.compare(left_command_info))
        print()
    sys.exit(1)


if __name__ == "__main__":
    main()