#!/usr/bin/env python3
#
# Copyright (C) 2021 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provides useful diff information for build artifacts.

Uses collected build artifacts from two separate build invocations to
compare output artifacts of these builds and/or the commands executed
to generate them.

See the directory-level README for information about full usage, including
the collection step: a preparatory step required before invocation of this
tool.

Use `difftool.py --help` for full usage information of this tool.

Example Usage:
  ./difftool.py [left_dir] [right_dir] -l [left_output_file] -r [right_output_file]

Difftool will compare [left_dir]/[left_output_file] and
[right_dir]/[right_output_file] and provide its best insightful analysis on the
differences between these files. The content and depth of this analysis depends
on the types of these files, and also on Difftool's verbosity mode. Difftool
may also use command data present in the left and right directories as part of
its analysis.
"""

import argparse
import enum
import functools
import json
import os
import pathlib
import re
import subprocess
import sys
from typing import Callable

import clangcompile
import commands
from collect import COLLECTION_INFO_FILENAME

# Signature shared by the diff routines selected in _diff_fns(): each one takes
# the left and the right file path and returns the differences it found as a
# list of strings.
DiffFunction = Callable[[pathlib.Path, pathlib.Path], list[str]]
"""Given two files, produces a list of differences."""


@functools.total_ordering
class DiffLevel(enum.Enum):
  """Threshold selecting which classes of differences are reported.

  Members are ordered by their numeric value, so SEVERE is the smallest level
  and FINE the largest. Choosing a level includes every smaller level as well:
  with WARNING, both WARNING and SEVERE differences count, whereas INFO and
  FINE differences are ignored.
  """
  SEVERE = 1
  WARNING = 2
  INFO = 3
  FINE = 4

  def __lt__(self, other):
    # Ordering is only defined between members of this very enum; for anything
    # else let Python try the reflected operation or raise TypeError.
    if other.__class__ is not self.__class__:
      return NotImplemented
    return self.value < other.value


class EnumAction(argparse.Action):
  """Argparse action that stores an Enum member looked up by its name.

  Use it as `add_argument(..., action=EnumAction, type=SomeEnum)`. Unless
  `choices` is given explicitly, the names of the enum's members become the
  accepted choices. The parsed namespace receives the enum member itself, not
  the string typed on the command line.
  """

  def __init__(self, **kwargs):
    # `type` is consumed here and not forwarded to argparse.Action, so argparse
    # hands the raw command line string to __call__ below.
    enum_type = kwargs.pop("type", None)
    if enum_type is None:
      # Without this check the failure would be an opaque
      # "'NoneType' object is not iterable".
      raise TypeError("EnumAction requires `type` to be set to an Enum class")
    kwargs.setdefault("choices", [member.name for member in enum_type])
    super().__init__(**kwargs)
    self._enum = enum_type

  def __call__(self, parser, namespace, values, option_string=None):
    # `values` is a member name (one of `choices`); map it to the member.
    setattr(namespace, self.dest, self._enum[values])


class ArtifactType(enum.Enum):
  """Kinds of build artifact; the kind decides which diff functions are run."""
  # Placeholder: file_differences() replaces it with the type derived from the
  # file suffix by _artifact_type().
  AUTO_INFER_FROM_SUFFIX = 0
  # Inferred for files with a ".o" or ".a" suffix.
  CC_OBJECT = 1
  # Inferred for files with a ".so" suffix.
  CC_SHARED_LIBRARY = 2
  # Never inferred from a suffix; it has to be requested explicitly.
  CC_OBJECT_WITH_DEBUG_SYMBOLS = 3
  # Inferred for every other suffix.
  OTHER = 99


# Maps each value accepted by the --file_type command line flag to the
# ArtifactType it selects. ArtifactType.OTHER is intentionally absent: it can
# only result from automatic inference.
FILE_TYPE_CHOICES = {
    "auto": ArtifactType.AUTO_INFER_FROM_SUFFIX,
    "object": ArtifactType.CC_OBJECT,
    "object_with_debug_symbols": ArtifactType.CC_OBJECT_WITH_DEBUG_SYMBOLS,
    "shared_library": ArtifactType.CC_SHARED_LIBRARY,
}


def _artifact_type(file_path):
  """Infers the ArtifactType of a file from the suffix of its path.

  ".so" maps to CC_SHARED_LIBRARY, ".o" and ".a" map to CC_OBJECT, and any
  other suffix (including none at all) maps to OTHER.
  """
  suffix = file_path.suffix
  if suffix == ".so":
    return ArtifactType.CC_SHARED_LIBRARY
  if suffix in (".o", ".a"):
    return ArtifactType.CC_OBJECT
  return ArtifactType.OTHER


# TODO(usta) use libdiff
def literal_diff(left_path: pathlib.Path,
                 right_path: pathlib.Path) -> list[str]:
  """Returns the output of the `diff` utility for the two given files.

  Args:
    left_path: the file passed to `diff` as its first operand.
    right_path: the file passed to `diff` as its second operand.

  Returns:
    The lines `diff` wrote to its standard output; the empty list when the
    files are identical. When `diff` itself fails (any exit status other than
    0 or 1), a line describing the failure is appended, so that a failed
    comparison is never mistaken for "no differences".
  """
  proc = subprocess.run(
      ["diff", str(left_path), str(right_path)],
      check=False,
      capture_output=True,
      encoding="utf-8",
      # The compared files are not guaranteed to be valid UTF-8; substitute
      # undecodable bytes instead of raising UnicodeDecodeError.
      errors="replace")
  differences = proc.stdout.splitlines()
  # diff exits with 0 (no differences) or 1 (differences found) when the
  # comparison succeeded; anything else means trouble.
  if proc.returncode not in (0, 1):
    differences.append("diff exited with status %d: %s" %
                       (proc.returncode, proc.stderr.strip()))
  return differences


@functools.cache
def _diff_fns(artifact_type: ArtifactType,
              level: DiffLevel) -> list[DiffFunction]:
  """Selects the diff functions to run for an artifact type and diff level.

  Object files (with or without debug symbols) always get the nm comparison;
  from level WARNING upwards they additionally get the ELF comparison and a
  bloaty comparison (the compile-units variant when debug symbols are
  present). Every other artifact type is compared with literal_diff only.

  The result is cached per (artifact_type, level) pair, so repeated calls
  return the same list object; callers must not modify it.
  """
  has_debug_symbols = (
      artifact_type == ArtifactType.CC_OBJECT_WITH_DEBUG_SYMBOLS)
  if not has_debug_symbols and artifact_type != ArtifactType.CC_OBJECT:
    return [literal_diff]

  selected = [clangcompile.nm_differences]
  if level >= DiffLevel.WARNING:
    selected.append(clangcompile.elf_differences)
    selected.append(clangcompile.bloaty_differences_compileunits
                    if has_debug_symbols else clangcompile.bloaty_differences)
  return selected


def collect_commands_bazel(expr: str, config: str, mnemonic: str, *args):
  """Returns the bazel actions of the given mnemonic for a query expression.

  Runs `bazel aquery` (the bazel found at build/bazel/bin/bazel, relative to
  the current working directory) with JSON output and prints whatever bazel
  wrote to its standard error.

  Args:
    expr: the aquery expression to evaluate.
    config: name passed to bazel as `--config=<config>`, in addition to the
      always-present `--config=bp2build`.
    mnemonic: only actions whose "mnemonic" equals this value are returned.
    *args: extra command line arguments inserted before the expression.

  Returns:
    The list of matching entries of the "actions" array of the aquery output;
    empty when the output has no "actions" key.

  Raises:
    subprocess.CalledProcessError: if bazel exits with a non-zero status.
  """
  bazel_tool_path = pathlib.Path("build/bazel/bin/bazel").resolve()
  bazel_proc = subprocess.run(
      [
          str(bazel_tool_path),
          "aquery",
          "--curses=no",
          "--config=bp2build",
          "--output=jsonproto",
          f"--config={config}",
          *args,
          str(expr),
      ],
      check=False,
      capture_output=True,
      encoding="utf-8",
  )
  print(bazel_proc.stderr)
  # Fail with the real cause instead of a JSON decoding error on the (usually
  # empty) stdout of a failed invocation. stderr was already printed above.
  bazel_proc.check_returncode()
  actions_json = json.loads(bazel_proc.stdout)
  return [
      action for action in actions_json.get("actions", [])
      if action["mnemonic"] == mnemonic
  ]


def collect_commands_ninja(ninja_file_path: pathlib.Path,
                           output_file_path: pathlib.Path,
                           ninja_tool_path: pathlib.Path) -> list[str]:
  """Lists the command lines ninja reports for building an output file.

  Runs `<ninja_tool_path> -f <ninja_file_path> -t commands <output_file_path>`
  and returns each line of its standard output (decoded as UTF-8) as one list
  entry. subprocess.CalledProcessError is raised if the tool exits with a
  non-zero status.
  """
  ninja_argv = [
      str(ninja_tool_path),
      "-f",
      ninja_file_path,
      "-t",
      "commands",
      str(output_file_path),
  ]
  raw_output = subprocess.check_output(ninja_argv)
  return raw_output.decode("utf-8").splitlines()


def collect_commands(ninja_file_path: pathlib.Path,
                     output_file_path: pathlib.Path) -> list[str]:
  """Returns the command lines ninja reports for building the given file.

  Uses the prebuilt ninja at prebuilts/build-tools/linux-x86/bin/ninja and
  runs it from the directory containing the ninja file. The working directory
  of this process is changed for the duration of the call and restored
  afterwards, even when ninja fails.

  Args:
    ninja_file_path: path of the ninja file describing the build.
    output_file_path: the output file whose commands are requested.

  Returns:
    The lines printed by `ninja -t commands`, one command line per entry.
  """
  # Resolve the tool before changing directory: its path is relative to the
  # working directory at the time of the call.
  ninja_tool_path = pathlib.Path(
      "prebuilts/build-tools/linux-x86/bin/ninja").resolve()
  original_cwd = os.getcwd()
  os.chdir(ninja_file_path.parent.absolute())
  try:
    # Only the file name is passed on, since ninja now runs next to the file.
    return collect_commands_ninja(
        ninja_file_path.name,
        output_file_path,
        ninja_tool_path,
    )
  finally:
    os.chdir(original_cwd)


def file_differences(
    left_path: pathlib.Path,
    right_path: pathlib.Path,
    level=DiffLevel.SEVERE,
    file_type=ArtifactType.AUTO_INFER_FROM_SUFFIX) -> list[str]:
  """Compares two files and describes how they differ.

  When either path is not an existing file, only the "does not exist"
  messages are returned. With file_type AUTO_INFER_FROM_SUFFIX the type is
  derived from each path's suffix, and a mismatch between the two inferred
  types is reported as the sole difference. Otherwise the diff functions
  selected for the file type and level are run in order and their results
  concatenated.

  Returns the empty list if these files are deemed "similar enough".
  """
  missing = [
      "%s does not exist" % path
      for path in (left_path, right_path)
      if not path.is_file()
  ]
  if missing:
    return missing

  if file_type is ArtifactType.AUTO_INFER_FROM_SUFFIX:
    left_type = _artifact_type(left_path)
    right_type = _artifact_type(right_path)
    if left_type != right_type:
      return ["file types differ: %s and %s" % (left_type, right_type)]
    file_type = left_type

  differences = []
  for diff_fn in _diff_fns(file_type, level):
    differences.extend(diff_fn(left_path, right_path))
  return differences


def parse_collection_info(info_file_path: pathlib.Path):
  """Parses the collection info file at the given path and returns details.

  The first line of the file holds the path of the ninja file; the optional
  second line names the target output file.

  Args:
    info_file_path: location of the collection info file.

  Returns:
    A (ninja_path, target_file) tuple. ninja_path is a pathlib.Path built
    from the first line; target_file is the second line as a string, or None
    when that line is absent or empty.

  Raises:
    Exception: if the info file does not exist.
    ValueError: if the info file is empty or its first line is blank.
  """
  if not info_file_path.is_file():
    raise Exception("Expected file %s was not found. " % info_file_path +
                    "Did you run collect.py for this directory?")

  info_contents = info_file_path.read_text().splitlines()
  # Reject a file without a ninja path up front rather than failing with a
  # bare IndexError or silently returning Path(".").
  if not info_contents or not info_contents[0]:
    raise ValueError("Collection info file %s is malformed: " % info_file_path +
                     "its first line must be the ninja file path.")

  ninja_path = pathlib.Path(info_contents[0])
  target_file = None
  if len(info_contents) > 1 and info_contents[1]:
    target_file = info_contents[1]

  return ninja_path, target_file


# Pattern to parse out an env-setting command prefix, i.e. one or more
# KEY=VALUE assignments (optionally surrounded by spaces) in front of the
# command. For example:
#
# FOO=BAR KEY=VALUE {main_command_args}
#
# Group 5 captures everything that follows the assignments.
env_set_prefix_pattern = re.compile("^(( )*([^ =]+=[^ =]+)( )*)+(.*)$")

# Pattern to parse out command prefixes which cd into the execroot and
# then remove the old output. For example:
#
# cd path/to/execroot && rm old_output && {main_command}
#
# Group 2 captures the main command.
cd_rm_prefix_pattern = re.compile("^cd [^&]* &&( )+rm [^&]* && (.*)$")

# Pattern to parse out any trailing comment suffix. For example:
#
# {main_command} # This comment should be removed.
#
# Group 1 captures the command without the comment. The leading `.*` is
# greedy, so the split happens at the last " # " of the string.
comment_suffix_pattern = re.compile("(.*) # .*")


def _remove_rbe_tokens(tokens, tool_endings):
  """Drops every token in front of the first one that names a known tool.

  A token names a tool when it ends with one of `tool_endings`. Returns the
  tokens from that token onwards, or None when no token matches.
  """
  endings = tuple(tool_endings)
  for index, token in enumerate(tokens):
    if token.endswith(endings):
      return tokens[index:]
  return None


def rich_command_info(raw_command):
  """Builds a command info object out of a raw command line string.

  The command is first reduced to its core: a leading run of KEY=VALUE
  assignments, a leading `cd ... && rm ... &&` prefix and a trailing
  ` # comment` are stripped, in that order, and any tokens preceding the first
  clang/clang++ token are dropped.

  Returns a clangcompile.ClangCompileInfo when the tool's name ends with
  "clang" or "clang++", and a commands.CommandInfo otherwise.
  """
  # Each entry pairs a pattern with the capture group holding what remains of
  # the command once the unrelated part has been removed.
  strippers = (
      (env_set_prefix_pattern, 5),
      (cd_rm_prefix_pattern, 2),
      (comment_suffix_pattern, 1),
  )
  cmd = raw_command.strip()
  for pattern, kept_group in strippers:
    match = pattern.fullmatch(cmd)
    if match is not None:
      cmd = match.group(kept_group)

  tokens = cmd.split()
  tokens = _remove_rbe_tokens(tokens, ["clang", "clang++"]) or tokens
  tool = tokens[0]
  args = tokens[1:]

  if not tool.endswith(("clang", "clang++")):
    return commands.CommandInfo(tool=tool, args=args)
  # TODO(cparsons): Disambiguate between clang compile and other clang
  # commands.
  return clangcompile.ClangCompileInfo(tool=tool, args=args)


def _build_arg_parser() -> argparse.ArgumentParser:
  """Returns the parser describing this tool's command line."""
  parser = argparse.ArgumentParser(description="")
  parser.add_argument(
      "--level",
      action=EnumAction,
      default=DiffLevel.SEVERE,
      type=DiffLevel,
      help="the level of differences to be considered. " +
      "Diffs below the specified level are ignored.")
  parser.add_argument(
      "--verbose",
      "-v",
      action=argparse.BooleanOptionalAction,
      default=False,
      help="log verbosely.")
  parser.add_argument(
      "left_dir",
      help="the 'left' directory to compare build outputs " +
      "from. This must be the target of an invocation of collect.py.")
  parser.add_argument(
      "--left_file",
      "-l",
      dest="left_file",
      default=None,
      help="the output file (relative to execution root) for " +
      "the 'left' build invocation.")
  parser.add_argument(
      "right_dir",
      help="the 'right' directory to compare build outputs " +
      "from. This must be the target of an invocation of collect.py.")
  parser.add_argument(
      "--right_file",
      "-r",
      dest="right_file",
      default=None,
      help="the output file (relative to execution root) " +
      "for the 'right' build invocation.")
  parser.add_argument(
      "--file_type",
      dest="file_type",
      default="auto",
      choices=FILE_TYPE_CHOICES.keys(),
      help="the type of file being diffed (overrides automatic " +
      "filetype resolution)")
  parser.add_argument(
      "--allow_missing_file",
      action=argparse.BooleanOptionalAction,
      default=False,
      help="allow a missing output file; this is useful to " +
      "compare actions even in the absence of an output file.")
  return parser


def main():
  """Compares the output files of two collected builds and reports diffs.

  The files to compare come from each directory's collection info file unless
  overridden with --left_file / --right_file. When the files match, a message
  is printed and the process exits with status 0. Otherwise the differences
  are printed (followed, with --verbose, by a comparison of the last command
  ninja reports for each file) and the process exits with status 1.

  Raises:
    Exception: if no file to compare is known for one of the sides.
    RuntimeError: if a file to compare is missing and --allow_missing_file was
      not given.
  """
  args = _build_arg_parser().parse_args()

  level = args.level
  left_dir = pathlib.Path(args.left_dir)
  right_dir = pathlib.Path(args.right_dir)

  left_ninja_name, left_file = parse_collection_info(
      left_dir.joinpath(COLLECTION_INFO_FILENAME))
  right_ninja_name, right_file = parse_collection_info(
      right_dir.joinpath(COLLECTION_INFO_FILENAME))
  # Files given on the command line take precedence over the collected ones.
  if args.left_file:
    left_file = pathlib.Path(args.left_file)
  if args.right_file:
    right_file = pathlib.Path(args.right_file)

  if left_file is None:
    raise Exception("No left file specified. Either run collect.py with a " +
                    "target file, or specify --left_file.")
  if right_file is None:
    raise Exception("No right file specified. Either run collect.py with a " +
                    "target file, or specify --right_file.")

  left_path = left_dir.joinpath(left_file)
  right_path = right_dir.joinpath(right_file)
  if not args.allow_missing_file:
    if not left_path.is_file():
      raise RuntimeError("Expected file %s was not found. " % left_path)
    if not right_path.is_file():
      raise RuntimeError("Expected file %s was not found. " % right_path)

  file_diff_errors = file_differences(left_path, right_path, level,
                                      FILE_TYPE_CHOICES[args.file_type])

  if not file_diff_errors:
    print(f"{left_file} matches\n{right_file}")
    sys.exit(0)

  for err in file_diff_errors:
    print(err)
  if args.verbose:
    # Compare the last command ninja lists for each of the two files.
    left_ninja_path = left_dir.joinpath(left_ninja_name)
    left_commands = collect_commands(left_ninja_path, left_file)
    left_command_info = rich_command_info(left_commands[-1])
    right_ninja_path = right_dir.joinpath(right_ninja_name)
    right_commands = collect_commands(right_ninja_path, right_file)
    right_command_info = rich_command_info(right_commands[-1])
    print("======== ACTION COMPARISON: ========")
    print("=== LEFT ONLY:\n")
    print(left_command_info.compare(right_command_info))
    print()
    print("=== RIGHT ONLY:\n")
    print(right_command_info.compare(left_command_info))
    print()
  sys.exit(1)


# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
  main()