#!/usr/bin/env python3 # Copyright 2015 The PDFium Authors # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. from dataclasses import dataclass import itertools import os import shutil import subprocess import sys EXACT_MATCHING = 'exact' FUZZY_MATCHING = 'fuzzy' _PNG_OPTIMIZER = 'optipng' # Each suffix order acts like a path along a tree, with the leaves being the # most specific, and the root being the least specific. _COMMON_SUFFIX_ORDER = ('_{os}', '') _AGG_SUFFIX_ORDER = ('_agg_{os}', '_agg') + _COMMON_SUFFIX_ORDER _GDI_SUFFIX_ORDER = ('_gdi_{os}', '_gdi') + _COMMON_SUFFIX_ORDER _SKIA_SUFFIX_ORDER = ('_skia_{os}', '_skia') + _COMMON_SUFFIX_ORDER @dataclass class ImageDiff: """Details about an image diff. Attributes: actual_path: Path to the actual image file. expected_path: Path to the expected image file, or `None` if no matches. diff_path: Path to the diff image file, or `None` if no diff. reason: Optional reason for the diff. """ actual_path: str expected_path: str = None diff_path: str = None reason: str = None class PNGDiffer(): def __init__(self, finder, reverse_byte_order, rendering_option): self.pdfium_diff_path = finder.ExecutablePath('pdfium_diff') self.os_name = finder.os_name self.reverse_byte_order = reverse_byte_order if rendering_option == 'agg': self.suffix_order = _AGG_SUFFIX_ORDER elif rendering_option == 'gdi': self.suffix_order = _GDI_SUFFIX_ORDER elif rendering_option == 'skia': self.suffix_order = _SKIA_SUFFIX_ORDER else: raise ValueError(f'rendering_option={rendering_option}') def CheckMissingTools(self, regenerate_expected): if regenerate_expected and not shutil.which(_PNG_OPTIMIZER): return f'Please install "{_PNG_OPTIMIZER}" to regenerate expected images.' return None def GetActualFiles(self, input_filename, source_dir, working_dir): actual_paths = [] path_templates = _PathTemplates(input_filename, source_dir, working_dir, self.os_name, self.suffix_order) for page in itertools.count(): actual_path = path_templates.GetActualPath(page) if path_templates.GetExpectedPath(page, default_to_base=False): actual_paths.append(actual_path) else: break return actual_paths def _RunCommand(self, cmd): try: subprocess.run(cmd, capture_output=True, check=True) return None except subprocess.CalledProcessError as e: return e def _RunImageCompareCommand(self, image_diff, image_matching_algorithm): cmd = [self.pdfium_diff_path] if self.reverse_byte_order: cmd.append('--reverse-byte-order') if image_matching_algorithm == FUZZY_MATCHING: cmd.append('--fuzzy') cmd.extend([image_diff.actual_path, image_diff.expected_path]) return self._RunCommand(cmd) def _RunImageDiffCommand(self, image_diff): # TODO(crbug.com/pdfium/1925): Diff mode ignores --reverse-byte-order. return self._RunCommand([ self.pdfium_diff_path, '--subtract', image_diff.actual_path, image_diff.expected_path, image_diff.diff_path ]) def ComputeDifferences(self, input_filename, source_dir, working_dir, image_matching_algorithm): """Computes differences between actual and expected image files. Returns: A list of `ImageDiff` instances, one per differing page. """ image_diffs = [] path_templates = _PathTemplates(input_filename, source_dir, working_dir, self.os_name, self.suffix_order) for page in itertools.count(): page_diff = ImageDiff(actual_path=path_templates.GetActualPath(page)) if not os.path.exists(page_diff.actual_path): # No more actual pages. break expected_path = path_templates.GetExpectedPath(page) if os.path.exists(expected_path): page_diff.expected_path = expected_path compare_error = self._RunImageCompareCommand(page_diff, image_matching_algorithm) if compare_error: page_diff.reason = str(compare_error) # TODO(crbug.com/pdfium/1925): Compare and diff simultaneously. page_diff.diff_path = path_templates.GetDiffPath(page) if not self._RunImageDiffCommand(page_diff): print(f'WARNING: No diff for {page_diff.actual_path}') page_diff.diff_path = None else: # Validate that no other paths match. for unexpected_path in path_templates.GetExpectedPaths(page)[1:]: page_diff.expected_path = unexpected_path if not self._RunImageCompareCommand(page_diff, image_matching_algorithm): page_diff.reason = f'Also matches {unexpected_path}' break page_diff.expected_path = expected_path else: if page == 0: print(f'WARNING: no expected results files for {input_filename}') page_diff.reason = f'{expected_path} does not exist' if page_diff.reason: image_diffs.append(page_diff) return image_diffs def Regenerate(self, input_filename, source_dir, working_dir, image_matching_algorithm): path_templates = _PathTemplates(input_filename, source_dir, working_dir, self.os_name, self.suffix_order) for page in itertools.count(): expected_paths = path_templates.GetExpectedPaths(page) first_match = None last_match = None page_diff = ImageDiff(actual_path=path_templates.GetActualPath(page)) if os.path.exists(page_diff.actual_path): # Match against all expected page images. for index, expected_path in enumerate(expected_paths): page_diff.expected_path = expected_path if not self._RunImageCompareCommand(page_diff, image_matching_algorithm): if first_match is None: first_match = index last_match = index if last_match == 0: # Regeneration not needed. This case may be reached if only some, but # not all, pages need to be regenerated. continue elif expected_paths: # Remove all expected page images. print(f'WARNING: {input_filename} has extra expected page {page}') first_match = 0 last_match = len(expected_paths) else: # No more expected or actual pages. break # Try to reuse expectations by removing intervening non-matches. # # TODO(crbug.com/pdfium/1988): This can make mistakes due to a lack of # global knowledge about other test configurations, which is why it just # creates backup files rather than immediately removing files. if last_match is not None: if first_match > 1: print(f'WARNING: {input_filename}.{page} has non-adjacent match') if first_match != last_match: print(f'WARNING: {input_filename}.{page} has redundant matches') for expected_path in expected_paths[:last_match]: os.rename(expected_path, expected_path + '.bak') continue # Regenerate the most specific expected path that exists. If there are no # existing expectations, regenerate the base case. expected_path = path_templates.GetExpectedPath(page) shutil.copyfile(page_diff.actual_path, expected_path) self._RunCommand([_PNG_OPTIMIZER, expected_path]) _ACTUAL_TEMPLATE = '.pdf.%d.png' _DIFF_TEMPLATE = '.pdf.%d.diff.png' class _PathTemplates: def __init__(self, input_filename, source_dir, working_dir, os_name, suffix_order): input_root, _ = os.path.splitext(input_filename) self.actual_path_template = os.path.join(working_dir, input_root + _ACTUAL_TEMPLATE) self.diff_path_template = os.path.join(working_dir, input_root + _DIFF_TEMPLATE) # Pre-create the available templates from most to least specific. We # generally expect the most specific case to match first. self.expected_templates = [] for suffix in suffix_order: formatted_suffix = suffix.format(os=os_name) self.expected_templates.append( os.path.join( source_dir, f'{input_root}_expected{formatted_suffix}{_ACTUAL_TEMPLATE}')) assert self.expected_templates def GetActualPath(self, page): return self.actual_path_template % page def GetDiffPath(self, page): return self.diff_path_template % page def _GetPossibleExpectedPaths(self, page): return [template % page for template in self.expected_templates] def GetExpectedPaths(self, page): return list(filter(os.path.exists, self._GetPossibleExpectedPaths(page))) def GetExpectedPath(self, page, default_to_base=True): """Returns the most specific expected path that exists.""" last_not_found_expected_path = None for expected_path in self._GetPossibleExpectedPaths(page): if os.path.exists(expected_path): return expected_path last_not_found_expected_path = expected_path return last_not_found_expected_path if default_to_base else None