#!/usr/bin/env python3
# Copyright 2015 The PDFium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Compares actual PNG page images to expected ones using pdfium_diff."""

from dataclasses import dataclass
import itertools
import os
import shutil
import subprocess
import sys
from typing import Optional

EXACT_MATCHING = 'exact'
FUZZY_MATCHING = 'fuzzy'

_PNG_OPTIMIZER = 'optipng'

# Each suffix order acts like a path along a tree, with the leaves being the
# most specific, and the root being the least specific.
_COMMON_SUFFIX_ORDER = ('_{os}', '')
_AGG_SUFFIX_ORDER = ('_agg_{os}', '_agg') + _COMMON_SUFFIX_ORDER
_GDI_SUFFIX_ORDER = ('_gdi_{os}', '_gdi') + _COMMON_SUFFIX_ORDER
_SKIA_SUFFIX_ORDER = ('_skia_{os}', '_skia') + _COMMON_SUFFIX_ORDER


@dataclass
class ImageDiff:
  """Details about an image diff.

  Attributes:
    actual_path: Path to the actual image file.
    expected_path: Path to the expected image file, or `None` if no matches.
    diff_path: Path to the diff image file, or `None` if no diff.
    reason: Optional reason for the diff.
  """
  actual_path: str
  expected_path: Optional[str] = None
  diff_path: Optional[str] = None
  reason: Optional[str] = None


class PNGDiffer():
  """Compares actual page images against expected page images.

  Expected images are located by trying a sequence of file name suffixes, from
  most to least specific, chosen according to the rendering option.
  """

  def __init__(self, finder, reverse_byte_order, rendering_option):
    """Initializes the differ.

    Args:
      finder: Object providing `ExecutablePath(name)` and an `os_name`
        attribute.
      reverse_byte_order: Whether to pass `--reverse-byte-order` when comparing
        images.
      rendering_option: One of 'agg', 'gdi', or 'skia'.

    Raises:
      ValueError: If `rendering_option` is not one of the supported values.
    """
    self.pdfium_diff_path = finder.ExecutablePath('pdfium_diff')
    self.os_name = finder.os_name
    self.reverse_byte_order = reverse_byte_order

    if rendering_option == 'agg':
      self.suffix_order = _AGG_SUFFIX_ORDER
    elif rendering_option == 'gdi':
      self.suffix_order = _GDI_SUFFIX_ORDER
    elif rendering_option == 'skia':
      self.suffix_order = _SKIA_SUFFIX_ORDER
    else:
      raise ValueError(f'rendering_option={rendering_option}')

  def CheckMissingTools(self, regenerate_expected):
    """Returns an error message if a required tool is missing, else `None`.

    The PNG optimizer is only required when regenerating expected images.
    """
    if regenerate_expected and not shutil.which(_PNG_OPTIMIZER):
      return f'Please install "{_PNG_OPTIMIZER}" to regenerate expected images.'
    return None

  def GetActualFiles(self, input_filename, source_dir, working_dir):
    """Returns the actual image paths for pages that have an expected image.

    Pages are enumerated from 0 and enumeration stops at the first page for
    which no expected image exists. The returned actual paths are not checked
    for existence.
    """
    actual_paths = []
    path_templates = _PathTemplates(input_filename, source_dir, working_dir,
                                    self.os_name, self.suffix_order)

    for page in itertools.count():
      if not path_templates.GetExpectedPath(page, default_to_base=False):
        break
      actual_paths.append(path_templates.GetActualPath(page))
    return actual_paths

  def _RunCommand(self, cmd):
    """Runs `cmd`, capturing its output.

    Returns:
      `None` if the command exits with status 0, otherwise the
      `subprocess.CalledProcessError` describing the failure.
    """
    try:
      subprocess.run(cmd, capture_output=True, check=True)
      return None
    except subprocess.CalledProcessError as e:
      return e

  def _RunImageCompareCommand(self, image_diff, image_matching_algorithm):
    """Compares `image_diff.actual_path` with `image_diff.expected_path`.

    Returns:
      The result of `_RunCommand()`: `None` if pdfium_diff exits with status 0,
      otherwise the error.
    """
    cmd = [self.pdfium_diff_path]
    if self.reverse_byte_order:
      cmd.append('--reverse-byte-order')
    if image_matching_algorithm == FUZZY_MATCHING:
      cmd.append('--fuzzy')
    cmd.extend([image_diff.actual_path, image_diff.expected_path])
    return self._RunCommand(cmd)

  def _RunImageDiffCommand(self, image_diff):
    """Runs pdfium_diff in `--subtract` mode, writing `image_diff.diff_path`.

    Returns:
      The result of `_RunCommand()`.
    """
    # TODO(crbug.com/pdfium/1925): Diff mode ignores --reverse-byte-order.
    return self._RunCommand([
        self.pdfium_diff_path, '--subtract', image_diff.actual_path,
        image_diff.expected_path, image_diff.diff_path
    ])

  def ComputeDifferences(self, input_filename, source_dir, working_dir,
                         image_matching_algorithm):
    """Computes differences between actual and expected image files.

    Pages are enumerated from 0 until an actual image file does not exist. A
    page is reported when its expected image is missing, when the comparison
    fails, or when the actual image also matches a less specific expected
    image.

    Returns:
      A list of `ImageDiff` instances, one per differing page.
    """
    image_diffs = []

    path_templates = _PathTemplates(input_filename, source_dir, working_dir,
                                    self.os_name, self.suffix_order)
    for page in itertools.count():
      page_diff = ImageDiff(actual_path=path_templates.GetActualPath(page))
      if not os.path.exists(page_diff.actual_path):
        # No more actual pages.
        break

      expected_path = path_templates.GetExpectedPath(page)
      if os.path.exists(expected_path):
        page_diff.expected_path = expected_path

        compare_error = self._RunImageCompareCommand(page_diff,
                                                     image_matching_algorithm)
        if compare_error:
          page_diff.reason = str(compare_error)

          # TODO(crbug.com/pdfium/1925): Compare and diff simultaneously.
          page_diff.diff_path = path_templates.GetDiffPath(page)
          # A zero exit status from the diff command is treated as "no diff".
          if not self._RunImageDiffCommand(page_diff):
            print(f'WARNING: No diff for {page_diff.actual_path}')
            page_diff.diff_path = None
        else:
          # Validate that no other paths match.
          for unexpected_path in path_templates.GetExpectedPaths(page)[1:]:
            page_diff.expected_path = unexpected_path
            if not self._RunImageCompareCommand(page_diff,
                                                image_matching_algorithm):
              page_diff.reason = f'Also matches {unexpected_path}'
              break
          page_diff.expected_path = expected_path
      else:
        if page == 0:
          print(f'WARNING: no expected results files for {input_filename}')
        page_diff.reason = f'{expected_path} does not exist'

      if page_diff.reason:
        image_diffs.append(page_diff)

    return image_diffs

  def Regenerate(self, input_filename, source_dir, working_dir,
                 image_matching_algorithm):
    """Updates expected images so that they match the actual images.

    For each page, starting at 0:
      * If the actual image matches the most specific existing expected image,
        nothing is done.
      * If the actual image matches a less specific expected image, the more
        specific expected images ahead of the last match are renamed to
        `.bak`.
      * If the actual image matches nothing, it is copied over the most
        specific existing expected image (or the base expected path if none
        exist) and then optimized.
      * If expected images exist but there is no actual image, the expected
        images are renamed to `.bak`.
      * If neither actual nor expected images exist, processing stops.
    """
    path_templates = _PathTemplates(input_filename, source_dir, working_dir,
                                    self.os_name, self.suffix_order)
    for page in itertools.count():
      expected_paths = path_templates.GetExpectedPaths(page)

      first_match = None
      last_match = None
      page_diff = ImageDiff(actual_path=path_templates.GetActualPath(page))
      if os.path.exists(page_diff.actual_path):
        # Match against all expected page images.
        for index, expected_path in enumerate(expected_paths):
          page_diff.expected_path = expected_path
          if not self._RunImageCompareCommand(page_diff,
                                              image_matching_algorithm):
            if first_match is None:
              first_match = index
            last_match = index

        if last_match == 0:
          # Regeneration not needed. This case may be reached if only some, but
          # not all, pages need to be regenerated.
          continue
      elif expected_paths:
        # Remove all expected page images.
        print(f'WARNING: {input_filename} has extra expected page {page}')
        first_match = 0
        last_match = len(expected_paths)
      else:
        # No more expected or actual pages.
        break

      # Try to reuse expectations by removing intervening non-matches.
      #
      # TODO(crbug.com/pdfium/1988): This can make mistakes due to a lack of
      # global knowledge about other test configurations, which is why it just
      # creates backup files rather than immediately removing files.
      if last_match is not None:
        if first_match > 1:
          print(f'WARNING: {input_filename}.{page} has non-adjacent match')
        if first_match != last_match:
          print(f'WARNING: {input_filename}.{page} has redundant matches')

        for expected_path in expected_paths[:last_match]:
          os.rename(expected_path, expected_path + '.bak')
        continue

      # Regenerate the most specific expected path that exists. If there are no
      # existing expectations, regenerate the base case.
      expected_path = path_templates.GetExpectedPath(page)
      shutil.copyfile(page_diff.actual_path, expected_path)
      optimize_error = self._RunCommand([_PNG_OPTIMIZER, expected_path])
      if optimize_error:
        # The copied image is left in place, so optimization failure is only
        # reported rather than treated as fatal.
        print(f'WARNING: {_PNG_OPTIMIZER} failed for {expected_path}: '
              f'{optimize_error}')


_ACTUAL_TEMPLATE = '.pdf.%d.png'
_DIFF_TEMPLATE = '.pdf.%d.diff.png'


def _EscapePercent(path):
  """Doubles each literal `%` so `path` can be embedded in a %-template."""
  return os.fspath(path).replace('%', '%%')


class _PathTemplates:
  """Builds per-page actual, diff, and expected image paths for one input.

  Templates are %-format strings with a single `%d` placeholder for the page
  number. Literal `%` characters in the supplied paths are escaped so that
  they survive formatting unchanged.
  """

  def __init__(self, input_filename, source_dir, working_dir, os_name,
               suffix_order):
    """Initializes the templates.

    Args:
      input_filename: Input file name; its extension is stripped.
      source_dir: Directory containing expected images.
      working_dir: Directory containing actual and diff images.
      os_name: Value substituted for `{os}` in each suffix.
      suffix_order: Non-empty sequence of expected-file suffixes, from most to
        least specific.
    """
    input_root, _ = os.path.splitext(input_filename)

    # Paths are later expanded with `template % page`, so any literal '%' in a
    # directory or file name must be escaped first.
    safe_root = _EscapePercent(input_root)
    safe_source_dir = _EscapePercent(source_dir)
    safe_working_dir = _EscapePercent(working_dir)

    self.actual_path_template = os.path.join(safe_working_dir,
                                             safe_root + _ACTUAL_TEMPLATE)
    self.diff_path_template = os.path.join(safe_working_dir,
                                           safe_root + _DIFF_TEMPLATE)

    # Pre-create the available templates from most to least specific. We
    # generally expect the most specific case to match first.
    self.expected_templates = []
    for suffix in suffix_order:
      formatted_suffix = _EscapePercent(suffix.format(os=os_name))
      self.expected_templates.append(
          os.path.join(
              safe_source_dir,
              f'{safe_root}_expected{formatted_suffix}{_ACTUAL_TEMPLATE}'))
    assert self.expected_templates

  def GetActualPath(self, page):
    """Returns the actual image path for `page`."""
    return self.actual_path_template % page

  def GetDiffPath(self, page):
    """Returns the diff image path for `page`."""
    return self.diff_path_template % page

  def _GetPossibleExpectedPaths(self, page):
    """Returns every candidate expected path for `page`, most specific first."""
    return [template % page for template in self.expected_templates]

  def GetExpectedPaths(self, page):
    """Returns the candidate expected paths for `page` that exist on disk."""
    return list(filter(os.path.exists, self._GetPossibleExpectedPaths(page)))

  def GetExpectedPath(self, page, default_to_base=True):
    """Returns the most specific expected path that exists.

    If no candidate exists, returns the least specific candidate when
    `default_to_base` is true, otherwise `None`.
    """
    last_not_found_expected_path = None
    for expected_path in self._GetPossibleExpectedPaths(page):
      if os.path.exists(expected_path):
        return expected_path
      last_not_found_expected_path = expected_path
    return last_not_found_expected_path if default_to_base else None