xref: /aosp_15_r20/external/pdfium/testing/tools/pngdiffer.py (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1#!/usr/bin/env python3
2# Copyright 2015 The PDFium Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
from dataclasses import dataclass
import itertools
import os
import shutil
import subprocess
import sys
from typing import Optional
12
13EXACT_MATCHING = 'exact'
14FUZZY_MATCHING = 'fuzzy'
15
16_PNG_OPTIMIZER = 'optipng'
17
18# Each suffix order acts like a path along a tree, with the leaves being the
19# most specific, and the root being the least specific.
20_COMMON_SUFFIX_ORDER = ('_{os}', '')
21_AGG_SUFFIX_ORDER = ('_agg_{os}', '_agg') + _COMMON_SUFFIX_ORDER
22_GDI_SUFFIX_ORDER = ('_gdi_{os}', '_gdi') + _COMMON_SUFFIX_ORDER
23_SKIA_SUFFIX_ORDER = ('_skia_{os}', '_skia') + _COMMON_SUFFIX_ORDER
24
25
@dataclass
class ImageDiff:
  """Details about an image diff.

  Attributes:
    actual_path: Path to the actual image file.
    expected_path: Path to the expected image file, or `None` if no matches.
    diff_path: Path to the diff image file, or `None` if no diff.
    reason: Optional reason for the diff, or `None` if none was recorded.
  """
  actual_path: str
  # These fields default to `None`, so they are annotated as optional.
  expected_path: Optional[str] = None
  diff_path: Optional[str] = None
  reason: Optional[str] = None
40
class PNGDiffer:
  """Compares rendered page images against expected images.

  Image comparison is delegated to the external `pdfium_diff` executable.
  Expected images are looked up using a list of filename suffixes, ordered
  from most to least specific, that depends on the rendering option.
  """

  def __init__(self, finder, reverse_byte_order, rendering_option):
    """Initializes the differ.

    Args:
      finder: Object providing `ExecutablePath(name)` and an `os_name`
        attribute.
      reverse_byte_order: If true, `--reverse-byte-order` is passed to
        `pdfium_diff` when comparing images.
      rendering_option: One of 'agg', 'gdi', or 'skia'.

    Raises:
      ValueError: If `rendering_option` is not one of the supported values.
    """
    self.pdfium_diff_path = finder.ExecutablePath('pdfium_diff')
    self.os_name = finder.os_name
    self.reverse_byte_order = reverse_byte_order

    if rendering_option == 'agg':
      self.suffix_order = _AGG_SUFFIX_ORDER
    elif rendering_option == 'gdi':
      self.suffix_order = _GDI_SUFFIX_ORDER
    elif rendering_option == 'skia':
      self.suffix_order = _SKIA_SUFFIX_ORDER
    else:
      raise ValueError(f'rendering_option={rendering_option}')

  def CheckMissingTools(self, regenerate_expected):
    """Checks for external tools needed by the requested operation.

    Args:
      regenerate_expected: Whether expected images are going to be regenerated.

    Returns:
      An error message if regeneration was requested and the PNG optimizer is
      not found on the path, otherwise `None`.
    """
    if regenerate_expected and not shutil.which(_PNG_OPTIMIZER):
      return f'Please install "{_PNG_OPTIMIZER}" to regenerate expected images.'
    return None

  def GetActualFiles(self, input_filename, source_dir, working_dir):
    """Returns the actual image paths for pages that have expected images.

    Pages are visited in order starting from 0, and the scan stops at the first
    page without any existing expected image. The actual files themselves are
    not checked for existence.
    """
    actual_paths = []
    path_templates = _PathTemplates(input_filename, source_dir, working_dir,
                                    self.os_name, self.suffix_order)

    for page in itertools.count():
      actual_path = path_templates.GetActualPath(page)
      if path_templates.GetExpectedPath(page, default_to_base=False):
        actual_paths.append(actual_path)
      else:
        break
    return actual_paths

  def _RunCommand(self, cmd):
    """Runs `cmd`, capturing its output.

    Returns:
      `None` if the command exits with status zero, otherwise the
      `subprocess.CalledProcessError` describing the failure.
    """
    try:
      subprocess.run(cmd, capture_output=True, check=True)
      return None
    except subprocess.CalledProcessError as e:
      return e

  def _RunImageCompareCommand(self, image_diff, image_matching_algorithm):
    """Compares `image_diff.actual_path` against `image_diff.expected_path`.

    Returns:
      `None` if `pdfium_diff` exits with status zero, otherwise the error.
    """
    cmd = [self.pdfium_diff_path]
    if self.reverse_byte_order:
      cmd.append('--reverse-byte-order')
    if image_matching_algorithm == FUZZY_MATCHING:
      cmd.append('--fuzzy')
    cmd.extend([image_diff.actual_path, image_diff.expected_path])
    return self._RunCommand(cmd)

  def _RunImageDiffCommand(self, image_diff):
    """Runs `pdfium_diff --subtract`, targeting `image_diff.diff_path`.

    Returns:
      `None` if `pdfium_diff` exits with status zero, otherwise the error.
    """
    # TODO(crbug.com/pdfium/1925): Diff mode ignores --reverse-byte-order.
    return self._RunCommand([
        self.pdfium_diff_path, '--subtract', image_diff.actual_path,
        image_diff.expected_path, image_diff.diff_path
    ])

  def ComputeDifferences(self, input_filename, source_dir, working_dir,
                         image_matching_algorithm):
    """Computes differences between actual and expected image files.

    Pages are visited in order starting from 0 until an actual image file is
    missing. A page is reported if it does not match its most specific
    expected image, if it has no expected image, or if it matches its most
    specific expected image and also a less specific one.

    Args:
      input_filename: Name of the test input file.
      source_dir: Directory containing the expected image files.
      working_dir: Directory containing the actual image files, and where diff
        image files are written.
      image_matching_algorithm: `EXACT_MATCHING` or `FUZZY_MATCHING`.

    Returns:
      A list of `ImageDiff` instances, one per differing page.
    """
    image_diffs = []

    path_templates = _PathTemplates(input_filename, source_dir, working_dir,
                                    self.os_name, self.suffix_order)
    for page in itertools.count():
      page_diff = ImageDiff(actual_path=path_templates.GetActualPath(page))
      if not os.path.exists(page_diff.actual_path):
        # No more actual pages.
        break

      expected_path = path_templates.GetExpectedPath(page)
      if os.path.exists(expected_path):
        page_diff.expected_path = expected_path

        compare_error = self._RunImageCompareCommand(page_diff,
                                                     image_matching_algorithm)
        if compare_error:
          page_diff.reason = str(compare_error)

          # TODO(crbug.com/pdfium/1925): Compare and diff simultaneously.
          page_diff.diff_path = path_templates.GetDiffPath(page)
          # A zero exit status is treated as no diff image being available.
          # NOTE(review): presumably `pdfium_diff --subtract` exits non-zero
          # when it writes a diff -- confirm against the tool.
          if not self._RunImageDiffCommand(page_diff):
            print(f'WARNING: No diff for {page_diff.actual_path}')
            page_diff.diff_path = None
        else:
          # Validate that no other paths match.
          for unexpected_path in path_templates.GetExpectedPaths(page)[1:]:
            page_diff.expected_path = unexpected_path
            if not self._RunImageCompareCommand(page_diff,
                                                image_matching_algorithm):
              page_diff.reason = f'Also matches {unexpected_path}'
              break
          # Restore the most specific path after probing the others.
          page_diff.expected_path = expected_path
      else:
        if page == 0:
          print(f'WARNING: no expected results files for {input_filename}')
        page_diff.reason = f'{expected_path} does not exist'

      if page_diff.reason:
        image_diffs.append(page_diff)

    return image_diffs

  def Regenerate(self, input_filename, source_dir, working_dir,
                 image_matching_algorithm):
    """Regenerates the expected image files from the actual image files.

    For each page, starting from 0:
      * If the actual image matches the most specific existing expectation
        and no less specific one, nothing is done.
      * If the actual image matches a less specific expectation, the more
        specific expectations before the last match are renamed to `.bak`.
      * If there is no actual image but expectations exist, all of them are
        renamed to `.bak`.
      * If nothing matches, the actual image is copied over the most specific
        existing expectation (or the base expectation if none exists) and
        the PNG optimizer is run on the copy.
    The scan stops at the first page with neither actual nor expected images.

    Args:
      input_filename: Name of the test input file.
      source_dir: Directory containing the expected image files.
      working_dir: Directory containing the actual image files.
      image_matching_algorithm: `EXACT_MATCHING` or `FUZZY_MATCHING`.
    """
    path_templates = _PathTemplates(input_filename, source_dir, working_dir,
                                    self.os_name, self.suffix_order)
    for page in itertools.count():
      expected_paths = path_templates.GetExpectedPaths(page)

      first_match = None
      last_match = None
      page_diff = ImageDiff(actual_path=path_templates.GetActualPath(page))
      if os.path.exists(page_diff.actual_path):
        # Match against all expected page images.
        for index, expected_path in enumerate(expected_paths):
          page_diff.expected_path = expected_path
          if not self._RunImageCompareCommand(page_diff,
                                              image_matching_algorithm):
            if first_match is None:
              first_match = index
            last_match = index

        if last_match == 0:
          # Regeneration not needed. This case may be reached if only some, but
          # not all, pages need to be regenerated.
          continue
      elif expected_paths:
        # Remove all expected page images.
        print(f'WARNING: {input_filename} has extra expected page {page}')
        first_match = 0
        last_match = len(expected_paths)
      else:
        # No more expected or actual pages.
        break

      # Try to reuse expectations by removing intervening non-matches.
      #
      # TODO(crbug.com/pdfium/1988): This can make mistakes due to a lack of
      # global knowledge about other test configurations, which is why it just
      # creates backup files rather than immediately removing files.
      if last_match is not None:
        if first_match > 1:
          print(f'WARNING: {input_filename}.{page} has non-adjacent match')
        if first_match != last_match:
          print(f'WARNING: {input_filename}.{page} has redundant matches')

        for expected_path in expected_paths[:last_match]:
          # os.replace() overwrites a stale backup on every platform, whereas
          # os.rename() raises on Windows if the destination already exists.
          os.replace(expected_path, expected_path + '.bak')
        continue

      # Regenerate the most specific expected path that exists. If there are no
      # existing expectations, regenerate the base case.
      expected_path = path_templates.GetExpectedPath(page)
      shutil.copyfile(page_diff.actual_path, expected_path)
      # Optimization is best effort: the unoptimized copy is still a valid
      # expectation, so only report the failure.
      optimize_error = self._RunCommand([_PNG_OPTIMIZER, expected_path])
      if optimize_error:
        print(f'WARNING: {_PNG_OPTIMIZER} failed for {expected_path}: '
              f'{optimize_error}')
201
202
203_ACTUAL_TEMPLATE = '.pdf.%d.png'
204_DIFF_TEMPLATE = '.pdf.%d.diff.png'
205
206
207class _PathTemplates:
208
209  def __init__(self, input_filename, source_dir, working_dir, os_name,
210               suffix_order):
211    input_root, _ = os.path.splitext(input_filename)
212    self.actual_path_template = os.path.join(working_dir,
213                                             input_root + _ACTUAL_TEMPLATE)
214    self.diff_path_template = os.path.join(working_dir,
215                                           input_root + _DIFF_TEMPLATE)
216
217    # Pre-create the available templates from most to least specific. We
218    # generally expect the most specific case to match first.
219    self.expected_templates = []
220    for suffix in suffix_order:
221      formatted_suffix = suffix.format(os=os_name)
222      self.expected_templates.append(
223          os.path.join(
224              source_dir,
225              f'{input_root}_expected{formatted_suffix}{_ACTUAL_TEMPLATE}'))
226    assert self.expected_templates
227
228  def GetActualPath(self, page):
229    return self.actual_path_template % page
230
231  def GetDiffPath(self, page):
232    return self.diff_path_template % page
233
234  def _GetPossibleExpectedPaths(self, page):
235    return [template % page for template in self.expected_templates]
236
237  def GetExpectedPaths(self, page):
238    return list(filter(os.path.exists, self._GetPossibleExpectedPaths(page)))
239
240  def GetExpectedPath(self, page, default_to_base=True):
241    """Returns the most specific expected path that exists."""
242    last_not_found_expected_path = None
243    for expected_path in self._GetPossibleExpectedPaths(page):
244      if os.path.exists(expected_path):
245        return expected_path
246      last_not_found_expected_path = expected_path
247    return last_not_found_expected_path if default_to_base else None
248