xref: /aosp_15_r20/external/pdfium/testing/tools/strip_jp2_comments.py (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
#!/usr/bin/env python3
# Copyright 2023 The PDFium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Strips comments from a JP2 file.

This is a simple filter script to strip comments from a JP2 file, in order to
save a few bytes from the final file size. The file is read from standard input
and the stripped result is written to standard output.
"""
10*3ac0a46fSAndroid Build Coastguard Worker
11*3ac0a46fSAndroid Build Coastguard Workerimport struct
12*3ac0a46fSAndroid Build Coastguard Workerimport sys
13*3ac0a46fSAndroid Build Coastguard Worker
# JP2 box layout: each box starts with a 4-byte big-endian length followed by a
# 4-byte type tag, for 8 header bytes in total.
BOX_HEADER_SIZE = 8
# Type tag of the box whose contents are the codestream to be rewritten.
BOX_TAG_JP2C = b'jp2c'

# Codestream markers: a marker is 2 bytes, a start byte followed by a tag byte.
MARKER_SIZE = 2
MARKER_START = 0xFF
# A start byte followed by this tag is not treated as a real marker.
MARKER_TAG_IGNORE = 0x00
# Tag of the comment marker that gets stripped.
MARKER_TAG_COMMENT = 0x64
# A start byte followed by this tag is treated as a possible fill byte.
MARKER_TAG_FILL = 0xFF
22*3ac0a46fSAndroid Build Coastguard Worker
23*3ac0a46fSAndroid Build Coastguard Worker
def parse_box(buffer, offset):
  """Parses the next box in a JP2 file.

  A box header is a 4-byte big-endian length (covering the header itself)
  followed by a 4-byte type tag.

  Args:
    buffer: A buffer containing the JP2 file contents.
    offset: The starting offset into the buffer.

  Returns:
    A tuple (next_offset, tag) where next_offset is the ending offset, and tag
    is the type tag. The box contents will be buffer[offset + 8:next_offset].
    If the length field is 0, the box extends to the end of the buffer, so
    next_offset is len(buffer).

  Raises:
    ValueError: If the length field is 1 (an extended 64-bit length, which is
      not supported here) or is otherwise smaller than the box header.
    struct.error: If the buffer is too short to hold a header at offset.
  """
  header_format = '>I4s'
  length, tag = struct.unpack_from(header_format, buffer, offset)

  if length == 0:
    # A zero length marks the last box, which runs to the end of the file.
    # Returning offset + 0 here would leave the caller's scan stuck in place.
    return len(buffer), tag

  if length < struct.calcsize(header_format):
    # A box cannot be shorter than its own header. Advancing by such a length
    # would resume parsing in the middle of this header.
    raise ValueError(
        f'unsupported box length {length} for box {tag!r} at offset {offset}')

  return offset + length, tag
37*3ac0a46fSAndroid Build Coastguard Worker
38*3ac0a46fSAndroid Build Coastguard Worker
def parse_marker(buffer, offset):
  """Parses the next marker in a codestream.

  Scans forward from offset for a MARKER_START byte followed by a tag byte.
  A start byte followed by MARKER_TAG_FILL is treated as a possible fill byte,
  and the second byte is reparsed as a start of marker. A start byte followed
  by MARKER_TAG_IGNORE is not a real marker and is skipped.

  Args:
    buffer: A buffer containing the codestream.
    offset: The starting offset into the buffer.

  Returns:
    A tuple (next_offset, tag) where next_offset is the offset after the marker,
    and tag is the type tag. If no marker was found, next_offset will point to
    the end of the buffer, and tag will be None. A marker is always 2 bytes.
  """
  end = len(buffer)
  while True:
    # Search for start of marker.
    next_offset = buffer.find(MARKER_START, offset)
    if next_offset == -1:
      return end, None
    next_offset += 1

    # Parse marker.
    if next_offset == end:
      # The buffer ends with a lone start byte, so there is no tag to read.
      return end, None
    tag = buffer[next_offset]
    if tag == MARKER_TAG_FILL:
      # Possible fill byte, reparse as start of marker. The search must resume
      # from this byte; restarting from the old offset would find the same
      # start byte again and never terminate.
      offset = next_offset
      continue
    next_offset += 1

    if tag == MARKER_TAG_IGNORE:
      # Not a real marker. Resume the search after both bytes for the same
      # reason as above.
      offset = next_offset
      continue
    return next_offset, tag
74*3ac0a46fSAndroid Build Coastguard Worker
75*3ac0a46fSAndroid Build Coastguard Worker
def rewrite_jp2c(buffer):
  """Builds a replacement "jp2c" box with comment markers stripped.

  Each comment marker is dropped together with every byte that follows it up
  to the next marker found by parse_marker(). All other bytes are copied
  through unchanged.

  Args:
    buffer: The contents of the original "jp2c" box, without its box header.

  Returns:
    A bytearray holding the complete rewritten box: a box header with the new
    length and the "jp2c" tag, followed by the filtered codestream.
  """
  # Reserve space for the box header; it is filled in once the size is known.
  output = bytearray(BOX_HEADER_SIZE)

  end = len(buffer)
  keep_from = 0  # Start of the pending run of bytes not yet copied to output.
  cursor = 0
  while cursor < end:
    cursor, tag = parse_marker(buffer, cursor)
    if tag != MARKER_TAG_COMMENT:
      # Other markers stay part of the pending run.
      continue

    # Copy the pending run, stopping just before the comment marker itself.
    output.extend(buffer[keep_from:cursor - MARKER_SIZE])

    # Skip ahead to whatever marker follows the comment.
    cursor, tag = parse_marker(buffer, cursor)
    if tag is not None:
      # Step back so the next iteration parses this marker again; it may be
      # another comment.
      cursor -= MARKER_SIZE
    keep_from = cursor

  # Copy whatever remains after the last stripped comment.
  output.extend(buffer[keep_from:])

  struct.pack_into('>I4s', output, 0, len(output), BOX_TAG_JP2C)
  return output
103*3ac0a46fSAndroid Build Coastguard Worker
104*3ac0a46fSAndroid Build Coastguard Worker
def main(in_file, out_file):
  """Copies a JP2 file from in_file to out_file, stripping codestream comments.

  Args:
    in_file: A binary file object to read the whole JP2 file from.
    out_file: A binary file object to write the filtered JP2 file to.
  """
  data = in_file.read()
  total = len(data)

  # Walk the file one box at a time.
  position = 0
  while position < total:
    box_end, box_tag = parse_box(data, position)
    if box_tag != BOX_TAG_JP2C:
      # Every other kind of box is copied verbatim, header included.
      out_file.write(data[position:box_end])
    else:
      # The "jp2c" (codestream) box is rebuilt from its contents.
      codestream = data[position + BOX_HEADER_SIZE:box_end]
      out_file.write(rewrite_jp2c(codestream))
    position = box_end

  out_file.flush()
121*3ac0a46fSAndroid Build Coastguard Worker
122*3ac0a46fSAndroid Build Coastguard Worker
if __name__ == '__main__':
  # Run as a filter over the binary standard streams.
  main(in_file=sys.stdin.buffer, out_file=sys.stdout.buffer)
125