#!/usr/bin/env python3
# Copyright 2023 The PDFium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Strips comments from a JP2 file.

This is a simple filter script to strip comments from a JP2 file, in order to
save a few bytes from the final file size.
"""

import struct
import sys

# Size in bytes of a basic box header: 32-bit big-endian length + 4-byte tag.
BOX_HEADER_SIZE = 8
BOX_TAG_JP2C = b'jp2c'

# A marker is the MARKER_START byte followed by a one-byte tag.
MARKER_SIZE = 2
MARKER_START = 0xff
MARKER_TAG_IGNORE = 0x00
MARKER_TAG_COMMENT = 0x64
MARKER_TAG_FILL = 0xff


def parse_box(buffer, offset):
    """Parses the next box in a JP2 file.

    Args:
      buffer: A buffer containing the JP2 file contents.
      offset: The starting offset into the buffer.

    Returns:
      A tuple (next_offset, tag) where next_offset is the ending offset, and
      tag is the type tag. The box contents will be
      buffer[offset + 8:next_offset]. A length field of 0 means the box runs
      to the end of the buffer, so next_offset is len(buffer) in that case.

    Raises:
      ValueError: The length field is between 1 and 7. A value of 1 selects
        the extended 64-bit length form, which this script does not support;
        the remaining values cannot even cover the 8-byte header.
      struct.error: Fewer than 8 bytes are available at offset.
    """
    length, tag = struct.unpack_from('>I4s', buffer, offset)
    if length == 0:
        # The box extends to the end of the file. Without this, the caller
        # would never advance past this box.
        return len(buffer), tag
    if length < BOX_HEADER_SIZE:
        raise ValueError(
            'unsupported box length %d for box %r at offset %d' %
            (length, tag, offset))
    return offset + length, tag


def parse_marker(buffer, offset):
    """Parses the next marker in a codestream.

    Args:
      buffer: A bytes-like buffer (with a find() method) containing the
        codestream.
      offset: The starting offset into the buffer.

    Returns:
      A tuple (next_offset, tag) where next_offset is the offset after the
      marker, and tag is the type tag. If no marker was found, next_offset
      will point to the end of the buffer, and tag will be None. A marker is
      always 2 bytes.
    """
    while True:
        # Search for start of marker.
        start = buffer.find(MARKER_START, offset)
        if start == -1:
            return len(buffer), None

        # Parse marker.
        tag_offset = start + 1
        if tag_offset == len(buffer):
            # A lone start byte at the very end is not a marker.
            return tag_offset, None

        tag = buffer[tag_offset]
        if tag == MARKER_TAG_FILL:
            # Possible fill byte, reparse it as the start of a marker. The
            # search must resume at this byte, otherwise the same position is
            # found again and the loop never terminates.
            offset = tag_offset
            continue

        if tag == MARKER_TAG_IGNORE:
            # Not a real marker; resume the search after both bytes.
            offset = tag_offset + 1
            continue

        return tag_offset + 1, tag


def rewrite_jp2c(buffer):
    """Rebuilds a "jp2c" box from a codestream with comment markers removed.

    Each comment marker, together with every byte up to the next marker (or
    the end of the codestream if there is none), is dropped. Everything else
    is copied through unchanged.

    Args:
      buffer: The contents of the "jp2c" box, without its box header.

    Returns:
      A bytearray holding a complete "jp2c" box: an 8-byte header whose length
      field reflects the stripped size, followed by the stripped codestream.
    """
    # Reserve space for the box header; it is filled in once the size is known.
    rewrite_buffer = bytearray(BOX_HEADER_SIZE)

    offset = 0
    start_offset = offset
    while offset < len(buffer):
        next_offset, marker = parse_marker(buffer, offset)
        if marker == MARKER_TAG_COMMENT:
            # Flush the codestream before the comment.
            rewrite_buffer.extend(
                buffer[start_offset:next_offset - MARKER_SIZE])

            # Find the next marker, skipping the comment.
            next_offset, marker = parse_marker(buffer, next_offset)
            if marker is not None:
                # Back up so the marker is reparsed on the next iteration; it
                # may itself be another comment.
                next_offset -= MARKER_SIZE
            start_offset = next_offset
        # Other markers pass through as part of the pending range.
        offset = next_offset

    # Flush the tail of the codestream.
    rewrite_buffer.extend(buffer[start_offset:])

    struct.pack_into('>I4s', rewrite_buffer, 0, len(rewrite_buffer),
                     BOX_TAG_JP2C)
    return rewrite_buffer


def main(in_file, out_file):
    """Copies a JP2 file from in_file to out_file, stripping comments.

    Boxes tagged "jp2c" are rewritten without comment markers; all other
    boxes are copied through unchanged.

    Args:
      in_file: A binary file object to read the whole JP2 file from.
      out_file: A binary file object to write the result to.
    """
    buffer = in_file.read()

    # Scan through JP2 boxes.
    offset = 0
    while offset < len(buffer):
        next_offset, tag = parse_box(buffer, offset)
        if tag == BOX_TAG_JP2C:
            # Rewrite "jp2c" (codestream) box.
            out_file.write(
                rewrite_jp2c(buffer[offset + BOX_HEADER_SIZE:next_offset]))
        else:
            # Pass through other boxes.
            out_file.write(buffer[offset:next_offset])
        offset = next_offset

    out_file.flush()


if __name__ == '__main__':
    main(sys.stdin.buffer, sys.stdout.buffer)