#!/usr/bin/env python3
# Copyright 2023 The PDFium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Strips comments from a JP2 file.

This is a simple filter script to strip comments from a JP2 file, in order to
save a few bytes from the final file size.
"""

import struct
import sys

BOX_HEADER_SIZE = 8
BOX_TAG_JP2C = b'jp2c'

# Special values of the 32-bit box length field.
BOX_LENGTH_TO_END = 0  # Box extends to the end of the file.
BOX_LENGTH_EXTENDED = 1  # Real length is in a 64-bit field after the tag.
BOX_EXTENDED_LENGTH_SIZE = 8

MARKER_SIZE = 2
MARKER_START = 0xff
MARKER_TAG_IGNORE = 0x00
MARKER_TAG_COMMENT = 0x64
MARKER_TAG_FILL = 0xff


def _parse_box_header(buffer, offset):
    """Parses the header of the box starting at `offset`.

    Args:
      buffer: A buffer containing the JP2 file contents.
      offset: The starting offset of the box within the buffer.

    Returns:
      A tuple (header_size, next_offset, tag). The box contents are
      buffer[offset + header_size:next_offset].

    Raises:
      ValueError: The declared box length is smaller than the box header.
      struct.error: The buffer is too short to contain the header.
    """
    length, tag = struct.unpack_from('>I4s', buffer, offset)
    header_size = BOX_HEADER_SIZE

    if length == BOX_LENGTH_EXTENDED:
        # The real length is a 64-bit value stored right after the tag.
        (length,) = struct.unpack_from('>Q', buffer, offset + BOX_HEADER_SIZE)
        header_size += BOX_EXTENDED_LENGTH_SIZE
    elif length == BOX_LENGTH_TO_END:
        # Last box in the file: it runs to the end of the buffer. Without
        # this, the caller would never advance past the box.
        length = len(buffer) - offset

    if length < header_size:
        raise ValueError('invalid box length %d at offset %d' %
                         (length, offset))

    return header_size, offset + length, tag


def parse_box(buffer, offset):
    """Parses the next box in a JP2 file.

    Args:
      buffer: A buffer containing the JP2 file contents.
      offset: The starting offset into the buffer.

    Returns:
      A tuple (next_offset, tag) where next_offset is the ending offset, and
      tag is the type tag. The box contents will be
      buffer[offset + 8:next_offset], except for extended-length boxes, whose
      header is 16 bytes long.

    Raises:
      ValueError: The declared box length is smaller than the box header.
      struct.error: The buffer is too short to contain the header.
    """
    _, next_offset, tag = _parse_box_header(buffer, offset)
    return next_offset, tag


def parse_marker(buffer, offset):
    """Parses the next marker in a codestream.

    Args:
      buffer: A buffer containing the codestream.
      offset: The starting offset into the buffer.

    Returns:
      A tuple (next_offset, tag) where next_offset is the offset after the
      marker, and tag is the type tag. If no marker was found, next_offset
      will point to the end of the buffer, and tag will be None.

      A marker is always 2 bytes.
    """
    while True:
        # Search for start of marker.
        next_offset = buffer.find(MARKER_START, offset)
        if next_offset == -1:
            next_offset = len(buffer)
            break
        next_offset += 1

        # Parse marker.
        if next_offset == len(buffer):
            # The start byte is the last byte; there is no tag to read.
            break

        tag = buffer[next_offset]
        if tag == MARKER_TAG_FILL:
            # Possible fill byte, reparse as start of marker. The search must
            # resume at this byte, otherwise the same position is found
            # forever.
            offset = next_offset
            continue
        next_offset += 1

        if tag == MARKER_TAG_IGNORE:
            # Not a real marker. Resume searching after both bytes.
            offset = next_offset
            continue

        return next_offset, tag

    # Always return a pair so callers can unpack the result.
    return next_offset, None


def rewrite_jp2c(buffer):
    """Rebuilds a "jp2c" box with all comment segments removed.

    A comment is dropped by scanning forward from its marker to the next
    marker; the comment's own length field is not consulted.

    Args:
      buffer: The contents of the codestream box, without the box header.

    Returns:
      A bytearray holding a complete "jp2c" box (8-byte header followed by
      the filtered codestream).
    """
    # Reserve room for the box header; it is filled in once the size is known.
    rewrite_buffer = bytearray(BOX_HEADER_SIZE)

    offset = 0
    start_offset = offset
    while offset < len(buffer):
        next_offset, marker = parse_marker(buffer, offset)
        if marker == MARKER_TAG_COMMENT:
            # Flush the codestream before the comment.
            rewrite_buffer.extend(
                buffer[start_offset:next_offset - MARKER_SIZE])

            # Find the next marker, skipping the comment.
            next_offset, marker = parse_marker(buffer, next_offset)
            if marker is not None:
                # Reparse the marker (it may itself be a comment).
                next_offset -= MARKER_SIZE
            start_offset = next_offset
        # Other markers are passed through untouched.
        offset = next_offset

    # Flush the tail of the codestream.
    rewrite_buffer.extend(buffer[start_offset:])

    struct.pack_into('>I4s', rewrite_buffer, 0, len(rewrite_buffer),
                     BOX_TAG_JP2C)
    return rewrite_buffer


def main(in_file, out_file):
    """Copies a JP2 file from in_file to out_file, stripping comments.

    Args:
      in_file: A binary file object to read the whole JP2 file from.
      out_file: A binary file object to write the filtered JP2 file to.
    """
    buffer = in_file.read()

    # Scan through JP2 boxes.
    offset = 0
    while offset < len(buffer):
        header_size, next_offset, tag = _parse_box_header(buffer, offset)
        if tag == BOX_TAG_JP2C:
            # Rewrite "jp2c" (codestream) box.
            out_file.write(
                rewrite_jp2c(buffer[offset + header_size:next_offset]))
        else:
            # Pass through other boxes.
            out_file.write(buffer[offset:next_offset])
        offset = next_offset

    out_file.flush()


if __name__ == '__main__':
    main(sys.stdin.buffer, sys.stdout.buffer)