#!/usr/bin/env python3
# Copyright 2023 The PDFium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Strips comments from a JP2 file.

This is a simple filter script to strip comments from a JP2 file, in order to
save a few bytes from the final file size.
"""

import struct
import sys

BOX_HEADER_SIZE = 8
BOX_TAG_JP2C = b'jp2c'

MARKER_SIZE = 2
MARKER_START = 0xff
MARKER_TAG_IGNORE = 0x00
MARKER_TAG_COMMENT = 0x64
MARKER_TAG_FILL = 0xff


def parse_box(buffer, offset):
  """Parses the next box in a JP2 file.

  Args:
    buffer: A buffer containing the JP2 file contents.
    offset: The starting offset into the buffer.

  Returns:
    A tuple (next_offset, tag) where next_offset is the ending offset, and tag
    is the type tag. The box contents will be buffer[offset + 8:next_offset].
    A box whose length field is 0 extends to the end of the buffer.

  Raises:
    ValueError: The length field is 1 (extended-length box, not supported by
      this script) or otherwise smaller than the 8-byte box header.
    struct.error: Fewer than 8 bytes remain at offset.
  """
  length, tag = struct.unpack_from('>I4s', buffer, offset)
  if length == 0:
    # A zero length means "until end of file". Without this, next_offset would
    # equal offset and the caller's scan would never advance.
    return len(buffer), tag
  if length < BOX_HEADER_SIZE:
    # Length 1 announces an 8-byte extended length; 2-7 cannot even cover the
    # header. Either way the 8-byte-header assumption made here is invalid.
    raise ValueError(
        f'Unsupported box length {length} for box {tag!r} at offset {offset}')
  return offset + length, tag


def parse_marker(buffer, offset):
  """Parses the next marker in a codestream.

  Args:
    buffer: A buffer containing the codestream.
    offset: The starting offset into the buffer.

  Returns:
    A tuple (next_offset, tag) where next_offset is the offset after the marker,
    and tag is the type tag. If no marker was found, next_offset will point to
    the end of the buffer, and tag will be None. A marker is always 2 bytes.
  """
  while True:
    # Search for start of marker.
    next_offset = buffer.find(MARKER_START, offset)
    if next_offset == -1:
      next_offset = len(buffer)
      break
    next_offset += 1

    # Parse marker.
    if next_offset == len(buffer):
      # Lone 0xff as the very last byte: no tag byte follows.
      break
    tag = buffer[next_offset]
    if tag == MARKER_TAG_FILL:
      # Possible fill byte, reparse as start of marker. The search must resume
      # at this second 0xff, otherwise the same byte is found forever.
      offset = next_offset
      continue
    next_offset += 1

    if tag == MARKER_TAG_IGNORE:
      # Not a real marker. Resume the search after the 0xff 0x00 pair.
      offset = next_offset
      continue
    return next_offset, tag

  # No marker found: keep the documented (next_offset, tag) shape so callers
  # can always unpack the result.
  return next_offset, None


def rewrite_jp2c(buffer):
  """Rebuilds a "jp2c" box with all comment marker segments removed.

  The end of a comment is located by scanning for the next marker, not by
  reading the segment's length field.

  Args:
    buffer: The contents of the codestream box, without the 8-byte box header.

  Returns:
    A bytearray holding a complete "jp2c" box: a fresh 8-byte header with the
    updated length, followed by the codestream minus its comments.
  """
  # Reserve space for the box header; it is filled in once the size is known.
  rewrite_buffer = bytearray(BOX_HEADER_SIZE)

  offset = 0
  # Start of the pending run of bytes that has not been copied out yet.
  start_offset = offset
  while offset < len(buffer):
    next_offset, marker = parse_marker(buffer, offset)
    if marker == MARKER_TAG_COMMENT:
      # Flush the codestream before the comment.
      rewrite_buffer.extend(buffer[start_offset:next_offset - MARKER_SIZE])

      # Find the next marker, skipping the comment.
      next_offset, marker = parse_marker(buffer, next_offset)
      if marker is not None:
        # Reparse the marker.
        next_offset -= MARKER_SIZE
      start_offset = next_offset
    # Other markers are passed through as part of the pending run.
    offset = next_offset

  # Flush the tail of the codestream.
  rewrite_buffer.extend(buffer[start_offset:])

  struct.pack_into('>I4s', rewrite_buffer, 0, len(rewrite_buffer), BOX_TAG_JP2C)
  return rewrite_buffer


def main(in_file, out_file):
  """Copies a JP2 file from in_file to out_file, stripping codestream comments.

  Args:
    in_file: A binary file object to read the whole JP2 file from.
    out_file: A binary file object to write the filtered JP2 file to.
  """
  buffer = in_file.read()

  # Scan through JP2 boxes.
  offset = 0
  while offset < len(buffer):
    next_offset, tag = parse_box(buffer, offset)
    if tag == BOX_TAG_JP2C:
      # Rewrite "jp2c" (codestream) box.
      out_file.write(rewrite_jp2c(buffer[offset + BOX_HEADER_SIZE:next_offset]))
    else:
      # Pass through other boxes.
      out_file.write(buffer[offset:next_offset])
    offset = next_offset

  out_file.flush()


if __name__ == '__main__':
  main(sys.stdin.buffer, sys.stdout.buffer)