xref: /aosp_15_r20/external/pdfium/testing/tools/strip_jp2_comments.py (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1#!/usr/bin/env python3
2# Copyright 2023 The PDFium Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Strips comments from a JP2 file.
6
7This is a simple filter script to strip comments from a JP2 file, in order to
8save a few bytes from the final file size.
9"""
10
11import struct
12import sys
13
# Bytes occupied by a box header: a 32-bit big-endian length followed by a
# 4-byte type tag.
BOX_HEADER_SIZE = 8
# Type tag of the box that holds the codestream.
BOX_TAG_JP2C = b'jp2c'

# A codestream marker is two bytes: MARKER_START followed by a tag byte.
MARKER_SIZE = 2
MARKER_START = 0xFF
# Tag byte meaning the preceding MARKER_START byte is not a real marker.
MARKER_TAG_IGNORE = 0x00
# Tag byte of a comment marker; these are the segments that get stripped.
MARKER_TAG_COMMENT = 0x64
# A second MARKER_START byte is treated as a possible fill byte and is itself
# re-examined as the start of a marker.
MARKER_TAG_FILL = 0xFF
22
23
def parse_box(buffer, offset):
  """Parses the next box in a JP2 file.

  Only boxes with the compact 8-byte header (a 32-bit big-endian length followed
  by a 4-byte type tag) are supported.

  Args:
    buffer: A buffer containing the JP2 file contents.
    offset: The starting offset into the buffer.

  Returns:
    A tuple (next_offset, tag) where next_offset is the ending offset, and tag
    is the type tag. The box contents will be buffer[offset + 8:next_offset].

  Raises:
    ValueError: If the length field is 1 (extended 64-bit length, which this
      parser does not support) or any other value smaller than the header.
    struct.error: If fewer than 8 bytes remain in the buffer at offset.
  """
  header_size = 8
  length, tag = struct.unpack_from('>I4s', buffer, offset)

  if length == 0:
    # A zero length means the box runs to the end of the file. Returning
    # `offset + 0` would leave the caller stuck on the same box forever.
    return len(buffer), tag

  if length == 1:
    # The real length lives in a 64-bit field after the tag, which makes the
    # header 16 bytes and breaks the "contents start at offset + 8" contract.
    raise ValueError(
        'extended-length box {!r} at offset {} is not supported'.format(
            tag, offset))

  if length < header_size:
    # A box cannot be shorter than its own header.
    raise ValueError('invalid box length {} for box {!r} at offset {}'.format(
        length, tag, offset))

  return offset + length, tag
37
38
def parse_marker(buffer, offset):
  """Parses the next marker in a codestream.

  A marker is a MARKER_START byte followed by a tag byte. A MARKER_START
  followed by MARKER_TAG_IGNORE is skipped, and one followed by another
  MARKER_START is treated as a fill byte, with the second byte re-examined as
  the possible start of a marker.

  Args:
    buffer: A buffer containing the codestream.
    offset: The starting offset into the buffer.

  Returns:
    A tuple (next_offset, tag) where next_offset is the offset after the marker,
    and tag is the type tag. If no marker was found, next_offset will point to
    the end of the buffer, and tag will be None. A marker is always 2 bytes.
  """
  end = len(buffer)
  while True:
    # Search for start of marker.
    start = buffer.find(MARKER_START, offset)
    if start == -1 or start + 1 >= end:
      # Either no start byte remains, or it is the final byte and has no tag.
      # Always return a tuple so callers can unpack the result.
      return end, None

    tag = buffer[start + 1]
    if tag == MARKER_TAG_FILL:
      # Possible fill byte: resume at the second byte so it is reparsed as the
      # start of a marker. Advancing here is what lets the loop terminate.
      offset = start + 1
      continue

    if tag == MARKER_TAG_IGNORE:
      # Not a real marker: resume the search after both bytes.
      offset = start + MARKER_SIZE
      continue

    return start + MARKER_SIZE, tag
74
75
def rewrite_jp2c(buffer):
  """Builds a new "jp2c" box from a codestream with comment segments removed.

  Each comment is dropped from its marker up to (but not including) the next
  marker found by parse_marker(); the comment's extent is determined by that
  scan, not by any length field inside the segment.

  Args:
    buffer: The codestream, i.e. the contents of a "jp2c" box without its
      box header.

  Returns:
    A bytearray holding a complete box: an 8-byte header (32-bit big-endian
    total length and the "jp2c" tag) followed by the filtered codestream.
  """
  # Reserve room for the box header; it is filled in once the size is known.
  output = bytearray(BOX_HEADER_SIZE)

  total = len(buffer)
  cursor = 0
  keep_from = 0  # Start of the pending run of bytes that will be kept.
  while cursor < total:
    cursor, marker = parse_marker(buffer, cursor)
    if marker != MARKER_TAG_COMMENT:
      # Any other marker stays part of the pending run.
      continue

    # Emit everything up to, but not including, the comment marker.
    output += buffer[keep_from:cursor - MARKER_SIZE]

    # Skip ahead to whatever marker follows the comment.
    cursor, marker = parse_marker(buffer, cursor)
    if marker is not None:
      # Step back so the following marker is parsed again by the loop (it may
      # itself be a comment) and is kept in the output otherwise.
      cursor -= MARKER_SIZE
    keep_from = cursor

  # Emit whatever remains after the last stripped comment.
  output += buffer[keep_from:]

  struct.pack_into('>I4s', output, 0, len(output), BOX_TAG_JP2C)
  return output
103
104
def main(in_file, out_file):
  """Copies a JP2 file from in_file to out_file, stripping codestream comments.

  The whole input is read into memory and walked box by box. "jp2c" boxes are
  rebuilt by rewrite_jp2c(); every other box is written out unchanged.

  Args:
    in_file: A binary file object the JP2 file is read from.
    out_file: A binary file object the filtered JP2 file is written to.
  """
  data = in_file.read()
  size = len(data)

  position = 0
  while position < size:
    box_end, box_tag = parse_box(data, position)
    if box_tag != BOX_TAG_JP2C:
      # Not a codestream box: copy it through verbatim, header included.
      out_file.write(data[position:box_end])
    else:
      # Codestream box: hand over only the contents; the rewrite emits its own
      # header.
      codestream = data[position + BOX_HEADER_SIZE:box_end]
      out_file.write(rewrite_jp2c(codestream))
    position = box_end

  out_file.flush()
121
122
if __name__ == '__main__':
  # JP2 data is binary, so use the raw byte streams underneath stdin/stdout.
  stdin_bytes = sys.stdin.buffer
  stdout_bytes = sys.stdout.buffer
  main(stdin_bytes, stdout_bytes)
125