#!/usr/bin/env python3
# Copyright 2014 The PDFium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Source: /aosp_15_r20/external/pdfium/testing/tools/fixup_pdf_template.py
# (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
"""Expands a hand-written PDF testcase (template) into a valid PDF file.

There are several places in a PDF file where byte-offsets are required. This
script replaces {{name}}-style variables in the input with calculated results:

  {{include path/to/file}} - inserts file's contents into stream.
  {{header}} - expands to the header comment required for PDF files.
  {{xref}} - expands to a generated xref table, noting the offset.
  {{trailer}} - expands to a standard trailer with "1 0 R" as the /Root.
  {{trailersize}} - expands to `/Size n`, to be used in non-standard trailers.
  {{startxref}} - expands to a startxref directive followed by correct offset.
  {{startxrefobj x y}} - expands to a startxref directive followed by correct
                         offset pointing to the start of `x y obj`.
  {{object x y}} - expands to `x y obj` declaration, noting the offset.
  {{streamlen}} - expands to `/Length n`.
"""

import io
import optparse
import os
import re
import sys

# Line endings, kept as bytes because every file is handled in binary mode.
WINDOWS_LINE_ENDING = b'\r\n'
UNIX_LINE_ENDING = b'\n'

# Extensions of included files whose CRLF line endings are rewritten to LF
# while the file is being copied into the intermediate stream.
EXTENSION_OVERRIDE_LINE_ENDINGS = [
    '.js',
    '.fragment',
    '.in',
    '.xml',
]


class StreamLenState:
  """States of the scan that measures the stream following {{streamlen}}."""
  # Not measuring anything; a {{streamlen}} token may start a measurement.
  START = 1
  # A {{streamlen}} token was seen; waiting for the line that is `stream`.
  FIND_STREAM = 2
  # Inside the stream body; counting bytes until the line that is `endstream`.
  FIND_ENDSTREAM = 3


class TemplateProcessor:
  """Expands template tokens line by line while tracking byte offsets.

  The processor is driven in two passes over the same sequence of lines:
  preprocess_line() is called on every line first to measure the streams that
  follow each {{streamlen}} token, then process_line() is called on every line
  to produce the output bytes and to record object and xref offsets.
  """
  HEADER_TOKEN = b'{{header}}'
  HEADER_REPLACEMENT = b'%PDF-1.7\n%\xa0\xf2\xa4\xf4'

  XREF_TOKEN = b'{{xref}}'
  XREF_REPLACEMENT = b'xref\n%d %d\n'

  XREF_REPLACEMENT_N = b'%010d %05d n \n'
  XREF_REPLACEMENT_F = b'0000000000 65535 f \n'
  # XREF rows must be exactly 20 bytes - space required.
  assert len(XREF_REPLACEMENT_F) == 20

  TRAILER_TOKEN = b'{{trailer}}'
  TRAILER_REPLACEMENT = b'trailer <<\n  /Root 1 0 R\n  /Size %d\n>>'

  TRAILERSIZE_TOKEN = b'{{trailersize}}'
  TRAILERSIZE_REPLACEMENT = b'/Size %d'

  STARTXREF_TOKEN = b'{{startxref}}'
  STARTXREF_REPLACEMENT = b'startxref\n%d'

  # Raw bytes literals: the backslashes belong to the regex, not to Python.
  STARTXREFOBJ_PATTERN = rb'\{\{startxrefobj\s+(\d+)\s+(\d+)\}\}'

  OBJECT_PATTERN = rb'\{\{object\s+(\d+)\s+(\d+)\}\}'
  OBJECT_REPLACEMENT = rb'\g<1> \g<2> obj'

  STREAMLEN_TOKEN = b'{{streamlen}}'
  STREAMLEN_REPLACEMENT = b'/Length %d'

  def __init__(self):
    """Initializes an empty processor positioned at output offset 0."""
    self.streamlen_state = StreamLenState.START
    # One measured length per {{streamlen}} token, in order of appearance.
    self.streamlens = []
    # Number of output bytes produced so far by process_line().
    self.offset = 0
    # Output offset at which the {{xref}} table was emitted.
    self.xref_offset = 0
    self.max_object_number = 0
    # Maps object number -> (output offset, generation number).
    self.objects = {}

  def insert_xref_entry(self, object_number, generation_number):
    """Records that `object_number` starts at the current output offset.

    Args:
      object_number: integer object number from an {{object x y}} token.
      generation_number: integer generation number from the same token.
    """
    self.objects[object_number] = (self.offset, generation_number)
    self.max_object_number = max(self.max_object_number, object_number)

  def generate_xref_table(self):
    """Returns the xref table, as bytes, for the objects recorded so far.

    The table covers object numbers 0 through max_object_number; numbers with
    no recorded object get the fixed free-entry row.
    """
    rows = [self.XREF_REPLACEMENT % (0, self.max_object_number + 1)]
    for object_number in range(self.max_object_number + 1):
      entry = self.objects.get(object_number)
      if entry is None:
        rows.append(self.XREF_REPLACEMENT_F)
      else:
        rows.append(self.XREF_REPLACEMENT_N % entry)
    return b''.join(rows)

  def preprocess_line(self, line):
    """First pass: measures the stream that follows a {{streamlen}} token.

    Lengths are accumulated from the unexpanded template lines, so the result
    is the raw byte count of the lines between `stream` and `endstream`.

    Args:
      line: one line of the template, as bytes, including its line ending.
    """
    if self.STREAMLEN_TOKEN in line:
      # A new measurement may only start once the previous one has finished.
      assert self.streamlen_state == StreamLenState.START
      self.streamlen_state = StreamLenState.FIND_STREAM
      self.streamlens.append(0)
      return

    if (self.streamlen_state == StreamLenState.FIND_STREAM and
        line.rstrip() == b'stream'):
      self.streamlen_state = StreamLenState.FIND_ENDSTREAM
      return

    if self.streamlen_state == StreamLenState.FIND_ENDSTREAM:
      if line.rstrip() == b'endstream':
        self.streamlen_state = StreamLenState.START
      else:
        self.streamlens[-1] += len(line)

  def process_line(self, line):
    """Second pass: expands the tokens in `line` and advances the offset.

    Args:
      line: one line of the template, as bytes, including its line ending.

    Returns:
      The expanded line as bytes.

    Raises:
      KeyError: if {{startxrefobj x y}} names an object not yet recorded.
      AssertionError: if its generation number differs from the recorded one.
      IndexError: if {{streamlen}} appears with no measured length left.
    """
    if self.HEADER_TOKEN in line:
      line = line.replace(self.HEADER_TOKEN, self.HEADER_REPLACEMENT)
    if self.STREAMLEN_TOKEN in line:
      # Lengths were queued in order of appearance by preprocess_line().
      sub = self.STREAMLEN_REPLACEMENT % self.streamlens.pop(0)
      line = line.replace(self.STREAMLEN_TOKEN, sub)
    if self.XREF_TOKEN in line:
      # The generated table replaces the whole line, not just the token.
      self.xref_offset = self.offset
      line = self.generate_xref_table()
    if self.TRAILER_TOKEN in line:
      replacement = self.TRAILER_REPLACEMENT % (self.max_object_number + 1)
      line = line.replace(self.TRAILER_TOKEN, replacement)
    if self.TRAILERSIZE_TOKEN in line:
      replacement = self.TRAILERSIZE_REPLACEMENT % (self.max_object_number + 1)
      line = line.replace(self.TRAILERSIZE_TOKEN, replacement)
    if self.STARTXREF_TOKEN in line:
      replacement = self.STARTXREF_REPLACEMENT % self.xref_offset
      line = line.replace(self.STARTXREF_TOKEN, replacement)
    # re.match anchors at the start of the line, so {{object ...}} and
    # {{startxrefobj ...}} are only recognized as the first thing on a line.
    match = re.match(self.OBJECT_PATTERN, line)
    if match:
      self.insert_xref_entry(int(match.group(1)), int(match.group(2)))
      line = re.sub(self.OBJECT_PATTERN, self.OBJECT_REPLACEMENT, line)
    match = re.match(self.STARTXREFOBJ_PATTERN, line)
    if match:
      (offset, generation_number) = self.objects[int(match.group(1))]
      assert int(match.group(2)) == generation_number
      replacement = self.STARTXREF_REPLACEMENT % offset
      line = re.sub(self.STARTXREFOBJ_PATTERN, replacement, line)
    self.offset += len(line)
    return line


def expand_file(infile, output_path):
  """Expands the template read from `infile` and writes it to `output_path`.

  Args:
    infile: iterable of bytes lines, with {{include}} directives already
      resolved.
    output_path: path of the file to create; opened in binary write mode.

  An IOError is reported on stderr and otherwise swallowed, so the caller can
  carry on with the next testcase.
  """
  try:
    with open(output_path, 'wb') as outfile:
      processor = TemplateProcessor()
      # Pass 1: buffer the input while measuring stream lengths, because a
      # {{streamlen}} token precedes the stream it describes.
      preprocessed = io.BytesIO()
      for line in infile:
        preprocessed.write(line)
        processor.preprocess_line(line)
      # Pass 2: replay the buffered lines and emit the expanded output.
      preprocessed.seek(0)
      for line in preprocessed:
        outfile.write(processor.process_line(line))
  except IOError:
    # Only the output path is known here; the input is an in-memory stream.
    print('failed to process %s' % output_path, file=sys.stderr)


def insert_includes(input_path, output_file, visited_set):
  """Copies `input_path` to `output_file`, expanding {{include}} directives.

  A line that starts (after optional whitespace) with {{include path}} is
  replaced by the recursively expanded contents of `path`, resolved relative
  to the directory of the including file.

  Args:
    input_path: path of the file to copy.
    output_file: binary writable stream that receives the expanded bytes.
    visited_set: set of normalized paths currently being expanded; used to
      detect circular inclusion. Modified in place.

  Raises:
    IOError: if `input_path` or a file it includes cannot be read. The
      failure is reported on stderr before being re-raised.
  """
  input_path = os.path.normpath(input_path)
  if input_path in visited_set:
    print('Circular inclusion %s, ignoring' % input_path, file=sys.stderr)
    return
  visited_set.add(input_path)
  try:
    _, file_extension = os.path.splitext(input_path)
    override_line_endings = (file_extension in EXTENSION_OVERRIDE_LINE_ENDINGS)

    # NOTE: this flag is only updated for files whose line endings are
    # overridden, so any other file always gets a trailing LF appended below.
    end_of_file_line_ending = False
    with open(input_path, 'rb') as infile:
      for line in infile:
        match = re.match(rb'\s*\{\{include\s+(.+)\}\}', line)
        if match:
          insert_includes(
              os.path.join(
                  os.path.dirname(input_path),
                  match.group(1).decode('utf-8')), output_file, visited_set)
        else:
          if override_line_endings:
            # Replace CRLF with LF for the extensions listed in
            # EXTENSION_OVERRIDE_LINE_ENDINGS.
            if line.endswith(WINDOWS_LINE_ENDING):
              line = line.removesuffix(WINDOWS_LINE_ENDING) + UNIX_LINE_ENDING
              end_of_file_line_ending = True
            else:
              end_of_file_line_ending = line.endswith(UNIX_LINE_ENDING)
          output_file.write(line)

    # Ensure the include ends on its own line.
    if not end_of_file_line_ending:
      output_file.write(UNIX_LINE_ENDING)
  except IOError:
    print('failed to include %s' % input_path, file=sys.stderr)
    raise
  # Done with this file: it may legitimately be included again elsewhere.
  visited_set.discard(input_path)


def main(argv=None):
  """Expands every template named on the command line into a .pdf file.

  Each output is named after its template with the extension replaced by
  `.pdf`, and is written next to the template unless --output-dir is given.

  Args:
    argv: optional list of command-line arguments, without the program name.
      Defaults to sys.argv[1:] when None.

  Returns:
    0, the process exit status.
  """
  parser = optparse.OptionParser()
  parser.add_option('--output-dir', default='')
  options, args = parser.parse_args(argv)
  for testcase_path in args:
    testcase_filename = os.path.basename(testcase_path)
    testcase_root, _ = os.path.splitext(testcase_filename)
    output_dir = os.path.dirname(testcase_path)
    if options.output_dir:
      output_dir = options.output_dir
    # Resolve includes into memory first; offsets are computed afterwards on
    # the fully included text.
    intermediate_stream = io.BytesIO()
    insert_includes(testcase_path, intermediate_stream, set())
    intermediate_stream.seek(0)
    output_path = os.path.join(output_dir, testcase_root + '.pdf')
    expand_file(intermediate_stream, output_path)
  return 0


# Run as a script: the exit status is whatever main() returns.
if __name__ == '__main__':
  sys.exit(main())