#!/usr/bin/env python3
# Copyright 2014 The PDFium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Expands a hand-written PDF testcase (template) into a valid PDF file.

There are several places in a PDF file where byte-offsets are required. This
script replaces {{name}}-style variables in the input with calculated results:

  {{include path/to/file}} - inserts file's contents into stream.
  {{header}} - expands to the header comment required for PDF files.
  {{xref}} - expands to a generated xref table, noting the offset.
  {{trailer}} - expands to a standard trailer with "1 0 R" as the /Root.
  {{trailersize}} - expands to `/Size n`, to be used in non-standard trailers.
  {{startxref}} - expands to a startxref directive followed by correct offset.
  {{startxrefobj x y}} - expands to a startxref directive followed by correct
                         offset pointing to the start of `x y obj`.
  {{object x y}} - expands to `x y obj` declaration, noting the offset.
  {{streamlen}} - expands to `/Length n`.
"""

import io
import optparse
import os
import re
import sys

# Line Endings.
WINDOWS_LINE_ENDING = b'\r\n'
UNIX_LINE_ENDING = b'\n'

# List of extensions whose line endings should be modified after parsing.
EXTENSION_OVERRIDE_LINE_ENDINGS = [
    '.js',
    '.fragment',
    '.in',
    '.xml',
]


class StreamLenState:
  """States of the scanner that measures the stream following {{streamlen}}."""
  # No {{streamlen}} is pending.
  START = 1
  # A {{streamlen}} token was seen; waiting for the `stream` keyword line.
  FIND_STREAM = 2
  # Inside the stream body; accumulating bytes until the `endstream` line.
  FIND_ENDSTREAM = 3


class TemplateProcessor:
  """Expands template tokens line by line while tracking byte offsets.

  Usage is two-pass: feed every line to preprocess_line() first (so stream
  lengths are known), then feed the same lines, in the same order, to
  process_line() and write out what it returns.
  """

  HEADER_TOKEN = b'{{header}}'
  HEADER_REPLACEMENT = b'%PDF-1.7\n%\xa0\xf2\xa4\xf4'

  XREF_TOKEN = b'{{xref}}'
  XREF_REPLACEMENT = b'xref\n%d %d\n'

  XREF_REPLACEMENT_N = b'%010d %05d n \n'
  XREF_REPLACEMENT_F = b'0000000000 65535 f \n'
  # XREF rows must be exactly 20 bytes - space required.
  assert len(XREF_REPLACEMENT_F) == 20

  TRAILER_TOKEN = b'{{trailer}}'
  TRAILER_REPLACEMENT = b'trailer <<\n /Root 1 0 R\n /Size %d\n>>'

  TRAILERSIZE_TOKEN = b'{{trailersize}}'
  TRAILERSIZE_REPLACEMENT = b'/Size %d'

  STARTXREF_TOKEN = b'{{startxref}}'
  STARTXREF_REPLACEMENT = b'startxref\n%d'

  # Raw bytes literals: `\{`, `\s`, `\d` and `\g` are regex syntax, not Python
  # string escapes, and non-raw literals would trigger invalid-escape warnings.
  STARTXREFOBJ_PATTERN = rb'\{\{startxrefobj\s+(\d+)\s+(\d+)\}\}'

  OBJECT_PATTERN = rb'\{\{object\s+(\d+)\s+(\d+)\}\}'
  OBJECT_REPLACEMENT = rb'\g<1> \g<2> obj'

  STREAMLEN_TOKEN = b'{{streamlen}}'
  STREAMLEN_REPLACEMENT = b'/Length %d'

  def __init__(self):
    """Initializes an empty processor positioned at output offset 0."""
    self.streamlen_state = StreamLenState.START
    # One measured length per {{streamlen}} token, in order of appearance.
    self.streamlens = []
    # Number of output bytes produced so far by process_line().
    self.offset = 0
    # Output offset at which the most recent {{xref}} table started.
    self.xref_offset = 0
    self.max_object_number = 0
    # Maps object number -> (output byte offset, generation number).
    self.objects = {}

  def insert_xref_entry(self, object_number, generation_number):
    """Records that `object_number` starts at the current output offset."""
    self.objects[object_number] = (self.offset, generation_number)
    self.max_object_number = max(self.max_object_number, object_number)

  def generate_xref_table(self):
    """Returns the xref table bytes for objects 0..max_object_number.

    Object numbers that were never declared get the free-entry row.
    """
    rows = [self.XREF_REPLACEMENT % (0, self.max_object_number + 1)]
    for i in range(self.max_object_number + 1):
      if i in self.objects:
        rows.append(self.XREF_REPLACEMENT_N % self.objects[i])
      else:
        rows.append(self.XREF_REPLACEMENT_F)
    return b''.join(rows)

  def preprocess_line(self, line):
    """First pass: measures the stream body following each {{streamlen}}.

    Args:
      line: one input line as bytes, including its line ending.
    """
    if self.STREAMLEN_TOKEN in line:
      assert self.streamlen_state == StreamLenState.START
      self.streamlen_state = StreamLenState.FIND_STREAM
      self.streamlens.append(0)
      return

    if (self.streamlen_state == StreamLenState.FIND_STREAM and
        line.rstrip() == b'stream'):
      self.streamlen_state = StreamLenState.FIND_ENDSTREAM
      return

    if self.streamlen_state == StreamLenState.FIND_ENDSTREAM:
      if line.rstrip() == b'endstream':
        self.streamlen_state = StreamLenState.START
      else:
        # Every byte of the raw line (line ending included) counts.
        self.streamlens[-1] += len(line)

  def process_line(self, line):
    """Second pass: expands all tokens in `line` and advances the offset.

    Args:
      line: one input line as bytes, including its line ending.

    Returns:
      The expanded line as bytes.
    """
    if self.HEADER_TOKEN in line:
      line = line.replace(self.HEADER_TOKEN, self.HEADER_REPLACEMENT)
    if self.STREAMLEN_TOKEN in line:
      sub = self.STREAMLEN_REPLACEMENT % self.streamlens.pop(0)
      # The token is a literal, not a regex, so use a plain replace.
      line = line.replace(self.STREAMLEN_TOKEN, sub)
    if self.XREF_TOKEN in line:
      self.xref_offset = self.offset
      # The generated table replaces the entire line, not just the token.
      line = self.generate_xref_table()
    if self.TRAILER_TOKEN in line:
      replacement = self.TRAILER_REPLACEMENT % (self.max_object_number + 1)
      line = line.replace(self.TRAILER_TOKEN, replacement)
    if self.TRAILERSIZE_TOKEN in line:
      replacement = self.TRAILERSIZE_REPLACEMENT % (self.max_object_number + 1)
      line = line.replace(self.TRAILERSIZE_TOKEN, replacement)
    if self.STARTXREF_TOKEN in line:
      replacement = self.STARTXREF_REPLACEMENT % self.xref_offset
      line = line.replace(self.STARTXREF_TOKEN, replacement)
    # re.match anchors at the line start, so {{object ...}} and
    # {{startxrefobj ...}} are only recognized as the first thing on a line.
    match = re.match(self.OBJECT_PATTERN, line)
    if match:
      self.insert_xref_entry(int(match.group(1)), int(match.group(2)))
      line = re.sub(self.OBJECT_PATTERN, self.OBJECT_REPLACEMENT, line)
    match = re.match(self.STARTXREFOBJ_PATTERN, line)
    if match:
      (offset, generation_number) = self.objects[int(match.group(1))]
      assert int(match.group(2)) == generation_number
      replacement = self.STARTXREF_REPLACEMENT % offset
      line = re.sub(self.STARTXREFOBJ_PATTERN, replacement, line)
    self.offset += len(line)
    return line


def expand_file(infile, output_path):
  """Expands the template read from `infile` and writes it to `output_path`.

  Args:
    infile: iterable of bytes lines (e.g. a binary file object or BytesIO).
    output_path: path of the PDF file to create.

  An IOError is reported on stderr rather than propagated.
  """
  processor = TemplateProcessor()
  try:
    with open(output_path, 'wb') as outfile:
      # Buffer the input so it can be walked twice: once to measure stream
      # lengths, once to emit the expanded output.
      preprocessed = io.BytesIO()
      for line in infile:
        preprocessed.write(line)
        processor.preprocess_line(line)
      preprocessed.seek(0)
      for line in preprocessed:
        outfile.write(processor.process_line(line))
  except IOError:
    # Report the output path; this function has no `input_path` in scope.
    print('failed to process %s' % output_path, file=sys.stderr)


def insert_includes(input_path, output_file, visited_set):
  """Copies `input_path` to `output_file`, expanding {{include}} recursively.

  Args:
    input_path: path of the file to copy.
    output_file: writable binary file object receiving the result.
    visited_set: set of normalized paths currently being included; used to
      detect and skip circular inclusion.

  Raises:
    IOError: if a file cannot be read (after reporting it on stderr).
  """
  input_path = os.path.normpath(input_path)
  if input_path in visited_set:
    print('Circular inclusion %s, ignoring' % input_path, file=sys.stderr)
    return
  visited_set.add(input_path)
  try:
    _, file_extension = os.path.splitext(input_path)
    override_line_endings = (file_extension in EXTENSION_OVERRIDE_LINE_ENDINGS)

    end_of_file_line_ending = False
    with open(input_path, 'rb') as infile:
      for line in infile:
        match = re.match(rb'\s*\{\{include\s+(.+)\}\}', line)
        if match:
          # Included paths are resolved relative to the including file.
          insert_includes(
              os.path.join(
                  os.path.dirname(input_path),
                  match.group(1).decode('utf-8')), output_file, visited_set)
        else:
          if override_line_endings:
            # Replace CRLF with LF line endings for .in files.
            if line.endswith(WINDOWS_LINE_ENDING):
              line = line.removesuffix(WINDOWS_LINE_ENDING) + UNIX_LINE_ENDING
              end_of_file_line_ending = True
            else:
              end_of_file_line_ending = line.endswith(UNIX_LINE_ENDING)
          output_file.write(line)

    # Ensure the include ends on its own line.
    if not end_of_file_line_ending:
      output_file.write(UNIX_LINE_ENDING)
  except IOError:
    print('failed to include %s' % input_path, file=sys.stderr)
    raise
  visited_set.discard(input_path)


def main():
  """Expands each testcase path given on the command line into a .pdf file.

  Output goes next to the testcase unless --output-dir is given.

  Returns:
    0, used as the process exit status.
  """
  parser = optparse.OptionParser()
  parser.add_option('--output-dir', default='')
  options, args = parser.parse_args()
  for testcase_path in args:
    testcase_filename = os.path.basename(testcase_path)
    testcase_root, _ = os.path.splitext(testcase_filename)
    output_dir = os.path.dirname(testcase_path)
    if options.output_dir:
      output_dir = options.output_dir
    intermediate_stream = io.BytesIO()
    insert_includes(testcase_path, intermediate_stream, set())
    intermediate_stream.seek(0)
    output_path = os.path.join(output_dir, testcase_root + '.pdf')
    expand_file(intermediate_stream, output_path)
  return 0


if __name__ == '__main__':
  sys.exit(main())