#!/usr/bin/python3

# Copyright 2022 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import itertools
import struct
import sys

PAGE_SIZE = 0x1000  # System page size.
THRESHOLD = 0x2000  # Minimum size of the file to be aligned.


def read16(data, offset):
    """Return the little-endian unsigned 16-bit value at `offset` in `data`."""
    return struct.unpack_from("<H", data, offset)[0]


def read32(data, offset):
    """Return the little-endian unsigned 32-bit value at `offset` in `data`."""
    return struct.unpack_from("<I", data, offset)[0]


def write32(data, offset, value):
    """Store `value` as a little-endian unsigned 32-bit integer, in place.

    `data` must be a writable buffer (e.g. a bytearray). Returns None.
    """
    struct.pack_into("<I", data, offset, value)


################################################################################
# (Adapted from `source/tools/toolutil/pkg_gencmn.cpp`)
#
# A .dat package file contains a simple Table of Contents of item names,
# followed by the items themselves:
#
# 1. ToC table
#
# uint32_t count; - number of items
# UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
#     uint32_t nameOffset; - offset of the item name
#     uint32_t dataOffset; - offset of the item data
# both are byte offsets from the beginning of the data
#
# 2. item name strings
#
# All item names are stored as char * strings in one block between the ToC table
# and the data items.
#
# 3. data items
#
# The data items are stored following the item names block.
# The data items are stored in the sorted order of their names.
################################################################################


def pad_data(data):
    """Return a copy of a .dat package with its large items page-aligned.

    Every item whose size is at least THRESHOLD bytes is preceded by enough
    zero bytes to make it start on a PAGE_SIZE boundary of the output file.
    The data offset of every item in the Table of Contents is rewritten to
    match the new layout. Smaller items are copied without extra padding.

    Args:
        data: bytes-like content of the input package.

    Returns:
        A bytearray holding the re-laid-out package. A package that contains
        no items is returned as an unmodified copy.
    """
    header_size = read16(data, 0)  # Size of the ICU header.
    item_count = read32(data, header_size)  # Number of files in the package.
    toc_offset = header_size + 4  # Offset of the Table of Contents.

    # An empty package has no first ToC entry to read the data start from,
    # and nothing to align: hand back a plain copy.
    if item_count == 0:
        return bytearray(data)

    out = bytearray()

    # Copy everything until the beginning of the data. The first entry's data
    # offset marks where the header, ToC and name strings end.
    out_offset = read32(data, toc_offset + 4) + header_size
    out += data[:out_offset]

    # Iterate over the files.
    for i in range(item_count):
        # Offset inside the ToC for this file (8 bytes per entry).
        entry_offset = toc_offset + (i * 8)

        # Offset of the data, relative to the beginning of the data section,
        # and relative to the beginning of the file.
        data_offset = read32(data, entry_offset + 4)
        data_file_offset = data_offset + header_size

        # Calculate the size of this file: it runs up to the next item's
        # data, or up to the end of the input for the last item.
        if i + 1 < item_count:
            next_data_offset = read32(data, entry_offset + 8 + 4)
            size = next_data_offset - data_offset
        else:
            size = len(data) - data_file_offset

        # Insert padding to align files bigger than the threshold.
        page_offset = out_offset & (PAGE_SIZE - 1)
        if size >= THRESHOLD and page_offset != 0:
            padding = PAGE_SIZE - page_offset
            out.extend(itertools.repeat(0x00, padding))
            out_offset += padding

        # Put the new offset into the Table of Contents. ToC offsets are
        # relative to the end of the ICU header, hence the subtraction.
        write32(out, entry_offset + 4, out_offset - header_size)

        # Copy the content of the file.
        out += data[data_file_offset : data_file_offset + size]
        out_offset += size

    return out


def main():
    """Command-line entry point: icualign <infilename> <outfilename>."""
    # Check arguments.
    if len(sys.argv) != 3:
        error_str = "icualign: wrong number of arguments\n\n"
        help_str = "usage: icualign <infilename> <outfilename>\n\n"
        sys.exit(error_str + help_str)

    # Extract arguments.
    in_filename = sys.argv[1]
    out_filename = sys.argv[2]

    # Read the input file.
    with open(in_filename, "rb") as in_file:
        data = in_file.read()

    # Apply padding to the file to achieve the desired alignment.
    out_data = pad_data(data)

    # Write the output file.
    with open(out_filename, "wb") as out_file:
        out_file.write(out_data)


if __name__ == "__main__":
    main()