xref: /aosp_15_r20/external/cronet/third_party/icu/scripts/icualign.py (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1#!/usr/bin/python3
2
3# Copyright 2022 The Chromium Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7import itertools
8import struct
9import sys
10
# System page size in bytes (4 KiB). Alignment is computed with the bit mask
# `PAGE_SIZE - 1`, so this value has to stay a power of two.
PAGE_SIZE = 1 << 12
# Minimum size in bytes (8 KiB) of the file to be aligned.
THRESHOLD = 1 << 13
13
14
15# Read 2 bytes.
def read16(data, offset):
    """Return the little-endian unsigned 16-bit integer stored in `data`.

    Args:
        data: Bytes-like buffer to read from.
        offset: Byte position of the first of the two bytes to decode.

    Raises:
        struct.error: If fewer than 2 bytes are available at `offset`.
    """
    (value,) = struct.unpack_from("<H", data, offset)
    return value
18
19
20# Read 4 bytes.
def read32(data, offset):
    """Return the little-endian unsigned 32-bit integer stored in `data`.

    Args:
        data: Bytes-like buffer to read from.
        offset: Byte position of the first of the four bytes to decode.

    Raises:
        struct.error: If fewer than 4 bytes are available at `offset`.
    """
    (value,) = struct.unpack_from("<I", data, offset)
    return value
23
24
25# Write 4 bytes.
def write32(data, offset, value):
    """Store `value` into `data` as a little-endian unsigned 32-bit integer.

    Args:
        data: Writable buffer (e.g. a bytearray) that is modified in place.
        offset: Byte position at which the four bytes are written.
        value: Integer to encode; it must fit in an unsigned 32-bit field.

    Raises:
        struct.error: If `value` is out of range or the buffer is too small.
    """
    # The buffer is updated in place; nothing meaningful is returned.
    struct.pack_into("<I", data, offset, value)
28
29
30################################################################################
31# (Adapted from `source/tools/toolutil/pkg_gencmn.cpp`)
32#
33# A .dat package file contains a simple Table of Contents of item names,
34# followed by the items themselves:
35#
36# 1. ToC table
37#
38# uint32_t count; - number of items
39# UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
40#     uint32_t nameOffset; - offset of the item name
41#     uint32_t dataOffset; - offset of the item data
42# both are byte offsets from the beginning of the data
43#
44# 2. item name strings
45#
46# All item names are stored as char * strings in one block between the ToC table
47# and the data items.
48#
49# 3. data items
50#
51# The data items are stored following the item names block.
52# The data items are stored in the sorted order of their names.
53################################################################################
54
55
def pad_data(data):
    """Return a copy of an ICU .dat package with its large items page-aligned.

    The package is parsed as: a header whose size is the 16-bit value at
    offset 0, then a 32-bit item count, then one 8-byte Table of Contents
    entry (name offset, data offset) per item. Offsets stored in the ToC are
    relative to the end of the header. All fields are read little-endian.

    Every item whose size is at least THRESHOLD bytes is preceded by enough
    zero bytes to start on a PAGE_SIZE boundary, and each ToC data offset is
    rewritten to point at the item's new position. Name offsets are left
    untouched because the names block is copied verbatim.

    Args:
        data: Bytes-like contents of the input package.

    Returns:
        A bytearray holding the padded package.

    Raises:
        struct.error: If `data` is too short to hold the fields being read.
    """
    header_size = read16(data, 0)           # Size of the ICU header.
    item_count = read32(data, header_size)  # Number of files in the package.
    toc_offset = header_size + 4            # Offset of the Table of Contents.

    # Without items there is no first ToC entry to take the start of the data
    # section from, and nothing to align: hand back an unchanged copy.
    if item_count == 0:
        return bytearray(data)

    # Copy everything up to the first item (header, ToC and names block).
    out_offset = read32(data, toc_offset + 4) + header_size
    out = bytearray(data[:out_offset])

    for i in range(item_count):
        # Position of this item's ToC entry; the data offset is its 2nd field.
        entry_offset = toc_offset + (i * 8)
        data_offset = read32(data, entry_offset + 4)
        data_file_offset = data_offset + header_size

        # An item extends up to the start of the next one; the last item
        # extends to the end of the input.
        if i + 1 < item_count:
            next_entry_offset = entry_offset + 8
            size = read32(data, next_entry_offset + 4) - data_offset
        else:
            size = len(data) - data_file_offset

        # Insert zero padding so that items at or above the threshold start
        # on a page boundary.
        page_offset = out_offset & (PAGE_SIZE - 1)
        if size >= THRESHOLD and page_offset != 0:
            padding = PAGE_SIZE - page_offset
            out += bytes(padding)
            out_offset += padding

        # Record the item's new position in the copied Table of Contents.
        write32(out, entry_offset + 4, out_offset - header_size)

        # Copy the content of the item.
        out += data[data_file_offset:data_file_offset + size]
        out_offset += size

    return out
103
104
if __name__ == "__main__":
    # Check arguments.
    if len(sys.argv) != 3:
        error_str = "icualign: wrong number of arguments\n\n"
        help_str = "usage: icualign <infilename> <outfilename>\n\n"
        sys.exit(error_str + help_str)

    # Extract arguments.
    in_filename, out_filename = sys.argv[1:]

    # Read the whole input file. The handle is closed as soon as the read
    # completes instead of staying open while the output is produced.
    with open(in_filename, "rb") as in_file:
        data = in_file.read()

    # Apply padding to the file to achieve the desired alignment.
    out_data = pad_data(data)

    # Write the output file.
    with open(out_filename, "wb") as out_file:
        out_file.write(out_data)
124