# xref: /aosp_15_r20/external/brotli/scripts/dictionary/step-04-generate-java-literals.py (revision f4ee7fba7774faf2a30f13154332c0a06550dbc4)

# Step 04 - generate Java literals.
#
# Java byte-code has ridiculous restrictions. There is no such thing as an
# "array literal" - those are implemented as a series of data[x] = y;
# as a consequence an N-byte array will use 7N bytes in the class, plus N bytes
# in the instantiated variable. Also, no literal can be longer than 64KiB.
#
# To keep dictionary data compact both in source code and in compiled format
# we use the following tricks:
#  * use String as a data container
#  * store only the lowest 7 bits; i.e. all characters fit the ASCII table; this
#    allows efficient conversion to a byte array; also ASCII characters use only
#    1 byte of memory (UTF-8 encoding)
#  * RLE-compress the sequence of 8-th bits
#
# This script generates literals used in Java code.

# Input: the raw dictionary produced by the previous steps.
bin_path = "dictionary.bin"

with open(bin_path, "rb") as raw:
  # A bytearray yields integers when iterated under both Python 2 and
  # Python 3, so no per-byte ord() call (which fails on Python 3) is needed.
  data = bytearray(raw.read())

# Python 3 has no unichr(); its chr() already covers the full Unicode range.
try:
  unichr
except NameError:
  unichr = chr

# low - one character per input byte, holding only the lowest 7 bits.
# hi  - run-length encoding of the 8-th bits: each character encodes the length
#       of a run of bytes whose top bit is equal, alternating between "clear"
#       (skip) and "set" (flip) runs, starting with a "clear" run.
low = []
hi = []
is_skip = True
# Every emitted run length is biased by this offset.
skip_flip_offset = 36
cntr = skip_flip_offset
for value in data:
  low.append(chr(value & 0x7F))
  if (value < 0x80) == is_skip:
    # The byte continues the current run.
    cntr += 1
  else:
    # The top bit changed: close the current run and start the opposite one.
    # The byte that caused the change is the first element of the new run,
    # hence the "+ 1".
    is_skip = not is_skip
    hi.append(unichr(cntr))
    cntr = skip_flip_offset + 1
# Close the last run (emitted even when it is empty).
hi.append(unichr(cntr))

# Split the 7-bit data in two halves so that it is stored as two literals.
half = len(low) // 2
low0 = low[:half]
low1 = low[half:]

def escape(chars):
  """Converts characters to fragments usable inside a Java string literal.

  Args:
    chars: iterable of single-character strings.

  Returns:
    A list with one string per input character: a short backslash escape for
    CR, LF, TAB, double quote and backslash; a "\\uXXXX" escape (upper-case
    hex, zero-padded to at least four digits) for any other character whose
    code is below 32 or at least 127; the character itself otherwise.

  NOTE(review): a code point above 0xFFFF would be formatted with more than
  four hex digits - confirm that callers never pass such characters.
  """
  short_escapes = {
      "\r": "\\r",
      "\n": "\\n",
      "\t": "\\t",
      "\"": "\\\"",
      "\\": "\\\\",
  }
  pieces = []
  for ch in chars:
    if ch in short_escapes:
      pieces.append(short_escapes[ch])
      continue
    code = ord(ch)
    if 32 <= code < 127:
      # Printable ASCII is emitted verbatim.
      pieces.append(ch)
    else:
      pieces.append("\\u%04X" % code)
  return pieces


# Assemble the Java constant declarations: one line per literal, in the order
# DATA0 (first half of the 7-bit data), DATA1 (second half), SKIP_FLIP (run
# lengths of the 8-th bits).
source_code = []
for constant_name, constant_chars in (
    ("DATA0", low0), ("DATA1", low1), ("SKIP_FLIP", hi)):
  source_code.append(
      "  private static final String %s = \"" % constant_name)
  source_code.append("".join(escape(constant_chars)))
  source_code.append("\";\n")

# Output: a Java source fragment holding the generated declarations.
src_path = "DictionaryData.inc.java"

generated_text = "".join(source_code)
with open(src_path, "w") as source:
  source.write(generated_text)