xref: /aosp_15_r20/external/noto-fonts/scripts/subset_noto_cjk.py (revision e5825d3be9fd13b272e7df556d285d1f07f3b027)
1*e5825d3bSAndroid Build Coastguard Worker#!/usr/bin/env python3
2*e5825d3bSAndroid Build Coastguard Worker# coding=UTF-8
3*e5825d3bSAndroid Build Coastguard Worker#
4*e5825d3bSAndroid Build Coastguard Worker# Copyright 2016 Google Inc. All rights reserved.
5*e5825d3bSAndroid Build Coastguard Worker#
6*e5825d3bSAndroid Build Coastguard Worker# Licensed under the Apache License, Version 2.0 (the "License");
7*e5825d3bSAndroid Build Coastguard Worker# you may not use this file except in compliance with the License.
8*e5825d3bSAndroid Build Coastguard Worker# You may obtain a copy of the License at
9*e5825d3bSAndroid Build Coastguard Worker#
10*e5825d3bSAndroid Build Coastguard Worker#     http://www.apache.org/licenses/LICENSE-2.0
11*e5825d3bSAndroid Build Coastguard Worker#
12*e5825d3bSAndroid Build Coastguard Worker# Unless required by applicable law or agreed to in writing, software
13*e5825d3bSAndroid Build Coastguard Worker# distributed under the License is distributed on an "AS IS" BASIS,
14*e5825d3bSAndroid Build Coastguard Worker# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15*e5825d3bSAndroid Build Coastguard Worker# See the License for the specific language governing permissions and
16*e5825d3bSAndroid Build Coastguard Worker# limitations under the License.
17*e5825d3bSAndroid Build Coastguard Worker
18*e5825d3bSAndroid Build Coastguard Worker"""Create a curated subset of Noto CJK for Android."""
19*e5825d3bSAndroid Build Coastguard Worker
20*e5825d3bSAndroid Build Coastguard Workerimport argparse
21*e5825d3bSAndroid Build Coastguard Workerimport logging
22*e5825d3bSAndroid Build Coastguard Workerimport os
23*e5825d3bSAndroid Build Coastguard Workerfrom pathlib import Path
24*e5825d3bSAndroid Build Coastguard Worker
25*e5825d3bSAndroid Build Coastguard Workerfrom fontTools import ttLib
26*e5825d3bSAndroid Build Coastguard Workerfrom nototools import font_data
27*e5825d3bSAndroid Build Coastguard Workerfrom nototools import tool_utils
28*e5825d3bSAndroid Build Coastguard Workerfrom nototools import ttc_utils
29*e5825d3bSAndroid Build Coastguard Worker
# Characters supported in Noto CJK fonts that UTR #51 recommends default to
# emoji-style. They are part of EXCLUDED_CODEPOINTS, i.e. they get deleted
# from the cmap of the subsetted CJK fonts.
EMOJI_IN_CJK = {
    0x26BD, # ⚽ SOCCER BALL
    0x26BE, # ⚾ BASEBALL
    0x1F18E, # 🆎 NEGATIVE SQUARED AB
    0x1F191, # 🆑 SQUARED CL
    0x1F192, # 🆒 SQUARED COOL
    0x1F193, # 🆓 SQUARED FREE
    0x1F194, # 🆔 SQUARED ID
    0x1F195, # 🆕 SQUARED NEW
    0x1F196, # 🆖 SQUARED NG
    0x1F197, # 🆗 SQUARED OK
    0x1F198, # 🆘 SQUARED SOS
    0x1F199, # 🆙 SQUARED UP WITH EXCLAMATION MARK
    0x1F19A, # 🆚 SQUARED VS
    0x1F201, # 🈁 SQUARED KATAKANA KOKO
    0x1F21A, # 🈚 SQUARED CJK UNIFIED IDEOGRAPH-7121
    0x1F22F, # 🈯 SQUARED CJK UNIFIED IDEOGRAPH-6307
    0x1F232, # 🈲 SQUARED CJK UNIFIED IDEOGRAPH-7981
    0x1F233, # 🈳 SQUARED CJK UNIFIED IDEOGRAPH-7A7A
    0x1F234, # 🈴 SQUARED CJK UNIFIED IDEOGRAPH-5408
    0x1F235, # 🈵 SQUARED CJK UNIFIED IDEOGRAPH-6E80
    0x1F236, # 🈶 SQUARED CJK UNIFIED IDEOGRAPH-6709
    0x1F238, # 🈸 SQUARED CJK UNIFIED IDEOGRAPH-7533
    0x1F239, # 🈹 SQUARED CJK UNIFIED IDEOGRAPH-5272
    0x1F23A, # 🈺 SQUARED CJK UNIFIED IDEOGRAPH-55B6
    0x1F250, # 🉐 CIRCLED IDEOGRAPH ADVANTAGE
    0x1F251, # 🉑 CIRCLED IDEOGRAPH ACCEPT
}
60*e5825d3bSAndroid Build Coastguard Worker
# Characters that Android renders emoji-style by its own decision, even
# though UTR #51 does not recommend emoji presentation for them by default.
# Like EMOJI_IN_CJK, they are part of EXCLUDED_CODEPOINTS.
ANDROID_EMOJI = {
    0x2600,  # ☀ BLACK SUN WITH RAYS
    0x2601,  # ☁ CLOUD
    0x260E,  # ☎ BLACK TELEPHONE
    0x261D,  # ☝ WHITE UP POINTING INDEX
    0x263A,  # ☺ WHITE SMILING FACE
    0x2660,  # ♠ BLACK SPADE SUIT
    0x2663,  # ♣ BLACK CLUB SUIT
    0x2665,  # ♥ BLACK HEART SUIT
    0x2666,  # ♦ BLACK DIAMOND SUIT
    0x270C,  # ✌ VICTORY HAND
    0x2744,  # ❄ SNOWFLAKE
    0x2764,  # ❤ HEAVY BLACK HEART
}
77*e5825d3bSAndroid Build Coastguard Worker
# ASCII control characters must not be supported by the subsetted fonts.
# NOTE(review): the range string is presumably parsed as hexadecimal, i.e.
# U+0000..U+001F -- confirm against tool_utils.parse_int_ranges.
CONTROL_CHARS = tool_utils.parse_int_ranges('0000-001F')

# Every code point that gets deleted from the cmap, in ascending order.
EXCLUDED_CODEPOINTS = sorted(
    EMOJI_IN_CJK.union(ANDROID_EMOJI, CONTROL_CHARS))

# File names of the font collections that main() processes.
TTC_NAMES = ('NotoSansCJK-Regular.ttc', 'NotoSerifCJK-Regular.ttc')
84*e5825d3bSAndroid Build Coastguard Worker
85*e5825d3bSAndroid Build Coastguard Worker
def remove_from_cmap(infile, outfile, exclude=frozenset()):
    """Removes a set of characters from a font file's cmap table.

    Args:
        infile: Font file to load; handed to ttLib.TTFont.
        outfile: Destination the modified font is saved to. Callers in this
            file pass the same name as infile to rewrite a font in place.
        exclude: Code points to delete from the cmap. Defaults to an empty
            set, in which case the font is simply re-saved.
    """
    # Open the font as a context manager so that it is closed once the
    # modified copy has been written, instead of staying open until the
    # object happens to be garbage collected.
    with ttLib.TTFont(infile) as font:
        font_data.delete_from_cmap(font, exclude)
        font.save(outfile)
91*e5825d3bSAndroid Build Coastguard Worker
92*e5825d3bSAndroid Build Coastguard Worker
def remove_codepoints_from_ttc_using_ttc_utils(ttc_name, out_dir):
    """Subsets a TTC by extracting, editing and rebuilding it via ttc_utils.

    The member fonts of the collection are extracted into out_dir, every code
    point in EXCLUDED_CODEPOINTS is removed from each member's cmap, and the
    collection is rebuilt under the name ttc_name while out_dir is the
    working directory. The extracted member fonts are deleted afterwards,
    also when subsetting or rebuilding fails.

    Args:
        ttc_name: Name of the TTC to read. The same value is used as the
            output name of the rebuilt collection inside out_dir.
        out_dir: Directory that receives the extracted fonts and the rebuilt
            collection.
    """
    # NOTE(review): the returned names are used as paths relative to out_dir
    # below -- presumably ttcfile_extract returns bare file names; confirm.
    otf_names = ttc_utils.ttcfile_extract(ttc_name, out_dir)

    with tool_utils.temp_chdir(out_dir):
        try:
            for otf_name in otf_names:
                logging.info('Subsetting %s...', otf_name)
                # Rewrite each extracted font in place.
                remove_from_cmap(
                    otf_name, otf_name, exclude=EXCLUDED_CODEPOINTS)
            ttc_utils.ttcfile_build(ttc_name, otf_names)
        finally:
            # The extracted fonts are only intermediates; do not leave them
            # next to the output, whether or not the rebuild succeeded.
            for otf_name in otf_names:
                os.remove(otf_name)
103*e5825d3bSAndroid Build Coastguard Worker
104*e5825d3bSAndroid Build Coastguard Worker
def remove_codepoints_from_ttc(ttc_path, out_dir):
    """Strips EXCLUDED_CODEPOINTS from every font of a TTC and saves a copy.

    Args:
        ttc_path: pathlib-style path of the collection to load.
        out_dir: pathlib-style directory; the result is written there under
            the same file name as ttc_path.
    """
    logging.info('Loading %s', ttc_path)
    collection = ttLib.ttCollection.TTCollection(ttc_path)

    logging.info('Subsetting %d fonts in the collection', len(collection))
    for member in collection:
        font_data.delete_from_cmap(member, EXCLUDED_CODEPOINTS)

    destination = out_dir / ttc_path.name
    logging.info('Saving to %s', destination)
    collection.save(destination)

    # Report how much the collection shrank (or grew); both sizes are read
    # after the output has been written.
    size_before = ttc_path.stat().st_size
    size_after = destination.stat().st_size
    logging.info('Size: %d --> %d, delta=%d',
                 size_before,
                 size_after,
                 size_after - size_before)
121*e5825d3bSAndroid Build Coastguard Worker
122*e5825d3bSAndroid Build Coastguard Worker
def main():
    """Parses the command line and subsets every collection in TTC_NAMES.

    Positional ``input`` is the directory holding the source collections
    (default: current directory); ``-o/--output`` is the directory the
    subsetted collections are written to (default: ``subsetted``), created
    if missing. ``-v`` enables INFO logging and ``-vv`` DEBUG logging.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input', default='.', nargs='?')
    parser.add_argument('-o', '--output', default='subsetted')
    parser.add_argument('--use-ttc-utils', action='store_true')
    parser.add_argument('-v', '--verbose', action='count')
    options = parser.parse_args()

    # Without -v the count is None and logging is left unconfigured.
    if options.verbose:
        level = logging.DEBUG if options.verbose > 1 else logging.INFO
        logging.basicConfig(level=level)

    source_dir = Path(options.input)
    target_dir = Path(options.output)
    target_dir.mkdir(parents=True, exist_ok=True)

    for ttc_name in TTC_NAMES:
        if not options.use_ttc_utils:
            remove_codepoints_from_ttc(source_dir / ttc_name, target_dir)
            continue
        # NOTE(review): this path is given the bare file name, so the
        # ``input`` directory is not applied here; the collection is looked
        # up relative to the current working directory instead.
        remove_codepoints_from_ttc_using_ttc_utils(ttc_name, target_dir)
143*e5825d3bSAndroid Build Coastguard Worker
144*e5825d3bSAndroid Build Coastguard Worker
# Run the subsetter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
147