xref: /aosp_15_r20/external/noto-fonts/emoji-compat/createfont.py (revision e5825d3be9fd13b272e7df556d285d1f07f3b027)
1#!/usr/bin/env python3
2#
3# Copyright (C) 2017 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""
18Creates the EmojiCompat font with the metadata. Metadata is embedded in FlatBuffers binary format
19under a meta tag with name 'Emji'.
20
In order to create the final font, the following are used as inputs:
22
23- NotoColorEmoji.ttf: Emoji font in the Android framework. Currently at
24external/noto-fonts/emoji/NotoColorEmoji.ttf
25
26- Unicode files: Unicode files that are in the framework, and lists information about all the
27emojis. These files are emoji-data.txt, emoji-sequences.txt, emoji-zwj-sequences.txt,
28and emoji-variation-sequences.txt. Currently at external/unicode/.
29
30- additions/emoji-zwj-sequences.txt: Includes emojis that are not defined in Unicode files, but are
31in the Android font. Resides in framework and currently under external/unicode/.
32
33- data/emoji_metadata.txt: The file that includes the id, codepoints, the first
34Android OS version that the emoji was added (sdkAdded), and finally the first EmojiCompat font
35version that the emoji was added (compatAdded). Updated when the script is executed.
36
37- data/emoji_metadata.fbs: The flatbuffer schema file. See http://google.github.io/flatbuffers/.
38
After execution, the following files are generated if they don't exist; otherwise, they are updated:
40- font/NotoColorEmojiCompat.ttf
41- supported-emojis/emojis.txt
42- data/emoji_metadata.txt
- ../emoji-compat-flatbuffers/src/java/androidx/text/emoji/flatbuffer/*
44"""
45
46import contextlib
47import csv
48import hashlib
49import itertools
50import json
51import os
52import re
53import shutil
54import subprocess
55import sys
56import tempfile
57from fontTools import ttLib
58from fontTools.ttLib.tables import otTables
59from nototools import font_data
60
########### UPDATE OR CHECK WHEN A NEW FONT IS BEING GENERATED ###########
# Last Android SDK version. Used as the default sdkAdded value of a newly created emoji entry.
SDK_VERSION = 31
# Metadata version that will be embedded into the font. If there are updates to the font that
# would cause data/emoji_metadata.txt to change, this integer number should be incremented. This
# number defines in which EmojiCompat metadata version the emoji is added to the font.
METADATA_VERSION = 8

####### main directories where output files are created #######
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
FONT_DIR = os.path.join(SCRIPT_DIR, 'font')
DATA_DIR = os.path.join(SCRIPT_DIR, 'data')
SUPPORTED_EMOJIS_DIR = os.path.join(SCRIPT_DIR, 'supported-emojis')
# relative path; it is joined onto FLATBUFFER_MODULE_DIR below
JAVA_SRC_DIR = os.path.join('src', 'java')
####### output files #######
# font file
FONT_PATH = os.path.join(FONT_DIR, 'NotoColorEmojiCompat.ttf')
# emoji metadata output file (space separated text, one emoji per row)
OUTPUT_META_FILE = os.path.join(DATA_DIR, 'emoji_metadata.txt')
# emojis test file
TEST_DATA_PATH = os.path.join(SUPPORTED_EMOJIS_DIR, 'emojis.txt')
####### input files #######
# Unicode file names to read emoji data
EMOJI_DATA_FILE = 'emoji-data.txt'
EMOJI_SEQ_FILE = 'emoji-sequences.txt'
EMOJI_ZWJ_FILE = 'emoji-zwj-sequences.txt'
EMOJI_VARIATION_SEQ_FILE = 'emoji-variation-sequences.txt'
# Android OS emoji files for emojis that are not in Unicode files
ANDROID_EMOJI_ZWJ_SEQ_FILE = os.path.join('additions', 'emoji-zwj-sequences.txt')
ANDROID_EMOJIS_SEQ_FILE = os.path.join('additions', 'emoji-sequences.txt')
# Android OS emoji style override file. Codepoints that are rendered with emoji style by default
# even though not defined so in emoji-data.txt.
EMOJI_STYLE_OVERRIDE_FILE = os.path.join('additions', 'emoji-data.txt')
# emoji metadata file; the previous run's output is read back as input
INPUT_META_FILE = OUTPUT_META_FILE
# default flatbuffer module location (if not specified by caller)
FLATBUFFER_MODULE_DIR = os.path.join(SCRIPT_DIR, '..', 'emoji-compat-flatbuffers')
# flatbuffer schema
FLATBUFFER_SCHEMA = os.path.join(FLATBUFFER_MODULE_DIR, 'data', 'emoji_metadata.fbs')
# file path for java header, it will be prepended to flatbuffer java files
FLATBUFFER_HEADER = os.path.join(FLATBUFFER_MODULE_DIR, 'data', 'flatbuffer_header.txt')
# temporary emoji metadata json output file
OUTPUT_JSON_FILE_NAME = 'emoji_metadata.json'
# temporary binary file generated by flatbuffer
FLATBUFFER_BIN = 'emoji_metadata.bin'
# directory representation for flatbuffer java package; the trailing '' makes the joined path end
# with a path separator
FLATBUFFER_PACKAGE_PATH = os.path.join('androidx', 'text', 'emoji', 'flatbuffer', '')
# temporary directory that contains flatbuffer java files (same value as
# FLATBUFFER_PACKAGE_PATH, since os.path.join is given a single component)
FLATBUFFER_JAVA_PATH = os.path.join(FLATBUFFER_PACKAGE_PATH)
FLATBUFFER_METADATA_LIST_JAVA = "MetadataList.java"
FLATBUFFER_METADATA_ITEM_JAVA = "MetadataItem.java"
# directory under source where flatbuffer java files will be copied into
FLATBUFFER_JAVA_TARGET = os.path.join(FLATBUFFER_MODULE_DIR, JAVA_SRC_DIR, FLATBUFFER_PACKAGE_PATH)
# meta tag name used in the font to embed the emoji metadata. This value is also used in
# MetadataListReader.java in order to locate the metadata location.
EMOJI_META_TAG_NAME = 'Emji'

# Property names as they appear after read_emoji_lines() upper-cases the unicode file lines.
EMOJI_STR = 'EMOJI'
EMOJI_PRESENTATION_STR = 'EMOJI_PRESENTATION'
ACCEPTED_EMOJI_PROPERTIES = [EMOJI_PRESENTATION_STR, EMOJI_STR]
# Marker of the emoji-style lines in emoji-variation-sequences.txt (upper-cased as well).
STD_VARIANTS_EMOJI_STYLE = 'EMOJI STYLE'

# First emoji id that is handed out when the previous metadata does not provide a larger one.
DEFAULT_EMOJI_ID = 0xF0001
# Emoji style variation selector; it is dropped from sequences before they are used as keys.
EMOJI_STYLE_VS = 0xFE0F

# The reference code point to be used for filling metrics of watermark glyph
WATERMARK_REF_CODE_POINT = 0x1F600
# The code point and glyph name used for watermark.
WATERMARK_NEW_CODE_POINT = 0x10FF00
WATERMARK_NEW_GLYPH_ID = 'u10FF00'
131
def to_hex_str(value):
    """Returns the uppercase hexadecimal form of the given int, without the 0x prefix."""
    return '{0:X}'.format(value)
135
def hex_str_to_int(string):
    """Parses a base-16 string such as '1F600' and returns its int value."""
    return int(string, base=16)
139
def codepoint_to_string(codepoints):
    """Renders an iterable of codepoints as uppercase hex values separated by a single space."""
    return ' '.join(to_hex_str(codepoint) for codepoint in codepoints)
143
def prepend_header_to_file(file_path, header_path):
    """Rewrites file_path in place so that it starts with the content of header_path.

    Used to update flatbuffer java files with header, comments and annotations. A single newline
    is inserted between the header and the previous content of the file.
    :param file_path: file that is updated; it has to exist already
    :param header_path: file whose content is put in front of file_path's content
    """
    with open(file_path, "r+") as target, open(header_path, "r") as header:
        previous_content = target.read()
        # go back to the beginning so that the write below replaces the file from its start
        target.seek(0)
        target.write(header.read() + "\n" + previous_content)
152
def is_ri(codepoint):
    """Returns True if codepoint lies in the range 0x1F1E6..0x1F1FF (both ends included)."""
    return 0x1F1E6 <= codepoint <= 0x1F1FF
155
def is_flag_seq(codepoints):
    """Returns True if every codepoint passes is_ri(); an empty sequence also yields True."""
    for codepoint in codepoints:
        if not is_ri(codepoint):
            return False
    return True
158
159
def update_flatbuffer_java_files(flatbuffer_java_dir, header_dir, target_dir):
    """Prepends headers to flatbuffer java files and copies to the final destination.

    :param flatbuffer_java_dir: directory that contains the generated MetadataList.java and
    MetadataItem.java files; a trailing path separator is accepted but not required
    :param header_dir: path of the header file (not a directory) that is prepended to both files
    :param target_dir: directory the updated files are copied into; created when missing
    """
    java_file_names = (FLATBUFFER_METADATA_LIST_JAVA, FLATBUFFER_METADATA_ITEM_JAVA)
    # os.path.join instead of string concatenation, so that the result does not depend on
    # flatbuffer_java_dir ending with a path separator
    generated_files = [os.path.join(flatbuffer_java_dir, name) for name in java_file_names]

    for generated_file in generated_files:
        prepend_header_to_file(generated_file, header_dir)

    # exist_ok avoids the check-then-create race of a separate os.path.exists() test
    os.makedirs(target_dir, exist_ok=True)

    for name, generated_file in zip(java_file_names, generated_files):
        shutil.copy(generated_file, os.path.join(target_dir, name))
172
def create_test_data(unicode_path):
    """Reads all the emojis in the unicode files and rewrites the test file TEST_DATA_PATH.

    The test file ends up with one emoji per line, as space separated uppercase hex codepoints,
    sorted as strings.
    :param unicode_path: path to directory that contains unicode files
    """
    def unicode_file(name):
        return os.path.join(unicode_path, name)

    sequence_lines = read_emoji_lines(unicode_file(EMOJI_ZWJ_FILE))
    sequence_lines.extend(read_emoji_lines(unicode_file(EMOJI_SEQ_FILE)))
    # the Android additions are optional
    sequence_lines.extend(
        read_emoji_lines(unicode_file(ANDROID_EMOJI_ZWJ_SEQ_FILE), optional=True))
    sequence_lines.extend(
        read_emoji_lines(unicode_file(ANDROID_EMOJIS_SEQ_FILE), optional=True))

    # standardized variants contains a huge list of sequences, only keep the ones that are
    # marked as emoji style
    variant_lines = read_emoji_lines(unicode_file(EMOJI_VARIATION_SEQ_FILE))
    sequence_lines.extend(
        variant for variant in variant_lines if STD_VARIANTS_EMOJI_STYLE in variant)

    supported_emojis = set()
    for entry in sequence_lines:
        # In unicode 12.0, "emoji-sequences.txt" contains a "Basic_Emoji" section. Those lines
        # are skipped since emoji presentations are already covered by
        # emoji-variation-sequences.txt.
        if "BASIC_EMOJI" in entry:
            continue
        hex_fields = entry.split(';')[0].strip().split(' ')
        sequence = [hex_str_to_int(hex_field) for hex_field in hex_fields]
        supported_emojis.add(codepoint_to_string(sequence).upper())

    # single codepoints: only the ones with default emoji presentation are added
    for entry in read_emoji_lines(unicode_file(EMOJI_DATA_FILE)):
        codepoints_range, emoji_property = codepoints_and_emoji_prop(entry)
        if emoji_property not in ACCEPTED_EMOJI_PROPERTIES:
            continue
        if emoji_property != EMOJI_PRESENTATION_STR:
            continue
        supported_emojis.update(
            to_hex_str(codepoint) for codepoint in codepoints_for_emojirange(codepoints_range))

    # finally add the android default emoji exceptions
    supported_emojis.update(
        to_hex_str(codepoint) for codepoint in get_emoji_style_exceptions(unicode_path))

    ordered_emojis = sorted(supported_emojis)
    with open(TEST_DATA_PATH, "w") as test_file:
        test_file.writelines("%s\n" % emoji for emoji in ordered_emojis)
219
class _EmojiData(object):
    """Record for a single emoji: its codepoints, presentation style, id, image size and the
    SDK / EmojiCompat metadata versions in which it was added."""

    def __init__(self, codepoints, is_emoji_style):
        """
        :param codepoints: list of int codepoints that make up the emoji
        :param is_emoji_style: True if the emoji is rendered with emoji style by default
        """
        self.codepoints = codepoints
        self.emoji_style = is_emoji_style
        # 0 means that no id has been assigned yet
        self.emoji_id = 0
        self.width = self.height = 0
        # a new entry is assumed to be added in the current SDK and metadata versions
        self.sdk_added = SDK_VERSION
        self.compat_added = METADATA_VERSION

    def update_metrics(self, metrics):
        """Copies the image size from metrics.
        :param metrics: an object that exposes width and height attributes
        """
        self.width, self.height = metrics.width, metrics.height

    def __repr__(self):
        return '<EmojiData {} - {}>'.format(self.emoji_style,
                                            codepoint_to_string(self.codepoints))

    def create_json_element(self):
        """Returns the dict that represents this emoji in the metadata json."""
        return {
            'id': self.emoji_id,
            'emojiStyle': self.emoji_style,
            'sdkAdded': self.sdk_added,
            'compatAdded': self.compat_added,
            'width': self.width,
            'height': self.height,
            'codepoints': self.codepoints,
        }

    def create_txt_row(self):
        """Returns the row that represents this emoji in the metadata text file: id in hex,
        sdk_added, compat_added, followed by each codepoint in hex."""
        row = [to_hex_str(self.emoji_id), self.sdk_added, self.compat_added]
        row.extend(to_hex_str(codepoint) for codepoint in self.codepoints)
        return row

    def update(self, emoji_id, sdk_added, compat_added):
        """Overwrites the id and the added-in versions with the given values."""
        self.emoji_id = emoji_id
        self.sdk_added = sdk_added
        self.compat_added = compat_added
266
267
def read_emoji_lines(file_path, optional=False):
    """Collects the meaningful lines of an unicode emoji file as uppercase strings.

    Lines are stripped first; lines that are empty afterwards or that start with '#' are dropped.
    :param file_path: unicode emoji file path
    :param optional: if True an IOError while reading is swallowed and the lines collected so
    far (possibly none) are returned
    :return: list of uppercase strings
    """
    collected = []
    try:
        with open(file_path) as file_stream:
            for raw_line in file_stream:
                text = raw_line.strip()
                if not text or text.startswith('#'):
                    continue
                collected.append(text.upper())
    except IOError:
        if not optional:
            raise

    return collected
289
def get_emoji_style_exceptions(unicode_path):
    """Read EMOJI_STYLE_OVERRIDE_FILE and return the codepoints as integers.

    Each line is expected in the emoji-data.txt format. Both a single codepoint (XXXX) and a
    codepoint range (XXXX..YYYY) are accepted; a range contributes every codepoint in it.
    :param unicode_path: path to directory that contains unicode files
    :return: list of int codepoints, in file order
    """
    lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_STYLE_OVERRIDE_FILE))
    exceptions = []
    for line in lines:
        codepoints_range = codepoints_and_emoji_prop(line)[0]
        # expand ranges the same way read_emoji_intervals does, instead of failing on '..'
        exceptions.extend(codepoints_for_emojirange(codepoints_range))
    return exceptions
298
def codepoints_for_emojirange(codepoints_range):
    """Returns the int codepoints described by the first field of an emoji file line.

    The field is either a single hex codepoint, or a range written as XXXX..YYYY which is
    expanded to every codepoint from XXXX to YYYY, both included.
    """
    if '..' not in codepoints_range:
        return [hex_str_to_int(codepoints_range)]

    first, last = codepoints_range.split('..')
    return list(range(hex_str_to_int(first), hex_str_to_int(last) + 1))
312
def codepoints_and_emoji_prop(line):
    """Splits an emoji file line into its codepoints field and its emoji property field.

    Expected line format:
    1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
    |Extended_Pictographic] # [...]
    Everything from the first '#' on is ignored.
    :return: tuple of (codepoints or codepoint range as string, emoji property as string)
    :raises ValueError: if the line does not contain '#'
    """
    content, hash_sign, _ = line.strip().partition('#')
    if not hash_sign:
        raise ValueError("Line is expected to have # in it")

    fields = content.split(';')
    return fields[0].strip(), fields[1].strip()
327
def read_emoji_intervals(emoji_data_map, file_path, emoji_style_exceptions):
    """Reads an unicode emoji file in which each line describes a codepoint or a codepoint
    interval, and makes sure that emoji_data_map has an EmojiData for each of those codepoints.

    Expected line format:
    1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
    |Extended_Pictographic] # [...]
    Lines with a property that is not in ACCEPTED_EMOJI_PROPERTIES are ignored.
    :param emoji_data_map: map of codepoint string to EmojiData, updated in place
    :param file_path: unicode emoji file path
    :param emoji_style_exceptions: codepoints that are emoji style regardless of the property
    """
    for line in read_emoji_lines(file_path):
        codepoints_range, emoji_property = codepoints_and_emoji_prop(line)
        if emoji_property not in ACCEPTED_EMOJI_PROPERTIES:
            continue
        range_is_emoji_style = emoji_property == EMOJI_PRESENTATION_STR

        for codepoint in codepoints_for_emojirange(codepoints_range):
            key = codepoint_to_string([codepoint])
            emoji_style = range_is_emoji_style or codepoint in emoji_style_exceptions
            if key not in emoji_data_map:
                emoji_data_map[key] = _EmojiData([codepoint], emoji_style)
            elif emoji_style:
                # the same codepoint is listed once per property; emoji style is only ever
                # switched on, never back off
                emoji_data_map[key].emoji_style = True
354
355
def read_emoji_sequences(emoji_data_map, file_path, optional=False, filter=None):
    """Reads a file that contains emoji sequences and adds an EmojiData (with emoji style set
    to False) to emoji_data_map for every sequence that is not in the map yet.

    Expected line format: 1F1E6 1F1E8 ; Name ; [...]
    EMOJI_STYLE_VS is removed from a sequence before it is used.
    :param emoji_data_map: map of codepoint string to EmojiData, updated in place
    :param file_path: emoji sequence file path
    :param optional: passed to read_emoji_lines; if True an unreadable file is not an error
    :param filter: optional callable that receives the list of codepoints; the sequence is
    skipped when it returns a truthy value
    """
    for line in read_emoji_lines(file_path, optional):
        # In unicode 12.0, "emoji-sequences.txt" contains a "Basic_Emoji" section. Those lines
        # are skipped since emoji presentations are already covered by
        # emoji-variation-sequences.txt.
        if "BASIC_EMOJI" in line:
            continue

        codepoints = []
        for hex_field in line.split(';')[0].strip().split(' '):
            codepoint = hex_str_to_int(hex_field)
            if codepoint != EMOJI_STYLE_VS:
                codepoints.append(codepoint)

        if filter and filter(codepoints):
            continue

        key = codepoint_to_string(codepoints)
        if key not in emoji_data_map:
            emoji_data_map[key] = _EmojiData(codepoints, False)
376
377
def load_emoji_data_map(unicode_path, without_flags):
    """Reads the emoji data files and builds the map of known emojis.

    :param unicode_path: path to directory that contains unicode files
    :param without_flags: if True, sequences of EMOJI_SEQ_FILE for which is_flag_seq() holds are
    left out
    :return: map of space separated codepoints to EmojiData
    """
    def unicode_file(name):
        return os.path.join(unicode_path, name)

    flag_filter = is_flag_seq if without_flags else None

    emoji_data_map = {}
    emoji_style_exceptions = get_emoji_style_exceptions(unicode_path)
    read_emoji_intervals(emoji_data_map, unicode_file(EMOJI_DATA_FILE), emoji_style_exceptions)
    read_emoji_sequences(emoji_data_map, unicode_file(EMOJI_ZWJ_FILE))
    read_emoji_sequences(emoji_data_map, unicode_file(EMOJI_SEQ_FILE), filter=flag_filter)

    # The Android additions are read only if the files exist.
    for optional_name in (ANDROID_EMOJI_ZWJ_SEQ_FILE, ANDROID_EMOJIS_SEQ_FILE):
        read_emoji_sequences(emoji_data_map, unicode_file(optional_name), optional=True)

    return emoji_data_map
401
402
def load_previous_metadata(emoji_data_map):
    """Updates emoji data elements in emoji_data_map using the id, sdk_added and compat_added
    fields in emoji_metadata.txt, and returns the next emoji id to use.

    Rows are space separated: id (hex), sdkAdded, compatAdded, followed by the codepoints (hex).
    Rows that start with '#' and empty rows are skipped. Rows whose codepoints are not in
    emoji_data_map are ignored, and therefore do not influence the returned id.
    :param emoji_data_map: map of space separated codepoints to EmojiData, updated in place
    :return: 1 greater than the largest emoji id that was applied to emoji_data_map, but never
    less than DEFAULT_EMOJI_ID. DEFAULT_EMOJI_ID if emoji_metadata.txt does not exist.
    """
    current_emoji_id = DEFAULT_EMOJI_ID
    if not os.path.isfile(INPUT_META_FILE):
        return current_emoji_id

    # newline='' is what the csv module asks for when it is handed a file object
    with open(INPUT_META_FILE, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=' '):
            # csv.reader yields an empty list for a blank line; row[0] would raise IndexError
            if not row or row[0].startswith('#'):
                continue
            emoji_id = hex_str_to_int(row[0])
            sdk_added = int(row[1])
            compat_added = int(row[2])
            key = codepoint_to_string(hex_str_to_int(x) for x in row[3:])
            if key not in emoji_data_map:
                continue
            emoji_data = emoji_data_map[key]
            emoji_data.update(emoji_id, sdk_added, compat_added)
            if emoji_data.emoji_id >= current_emoji_id:
                current_emoji_id = emoji_data.emoji_id + 1

    return current_emoji_id
426
427
def update_ttlib_orig_sort():
    """Replaces ttLib.sortedTagList with a closure that makes the meta table first.

    The closure delegates to the function that ttLib.sortedTagList referred to when this function
    was called, then moves 'meta' to the front of the result. The replacement is module wide, so
    it is also used for tag lists without a 'meta' entry; those are returned as sorted by the
    original function instead of raising ValueError.
    """
    orig_sort = ttLib.sortedTagList

    def meta_first_table_sort(tag_list, table_order=None):
        """Sorts the tables with the original ttLib sort, then makes the meta table first."""
        tag_list = orig_sort(tag_list, table_order)
        # list.remove raises ValueError for a missing item, so only reorder when it is there
        if 'meta' in tag_list:
            tag_list.remove('meta')
            tag_list.insert(0, 'meta')
        return tag_list

    ttLib.sortedTagList = meta_first_table_sort
440
441
def inject_meta_into_font(ttf, flatbuffer_bin_filename):
    """Stores the content of the metadata binary in the 'meta' table of the font.

    The 'meta' table is created when the font does not have one. The binary is stored under the
    EMOJI_META_TAG_NAME key of the table data.
    :param ttf: font object that is updated in place
    :param flatbuffer_bin_filename: path of the binary file to embed
    """
    if 'meta' not in ttf:
        meta_table_class = ttLib.getTableClass('meta')
        ttf['meta'] = meta_table_class()
    meta_table = ttf['meta']

    with open(flatbuffer_bin_filename, 'rb') as flatbuffer_bin_file:
        metadata_blob = flatbuffer_bin_file.read()
    meta_table.data[EMOJI_META_TAG_NAME] = metadata_blob

    # sort meta tables for faster access
    update_ttlib_orig_sort()
452
453
def validate_input_files(font_path, unicode_path, flatbuffer_path):
    """Checks that the font file, the unicode files and the flatbuffer files exist.

    :param font_path: path of the font file
    :param unicode_path: directory that has to contain EMOJI_DATA_FILE, EMOJI_ZWJ_FILE and
    EMOJI_SEQ_FILE
    :param flatbuffer_path: flatbuffer module directory
    :raises ValueError: for the first file or directory that is missing
    """
    if not os.path.isfile(font_path):
        raise ValueError("Font file does not exist: " + font_path)

    if not os.path.isdir(unicode_path):
        raise ValueError(
            "Unicode directory does not exist or is not a directory " + unicode_path)

    for emoji_file in (EMOJI_DATA_FILE, EMOJI_ZWJ_FILE, EMOJI_SEQ_FILE):
        emoji_filename = os.path.join(unicode_path, emoji_file)
        if not os.path.isfile(emoji_filename):
            raise ValueError("Unicode emoji data file does not exist: " + emoji_filename)

    if not os.path.isdir(flatbuffer_path):
        raise ValueError(
            "Flatbuffer directory does not exist or is not a directory " + flatbuffer_path)

    # NOTE(review): FLATBUFFER_SCHEMA and FLATBUFFER_HEADER are built from the absolute
    # SCRIPT_DIR, and os.path.join drops the components before an absolute one, so these two
    # checks look at the default module location whatever flatbuffer_path is - confirm intent.
    for flatbuffer_file in (FLATBUFFER_SCHEMA, FLATBUFFER_HEADER):
        flatbuffer_filename = os.path.join(flatbuffer_path, flatbuffer_file)
        if not os.path.isfile(flatbuffer_filename):
            raise ValueError("Flatbuffer file does not exist: " + flatbuffer_filename)
479
480
def add_file_to_sha(sha_algo, file_path):
    """Feeds the binary content of file_path into sha_algo, 8192 bytes at a time.

    :param sha_algo: hash object that provides update(bytes)
    :param file_path: path of the file to read
    """
    with open(file_path, 'rb') as input_file:
        while True:
            chunk = input_file.read(8192)
            if chunk == b'':
                break
            sha_algo.update(chunk)
485
def create_sha_from_source_files(font_paths):
    """Returns the SHA-256 hex digest of the contents of the given files, taken in order.

    :param font_paths: iterable of file paths
    """
    digest = hashlib.sha256()
    for source_path in font_paths:
        add_file_to_sha(digest, source_path)
    return digest.hexdigest()
492
493
494class EmojiFontCreator(object):
495    """Creates the EmojiCompat font"""
496
497    def __init__(self, font_path, unicode_path, without_flags):
498        validate_input_files(font_path, unicode_path, FLATBUFFER_MODULE_DIR)
499
500        self.font_path = font_path
501        self.unicode_path = unicode_path
502        self.without_flags = without_flags
503        self.emoji_data_map = {}
504        self.remapped_codepoints = {}
505        self.glyph_to_image_metrics_map = {}
506        # set default emoji id to start of Supplemental Private Use Area-A
507        self.emoji_id = DEFAULT_EMOJI_ID
508
509    def update_emoji_data(self, codepoints, glyph_name):
510        """Updates the existing EmojiData identified with codepoints. The fields that are set are:
511        - emoji_id (if it does not exist)
512        - image width/height"""
513        key = codepoint_to_string(codepoints)
514        if key in self.emoji_data_map:
515            # add emoji to final data
516            emoji_data = self.emoji_data_map[key]
517            emoji_data.update_metrics(self.glyph_to_image_metrics_map[glyph_name])
518            if emoji_data.emoji_id == 0:
519                emoji_data.emoji_id = self.emoji_id
520                self.emoji_id = self.emoji_id + 1
521            self.remapped_codepoints[emoji_data.emoji_id] = glyph_name
522
523    def read_cbdt(self, ttf):
524        """Read image size data from CBDT."""
525        cbdt = ttf['CBDT']
526        for strike_data in cbdt.strikeData:
527            for key, data in strike_data.items():
528                data.decompile()
529                self.glyph_to_image_metrics_map[key] = data.metrics
530
531    def read_cmap12(self, ttf, glyph_to_codepoint_map):
532        """Reads single code point emojis that are in cmap12, updates glyph_to_codepoint_map and
533        finally clears all elements in CMAP 12"""
534        cmap = ttf['cmap']
535        for table in cmap.tables:
536            if table.format == 12 and table.platformID == 3 and table.platEncID == 10:
537                for codepoint, glyph_name in table.cmap.items():
538                    glyph_to_codepoint_map[glyph_name] = codepoint
539                    self.update_emoji_data([codepoint], glyph_name)
540                return table
541        raise ValueError("Font doesn't contain cmap with format:12, platformID:3 and platEncID:10")
542
543    def read_gsub(self, ttf, glyph_to_codepoint_map):
544        """Reads the emoji sequences defined in GSUB and clear all elements under GSUB"""
545        gsub = ttf['GSUB']
546        ligature_subtables = []
547        context_subtables = []
548        # this code is font dependent, implementing all gsub rules is out of scope of EmojiCompat
549        # and would be expensive with little value
550        for lookup in gsub.table.LookupList.Lookup:
551            for subtable in lookup.SubTable:
552                if subtable.LookupType == 5:
553                    context_subtables.append(subtable)
554                elif subtable.LookupType == 4:
555                    ligature_subtables.append(subtable)
556
557        for subtable in context_subtables:
558            self.add_gsub_context_subtable(subtable, gsub.table.LookupList, glyph_to_codepoint_map)
559
560        for subtable in ligature_subtables:
561            self.add_gsub_ligature_subtable(subtable, glyph_to_codepoint_map)
562
563    def add_gsub_context_subtable(self, subtable, lookup_list, glyph_to_codepoint_map):
564        """Add substitutions defined as OpenType Context Substitution"""
565        for sub_class_set in subtable.SubClassSet:
566            if sub_class_set:
567                for sub_class_rule in sub_class_set.SubClassRule:
568                    # prepare holder for substitution list. each rule will have a list that is added
569                    # to the subs_list.
570                    subs_list = len(sub_class_rule.SubstLookupRecord) * [None]
571                    for record in sub_class_rule.SubstLookupRecord:
572                        subs_list[record.SequenceIndex] = self.get_substitutions(lookup_list,
573                                                                            record.LookupListIndex)
574                    # create combinations or all lists. the combinations will be filtered by
575                    # emoji_data_map. the first element that contain as a valid glyph will be used
576                    # as the final glyph
577                    combinations = list(itertools.product(*subs_list))
578                    for seq in combinations:
579                        glyph_names = [x["input"] for x in seq]
580                        codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
581                        outputs = [x["output"] for x in seq if x["output"]]
582                        nonempty_outputs = list(filter(lambda x: x.strip() , outputs))
583                        if len(nonempty_outputs) == 0:
584                            print("Warning: no output glyph is set for " + str(glyph_names))
585                            continue
586                        elif len(nonempty_outputs) > 1:
587                            print(
588                                "Warning: multiple glyph is set for "
589                                    + str(glyph_names) + ", will use the first one")
590
591                        glyph = nonempty_outputs[0]
592                        self.update_emoji_data(codepoints, glyph)
593
594    def get_substitutions(self, lookup_list, index):
595        result = []
596        for x in lookup_list.Lookup[index].SubTable:
597            for input, output in x.mapping.items():
598                result.append({"input": input, "output": output})
599        return result
600
601    def add_gsub_ligature_subtable(self, subtable, glyph_to_codepoint_map):
602        for name, ligatures in subtable.ligatures.items():
603            for ligature in ligatures:
604                glyph_names = [name] + ligature.Component
605                codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
606                self.update_emoji_data(codepoints, ligature.LigGlyph)
607
608    def write_metadata_json(self, output_json_file_path):
609        """Writes the emojis into a json file"""
610        output_json = {}
611        output_json['version'] = METADATA_VERSION
612        output_json['sourceSha'] = create_sha_from_source_files(
613            [self.font_path, OUTPUT_META_FILE, FLATBUFFER_SCHEMA])
614        output_json['list'] = []
615
616        emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)
617
618        total_emoji_count = 0
619        for emoji_data in emoji_data_list:
620            if self.without_flags and is_flag_seq(emoji_data.codepoints):
621                continue  # Do not add flags emoji data if this is for subset font.
622            element = emoji_data.create_json_element()
623            output_json['list'].append(element)
624            total_emoji_count = total_emoji_count + 1
625
626        # write the new json file to be processed by FlatBuffers
627        with open(output_json_file_path, 'w') as json_file:
628            print(json.dumps(output_json, indent=4, sort_keys=True, separators=(',', ':')),
629                  file=json_file)
630
631        return total_emoji_count
632
633    def write_metadata_csv(self):
634        """Writes emoji metadata into space separated file"""
635        with open(OUTPUT_META_FILE, 'w') as csvfile:
636            csvwriter = csv.writer(csvfile, delimiter=' ')
637            emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)
638            csvwriter.writerow(['#id', 'sdkAdded', 'compatAdded', 'codepoints'])
639            for emoji_data in emoji_data_list:
640                csvwriter.writerow(emoji_data.create_txt_row())
641
642    def add_watermark(self, ttf):
643        cmap = ttf.getBestCmap()
644        gsub = ttf['GSUB'].table
645
646        # Obtain Version string
647        m = re.search('^Version (\d*)\.(\d*)', font_data.font_version(ttf))
648        if not m:
649            raise ValueError('The font does not have proper version string.')
650        major = m.group(1)
651        minor = m.group(2)
652        # Replace the dot with space since NotoColorEmoji does not have glyph for dot.
653        glyphs = [cmap[ord(x)] for x in '%s %s' % (major, minor)]
654
655        # Update Glyph metrics
656        ttf.getGlyphOrder().append(WATERMARK_NEW_GLYPH_ID)
657        refGlyphId = cmap[WATERMARK_REF_CODE_POINT]
658        ttf['hmtx'].metrics[WATERMARK_NEW_GLYPH_ID] = ttf['hmtx'].metrics[refGlyphId]
659        ttf['vmtx'].metrics[WATERMARK_NEW_GLYPH_ID] = ttf['vmtx'].metrics[refGlyphId]
660
661        # Add new Glyph to cmap
662        font_data.add_to_cmap(ttf, { WATERMARK_NEW_CODE_POINT : WATERMARK_NEW_GLYPH_ID })
663
664        # Add lookup table for the version string.
665        lookups = gsub.LookupList.Lookup
666        new_lookup = otTables.Lookup()
667        new_lookup.LookupType = 2  # Multiple Substitution Subtable.
668        new_lookup.LookupFlag = 0
669        new_subtable = otTables.MultipleSubst()
670        new_subtable.mapping = { WATERMARK_NEW_GLYPH_ID : tuple(glyphs) }
671        new_lookup.SubTable = [ new_subtable ]
672        new_lookup_index = len(lookups)
673        lookups.append(new_lookup)
674
675        # Add feature
676        feature = next(x for x in gsub.FeatureList.FeatureRecord if x.FeatureTag == 'ccmp')
677        if not feature:
678            raise ValueError("Font doesn't contain ccmp feature.")
679
680        feature.Feature.LookupListIndex.append(new_lookup_index)
681
682    def create_font(self):
683        """Creates the EmojiCompat font.
684        :param font_path: path to Android NotoColorEmoji font
685        :param unicode_path: path to directory that contains unicode files
686        """
687
688        tmp_dir = tempfile.mkdtemp()
689
690        # create emoji codepoints to EmojiData map
691        self.emoji_data_map = load_emoji_data_map(self.unicode_path, self.without_flags)
692
693        # read previous metadata file to update id, sdkAdded and compatAdded. emoji id that is
694        # returned is either default or 1 greater than the largest id in previous data
695        self.emoji_id = load_previous_metadata(self.emoji_data_map)
696
697        # recalcTimestamp parameter will keep the modified field same as the original font. Changing
698        # the modified field in the font causes the font ttf file to change, which makes it harder
699        # to understand if something really changed in the font.
700        with contextlib.closing(ttLib.TTFont(self.font_path, recalcTimestamp=False)) as ttf:
701            # read image size data
702            self.read_cbdt(ttf)
703
704            # glyph name to codepoint map
705            glyph_to_codepoint_map = {}
706
707            # read single codepoint emojis under cmap12 and clear the table contents
708            cmap12_table = self.read_cmap12(ttf, glyph_to_codepoint_map)
709
710            # read emoji sequences gsub and clear the table contents
711            self.read_gsub(ttf, glyph_to_codepoint_map)
712
713            # add all new codepoint to glyph mappings
714            cmap12_table.cmap.update(self.remapped_codepoints)
715
716            # final metadata csv will be used to generate the sha, therefore write it before
717            # metadata json is written.
718            self.write_metadata_csv()
719
720            output_json_file = os.path.join(tmp_dir, OUTPUT_JSON_FILE_NAME)
721            flatbuffer_bin_file = os.path.join(tmp_dir, FLATBUFFER_BIN)
722            flatbuffer_java_dir = os.path.join(tmp_dir, FLATBUFFER_JAVA_PATH)
723
724            total_emoji_count = self.write_metadata_json(output_json_file)
725
726            # create the flatbuffers binary and java classes
727            flatc_command = ['flatc',
728                             '-o',
729                             tmp_dir,
730                             '-b',
731                             '-j',
732                             FLATBUFFER_SCHEMA,
733                             output_json_file]
734            subprocess.check_output(flatc_command)
735
736            # inject metadata binary into font
737            inject_meta_into_font(ttf, flatbuffer_bin_file)
738
739            # add wartermark glyph for manual verification.
740            self.add_watermark(ttf)
741
742            # update CBDT and CBLC versions since older android versions cannot read > 2.0
743            ttf['CBDT'].version = 2.0
744            ttf['CBLC'].version = 2.0
745
746            # save the new font
747            ttf.save(FONT_PATH)
748
749            update_flatbuffer_java_files(flatbuffer_java_dir, #tmp dir
750                                         FLATBUFFER_HEADER,
751                                         FLATBUFFER_JAVA_TARGET)
752
753            create_test_data(self.unicode_path)
754
755            # clear the tmp output directory
756            shutil.rmtree(tmp_dir, ignore_errors=True)
757
758            print(
759                "{0} emojis are written to\n{1}".format(total_emoji_count, FONT_DIR))
760
761
def print_usage():
    """Prints how to use the script, including the optional --without-flags argument."""
    print("Please specify a path to font and unicode files.\n"
          "usage: createfont.py noto-color-emoji-path unicode-dir-path [--without-flags]")
766
def parse_args(argv):
    """Parses the command line arguments.

    Parsing is done manually to avoid any extra dependencies. When fewer than two path
    arguments are given, the usage is printed and the script exits with status 1.

    :param argv: full argument list, with the script name at index 0
    :return: tuple of (font path, unicode directory path, without_flags)
    """
    if len(argv) < 3:
        print_usage()
        sys.exit(1)

    # The flag is only recognized when it is the single argument following the two paths.
    without_flags = len(argv) == 4 and argv[3] == '--without-flags'

    # Read the paths from the argv that was passed in, not from the sys.argv global.
    return (argv[1], argv[2], without_flags)
778
def main():
    """Script entry point: parses sys.argv and builds the EmojiCompat font."""
    font_path, unicode_path, skip_flags = parse_args(sys.argv)
    creator = EmojiFontCreator(font_path, unicode_path, skip_flags)
    creator.create_font()
782
783
# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
786