#!/usr/bin/env python3
#
# Copyright (C) 2017 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Creates the EmojiCompat font with the metadata. Metadata is embedded in FlatBuffers binary format
under a meta tag with name 'Emji'.

In order to create the final font the following are used as inputs:

- NotoColorEmoji.ttf: Emoji font in the Android framework. Currently at
external/noto-fonts/emoji/NotoColorEmoji.ttf

- Unicode files: Unicode files that are in the framework, and lists information about all the
emojis. These files are emoji-data.txt, emoji-sequences.txt, emoji-zwj-sequences.txt,
and emoji-variation-sequences.txt. Currently at external/unicode/.

- additions/emoji-zwj-sequences.txt: Includes emojis that are not defined in Unicode files, but are
in the Android font. Resides in framework and currently under external/unicode/.

- data/emoji_metadata.txt: The file that includes the id, codepoints, the first
Android OS version that the emoji was added (sdkAdded), and finally the first EmojiCompat font
version that the emoji was added (compatAdded). Updated when the script is executed.

- data/emoji_metadata.fbs: The flatbuffer schema file. See http://google.github.io/flatbuffers/.

After execution the following files are generated if they don't exist; otherwise they are updated:
- font/NotoColorEmojiCompat.ttf
- supported-emojis/emojis.txt
- data/emoji_metadata.txt
- ../emoji-compat-flatbuffers/src/java/androidx/text/emoji/flatbuffer/*
"""

import contextlib
import csv
import hashlib
import itertools
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
from fontTools import ttLib
from fontTools.ttLib.tables import otTables
from nototools import font_data

########### UPDATE OR CHECK WHEN A NEW FONT IS BEING GENERATED ###########
# Last Android SDK Version
SDK_VERSION = 31
# metadata version that will be embedded into font. If there are updates to the font that would
# cause data/emoji_metadata.txt to change, this integer number should be incremented. This number
# defines in which EmojiCompat metadata version the emoji is added to the font.
METADATA_VERSION = 8

####### main directories where output files are created #######
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
FONT_DIR = os.path.join(SCRIPT_DIR, 'font')
DATA_DIR = os.path.join(SCRIPT_DIR, 'data')
SUPPORTED_EMOJIS_DIR = os.path.join(SCRIPT_DIR, 'supported-emojis')
JAVA_SRC_DIR = os.path.join('src', 'java')
####### output files #######
# font file
FONT_PATH = os.path.join(FONT_DIR, 'NotoColorEmojiCompat.ttf')
# emoji metadata output file (space separated text, see write_metadata_csv)
OUTPUT_META_FILE = os.path.join(DATA_DIR, 'emoji_metadata.txt')
# emojis test file
TEST_DATA_PATH = os.path.join(SUPPORTED_EMOJIS_DIR, 'emojis.txt')
####### input files #######
# Unicode file names to read emoji data
EMOJI_DATA_FILE = 'emoji-data.txt'
EMOJI_SEQ_FILE = 'emoji-sequences.txt'
EMOJI_ZWJ_FILE = 'emoji-zwj-sequences.txt'
EMOJI_VARIATION_SEQ_FILE = 'emoji-variation-sequences.txt'
# Android OS emoji file for emojis that are not in Unicode files
ANDROID_EMOJI_ZWJ_SEQ_FILE = os.path.join('additions', 'emoji-zwj-sequences.txt')
ANDROID_EMOJIS_SEQ_FILE = os.path.join('additions', 'emoji-sequences.txt')
# Android OS emoji style override file. Codepoints that are rendered with emoji style by default
# even though not defined so in emoji-data.txt.
EMOJI_STYLE_OVERRIDE_FILE = os.path.join('additions', 'emoji-data.txt')
# emoji metadata file
INPUT_META_FILE = OUTPUT_META_FILE
# default flatbuffer module location (if not specified by caller)
FLATBUFFER_MODULE_DIR = os.path.join(SCRIPT_DIR, '..', 'emoji-compat-flatbuffers')
# flatbuffer schema
FLATBUFFER_SCHEMA = os.path.join(FLATBUFFER_MODULE_DIR, 'data', 'emoji_metadata.fbs')
# file path for java header, it will be prepended to flatbuffer java files
FLATBUFFER_HEADER = os.path.join(FLATBUFFER_MODULE_DIR, 'data', 'flatbuffer_header.txt')
# temporary emoji metadata json output file
OUTPUT_JSON_FILE_NAME = 'emoji_metadata.json'
# temporary binary file generated by flatbuffer
FLATBUFFER_BIN = 'emoji_metadata.bin'
# directory representation for flatbuffer java package (the trailing '' keeps a path separator
# at the end of the joined path)
FLATBUFFER_PACKAGE_PATH = os.path.join('androidx', 'text', 'emoji', 'flatbuffer', '')
# temporary directory that contains flatbuffer java files
FLATBUFFER_JAVA_PATH = os.path.join(FLATBUFFER_PACKAGE_PATH)
FLATBUFFER_METADATA_LIST_JAVA = "MetadataList.java"
FLATBUFFER_METADATA_ITEM_JAVA = "MetadataItem.java"
# directory under source where flatbuffer java files will be copied into
FLATBUFFER_JAVA_TARGET = os.path.join(FLATBUFFER_MODULE_DIR, JAVA_SRC_DIR, FLATBUFFER_PACKAGE_PATH)
# meta tag name used in the font to embed the emoji metadata. This value is also used in
# MetadataListReader.java in order to locate the metadata location.
EMOJI_META_TAG_NAME = 'Emji'

EMOJI_STR = 'EMOJI'
EMOJI_PRESENTATION_STR = 'EMOJI_PRESENTATION'
ACCEPTED_EMOJI_PROPERTIES = [EMOJI_PRESENTATION_STR, EMOJI_STR]
STD_VARIANTS_EMOJI_STYLE = 'EMOJI STYLE'

DEFAULT_EMOJI_ID = 0xF0001
EMOJI_STYLE_VS = 0xFE0F

# The reference code point to be used for filling metrics of watermark glyph
WATERMARK_REF_CODE_POINT = 0x1F600
# The code point and glyph name used for watermark.
WATERMARK_NEW_CODE_POINT = 0x10FF00
WATERMARK_NEW_GLYPH_ID = 'u10FF00'


def to_hex_str(value):
    """Converts given int value to uppercase hex without the 0x prefix"""
    return format(value, 'X')


def hex_str_to_int(string):
    """Convert a hex string into int"""
    return int(string, 16)


def codepoint_to_string(codepoints):
    """Converts an iterable of codepoints into a string of hex values separated with space."""
    return ' '.join([to_hex_str(x) for x in codepoints])


def prepend_header_to_file(file_path, header_path):
    """Prepends the header to the file. Used to update flatbuffer java files with header, comments
    and annotations.
    :param file_path: path of the file that is rewritten in place
    :param header_path: path of the file whose content is put in front, followed by a newline
    """
    with open(file_path, "r+") as original_file:
        with open(header_path, "r") as copyright_file:
            original_content = original_file.read()
            # rewind so that the new content overwrites the file from the beginning; the new
            # content is never shorter than the old one, therefore no truncate is needed
            original_file.seek(0)
            original_file.write(copyright_file.read() + "\n" + original_content)


def is_ri(codepoint):
    """Returns True if the codepoint is in the range 1F1E6..1F1FF (regional indicators)"""
    return 0x1F1E6 <= codepoint <= 0x1F1FF


def is_flag_seq(codepoints):
    """Returns True if every codepoint in the sequence is a regional indicator"""
    return all(is_ri(x) for x in codepoints)


def update_flatbuffer_java_files(flatbuffer_java_dir, header_dir, target_dir):
    """Prepends headers to flatbuffer java files and copies to the final destination
    :param flatbuffer_java_dir: directory that contains the generated java files
    :param header_dir: path of the header file (passed to prepend_header_to_file)
    :param target_dir: directory the java files are copied into, created if it does not exist
    """
    # os.path.join gives the same result as plain concatenation when the directory already ends
    # with a separator, and still works when it does not.
    tmp_metadata_list = os.path.join(flatbuffer_java_dir, FLATBUFFER_METADATA_LIST_JAVA)
    tmp_metadata_item = os.path.join(flatbuffer_java_dir, FLATBUFFER_METADATA_ITEM_JAVA)
    prepend_header_to_file(tmp_metadata_list, header_dir)
    prepend_header_to_file(tmp_metadata_item, header_dir)

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    shutil.copy(tmp_metadata_list, os.path.join(target_dir, FLATBUFFER_METADATA_LIST_JAVA))
    shutil.copy(tmp_metadata_item, os.path.join(target_dir, FLATBUFFER_METADATA_ITEM_JAVA))


def create_test_data(unicode_path):
    """Read all the emojis in the unicode files and update the test file
    :param unicode_path: path to directory that contains unicode files
    """
    lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_ZWJ_FILE))
    lines += read_emoji_lines(os.path.join(unicode_path, EMOJI_SEQ_FILE))

    lines += read_emoji_lines(os.path.join(unicode_path, ANDROID_EMOJI_ZWJ_SEQ_FILE), optional=True)
    lines += read_emoji_lines(os.path.join(unicode_path, ANDROID_EMOJIS_SEQ_FILE), optional=True)

    # standardized variants contains a huge list of sequences, only read the ones that are emojis
    # and also the ones with FE0F (emoji style)
    standardized_variants_lines = read_emoji_lines(
        os.path.join(unicode_path, EMOJI_VARIATION_SEQ_FILE))
    for line in standardized_variants_lines:
        if STD_VARIANTS_EMOJI_STYLE in line:
            lines.append(line)

    emojis_set = set()
    for line in lines:
        # In unicode 12.0, "emoji-sequences.txt" contains "Basic_Emoji" session. We ignore them
        # here since we are already checking the emoji presentations with
        # emoji-variation-sequences.txt.
        if "BASIC_EMOJI" in line:
            continue
        codepoints = [hex_str_to_int(x) for x in line.split(';')[0].strip().split(' ')]
        emojis_set.add(codepoint_to_string(codepoints).upper())

    emoji_data_lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_DATA_FILE))
    for line in emoji_data_lines:
        codepoints_range, emoji_property = codepoints_and_emoji_prop(line)
        # only the codepoints that have emoji presentation by default are added
        if emoji_property != EMOJI_PRESENTATION_STR:
            continue
        emojis_set.update(to_hex_str(x) for x in codepoints_for_emojirange(codepoints_range))

    emoji_style_exceptions = get_emoji_style_exceptions(unicode_path)
    # finally add the android default emoji exceptions
    emojis_set.update([to_hex_str(x) for x in emoji_style_exceptions])

    with open(TEST_DATA_PATH, "w") as test_file:
        for line in sorted(emojis_set):
            test_file.write("%s\n" % line)


class _EmojiData(object):
    """Holds the information about a single emoji."""

    def __init__(self, codepoints, is_emoji_style):
        self.codepoints = codepoints
        self.emoji_style = is_emoji_style
        # 0 means that an id is not assigned yet (see EmojiFontCreator.update_emoji_data)
        self.emoji_id = 0
        self.width = 0
        self.height = 0
        self.sdk_added = SDK_VERSION
        self.compat_added = METADATA_VERSION

    def update_metrics(self, metrics):
        """Updates width/height instance variables with the values given in metrics.
        :param metrics: an object that has width and height attributes.
        """
        self.width = metrics.width
        self.height = metrics.height

    def __repr__(self):
        return '<EmojiData {0} - {1}>'.format(self.emoji_style,
                                              codepoint_to_string(self.codepoints))

    def create_json_element(self):
        """Creates the json representation of EmojiData."""
        return {
            'id': self.emoji_id,
            'emojiStyle': self.emoji_style,
            'sdkAdded': self.sdk_added,
            'compatAdded': self.compat_added,
            'width': self.width,
            'height': self.height,
            'codepoints': self.codepoints,
        }

    def create_txt_row(self):
        """Creates array of values for CSV of EmojiData."""
        row = [to_hex_str(self.emoji_id), self.sdk_added, self.compat_added]
        row += [to_hex_str(x) for x in self.codepoints]
        return row

    def update(self, emoji_id, sdk_added, compat_added):
        """Updates current EmojiData with the given id, sdk_added and compat_added values"""
        self.emoji_id = emoji_id
        self.sdk_added = sdk_added
        self.compat_added = compat_added


def read_emoji_lines(file_path, optional=False):
    """Read all lines in an unicode emoji file into a list of uppercase strings. Ignore the empty
    lines and comments
    :param file_path: unicode emoji file path
    :param optional: if True no exception is raised when the file cannot be read
    :return: list of uppercase strings
    """
    result = []
    try:
        # The Unicode data files are UTF-8 and contain the emoji characters themselves in the
        # comments, do not depend on the locale default encoding to decode them.
        with open(file_path, encoding='utf-8') as file_stream:
            for line in file_stream:
                line = line.strip()
                if line and not line.startswith('#'):
                    result.append(line.upper())
    except IOError:
        if not optional:
            raise

    return result


def get_emoji_style_exceptions(unicode_path):
    """Read EMOJI_STYLE_OVERRIDE_FILE and return the codepoints as integers"""
    lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_STYLE_OVERRIDE_FILE))
    return [hex_str_to_int(codepoints_and_emoji_prop(line)[0]) for line in lines]


def codepoints_for_emojirange(codepoints_range):
    """ Return codepoints given in emoji files. Expand the codepoints that are given as a range
    such as XYZ..UVT (both ends are included)
    """
    if '..' in codepoints_range:
        range_start, range_end = codepoints_range.split('..')
        return list(range(hex_str_to_int(range_start), hex_str_to_int(range_end) + 1))
    return [hex_str_to_int(codepoints_range)]


def codepoints_and_emoji_prop(line):
    """For a given emoji file line, return codepoints and emoji property in the line.
    1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
    |Extended_Pictographic] # [...]
    :return: tuple of (codepoints or codepoint range as string, emoji property as string)
    :raises ValueError: if the line does not contain a # character
    """
    line = line.strip()
    if '#' not in line:
        raise ValueError("Line is expected to have # in it")
    # drop the trailing comment
    line = line[:line.index('#')]
    fields = line.split(';')
    codepoints_range = fields[0].strip()
    emoji_property = fields[1].strip()

    return codepoints_range, emoji_property


def read_emoji_intervals(emoji_data_map, file_path, emoji_style_exceptions):
    """Read unicode lines of unicode emoji file in which each line describes a set of codepoint
    intervals. Expands the interval on a line and inserts related EmojiDatas into emoji_data_map.
    A line format that is expected is as follows:
    1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
    |Extended_Pictographic] # [...]"""
    lines = read_emoji_lines(file_path)

    for line in lines:
        codepoints_range, emoji_property = codepoints_and_emoji_prop(line)
        if emoji_property not in ACCEPTED_EMOJI_PROPERTIES:
            continue
        is_emoji_style = emoji_property == EMOJI_PRESENTATION_STR
        codepoints = codepoints_for_emojirange(codepoints_range)

        for codepoint in codepoints:
            key = codepoint_to_string([codepoint])
            codepoint_is_emoji_style = is_emoji_style or codepoint in emoji_style_exceptions
            if key in emoji_data_map:
                # since there are multiple definitions of emojis, only update when emoji style is
                # True
                if codepoint_is_emoji_style:
                    emoji_data_map[key].emoji_style = True
            else:
                emoji_data = _EmojiData([codepoint], codepoint_is_emoji_style)
                emoji_data_map[key] = emoji_data


def read_emoji_sequences(emoji_data_map, file_path, optional=False, filter=None):
    """Reads the content of the file which contains emoji sequences. Creates EmojiData for each
    line and puts into emoji_data_map.
    :param emoji_data_map: map of space separated codepoints to EmojiData, updated in place
    :param file_path: path of the emoji sequences file
    :param optional: if True no exception is raised when the file cannot be read
    :param filter: optional callable that takes the list of codepoints of a sequence, the
    sequence is skipped when it returns a true value
    """
    lines = read_emoji_lines(file_path, optional)
    # 1F1E6 1F1E8 ; Name ; [...]
    for line in lines:
        # In unicode 12.0, "emoji-sequences.txt" contains "Basic_Emoji" session. We ignore them
        # here since we are already checking the emoji presentations with
        # emoji-variation-sequences.txt.
        if "BASIC_EMOJI" in line:
            continue
        codepoints = [hex_str_to_int(x) for x in line.split(';')[0].strip().split(' ')]
        # the emoji style variation selector is not part of the key
        codepoints = [x for x in codepoints if x != EMOJI_STYLE_VS]
        if filter and filter(codepoints):
            continue
        key = codepoint_to_string(codepoints)
        if key not in emoji_data_map:
            emoji_data = _EmojiData(codepoints, False)
            emoji_data_map[key] = emoji_data


def load_emoji_data_map(unicode_path, without_flags):
    """Reads the emoji data files, constructs a map of space separated codepoints to EmojiData.
    :param unicode_path: path to directory that contains unicode files
    :param without_flags: if True the flag sequences in EMOJI_SEQ_FILE are skipped
    :return: map of space separated codepoints to EmojiData
    """
    # named so that the builtin filter is not shadowed
    sequence_filter = is_flag_seq if without_flags else None
    emoji_data_map = {}
    emoji_style_exceptions = get_emoji_style_exceptions(unicode_path)
    read_emoji_intervals(emoji_data_map, os.path.join(unicode_path, EMOJI_DATA_FILE),
                         emoji_style_exceptions)
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, EMOJI_ZWJ_FILE))
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, EMOJI_SEQ_FILE),
                         filter=sequence_filter)

    # Add the optional ANDROID_EMOJI_ZWJ_SEQ_FILE if it exists.
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, ANDROID_EMOJI_ZWJ_SEQ_FILE),
                         optional=True)
    # Add the optional ANDROID_EMOJIS_SEQ_FILE if it exists.
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, ANDROID_EMOJIS_SEQ_FILE),
                         optional=True)

    return emoji_data_map


def load_previous_metadata(emoji_data_map):
    """Updates emoji data elements in emoji_data_map using the id, sdk_added and compat_added fields
    in emoji_metadata.txt. Returns the smallest available emoji id to use. i.e. if the largest
    emoji id emoji_metadata.txt is 1, function would return 2. If emoji_metadata.txt does not
    exist, or contains no emojis defined returns DEFAULT_EMOJI_ID. Only the rows whose codepoints
    are in emoji_data_map are taken into account."""
    current_emoji_id = DEFAULT_EMOJI_ID
    if os.path.isfile(INPUT_META_FILE):
        with open(INPUT_META_FILE) as csvfile:
            reader = csv.reader(csvfile, delimiter=' ')
            for row in reader:
                # csv.reader returns an empty list for a blank line, skip it together with the
                # header/comment rows
                if not row or row[0].startswith('#'):
                    continue
                emoji_id = hex_str_to_int(row[0])
                sdk_added = int(row[1])
                compat_added = int(row[2])
                key = codepoint_to_string(hex_str_to_int(x) for x in row[3:])
                if key in emoji_data_map:
                    emoji_data = emoji_data_map[key]
                    emoji_data.update(emoji_id, sdk_added, compat_added)
                    if emoji_data.emoji_id >= current_emoji_id:
                        current_emoji_id = emoji_data.emoji_id + 1

    return current_emoji_id


def update_ttlib_orig_sort():
    """Updates the ttLib tag sort with a closure that makes the meta table first."""
    orig_sort = ttLib.sortedTagList

    def meta_first_table_sort(tag_list, table_order=None):
        """Sorts the tables with the original ttLib sort, then makes the meta table first."""
        tag_list = orig_sort(tag_list, table_order)
        # the patched sort is installed module wide, do not fail for a tag list without meta
        if 'meta' in tag_list:
            tag_list.remove('meta')
            tag_list.insert(0, 'meta')
        return tag_list

    ttLib.sortedTagList = meta_first_table_sort


def inject_meta_into_font(ttf, flatbuffer_bin_filename):
    """inject metadata binary into font
    :param ttf: font object, a meta table is created if the font does not have one
    :param flatbuffer_bin_filename: path of the binary file that is stored under
    EMOJI_META_TAG_NAME
    """
    if 'meta' not in ttf:
        ttf['meta'] = ttLib.getTableClass('meta')()
    meta = ttf['meta']
    with open(flatbuffer_bin_filename, 'rb') as flatbuffer_bin_file:
        meta.data[EMOJI_META_TAG_NAME] = flatbuffer_bin_file.read()

    # sort meta tables for faster access
    update_ttlib_orig_sort()


def validate_input_files(font_path, unicode_path, flatbuffer_path):
    """Validate the existence of font file, the unicode files and the flatbuffer files
    :raises ValueError: if any of the required files or directories does not exist
    """
    if not os.path.isfile(font_path):
        raise ValueError("Font file does not exist: " + font_path)

    if not os.path.isdir(unicode_path):
        raise ValueError(
            "Unicode directory does not exist or is not a directory " + unicode_path)

    emoji_filenames = [os.path.join(unicode_path, EMOJI_DATA_FILE),
                       os.path.join(unicode_path, EMOJI_ZWJ_FILE),
                       os.path.join(unicode_path, EMOJI_SEQ_FILE)]
    for emoji_filename in emoji_filenames:
        if not os.path.isfile(emoji_filename):
            raise ValueError("Unicode emoji data file does not exist: " + emoji_filename)

    if not os.path.isdir(flatbuffer_path):
        raise ValueError(
            "Flatbuffer directory does not exist or is not a directory " + flatbuffer_path)

    # FLATBUFFER_SCHEMA and FLATBUFFER_HEADER are already absolute paths, os.path.join discards
    # flatbuffer_path in that case and returns them as they are.
    flatbuffer_filenames = [os.path.join(flatbuffer_path, FLATBUFFER_SCHEMA),
                            os.path.join(flatbuffer_path, FLATBUFFER_HEADER)]
    for flatbuffer_filename in flatbuffer_filenames:
        if not os.path.isfile(flatbuffer_filename):
            raise ValueError("Flatbuffer file does not exist: " + flatbuffer_filename)


def add_file_to_sha(sha_algo, file_path):
    """Feeds the content of the file into the given hash object in 8192 byte chunks"""
    with open(file_path, 'rb') as input_file:
        for data in iter(lambda: input_file.read(8192), b''):
            sha_algo.update(data)


def create_sha_from_source_files(font_paths):
    """Creates a SHA-256 hex digest from the content of the given files, in the given order"""
    sha_algo = hashlib.sha256()
    for file_path in font_paths:
        add_file_to_sha(sha_algo, file_path)
    return sha_algo.hexdigest()


class EmojiFontCreator(object):
    """Creates the EmojiCompat font"""

    def __init__(self, font_path, unicode_path, without_flags):
        """
        :param font_path: path to Android NotoColorEmoji font
        :param unicode_path: path to directory that contains unicode files
        :param without_flags: if True flag sequences are left out of the metadata
        :raises ValueError: if one of the input files does not exist
        """
        validate_input_files(font_path, unicode_path, FLATBUFFER_MODULE_DIR)

        self.font_path = font_path
        self.unicode_path = unicode_path
        self.without_flags = without_flags
        self.emoji_data_map = {}
        # emoji id to glyph name, added into cmap 12 in create_font
        self.remapped_codepoints = {}
        self.glyph_to_image_metrics_map = {}
        # set default emoji id to start of Supplemental Private Use Area-A
        self.emoji_id = DEFAULT_EMOJI_ID

    def update_emoji_data(self, codepoints, glyph_name):
        """Updates the existing EmojiData identified with codepoints. The fields that are set are:
        - emoji_id (if it does not exist)
        - image width/height
        Codepoints that are not in emoji_data_map are ignored."""
        key = codepoint_to_string(codepoints)
        if key in self.emoji_data_map:
            # add emoji to final data
            emoji_data = self.emoji_data_map[key]
            emoji_data.update_metrics(self.glyph_to_image_metrics_map[glyph_name])
            if emoji_data.emoji_id == 0:
                emoji_data.emoji_id = self.emoji_id
                self.emoji_id = self.emoji_id + 1
            self.remapped_codepoints[emoji_data.emoji_id] = glyph_name

    def read_cbdt(self, ttf):
        """Read image size data from CBDT."""
        cbdt = ttf['CBDT']
        for strike_data in cbdt.strikeData:
            for key, data in strike_data.items():
                data.decompile()
                self.glyph_to_image_metrics_map[key] = data.metrics

    def read_cmap12(self, ttf, glyph_to_codepoint_map):
        """Reads single code point emojis that are in cmap12 and updates glyph_to_codepoint_map.
        :return: the cmap table with format 12, platformID 3 and platEncID 10
        :raises ValueError: if the font does not contain such a cmap table
        """
        cmap = ttf['cmap']
        for table in cmap.tables:
            if table.format == 12 and table.platformID == 3 and table.platEncID == 10:
                for codepoint, glyph_name in table.cmap.items():
                    glyph_to_codepoint_map[glyph_name] = codepoint
                    self.update_emoji_data([codepoint], glyph_name)
                return table
        raise ValueError("Font doesn't contain cmap with format:12, platformID:3 and platEncID:10")

    def read_gsub(self, ttf, glyph_to_codepoint_map):
        """Reads the emoji sequences defined in GSUB"""
        gsub = ttf['GSUB']
        ligature_subtables = []
        context_subtables = []
        # this code is font dependent, implementing all gsub rules is out of scope of EmojiCompat
        # and would be expensive with little value
        for lookup in gsub.table.LookupList.Lookup:
            for subtable in lookup.SubTable:
                if subtable.LookupType == 5:
                    context_subtables.append(subtable)
                elif subtable.LookupType == 4:
                    ligature_subtables.append(subtable)

        for subtable in context_subtables:
            self.add_gsub_context_subtable(subtable, gsub.table.LookupList, glyph_to_codepoint_map)

        for subtable in ligature_subtables:
            self.add_gsub_ligature_subtable(subtable, glyph_to_codepoint_map)

    def add_gsub_context_subtable(self, subtable, lookup_list, glyph_to_codepoint_map):
        """Add substitutions defined as OpenType Context Substitution"""
        for sub_class_set in subtable.SubClassSet:
            if not sub_class_set:
                continue
            for sub_class_rule in sub_class_set.SubClassRule:
                # prepare holder for substitution list. each rule will have a list that is added
                # to the subs_list.
                subs_list = len(sub_class_rule.SubstLookupRecord) * [None]
                for record in sub_class_rule.SubstLookupRecord:
                    subs_list[record.SequenceIndex] = self.get_substitutions(
                        lookup_list, record.LookupListIndex)
                # create combinations of all lists. the combinations will be filtered by
                # emoji_data_map. the first non empty output glyph will be used as the final
                # glyph
                for seq in itertools.product(*subs_list):
                    glyph_names = [x["input"] for x in seq]
                    codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
                    nonempty_outputs = [x["output"] for x in seq
                                        if x["output"] and x["output"].strip()]
                    if not nonempty_outputs:
                        print("Warning: no output glyph is set for " + str(glyph_names))
                        continue
                    if len(nonempty_outputs) > 1:
                        print(
                            "Warning: multiple glyph is set for "
                            + str(glyph_names) + ", will use the first one")

                    glyph = nonempty_outputs[0]
                    self.update_emoji_data(codepoints, glyph)

    def get_substitutions(self, lookup_list, index):
        """Returns the mappings of all subtables of the lookup at the given index as a list of
        {"input": ..., "output": ...} dictionaries"""
        result = []
        for subtable in lookup_list.Lookup[index].SubTable:
            for glyph_in, glyph_out in subtable.mapping.items():
                result.append({"input": glyph_in, "output": glyph_out})
        return result

    def add_gsub_ligature_subtable(self, subtable, glyph_to_codepoint_map):
        """Add substitutions defined as ligatures (GSUB lookup type 4)"""
        for name, ligatures in subtable.ligatures.items():
            for ligature in ligatures:
                glyph_names = [name] + ligature.Component
                codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
                self.update_emoji_data(codepoints, ligature.LigGlyph)

    def write_metadata_json(self, output_json_file_path):
        """Writes the emojis into a json file
        :return: number of emojis written into the file
        """
        output_json = {}
        output_json['version'] = METADATA_VERSION
        output_json['sourceSha'] = create_sha_from_source_files(
            [self.font_path, OUTPUT_META_FILE, FLATBUFFER_SCHEMA])
        output_json['list'] = []

        emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)

        total_emoji_count = 0
        for emoji_data in emoji_data_list:
            if self.without_flags and is_flag_seq(emoji_data.codepoints):
                continue  # Do not add flags emoji data if this is for subset font.
            element = emoji_data.create_json_element()
            output_json['list'].append(element)
            total_emoji_count = total_emoji_count + 1

        # write the new json file to be processed by FlatBuffers
        with open(output_json_file_path, 'w') as json_file:
            print(json.dumps(output_json, indent=4, sort_keys=True, separators=(',', ':')),
                  file=json_file)

        return total_emoji_count

    def write_metadata_csv(self):
        """Writes emoji metadata into space separated file"""
        with open(OUTPUT_META_FILE, 'w') as csvfile:
            csvwriter = csv.writer(csvfile, delimiter=' ')
            emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)
            csvwriter.writerow(['#id', 'sdkAdded', 'compatAdded', 'codepoints'])
            for emoji_data in emoji_data_list:
                csvwriter.writerow(emoji_data.create_txt_row())

    def add_watermark(self, ttf):
        """Adds a watermark glyph at WATERMARK_NEW_CODE_POINT that is substituted with the glyphs
        of the font version ("major minor") through a new lookup appended to the ccmp feature.
        :raises ValueError: if the font version string cannot be parsed or the font has no ccmp
        feature
        """
        cmap = ttf.getBestCmap()
        gsub = ttf['GSUB'].table

        # Obtain Version string
        m = re.search(r'^Version (\d*)\.(\d*)', font_data.font_version(ttf))
        if not m:
            raise ValueError('The font does not have proper version string.')
        major = m.group(1)
        minor = m.group(2)
        # Replace the dot with space since NotoColorEmoji does not have glyph for dot.
        glyphs = [cmap[ord(x)] for x in '%s %s' % (major, minor)]

        # Update Glyph metrics
        ttf.getGlyphOrder().append(WATERMARK_NEW_GLYPH_ID)
        refGlyphId = cmap[WATERMARK_REF_CODE_POINT]
        ttf['hmtx'].metrics[WATERMARK_NEW_GLYPH_ID] = ttf['hmtx'].metrics[refGlyphId]
        ttf['vmtx'].metrics[WATERMARK_NEW_GLYPH_ID] = ttf['vmtx'].metrics[refGlyphId]

        # Add new Glyph to cmap
        font_data.add_to_cmap(ttf, {WATERMARK_NEW_CODE_POINT: WATERMARK_NEW_GLYPH_ID})

        # Add lookup table for the version string.
        lookups = gsub.LookupList.Lookup
        new_lookup = otTables.Lookup()
        new_lookup.LookupType = 2  # Multiple Substitution Subtable.
        new_lookup.LookupFlag = 0
        new_subtable = otTables.MultipleSubst()
        new_subtable.mapping = {WATERMARK_NEW_GLYPH_ID: tuple(glyphs)}
        new_lookup.SubTable = [new_subtable]
        new_lookup_index = len(lookups)
        lookups.append(new_lookup)

        # Add feature. next() is given a default, otherwise a missing feature would raise
        # StopIteration and the check below would never be reached.
        feature = next(
            (x for x in gsub.FeatureList.FeatureRecord if x.FeatureTag == 'ccmp'), None)
        if feature is None:
            raise ValueError("Font doesn't contain ccmp feature.")

        feature.Feature.LookupListIndex.append(new_lookup_index)

    def create_font(self):
        """Creates the EmojiCompat font from the font_path and unicode_path given in the
        constructor. Writes FONT_PATH, OUTPUT_META_FILE, TEST_DATA_PATH and the flatbuffer java
        files under FLATBUFFER_JAVA_TARGET. Requires the flatc executable to be on the PATH.
        """

        tmp_dir = tempfile.mkdtemp()
        try:
            # create emoji codepoints to EmojiData map
            self.emoji_data_map = load_emoji_data_map(self.unicode_path, self.without_flags)

            # read previous metadata file to update id, sdkAdded and compatAdded. emoji id that is
            # returned is either default or 1 greater than the largest id in previous data
            self.emoji_id = load_previous_metadata(self.emoji_data_map)

            # recalcTimestamp parameter will keep the modified field same as the original font.
            # Changing the modified field in the font causes the font ttf file to change, which
            # makes it harder to understand if something really changed in the font.
            with contextlib.closing(ttLib.TTFont(self.font_path, recalcTimestamp=False)) as ttf:
                # read image size data
                self.read_cbdt(ttf)

                # glyph name to codepoint map
                glyph_to_codepoint_map = {}

                # read single codepoint emojis under cmap12
                cmap12_table = self.read_cmap12(ttf, glyph_to_codepoint_map)

                # read emoji sequences in gsub
                self.read_gsub(ttf, glyph_to_codepoint_map)

                # add all new codepoint to glyph mappings
                cmap12_table.cmap.update(self.remapped_codepoints)

                # final metadata csv will be used to generate the sha, therefore write it before
                # metadata json is written.
                self.write_metadata_csv()

                output_json_file = os.path.join(tmp_dir, OUTPUT_JSON_FILE_NAME)
                flatbuffer_bin_file = os.path.join(tmp_dir, FLATBUFFER_BIN)
                flatbuffer_java_dir = os.path.join(tmp_dir, FLATBUFFER_JAVA_PATH)

                total_emoji_count = self.write_metadata_json(output_json_file)

                # create the flatbuffers binary and java classes
                flatc_command = ['flatc',
                                 '-o',
                                 tmp_dir,
                                 '-b',
                                 '-j',
                                 FLATBUFFER_SCHEMA,
                                 output_json_file]
                subprocess.check_output(flatc_command)

                # inject metadata binary into font
                inject_meta_into_font(ttf, flatbuffer_bin_file)

                # add watermark glyph for manual verification.
                self.add_watermark(ttf)

                # update CBDT and CBLC versions since older android versions cannot read > 2.0
                ttf['CBDT'].version = 2.0
                ttf['CBLC'].version = 2.0

                # save the new font
                ttf.save(FONT_PATH)

                update_flatbuffer_java_files(flatbuffer_java_dir,  # tmp dir
                                             FLATBUFFER_HEADER,
                                             FLATBUFFER_JAVA_TARGET)

                create_test_data(self.unicode_path)
        finally:
            # clear the tmp output directory, also when one of the steps above fails
            shutil.rmtree(tmp_dir, ignore_errors=True)

        print(
            "{0} emojis are written to\n{1}".format(total_emoji_count, FONT_DIR))


def print_usage():
    """Prints how to use the script."""
    print("Please specify a path to font and unicode files.\n"
          "usage: createfont.py noto-color-emoji-path unicode-dir-path")


def parse_args(argv):
    """Parses the command line arguments. Exits with status 1 after printing the usage if the
    font path and the unicode directory are not both given.
    :param argv: argument list in sys.argv format, i.e. argv[0] is the script name
    :return: tuple of (font path, unicode directory path, without_flags)
    """
    # parse manually to avoid any extra dependencies
    if len(argv) < 3:
        print_usage()
        sys.exit(1)

    # the flag is only recognized as the third and last argument
    without_flags = len(argv) == 4 and argv[3] == '--without-flags'

    # read from the argv that is given, not from sys.argv
    return (argv[1], argv[2], without_flags)


def main():
    font_file, unicode_dir, without_flags = parse_args(sys.argv)
    EmojiFontCreator(font_file, unicode_dir, without_flags).create_font()


if __name__ == '__main__':
    main()