diff options
author | Sean McQuillan <seanmcq@google.com> | 2023-01-03 21:23:50 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2023-01-03 21:23:50 +0000 |
commit | 51bffe5fa6cef4a8d6acebae1561498f53948145 (patch) | |
tree | 7ae92ded0e309133933209fe929eb67eb12c4f94 | |
parent | dc835085d183b117e4dd5786c09252d762d27aec (diff) | |
download | noto-fonts-51bffe5fa6cef4a8d6acebae1561498f53948145.tar.gz |
Revert "Bump emojicompat bundled font to 15"
This reverts commit dc835085d183b117e4dd5786c09252d762d27aec.
Reason for revert: Metadata is malformed
Change-Id: I1de72f31fd2c5073c50258e6b2a23a47054d275d
-rw-r--r-- | emoji-compat/README.android | 19 | ||||
-rwxr-xr-x | emoji-compat/createfont.py | 785 | ||||
-rw-r--r-- | emoji-compat/data/emoji_metadata.txt | 31 | ||||
-rwxr-xr-x | emoji-compat/fetch.sh | 118 | ||||
-rw-r--r-- | emoji-compat/font/NotoColorEmojiCompat.ttf | bin | 10529744 -> 10043088 bytes | |||
-rw-r--r-- | emoji-compat/supported-emojis/emojis.txt | 31 |
6 files changed, 794 insertions, 190 deletions
diff --git a/emoji-compat/README.android b/emoji-compat/README.android index 908ee0c..07cabe1 100644 --- a/emoji-compat/README.android +++ b/emoji-compat/README.android @@ -5,17 +5,16 @@ License: Unicode License File: LICENSE_UNICODE Description: -Noto Color Emoji Compat font is generated using Noto Color Emoji font using -<a href="https://github.com/googlefonts/emojicompat">github.com/googlefonts/emojicompat</a> +Noto Color Emoji Compat font is generated using Noto Color Emoji font using createfont.py. The +compat font is under font/ directory. -Canonical source of truth for fonts is +While generating the compat font, Noto Color Emoji font and data files from Unicode are used. -* <a href="http://github.com/googlefonts/noto-emoji">github.com/googlefonts/noto-emoji</a> +data/emoji-metadata.txt is updated using the Noto Color Emoji font and data files from +Unicode. -However, we do not pull down that entire project as it would increase repo size. +supported-emojis/emojis.txt file contains list of emojis that are supported by the font. Main +purpose is testing. It is generated using the Unicode files. -To pull in a new font please update - -* font/NotoColorEmoji.txt -* data/emoji-metadata.txt -* supported-emojis.emojis.txt +Noto Color Emoji font is under the <android_source>/external/noto-fonts/emoji/ directory. Unicode +files are under the <android_source>/external/unicode/ directory.
\ No newline at end of file diff --git a/emoji-compat/createfont.py b/emoji-compat/createfont.py new file mode 100755 index 0000000..f694cf2 --- /dev/null +++ b/emoji-compat/createfont.py @@ -0,0 +1,785 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2017 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Creates the EmojiCompat font with the metadata. Metadata is embedded in FlatBuffers binary format +under a meta tag with name 'Emji'. + +In order to create the final font the followings are used as inputs: + +- NotoColorEmoji.ttf: Emoji font in the Android framework. Currently at +external/noto-fonts/emoji/NotoColorEmoji.ttf + +- Unicode files: Unicode files that are in the framework, and lists information about all the +emojis. These files are emoji-data.txt, emoji-sequences.txt, emoji-zwj-sequences.txt, +and emoji-variation-sequences.txt. Currently at external/unicode/. + +- additions/emoji-zwj-sequences.txt: Includes emojis that are not defined in Unicode files, but are +in the Android font. Resides in framework and currently under external/unicode/. + +- data/emoji_metadata.txt: The file that includes the id, codepoints, the first +Android OS version that the emoji was added (sdkAdded), and finally the first EmojiCompat font +version that the emoji was added (compatAdded). Updated when the script is executed. + +- data/emoji_metadata.fbs: The flatbuffer schema file. See http://google.github.io/flatbuffers/. + +After execution the following files are generated if they don't exist otherwise, they are updated: +- font/NotoColorEmojiCompat.ttf +- supported-emojis/emojis.txt +- data/emoji_metadata.txt +- src/java/android/support/text/emoji/flatbuffer/* +""" + +import contextlib +import csv +import hashlib +import itertools +import json +import os +import re +import shutil +import subprocess +import sys +import tempfile +from fontTools import ttLib +from fontTools.ttLib.tables import otTables +from nototools import font_data + +########### UPDATE OR CHECK WHEN A NEW FONT IS BEING GENERATED ########### +# Last Android SDK Version +SDK_VERSION = 31 +# metadata version that will be embedded into font. If there are updates to the font that would +# cause data/emoji_metadata.txt to change, this integer number should be incremented. This number +# defines in which EmojiCompat metadata version the emoji is added to the font. +METADATA_VERSION = 8 + +####### main directories where output files are created ####### +SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__)) +FONT_DIR = os.path.join(SCRIPT_DIR, 'font') +DATA_DIR = os.path.join(SCRIPT_DIR, 'data') +SUPPORTED_EMOJIS_DIR = os.path.join(SCRIPT_DIR, 'supported-emojis') +JAVA_SRC_DIR = os.path.join('src', 'java') +####### output files ####### +# font file +FONT_PATH = os.path.join(FONT_DIR, 'NotoColorEmojiCompat.ttf') +# emoji metadata json output file +OUTPUT_META_FILE = os.path.join(DATA_DIR, 'emoji_metadata.txt') +# emojis test file +TEST_DATA_PATH = os.path.join(SUPPORTED_EMOJIS_DIR, 'emojis.txt') +####### input files ####### +# Unicode file names to read emoji data +EMOJI_DATA_FILE = 'emoji-data.txt' +EMOJI_SEQ_FILE = 'emoji-sequences.txt' +EMOJI_ZWJ_FILE = 'emoji-zwj-sequences.txt' +EMOJI_VARIATION_SEQ_FILE = 'emoji-variation-sequences.txt' +# Android OS emoji file for emojis that are not in Unicode files +ANDROID_EMOJI_ZWJ_SEQ_FILE = os.path.join('additions', 'emoji-zwj-sequences.txt') +ANDROID_EMOJIS_SEQ_FILE = os.path.join('additions', 'emoji-sequences.txt') +# Android OS emoji style override file. Codepoints that are rendered with emoji style by default +# even though not defined so in <code>emoji-data.txt</code>. +EMOJI_STYLE_OVERRIDE_FILE = os.path.join('additions', 'emoji-data.txt') +# emoji metadata file +INPUT_META_FILE = OUTPUT_META_FILE +# default flatbuffer module location (if not specified by caller) +FLATBUFFER_MODULE_DIR = os.path.join(SCRIPT_DIR, '..', 'emoji-compat-flatbuffers') +# flatbuffer schema +FLATBUFFER_SCHEMA = os.path.join(FLATBUFFER_MODULE_DIR, 'data', 'emoji_metadata.fbs') +# file path for java header, it will be prepended to flatbuffer java files +FLATBUFFER_HEADER = os.path.join(FLATBUFFER_MODULE_DIR, 'data', 'flatbuffer_header.txt') +# temporary emoji metadata json output file +OUTPUT_JSON_FILE_NAME = 'emoji_metadata.json' +# temporary binary file generated by flatbuffer +FLATBUFFER_BIN = 'emoji_metadata.bin' +# directory representation for flatbuffer java package +FLATBUFFER_PACKAGE_PATH = os.path.join('androidx', 'text', 'emoji', 'flatbuffer', '') +# temporary directory that contains flatbuffer java files +FLATBUFFER_JAVA_PATH = os.path.join(FLATBUFFER_PACKAGE_PATH) +FLATBUFFER_METADATA_LIST_JAVA = "MetadataList.java" +FLATBUFFER_METADATA_ITEM_JAVA = "MetadataItem.java" +# directory under source where flatbuffer java files will be copied into +FLATBUFFER_JAVA_TARGET = os.path.join(FLATBUFFER_MODULE_DIR, JAVA_SRC_DIR, FLATBUFFER_PACKAGE_PATH) +# meta tag name used in the font to embed the emoji metadata. This value is also used in +# MetadataListReader.java in order to locate the metadata location. +EMOJI_META_TAG_NAME = 'Emji' + +EMOJI_STR = 'EMOJI' +EMOJI_PRESENTATION_STR = 'EMOJI_PRESENTATION' +ACCEPTED_EMOJI_PROPERTIES = [EMOJI_PRESENTATION_STR, EMOJI_STR] +STD_VARIANTS_EMOJI_STYLE = 'EMOJI STYLE' + +DEFAULT_EMOJI_ID = 0xF0001 +EMOJI_STYLE_VS = 0xFE0F + +# The reference code point to be used for filling metrics of wartermark glyph +WATERMARK_REF_CODE_POINT = 0x1F600 +# The code point and glyph name used for watermark. +WATERMARK_NEW_CODE_POINT = 0x10FF00 +WATERMARK_NEW_GLYPH_ID = 'u10FF00' + +def to_hex_str(value): + """Converts given int value to hex without the 0x prefix""" + return format(value, 'X') + +def hex_str_to_int(string): + """Convert a hex string into int""" + return int(string, 16) + +def codepoint_to_string(codepoints): + """Converts a list of codepoints into a string separated with space.""" + return ' '.join([to_hex_str(x) for x in codepoints]) + +def prepend_header_to_file(file_path, header_path): + """Prepends the header to the file. Used to update flatbuffer java files with header, comments + and annotations.""" + with open(file_path, "r+") as original_file: + with open(header_path, "r") as copyright_file: + original_content = original_file.read() + original_file.seek(0) + original_file.write(copyright_file.read() + "\n" + original_content) + +def is_ri(codepoint): + return 0x1F1E6 <= codepoint and codepoint <= 0x1F1FF + +def is_flag_seq(codepoints): + return all(is_ri(x) for x in codepoints) + + +def update_flatbuffer_java_files(flatbuffer_java_dir, header_dir, target_dir): + """Prepends headers to flatbuffer java files and copies to the final destination""" + tmp_metadata_list = flatbuffer_java_dir + FLATBUFFER_METADATA_LIST_JAVA + tmp_metadata_item = flatbuffer_java_dir + FLATBUFFER_METADATA_ITEM_JAVA + prepend_header_to_file(tmp_metadata_list, header_dir) + prepend_header_to_file(tmp_metadata_item, header_dir) + + if not os.path.exists(target_dir): + os.makedirs(target_dir) + + shutil.copy(tmp_metadata_list, os.path.join(target_dir, FLATBUFFER_METADATA_LIST_JAVA)) + shutil.copy(tmp_metadata_item, os.path.join(target_dir, FLATBUFFER_METADATA_ITEM_JAVA)) + +def create_test_data(unicode_path): + """Read all the emojis in the unicode files and update the test file""" + lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_ZWJ_FILE)) + lines += read_emoji_lines(os.path.join(unicode_path, EMOJI_SEQ_FILE)) + + lines += read_emoji_lines(os.path.join(unicode_path, ANDROID_EMOJI_ZWJ_SEQ_FILE), optional=True) + lines += read_emoji_lines(os.path.join(unicode_path, ANDROID_EMOJIS_SEQ_FILE), optional=True) + + # standardized variants contains a huge list of sequences, only read the ones that are emojis + # and also the ones with FE0F (emoji style) + standardized_variants_lines = read_emoji_lines( + os.path.join(unicode_path, EMOJI_VARIATION_SEQ_FILE)) + for line in standardized_variants_lines: + if STD_VARIANTS_EMOJI_STYLE in line: + lines.append(line) + + emojis_set = set() + for line in lines: + # In unicode 12.0, "emoji-sequences.txt" contains "Basic_Emoji" session. We ignore them + # here since we are already checking the emoji presentations with + # emoji-variation-sequences.txt. + if "BASIC_EMOJI" in line: + continue + codepoints = [hex_str_to_int(x) for x in line.split(';')[0].strip().split(' ')] + emojis_set.add(codepoint_to_string(codepoints).upper()) + + emoji_data_lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_DATA_FILE)) + for line in emoji_data_lines: + codepoints_range, emoji_property = codepoints_and_emoji_prop(line) + if not emoji_property in ACCEPTED_EMOJI_PROPERTIES: + continue + is_emoji_style = emoji_property == EMOJI_PRESENTATION_STR + if is_emoji_style: + codepoints = [to_hex_str(x) for x in + codepoints_for_emojirange(codepoints_range)] + emojis_set.update(codepoints) + + emoji_style_exceptions = get_emoji_style_exceptions(unicode_path) + # finally add the android default emoji exceptions + emojis_set.update([to_hex_str(x) for x in emoji_style_exceptions]) + + emojis_list = list(emojis_set) + emojis_list.sort() + with open(TEST_DATA_PATH, "w") as test_file: + for line in emojis_list: + test_file.write("%s\n" % line) + +class _EmojiData(object): + """Holds the information about a single emoji.""" + + def __init__(self, codepoints, is_emoji_style): + self.codepoints = codepoints + self.emoji_style = is_emoji_style + self.emoji_id = 0 + self.width = 0 + self.height = 0 + self.sdk_added = SDK_VERSION + self.compat_added = METADATA_VERSION + + def update_metrics(self, metrics): + """Updates width/height instance variables with the values given in metrics dictionary. + :param metrics: a dictionary object that has width and height values. + """ + self.width = metrics.width + self.height = metrics.height + + def __repr__(self): + return '<EmojiData {0} - {1}>'.format(self.emoji_style, + codepoint_to_string(self.codepoints)) + + def create_json_element(self): + """Creates the json representation of EmojiData.""" + json_element = {} + json_element['id'] = self.emoji_id + json_element['emojiStyle'] = self.emoji_style + json_element['sdkAdded'] = self.sdk_added + json_element['compatAdded'] = self.compat_added + json_element['width'] = self.width + json_element['height'] = self.height + json_element['codepoints'] = self.codepoints + return json_element + + def create_txt_row(self): + """Creates array of values for CSV of EmojiData.""" + row = [to_hex_str(self.emoji_id), self.sdk_added, self.compat_added] + row += [to_hex_str(x) for x in self.codepoints] + return row + + def update(self, emoji_id, sdk_added, compat_added): + """Updates current EmojiData with the values in a json element""" + self.emoji_id = emoji_id + self.sdk_added = sdk_added + self.compat_added = compat_added + + +def read_emoji_lines(file_path, optional=False): + """Read all lines in an unicode emoji file into a list of uppercase strings. Ignore the empty + lines and comments + :param file_path: unicode emoji file path + :param optional: if True no exception is raised when the file cannot be read + :return: list of uppercase strings + """ + result = [] + try: + with open(file_path) as file_stream: + for line in file_stream: + line = line.strip() + if line and not line.startswith('#'): + result.append(line.upper()) + except IOError: + if optional: + pass + else: + raise + + return result + +def get_emoji_style_exceptions(unicode_path): + """Read EMOJI_STYLE_OVERRIDE_FILE and return the codepoints as integers""" + lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_STYLE_OVERRIDE_FILE)) + exceptions = [] + for line in lines: + codepoint = hex_str_to_int(codepoints_and_emoji_prop(line)[0]) + exceptions.append(codepoint) + return exceptions + +def codepoints_for_emojirange(codepoints_range): + """ Return codepoints given in emoji files. Expand the codepoints that are given as a range + such as XYZ ... UVT + """ + codepoints = [] + if '..' in codepoints_range: + range_start, range_end = codepoints_range.split('..') + codepoints_range = range(hex_str_to_int(range_start), + hex_str_to_int(range_end) + 1) + codepoints.extend(codepoints_range) + else: + codepoints.append(hex_str_to_int(codepoints_range)) + return codepoints + +def codepoints_and_emoji_prop(line): + """For a given emoji file line, return codepoints and emoji property in the line. + 1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component + |Extended_Pictographic] # [...]""" + line = line.strip() + if '#' in line: + line = line[:line.index('#')] + else: + raise ValueError("Line is expected to have # in it") + line = line.split(';') + codepoints_range = line[0].strip() + emoji_property = line[1].strip() + + return codepoints_range, emoji_property + +def read_emoji_intervals(emoji_data_map, file_path, emoji_style_exceptions): + """Read unicode lines of unicode emoji file in which each line describes a set of codepoint + intervals. Expands the interval on a line and inserts related EmojiDatas into emoji_data_map. + A line format that is expected is as follows: + 1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component + |Extended_Pictographic] # [...]""" + lines = read_emoji_lines(file_path) + + for line in lines: + codepoints_range, emoji_property = codepoints_and_emoji_prop(line) + if not emoji_property in ACCEPTED_EMOJI_PROPERTIES: + continue + is_emoji_style = emoji_property == EMOJI_PRESENTATION_STR + codepoints = codepoints_for_emojirange(codepoints_range) + + for codepoint in codepoints: + key = codepoint_to_string([codepoint]) + codepoint_is_emoji_style = is_emoji_style or codepoint in emoji_style_exceptions + if key in emoji_data_map: + # since there are multiple definitions of emojis, only update when emoji style is + # True + if codepoint_is_emoji_style: + emoji_data_map[key].emoji_style = True + else: + emoji_data = _EmojiData([codepoint], codepoint_is_emoji_style) + emoji_data_map[key] = emoji_data + + +def read_emoji_sequences(emoji_data_map, file_path, optional=False, filter=None): + """Reads the content of the file which contains emoji sequences. Creates EmojiData for each + line and puts into emoji_data_map.""" + lines = read_emoji_lines(file_path, optional) + # 1F1E6 1F1E8 ; Name ; [...] + for line in lines: + # In unicode 12.0, "emoji-sequences.txt" contains "Basic_Emoji" session. We ignore them + # here since we are already checking the emoji presentations with + # emoji-variation-sequences.txt. + if "BASIC_EMOJI" in line: + continue + codepoints = [hex_str_to_int(x) for x in line.split(';')[0].strip().split(' ')] + codepoints = [x for x in codepoints if x != EMOJI_STYLE_VS] + if filter: + if filter(codepoints): + continue + key = codepoint_to_string(codepoints) + if not key in emoji_data_map: + emoji_data = _EmojiData(codepoints, False) + emoji_data_map[key] = emoji_data + + +def load_emoji_data_map(unicode_path, without_flags): + """Reads the emoji data files, constructs a map of space separated codepoints to EmojiData. + :return: map of space separated codepoints to EmojiData + """ + if without_flags: + filter = lambda x: is_flag_seq(x) + else: + filter = None + emoji_data_map = {} + emoji_style_exceptions = get_emoji_style_exceptions(unicode_path) + read_emoji_intervals(emoji_data_map, os.path.join(unicode_path, EMOJI_DATA_FILE), + emoji_style_exceptions) + read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, EMOJI_ZWJ_FILE)) + read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, EMOJI_SEQ_FILE), filter=filter) + + # Add the optional ANDROID_EMOJI_ZWJ_SEQ_FILE if it exists. + read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, ANDROID_EMOJI_ZWJ_SEQ_FILE), + optional=True) + # Add the optional ANDROID_EMOJIS_SEQ_FILE if it exists. + read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, ANDROID_EMOJIS_SEQ_FILE), + optional=True) + + return emoji_data_map + + +def load_previous_metadata(emoji_data_map): + """Updates emoji data elements in emoji_data_map using the id, sdk_added and compat_added fields + in emoji_metadata.txt. Returns the smallest available emoji id to use. i.e. if the largest + emoji id emoji_metadata.txt is 1, function would return 2. If emoji_metadata.txt does not + exist, or contains no emojis defined returns DEFAULT_EMOJI_ID""" + current_emoji_id = DEFAULT_EMOJI_ID + if os.path.isfile(INPUT_META_FILE): + with open(INPUT_META_FILE) as csvfile: + reader = csv.reader(csvfile, delimiter=' ') + for row in reader: + if row[0].startswith('#'): + continue + emoji_id = hex_str_to_int(row[0]) + sdk_added = int(row[1]) + compat_added = int(row[2]) + key = codepoint_to_string(hex_str_to_int(x) for x in row[3:]) + if key in emoji_data_map: + emoji_data = emoji_data_map[key] + emoji_data.update(emoji_id, sdk_added, compat_added) + if emoji_data.emoji_id >= current_emoji_id: + current_emoji_id = emoji_data.emoji_id + 1 + + return current_emoji_id + + +def update_ttlib_orig_sort(): + """Updates the ttLib tag sort with a closure that makes the meta table first.""" + orig_sort = ttLib.sortedTagList + + def meta_first_table_sort(tag_list, table_order=None): + """Sorts the tables with the original ttLib sort, then makes the meta table first.""" + tag_list = orig_sort(tag_list, table_order) + tag_list.remove('meta') + tag_list.insert(0, 'meta') + return tag_list + + ttLib.sortedTagList = meta_first_table_sort + + +def inject_meta_into_font(ttf, flatbuffer_bin_filename): + """inject metadata binary into font""" + if not 'meta' in ttf: + ttf['meta'] = ttLib.getTableClass('meta')() + meta = ttf['meta'] + with open(flatbuffer_bin_filename, 'rb') as flatbuffer_bin_file: + meta.data[EMOJI_META_TAG_NAME] = flatbuffer_bin_file.read() + + # sort meta tables for faster access + update_ttlib_orig_sort() + + +def validate_input_files(font_path, unicode_path, flatbuffer_path): + """Validate the existence of font file and the unicode files""" + if not os.path.isfile(font_path): + raise ValueError("Font file does not exist: " + font_path) + + if not os.path.isdir(unicode_path): + raise ValueError( + "Unicode directory does not exist or is not a directory " + unicode_path) + + emoji_filenames = [os.path.join(unicode_path, EMOJI_DATA_FILE), + os.path.join(unicode_path, EMOJI_ZWJ_FILE), + os.path.join(unicode_path, EMOJI_SEQ_FILE)] + for emoji_filename in emoji_filenames: + if not os.path.isfile(emoji_filename): + raise ValueError("Unicode emoji data file does not exist: " + emoji_filename) + + if not os.path.isdir(flatbuffer_path): + raise ValueError( + "Flatbuffer directory does not exist or is not a directory " + flatbuffer_path) + + flatbuffer_filenames = [os.path.join(flatbuffer_path, FLATBUFFER_SCHEMA), + os.path.join(flatbuffer_path, FLATBUFFER_HEADER)] + for flatbuffer_filename in flatbuffer_filenames: + if not os.path.isfile(flatbuffer_filename): + raise ValueError("Flatbuffer file does not exist: " + flatbuffer_filename) + + +def add_file_to_sha(sha_algo, file_path): + with open(file_path, 'rb') as input_file: + for data in iter(lambda: input_file.read(8192), b''): + sha_algo.update(data) + +def create_sha_from_source_files(font_paths): + """Creates a SHA from the given font files""" + sha_algo = hashlib.sha256() + for file_path in font_paths: + add_file_to_sha(sha_algo, file_path) + return sha_algo.hexdigest() + + +class EmojiFontCreator(object): + """Creates the EmojiCompat font""" + + def __init__(self, font_path, unicode_path, without_flags): + validate_input_files(font_path, unicode_path, FLATBUFFER_MODULE_DIR) + + self.font_path = font_path + self.unicode_path = unicode_path + self.without_flags = without_flags + self.emoji_data_map = {} + self.remapped_codepoints = {} + self.glyph_to_image_metrics_map = {} + # set default emoji id to start of Supplemental Private Use Area-A + self.emoji_id = DEFAULT_EMOJI_ID + + def update_emoji_data(self, codepoints, glyph_name): + """Updates the existing EmojiData identified with codepoints. The fields that are set are: + - emoji_id (if it does not exist) + - image width/height""" + key = codepoint_to_string(codepoints) + if key in self.emoji_data_map: + # add emoji to final data + emoji_data = self.emoji_data_map[key] + emoji_data.update_metrics(self.glyph_to_image_metrics_map[glyph_name]) + if emoji_data.emoji_id == 0: + emoji_data.emoji_id = self.emoji_id + self.emoji_id = self.emoji_id + 1 + self.remapped_codepoints[emoji_data.emoji_id] = glyph_name + + def read_cbdt(self, ttf): + """Read image size data from CBDT.""" + cbdt = ttf['CBDT'] + for strike_data in cbdt.strikeData: + for key, data in strike_data.items(): + data.decompile() + self.glyph_to_image_metrics_map[key] = data.metrics + + def read_cmap12(self, ttf, glyph_to_codepoint_map): + """Reads single code point emojis that are in cmap12, updates glyph_to_codepoint_map and + finally clears all elements in CMAP 12""" + cmap = ttf['cmap'] + for table in cmap.tables: + if table.format == 12 and table.platformID == 3 and table.platEncID == 10: + for codepoint, glyph_name in table.cmap.items(): + glyph_to_codepoint_map[glyph_name] = codepoint + self.update_emoji_data([codepoint], glyph_name) + return table + raise ValueError("Font doesn't contain cmap with format:12, platformID:3 and platEncID:10") + + def read_gsub(self, ttf, glyph_to_codepoint_map): + """Reads the emoji sequences defined in GSUB and clear all elements under GSUB""" + gsub = ttf['GSUB'] + ligature_subtables = [] + context_subtables = [] + # this code is font dependent, implementing all gsub rules is out of scope of EmojiCompat + # and would be expensive with little value + for lookup in gsub.table.LookupList.Lookup: + for subtable in lookup.SubTable: + if subtable.LookupType == 5: + context_subtables.append(subtable) + elif subtable.LookupType == 4: + ligature_subtables.append(subtable) + + for subtable in context_subtables: + self.add_gsub_context_subtable(subtable, gsub.table.LookupList, glyph_to_codepoint_map) + + for subtable in ligature_subtables: + self.add_gsub_ligature_subtable(subtable, glyph_to_codepoint_map) + + def add_gsub_context_subtable(self, subtable, lookup_list, glyph_to_codepoint_map): + """Add substitutions defined as OpenType Context Substitution""" + for sub_class_set in subtable.SubClassSet: + if sub_class_set: + for sub_class_rule in sub_class_set.SubClassRule: + # prepare holder for substitution list. each rule will have a list that is added + # to the subs_list. + subs_list = len(sub_class_rule.SubstLookupRecord) * [None] + for record in sub_class_rule.SubstLookupRecord: + subs_list[record.SequenceIndex] = self.get_substitutions(lookup_list, + record.LookupListIndex) + # create combinations or all lists. the combinations will be filtered by + # emoji_data_map. the first element that contain as a valid glyph will be used + # as the final glyph + combinations = list(itertools.product(*subs_list)) + for seq in combinations: + glyph_names = [x["input"] for x in seq] + codepoints = [glyph_to_codepoint_map[x] for x in glyph_names] + outputs = [x["output"] for x in seq if x["output"]] + nonempty_outputs = list(filter(lambda x: x.strip() , outputs)) + if len(nonempty_outputs) == 0: + print("Warning: no output glyph is set for " + str(glyph_names)) + continue + elif len(nonempty_outputs) > 1: + print( + "Warning: multiple glyph is set for " + + str(glyph_names) + ", will use the first one") + + glyph = nonempty_outputs[0] + self.update_emoji_data(codepoints, glyph) + + def get_substitutions(self, lookup_list, index): + result = [] + for x in lookup_list.Lookup[index].SubTable: + for input, output in x.mapping.items(): + result.append({"input": input, "output": output}) + return result + + def add_gsub_ligature_subtable(self, subtable, glyph_to_codepoint_map): + for name, ligatures in subtable.ligatures.items(): + for ligature in ligatures: + glyph_names = [name] + ligature.Component + codepoints = [glyph_to_codepoint_map[x] for x in glyph_names] + self.update_emoji_data(codepoints, ligature.LigGlyph) + + def write_metadata_json(self, output_json_file_path): + """Writes the emojis into a json file""" + output_json = {} + output_json['version'] = METADATA_VERSION + output_json['sourceSha'] = create_sha_from_source_files( + [self.font_path, OUTPUT_META_FILE, FLATBUFFER_SCHEMA]) + output_json['list'] = [] + + emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id) + + total_emoji_count = 0 + for emoji_data in emoji_data_list: + if self.without_flags and is_flag_seq(emoji_data.codepoints): + continue # Do not add flags emoji data if this is for subset font. + element = emoji_data.create_json_element() + output_json['list'].append(element) + total_emoji_count = total_emoji_count + 1 + + # write the new json file to be processed by FlatBuffers + with open(output_json_file_path, 'w') as json_file: + print(json.dumps(output_json, indent=4, sort_keys=True, separators=(',', ':')), + file=json_file) + + return total_emoji_count + + def write_metadata_csv(self): + """Writes emoji metadata into space separated file""" + with open(OUTPUT_META_FILE, 'w') as csvfile: + csvwriter = csv.writer(csvfile, delimiter=' ') + emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id) + csvwriter.writerow(['#id', 'sdkAdded', 'compatAdded', 'codepoints']) + for emoji_data in emoji_data_list: + csvwriter.writerow(emoji_data.create_txt_row()) + + def add_watermark(self, ttf): + cmap = ttf.getBestCmap() + gsub = ttf['GSUB'].table + + # Obtain Version string + m = re.search('^Version (\d*)\.(\d*)', font_data.font_version(ttf)) + if not m: + raise ValueError('The font does not have proper version string.') + major = m.group(1) + minor = m.group(2) + # Replace the dot with space since NotoColorEmoji does not have glyph for dot. + glyphs = [cmap[ord(x)] for x in '%s %s' % (major, minor)] + + # Update Glyph metrics + ttf.getGlyphOrder().append(WATERMARK_NEW_GLYPH_ID) + refGlyphId = cmap[WATERMARK_REF_CODE_POINT] + ttf['hmtx'].metrics[WATERMARK_NEW_GLYPH_ID] = ttf['hmtx'].metrics[refGlyphId] + ttf['vmtx'].metrics[WATERMARK_NEW_GLYPH_ID] = ttf['vmtx'].metrics[refGlyphId] + + # Add new Glyph to cmap + font_data.add_to_cmap(ttf, { WATERMARK_NEW_CODE_POINT : WATERMARK_NEW_GLYPH_ID }) + + # Add lookup table for the version string. + lookups = gsub.LookupList.Lookup + new_lookup = otTables.Lookup() + new_lookup.LookupType = 2 # Multiple Substitution Subtable. + new_lookup.LookupFlag = 0 + new_subtable = otTables.MultipleSubst() + new_subtable.mapping = { WATERMARK_NEW_GLYPH_ID : tuple(glyphs) } + new_lookup.SubTable = [ new_subtable ] + new_lookup_index = len(lookups) + lookups.append(new_lookup) + + # Add feature + feature = next(x for x in gsub.FeatureList.FeatureRecord if x.FeatureTag == 'ccmp') + if not feature: + raise ValueError("Font doesn't contain ccmp feature.") + + feature.Feature.LookupListIndex.append(new_lookup_index) + + def create_font(self): + """Creates the EmojiCompat font. + :param font_path: path to Android NotoColorEmoji font + :param unicode_path: path to directory that contains unicode files + """ + + tmp_dir = tempfile.mkdtemp() + + # create emoji codepoints to EmojiData map + self.emoji_data_map = load_emoji_data_map(self.unicode_path, self.without_flags) + + # read previous metadata file to update id, sdkAdded and compatAdded. emoji id that is + # returned is either default or 1 greater than the largest id in previous data + self.emoji_id = load_previous_metadata(self.emoji_data_map) + + # recalcTimestamp parameter will keep the modified field same as the original font. Changing + # the modified field in the font causes the font ttf file to change, which makes it harder + # to understand if something really changed in the font. + with contextlib.closing(ttLib.TTFont(self.font_path, recalcTimestamp=False)) as ttf: + # read image size data + self.read_cbdt(ttf) + + # glyph name to codepoint map + glyph_to_codepoint_map = {} + + # read single codepoint emojis under cmap12 and clear the table contents + cmap12_table = self.read_cmap12(ttf, glyph_to_codepoint_map) + + # read emoji sequences gsub and clear the table contents + self.read_gsub(ttf, glyph_to_codepoint_map) + + # add all new codepoint to glyph mappings + cmap12_table.cmap.update(self.remapped_codepoints) + + # final metadata csv will be used to generate the sha, therefore write it before + # metadata json is written. + self.write_metadata_csv() + + output_json_file = os.path.join(tmp_dir, OUTPUT_JSON_FILE_NAME) + flatbuffer_bin_file = os.path.join(tmp_dir, FLATBUFFER_BIN) + flatbuffer_java_dir = os.path.join(tmp_dir, FLATBUFFER_JAVA_PATH) + + total_emoji_count = self.write_metadata_json(output_json_file) + + # create the flatbuffers binary and java classes + flatc_command = ['flatc', + '-o', + tmp_dir, + '-b', + '-j', + FLATBUFFER_SCHEMA, + output_json_file] + subprocess.check_output(flatc_command) + + # inject metadata binary into font + inject_meta_into_font(ttf, flatbuffer_bin_file) + + # add wartermark glyph for manual verification. + self.add_watermark(ttf) + + # update CBDT and CBLC versions since older android versions cannot read > 2.0 + ttf['CBDT'].version = 2.0 + ttf['CBLC'].version = 2.0 + + # save the new font + ttf.save(FONT_PATH) + + update_flatbuffer_java_files(flatbuffer_java_dir, #tmp dir + FLATBUFFER_HEADER, + FLATBUFFER_JAVA_TARGET) + + create_test_data(self.unicode_path) + + # clear the tmp output directory + shutil.rmtree(tmp_dir, ignore_errors=True) + + print( + "{0} emojis are written to\n{1}".format(total_emoji_count, FONT_DIR)) + + +def print_usage(): + """Prints how to use the script.""" + print("Please specify a path to font and unicode files.\n" + "usage: createfont.py noto-color-emoji-path unicode-dir-path") + +def parse_args(argv): + # parse manually to avoid any extra dependencies + if len(argv) == 4: + without_flags = argv[3] == '--without-flags' + else: + without_flags = False + + if len(argv) < 3: + print_usage() + sys.exit(1) + return (sys.argv[1], sys.argv[2], without_flags) + +def main(): + font_file, unicode_dir, without_flags = parse_args(sys.argv) + EmojiFontCreator(font_file, unicode_dir, without_flags).create_font() + + +if __name__ == '__main__': + main() diff --git a/emoji-compat/data/emoji_metadata.txt b/emoji-compat/data/emoji_metadata.txt index 6a9110e..d57d34e 100644 --- a/emoji-compat/data/emoji_metadata.txt +++ b/emoji-compat/data/emoji_metadata.txt @@ -3685,34 +3685,3 @@ F0E97 31 8 1FAF6 1F3FC F0E98 31 8 1FAF6 1F3FD
F0E99 31 8 1FAF6 1F3FE
F0E9A 31 8 1FAF6 1F3FF
-F0E9C 1500 9 1F426 200D 2B1B
-F0E9D 1500 9 1F6DC
-F0E9E 1500 9 1FA75
-F0E9F 1500 9 1FA76
-F0EA0 1500 9 1FA77
-F0EA1 1500 9 1FA87
-F0EA2 1500 9 1FA88
-F0EA3 1500 9 1FAAD
-F0EA4 1500 9 1FAAE
-F0EA5 1500 9 1FAAF
-F0EA6 1500 9 1FABB
-F0EA7 1500 9 1FABC
-F0EA8 1500 9 1FABD
-F0EA9 1500 9 1FABF
-F0EAA 1500 9 1FACE
-F0EAB 1500 9 1FACF
-F0EAC 1500 9 1FADA
-F0EAD 1500 9 1FADB
-F0EAE 1500 9 1FAE8
-F0EAF 1500 9 1FAF7
-F0EB0 1500 9 1FAF7 1F3FB
-F0EB1 1500 9 1FAF7 1F3FC
-F0EB2 1500 9 1FAF7 1F3FD
-F0EB3 1500 9 1FAF7 1F3FE
-F0EB4 1500 9 1FAF7 1F3FF
-F0EB5 1500 9 1FAF8
-F0EB6 1500 9 1FAF8 1F3FB
-F0EB7 1500 9 1FAF8 1F3FC
-F0EB8 1500 9 1FAF8 1F3FD
-F0EB9 1500 9 1FAF8 1F3FE
-F0EBA 1500 9 1FAF8 1F3FF
diff --git a/emoji-compat/fetch.sh b/emoji-compat/fetch.sh deleted file mode 100755 index d35127a..0000000 --- a/emoji-compat/fetch.sh +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2022 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Helper script for fetching new emoji compat fonts from github source -# of truth - -# This script is very basic, please extend or replace to handle your -# needs (e.g. pulling specific commits, releases, branches) as needed. - -#set -o xtrace -set +e - -METADATA_GIT="https://github.com/googlefonts/emojicompat.git" -FONT_GIT="https://github.com/googlefonts/noto-emoji.git" - -SCRIPT_DIR=$(readlink -f $(dirname -- "$0")) -TMP_DIR=$(mktemp -d) - -GIT_VERSION=$(git --version) -if [ $? -ne 0 ]; then - echo -e "ERROR: git not found" - exit 1 -fi - -TTX_VERSION=$(ttx --version) - -if [ $? -ne 0 ]; then - echo "ERROR ttx required to check font" - echo -e "\t python3 -m venv venv" - echo -e "\t source venv/bin/activate" - echo -e "\t pip install fonttools" - exit 127 -fi - -echo "METADATA: $METADATA_GIT" -echo "FONT: $FONT_GIT" -echo "Updating in: $SCRIPT_DIR" - -# confirm directory is clean -pushd $SCRIPT_DIR > /dev/null -UNCOMMITED_CHANGES=$(git status --porcelain) -popd > /dev/null -if [[ "$UNCOMMITED_CHANGES" ]]; then - echo "$UNCOMMITED_CHANGES" - read -p "Uncommited changes. Continue? [y/N]:" uncommited - if [[ ! $uncommited =~ ^[Yy] ]]; then - exit 3 - fi -fi - -function confirm_git_commit() { - pushd $TMP_DIR/$1 > /dev/null - RESULT=$(git log -1) - echo "$RESULT" - read -p "Continue for repo $1? [y/N]: " yn - if [[ ! $yn =~ ^[Yy] ]]; then - exit 2 - fi - popd > /dev/null -} - -pushd $TMP_DIR > /dev/null - -git clone --quiet --depth 1 --branch main $METADATA_GIT -confirm_git_commit "emojicompat" -METADATA_FILE="./emojicompat/src/emojicompat/emoji_metadata.txt" -# adjust newlines to avoid giant diffs -cat $METADATA_FILE | awk 'sub("$", "\r")' > emoji_metadata.txt - -# pull the font -git clone --quiet --depth 1 --branch main $FONT_GIT -confirm_git_commit "noto-emoji" -cp ./noto-emoji/fonts/NotoColorEmoji-emojicompat.ttf ./NewFont.ttf - -ttx -o NewFont.ttx NewFont.ttf 2> /dev/null -grep -q 'header version="2.0"' NewFont.ttx - -if [ $? -ne 0 ]; then - echo -e "WRONG HEADER VERSION IN FONT FILE (breaks API23)" - echo -e "Expected 'header version=\"2.0\"" - echo -e "Found: " - grep 'header version' NewFont.ttx - exit 128 -fi - -# concat new codepoints to emojis.txt -NEW_LINES=$(comm -23 emoji_metadata.txt $SCRIPT_DIR/data/emoji_metadata.txt) -NEW_CODEPOINTS=$(echo "$NEW_LINES" | cut -d" " -f4-100 | sed 's/\r//') - -if [[ "$NEW_CODEPOINTS" ]]; then - echo "$NEW_CODEPOINTS" - read -p "New codpoints found in metadata. Append emojis.txt? [y/N]:" emojiAppend - if [[ "$emojiAppend" =~ ^[Yy] ]]; then - echo "$NEW_CODEPOINTS" >> $SCRIPT_DIR/supported-emojis/emojis.txt - echo "Updated ${SCRIPT_DIR}/supported-emojis/emojis.txt" - fi -fi - -cp emoji_metadata.txt $SCRIPT_DIR/data/emoji_metadata.txt -echo "Updated ${SCRIPT_DIR}/data/emoji_metadata.txt" -cp NewFont.ttf $SCRIPT_DIR/font/NotoColorEmojiCompat.ttf -echo "Updated ${SCRIPT_DIR}/font/NotoColorEmojiCompat.ttf" - -popd > /dev/null -rm -rf $TMP_DIR diff --git a/emoji-compat/font/NotoColorEmojiCompat.ttf b/emoji-compat/font/NotoColorEmojiCompat.ttf Binary files differindex f7f9129..7334ae8 100644 --- a/emoji-compat/font/NotoColorEmojiCompat.ttf +++ b/emoji-compat/font/NotoColorEmojiCompat.ttf diff --git a/emoji-compat/supported-emojis/emojis.txt b/emoji-compat/supported-emojis/emojis.txt index 3e5eb9e..a3ac299 100644 --- a/emoji-compat/supported-emojis/emojis.txt +++ b/emoji-compat/supported-emojis/emojis.txt @@ -3831,34 +3831,3 @@ 39 FE0F 20E3 A9 FE0F AE FE0F -1F426 200D 2B1B -1F6DC -1FA75 -1FA76 -1FA77 -1FA87 -1FA88 -1FAAD -1FAAE -1FAAF -1FABB -1FABC -1FABD -1FABF -1FACE -1FACF -1FADA -1FADB -1FAE8 -1FAF7 -1FAF7 1F3FB -1FAF7 1F3FC -1FAF7 1F3FD -1FAF7 1F3FE -1FAF7 1F3FF -1FAF8 -1FAF8 1F3FB -1FAF8 1F3FC -1FAF8 1F3FD -1FAF8 1F3FE -1FAF8 1F3FF |