summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSean McQuillan <seanmcq@google.com>2023-01-03 21:23:50 +0000
committerGerrit Code Review <noreply-gerritcodereview@google.com>2023-01-03 21:23:50 +0000
commit51bffe5fa6cef4a8d6acebae1561498f53948145 (patch)
tree7ae92ded0e309133933209fe929eb67eb12c4f94
parentdc835085d183b117e4dd5786c09252d762d27aec (diff)
downloadnoto-fonts-51bffe5fa6cef4a8d6acebae1561498f53948145.tar.gz
Revert "Bump emojicompat bundled font to 15"
This reverts commit dc835085d183b117e4dd5786c09252d762d27aec. Reason for revert: Metadata is malformed Change-Id: I1de72f31fd2c5073c50258e6b2a23a47054d275d
-rw-r--r--emoji-compat/README.android19
-rwxr-xr-xemoji-compat/createfont.py785
-rw-r--r--emoji-compat/data/emoji_metadata.txt31
-rwxr-xr-xemoji-compat/fetch.sh118
-rw-r--r--emoji-compat/font/NotoColorEmojiCompat.ttfbin10529744 -> 10043088 bytes
-rw-r--r--emoji-compat/supported-emojis/emojis.txt31
6 files changed, 794 insertions, 190 deletions
diff --git a/emoji-compat/README.android b/emoji-compat/README.android
index 908ee0c..07cabe1 100644
--- a/emoji-compat/README.android
+++ b/emoji-compat/README.android
@@ -5,17 +5,16 @@ License: Unicode
License File: LICENSE_UNICODE
Description:
-Noto Color Emoji Compat font is generated using Noto Color Emoji font using
-<a href="https://github.com/googlefonts/emojicompat">github.com/googlefonts/emojicompat</a>
+Noto Color Emoji Compat font is generated using Noto Color Emoji font using createfont.py. The
+compat font is under font/ directory.
-Canonical source of truth for fonts is
+While generating the compat font, Noto Color Emoji font and data files from Unicode are used.
-* <a href="http://github.com/googlefonts/noto-emoji">github.com/googlefonts/noto-emoji</a>
+data/emoji_metadata.txt is updated using the Noto Color Emoji font and data files from
+Unicode.
-However, we do not pull down that entire project as it would increase repo size.
+supported-emojis/emojis.txt file contains list of emojis that are supported by the font. Main
+purpose is testing. It is generated using the Unicode files.
-To pull in a new font please update
-
-* font/NotoColorEmoji.txt
-* data/emoji-metadata.txt
-* supported-emojis.emojis.txt
+Noto Color Emoji font is under the <android_source>/external/noto-fonts/emoji/ directory. Unicode
+files are under the <android_source>/external/unicode/ directory. \ No newline at end of file
diff --git a/emoji-compat/createfont.py b/emoji-compat/createfont.py
new file mode 100755
index 0000000..f694cf2
--- /dev/null
+++ b/emoji-compat/createfont.py
@@ -0,0 +1,785 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Creates the EmojiCompat font with the metadata. Metadata is embedded in FlatBuffers binary format
+under a meta tag with name 'Emji'.
+
+In order to create the final font, the following are used as inputs:
+
+- NotoColorEmoji.ttf: Emoji font in the Android framework. Currently at
+external/noto-fonts/emoji/NotoColorEmoji.ttf
+
+- Unicode files: Unicode files that are in the framework, and lists information about all the
+emojis. These files are emoji-data.txt, emoji-sequences.txt, emoji-zwj-sequences.txt,
+and emoji-variation-sequences.txt. Currently at external/unicode/.
+
+- additions/emoji-zwj-sequences.txt: Includes emojis that are not defined in Unicode files, but are
+in the Android font. Resides in framework and currently under external/unicode/.
+
+- data/emoji_metadata.txt: The file that includes the id, codepoints, the first
+Android OS version that the emoji was added (sdkAdded), and finally the first EmojiCompat font
+version that the emoji was added (compatAdded). Updated when the script is executed.
+
+- data/emoji_metadata.fbs: The flatbuffer schema file. See http://google.github.io/flatbuffers/.
+
+After execution the following files are generated if they don't exist otherwise, they are updated:
+- font/NotoColorEmojiCompat.ttf
+- supported-emojis/emojis.txt
+- data/emoji_metadata.txt
+- src/java/android/support/text/emoji/flatbuffer/*
+"""
+
+import contextlib
+import csv
+import hashlib
+import itertools
+import json
+import os
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+from fontTools import ttLib
+from fontTools.ttLib.tables import otTables
+from nototools import font_data
+
+########### UPDATE OR CHECK WHEN A NEW FONT IS BEING GENERATED ###########
+# Last Android SDK Version
+SDK_VERSION = 31
+# metadata version that will be embedded into font. If there are updates to the font that would
+# cause data/emoji_metadata.txt to change, this integer number should be incremented. This number
+# defines in which EmojiCompat metadata version the emoji is added to the font.
+METADATA_VERSION = 8
+
+####### main directories where output files are created #######
+SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
+FONT_DIR = os.path.join(SCRIPT_DIR, 'font')
+DATA_DIR = os.path.join(SCRIPT_DIR, 'data')
+SUPPORTED_EMOJIS_DIR = os.path.join(SCRIPT_DIR, 'supported-emojis')
+JAVA_SRC_DIR = os.path.join('src', 'java')
+####### output files #######
+# font file
+FONT_PATH = os.path.join(FONT_DIR, 'NotoColorEmojiCompat.ttf')
+# emoji metadata json output file
+OUTPUT_META_FILE = os.path.join(DATA_DIR, 'emoji_metadata.txt')
+# emojis test file
+TEST_DATA_PATH = os.path.join(SUPPORTED_EMOJIS_DIR, 'emojis.txt')
+####### input files #######
+# Unicode file names to read emoji data
+EMOJI_DATA_FILE = 'emoji-data.txt'
+EMOJI_SEQ_FILE = 'emoji-sequences.txt'
+EMOJI_ZWJ_FILE = 'emoji-zwj-sequences.txt'
+EMOJI_VARIATION_SEQ_FILE = 'emoji-variation-sequences.txt'
+# Android OS emoji file for emojis that are not in Unicode files
+ANDROID_EMOJI_ZWJ_SEQ_FILE = os.path.join('additions', 'emoji-zwj-sequences.txt')
+ANDROID_EMOJIS_SEQ_FILE = os.path.join('additions', 'emoji-sequences.txt')
+# Android OS emoji style override file. Codepoints that are rendered with emoji style by default
+# even though not defined so in <code>emoji-data.txt</code>.
+EMOJI_STYLE_OVERRIDE_FILE = os.path.join('additions', 'emoji-data.txt')
+# emoji metadata file
+INPUT_META_FILE = OUTPUT_META_FILE
+# default flatbuffer module location (if not specified by caller)
+FLATBUFFER_MODULE_DIR = os.path.join(SCRIPT_DIR, '..', 'emoji-compat-flatbuffers')
+# flatbuffer schema
+FLATBUFFER_SCHEMA = os.path.join(FLATBUFFER_MODULE_DIR, 'data', 'emoji_metadata.fbs')
+# file path for java header, it will be prepended to flatbuffer java files
+FLATBUFFER_HEADER = os.path.join(FLATBUFFER_MODULE_DIR, 'data', 'flatbuffer_header.txt')
+# temporary emoji metadata json output file
+OUTPUT_JSON_FILE_NAME = 'emoji_metadata.json'
+# temporary binary file generated by flatbuffer
+FLATBUFFER_BIN = 'emoji_metadata.bin'
+# directory representation for flatbuffer java package
+FLATBUFFER_PACKAGE_PATH = os.path.join('androidx', 'text', 'emoji', 'flatbuffer', '')
+# temporary directory that contains flatbuffer java files
+FLATBUFFER_JAVA_PATH = os.path.join(FLATBUFFER_PACKAGE_PATH)
+FLATBUFFER_METADATA_LIST_JAVA = "MetadataList.java"
+FLATBUFFER_METADATA_ITEM_JAVA = "MetadataItem.java"
+# directory under source where flatbuffer java files will be copied into
+FLATBUFFER_JAVA_TARGET = os.path.join(FLATBUFFER_MODULE_DIR, JAVA_SRC_DIR, FLATBUFFER_PACKAGE_PATH)
+# meta tag name used in the font to embed the emoji metadata. This value is also used in
+# MetadataListReader.java in order to locate the metadata location.
+EMOJI_META_TAG_NAME = 'Emji'
+
+EMOJI_STR = 'EMOJI'
+EMOJI_PRESENTATION_STR = 'EMOJI_PRESENTATION'
+ACCEPTED_EMOJI_PROPERTIES = [EMOJI_PRESENTATION_STR, EMOJI_STR]
+STD_VARIANTS_EMOJI_STYLE = 'EMOJI STYLE'
+
+DEFAULT_EMOJI_ID = 0xF0001
+EMOJI_STYLE_VS = 0xFE0F
+
+# The reference code point to be used for filling metrics of the watermark glyph
+WATERMARK_REF_CODE_POINT = 0x1F600
+# The code point and glyph name used for watermark.
+WATERMARK_NEW_CODE_POINT = 0x10FF00
+WATERMARK_NEW_GLYPH_ID = 'u10FF00'
+
def to_hex_str(value):
    """Return ``value`` as uppercase hexadecimal digits, without the ``0x`` prefix."""
    return '{0:X}'.format(value)
+
def hex_str_to_int(string):
    """Parse ``string`` as a base-16 number and return the integer value."""
    hex_base = 16
    return int(string, hex_base)
+
def codepoint_to_string(codepoints):
    """Render an iterable of code points as their hex forms joined by single spaces."""
    hex_values = map(to_hex_str, codepoints)
    return ' '.join(hex_values)
+
def prepend_header_to_file(file_path, header_path):
    """Rewrite ``file_path`` in place so it starts with the text of ``header_path``.

    The header and the previous content are separated by one newline. Used to add the header,
    comments and annotations to the generated flatbuffer java files.

    :param file_path: file that is rewritten in place
    :param header_path: file whose content is placed first
    """
    with open(file_path, "r+") as target_file, open(header_path, "r") as header_file:
        body_text = target_file.read()
        header_text = header_file.read()
        # Rewind so the combined text overwrites the file from its beginning; the new content is
        # never shorter than the old one, so nothing stale is left behind.
        target_file.seek(0)
        target_file.write(header_text + "\n" + body_text)
+
def is_ri(codepoint):
    """Return True when ``codepoint`` lies in the inclusive range 0x1F1E6..0x1F1FF."""
    return 0x1F1E6 <= codepoint <= 0x1F1FF
+
def is_flag_seq(codepoints):
    """Return True when every code point in ``codepoints`` satisfies ``is_ri``.

    An empty sequence yields True because no element fails the check.
    """
    for codepoint in codepoints:
        if not is_ri(codepoint):
            return False
    return True
+
+
def update_flatbuffer_java_files(flatbuffer_java_dir, header_dir, target_dir):
    """Prepend the header to the generated flatbuffer java files and copy them to ``target_dir``.

    :param flatbuffer_java_dir: directory that holds the generated MetadataList/MetadataItem files
    :param header_dir: path of the header file that is prepended to both java files
    :param target_dir: destination directory; created when it does not exist yet
    """
    # os.path.join works whether or not flatbuffer_java_dir ends with a path separator; plain
    # string concatenation silently produced a wrong path when the separator was missing.
    tmp_metadata_list = os.path.join(flatbuffer_java_dir, FLATBUFFER_METADATA_LIST_JAVA)
    tmp_metadata_item = os.path.join(flatbuffer_java_dir, FLATBUFFER_METADATA_ITEM_JAVA)
    prepend_header_to_file(tmp_metadata_list, header_dir)
    prepend_header_to_file(tmp_metadata_item, header_dir)

    # exist_ok avoids the check-then-create race of a separate os.path.exists() test.
    os.makedirs(target_dir, exist_ok=True)

    shutil.copy(tmp_metadata_list, os.path.join(target_dir, FLATBUFFER_METADATA_LIST_JAVA))
    shutil.copy(tmp_metadata_item, os.path.join(target_dir, FLATBUFFER_METADATA_ITEM_JAVA))
+
def create_test_data(unicode_path):
    """Collect every emoji listed in the unicode files and rewrite the test file.

    The sorted, de-duplicated entries are written to TEST_DATA_PATH, one per line.

    :param unicode_path: directory that contains the unicode emoji files
    """
    def unicode_file(name):
        return os.path.join(unicode_path, name)

    sequence_lines = read_emoji_lines(unicode_file(EMOJI_ZWJ_FILE))
    sequence_lines += read_emoji_lines(unicode_file(EMOJI_SEQ_FILE))
    # the android additions are optional, a missing file contributes nothing
    sequence_lines += read_emoji_lines(unicode_file(ANDROID_EMOJI_ZWJ_SEQ_FILE), optional=True)
    sequence_lines += read_emoji_lines(unicode_file(ANDROID_EMOJIS_SEQ_FILE), optional=True)

    # the variation sequences file is large, keep only the entries marked as emoji style
    sequence_lines.extend(
        line for line in read_emoji_lines(unicode_file(EMOJI_VARIATION_SEQ_FILE))
        if STD_VARIANTS_EMOJI_STYLE in line)

    supported = set()
    for line in sequence_lines:
        # In unicode 12.0, "emoji-sequences.txt" contains a "Basic_Emoji" section. Those lines
        # are skipped because emoji presentation is already covered through
        # emoji-variation-sequences.txt.
        if "BASIC_EMOJI" in line:
            continue
        codepoint_field = line.split(';')[0].strip()
        codepoints = [hex_str_to_int(token) for token in codepoint_field.split(' ')]
        supported.add(codepoint_to_string(codepoints).upper())

    for line in read_emoji_lines(unicode_file(EMOJI_DATA_FILE)):
        codepoints_range, emoji_property = codepoints_and_emoji_prop(line)
        accepted = emoji_property in ACCEPTED_EMOJI_PROPERTIES
        # only the code points with emoji presentation are listed individually
        if accepted and emoji_property == EMOJI_PRESENTATION_STR:
            supported.update(
                to_hex_str(codepoint)
                for codepoint in codepoints_for_emojirange(codepoints_range))

    # finally add the android default emoji exceptions
    supported.update(
        to_hex_str(codepoint) for codepoint in get_emoji_style_exceptions(unicode_path))

    with open(TEST_DATA_PATH, "w") as test_file:
        for entry in sorted(supported):
            test_file.write("%s\n" % entry)
+
+class _EmojiData(object):
+ """Holds the information about a single emoji."""
+
+ def __init__(self, codepoints, is_emoji_style):
+ self.codepoints = codepoints
+ self.emoji_style = is_emoji_style
+ self.emoji_id = 0
+ self.width = 0
+ self.height = 0
+ self.sdk_added = SDK_VERSION
+ self.compat_added = METADATA_VERSION
+
+ def update_metrics(self, metrics):
+ """Updates width/height instance variables with the values given in metrics dictionary.
+ :param metrics: a dictionary object that has width and height values.
+ """
+ self.width = metrics.width
+ self.height = metrics.height
+
+ def __repr__(self):
+ return '<EmojiData {0} - {1}>'.format(self.emoji_style,
+ codepoint_to_string(self.codepoints))
+
+ def create_json_element(self):
+ """Creates the json representation of EmojiData."""
+ json_element = {}
+ json_element['id'] = self.emoji_id
+ json_element['emojiStyle'] = self.emoji_style
+ json_element['sdkAdded'] = self.sdk_added
+ json_element['compatAdded'] = self.compat_added
+ json_element['width'] = self.width
+ json_element['height'] = self.height
+ json_element['codepoints'] = self.codepoints
+ return json_element
+
+ def create_txt_row(self):
+ """Creates array of values for CSV of EmojiData."""
+ row = [to_hex_str(self.emoji_id), self.sdk_added, self.compat_added]
+ row += [to_hex_str(x) for x in self.codepoints]
+ return row
+
+ def update(self, emoji_id, sdk_added, compat_added):
+ """Updates current EmojiData with the values in a json element"""
+ self.emoji_id = emoji_id
+ self.sdk_added = sdk_added
+ self.compat_added = compat_added
+
+
def read_emoji_lines(file_path, optional=False):
    """Read a unicode emoji file into a list of uppercase strings.

    Lines are stripped of surrounding whitespace; empty lines and lines that start with ``#``
    are dropped.

    :param file_path: unicode emoji file path
    :param optional: if True no exception is raised when the file cannot be read
    :return: list of uppercase strings
    :raises IOError: when the file cannot be read and ``optional`` is False
    """
    result = []
    try:
        # The Unicode data files are UTF-8 and carry emoji characters in their trailing
        # comments, so decode explicitly instead of relying on the locale default encoding.
        with open(file_path, encoding='utf-8') as file_stream:
            for line in file_stream:
                line = line.strip()
                if line and not line.startswith('#'):
                    result.append(line.upper())
    except IOError:
        # Optional files are allowed to be missing; whatever was read so far is returned.
        if not optional:
            raise

    return result
+
def get_emoji_style_exceptions(unicode_path):
    """Return the code points listed in EMOJI_STYLE_OVERRIDE_FILE as a list of integers.

    :param unicode_path: directory that contains the unicode emoji files
    """
    override_path = os.path.join(unicode_path, EMOJI_STYLE_OVERRIDE_FILE)
    return [hex_str_to_int(codepoints_and_emoji_prop(line)[0])
            for line in read_emoji_lines(override_path)]
+
def codepoints_for_emojirange(codepoints_range):
    """Return the integer code points described by one field of an emoji file.

    A field of the form ``START..END`` is expanded to every value from START to END inclusive;
    any other field is parsed as a single hex code point.
    """
    if '..' not in codepoints_range:
        return [hex_str_to_int(codepoints_range)]

    first, last = codepoints_range.split('..')
    # + 1 because the END value of the range is inclusive
    return list(range(hex_str_to_int(first), hex_str_to_int(last) + 1))
+
def codepoints_and_emoji_prop(line):
    """Split an emoji file line into its code point field and its emoji property.

    Expected line format:
    1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
    |Extended_Pictographic] # [...]

    :return: tuple of (code points or range, emoji property), both stripped of whitespace
    :raises ValueError: when the line has no ``#``
    """
    before_comment, hash_sign, _ = line.strip().partition('#')
    if not hash_sign:
        raise ValueError("Line is expected to have # in it")

    fields = before_comment.split(';')
    return fields[0].strip(), fields[1].strip()
+
def read_emoji_intervals(emoji_data_map, file_path, emoji_style_exceptions):
    """Read a unicode emoji file in which each line describes a set of codepoint intervals.

    Expands the interval on each line and inserts the related EmojiData objects into
    emoji_data_map. Lines whose property is not in ACCEPTED_EMOJI_PROPERTIES are ignored.
    A line format that is expected is as follows:
    1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
    |Extended_Pictographic] # [...]

    :param emoji_data_map: map of space separated codepoints to EmojiData, updated in place
    :param file_path: unicode emoji file path
    :param emoji_style_exceptions: code points that are always treated as emoji style
    """
    # Membership is tested once per expanded code point, so build the set a single time
    # instead of scanning the exceptions list on every iteration.
    style_exceptions = set(emoji_style_exceptions)

    for line in read_emoji_lines(file_path):
        codepoints_range, emoji_property = codepoints_and_emoji_prop(line)
        if emoji_property not in ACCEPTED_EMOJI_PROPERTIES:
            continue
        is_emoji_style = emoji_property == EMOJI_PRESENTATION_STR

        for codepoint in codepoints_for_emojirange(codepoints_range):
            key = codepoint_to_string([codepoint])
            codepoint_is_emoji_style = is_emoji_style or codepoint in style_exceptions
            if key not in emoji_data_map:
                emoji_data_map[key] = _EmojiData([codepoint], codepoint_is_emoji_style)
            elif codepoint_is_emoji_style:
                # since there are multiple definitions of emojis, an existing entry is only
                # ever upgraded to emoji style, never downgraded
                emoji_data_map[key].emoji_style = True
+
+
def read_emoji_sequences(emoji_data_map, file_path, optional=False, filter=None):
    """Read a file of emoji sequences and add an EmojiData for each new sequence.

    EMOJI_STYLE_VS is dropped from every sequence before it is used as a key. Sequences that are
    already in emoji_data_map are left untouched.

    :param emoji_data_map: map of space separated codepoints to EmojiData, updated in place
    :param file_path: unicode emoji sequence file path
    :param optional: if True a file that cannot be read is ignored
    :param filter: optional callable; a sequence is skipped when it returns a true value
    """
    # 1F1E6 1F1E8 ; Name ; [...]
    for line in read_emoji_lines(file_path, optional):
        # In unicode 12.0, "emoji-sequences.txt" contains a "Basic_Emoji" section. Those lines
        # are skipped because emoji presentation is already covered through
        # emoji-variation-sequences.txt.
        if "BASIC_EMOJI" in line:
            continue

        codepoint_field = line.split(';')[0].strip()
        parsed = [hex_str_to_int(token) for token in codepoint_field.split(' ')]
        codepoints = [codepoint for codepoint in parsed if codepoint != EMOJI_STYLE_VS]
        if filter and filter(codepoints):
            continue

        key = codepoint_to_string(codepoints)
        if key not in emoji_data_map:
            emoji_data_map[key] = _EmojiData(codepoints, False)
+
+
def load_emoji_data_map(unicode_path, without_flags):
    """Read the emoji data files and build a map of space separated codepoints to EmojiData.

    :param unicode_path: directory that contains the unicode emoji files
    :param without_flags: when True, flag sequences in EMOJI_SEQ_FILE are skipped
    :return: map of space separated codepoints to EmojiData
    """
    def unicode_file(name):
        return os.path.join(unicode_path, name)

    sequence_filter = is_flag_seq if without_flags else None

    emoji_data_map = {}
    emoji_style_exceptions = get_emoji_style_exceptions(unicode_path)
    read_emoji_intervals(emoji_data_map, unicode_file(EMOJI_DATA_FILE), emoji_style_exceptions)
    read_emoji_sequences(emoji_data_map, unicode_file(EMOJI_ZWJ_FILE))
    read_emoji_sequences(emoji_data_map, unicode_file(EMOJI_SEQ_FILE), filter=sequence_filter)

    # The android additions are optional and are only read when the files exist.
    for optional_name in (ANDROID_EMOJI_ZWJ_SEQ_FILE, ANDROID_EMOJIS_SEQ_FILE):
        read_emoji_sequences(emoji_data_map, unicode_file(optional_name), optional=True)

    return emoji_data_map
+
+
def load_previous_metadata(emoji_data_map):
    """Update emoji_data_map with the id, sdk_added and compat_added fields in emoji_metadata.txt.

    Rows whose code points are not present in emoji_data_map are ignored. Blank lines and lines
    whose first cell starts with ``#`` are skipped.

    :param emoji_data_map: map of space separated codepoints to EmojiData, updated in place
    :return: the smallest available emoji id to use, i.e. one more than the largest id that was
        applied from the file. DEFAULT_EMOJI_ID when the file does not exist or no row applied
        an id at or above DEFAULT_EMOJI_ID.
    """
    current_emoji_id = DEFAULT_EMOJI_ID
    if not os.path.isfile(INPUT_META_FILE):
        return current_emoji_id

    # newline='' lets the csv module do its own line-ending handling, as its documentation asks.
    with open(INPUT_META_FILE, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=' '):
            # csv.reader yields an empty list for a blank line; indexing it would raise
            # IndexError, so skip it together with the comment rows.
            if not row or row[0].startswith('#'):
                continue
            emoji_id = hex_str_to_int(row[0])
            sdk_added = int(row[1])
            compat_added = int(row[2])
            key = codepoint_to_string(hex_str_to_int(x) for x in row[3:])
            if key in emoji_data_map:
                emoji_data = emoji_data_map[key]
                emoji_data.update(emoji_id, sdk_added, compat_added)
                if emoji_data.emoji_id >= current_emoji_id:
                    current_emoji_id = emoji_data.emoji_id + 1

    return current_emoji_id
+
+
def update_ttlib_orig_sort():
    """Replace ttLib.sortedTagList with a wrapper that moves the meta table to the front."""
    orig_sort = ttLib.sortedTagList

    def meta_first_table_sort(tag_list, table_order=None):
        """Sorts the tables with the original ttLib sort, then makes the meta table first."""
        tag_list = orig_sort(tag_list, table_order)
        # The replacement is installed module-wide, so it can be handed a tag list without a
        # meta table; list.remove() would raise ValueError in that case.
        if 'meta' in tag_list:
            tag_list.remove('meta')
            tag_list.insert(0, 'meta')
        return tag_list

    ttLib.sortedTagList = meta_first_table_sort
+
+
def inject_meta_into_font(ttf, flatbuffer_bin_filename):
    """Store the flatbuffer binary in the font's meta table under EMOJI_META_TAG_NAME.

    A meta table is created when the font does not have one yet.

    :param ttf: font object that receives the metadata
    :param flatbuffer_bin_filename: path of the binary file generated by flatbuffer
    """
    if 'meta' not in ttf:
        meta_table_class = ttLib.getTableClass('meta')
        ttf['meta'] = meta_table_class()
    meta_table = ttf['meta']

    with open(flatbuffer_bin_filename, 'rb') as flatbuffer_bin_file:
        metadata_blob = flatbuffer_bin_file.read()
    meta_table.data[EMOJI_META_TAG_NAME] = metadata_blob

    # sort meta tables for faster access
    update_ttlib_orig_sort()
+
+
def validate_input_files(font_path, unicode_path, flatbuffer_path):
    """Validate the existence of the font file, the unicode files and the flatbuffer files.

    :param font_path: path to the source font file
    :param unicode_path: directory that contains the unicode emoji files
    :param flatbuffer_path: directory of the flatbuffer module
    :raises ValueError: when a required file or directory is missing
    """
    if not os.path.isfile(font_path):
        raise ValueError("Font file does not exist: " + font_path)

    if not os.path.isdir(unicode_path):
        raise ValueError(
            "Unicode directory does not exist or is not a directory " + unicode_path)

    # The variation sequences file and the emoji style override file are read unconditionally
    # later in the run, so check them up front instead of failing part-way through generation.
    required_unicode_files = [EMOJI_DATA_FILE,
                              EMOJI_ZWJ_FILE,
                              EMOJI_SEQ_FILE,
                              EMOJI_VARIATION_SEQ_FILE,
                              EMOJI_STYLE_OVERRIDE_FILE]
    for name in required_unicode_files:
        emoji_filename = os.path.join(unicode_path, name)
        if not os.path.isfile(emoji_filename):
            raise ValueError("Unicode emoji data file does not exist: " + emoji_filename)

    if not os.path.isdir(flatbuffer_path):
        raise ValueError(
            "Flatbuffer directory does not exist or is not a directory " + flatbuffer_path)

    # NOTE: FLATBUFFER_SCHEMA and FLATBUFFER_HEADER are built from the absolute SCRIPT_DIR, and
    # os.path.join discards earlier components when a later one is absolute, so these joins
    # resolve to the constants themselves.
    flatbuffer_filenames = [os.path.join(flatbuffer_path, FLATBUFFER_SCHEMA),
                            os.path.join(flatbuffer_path, FLATBUFFER_HEADER)]
    for flatbuffer_filename in flatbuffer_filenames:
        if not os.path.isfile(flatbuffer_filename):
            raise ValueError("Flatbuffer file does not exist: " + flatbuffer_filename)
+
+
def add_file_to_sha(sha_algo, file_path):
    """Feed the bytes of ``file_path`` into ``sha_algo`` in chunks of 8192 bytes.

    :param sha_algo: hash object exposing ``update``
    :param file_path: file whose content is hashed
    """
    with open(file_path, 'rb') as input_file:
        while True:
            chunk = input_file.read(8192)
            if chunk == b'':
                # an empty read marks the end of the file
                break
            sha_algo.update(chunk)
+
def create_sha_from_source_files(font_paths):
    """Return the SHA-256 hex digest of the given files, hashed in the order they are listed.

    :param font_paths: iterable of file paths
    """
    combined_sha = hashlib.sha256()
    for source_path in font_paths:
        add_file_to_sha(combined_sha, source_path)
    digest_text = combined_sha.hexdigest()
    return digest_text
+
+
class EmojiFontCreator(object):
    """Creates the EmojiCompat font from a source emoji font and the unicode emoji files."""

    def __init__(self, font_path, unicode_path, without_flags):
        """Validate the input locations and initialise the empty working state.

        :param font_path: path to the source emoji font
        :param unicode_path: path to the directory that contains the unicode files
        :param without_flags: when True, flag sequences are left out of the metadata
        :raises ValueError: when validate_input_files rejects one of the inputs
        """
        validate_input_files(font_path, unicode_path, FLATBUFFER_MODULE_DIR)

        self.font_path = font_path
        self.unicode_path = unicode_path
        self.without_flags = without_flags
        self.emoji_data_map = {}
        self.remapped_codepoints = {}
        self.glyph_to_image_metrics_map = {}
        # set default emoji id to start of Supplemental Private Use Area-A
        self.emoji_id = DEFAULT_EMOJI_ID

    def update_emoji_data(self, codepoints, glyph_name):
        """Updates the existing EmojiData identified with codepoints. The fields that are set are:
        - emoji_id (if it does not exist)
        - image width/height

        Code points that are not in emoji_data_map are ignored. The emoji id is also recorded in
        remapped_codepoints as the new code point of ``glyph_name``.
        """
        key = codepoint_to_string(codepoints)
        if key in self.emoji_data_map:
            # add emoji to final data
            emoji_data = self.emoji_data_map[key]
            emoji_data.update_metrics(self.glyph_to_image_metrics_map[glyph_name])
            if emoji_data.emoji_id == 0:
                # id 0 means no id was assigned yet; take the next free one
                emoji_data.emoji_id = self.emoji_id
                self.emoji_id = self.emoji_id + 1
            self.remapped_codepoints[emoji_data.emoji_id] = glyph_name

    def read_cbdt(self, ttf):
        """Read image size data from CBDT into glyph_to_image_metrics_map."""
        cbdt = ttf['CBDT']
        for strike_data in cbdt.strikeData:
            for key, data in strike_data.items():
                data.decompile()
                self.glyph_to_image_metrics_map[key] = data.metrics

    def read_cmap12(self, ttf, glyph_to_codepoint_map):
        """Read the single code point emojis from the format 12 cmap table.

        Fills glyph_to_codepoint_map and updates the emoji data for every entry of the first
        table with format 12, platformID 3 and platEncID 10.

        :return: the cmap table that was read
        :raises ValueError: when the font has no such table
        """
        cmap = ttf['cmap']
        for table in cmap.tables:
            if table.format == 12 and table.platformID == 3 and table.platEncID == 10:
                for codepoint, glyph_name in table.cmap.items():
                    glyph_to_codepoint_map[glyph_name] = codepoint
                    self.update_emoji_data([codepoint], glyph_name)
                return table
        raise ValueError("Font doesn't contain cmap with format:12, platformID:3 and platEncID:10")

    def read_gsub(self, ttf, glyph_to_codepoint_map):
        """Read the emoji sequences defined in GSUB and update the related emoji data.

        Only lookup type 5 (context) and lookup type 4 (ligature) subtables are handled; the
        context subtables are processed first.
        """
        gsub = ttf['GSUB']
        ligature_subtables = []
        context_subtables = []
        # this code is font dependent, implementing all gsub rules is out of scope of EmojiCompat
        # and would be expensive with little value
        for lookup in gsub.table.LookupList.Lookup:
            for subtable in lookup.SubTable:
                if subtable.LookupType == 5:
                    context_subtables.append(subtable)
                elif subtable.LookupType == 4:
                    ligature_subtables.append(subtable)

        for subtable in context_subtables:
            self.add_gsub_context_subtable(subtable, gsub.table.LookupList, glyph_to_codepoint_map)

        for subtable in ligature_subtables:
            self.add_gsub_ligature_subtable(subtable, glyph_to_codepoint_map)

    def add_gsub_context_subtable(self, subtable, lookup_list, glyph_to_codepoint_map):
        """Add substitutions defined as OpenType Context Substitution"""
        for sub_class_set in subtable.SubClassSet:
            if not sub_class_set:
                continue
            for sub_class_rule in sub_class_set.SubClassRule:
                # prepare holder for substitution list. each rule will have a list that is added
                # to the subs_list.
                subs_list = len(sub_class_rule.SubstLookupRecord) * [None]
                for record in sub_class_rule.SubstLookupRecord:
                    subs_list[record.SequenceIndex] = self.get_substitutions(
                        lookup_list, record.LookupListIndex)
                # create combinations of all lists. the combinations will be filtered by
                # emoji_data_map. the first element that contains a valid glyph will be used
                # as the final glyph
                for seq in itertools.product(*subs_list):
                    glyph_names = [x["input"] for x in seq]
                    codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
                    outputs = [x["output"] for x in seq if x["output"]]
                    nonempty_outputs = [x for x in outputs if x.strip()]
                    if len(nonempty_outputs) == 0:
                        print("Warning: no output glyph is set for " + str(glyph_names))
                        continue
                    elif len(nonempty_outputs) > 1:
                        print(
                            "Warning: multiple glyph is set for "
                            + str(glyph_names) + ", will use the first one")

                    glyph = nonempty_outputs[0]
                    self.update_emoji_data(codepoints, glyph)

    def get_substitutions(self, lookup_list, index):
        """Return the mappings of lookup ``index`` as a list of input/output dictionaries."""
        result = []
        for subtable in lookup_list.Lookup[index].SubTable:
            for input_glyph, output_glyph in subtable.mapping.items():
                result.append({"input": input_glyph, "output": output_glyph})
        return result

    def add_gsub_ligature_subtable(self, subtable, glyph_to_codepoint_map):
        """Add substitutions defined in a ligature subtable."""
        for name, ligatures in subtable.ligatures.items():
            for ligature in ligatures:
                glyph_names = [name] + ligature.Component
                codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
                self.update_emoji_data(codepoints, ligature.LigGlyph)

    def write_metadata_json(self, output_json_file_path):
        """Writes the emojis into a json file

        :param output_json_file_path: path of the json file to write
        :return: number of emojis written to the file
        """
        output_json = {}
        output_json['version'] = METADATA_VERSION
        output_json['sourceSha'] = create_sha_from_source_files(
            [self.font_path, OUTPUT_META_FILE, FLATBUFFER_SCHEMA])
        output_json['list'] = []

        emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)

        total_emoji_count = 0
        for emoji_data in emoji_data_list:
            if self.without_flags and is_flag_seq(emoji_data.codepoints):
                continue  # Do not add flags emoji data if this is for subset font.
            element = emoji_data.create_json_element()
            output_json['list'].append(element)
            total_emoji_count = total_emoji_count + 1

        # write the new json file to be processed by FlatBuffers
        with open(output_json_file_path, 'w') as json_file:
            print(json.dumps(output_json, indent=4, sort_keys=True, separators=(',', ':')),
                  file=json_file)

        return total_emoji_count

    def write_metadata_csv(self):
        """Writes emoji metadata into space separated file"""
        with open(OUTPUT_META_FILE, 'w') as csvfile:
            csvwriter = csv.writer(csvfile, delimiter=' ')
            emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)
            csvwriter.writerow(['#id', 'sdkAdded', 'compatAdded', 'codepoints'])
            for emoji_data in emoji_data_list:
                csvwriter.writerow(emoji_data.create_txt_row())

    def add_watermark(self, ttf):
        """Add a watermark glyph that expands to the digits of the font version.

        A new glyph WATERMARK_NEW_GLYPH_ID is mapped to WATERMARK_NEW_CODE_POINT, given the
        metrics of the WATERMARK_REF_CODE_POINT glyph, and substituted by the glyphs of
        "<major> <minor>" through a new lookup registered under the 'ccmp' feature.

        :raises ValueError: when the version string cannot be parsed or the font has no ccmp
            feature
        """
        cmap = ttf.getBestCmap()
        gsub = ttf['GSUB'].table

        # Obtain Version string. Raw string so that \d and \. reach the regex engine unchanged
        # instead of being treated as (invalid) string escapes.
        m = re.search(r'^Version (\d*)\.(\d*)', font_data.font_version(ttf))
        if not m:
            raise ValueError('The font does not have proper version string.')
        major = m.group(1)
        minor = m.group(2)
        # Replace the dot with space since NotoColorEmoji does not have glyph for dot.
        glyphs = [cmap[ord(x)] for x in '%s %s' % (major, minor)]

        # Update Glyph metrics
        ttf.getGlyphOrder().append(WATERMARK_NEW_GLYPH_ID)
        refGlyphId = cmap[WATERMARK_REF_CODE_POINT]
        ttf['hmtx'].metrics[WATERMARK_NEW_GLYPH_ID] = ttf['hmtx'].metrics[refGlyphId]
        ttf['vmtx'].metrics[WATERMARK_NEW_GLYPH_ID] = ttf['vmtx'].metrics[refGlyphId]

        # Add new Glyph to cmap
        font_data.add_to_cmap(ttf, {WATERMARK_NEW_CODE_POINT: WATERMARK_NEW_GLYPH_ID})

        # Add lookup table for the version string.
        lookups = gsub.LookupList.Lookup
        new_lookup = otTables.Lookup()
        new_lookup.LookupType = 2  # Multiple Substitution Subtable.
        new_lookup.LookupFlag = 0
        new_subtable = otTables.MultipleSubst()
        new_subtable.mapping = {WATERMARK_NEW_GLYPH_ID: tuple(glyphs)}
        new_lookup.SubTable = [new_subtable]
        new_lookup_index = len(lookups)
        lookups.append(new_lookup)

        # Add feature. The None default is required: without it next() raises StopIteration
        # when no ccmp feature exists and the check below can never run.
        feature = next(
            (x for x in gsub.FeatureList.FeatureRecord if x.FeatureTag == 'ccmp'), None)
        if not feature:
            raise ValueError("Font doesn't contain ccmp feature.")

        feature.Feature.LookupListIndex.append(new_lookup_index)

    def create_font(self):
        """Creates the EmojiCompat font.

        Reads the font at self.font_path and the unicode files under self.unicode_path, then
        writes the metadata csv, the new font, the flatbuffer java files and the test data. The
        temporary working directory is removed even when a step fails.
        """
        tmp_dir = tempfile.mkdtemp()
        try:
            # create emoji codepoints to EmojiData map
            self.emoji_data_map = load_emoji_data_map(self.unicode_path, self.without_flags)

            # read previous metadata file to update id, sdkAdded and compatAdded. emoji id that
            # is returned is either default or 1 greater than the largest id in previous data
            self.emoji_id = load_previous_metadata(self.emoji_data_map)

            # recalcTimestamp parameter will keep the modified field same as the original font.
            # Changing the modified field in the font causes the font ttf file to change, which
            # makes it harder to understand if something really changed in the font.
            with contextlib.closing(ttLib.TTFont(self.font_path, recalcTimestamp=False)) as ttf:
                # read image size data
                self.read_cbdt(ttf)

                # glyph name to codepoint map
                glyph_to_codepoint_map = {}

                # read single codepoint emojis under cmap12
                cmap12_table = self.read_cmap12(ttf, glyph_to_codepoint_map)

                # read emoji sequences from gsub
                self.read_gsub(ttf, glyph_to_codepoint_map)

                # add all new codepoint to glyph mappings
                cmap12_table.cmap.update(self.remapped_codepoints)

                # final metadata csv will be used to generate the sha, therefore write it before
                # metadata json is written.
                self.write_metadata_csv()

                output_json_file = os.path.join(tmp_dir, OUTPUT_JSON_FILE_NAME)
                flatbuffer_bin_file = os.path.join(tmp_dir, FLATBUFFER_BIN)
                flatbuffer_java_dir = os.path.join(tmp_dir, FLATBUFFER_JAVA_PATH)

                total_emoji_count = self.write_metadata_json(output_json_file)

                # create the flatbuffers binary and java classes
                flatc_command = ['flatc',
                                 '-o',
                                 tmp_dir,
                                 '-b',
                                 '-j',
                                 FLATBUFFER_SCHEMA,
                                 output_json_file]
                subprocess.check_output(flatc_command)

                # inject metadata binary into font
                inject_meta_into_font(ttf, flatbuffer_bin_file)

                # add watermark glyph for manual verification.
                self.add_watermark(ttf)

                # update CBDT and CBLC versions since older android versions cannot read > 2.0
                ttf['CBDT'].version = 2.0
                ttf['CBLC'].version = 2.0

                # save the new font
                ttf.save(FONT_PATH)

                update_flatbuffer_java_files(flatbuffer_java_dir,  # tmp dir
                                             FLATBUFFER_HEADER,
                                             FLATBUFFER_JAVA_TARGET)

                create_test_data(self.unicode_path)
        finally:
            # clear the tmp output directory, also when one of the steps above raised
            shutil.rmtree(tmp_dir, ignore_errors=True)

        print(
            "{0} emojis are written to\n{1}".format(total_emoji_count, FONT_DIR))
+
+
def print_usage():
    """Print the command line usage of the script to standard output."""
    usage_text = ("Please specify a path to font and unicode files.\n"
                  "usage: createfont.py noto-color-emoji-path unicode-dir-path")
    print(usage_text)
+
def parse_args(argv):
    """Parse the command line arguments.

    Parsed manually to avoid any extra dependencies. Prints the usage and exits with status 1
    when fewer than two positional arguments are given.

    :param argv: argument list in the layout of sys.argv (program name first)
    :return: tuple of (font path, unicode directory path, without_flags)
    """
    if len(argv) < 3:
        print_usage()
        sys.exit(1)

    # the flag is only recognised as the single argument after the two paths
    without_flags = len(argv) == 4 and argv[3] == '--without-flags'

    # Read the paths from the argv that was passed in (and validated above), not from the
    # global sys.argv, so the function honours its parameter.
    return (argv[1], argv[2], without_flags)
+
def main():
    """Script entry point: parse sys.argv and create the font with the parsed arguments."""
    creator = EmojiFontCreator(*parse_args(sys.argv))
    creator.create_font()
+
+
+if __name__ == '__main__':
+ main()
diff --git a/emoji-compat/data/emoji_metadata.txt b/emoji-compat/data/emoji_metadata.txt
index 6a9110e..d57d34e 100644
--- a/emoji-compat/data/emoji_metadata.txt
+++ b/emoji-compat/data/emoji_metadata.txt
@@ -3685,34 +3685,3 @@ F0E97 31 8 1FAF6 1F3FC
F0E98 31 8 1FAF6 1F3FD
F0E99 31 8 1FAF6 1F3FE
F0E9A 31 8 1FAF6 1F3FF
-F0E9C 1500 9 1F426 200D 2B1B
-F0E9D 1500 9 1F6DC
-F0E9E 1500 9 1FA75
-F0E9F 1500 9 1FA76
-F0EA0 1500 9 1FA77
-F0EA1 1500 9 1FA87
-F0EA2 1500 9 1FA88
-F0EA3 1500 9 1FAAD
-F0EA4 1500 9 1FAAE
-F0EA5 1500 9 1FAAF
-F0EA6 1500 9 1FABB
-F0EA7 1500 9 1FABC
-F0EA8 1500 9 1FABD
-F0EA9 1500 9 1FABF
-F0EAA 1500 9 1FACE
-F0EAB 1500 9 1FACF
-F0EAC 1500 9 1FADA
-F0EAD 1500 9 1FADB
-F0EAE 1500 9 1FAE8
-F0EAF 1500 9 1FAF7
-F0EB0 1500 9 1FAF7 1F3FB
-F0EB1 1500 9 1FAF7 1F3FC
-F0EB2 1500 9 1FAF7 1F3FD
-F0EB3 1500 9 1FAF7 1F3FE
-F0EB4 1500 9 1FAF7 1F3FF
-F0EB5 1500 9 1FAF8
-F0EB6 1500 9 1FAF8 1F3FB
-F0EB7 1500 9 1FAF8 1F3FC
-F0EB8 1500 9 1FAF8 1F3FD
-F0EB9 1500 9 1FAF8 1F3FE
-F0EBA 1500 9 1FAF8 1F3FF
diff --git a/emoji-compat/fetch.sh b/emoji-compat/fetch.sh
deleted file mode 100755
index d35127a..0000000
--- a/emoji-compat/fetch.sh
+++ /dev/null
@@ -1,118 +0,0 @@
-#!/usr/bin/env bash
-
-# Copyright (C) 2022 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Helper script for fetching new emoji compat fonts from github source
-# of truth
-
-# This script is very basic, please extend or replace to handle your
-# needs (e.g. pulling specific commits, releases, branches) as needed.
-
-#set -o xtrace
-# errexit is left off; the tool checks below inspect $? themselves.
-set +e
-
-METADATA_GIT="https://github.com/googlefonts/emojicompat.git"
-FONT_GIT="https://github.com/googlefonts/noto-emoji.git"
-
-# The inner substitution is quoted so a script path containing spaces is
-# not word-split before readlink sees it.
-SCRIPT_DIR=$(readlink -f "$(dirname -- "$0")")
-TMP_DIR=$(mktemp -d)
-# Remove the scratch directory on every exit path, including early aborts.
-trap 'rm -rf "$TMP_DIR"' EXIT
-
-# git is needed for the clones and for the clean-tree check.
-GIT_VERSION=$(git --version)
-if [ $? -ne 0 ]; then
-    echo -e "ERROR: git not found"
-    exit 1
-fi
-
-# ttx (from fonttools) is needed to inspect the font header version.
-TTX_VERSION=$(ttx --version)
-
-if [ $? -ne 0 ]; then
-    echo "ERROR ttx required to check font"
-    echo -e "\t python3 -m venv venv"
-    echo -e "\t source venv/bin/activate"
-    echo -e "\t pip install fonttools"
-    exit 127
-fi
-
-echo "METADATA: $METADATA_GIT"
-echo "FONT: $FONT_GIT"
-echo "Updating in: $SCRIPT_DIR"
-
-# confirm directory is clean
-pushd "$SCRIPT_DIR" > /dev/null
-UNCOMMITED_CHANGES=$(git status --porcelain)
-popd > /dev/null
-if [[ "$UNCOMMITED_CHANGES" ]]; then
-    echo "$UNCOMMITED_CHANGES"
-    # Anything other than an answer starting with y/Y aborts with status 3.
-    read -r -p "Uncommited changes. Continue? [y/N]:" uncommited
-    if [[ ! $uncommited =~ ^[Yy] ]]; then
-        exit 3
-    fi
-fi
-
-# Show the most recent commit of the clone at $TMP_DIR/<repo> and ask the
-# user whether to continue. Exits with status 2 unless the answer starts
-# with y/Y, or if the clone directory cannot be entered.
-#   $1: directory name of the cloned repository inside $TMP_DIR
-function confirm_git_commit() {
-    # Stop if the clone is missing; otherwise "git log" would report on
-    # whatever directory is current and the popd below would be unbalanced.
-    if ! pushd "$TMP_DIR/$1" > /dev/null; then
-        echo "ERROR: cannot enter $TMP_DIR/$1" >&2
-        exit 2
-    fi
-    RESULT=$(git log -1)
-    echo "$RESULT"
-    read -r -p "Continue for repo $1? [y/N]: " yn
-    if [[ ! $yn =~ ^[Yy] ]]; then
-        exit 2
-    fi
-    popd > /dev/null
-}
-
-# Work inside the scratch directory; refuse to continue if it is unusable,
-# so the clones and the final cleanup never act on the wrong directory.
-if ! pushd "$TMP_DIR" > /dev/null; then
-    echo "ERROR: cannot enter temporary directory '$TMP_DIR'" >&2
-    exit 1
-fi
-
-git clone --quiet --depth 1 --branch main "$METADATA_GIT"
-confirm_git_commit "emojicompat"
-METADATA_FILE="./emojicompat/src/emojicompat/emoji_metadata.txt"
-# adjust newlines to avoid giant diffs (appends a carriage return per line)
-awk 'sub("$", "\r")' "$METADATA_FILE" > emoji_metadata.txt
-
-# pull the font
-git clone --quiet --depth 1 --branch main "$FONT_GIT"
-confirm_git_commit "noto-emoji"
-cp ./noto-emoji/fonts/NotoColorEmoji-emojicompat.ttf ./NewFont.ttf
-
-# Dump the font to XML and require a 2.0 header version.
-ttx -o NewFont.ttx NewFont.ttf 2> /dev/null
-grep -q 'header version="2.0"' NewFont.ttx
-
-if [ $? -ne 0 ]; then
-    echo -e "WRONG HEADER VERSION IN FONT FILE (breaks API23)"
-    echo -e "Expected 'header version=\"2.0\"'"
-    echo -e "Found: "
-    grep 'header version' NewFont.ttx
-    exit 128
-fi
-
-# concat new codepoints to emojis.txt
-# Lines present only in the fresh metadata; fields 4+ hold the codepoints.
-NEW_LINES=$(comm -23 emoji_metadata.txt "$SCRIPT_DIR/data/emoji_metadata.txt")
-NEW_CODEPOINTS=$(echo "$NEW_LINES" | cut -d" " -f4-100 | sed 's/\r//')
-
-if [[ "$NEW_CODEPOINTS" ]]; then
-    echo "$NEW_CODEPOINTS"
-    read -r -p "New codpoints found in metadata. Append emojis.txt? [y/N]:" emojiAppend
-    if [[ "$emojiAppend" =~ ^[Yy] ]]; then
-        echo "$NEW_CODEPOINTS" >> "$SCRIPT_DIR/supported-emojis/emojis.txt"
-        echo "Updated ${SCRIPT_DIR}/supported-emojis/emojis.txt"
-    fi
-fi
-
-cp emoji_metadata.txt "$SCRIPT_DIR/data/emoji_metadata.txt"
-echo "Updated ${SCRIPT_DIR}/data/emoji_metadata.txt"
-cp NewFont.ttf "$SCRIPT_DIR/font/NotoColorEmojiCompat.ttf"
-echo "Updated ${SCRIPT_DIR}/font/NotoColorEmojiCompat.ttf"
-
-popd > /dev/null
-rm -rf "$TMP_DIR"
diff --git a/emoji-compat/font/NotoColorEmojiCompat.ttf b/emoji-compat/font/NotoColorEmojiCompat.ttf
index f7f9129..7334ae8 100644
--- a/emoji-compat/font/NotoColorEmojiCompat.ttf
+++ b/emoji-compat/font/NotoColorEmojiCompat.ttf
Binary files differ
diff --git a/emoji-compat/supported-emojis/emojis.txt b/emoji-compat/supported-emojis/emojis.txt
index 3e5eb9e..a3ac299 100644
--- a/emoji-compat/supported-emojis/emojis.txt
+++ b/emoji-compat/supported-emojis/emojis.txt
@@ -3831,34 +3831,3 @@
39 FE0F 20E3
A9 FE0F
AE FE0F
-1F426 200D 2B1B
-1F6DC
-1FA75
-1FA76
-1FA77
-1FA87
-1FA88
-1FAAD
-1FAAE
-1FAAF
-1FABB
-1FABC
-1FABD
-1FABF
-1FACE
-1FACF
-1FADA
-1FADB
-1FAE8
-1FAF7
-1FAF7 1F3FB
-1FAF7 1F3FC
-1FAF7 1F3FD
-1FAF7 1F3FE
-1FAF7 1F3FF
-1FAF8
-1FAF8 1F3FB
-1FAF8 1F3FC
-1FAF8 1F3FD
-1FAF8 1F3FE
-1FAF8 1F3FF