#!/usr/bin/env python3
# coding=UTF-8
#
# Copyright 2016 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Create a curated subset of Noto CJK for Android."""

import argparse
import logging
import os
from pathlib import Path

from fontTools import ttLib
from nototools import font_data
from nototools import tool_utils
from nototools import ttc_utils

# Characters supported in Noto CJK fonts that UTR #51 recommends default to
# emoji-style.
EMOJI_IN_CJK = {
    0x26BD,  # ⚽ SOCCER BALL
    0x26BE,  # ⚾ BASEBALL
    0x1F18E,  # NEGATIVE SQUARED AB
    0x1F191,  # SQUARED CL
    0x1F192,  # SQUARED COOL
    0x1F193,  # SQUARED FREE
    0x1F194,  # SQUARED ID
    0x1F195,  # SQUARED NEW
    0x1F196,  # SQUARED NG
    0x1F197,  # SQUARED OK
    0x1F198,  # SQUARED SOS
    0x1F199,  # SQUARED UP WITH EXCLAMATION MARK
    0x1F19A,  # SQUARED VS
    0x1F201,  # SQUARED KATAKANA KOKO
    0x1F21A,  # SQUARED CJK UNIFIED IDEOGRAPH-7121
    0x1F22F,  # SQUARED CJK UNIFIED IDEOGRAPH-6307
    0x1F232,  # SQUARED CJK UNIFIED IDEOGRAPH-7981
    0x1F233,  # SQUARED CJK UNIFIED IDEOGRAPH-7A7A
    0x1F234,  # SQUARED CJK UNIFIED IDEOGRAPH-5408
    0x1F235,  # SQUARED CJK UNIFIED IDEOGRAPH-6E80
    0x1F236,  # SQUARED CJK UNIFIED IDEOGRAPH-6709
    0x1F238,  # SQUARED CJK UNIFIED IDEOGRAPH-7533
    0x1F239,  # SQUARED CJK UNIFIED IDEOGRAPH-5272
    0x1F23A,  # SQUARED CJK UNIFIED IDEOGRAPH-55B6
    0x1F250,  # CIRCLED IDEOGRAPH ADVANTAGE
    0x1F251,  # CIRCLED IDEOGRAPH ACCEPT
}

# Characters we have decided we are doing as emoji-style in Android,
# despite UTR #51's recommendation.
ANDROID_EMOJI = {
    0x2600,  # ☀ BLACK SUN WITH RAYS
    0x2601,  # ☁ CLOUD
    0x260E,  # ☎ BLACK TELEPHONE
    0x261D,  # ☝ WHITE UP POINTING INDEX
    0x263A,  # ☺ WHITE SMILING FACE
    0x2660,  # ♠ BLACK SPADE SUIT
    0x2663,  # ♣ BLACK CLUB SUIT
    0x2665,  # ♥ BLACK HEART SUIT
    0x2666,  # ♦ BLACK DIAMOND SUIT
    0x270C,  # ✌ VICTORY HAND
    0x2744,  # ❄ SNOWFLAKE
    0x2764,  # ❤ HEAVY BLACK HEART
}

# We don't want support for ASCII control chars.
CONTROL_CHARS = tool_utils.parse_int_ranges('0000-001F')

# Every code point that gets stripped from the cmap of each font.
EXCLUDED_CODEPOINTS = sorted(EMOJI_IN_CJK | ANDROID_EMOJI | CONTROL_CHARS)

# Collections processed by main(), looked up inside the input directory.
TTC_NAMES = ('NotoSansCJK-Regular.ttc', 'NotoSerifCJK-Regular.ttc')


def remove_from_cmap(infile, outfile, exclude=frozenset()):
    """Removes a set of characters from a font file's cmap table.

    Args:
      infile: path of the font file to load.
      outfile: path the modified font is saved to.
      exclude: code points handed to font_data.delete_from_cmap.
    """
    font = ttLib.TTFont(infile)
    font_data.delete_from_cmap(font, exclude)
    font.save(outfile)


def remove_codepoints_from_ttc_using_ttc_utils(ttc_name, out_dir):
    """Removes EXCLUDED_CODEPOINTS from a TTC by way of nototools' ttc_utils.

    The collection is extracted into out_dir, each extracted font has its
    cmap trimmed in place, the collection is rebuilt in out_dir under the
    same file name, and the extracted fonts are deleted again.

    Args:
      ttc_name: path of the input collection. It may include a directory;
        only its base name is used for the rebuilt collection.
      out_dir: directory that receives the rebuilt collection.
    """
    otf_names = ttc_utils.ttcfile_extract(ttc_name, out_dir)

    # The rebuilt collection must land in out_dir, so strip any directory
    # part of the input path before using it as the output name.
    out_name = os.path.basename(ttc_name)

    with tool_utils.temp_chdir(out_dir):
        for otf_name in otf_names:
            logging.info('Subsetting %s...', otf_name)
            remove_from_cmap(otf_name, otf_name, exclude=EXCLUDED_CODEPOINTS)
        ttc_utils.ttcfile_build(out_name, otf_names)
        for otf_name in otf_names:
            os.remove(otf_name)


def remove_codepoints_from_ttc(ttc_path, out_dir):
    """Removes a set of characters from a TTC font file's cmap table.

    Args:
      ttc_path: pathlib.Path of the input collection.
      out_dir: pathlib.Path of the directory the result is saved into,
        under the same file name as the input.
    """
    logging.info('Loading %s', ttc_path)
    # Measure the input before anything is written, so the size report stays
    # correct even when the output path is the same file as the input.
    in_size = ttc_path.stat().st_size
    ttc = ttLib.ttCollection.TTCollection(ttc_path)

    logging.info('Subsetting %d fonts in the collection', len(ttc))
    for font in ttc:
        font_data.delete_from_cmap(font, EXCLUDED_CODEPOINTS)

    out_path = out_dir / ttc_path.name
    logging.info('Saving to %s', out_path)
    ttc.save(out_path)
    out_size = out_path.stat().st_size
    logging.info('Size: %d --> %d, delta=%d',
                 in_size, out_size, out_size - in_size)


def main():
    """Parses the command line and subsets every collection in TTC_NAMES."""
    parser = argparse.ArgumentParser()
    parser.add_argument('input', default='.', nargs='?',
                        help='directory containing the input .ttc files')
    parser.add_argument('-o', '--output', default='subsetted',
                        help='directory to write the subsetted .ttc files to')
    parser.add_argument('--use-ttc-utils', action='store_true',
                        help='extract and rebuild with nototools ttc_utils')
    parser.add_argument('-v', '--verbose', action='count',
                        help='log progress; repeat for debug output')
    args = parser.parse_args()

    # No -v leaves logging unconfigured; -v is INFO; -vv or more is DEBUG.
    if args.verbose:
        level = logging.DEBUG if args.verbose > 1 else logging.INFO
        logging.basicConfig(level=level)

    in_dir = Path(args.input)
    out_dir = Path(args.output)
    out_dir.mkdir(parents=True, exist_ok=True)
    for ttc_name in TTC_NAMES:
        ttc_path = in_dir / ttc_name
        if args.use_ttc_utils:
            # Honour the input directory on this code path as well; with the
            # default input of '.' this is still just the bare file name.
            remove_codepoints_from_ttc_using_ttc_utils(str(ttc_path), out_dir)
        else:
            remove_codepoints_from_ttc(ttc_path, out_dir)


if __name__ == "__main__":
    main()