xref: /aosp_15_r20/frameworks/minikin/libs/minikin/FontCollection.cpp (revision 834a2baab5fdfc28e9a428ee87c7ea8f6a06a53d)
1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "minikin/FontCollection.h"
18 
19 #include <log/log.h>
20 #include <unicode/unorm2.h>
21 
22 #include <algorithm>
23 #include <unordered_set>
24 
25 #include "FeatureFlags.h"
26 #include "Locale.h"
27 #include "LocaleListCache.h"
28 #include "MinikinInternal.h"
29 #include "minikin/Characters.h"
30 #include "minikin/Emoji.h"
31 #include "minikin/FontFileParser.h"
32 #include "minikin/MinikinExtent.h"
33 #include "minikin/MinikinPaint.h"
34 
35 using std::vector;
36 
37 namespace minikin {
38 
// Returns |a| when it compares greater than |b|; otherwise returns |b|.
template <typename T>
static inline T max(T a, T b) {
    if (a > b) {
        return a;
    }
    return b;
}
43 
// Variation selectors requesting emoji (U+FE0F) or text (U+FE0E) presentation.
constexpr uint32_t EMOJI_STYLE_VS = 0xFE0F;
constexpr uint32_t TEXT_STYLE_VS = 0xFE0E;

// Counter from which every FontCollection draws its mId (post-incremented on each use).
static std::atomic<uint32_t> gNextCollectionId{0};
48 
49 namespace {
50 
isEmojiBreak(uint32_t prevCh,uint32_t ch)51 inline bool isEmojiBreak(uint32_t prevCh, uint32_t ch) {
52     return !(isEmojiModifier(ch) || (isRegionalIndicator(prevCh) && isRegionalIndicator(ch)) ||
53              isKeyCap(ch) || isTagChar(ch) || ch == CHAR_ZWJ || prevCh == CHAR_ZWJ);
54 }
55 
56 // Lower is better
getGlyphScore(U16StringPiece text,uint32_t start,uint32_t end,const HbFontUniquePtr & font)57 uint32_t getGlyphScore(U16StringPiece text, uint32_t start, uint32_t end,
58                        const HbFontUniquePtr& font) {
59     HbBufferUniquePtr buffer(hb_buffer_create());
60     hb_buffer_set_direction(buffer.get(), HB_DIRECTION_LTR);
61     hb_buffer_add_utf16(buffer.get(), text.data() + start, end - start, 0, end - start);
62     hb_buffer_guess_segment_properties(buffer.get());
63 
64     unsigned int numGlyphs = -1;
65     hb_shape(font.get(), buffer.get(), nullptr, 0);
66     hb_glyph_info_t* info = hb_buffer_get_glyph_infos(buffer.get(), &numGlyphs);
67 
68     // HarfBuzz squashed unsupported tag sequence into first emoji glyph. So, we cannot use glyph
69     // count for the font selection score. Give extra score if the base score is different from the
70     // first glyph.
71     if (numGlyphs == 1) {
72         constexpr uint32_t TAG_SEQUENCE_FALLBACK_PENALTY = 0x10000;
73 
74         uint32_t ch = 0;
75         const uint16_t* string = text.data();
76         const uint32_t string_size = text.size();
77         uint32_t readLength = 0;
78 
79         U16_NEXT(string, readLength, string_size, ch);
80         if (U_IS_SURROGATE(ch)) {
81             return numGlyphs;  // Broken surrogate pair.
82         }
83 
84         if (readLength >= string_size) {
85             return numGlyphs;  // No more characters remaining.
86         }
87 
88         uint32_t nextCh = 0;
89         U16_NEXT(string, readLength, string_size, nextCh);
90 
91         if (!isTagChar(nextCh)) {
92             return numGlyphs;  // Not a tag sequence.
93         }
94 
95         uint32_t composedGlyphId = info[0].codepoint;
96 
97         // Shape only the first base emoji.
98         hb_buffer_reset(buffer.get());
99         hb_buffer_set_direction(buffer.get(), HB_DIRECTION_LTR);
100         hb_buffer_add_codepoints(buffer.get(), &ch, 1, 0, 1);
101         hb_buffer_guess_segment_properties(buffer.get());
102 
103         unsigned int numGlyphs = -1;
104         hb_shape(font.get(), buffer.get(), nullptr, 0);
105         info = hb_buffer_get_glyph_infos(buffer.get(), &numGlyphs);
106 
107         if (numGlyphs != 1) {
108             // If the single code point of the first base emoji is decomposed to multiple glyphs,
109             // we don't support it.
110             return numGlyphs;
111         }
112 
113         uint32_t baseGlyphId = info[0].codepoint;
114         if (composedGlyphId == baseGlyphId) {
115             return numGlyphs + TAG_SEQUENCE_FALLBACK_PENALTY;
116         } else {
117             return numGlyphs;
118         }
119     }
120 
121     return numGlyphs;
122 }
123 
124 }  // namespace
125 
126 // static
create(std::shared_ptr<FontFamily> && typeface)127 std::shared_ptr<FontCollection> FontCollection::create(std::shared_ptr<FontFamily>&& typeface) {
128     std::vector<std::shared_ptr<FontFamily>> typefaces;
129     typefaces.push_back(typeface);
130     return create(typefaces);
131 }
132 
133 // static
create(const vector<std::shared_ptr<FontFamily>> & typefaces)134 std::shared_ptr<FontCollection> FontCollection::create(
135         const vector<std::shared_ptr<FontFamily>>& typefaces) {
136     // TODO(b/174672300): Revert back to make_shared.
137     return std::shared_ptr<FontCollection>(new FontCollection(typefaces));
138 }
139 
FontCollection(const vector<std::shared_ptr<FontFamily>> & typefaces)140 FontCollection::FontCollection(const vector<std::shared_ptr<FontFamily>>& typefaces)
141         : mMaxChar(0), mSupportedAxes(nullptr) {
142     init(typefaces);
143 }
144 
init(const vector<std::shared_ptr<FontFamily>> & typefaces)145 void FontCollection::init(const vector<std::shared_ptr<FontFamily>>& typefaces) {
146     mId = gNextCollectionId++;
147     vector<uint32_t> lastChar;
148     size_t nTypefaces = typefaces.size();
149     const FontStyle defaultStyle;
150     auto families = std::make_shared<vector<std::shared_ptr<FontFamily>>>();
151     std::unordered_set<AxisTag> supportedAxesSet;
152     for (size_t i = 0; i < nTypefaces; i++) {
153         const std::shared_ptr<FontFamily>& family = typefaces[i];
154         if (family->getNumFonts() == 0) {
155             continue;
156         }
157         const SparseBitSet& coverage = family->getCoverage();
158         families->emplace_back(family);
159         if (family->hasVSTable()) {
160             mVSFamilyVec.push_back(family);
161         }
162         mMaxChar = max(mMaxChar, coverage.length());
163         lastChar.push_back(coverage.nextSetBit(0));
164 
165         for (size_t i = 0; i < family->getSupportedAxesCount(); i++) {
166             supportedAxesSet.insert(family->getSupportedAxisAt(i));
167         }
168     }
169     // mMaybeSharedFamilies is not shared.
170     mMaybeSharedFamilies = families;
171     mFamilyCount = families->size();
172     mFamilyIndices = nullptr;
173     MINIKIN_ASSERT(mFamilyCount > 0, "Font collection must have at least one valid typeface");
174     MINIKIN_ASSERT(mFamilyCount <= MAX_FAMILY_COUNT,
175                    "Font collection may only have up to %d font families.", MAX_FAMILY_COUNT);
176     // Although OpenType supports up to 2^16-1 axes per font,
177     // mSupportedAxesCount may exceed 2^16-1 as we have multiple fonts.
178     mSupportedAxesCount = static_cast<uint32_t>(supportedAxesSet.size());
179     if (mSupportedAxesCount > 0) {
180         mSupportedAxes = sortedArrayFromSet(supportedAxesSet);
181     }
182     size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage;
183     // TODO: Use variation selector map for mRanges construction.
184     // A font can have a glyph for a base code point and variation selector pair but no glyph for
185     // the base code point without variation selector. The family won't be listed in the range in
186     // this case.
187     mOwnedRanges = std::make_unique<Range[]>(nPages);
188     mRanges = mOwnedRanges.get();
189     mRangesCount = nPages;
190     for (size_t i = 0; i < nPages; i++) {
191         Range* range = &mOwnedRanges[i];
192         range->start = mOwnedFamilyVec.size();
193         for (size_t j = 0; j < getFamilyCount(); j++) {
194             if (lastChar[j] < (i + 1) << kLogCharsPerPage) {
195                 const std::shared_ptr<FontFamily>& family = getFamilyAt(j);
196                 mOwnedFamilyVec.push_back(static_cast<uint8_t>(j));
197                 uint32_t nextChar = family->getCoverage().nextSetBit((i + 1) << kLogCharsPerPage);
198                 lastChar[j] = nextChar;
199             }
200         }
201         range->end = mOwnedFamilyVec.size();
202     }
203     // See the comment in Range for more details.
204     LOG_ALWAYS_FATAL_IF(mOwnedFamilyVec.size() >= 0xFFFF,
205                         "Exceeded the maximum indexable cmap coverage.");
206     mFamilyVec = mOwnedFamilyVec.data();
207     mFamilyVecCount = mOwnedFamilyVec.size();
208 }
209 
FontCollection(BufferReader * reader,const std::shared_ptr<std::vector<std::shared_ptr<FontFamily>>> & families)210 FontCollection::FontCollection(
211         BufferReader* reader,
212         const std::shared_ptr<std::vector<std::shared_ptr<FontFamily>>>& families)
213         : mSupportedAxes(nullptr) {
214     mId = gNextCollectionId++;
215     mMaxChar = reader->read<uint32_t>();
216     mMaybeSharedFamilies = families;
217     std::tie(mFamilyIndices, mFamilyCount) = reader->readArray<uint32_t>();
218     for (size_t i = 0; i < getFamilyCount(); i++) {
219         const auto& family = getFamilyAt(i);
220         if (family->hasVSTable()) mVSFamilyVec.emplace_back(family);
221     }
222     // Range is two packed uint16_t
223     static_assert(sizeof(Range) == 4);
224     std::tie(mRanges, mRangesCount) = reader->readArray<Range>();
225     std::tie(mFamilyVec, mFamilyVecCount) = reader->readArray<uint8_t>();
226     const auto& [axesPtr, axesCount] = reader->readArray<AxisTag>();
227     mSupportedAxesCount = axesCount;
228     if (axesCount > 0) {
229         mSupportedAxes = std::unique_ptr<AxisTag[]>(new AxisTag[axesCount]);
230         std::copy(axesPtr, axesPtr + axesCount, mSupportedAxes.get());
231     }
232 }
233 
writeTo(BufferWriter * writer,const std::unordered_map<std::shared_ptr<FontFamily>,uint32_t> & fontFamilyToIndexMap) const234 void FontCollection::writeTo(BufferWriter* writer,
235                              const std::unordered_map<std::shared_ptr<FontFamily>, uint32_t>&
236                                      fontFamilyToIndexMap) const {
237     writer->write<uint32_t>(mMaxChar);
238     std::vector<uint32_t> indices;
239     indices.reserve(getFamilyCount());
240     for (size_t i = 0; i < getFamilyCount(); ++i) {
241         const std::shared_ptr<FontFamily>& fontFamily = getFamilyAt(i);
242         auto it = fontFamilyToIndexMap.find(fontFamily);
243         if (it == fontFamilyToIndexMap.end()) {
244             ALOGE("fontFamily not found in fontFamilyToIndexMap");
245         } else {
246             indices.push_back(it->second);
247         }
248     }
249     writer->writeArray<uint32_t>(indices.data(), indices.size());
250     writer->writeArray<Range>(mRanges, mRangesCount);
251     writer->writeArray<uint8_t>(mFamilyVec, mFamilyVecCount);
252     // No need to serialize mVSFamilyVec as it can be reconstructed easily from mFamilies.
253     writer->writeArray<AxisTag>(mSupportedAxes.get(), mSupportedAxesCount);
254 }
255 
256 // static
readVector(BufferReader * reader)257 std::vector<std::shared_ptr<FontCollection>> FontCollection::readVector(BufferReader* reader) {
258     auto allFontFamilies = std::make_shared<std::vector<std::shared_ptr<FontFamily>>>(
259             FontFamily::readVector(reader));
260     uint32_t count = reader->read<uint32_t>();
261     std::vector<std::shared_ptr<FontCollection>> fontCollections;
262     fontCollections.reserve(count);
263     for (uint32_t i = 0; i < count; i++) {
264         fontCollections.emplace_back(new FontCollection(reader, allFontFamilies));
265     }
266     return fontCollections;
267 }
268 
269 // static
writeVector(BufferWriter * writer,const std::vector<std::shared_ptr<FontCollection>> & fontCollections)270 void FontCollection::writeVector(
271         BufferWriter* writer, const std::vector<std::shared_ptr<FontCollection>>& fontCollections) {
272     std::vector<std::shared_ptr<FontFamily>> allFontFamilies;
273     // Note: operator== for shared_ptr compares raw pointer values.
274     std::unordered_map<std::shared_ptr<FontFamily>, uint32_t> fontFamilyToIndexMap;
275     collectAllFontFamilies(fontCollections, &allFontFamilies, &fontFamilyToIndexMap);
276 
277     FontFamily::writeVector(writer, allFontFamilies);
278     writer->write<uint32_t>(fontCollections.size());
279     for (const auto& fontCollection : fontCollections) {
280         fontCollection->writeTo(writer, fontFamilyToIndexMap);
281     }
282 }
283 
284 // static
collectAllFontFamilies(const std::vector<std::shared_ptr<FontCollection>> & fontCollections,std::vector<std::shared_ptr<FontFamily>> * outAllFontFamilies,std::unordered_map<std::shared_ptr<FontFamily>,uint32_t> * outFontFamilyToIndexMap)285 void FontCollection::collectAllFontFamilies(
286         const std::vector<std::shared_ptr<FontCollection>>& fontCollections,
287         std::vector<std::shared_ptr<FontFamily>>* outAllFontFamilies,
288         std::unordered_map<std::shared_ptr<FontFamily>, uint32_t>* outFontFamilyToIndexMap) {
289     for (const auto& fontCollection : fontCollections) {
290         for (size_t i = 0; i < fontCollection->getFamilyCount(); ++i) {
291             const std::shared_ptr<FontFamily>& fontFamily = fontCollection->getFamilyAt(i);
292             bool inserted =
293                     outFontFamilyToIndexMap->emplace(fontFamily, outAllFontFamilies->size()).second;
294             if (inserted) {
295                 outAllFontFamilies->push_back(fontFamily);
296             }
297         }
298     }
299 }
300 
// Special scores for the font fallback.
constexpr uint32_t kUnsupportedFontScore = 0;
constexpr uint32_t kFirstFontScore = UINT32_MAX;
304 
305 // Calculates a font score.
306 // The score of the font family is based on three subscores.
307 //  - Coverage Score: How well the font family covers the given character or variation sequence.
308 //  - Locale Score: How well the font family is appropriate for the locale.
309 //  - Variant Score: Whether the font family matches the variant. Note that this variant is not the
310 //    one in BCP47. This is our own font variant (e.g., elegant, compact).
311 //
312 // Then, there is a priority for these three subscores as follow:
313 //   Coverage Score > Locale Score > Variant Score
314 // The returned score reflects this priority order.
315 //
316 // Note that there are two special scores.
317 //  - kUnsupportedFontScore: When the font family doesn't support the variation sequence or even its
318 //    base character.
319 //  - kFirstFontScore: When the font is the first font family in the collection and it supports the
320 //    given character or variation sequence.
calcFamilyScore(uint32_t ch,uint32_t vs,FamilyVariant variant,uint32_t localeListId,const std::shared_ptr<FontFamily> & fontFamily) const321 uint32_t FontCollection::calcFamilyScore(uint32_t ch, uint32_t vs, FamilyVariant variant,
322                                          uint32_t localeListId,
323                                          const std::shared_ptr<FontFamily>& fontFamily) const {
324     const uint32_t coverageScore = calcCoverageScore(ch, vs, localeListId, fontFamily);
325     if (coverageScore == kFirstFontScore || coverageScore == kUnsupportedFontScore) {
326         // No need to calculate other scores.
327         return coverageScore;
328     }
329 
330     const uint32_t localeScore = calcLocaleMatchingScore(localeListId, *fontFamily);
331     const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily);
332 
333     // Subscores are encoded into 31 bits representation to meet the subscore priority.
334     // The highest 2 bits are for coverage score, then following 28 bits are for locale score,
335     // then the last 1 bit is for variant score.
336     return coverageScore << 29 | localeScore << 1 | variantScore;
337 }
338 
339 // Returns true if
340 //  - the fontFamily is a developer specified custom fallback.
341 //  - no custom fallback is provided and the fontFamily is a default fallback.
isPrimaryFamily(const std::shared_ptr<FontFamily> & fontFamily) const342 bool FontCollection::isPrimaryFamily(const std::shared_ptr<FontFamily>& fontFamily) const {
343     // If the font family is provided by developers, it is primary.
344     if (fontFamily->isCustomFallback()) {
345         return true;
346     }
347 
348     if (getFamilyAt(0)->isCustomFallback()) {
349         return false;
350     } else {
351         return fontFamily->isDefaultFallback();
352     }
353 }
354 
355 // Calculates a font score based on variation sequence coverage.
356 // - Returns kUnsupportedFontScore if the font doesn't support the variation sequence or its base
357 //   character.
358 // - Returns kFirstFontScore if the font family is the first font family in the collection and it
359 //   supports the given character or variation sequence.
360 // - Returns 3 if the font family supports the variation sequence.
361 // - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font is an emoji font.
362 // - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font is not an emoji font.
363 // - Returns 1 if the variation selector is not specified or if the font family only supports the
364 //   variation sequence's base character.
calcCoverageScore(uint32_t ch,uint32_t vs,uint32_t localeListId,const std::shared_ptr<FontFamily> & fontFamily) const365 uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs, uint32_t localeListId,
366                                            const std::shared_ptr<FontFamily>& fontFamily) const {
367     const bool hasVSGlyph = (vs != 0) && fontFamily->hasGlyph(ch, vs);
368     if (!hasVSGlyph && !fontFamily->getCoverage().get(ch)) {
369         // The font doesn't support either variation sequence or even the base character.
370         return kUnsupportedFontScore;
371     }
372 
373     if ((vs == 0 || hasVSGlyph) && isPrimaryFamily(fontFamily)) {
374         // If the first font family supports the given character or variation sequence, always use
375         // it.
376         return kFirstFontScore;
377     }
378 
379     if (vs != 0 && hasVSGlyph) {
380         return 3;
381     }
382 
383     bool colorEmojiRequest;
384     if (vs == EMOJI_STYLE_VS) {
385         colorEmojiRequest = true;
386     } else if (vs == TEXT_STYLE_VS) {
387         colorEmojiRequest = false;
388     } else {
389         switch (LocaleListCache::getById(localeListId).getEmojiStyle()) {
390             case EmojiStyle::EMOJI:
391                 colorEmojiRequest = true;
392                 break;
393             case EmojiStyle::TEXT:
394                 colorEmojiRequest = false;
395                 break;
396             case EmojiStyle::EMPTY:
397             case EmojiStyle::DEFAULT:
398             default:
399                 // Do not give any extra score for the default emoji style.
400                 return 1;
401                 break;
402         }
403     }
404 
405     return colorEmojiRequest == fontFamily->isColorEmojiFamily() ? 2 : 1;
406 }
407 
408 // Calculate font scores based on the script matching, subtag matching and primary locale matching.
409 //
410 // 1. If only the font's language matches or there is no matches between requested font and
411 //    supported font, then the font obtains a score of 0.
412 // 2. Without a match in language, considering subtag may change font's EmojiStyle over script,
413 //    a match in subtag gets a score of 2 and a match in scripts gains a score of 1.
414 // 3. Regarding to two elements matchings, language-and-subtag matching has a score of 4, while
415 //    language-and-script obtains a socre of 3 with the same reason above.
416 //
417 // If two locales in the requested list have the same locale score, the font matching with higher
418 // priority locale gets a higher score. For example, in the case the user requested locale list is
419 // "ja-Jpan,en-Latn". The score of for the font of "ja-Jpan" gets a higher score than the font of
420 // "en-Latn".
421 //
422 // To achieve score calculation with priorities, the locale score is determined as follows:
423 //   LocaleScore = s(0) * 5^(m - 1) + s(1) * 5^(m - 2) + ... + s(m - 2) * 5 + s(m - 1)
424 // Here, m is the maximum number of locales to be compared, and s(i) is the i-th locale's matching
425 // score. The possible values of s(i) are 0, 1, 2, 3 and 4.
calcLocaleMatchingScore(uint32_t userLocaleListId,const FontFamily & fontFamily)426 uint32_t FontCollection::calcLocaleMatchingScore(uint32_t userLocaleListId,
427                                                  const FontFamily& fontFamily) {
428     const LocaleList& localeList = LocaleListCache::getById(userLocaleListId);
429     const LocaleList& fontLocaleList = LocaleListCache::getById(fontFamily.localeListId());
430 
431     const size_t maxCompareNum = std::min(localeList.size(), FONT_LOCALE_LIMIT);
432     uint32_t score = 0;
433     for (size_t i = 0; i < maxCompareNum; ++i) {
434         score = score * 5u + localeList[i].calcScoreFor(fontLocaleList);
435     }
436     return score;
437 }
438 
439 // Calculates a font score based on variant ("compact" or "elegant") matching.
440 //  - Returns 1 if the font doesn't have variant or the variant matches with the text style.
441 //  - No score if the font has a variant but it doesn't match with the text style.
calcVariantMatchingScore(FamilyVariant variant,const FontFamily & fontFamily)442 uint32_t FontCollection::calcVariantMatchingScore(FamilyVariant variant,
443                                                   const FontFamily& fontFamily) {
444     const FamilyVariant familyVariant = fontFamily.variant();
445     if (familyVariant == FamilyVariant::DEFAULT) {
446         return 1;
447     }
448     if (familyVariant == variant) {
449         return 1;
450     }
451     if (variant == FamilyVariant::DEFAULT && familyVariant == FamilyVariant::COMPACT) {
452         // If default is requested, prefer compat variation.
453         return 1;
454     }
455     return 0;
456 }
457 
458 // Implement heuristic for choosing best-match font. Here are the rules:
459 // 1. If first font in the collection has the character, it wins.
460 // 2. Calculate a score for the font family. See comments in calcFamilyScore for the detail.
461 // 3. Highest score wins, with ties resolved to the first font.
462 // This method never returns nullptr.
getFamilyForChar(uint32_t ch,uint32_t vs,uint32_t localeListId,FamilyVariant variant) const463 FontCollection::FamilyMatchResult FontCollection::getFamilyForChar(uint32_t ch, uint32_t vs,
464                                                                    uint32_t localeListId,
465                                                                    FamilyVariant variant) const {
466     if (ch >= mMaxChar) {
467         return FamilyMatchResult::Builder().add(0).build();
468     }
469 
470     Range range = mRanges[ch >> kLogCharsPerPage];
471 
472     if (vs != 0) {
473         range = {0, static_cast<uint16_t>(getFamilyCount())};
474     }
475 
476     uint32_t bestScore = kUnsupportedFontScore;
477     FamilyMatchResult::Builder builder;
478 
479     for (size_t i = range.start; i < range.end; i++) {
480         const uint8_t familyIndex = vs == 0 ? mFamilyVec[i] : i;
481         const std::shared_ptr<FontFamily>& family = getFamilyAt(familyIndex);
482         const uint32_t score = calcFamilyScore(ch, vs, variant, localeListId, family);
483         if (score == kFirstFontScore) {
484             // If the first font family supports the given character or variation sequence, always
485             // use it.
486             return builder.add(familyIndex).build();
487         }
488         if (score != kUnsupportedFontScore && score >= bestScore) {
489             if (score > bestScore) {
490                 builder.reset();
491                 bestScore = score;
492             }
493             builder.add(familyIndex);
494         }
495     }
496     if (builder.empty()) {
497         UErrorCode errorCode = U_ZERO_ERROR;
498         const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode);
499         if (U_SUCCESS(errorCode)) {
500             UChar decomposed[4];
501             int len = unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode);
502             if (U_SUCCESS(errorCode) && len > 0) {
503                 int off = 0;
504                 U16_NEXT_UNSAFE(decomposed, off, ch);
505                 return getFamilyForChar(ch, vs, localeListId, variant);
506             }
507         }
508         return FamilyMatchResult::Builder().add(0).build();
509     }
510     return builder.build();
511 }
512 
513 // Characters where we want to continue using existing font run for (or stick to the next run if
514 // they start a string), even if the font does not support them explicitly. These are handled
515 // properly by Minikin or HarfBuzz even if the font does not explicitly support them and it's
516 // usually meaningless to switch to a different font to display them.
doesNotNeedFontSupport(uint32_t c)517 static bool doesNotNeedFontSupport(uint32_t c) {
518     return c == 0x00AD                      // SOFT HYPHEN
519            || c == 0x034F                   // COMBINING GRAPHEME JOINER
520            || c == 0x061C                   // ARABIC LETTER MARK
521            || (0x200C <= c && c <= 0x200F)  // ZERO WIDTH NON-JOINER..RIGHT-TO-LEFT MARK
522            || (0x202A <= c && c <= 0x202E)  // LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
523            || (0x2066 <= c && c <= 0x2069)  // LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE
524            || c == 0xFEFF                   // BYTE ORDER MARK
525            || isVariationSelector(c);
526 }
527 
// Characters where we want to continue using existing font run instead of
// recomputing the best match in the fallback list.
static const uint32_t stickyAllowlist[] = {
        '!',
        ',',
        '-',
        '.',
        ':',
        ';',
        '?',
        0x00A0,  // NBSP
        0x2010,  // HYPHEN
        0x2011,  // NB_HYPHEN
        0x202F,  // NNBSP
        0x2640,  // FEMALE_SIGN,
        0x2642,  // MALE_SIGN,
        0x2695,  // STAFF_OF_AESCULAPIUS
};

// Returns true if |c| is one of the code points listed in stickyAllowlist.
static bool isStickyAllowlisted(uint32_t c) {
    for (const uint32_t allowed : stickyAllowlist) {
        if (allowed == c) {
            return true;
        }
    }
    return false;
}
547 
isCombining(uint32_t c)548 static inline bool isCombining(uint32_t c) {
549     return (U_GET_GC_MASK(c) & U_GC_M_MASK) != 0;
550 }
551 
hasVariationSelector(uint32_t baseCodepoint,uint32_t variationSelector) const552 bool FontCollection::hasVariationSelector(uint32_t baseCodepoint,
553                                           uint32_t variationSelector) const {
554     if (!isVariationSelector(variationSelector)) {
555         return false;
556     }
557     if (baseCodepoint >= mMaxChar) {
558         return false;
559     }
560 
561     // Currently mRanges can not be used here since it isn't aware of the variation sequence.
562     for (size_t i = 0; i < mVSFamilyVec.size(); i++) {
563         if (mVSFamilyVec[i]->hasGlyph(baseCodepoint, variationSelector)) {
564             return true;
565         }
566     }
567 
568     // Even if there is no cmap format 14 subtable entry for the given sequence, should return true
569     // for <char, text presentation selector> case since we have special fallback rule for the
570     // sequence. Note that we don't need to restrict this to already standardized variation
571     // sequences, since Unicode is adding variation sequences more frequently now and may even move
572     // towards allowing text and emoji variation selectors on any character.
573     if (variationSelector == TEXT_STYLE_VS) {
574         for (size_t i = 0; i < getFamilyCount(); ++i) {
575             const std::shared_ptr<FontFamily>& family = getFamilyAt(i);
576             if (!family->isColorEmojiFamily() && family->hasGlyph(baseCodepoint, 0)) {
577                 return true;
578             }
579         }
580     }
581 
582     return false;
583 }
584 
// Code point U+FFFD (REPLACEMENT CHARACTER).
constexpr uint32_t REPLACEMENT_CHARACTER = 0xFFFD;
586 
intersect(FontCollection::FamilyMatchResult l,FontCollection::FamilyMatchResult r)587 FontCollection::FamilyMatchResult FontCollection::FamilyMatchResult::intersect(
588         FontCollection::FamilyMatchResult l, FontCollection::FamilyMatchResult r) {
589     if (l == r) {
590         return l;
591     }
592 
593     uint32_t li = 0;
594     uint32_t ri = 0;
595     FamilyMatchResult::Builder b;
596     while (li < l.size() && ri < r.size()) {
597         if (l[li] < r[ri]) {
598             li++;
599         } else if (l[li] > r[ri]) {
600             ri++;
601         } else {  // l[li] == r[ri]
602             b.add(l[li]);
603             li++;
604             ri++;
605         }
606     }
607     return b.build();
608 }
609 
filterFamilyByLocale(const LocaleList & localeList,const std::function<bool (const FontFamily & family)> & callback) const610 void FontCollection::filterFamilyByLocale(
611         const LocaleList& localeList,
612         const std::function<bool(const FontFamily& family)>& callback) const {
613     if (localeList.empty()) {
614         return;
615     }
616     // Only use the first family for the default line height.
617     const Locale& locale = localeList[0];
618     for (uint8_t i = 0; i < mFamilyCount; ++i) {
619         const auto& family = getFamilyAt(i);
620 
621         uint32_t fontLocaleId = family->localeListId();
622         if (fontLocaleId == LocaleListCache::kInvalidListId) {
623             continue;
624         }
625         const LocaleList& fontLocaleList = LocaleListCache::getById(fontLocaleId);
626         for (uint32_t i = 0; i < fontLocaleList.size(); ++i) {
627             if (fontLocaleList[i].isEqualScript(locale)) {
628                 bool cont = callback(*family.get());
629                 if (cont) {
630                     break;
631                 } else {
632                     return;
633                 }
634             }
635         }
636     }
637 }
638 
getReferenceExtentForLocale(const MinikinPaint & paint) const639 MinikinExtent FontCollection::getReferenceExtentForLocale(const MinikinPaint& paint) const {
640     uint32_t localeId = paint.localeListId;
641     LocaleExtentKey key = {localeId, paint.size};
642 
643     std::lock_guard<std::mutex> lock(mMutex);
644     auto e = mExtentCacheForLocale.get(key);
645 
646     if (e.ascent != 0 || e.descent != 0) {
647         return e;
648     }
649 
650     MinikinExtent result(0, 0);
651     // Reserve the custom font's extent.
652     for (uint8_t i = 0; i < mFamilyCount; ++i) {
653         const auto& family = getFamilyAt(i);
654         if (!family->isCustomFallback()) {
655             break;
656         }
657 
658         // Use this family
659         MinikinExtent extent(0, 0);
660         FakedFont font =
661                 getFamilyAt(i)->getClosestMatch(paint.fontStyle, paint.fontVariationSettings);
662         font.typeface()->GetFontExtent(&extent, paint, font.fakery);
663         result.extendBy(extent);
664     }
665 
666     if (localeId == LocaleListCache::kInvalidListId) {
667         mExtentCacheForLocale.put(key, result);
668         return result;
669     }
670 
671     // If default is requested, use compact one.
672     const FamilyVariant requestVariant = paint.familyVariant == FamilyVariant::DEFAULT
673                                                  ? FamilyVariant::COMPACT
674                                                  : paint.familyVariant;
675     const LocaleList& requestedLocaleList = LocaleListCache::getById(localeId);
676 
677     bool familyFound = false;
678     filterFamilyByLocale(requestedLocaleList, [&](const FontFamily& family) {
679         const FamilyVariant familyVariant = family.variant() == FamilyVariant::DEFAULT
680                                                     ? FamilyVariant::COMPACT
681                                                     : family.variant();
682 
683         if (familyVariant != requestVariant) {
684             return true;  // continue other families
685         }
686 
687         MinikinExtent extent(0, 0);
688         FakedFont font = family.getClosestMatch(paint.fontStyle, paint.fontVariationSettings);
689         font.typeface()->GetFontExtent(&extent, paint, font.fakery);
690         result.extendBy(extent);
691 
692         familyFound = true;
693         return false;  // We found it, stop searching.
694     });
695 
696     // If nothing matches, try non-variant match cases since it is used for fallback.
697     filterFamilyByLocale(requestedLocaleList, [&](const FontFamily& family) {
698         // Use this family
699         MinikinExtent extent(0, 0);
700         FakedFont font = family.getClosestMatch(paint.fontStyle, paint.fontVariationSettings);
701         font.typeface()->GetFontExtent(&extent, paint, font.fakery);
702         result.extendBy(extent);
703 
704         familyFound = true;
705         return false;  // We found it. stop searching.
706     });
707 
708     // If nothing matches, use default font.
709     if (!familyFound) {
710         FakedFont font =
711                 getFamilyAt(0)->getClosestMatch(paint.fontStyle, paint.fontVariationSettings);
712         font.typeface()->GetFontExtent(&result, paint, font.fakery);
713     }
714 
715     mExtentCacheForLocale.put(key, result);
716     return result;
717 }
718 
itemize(U16StringPiece text,FontStyle,uint32_t localeListId,FamilyVariant familyVariant,uint32_t runMax) const719 std::vector<FontCollection::Run> FontCollection::itemize(U16StringPiece text, FontStyle,
720                                                          uint32_t localeListId,
721                                                          FamilyVariant familyVariant,
722                                                          uint32_t runMax) const {
723     const uint16_t* string = text.data();
724     const uint32_t string_size = text.size();
725 
726     FamilyMatchResult lastFamilyIndices = FamilyMatchResult();
727 
728     if (string_size == 0) {
729         return std::vector<Run>();
730     }
731 
732     const uint32_t kEndOfString = 0xFFFFFFFF;
733     std::vector<Run> result;
734     Run* run = nullptr;
735 
736     uint32_t nextCh = 0;
737     uint32_t prevCh = 0;
738     size_t nextUtf16Pos = 0;
739     size_t readLength = 0;
740     U16_NEXT(string, readLength, string_size, nextCh);
741     if (U_IS_SURROGATE(nextCh)) {
742         nextCh = REPLACEMENT_CHARACTER;
743     }
744 
745     do {
746         const uint32_t ch = nextCh;
747         const size_t utf16Pos = nextUtf16Pos;
748         nextUtf16Pos = readLength;
749         if (readLength < string_size) {
750             U16_NEXT(string, readLength, string_size, nextCh);
751             if (U_IS_SURROGATE(nextCh)) {
752                 nextCh = REPLACEMENT_CHARACTER;
753             }
754         } else {
755             nextCh = kEndOfString;
756         }
757 
758         bool shouldContinueRun = false;
759         if (doesNotNeedFontSupport(ch)) {
760             // Always continue if the character is a format character not needed to be in the font.
761             shouldContinueRun = true;
762         } else if (!lastFamilyIndices.empty() && (isStickyAllowlisted(ch) || isCombining(ch))) {
763             // Continue using existing font as long as it has coverage and is whitelisted.
764 
765             const std::shared_ptr<FontFamily>& lastFamily = getFamilyAt(lastFamilyIndices[0]);
766             if (lastFamily->isColorEmojiFamily()) {
767                 // If the last family is color emoji font, find the longest family.
768                 shouldContinueRun = false;
769                 for (uint8_t ix : lastFamilyIndices) {
770                     shouldContinueRun |= getFamilyAt(ix)->getCoverage().get(ch);
771                 }
772             } else {
773                 shouldContinueRun = lastFamily->getCoverage().get(ch);
774             }
775         }
776 
777         if (!shouldContinueRun) {
778             FamilyMatchResult familyIndices = getFamilyForChar(
779                     ch, isVariationSelector(nextCh) ? nextCh : 0, localeListId, familyVariant);
780             bool breakRun;
781             if (utf16Pos == 0 || lastFamilyIndices.empty()) {
782                 breakRun = true;
783             } else {
784                 const std::shared_ptr<FontFamily>& lastFamily = getFamilyAt(lastFamilyIndices[0]);
785                 if (lastFamily->isColorEmojiFamily()) {
786                     FamilyMatchResult intersection =
787                             FamilyMatchResult::intersect(familyIndices, lastFamilyIndices);
788                     if (intersection.empty()) {
789                         breakRun = true;  // None of last family can draw the given char.
790                     } else {
791                         breakRun = isEmojiBreak(prevCh, ch);
792                         if (!breakRun) {
793                             // To select sequence supported families, update family indices with the
794                             // intersection between the supported families between prev char and
795                             // current char.
796                             familyIndices = intersection;
797                             lastFamilyIndices = intersection;
798                             run->familyMatch = intersection;
799                         }
800                     }
801                 } else {
802                     breakRun = familyIndices[0] != lastFamilyIndices[0];
803                 }
804             }
805 
806             if (breakRun) {
807                 size_t start = utf16Pos;
808                 // Workaround for combining marks and emoji modifiers until we implement
809                 // per-cluster font selection: if a combining mark or an emoji modifier is found in
810                 // a different font that also supports the previous character, attach previous
811                 // character to the new run. U+20E3 COMBINING ENCLOSING KEYCAP, used in emoji, is
812                 // handled properly by this since it's a combining mark too.
813                 if (utf16Pos != 0 &&
814                     (isCombining(ch) || (isEmojiModifier(ch) && isEmojiBase(prevCh)))) {
815                     for (uint8_t ix : familyIndices) {
816                         if (getFamilyAt(ix)->getCoverage().get(prevCh)) {
817                             const size_t prevChLength = U16_LENGTH(prevCh);
818                             if (run != nullptr) {
819                                 run->end -= prevChLength;
820                                 if (run->start == run->end) {
821                                     result.pop_back();
822                                 }
823                             }
824                             start -= prevChLength;
825                             break;
826                         }
827                     }
828                 }
829                 if (lastFamilyIndices.empty()) {
830                     // This is the first family ever assigned. We are either seeing the very first
831                     // character (which means start would already be zero), or we have only seen
832                     // characters that don't need any font support (which means we need to adjust
833                     // start to be 0 to include those characters).
834                     start = 0;
835                 }
836                 result.push_back({familyIndices, static_cast<int>(start), 0});
837                 run = &result.back();
838                 lastFamilyIndices = run->familyMatch;
839             }
840         }
841         prevCh = ch;
842         if (run != nullptr) {
843             run->end = nextUtf16Pos;  // exclusive
844         }
845 
846         // Stop searching the remaining characters if the result length gets runMax + 2.
847         // When result.size gets runMax + 2 here, the run between [0, runMax) was finalized.
848         // If the result.size() equals to runMax, the run may be still expanding.
849         // if the result.size() equals to runMax + 2, the last run may be removed and the last run
850         // may be exntended the previous run with above workaround.
851         if (result.size() >= 2 && runMax == result.size() - 2) {
852             break;
853         }
854     } while (nextCh != kEndOfString);
855 
856     if (lastFamilyIndices.empty()) {
857         // No character needed any font support, so it doesn't really matter which font they end up
858         // getting displayed in. We put the whole string in one run, using the first font.
859         result.push_back(
860                 {FamilyMatchResult::Builder().add(0).build(), 0, static_cast<int>(string_size)});
861     }
862 
863     if (result.size() > runMax) {
864         // The itemization has terminated since it reaches the runMax. Remove last unfinalized runs.
865         return std::vector<Run>(result.begin(), result.begin() + runMax);
866     }
867 
868     return result;
869 }
870 
getBestFont(U16StringPiece text,const Run & run,FontStyle style,const VariationSettings & variationSettings)871 FakedFont FontCollection::getBestFont(U16StringPiece text, const Run& run, FontStyle style,
872                                       const VariationSettings& variationSettings) {
873     uint8_t bestIndex = 0;
874     uint32_t bestScore = 0xFFFFFFFF;
875 
876     const std::shared_ptr<FontFamily>& family = getFamilyAt(run.familyMatch[0]);
877     if (family->isColorEmojiFamily() && run.familyMatch.size() > 1) {
878         for (size_t i = 0; i < run.familyMatch.size(); ++i) {
879             const std::shared_ptr<FontFamily>& family = getFamilyAt(run.familyMatch[i]);
880             const HbFontUniquePtr& font = family->getFont(0)->baseFont();
881             uint32_t score = getGlyphScore(text, run.start, run.end, font);
882 
883             if (score < bestScore) {
884                 bestIndex = run.familyMatch[i];
885                 bestScore = score;
886             }
887         }
888     } else {
889         bestIndex = run.familyMatch[0];
890     }
891     return getFamilyAt(bestIndex)->getClosestMatch(style, variationSettings);
892 }
893 
baseFontFaked(FontStyle style)894 FakedFont FontCollection::baseFontFaked(FontStyle style) {
895     return getFamilyAt(0)->getClosestMatch(style);
896 }
897 
createCollectionWithVariation(const VariationSettings & variations)898 std::shared_ptr<FontCollection> FontCollection::createCollectionWithVariation(
899         const VariationSettings& variations) {
900     if (variations.empty() || mSupportedAxesCount == 0) {
901         return nullptr;
902     }
903 
904     bool hasSupportedAxis = false;
905     for (const FontVariation& variation : variations) {
906         if (std::binary_search(mSupportedAxes.get(), mSupportedAxes.get() + mSupportedAxesCount,
907                                variation.axisTag)) {
908             hasSupportedAxis = true;
909             break;
910         }
911     }
912     if (!hasSupportedAxis) {
913         // None of variation axes are supported by this font collection.
914         return nullptr;
915     }
916 
917     std::vector<std::shared_ptr<FontFamily>> families;
918     for (size_t i = 0; i < getFamilyCount(); ++i) {
919         const std::shared_ptr<FontFamily>& family = getFamilyAt(i);
920         std::shared_ptr<FontFamily> newFamily = FontFamily::create(family, variations);
921         if (newFamily) {
922             families.push_back(newFamily);
923         } else {
924             families.push_back(family);
925         }
926     }
927 
928     return std::shared_ptr<FontCollection>(new FontCollection(families));
929 }
930 
createCollectionWithFamilies(std::vector<std::shared_ptr<FontFamily>> && families) const931 std::shared_ptr<FontCollection> FontCollection::createCollectionWithFamilies(
932         std::vector<std::shared_ptr<FontFamily>>&& families) const {
933     families.reserve(families.size() + getFamilyCount());
934     for (size_t i = 0; i < getFamilyCount(); i++) {
935         families.push_back(getFamilyAt(i));
936     }
937     return FontCollection::create(families);
938 }
939 
getId() const940 uint32_t FontCollection::getId() const {
941     return mId;
942 }
943 
944 }  // namespace minikin
945