xref: /aosp_15_r20/external/skia/modules/skunicode/src/SkUnicode_icu.cpp (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
1 /*
2 * Copyright 2020 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7 #include "modules/skunicode/include/SkUnicode_icu.h"
8 
9 #include "include/core/SkRefCnt.h"
10 #include "include/core/SkString.h"
11 #include "include/core/SkTypes.h"
12 #include "include/private/base/SkDebug.h"
13 #include "include/private/base/SkMutex.h"
14 #include "include/private/base/SkSpan_impl.h"
15 #include "include/private/base/SkTArray.h"
16 #include "include/private/base/SkTemplates.h"
17 #include "include/private/base/SkTo.h"
18 #include "modules/skunicode/include/SkUnicode.h"
19 #include "modules/skunicode/src/SkBidiFactory_icu_full.h"
20 #include "modules/skunicode/src/SkUnicode_icu_bidi.h"
21 #include "modules/skunicode/src/SkUnicode_icupriv.h"
22 #include "src/base/SkBitmaskEnum.h"
23 #include "src/base/SkUTF.h"
24 #include "src/core/SkChecksum.h"
25 #include "src/core/SkTHash.h"
26 
27 #include <unicode/ubrk.h>
28 #include <unicode/uchar.h>
29 #include <unicode/uloc.h>
30 #include <unicode/umachine.h>
31 #include <unicode/utext.h>
32 #include <unicode/utypes.h>
33 
34 #include <cstdint>
35 #include <cstring>
36 #include <functional>
37 #include <memory>
38 #include <string>
39 #include <utility>
40 #include <vector>
41 
42 #if defined(SK_USING_THIRD_PARTY_ICU) && defined(SK_BUILD_FOR_WIN)
43 #include "SkLoadICU.h"
44 #include "include/private/base/SkOnce.h"
45 #endif
46 
47 using namespace skia_private;
48 
SkGetICULib()49 const SkICULib* SkGetICULib() {
50     static const auto gICU = SkLoadICULib();
51     return gICU.get();
52 }
53 
54 // sk_* wrappers for ICU funcs
55 #define SKICU_FUNC(funcname)                                                                \
56     template <typename... Args>                                                             \
57     auto sk_##funcname(Args&&... args) -> decltype(funcname(std::forward<Args>(args)...)) { \
58         return SkGetICULib()->f_##funcname(std::forward<Args>(args)...);                    \
59     }                                                                                       \
60 
61 SKICU_EMIT_FUNCS
62 #undef SKICU_FUNC
63 
/**
 * Clones a break iterator using whichever entry point the loaded ICU table provides:
 * f_ubrk_clone_ when it is non-null, otherwise f_ubrk_safeClone_ (called without a
 * caller-supplied stack buffer). At least one of the two must be present (asserted).
 */
static inline UBreakIterator* sk_ubrk_clone(const UBreakIterator* bi, UErrorCode* status) {
    const SkICULib* lib = SkGetICULib();
    SkASSERT(lib->f_ubrk_clone_ || lib->f_ubrk_safeClone_);
    if (lib->f_ubrk_clone_) {
        return lib->f_ubrk_clone_(bi, status);
    }
    return lib->f_ubrk_safeClone_(bi, nullptr, nullptr, status);
}
71 
utext_close_wrapper(UText * ut)72 static UText* utext_close_wrapper(UText* ut) {
73     return sk_utext_close(ut);
74 }
ubrk_close_wrapper(UBreakIterator * bi)75 static void ubrk_close_wrapper(UBreakIterator* bi) {
76     sk_ubrk_close(bi);
77 }
78 
79 using ICUUText = std::unique_ptr<UText, SkFunctionObject<utext_close_wrapper>>;
80 using ICUBreakIterator = std::unique_ptr<UBreakIterator, SkFunctionObject<ubrk_close_wrapper>>;
81 /** Replaces invalid utf-8 sequences with REPLACEMENT CHARACTER U+FFFD. */
utf8_next(const char ** ptr,const char * end)82 static inline SkUnichar utf8_next(const char** ptr, const char* end) {
83     SkUnichar val = SkUTF::NextUTF8(ptr, end);
84     return val < 0 ? 0xFFFD : val;
85 }
86 
convertType(SkUnicode::BreakType type)87 static UBreakIteratorType convertType(SkUnicode::BreakType type) {
88     switch (type) {
89         case SkUnicode::BreakType::kLines: return UBRK_LINE;
90         case SkUnicode::BreakType::kGraphemes: return UBRK_CHARACTER;
91         case SkUnicode::BreakType::kWords: return UBRK_WORD;
92         case SkUnicode::BreakType::kSentences:
93             return UBRK_SENTENCE;
94         default:
95             return UBRK_CHARACTER;
96     }
97 }
98 
99 class SkBreakIterator_icu : public SkBreakIterator {
100     ICUBreakIterator fBreakIterator;
101     Position fLastResult;
102  public:
SkBreakIterator_icu(ICUBreakIterator iter)103     explicit SkBreakIterator_icu(ICUBreakIterator iter)
104             : fBreakIterator(std::move(iter))
105             , fLastResult(0) {}
first()106     Position first() override { return fLastResult = sk_ubrk_first(fBreakIterator.get()); }
current()107     Position current() override { return fLastResult = sk_ubrk_current(fBreakIterator.get()); }
next()108     Position next() override { return fLastResult = sk_ubrk_next(fBreakIterator.get()); }
status()109     Status status() override { return sk_ubrk_getRuleStatus(fBreakIterator.get()); }
isDone()110     bool isDone() override { return fLastResult == UBRK_DONE; }
111 
setText(const char utftext8[],int utf8Units)112     bool setText(const char utftext8[], int utf8Units) override {
113         UErrorCode status = U_ZERO_ERROR;
114         ICUUText text(sk_utext_openUTF8(nullptr, &utftext8[0], utf8Units, &status));
115 
116         if (U_FAILURE(status)) {
117             SkDEBUGF("Break error: %s", sk_u_errorName(status));
118             return false;
119         }
120         SkASSERT(text);
121         sk_ubrk_setUText(fBreakIterator.get(), text.get(), &status);
122         if (U_FAILURE(status)) {
123             SkDEBUGF("Break error: %s", sk_u_errorName(status));
124             return false;
125         }
126         fLastResult = 0;
127         return true;
128     }
setText(const char16_t utftext16[],int utf16Units)129     bool setText(const char16_t utftext16[], int utf16Units) override {
130         UErrorCode status = U_ZERO_ERROR;
131         ICUUText text(sk_utext_openUChars(nullptr, reinterpret_cast<const UChar*>(&utftext16[0]),
132                                           utf16Units, &status));
133 
134         if (U_FAILURE(status)) {
135             SkDEBUGF("Break error: %s", sk_u_errorName(status));
136             return false;
137         }
138         SkASSERT(text);
139         sk_ubrk_setUText(fBreakIterator.get(), text.get(), &status);
140         if (U_FAILURE(status)) {
141             SkDEBUGF("Break error: %s", sk_u_errorName(status));
142             return false;
143         }
144         fLastResult = 0;
145         return true;
146     }
147 };
148 
149 class SkIcuBreakIteratorCache final {
150     struct Request final {
RequestSkIcuBreakIteratorCache::Request151         Request(SkUnicode::BreakType type, const char* icuLocale)
152             : fType(type)
153             , fIcuLocale(icuLocale)
154             , hash(SkGoodHash()(type) ^ SkGoodHash()(fIcuLocale))
155         {}
156         const SkUnicode::BreakType fType;
157         const SkString fIcuLocale;
158         const uint32_t hash;
159         struct Hash {
operator ()SkIcuBreakIteratorCache::Request::Hash160             uint32_t operator()(const Request& key) const {
161                 return key.hash;
162             }
163         };
operator ==SkIcuBreakIteratorCache::Request164         bool operator==(const Request& that) const {
165             return this->fType == that.fType && this->fIcuLocale == that.fIcuLocale;
166         }
167     };
168     /* Every holder of this class is referencing the same (logical) break iterator.
169      * Due to caching, the actual break iterator may come and go.
170      */
171     class BreakIteratorRef final {
172     public:
BreakIteratorRef(ICUBreakIterator iter)173         BreakIteratorRef(ICUBreakIterator iter) : breakIterator(iter.release()), fRefCnt(1) {
174             ++Instances;
175         }
176         BreakIteratorRef(SkRefCntBase&&) = delete;
177         BreakIteratorRef(const SkRefCntBase&) = delete;
178         BreakIteratorRef& operator=(SkRefCntBase&&) = delete;
179         BreakIteratorRef& operator=(const SkRefCntBase&) = delete;
~BreakIteratorRef()180         ~BreakIteratorRef() {
181             if (breakIterator) {
182                 ubrk_close_wrapper(breakIterator);
183             }
184         }
185 
ref() const186         void ref() const {
187             SkASSERT(fRefCnt > 0);
188             ++fRefCnt;
189         }
unref() const190         void unref() const {
191             SkASSERT(fRefCnt > 0);
192             if (1 == fRefCnt--) {
193                 delete this;
194                 --Instances;
195             }
196         }
197 
198         UBreakIterator* breakIterator;
GetInstanceCount()199         static int32_t GetInstanceCount() { return Instances; }
200     private:
201         mutable int32_t fRefCnt;
202         static int32_t Instances;
203     };
204     THashMap<Request, sk_sp<BreakIteratorRef>, Request::Hash> fRequestCache;
205     SkMutex fCacheMutex;
206 
purgeIfNeeded()207     void purgeIfNeeded() {
208         // If there are too many requests remove some (oldest first?)
209         // This may free some break iterators
210         if (fRequestCache.count() > 100) {
211             // remove the oldest requests
212             fRequestCache.reset();
213         }
214         // If there are still too many break iterators remove some (oldest first?)
215         if (BreakIteratorRef::GetInstanceCount() > 4) {
216             // delete the oldest break iterators and set the references to nullptr
217             for (auto&& [key, value] : fRequestCache) {
218                 if (value->breakIterator) {
219                     sk_ubrk_close(value->breakIterator);
220                     value->breakIterator = nullptr;
221                 }
222             }
223         }
224     }
225 
226  public:
get()227     static SkIcuBreakIteratorCache& get() {
228         static SkIcuBreakIteratorCache instance;
229         return instance;
230     }
231 
makeBreakIterator(SkUnicode::BreakType type,const char * bcp47)232     ICUBreakIterator makeBreakIterator(SkUnicode::BreakType type, const char* bcp47) {
233         SkAutoMutexExclusive lock(fCacheMutex);
234         UErrorCode status = U_ZERO_ERROR;
235 
236         // Get ICU locale for BCP47 langtag
237         char localeIDStorage[ULOC_FULLNAME_CAPACITY];
238         const char* localeID = nullptr;
239         if (bcp47) {
240             sk_uloc_forLanguageTag(bcp47, localeIDStorage, ULOC_FULLNAME_CAPACITY, nullptr, &status);
241             if (U_FAILURE(status)) {
242                 SkDEBUGF("Break error could not get language tag: %s", sk_u_errorName(status));
243             } else if (localeIDStorage[0]) {
244                 localeID = localeIDStorage;
245             }
246         }
247         if (!localeID) {
248             localeID = sk_uloc_getDefault();
249         }
250 
251         auto make = [](const Request& request) -> UBreakIterator* {
252             UErrorCode status = U_ZERO_ERROR;
253             UBreakIterator* bi = sk_ubrk_open(convertType(request.fType),
254                                               request.fIcuLocale.c_str(),
255                                               nullptr, 0, &status);
256             if (U_FAILURE(status)) {
257                 SkDEBUGF("Break error: %s", sk_u_errorName(status));
258             }
259             return bi;
260         };
261 
262         auto clone = [](const UBreakIterator* existing) -> ICUBreakIterator {
263             if (!existing) {
264                 return nullptr;
265             }
266 
267             UErrorCode status = U_ZERO_ERROR;
268             ICUBreakIterator clone(sk_ubrk_clone(existing, &status));
269             if (U_FAILURE(status)) {
270                 SkDEBUGF("Break error: %s", sk_u_errorName(status));
271             }
272             return clone;
273         };
274 
275         Request request(type, localeID);
276 
277         // See if this request is already in the cache
278         const sk_sp<BreakIteratorRef>* ref = fRequestCache.find(request);
279         if (ref) {
280             // See if the breakIterator needs to be re-created
281             if (!(*ref)->breakIterator) {
282                 (*ref)->breakIterator = make(request);
283             }
284             return clone((*ref)->breakIterator);
285         }
286 
287         // This request was not in the cache, create an iterator.
288         ICUBreakIterator newIter(make(request));
289         if (!newIter) {
290             return nullptr;
291         }
292 
293         sk_sp<BreakIteratorRef> newRef;
294 
295         // Check if the new iterator is a duplicate
296         // Android doesn't expose ubrk_getLocaleByType so there is no means of de-duplicating.
297         // ubrk_getAvailable seems like it should work, but the implementation is just every locale.
298         if (SkGetICULib()->f_ubrk_getLocaleByType) {
299             const char* actualLocale = SkGetICULib()->f_ubrk_getLocaleByType(
300                                            newIter.get(), ULOC_ACTUAL_LOCALE, &status);
301             // Android doesn't expose ubrk_getLocaleByType so a wrapper may return an error.
302             if (!U_FAILURE(status)) {
303                 if (!actualLocale) {
304                     actualLocale = "";
305                 }
306                 // If the actual locale is the same as the requested locale we know there is no entry.
307                 if (strcmp(actualLocale, localeID) != 0) {
308                     Request actualRequest(type, actualLocale);
309                     const sk_sp<BreakIteratorRef>* actualRef = fRequestCache.find(actualRequest);
310                     if (actualRef) {
311                         if (!(*actualRef)->breakIterator) {
312                             (*actualRef)->breakIterator = newIter.release();
313                         }
314                         actualRef = fRequestCache.set(request, *actualRef);
315                         return clone((*actualRef)->breakIterator);
316                     } else {
317                         this->purgeIfNeeded();
318                         newRef = sk_make_sp<BreakIteratorRef>(std::move(newIter));
319                         fRequestCache.set(actualRequest, newRef);
320                     }
321                 }
322             }
323         }
324 
325         if (!newRef) {
326             this->purgeIfNeeded();
327             newRef = sk_make_sp<BreakIteratorRef>(std::move(newIter));
328         }
329         fRequestCache.set(request, newRef);
330 
331         return clone(newRef->breakIterator);
332     }
333 };
334 /*static*/ int32_t SkIcuBreakIteratorCache::BreakIteratorRef::Instances{0};
335 
336 class SkUnicode_icu : public SkUnicode {
337 
extractWords(uint16_t utf16[],int utf16Units,const char * locale,std::vector<Position> * words)338     static bool extractWords(uint16_t utf16[], int utf16Units, const char* locale,
339                              std::vector<Position>* words) {
340 
341         UErrorCode status = U_ZERO_ERROR;
342 
343         const BreakType type = BreakType::kWords;
344         ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(type, locale);
345         if (!iterator) {
346             SkDEBUGF("Break error: %s", sk_u_errorName(status));
347             return false;
348         }
349         SkASSERT(iterator);
350 
351         ICUUText utf16UText(sk_utext_openUChars(nullptr, (UChar*)utf16, utf16Units, &status));
352         if (U_FAILURE(status)) {
353             SkDEBUGF("Break error: %s", sk_u_errorName(status));
354             return false;
355         }
356 
357         sk_ubrk_setUText(iterator.get(), utf16UText.get(), &status);
358         if (U_FAILURE(status)) {
359             SkDEBUGF("Break error: %s", sk_u_errorName(status));
360             return false;
361         }
362 
363         // Get the words
364         int32_t pos = sk_ubrk_first(iterator.get());
365         while (pos != UBRK_DONE) {
366             words->emplace_back(pos);
367             pos = sk_ubrk_next(iterator.get());
368         }
369 
370         return true;
371     }
372 
extractPositions(const char utf8[],int utf8Units,BreakType type,const char * locale,const std::function<void (int,int)> & setBreak)373     static bool extractPositions(const char utf8[], int utf8Units,
374                                  BreakType type, const char* locale,
375                                  const std::function<void(int, int)>& setBreak) {
376 
377         UErrorCode status = U_ZERO_ERROR;
378         ICUUText text(sk_utext_openUTF8(nullptr, &utf8[0], utf8Units, &status));
379         if (U_FAILURE(status)) {
380             SkDEBUGF("Break error: %s", sk_u_errorName(status));
381             return false;
382         }
383         SkASSERT(text);
384 
385         ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(type, locale);
386         if (!iterator) {
387             return false;
388         }
389 
390         sk_ubrk_setUText(iterator.get(), text.get(), &status);
391         if (U_FAILURE(status)) {
392             SkDEBUGF("Break error: %s", sk_u_errorName(status));
393             return false;
394         }
395 
396         auto iter = iterator.get();
397         int32_t pos = sk_ubrk_first(iter);
398         while (pos != UBRK_DONE) {
399             int s = type == SkUnicode::BreakType::kLines
400                         ? UBRK_LINE_SOFT
401                         : sk_ubrk_getRuleStatus(iter);
402             setBreak(pos, s);
403             pos = sk_ubrk_next(iter);
404         }
405 
406         if (type == SkUnicode::BreakType::kLines) {
407             // This is a workaround for https://bugs.chromium.org/p/skia/issues/detail?id=10715
408             // (ICU line break iterator does not work correctly on Thai text with new lines)
409             // So, we only use the iterator to collect soft line breaks and
410             // scan the text for all hard line breaks ourselves
411             const char* end = utf8 + utf8Units;
412             const char* ch = utf8;
413             while (ch < end) {
414                 auto unichar = utf8_next(&ch, end);
415                 if (SkUnicode_icu::isHardLineBreak(unichar)) {
416                     setBreak(ch - utf8, UBRK_LINE_HARD);
417                 }
418             }
419         }
420         return true;
421     }
422 
isControl(SkUnichar utf8)423     bool isControl(SkUnichar utf8) override {
424         return sk_u_iscntrl(utf8);
425     }
426 
isWhitespace(SkUnichar utf8)427     bool isWhitespace(SkUnichar utf8) override {
428         return sk_u_isWhitespace(utf8);
429     }
430 
isSpace(SkUnichar utf8)431     bool isSpace(SkUnichar utf8) override {
432         return sk_u_isspace(utf8);
433     }
434 
isHardBreak(SkUnichar utf8)435     bool isHardBreak(SkUnichar utf8) override {
436         return SkUnicode_icu::isHardLineBreak(utf8);
437     }
438 
isEmoji(SkUnichar unichar)439     bool isEmoji(SkUnichar unichar) override {
440         return sk_u_hasBinaryProperty(unichar, UCHAR_EMOJI);
441     }
442 
isEmojiComponent(SkUnichar unichar)443     bool isEmojiComponent(SkUnichar unichar) override {
444         return sk_u_hasBinaryProperty(unichar, UCHAR_EMOJI_COMPONENT);
445     }
446 
isEmojiModifierBase(SkUnichar unichar)447     bool isEmojiModifierBase(SkUnichar unichar) override {
448         return sk_u_hasBinaryProperty(unichar, UCHAR_EMOJI_MODIFIER_BASE);
449     }
450 
isEmojiModifier(SkUnichar unichar)451     bool isEmojiModifier(SkUnichar unichar) override {
452         return sk_u_hasBinaryProperty(unichar, UCHAR_EMOJI_MODIFIER);
453     }
454 
isRegionalIndicator(SkUnichar unichar)455     bool isRegionalIndicator(SkUnichar unichar) override {
456         return sk_u_hasBinaryProperty(unichar, UCHAR_REGIONAL_INDICATOR);
457     }
458 
isIdeographic(SkUnichar unichar)459     bool isIdeographic(SkUnichar unichar) override {
460         return sk_u_hasBinaryProperty(unichar, UCHAR_IDEOGRAPHIC);
461     }
462 
isTabulation(SkUnichar utf8)463     bool isTabulation(SkUnichar utf8) override {
464         return utf8 == '\t';
465     }
466 
isHardLineBreak(SkUnichar utf8)467     static bool isHardLineBreak(SkUnichar utf8) {
468         auto property = sk_u_getIntPropertyValue(utf8, UCHAR_LINE_BREAK);
469         return property == U_LB_LINE_FEED || property == U_LB_MANDATORY_BREAK;
470     }
471 
472 public:
~SkUnicode_icu()473     ~SkUnicode_icu() override { }
makeBidiIterator(const uint16_t text[],int count,SkBidiIterator::Direction dir)474     std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
475                                                      SkBidiIterator::Direction dir) override {
476         return fBidiFact->MakeIterator(text, count, dir);
477     }
makeBidiIterator(const char text[],int count,SkBidiIterator::Direction dir)478     std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],
479                                                      int count,
480                                                      SkBidiIterator::Direction dir) override {
481         return fBidiFact->MakeIterator(text, count, dir);
482     }
makeBreakIterator(const char locale[],BreakType type)483     std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
484                                                        BreakType type) override {
485         ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(type, locale);
486         if (!iterator) {
487             return nullptr;
488         }
489         return std::unique_ptr<SkBreakIterator>(new SkBreakIterator_icu(std::move(iterator)));
490     }
makeBreakIterator(BreakType type)491     std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType type) override {
492         return makeBreakIterator(sk_uloc_getDefault(), type);
493     }
494 
toUpper(const SkString & str)495     SkString toUpper(const SkString& str) override {
496         return this->toUpper(str, nullptr);
497     }
498 
toUpper(const SkString & str,const char * locale)499     SkString toUpper(const SkString& str, const char* locale) override {
500         // Convert to UTF16 since that's what ICU wants.
501         auto str16 = SkUnicode::convertUtf8ToUtf16(str.c_str(), str.size());
502 
503         UErrorCode icu_err = U_ZERO_ERROR;
504         const auto upper16len = sk_u_strToUpper(nullptr, 0, (UChar*)(str16.c_str()), str16.size(),
505                                                 locale, &icu_err);
506         if (icu_err != U_BUFFER_OVERFLOW_ERROR || upper16len <= 0) {
507             return SkString();
508         }
509 
510         AutoSTArray<128, uint16_t> upper16(upper16len);
511         icu_err = U_ZERO_ERROR;
512         sk_u_strToUpper((UChar*)(upper16.get()), SkToS32(upper16.size()),
513                         (UChar*)(str16.c_str()), str16.size(),
514                         locale, &icu_err);
515         SkASSERT(!U_FAILURE(icu_err));
516 
517         // ... and back to utf8 'cause that's what we want.
518         return convertUtf16ToUtf8((char16_t*)upper16.get(), upper16.size());
519     }
520 
getBidiRegions(const char utf8[],int utf8Units,TextDirection dir,std::vector<BidiRegion> * results)521     bool getBidiRegions(const char utf8[],
522                         int utf8Units,
523                         TextDirection dir,
524                         std::vector<BidiRegion>* results) override {
525         return fBidiFact->ExtractBidi(utf8, utf8Units, dir, results);
526     }
527 
getWords(const char utf8[],int utf8Units,const char * locale,std::vector<Position> * results)528     bool getWords(const char utf8[], int utf8Units, const char* locale,
529                   std::vector<Position>* results) override {
530 
531         // Convert to UTF16 since we want the results in utf16
532         auto utf16 = convertUtf8ToUtf16(utf8, utf8Units);
533         return SkUnicode_icu::extractWords((uint16_t*)utf16.c_str(), utf16.size(), locale, results);
534     }
535 
getUtf8Words(const char utf8[],int utf8Units,const char * locale,std::vector<Position> * results)536     bool getUtf8Words(const char utf8[],
537                       int utf8Units,
538                       const char* locale,
539                       std::vector<Position>* results) override {
540         // Convert to UTF16 since we want the results in utf16
541         auto utf16 = convertUtf8ToUtf16(utf8, utf8Units);
542         std::vector<Position> utf16Results;
543         if (!SkUnicode_icu::extractWords(
544                     (uint16_t*)utf16.c_str(), utf16.size(), locale, &utf16Results)) {
545             return false;
546         }
547 
548         std::vector<Position> mapping;
549         SkSpan<const char> text(utf8, utf8Units);
550         SkUnicode::extractUtfConversionMapping(
551                 text, [&](size_t index) { mapping.emplace_back(index); }, [&](size_t index) {});
552 
553         for (auto i16 : utf16Results) {
554             results->emplace_back(mapping[i16]);
555         }
556         return true;
557     }
558 
getSentences(const char utf8[],int utf8Units,const char * locale,std::vector<SkUnicode::Position> * results)559     bool getSentences(const char utf8[],
560                       int utf8Units,
561                       const char* locale,
562                       std::vector<SkUnicode::Position>* results) override {
563         SkUnicode_icu::extractPositions(
564                 utf8, utf8Units, BreakType::kSentences, nullptr,
565                 [&](int pos, int status) {
566                     results->emplace_back(pos);
567                 });
568         return true;
569     }
570 
computeCodeUnitFlags(char utf8[],int utf8Units,bool replaceTabs,TArray<SkUnicode::CodeUnitFlags,true> * results)571     bool computeCodeUnitFlags(char utf8[], int utf8Units, bool replaceTabs,
572                               TArray<SkUnicode::CodeUnitFlags, true>* results) override {
573         results->clear();
574         results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
575 
576         SkUnicode_icu::extractPositions(utf8, utf8Units, BreakType::kLines, nullptr, // TODO: locale
577                                         [&](int pos, int status) {
578             (*results)[pos] |= status == UBRK_LINE_HARD
579                                        ? CodeUnitFlags::kHardLineBreakBefore
580                                        : CodeUnitFlags::kSoftLineBreakBefore;
581         });
582 
583         SkUnicode_icu::extractPositions(utf8, utf8Units, BreakType::kGraphemes, nullptr, //TODO
584                                         [&](int pos, int status) {
585             (*results)[pos] |= CodeUnitFlags::kGraphemeStart;
586         });
587 
588         const char* current = utf8;
589         const char* end = utf8 + utf8Units;
590         while (current < end) {
591             auto before = current - utf8;
592             SkUnichar unichar = SkUTF::NextUTF8(&current, end);
593             if (unichar < 0) unichar = 0xFFFD;
594             auto after = current - utf8;
595             if (replaceTabs && this->isTabulation(unichar)) {
596                 results->at(before) |= SkUnicode::kTabulation;
597                 if (replaceTabs) {
598                     unichar = ' ';
599                     utf8[before] = ' ';
600                 }
601             }
602             for (auto i = before; i < after; ++i) {
603                 if (this->isSpace(unichar)) {
604                     results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
605                 }
606                 if (this->isWhitespace(unichar)) {
607                     results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
608                 }
609                 if (this->isControl(unichar)) {
610                     results->at(i) |= SkUnicode::kControl;
611                 }
612                 if (this->isIdeographic(unichar)) {
613                     results->at(i) |= SkUnicode::kIdeographic;
614                 }
615             }
616         }
617 
618         return true;
619     }
620 
computeCodeUnitFlags(char16_t utf16[],int utf16Units,bool replaceTabs,TArray<SkUnicode::CodeUnitFlags,true> * results)621     bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs,
622                           TArray<SkUnicode::CodeUnitFlags, true>* results) override {
623         results->clear();
624         results->push_back_n(utf16Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
625 
626         // Get white spaces
627         this->forEachCodepoint((char16_t*)&utf16[0], utf16Units,
628            [this, results, replaceTabs, &utf16](SkUnichar unichar, int32_t start, int32_t end) {
629                 for (auto i = start; i < end; ++i) {
630                     if (replaceTabs && this->isTabulation(unichar)) {
631                         results->at(i) |= SkUnicode::kTabulation;
632                     if (replaceTabs) {
633                             unichar = ' ';
634                             utf16[start] = ' ';
635                         }
636                     }
637                     if (this->isSpace(unichar)) {
638                         results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
639                     }
640                     if (this->isWhitespace(unichar)) {
641                         results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
642                     }
643                     if (this->isControl(unichar)) {
644                         results->at(i) |= SkUnicode::kControl;
645                     }
646                 }
647            });
648         // Get graphemes
649         this->forEachBreak((char16_t*)&utf16[0],
650                            utf16Units,
651                            SkUnicode::BreakType::kGraphemes,
652                            [results](SkBreakIterator::Position pos, SkBreakIterator::Status) {
653                                (*results)[pos] |= CodeUnitFlags::kGraphemeStart;
654                            });
655         // Get line breaks
656         this->forEachBreak(
657                 (char16_t*)&utf16[0],
658                 utf16Units,
659                 SkUnicode::BreakType::kLines,
660                 [results](SkBreakIterator::Position pos, SkBreakIterator::Status status) {
661                     if (status ==
662                         (SkBreakIterator::Status)SkUnicode::LineBreakType::kHardLineBreak) {
663                         // Hard line breaks clears off all the other flags
664                         // TODO: Treat \n as a formatting mark and do not pass it to SkShaper
665                         (*results)[pos-1] = CodeUnitFlags::kHardLineBreakBefore;
666                     } else {
667                         (*results)[pos] |= CodeUnitFlags::kSoftLineBreakBefore;
668                     }
669                 });
670 
671         return true;
672     }
673 
reorderVisual(const BidiLevel runLevels[],int levelsCount,int32_t logicalFromVisual[])674     void reorderVisual(const BidiLevel runLevels[],
675                        int levelsCount,
676                        int32_t logicalFromVisual[]) override {
677         fBidiFact->bidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
678     }
679 
680 private:
681     sk_sp<SkBidiFactory> fBidiFact = sk_make_sp<SkBidiICUFactory>();
682 };
683 
684 namespace SkUnicodes::ICU {
Make()685 sk_sp<SkUnicode> Make() {
686     // We haven't yet created a way to encode the ICU data for assembly on Windows,
687     // so we use a helper library to load icudtl.dat from the harddrive.
688 #if defined(SK_USING_THIRD_PARTY_ICU) && defined(SK_BUILD_FOR_WIN)
689     if (!SkLoadICU()) {
690         static SkOnce once;
691         once([] { SkDEBUGF("SkLoadICU() failed!\n"); });
692         return nullptr;
693     }
694 #endif
695     if (SkGetICULib()) {
696         return sk_make_sp<SkUnicode_icu>();
697     }
698     return nullptr;
699 }
700 }  // namespace SkUnicodes::ICU
701