1 /*
2 * Copyright 2020 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7 #include "modules/skunicode/include/SkUnicode_icu.h"
8
9 #include "include/core/SkRefCnt.h"
10 #include "include/core/SkString.h"
11 #include "include/core/SkTypes.h"
12 #include "include/private/base/SkDebug.h"
13 #include "include/private/base/SkMutex.h"
14 #include "include/private/base/SkSpan_impl.h"
15 #include "include/private/base/SkTArray.h"
16 #include "include/private/base/SkTemplates.h"
17 #include "include/private/base/SkTo.h"
18 #include "modules/skunicode/include/SkUnicode.h"
19 #include "modules/skunicode/src/SkBidiFactory_icu_full.h"
20 #include "modules/skunicode/src/SkUnicode_icu_bidi.h"
21 #include "modules/skunicode/src/SkUnicode_icupriv.h"
22 #include "src/base/SkBitmaskEnum.h"
23 #include "src/base/SkUTF.h"
24 #include "src/core/SkChecksum.h"
25 #include "src/core/SkTHash.h"
26
27 #include <unicode/ubrk.h>
28 #include <unicode/uchar.h>
29 #include <unicode/uloc.h>
30 #include <unicode/umachine.h>
31 #include <unicode/utext.h>
32 #include <unicode/utypes.h>
33
34 #include <cstdint>
35 #include <cstring>
36 #include <functional>
37 #include <memory>
38 #include <string>
39 #include <utility>
40 #include <vector>
41
42 #if defined(SK_USING_THIRD_PARTY_ICU) && defined(SK_BUILD_FOR_WIN)
43 #include "SkLoadICU.h"
44 #include "include/private/base/SkOnce.h"
45 #endif
46
47 using namespace skia_private;
48
SkGetICULib()49 const SkICULib* SkGetICULib() {
50 static const auto gICU = SkLoadICULib();
51 return gICU.get();
52 }
53
// sk_* wrappers for ICU funcs
//
// SKICU_FUNC(funcname) defines a variadic function template named sk_<funcname> that
// perfectly forwards its arguments to the f_<funcname> member of the table returned by
// SkGetICULib(). The trailing return type is computed from a call to the real <funcname>
// with the same arguments, so the wrapper reports the return type of the ICU declaration.
//
// SKICU_EMIT_FUNCS is defined outside this file; presumably it expands to one
// SKICU_FUNC(...) invocation per wrapped ICU function -- confirm against its definition.
// The helper macro is undefined again right after the expansion.
#define SKICU_FUNC(funcname) \
    template <typename... Args> \
    auto sk_##funcname(Args&&... args) -> decltype(funcname(std::forward<Args>(args)...)) { \
        return SkGetICULib()->f_##funcname(std::forward<Args>(args)...); \
    } \

SKICU_EMIT_FUNCS
#undef SKICU_FUNC
63
/**
 * Clones a break iterator with whichever cloning entry point the loaded ICU table provides.
 *
 * f_ubrk_clone_ is used when it is set; otherwise the call goes to f_ubrk_safeClone_ with no
 * caller-supplied stack buffer (both buffer arguments are null). At least one of the two
 * entry points is expected to be present, which is asserted in debug builds.
 *
 * @param bi      iterator to copy
 * @param status  ICU in/out error code, passed through to the chosen entry point
 * @return        whatever the chosen ICU entry point returns
 */
static inline UBreakIterator* sk_ubrk_clone(const UBreakIterator* bi, UErrorCode* status) {
    const SkICULib* lib = SkGetICULib();
    SkASSERT(lib->f_ubrk_clone_ || lib->f_ubrk_safeClone_);
    if (lib->f_ubrk_clone_) {
        return lib->f_ubrk_clone_(bi, status);
    }
    return lib->f_ubrk_safeClone_(bi, nullptr, nullptr, status);
}
71
// Closes a UText through the sk_utext_close wrapper and returns that call's result.
// Plain function so it can be named as the deleter of ICUUText below.
static UText* utext_close_wrapper(UText* ut) {
    return sk_utext_close(ut);
}
// Closes a break iterator through the sk_ubrk_close wrapper.
// Plain function so it can be named as the deleter of ICUBreakIterator below.
static void ubrk_close_wrapper(UBreakIterator* bi) {
    sk_ubrk_close(bi);
}

// Owning handles for ICU objects: the matching close wrapper above runs when the
// unique_ptr is destroyed or reset while holding a non-null pointer.
using ICUUText = std::unique_ptr<UText, SkFunctionObject<utext_close_wrapper>>;
using ICUBreakIterator = std::unique_ptr<UBreakIterator, SkFunctionObject<ubrk_close_wrapper>>;
81 /** Replaces invalid utf-8 sequences with REPLACEMENT CHARACTER U+FFFD. */
utf8_next(const char ** ptr,const char * end)82 static inline SkUnichar utf8_next(const char** ptr, const char* end) {
83 SkUnichar val = SkUTF::NextUTF8(ptr, end);
84 return val < 0 ? 0xFFFD : val;
85 }
86
convertType(SkUnicode::BreakType type)87 static UBreakIteratorType convertType(SkUnicode::BreakType type) {
88 switch (type) {
89 case SkUnicode::BreakType::kLines: return UBRK_LINE;
90 case SkUnicode::BreakType::kGraphemes: return UBRK_CHARACTER;
91 case SkUnicode::BreakType::kWords: return UBRK_WORD;
92 case SkUnicode::BreakType::kSentences:
93 return UBRK_SENTENCE;
94 default:
95 return UBRK_CHARACTER;
96 }
97 }
98
99 class SkBreakIterator_icu : public SkBreakIterator {
100 ICUBreakIterator fBreakIterator;
101 Position fLastResult;
102 public:
SkBreakIterator_icu(ICUBreakIterator iter)103 explicit SkBreakIterator_icu(ICUBreakIterator iter)
104 : fBreakIterator(std::move(iter))
105 , fLastResult(0) {}
first()106 Position first() override { return fLastResult = sk_ubrk_first(fBreakIterator.get()); }
current()107 Position current() override { return fLastResult = sk_ubrk_current(fBreakIterator.get()); }
next()108 Position next() override { return fLastResult = sk_ubrk_next(fBreakIterator.get()); }
status()109 Status status() override { return sk_ubrk_getRuleStatus(fBreakIterator.get()); }
isDone()110 bool isDone() override { return fLastResult == UBRK_DONE; }
111
setText(const char utftext8[],int utf8Units)112 bool setText(const char utftext8[], int utf8Units) override {
113 UErrorCode status = U_ZERO_ERROR;
114 ICUUText text(sk_utext_openUTF8(nullptr, &utftext8[0], utf8Units, &status));
115
116 if (U_FAILURE(status)) {
117 SkDEBUGF("Break error: %s", sk_u_errorName(status));
118 return false;
119 }
120 SkASSERT(text);
121 sk_ubrk_setUText(fBreakIterator.get(), text.get(), &status);
122 if (U_FAILURE(status)) {
123 SkDEBUGF("Break error: %s", sk_u_errorName(status));
124 return false;
125 }
126 fLastResult = 0;
127 return true;
128 }
setText(const char16_t utftext16[],int utf16Units)129 bool setText(const char16_t utftext16[], int utf16Units) override {
130 UErrorCode status = U_ZERO_ERROR;
131 ICUUText text(sk_utext_openUChars(nullptr, reinterpret_cast<const UChar*>(&utftext16[0]),
132 utf16Units, &status));
133
134 if (U_FAILURE(status)) {
135 SkDEBUGF("Break error: %s", sk_u_errorName(status));
136 return false;
137 }
138 SkASSERT(text);
139 sk_ubrk_setUText(fBreakIterator.get(), text.get(), &status);
140 if (U_FAILURE(status)) {
141 SkDEBUGF("Break error: %s", sk_u_errorName(status));
142 return false;
143 }
144 fLastResult = 0;
145 return true;
146 }
147 };
148
149 class SkIcuBreakIteratorCache final {
150 struct Request final {
RequestSkIcuBreakIteratorCache::Request151 Request(SkUnicode::BreakType type, const char* icuLocale)
152 : fType(type)
153 , fIcuLocale(icuLocale)
154 , hash(SkGoodHash()(type) ^ SkGoodHash()(fIcuLocale))
155 {}
156 const SkUnicode::BreakType fType;
157 const SkString fIcuLocale;
158 const uint32_t hash;
159 struct Hash {
operator ()SkIcuBreakIteratorCache::Request::Hash160 uint32_t operator()(const Request& key) const {
161 return key.hash;
162 }
163 };
operator ==SkIcuBreakIteratorCache::Request164 bool operator==(const Request& that) const {
165 return this->fType == that.fType && this->fIcuLocale == that.fIcuLocale;
166 }
167 };
168 /* Every holder of this class is referencing the same (logical) break iterator.
169 * Due to caching, the actual break iterator may come and go.
170 */
171 class BreakIteratorRef final {
172 public:
BreakIteratorRef(ICUBreakIterator iter)173 BreakIteratorRef(ICUBreakIterator iter) : breakIterator(iter.release()), fRefCnt(1) {
174 ++Instances;
175 }
176 BreakIteratorRef(SkRefCntBase&&) = delete;
177 BreakIteratorRef(const SkRefCntBase&) = delete;
178 BreakIteratorRef& operator=(SkRefCntBase&&) = delete;
179 BreakIteratorRef& operator=(const SkRefCntBase&) = delete;
~BreakIteratorRef()180 ~BreakIteratorRef() {
181 if (breakIterator) {
182 ubrk_close_wrapper(breakIterator);
183 }
184 }
185
ref() const186 void ref() const {
187 SkASSERT(fRefCnt > 0);
188 ++fRefCnt;
189 }
unref() const190 void unref() const {
191 SkASSERT(fRefCnt > 0);
192 if (1 == fRefCnt--) {
193 delete this;
194 --Instances;
195 }
196 }
197
198 UBreakIterator* breakIterator;
GetInstanceCount()199 static int32_t GetInstanceCount() { return Instances; }
200 private:
201 mutable int32_t fRefCnt;
202 static int32_t Instances;
203 };
204 THashMap<Request, sk_sp<BreakIteratorRef>, Request::Hash> fRequestCache;
205 SkMutex fCacheMutex;
206
purgeIfNeeded()207 void purgeIfNeeded() {
208 // If there are too many requests remove some (oldest first?)
209 // This may free some break iterators
210 if (fRequestCache.count() > 100) {
211 // remove the oldest requests
212 fRequestCache.reset();
213 }
214 // If there are still too many break iterators remove some (oldest first?)
215 if (BreakIteratorRef::GetInstanceCount() > 4) {
216 // delete the oldest break iterators and set the references to nullptr
217 for (auto&& [key, value] : fRequestCache) {
218 if (value->breakIterator) {
219 sk_ubrk_close(value->breakIterator);
220 value->breakIterator = nullptr;
221 }
222 }
223 }
224 }
225
226 public:
get()227 static SkIcuBreakIteratorCache& get() {
228 static SkIcuBreakIteratorCache instance;
229 return instance;
230 }
231
makeBreakIterator(SkUnicode::BreakType type,const char * bcp47)232 ICUBreakIterator makeBreakIterator(SkUnicode::BreakType type, const char* bcp47) {
233 SkAutoMutexExclusive lock(fCacheMutex);
234 UErrorCode status = U_ZERO_ERROR;
235
236 // Get ICU locale for BCP47 langtag
237 char localeIDStorage[ULOC_FULLNAME_CAPACITY];
238 const char* localeID = nullptr;
239 if (bcp47) {
240 sk_uloc_forLanguageTag(bcp47, localeIDStorage, ULOC_FULLNAME_CAPACITY, nullptr, &status);
241 if (U_FAILURE(status)) {
242 SkDEBUGF("Break error could not get language tag: %s", sk_u_errorName(status));
243 } else if (localeIDStorage[0]) {
244 localeID = localeIDStorage;
245 }
246 }
247 if (!localeID) {
248 localeID = sk_uloc_getDefault();
249 }
250
251 auto make = [](const Request& request) -> UBreakIterator* {
252 UErrorCode status = U_ZERO_ERROR;
253 UBreakIterator* bi = sk_ubrk_open(convertType(request.fType),
254 request.fIcuLocale.c_str(),
255 nullptr, 0, &status);
256 if (U_FAILURE(status)) {
257 SkDEBUGF("Break error: %s", sk_u_errorName(status));
258 }
259 return bi;
260 };
261
262 auto clone = [](const UBreakIterator* existing) -> ICUBreakIterator {
263 if (!existing) {
264 return nullptr;
265 }
266
267 UErrorCode status = U_ZERO_ERROR;
268 ICUBreakIterator clone(sk_ubrk_clone(existing, &status));
269 if (U_FAILURE(status)) {
270 SkDEBUGF("Break error: %s", sk_u_errorName(status));
271 }
272 return clone;
273 };
274
275 Request request(type, localeID);
276
277 // See if this request is already in the cache
278 const sk_sp<BreakIteratorRef>* ref = fRequestCache.find(request);
279 if (ref) {
280 // See if the breakIterator needs to be re-created
281 if (!(*ref)->breakIterator) {
282 (*ref)->breakIterator = make(request);
283 }
284 return clone((*ref)->breakIterator);
285 }
286
287 // This request was not in the cache, create an iterator.
288 ICUBreakIterator newIter(make(request));
289 if (!newIter) {
290 return nullptr;
291 }
292
293 sk_sp<BreakIteratorRef> newRef;
294
295 // Check if the new iterator is a duplicate
296 // Android doesn't expose ubrk_getLocaleByType so there is no means of de-duplicating.
297 // ubrk_getAvailable seems like it should work, but the implementation is just every locale.
298 if (SkGetICULib()->f_ubrk_getLocaleByType) {
299 const char* actualLocale = SkGetICULib()->f_ubrk_getLocaleByType(
300 newIter.get(), ULOC_ACTUAL_LOCALE, &status);
301 // Android doesn't expose ubrk_getLocaleByType so a wrapper may return an error.
302 if (!U_FAILURE(status)) {
303 if (!actualLocale) {
304 actualLocale = "";
305 }
306 // If the actual locale is the same as the requested locale we know there is no entry.
307 if (strcmp(actualLocale, localeID) != 0) {
308 Request actualRequest(type, actualLocale);
309 const sk_sp<BreakIteratorRef>* actualRef = fRequestCache.find(actualRequest);
310 if (actualRef) {
311 if (!(*actualRef)->breakIterator) {
312 (*actualRef)->breakIterator = newIter.release();
313 }
314 actualRef = fRequestCache.set(request, *actualRef);
315 return clone((*actualRef)->breakIterator);
316 } else {
317 this->purgeIfNeeded();
318 newRef = sk_make_sp<BreakIteratorRef>(std::move(newIter));
319 fRequestCache.set(actualRequest, newRef);
320 }
321 }
322 }
323 }
324
325 if (!newRef) {
326 this->purgeIfNeeded();
327 newRef = sk_make_sp<BreakIteratorRef>(std::move(newIter));
328 }
329 fRequestCache.set(request, newRef);
330
331 return clone(newRef->breakIterator);
332 }
333 };
334 /*static*/ int32_t SkIcuBreakIteratorCache::BreakIteratorRef::Instances{0};
335
336 class SkUnicode_icu : public SkUnicode {
337
extractWords(uint16_t utf16[],int utf16Units,const char * locale,std::vector<Position> * words)338 static bool extractWords(uint16_t utf16[], int utf16Units, const char* locale,
339 std::vector<Position>* words) {
340
341 UErrorCode status = U_ZERO_ERROR;
342
343 const BreakType type = BreakType::kWords;
344 ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(type, locale);
345 if (!iterator) {
346 SkDEBUGF("Break error: %s", sk_u_errorName(status));
347 return false;
348 }
349 SkASSERT(iterator);
350
351 ICUUText utf16UText(sk_utext_openUChars(nullptr, (UChar*)utf16, utf16Units, &status));
352 if (U_FAILURE(status)) {
353 SkDEBUGF("Break error: %s", sk_u_errorName(status));
354 return false;
355 }
356
357 sk_ubrk_setUText(iterator.get(), utf16UText.get(), &status);
358 if (U_FAILURE(status)) {
359 SkDEBUGF("Break error: %s", sk_u_errorName(status));
360 return false;
361 }
362
363 // Get the words
364 int32_t pos = sk_ubrk_first(iterator.get());
365 while (pos != UBRK_DONE) {
366 words->emplace_back(pos);
367 pos = sk_ubrk_next(iterator.get());
368 }
369
370 return true;
371 }
372
extractPositions(const char utf8[],int utf8Units,BreakType type,const char * locale,const std::function<void (int,int)> & setBreak)373 static bool extractPositions(const char utf8[], int utf8Units,
374 BreakType type, const char* locale,
375 const std::function<void(int, int)>& setBreak) {
376
377 UErrorCode status = U_ZERO_ERROR;
378 ICUUText text(sk_utext_openUTF8(nullptr, &utf8[0], utf8Units, &status));
379 if (U_FAILURE(status)) {
380 SkDEBUGF("Break error: %s", sk_u_errorName(status));
381 return false;
382 }
383 SkASSERT(text);
384
385 ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(type, locale);
386 if (!iterator) {
387 return false;
388 }
389
390 sk_ubrk_setUText(iterator.get(), text.get(), &status);
391 if (U_FAILURE(status)) {
392 SkDEBUGF("Break error: %s", sk_u_errorName(status));
393 return false;
394 }
395
396 auto iter = iterator.get();
397 int32_t pos = sk_ubrk_first(iter);
398 while (pos != UBRK_DONE) {
399 int s = type == SkUnicode::BreakType::kLines
400 ? UBRK_LINE_SOFT
401 : sk_ubrk_getRuleStatus(iter);
402 setBreak(pos, s);
403 pos = sk_ubrk_next(iter);
404 }
405
406 if (type == SkUnicode::BreakType::kLines) {
407 // This is a workaround for https://bugs.chromium.org/p/skia/issues/detail?id=10715
408 // (ICU line break iterator does not work correctly on Thai text with new lines)
409 // So, we only use the iterator to collect soft line breaks and
410 // scan the text for all hard line breaks ourselves
411 const char* end = utf8 + utf8Units;
412 const char* ch = utf8;
413 while (ch < end) {
414 auto unichar = utf8_next(&ch, end);
415 if (SkUnicode_icu::isHardLineBreak(unichar)) {
416 setBreak(ch - utf8, UBRK_LINE_HARD);
417 }
418 }
419 }
420 return true;
421 }
422
isControl(SkUnichar utf8)423 bool isControl(SkUnichar utf8) override {
424 return sk_u_iscntrl(utf8);
425 }
426
isWhitespace(SkUnichar utf8)427 bool isWhitespace(SkUnichar utf8) override {
428 return sk_u_isWhitespace(utf8);
429 }
430
isSpace(SkUnichar utf8)431 bool isSpace(SkUnichar utf8) override {
432 return sk_u_isspace(utf8);
433 }
434
isHardBreak(SkUnichar utf8)435 bool isHardBreak(SkUnichar utf8) override {
436 return SkUnicode_icu::isHardLineBreak(utf8);
437 }
438
isEmoji(SkUnichar unichar)439 bool isEmoji(SkUnichar unichar) override {
440 return sk_u_hasBinaryProperty(unichar, UCHAR_EMOJI);
441 }
442
isEmojiComponent(SkUnichar unichar)443 bool isEmojiComponent(SkUnichar unichar) override {
444 return sk_u_hasBinaryProperty(unichar, UCHAR_EMOJI_COMPONENT);
445 }
446
isEmojiModifierBase(SkUnichar unichar)447 bool isEmojiModifierBase(SkUnichar unichar) override {
448 return sk_u_hasBinaryProperty(unichar, UCHAR_EMOJI_MODIFIER_BASE);
449 }
450
isEmojiModifier(SkUnichar unichar)451 bool isEmojiModifier(SkUnichar unichar) override {
452 return sk_u_hasBinaryProperty(unichar, UCHAR_EMOJI_MODIFIER);
453 }
454
isRegionalIndicator(SkUnichar unichar)455 bool isRegionalIndicator(SkUnichar unichar) override {
456 return sk_u_hasBinaryProperty(unichar, UCHAR_REGIONAL_INDICATOR);
457 }
458
isIdeographic(SkUnichar unichar)459 bool isIdeographic(SkUnichar unichar) override {
460 return sk_u_hasBinaryProperty(unichar, UCHAR_IDEOGRAPHIC);
461 }
462
isTabulation(SkUnichar utf8)463 bool isTabulation(SkUnichar utf8) override {
464 return utf8 == '\t';
465 }
466
isHardLineBreak(SkUnichar utf8)467 static bool isHardLineBreak(SkUnichar utf8) {
468 auto property = sk_u_getIntPropertyValue(utf8, UCHAR_LINE_BREAK);
469 return property == U_LB_LINE_FEED || property == U_LB_MANDATORY_BREAK;
470 }
471
472 public:
~SkUnicode_icu()473 ~SkUnicode_icu() override { }
makeBidiIterator(const uint16_t text[],int count,SkBidiIterator::Direction dir)474 std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
475 SkBidiIterator::Direction dir) override {
476 return fBidiFact->MakeIterator(text, count, dir);
477 }
makeBidiIterator(const char text[],int count,SkBidiIterator::Direction dir)478 std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],
479 int count,
480 SkBidiIterator::Direction dir) override {
481 return fBidiFact->MakeIterator(text, count, dir);
482 }
makeBreakIterator(const char locale[],BreakType type)483 std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
484 BreakType type) override {
485 ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(type, locale);
486 if (!iterator) {
487 return nullptr;
488 }
489 return std::unique_ptr<SkBreakIterator>(new SkBreakIterator_icu(std::move(iterator)));
490 }
makeBreakIterator(BreakType type)491 std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType type) override {
492 return makeBreakIterator(sk_uloc_getDefault(), type);
493 }
494
toUpper(const SkString & str)495 SkString toUpper(const SkString& str) override {
496 return this->toUpper(str, nullptr);
497 }
498
toUpper(const SkString & str,const char * locale)499 SkString toUpper(const SkString& str, const char* locale) override {
500 // Convert to UTF16 since that's what ICU wants.
501 auto str16 = SkUnicode::convertUtf8ToUtf16(str.c_str(), str.size());
502
503 UErrorCode icu_err = U_ZERO_ERROR;
504 const auto upper16len = sk_u_strToUpper(nullptr, 0, (UChar*)(str16.c_str()), str16.size(),
505 locale, &icu_err);
506 if (icu_err != U_BUFFER_OVERFLOW_ERROR || upper16len <= 0) {
507 return SkString();
508 }
509
510 AutoSTArray<128, uint16_t> upper16(upper16len);
511 icu_err = U_ZERO_ERROR;
512 sk_u_strToUpper((UChar*)(upper16.get()), SkToS32(upper16.size()),
513 (UChar*)(str16.c_str()), str16.size(),
514 locale, &icu_err);
515 SkASSERT(!U_FAILURE(icu_err));
516
517 // ... and back to utf8 'cause that's what we want.
518 return convertUtf16ToUtf8((char16_t*)upper16.get(), upper16.size());
519 }
520
getBidiRegions(const char utf8[],int utf8Units,TextDirection dir,std::vector<BidiRegion> * results)521 bool getBidiRegions(const char utf8[],
522 int utf8Units,
523 TextDirection dir,
524 std::vector<BidiRegion>* results) override {
525 return fBidiFact->ExtractBidi(utf8, utf8Units, dir, results);
526 }
527
getWords(const char utf8[],int utf8Units,const char * locale,std::vector<Position> * results)528 bool getWords(const char utf8[], int utf8Units, const char* locale,
529 std::vector<Position>* results) override {
530
531 // Convert to UTF16 since we want the results in utf16
532 auto utf16 = convertUtf8ToUtf16(utf8, utf8Units);
533 return SkUnicode_icu::extractWords((uint16_t*)utf16.c_str(), utf16.size(), locale, results);
534 }
535
getUtf8Words(const char utf8[],int utf8Units,const char * locale,std::vector<Position> * results)536 bool getUtf8Words(const char utf8[],
537 int utf8Units,
538 const char* locale,
539 std::vector<Position>* results) override {
540 // Convert to UTF16 since we want the results in utf16
541 auto utf16 = convertUtf8ToUtf16(utf8, utf8Units);
542 std::vector<Position> utf16Results;
543 if (!SkUnicode_icu::extractWords(
544 (uint16_t*)utf16.c_str(), utf16.size(), locale, &utf16Results)) {
545 return false;
546 }
547
548 std::vector<Position> mapping;
549 SkSpan<const char> text(utf8, utf8Units);
550 SkUnicode::extractUtfConversionMapping(
551 text, [&](size_t index) { mapping.emplace_back(index); }, [&](size_t index) {});
552
553 for (auto i16 : utf16Results) {
554 results->emplace_back(mapping[i16]);
555 }
556 return true;
557 }
558
getSentences(const char utf8[],int utf8Units,const char * locale,std::vector<SkUnicode::Position> * results)559 bool getSentences(const char utf8[],
560 int utf8Units,
561 const char* locale,
562 std::vector<SkUnicode::Position>* results) override {
563 SkUnicode_icu::extractPositions(
564 utf8, utf8Units, BreakType::kSentences, nullptr,
565 [&](int pos, int status) {
566 results->emplace_back(pos);
567 });
568 return true;
569 }
570
computeCodeUnitFlags(char utf8[],int utf8Units,bool replaceTabs,TArray<SkUnicode::CodeUnitFlags,true> * results)571 bool computeCodeUnitFlags(char utf8[], int utf8Units, bool replaceTabs,
572 TArray<SkUnicode::CodeUnitFlags, true>* results) override {
573 results->clear();
574 results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
575
576 SkUnicode_icu::extractPositions(utf8, utf8Units, BreakType::kLines, nullptr, // TODO: locale
577 [&](int pos, int status) {
578 (*results)[pos] |= status == UBRK_LINE_HARD
579 ? CodeUnitFlags::kHardLineBreakBefore
580 : CodeUnitFlags::kSoftLineBreakBefore;
581 });
582
583 SkUnicode_icu::extractPositions(utf8, utf8Units, BreakType::kGraphemes, nullptr, //TODO
584 [&](int pos, int status) {
585 (*results)[pos] |= CodeUnitFlags::kGraphemeStart;
586 });
587
588 const char* current = utf8;
589 const char* end = utf8 + utf8Units;
590 while (current < end) {
591 auto before = current - utf8;
592 SkUnichar unichar = SkUTF::NextUTF8(¤t, end);
593 if (unichar < 0) unichar = 0xFFFD;
594 auto after = current - utf8;
595 if (replaceTabs && this->isTabulation(unichar)) {
596 results->at(before) |= SkUnicode::kTabulation;
597 if (replaceTabs) {
598 unichar = ' ';
599 utf8[before] = ' ';
600 }
601 }
602 for (auto i = before; i < after; ++i) {
603 if (this->isSpace(unichar)) {
604 results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
605 }
606 if (this->isWhitespace(unichar)) {
607 results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
608 }
609 if (this->isControl(unichar)) {
610 results->at(i) |= SkUnicode::kControl;
611 }
612 if (this->isIdeographic(unichar)) {
613 results->at(i) |= SkUnicode::kIdeographic;
614 }
615 }
616 }
617
618 return true;
619 }
620
computeCodeUnitFlags(char16_t utf16[],int utf16Units,bool replaceTabs,TArray<SkUnicode::CodeUnitFlags,true> * results)621 bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs,
622 TArray<SkUnicode::CodeUnitFlags, true>* results) override {
623 results->clear();
624 results->push_back_n(utf16Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
625
626 // Get white spaces
627 this->forEachCodepoint((char16_t*)&utf16[0], utf16Units,
628 [this, results, replaceTabs, &utf16](SkUnichar unichar, int32_t start, int32_t end) {
629 for (auto i = start; i < end; ++i) {
630 if (replaceTabs && this->isTabulation(unichar)) {
631 results->at(i) |= SkUnicode::kTabulation;
632 if (replaceTabs) {
633 unichar = ' ';
634 utf16[start] = ' ';
635 }
636 }
637 if (this->isSpace(unichar)) {
638 results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
639 }
640 if (this->isWhitespace(unichar)) {
641 results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
642 }
643 if (this->isControl(unichar)) {
644 results->at(i) |= SkUnicode::kControl;
645 }
646 }
647 });
648 // Get graphemes
649 this->forEachBreak((char16_t*)&utf16[0],
650 utf16Units,
651 SkUnicode::BreakType::kGraphemes,
652 [results](SkBreakIterator::Position pos, SkBreakIterator::Status) {
653 (*results)[pos] |= CodeUnitFlags::kGraphemeStart;
654 });
655 // Get line breaks
656 this->forEachBreak(
657 (char16_t*)&utf16[0],
658 utf16Units,
659 SkUnicode::BreakType::kLines,
660 [results](SkBreakIterator::Position pos, SkBreakIterator::Status status) {
661 if (status ==
662 (SkBreakIterator::Status)SkUnicode::LineBreakType::kHardLineBreak) {
663 // Hard line breaks clears off all the other flags
664 // TODO: Treat \n as a formatting mark and do not pass it to SkShaper
665 (*results)[pos-1] = CodeUnitFlags::kHardLineBreakBefore;
666 } else {
667 (*results)[pos] |= CodeUnitFlags::kSoftLineBreakBefore;
668 }
669 });
670
671 return true;
672 }
673
reorderVisual(const BidiLevel runLevels[],int levelsCount,int32_t logicalFromVisual[])674 void reorderVisual(const BidiLevel runLevels[],
675 int levelsCount,
676 int32_t logicalFromVisual[]) override {
677 fBidiFact->bidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
678 }
679
680 private:
681 sk_sp<SkBidiFactory> fBidiFact = sk_make_sp<SkBidiICUFactory>();
682 };
683
684 namespace SkUnicodes::ICU {
Make()685 sk_sp<SkUnicode> Make() {
686 // We haven't yet created a way to encode the ICU data for assembly on Windows,
687 // so we use a helper library to load icudtl.dat from the harddrive.
688 #if defined(SK_USING_THIRD_PARTY_ICU) && defined(SK_BUILD_FOR_WIN)
689 if (!SkLoadICU()) {
690 static SkOnce once;
691 once([] { SkDEBUGF("SkLoadICU() failed!\n"); });
692 return nullptr;
693 }
694 #endif
695 if (SkGetICULib()) {
696 return sk_make_sp<SkUnicode_icu>();
697 }
698 return nullptr;
699 }
700 } // namespace SkUnicodes::ICU
701