1 /*
2 * Copyright 2022 Google LLC
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7 #include "modules/skunicode/src/SkUnicode_icu_bidi.h"
8
9 #include "include/core/SkTypes.h"
10 #include "include/private/base/SkDebug.h"
11 #include "include/private/base/SkTFitsIn.h"
12 #include "src/base/SkUTF.h"
13
14 #include <unicode/ubidi.h>
15 #include <unicode/umachine.h>
16 #include <unicode/utypes.h>
17
18 #include <cstdint>
19 #include <memory>
20 #include <string>
21 #include <utility>
22 #include <vector>
23
24 namespace {
25 using SkUnicodeBidi = std::unique_ptr<UBiDi, SkBidiFactory::BidiCloseCallback>;
26
27 class SkBidiIterator_icu : public SkBidiIterator {
28 public:
SkBidiIterator_icu(SkUnicodeBidi bidi,sk_sp<SkBidiFactory> fact)29 SkBidiIterator_icu(SkUnicodeBidi bidi, sk_sp<SkBidiFactory> fact)
30 : fBidi(std::move(bidi)), fBidiFact(std::move(fact)) {}
31
getLength()32 Position getLength() override { return fBidiFact->bidi_getLength(fBidi.get()); }
33
getLevelAt(Position pos)34 Level getLevelAt(Position pos) override { return fBidiFact->bidi_getLevelAt(fBidi.get(), pos); }
35
36 private:
37 SkUnicodeBidi fBidi;
38 sk_sp<SkBidiFactory> fBidiFact;
39 };
40 } // namespace
41
MakeIterator(const uint16_t utf16[],int utf16Units,SkBidiIterator::Direction dir) const42 std::unique_ptr<SkBidiIterator> SkBidiFactory::MakeIterator(const uint16_t utf16[],
43 int utf16Units,
44 SkBidiIterator::Direction dir) const {
45 UErrorCode status = U_ZERO_ERROR;
46 SkUnicodeBidi bidi(this->bidi_openSized(utf16Units, 0, &status), this->bidi_close_callback());
47 if (U_FAILURE(status)) {
48 SkDEBUGF("Bidi error: %s", this->errorName(status));
49 return nullptr;
50 }
51 SkASSERT(bidi);
52 uint8_t bidiLevel = (dir == SkBidiIterator::kLTR) ? UBIDI_LTR : UBIDI_RTL;
53 // The required lifetime of utf16 isn't well documented.
54 // It appears it isn't used after ubidi_setPara except through ubidi_getText.
55 this->bidi_setPara(bidi.get(), (const UChar*)utf16, utf16Units, bidiLevel, nullptr, &status);
56 if (U_FAILURE(status)) {
57 SkDEBUGF("Bidi error: %s", this->errorName(status));
58 return nullptr;
59 }
60 return std::unique_ptr<SkBidiIterator>(
61 new SkBidiIterator_icu(std::move(bidi), sk_ref_sp(this)));
62 }
63
MakeIterator(const char utf8[],int utf8Units,SkBidiIterator::Direction dir) const64 std::unique_ptr<SkBidiIterator> SkBidiFactory::MakeIterator(const char utf8[],
65 int utf8Units,
66 SkBidiIterator::Direction dir) const {
67 // Convert utf8 into utf16 since ubidi only accepts utf16
68 if (!SkTFitsIn<int32_t>(utf8Units)) {
69 SkDEBUGF("Bidi error: text too long");
70 return nullptr;
71 }
72
73 // Getting the length like this seems to always set U_BUFFER_OVERFLOW_ERROR
74 int utf16Units = SkUTF::UTF8ToUTF16(nullptr, 0, utf8, utf8Units);
75 if (utf16Units < 0) {
76 SkDEBUGF("Bidi error: Invalid utf8 input");
77 return nullptr;
78 }
79 std::unique_ptr<uint16_t[]> utf16(new uint16_t[utf16Units]);
80 SkDEBUGCODE(int dstLen =) SkUTF::UTF8ToUTF16(utf16.get(), utf16Units, utf8, utf8Units);
81 SkASSERT(dstLen == utf16Units);
82
83 return MakeIterator(utf16.get(), utf16Units, dir);
84 }
85
86 /** Replaces invalid utf-8 sequences with REPLACEMENT CHARACTER U+FFFD. */
utf8_next(const char ** ptr,const char * end)87 static inline SkUnichar utf8_next(const char** ptr, const char* end) {
88 SkUnichar val = SkUTF::NextUTF8(ptr, end);
89 return val < 0 ? 0xFFFD : val;
90 }
91
ExtractBidi(const char utf8[],int utf8Units,SkUnicode::TextDirection dir,std::vector<SkUnicode::BidiRegion> * bidiRegions) const92 bool SkBidiFactory::ExtractBidi(const char utf8[],
93 int utf8Units,
94 SkUnicode::TextDirection dir,
95 std::vector<SkUnicode::BidiRegion>* bidiRegions) const {
96 // Convert to UTF16 since for now bidi iterator only operates on utf16
97 auto utf16 = SkUnicode::convertUtf8ToUtf16(utf8, utf8Units);
98
99 // Create bidi iterator
100 UErrorCode status = U_ZERO_ERROR;
101 SkUnicodeBidi bidi(this->bidi_openSized(utf16.size(), 0, &status), this->bidi_close_callback());
102 if (U_FAILURE(status)) {
103 SkDEBUGF("Bidi error: %s", this->errorName(status));
104 return false;
105 }
106 SkASSERT(bidi);
107 uint8_t bidiLevel = (dir == SkUnicode::TextDirection::kLTR) ? UBIDI_LTR : UBIDI_RTL;
108 // The required lifetime of utf16 isn't well documented.
109 // It appears it isn't used after ubidi_setPara except through ubidi_getText.
110 this->bidi_setPara(
111 bidi.get(), (const UChar*)utf16.c_str(), utf16.size(), bidiLevel, nullptr, &status);
112 if (U_FAILURE(status)) {
113 SkDEBUGF("Bidi error: %s", this->errorName(status));
114 return false;
115 }
116
117 // Iterate through bidi regions and the result positions into utf8
118 const char* start8 = utf8;
119 const char* end8 = utf8 + utf8Units;
120 SkUnicode::BidiLevel currentLevel = 0;
121
122 SkUnicode::Position pos8 = 0;
123 SkUnicode::Position pos16 = 0;
124 SkUnicode::Position end16 = this->bidi_getLength(bidi.get());
125
126 if (end16 == 0) {
127 return true;
128 }
129 if (this->bidi_getDirection(bidi.get()) != UBIDI_MIXED) {
130 // The entire paragraph is unidirectional.
131 bidiRegions->emplace_back(0, utf8Units, this->bidi_getLevelAt(bidi.get(), 0));
132 return true;
133 }
134
135 while (pos16 < end16) {
136 auto level = this->bidi_getLevelAt(bidi.get(), pos16);
137 if (pos16 == 0) {
138 currentLevel = level;
139 } else if (level != currentLevel) {
140 SkUnicode::Position end = start8 - utf8;
141 bidiRegions->emplace_back(pos8, end, currentLevel);
142 currentLevel = level;
143 pos8 = end;
144 }
145 SkUnichar u = utf8_next(&start8, end8);
146 pos16 += SkUTF::ToUTF16(u);
147 }
148 SkUnicode::Position end = start8 - utf8;
149 if (end != pos8) {
150 bidiRegions->emplace_back(pos8, end, currentLevel);
151 }
152 return true;
153 }
154