xref: /aosp_15_r20/external/skia/modules/skunicode/src/SkUnicode_client.cpp (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
1 /*
2 * Copyright 2022 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7 #include "modules/skunicode/include/SkUnicode_client.h"
8 
9 #include "include/core/SkSpan.h"
10 #include "include/core/SkString.h"
11 #include "include/core/SkTypes.h"
12 #include "include/private/base/SkTArray.h"
13 #include "include/private/base/SkTo.h"
14 #include "modules/skunicode/include/SkUnicode.h"
15 #include "modules/skunicode/src/SkBidiFactory_icu_subset.h"
16 #include "modules/skunicode/src/SkUnicode_hardcoded.h"
17 #include "modules/skunicode/src/SkUnicode_icu_bidi.h"
18 #include "src/base/SkBitmaskEnum.h"
19 #include "src/base/SkUTF.h"
20 
21 #include <algorithm>
22 #include <cstdint>
23 #include <memory>
24 #include <string>
25 #include <utility>
26 #include <vector>
27 #include <array>
28 #include <unicode/ubidi.h>
29 #include <unicode/ubrk.h>
30 #include <unicode/uchar.h>
31 #include <unicode/uloc.h>
32 #include <unicode/uscript.h>
33 #include <unicode/ustring.h>
34 #include <unicode/utext.h>
35 #include <unicode/utypes.h>
36 
37 using namespace skia_private;
38 
39 class SkUnicode_client : public SkUnicodeHardCodedCharProperties {
40 public:
41     struct Data {
42         SkSpan<const char> fText8;
43         SkSpan<const char16_t> fText16;
44         std::vector<Position> fWords;
45         std::vector<SkUnicode::Position> fGraphemeBreaks;
46         std::vector<SkUnicode::LineBreakBefore> fLineBreaks;
DataSkUnicode_client::Data47         Data(SkSpan<char> text,
48              std::vector<SkUnicode::Position> words,
49              std::vector<SkUnicode::Position> graphemeBreaks,
50              std::vector<SkUnicode::LineBreakBefore> lineBreaks)
51             : fText8(text)
52             , fText16(SkSpan<const char16_t>(nullptr, 0))
53             , fWords(std::move(words))
54             , fGraphemeBreaks(std::move(graphemeBreaks))
55             , fLineBreaks(std::move(lineBreaks)) {
56         }
57 
resetSkUnicode_client::Data58         void reset() {
59             fText8 = SkSpan<const char>(nullptr, 0);
60             fText16 = SkSpan<const char16_t>(nullptr, 0);
61             fGraphemeBreaks.clear();
62             fLineBreaks.clear();
63         }
64     };
65     SkUnicode_client() = delete;
SkUnicode_client(SkSpan<char> text,std::vector<SkUnicode::Position> words,std::vector<SkUnicode::Position> graphemeBreaks,std::vector<SkUnicode::LineBreakBefore> lineBreaks)66     SkUnicode_client(SkSpan<char> text,
67                      std::vector<SkUnicode::Position> words,
68                      std::vector<SkUnicode::Position> graphemeBreaks,
69                      std::vector<SkUnicode::LineBreakBefore> lineBreaks)
70             : fData(std::make_shared<Data>(text,
71                                            std::move(words),
72                                            std::move(graphemeBreaks),
73                                            std::move(lineBreaks))) { }
74 
75     ~SkUnicode_client() override = default;
76 
reset()77     void reset() { fData->reset(); }
78     // For SkShaper
79     std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
80                                                      SkBidiIterator::Direction dir) override;
81     std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],
82                                                      int count,
83                                                      SkBidiIterator::Direction dir) override;
84     std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
85                                                        BreakType breakType) override;
86     std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType breakType) override;
87     // For SkParagraph
getBidiRegions(const char utf8[],int utf8Units,TextDirection dir,std::vector<BidiRegion> * results)88     bool getBidiRegions(const char utf8[],
89                         int utf8Units,
90                         TextDirection dir,
91                         std::vector<BidiRegion>* results) override {
92         return fBidiFact->ExtractBidi(utf8, utf8Units, dir, results);
93     }
94 
getUtf8Words(const char utf8[],int utf8Units,const char * locale,std::vector<Position> * results)95     bool getUtf8Words(const char utf8[],
96                       int utf8Units,
97                       const char* locale,
98                       std::vector<Position>* results) override {
99         SkDEBUGF("Method 'getUtf8Words' is not implemented\n");
100         return false;
101     }
102 
getSentences(const char utf8[],int utf8Units,const char * locale,std::vector<SkUnicode::Position> * results)103     bool getSentences(const char utf8[],
104                       int utf8Units,
105                       const char* locale,
106                       std::vector<SkUnicode::Position>* results) override {
107         SkDEBUGF("Method 'getSentences' is not implemented\n");
108         return false;
109     }
110 
computeCodeUnitFlags(char utf8[],int utf8Units,bool replaceTabs,TArray<SkUnicode::CodeUnitFlags,true> * results)111     bool computeCodeUnitFlags(char utf8[],
112                               int utf8Units,
113                               bool replaceTabs,
114                               TArray<SkUnicode::CodeUnitFlags, true>* results) override {
115         results->clear();
116         results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
117         for (auto& lineBreak : fData->fLineBreaks) {
118             (*results)[lineBreak.pos] |=
119                 lineBreak.breakType == LineBreakType::kHardLineBreak
120                     ? CodeUnitFlags::kHardLineBreakBefore
121                     : CodeUnitFlags::kSoftLineBreakBefore;
122         }
123         for (auto& grapheme : fData->fGraphemeBreaks) {
124             (*results)[grapheme] |= CodeUnitFlags::kGraphemeStart;
125         }
126         const char* current = utf8;
127         const char* end = utf8 + utf8Units;
128         while (current < end) {
129             auto before = current - utf8;
130             SkUnichar unichar = SkUTF::NextUTF8(&current, end);
131             if (unichar < 0) unichar = 0xFFFD;
132             auto after = current - utf8;
133             if (replaceTabs && this->isTabulation(unichar)) {
134                 results->at(before) |= SkUnicode::kTabulation;
135                 if (replaceTabs) {
136                     unichar = ' ';
137                     utf8[before] = ' ';
138                 }
139             }
140             for (auto i = before; i < after; ++i) {
141                 if (this->isSpace(unichar)) {
142                     results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
143                 }
144                 if (this->isWhitespace(unichar)) {
145                     results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
146                 }
147                 if (this->isControl(unichar)) {
148                     results->at(i) |= SkUnicode::kControl;
149                 }
150             }
151         }
152         return true;
153     }
154 
computeCodeUnitFlags(char16_t utf16[],int utf16Units,bool replaceTabs,TArray<SkUnicode::CodeUnitFlags,true> * results)155     bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs,
156                           TArray<SkUnicode::CodeUnitFlags, true>* results) override {
157         results->clear();
158         results->push_back_n(utf16Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
159         for (auto& lineBreak : fData->fLineBreaks) {
160             (*results)[lineBreak.pos] |=
161                 lineBreak.breakType == LineBreakType::kHardLineBreak
162                     ? CodeUnitFlags::kHardLineBreakBefore
163                     : CodeUnitFlags::kSoftLineBreakBefore;
164         }
165         for (auto& grapheme : fData->fGraphemeBreaks) {
166             (*results)[grapheme] |= CodeUnitFlags::kGraphemeStart;
167         }
168         return true;
169     }
170 
getWords(const char utf8[],int utf8Units,const char * locale,std::vector<Position> * results)171     bool getWords(const char utf8[], int utf8Units, const char* locale, std::vector<Position>* results) override {
172         *results = fData->fWords;
173         return true;
174     }
175 
toUpper(const SkString & str)176     SkString toUpper(const SkString& str) override {
177         return this->toUpper(str, nullptr);
178     }
179 
toUpper(const SkString & str,const char * locale)180     SkString toUpper(const SkString& str, const char* locale) override {
181         return SkString(fData->fText8.data(), fData->fText8.size());
182     }
183 
reorderVisual(const BidiLevel runLevels[],int levelsCount,int32_t logicalFromVisual[])184     void reorderVisual(const BidiLevel runLevels[],
185                        int levelsCount,
186                        int32_t logicalFromVisual[]) override {
187         fBidiFact->bidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
188     }
189 private:
190     friend class SkBreakIterator_client;
191 
192     std::shared_ptr<Data> fData;
193     sk_sp<SkBidiFactory> fBidiFact = sk_make_sp<SkBidiSubsetFactory>();
194 };
195 
196 class SkBreakIterator_client: public SkBreakIterator {
197     std::shared_ptr<SkUnicode_client::Data> fData;
198     Position fLastResult;
199     Position fStart;
200     Position fEnd;
201 public:
SkBreakIterator_client(std::shared_ptr<SkUnicode_client::Data> data)202     explicit SkBreakIterator_client(std::shared_ptr<SkUnicode_client::Data> data) : fData(data) { }
first()203     Position first() override
204       { return fData->fLineBreaks[fStart + (fLastResult = 0)].pos; }
current()205     Position current() override
206       { return fData->fLineBreaks[fStart + fLastResult].pos; }
next()207     Position next() override
208       { return fData->fLineBreaks[fStart + fLastResult + 1].pos; }
status()209     Status status() override {
210         return fData->fLineBreaks[fStart + fLastResult].breakType ==
211                        SkUnicode::LineBreakType::kHardLineBreak
212                        ? SkUnicode::CodeUnitFlags::kHardLineBreakBefore
213                        : SkUnicode::CodeUnitFlags::kSoftLineBreakBefore;
214     }
isDone()215     bool isDone() override { return fStart + fLastResult == fEnd; }
setText(const char utftext8[],int utf8Units)216     bool setText(const char utftext8[], int utf8Units) override {
217         SkASSERT(utftext8 >= fData->fText8.data() &&
218                  utf8Units <= SkToS16(fData->fText8.size()));
219         fStart = utftext8 - fData->fText8.data();
220         fEnd = fStart + utf8Units;
221         fLastResult = 0;
222         return true;
223     }
setText(const char16_t utftext16[],int utf16Units)224     bool setText(const char16_t utftext16[], int utf16Units) override {
225         SkASSERT(utftext16 >= fData->fText16.data() &&
226                  utf16Units <= SkToS16(fData->fText16.size()));
227         fStart = utftext16 - fData->fText16.data();
228         fEnd = fStart + utf16Units;
229         fLastResult = 0;
230         return true;
231     }
232 };
makeBidiIterator(const uint16_t text[],int count,SkBidiIterator::Direction dir)233 std::unique_ptr<SkBidiIterator> SkUnicode_client::makeBidiIterator(const uint16_t text[], int count,
234                                                  SkBidiIterator::Direction dir) {
235     return fBidiFact->MakeIterator(text, count, dir);
236 }
makeBidiIterator(const char text[],int count,SkBidiIterator::Direction dir)237 std::unique_ptr<SkBidiIterator> SkUnicode_client::makeBidiIterator(const char text[],
238                                                  int count,
239                                                  SkBidiIterator::Direction dir) {
240     return fBidiFact->MakeIterator(text, count, dir);
241 }
makeBreakIterator(const char locale[],BreakType breakType)242 std::unique_ptr<SkBreakIterator> SkUnicode_client::makeBreakIterator(const char locale[],
243                                                    BreakType breakType) {
244     return std::make_unique<SkBreakIterator_client>(fData);
245 }
makeBreakIterator(BreakType breakType)246 std::unique_ptr<SkBreakIterator> SkUnicode_client::makeBreakIterator(BreakType breakType) {
247     return std::make_unique<SkBreakIterator_client>(fData);
248 }
249 
250 namespace SkUnicodes::Client {
Make(SkSpan<char> text,std::vector<SkUnicode::Position> words,std::vector<SkUnicode::Position> graphemeBreaks,std::vector<SkUnicode::LineBreakBefore> lineBreaks)251 sk_sp<SkUnicode> Make(
252         SkSpan<char> text,
253         std::vector<SkUnicode::Position> words,
254         std::vector<SkUnicode::Position> graphemeBreaks,
255         std::vector<SkUnicode::LineBreakBefore> lineBreaks) {
256     return sk_make_sp<SkUnicode_client>(text,
257                                         std::move(words),
258                                         std::move(graphemeBreaks),
259                                         std::move(lineBreaks));
260 }
261 }
262 
263 
264