/* * Copyright 2020 Google LLC * * Use of this source code is governed by a BSD-style license that can be * found in the LICENSE file. */ #ifndef SkUnicode_DEFINED #define SkUnicode_DEFINED #include "include/core/SkRefCnt.h" #include "include/core/SkSpan.h" #include "include/core/SkString.h" #include "include/core/SkTypes.h" #include "include/private/base/SkTArray.h" #include "include/private/base/SkTo.h" #include "src/base/SkUTF.h" #include #include #include #include #include namespace sknonstd { template struct is_bitmask_enum; } #if !defined(SKUNICODE_IMPLEMENTATION) #define SKUNICODE_IMPLEMENTATION 0 #endif #if !defined(SKUNICODE_API) #if defined(SKUNICODE_DLL) #if defined(_MSC_VER) #if SKUNICODE_IMPLEMENTATION #define SKUNICODE_API __declspec(dllexport) #else #define SKUNICODE_API __declspec(dllimport) #endif #else #define SKUNICODE_API __attribute__((visibility("default"))) #endif #else #define SKUNICODE_API #endif #endif class SKUNICODE_API SkBidiIterator { public: typedef int32_t Position; typedef uint8_t Level; struct Region { Region(Position start, Position end, Level level) : start(start), end(end), level(level) { } Position start; Position end; Level level; }; enum Direction { kLTR, kRTL, }; virtual ~SkBidiIterator() = default; virtual Position getLength() = 0; virtual Level getLevelAt(Position) = 0; }; class SKUNICODE_API SkBreakIterator { public: typedef int32_t Position; typedef int32_t Status; virtual ~SkBreakIterator() = default; virtual Position first() = 0; virtual Position current() = 0; virtual Position next() = 0; virtual Status status() = 0; virtual bool isDone() = 0; virtual bool setText(const char utftext8[], int utf8Units) = 0; virtual bool setText(const char16_t utftext16[], int utf16Units) = 0; }; class SKUNICODE_API SkUnicode : public SkRefCnt { public: enum CodeUnitFlags { kNoCodeUnitFlag = 0x00, kPartOfWhiteSpaceBreak = 0x01, kGraphemeStart = 0x02, kSoftLineBreakBefore = 0x04, kHardLineBreakBefore = 0x08, kPartOfIntraWordBreak = 0x10, kControl = 0x20, kTabulation = 0x40, kGlyphClusterStart = 0x80, kIdeographic = 0x100, kEmoji = 0x200, kWordBreak = 0x400, kSentenceBreak = 0x800, }; enum class TextDirection { kLTR, kRTL, }; typedef size_t Position; typedef uint8_t BidiLevel; struct BidiRegion { BidiRegion(Position start, Position end, BidiLevel level) : start(start), end(end), level(level) { } Position start; Position end; BidiLevel level; }; enum class LineBreakType { kSoftLineBreak = 0, kHardLineBreak = 100, }; enum class BreakType { kWords, kGraphemes, kLines, kSentences }; struct LineBreakBefore { LineBreakBefore(Position pos, LineBreakType breakType) : pos(pos), breakType(breakType) { } Position pos; LineBreakType breakType; }; ~SkUnicode() override = default; // deprecated virtual SkString toUpper(const SkString&) = 0; virtual SkString toUpper(const SkString&, const char* locale) = 0; virtual bool isControl(SkUnichar utf8) = 0; virtual bool isWhitespace(SkUnichar utf8) = 0; virtual bool isSpace(SkUnichar utf8) = 0; virtual bool isTabulation(SkUnichar utf8) = 0; virtual bool isHardBreak(SkUnichar utf8) = 0; /** * Returns if a code point may start an emoji sequence. * Returns true for '#', '*', and '0'-'9' since they may start an emoji sequence. * To determine if a list of code points begins with an emoji sequence, use * getEmojiSequence. **/ virtual bool isEmoji(SkUnichar utf8) = 0; virtual bool isEmojiComponent(SkUnichar utf8) = 0; virtual bool isEmojiModifierBase(SkUnichar utf8) = 0; virtual bool isEmojiModifier(SkUnichar utf8) = 0; virtual bool isRegionalIndicator(SkUnichar utf8) = 0; virtual bool isIdeographic(SkUnichar utf8) = 0; // Methods used in SkShaper and SkText virtual std::unique_ptr makeBidiIterator (const uint16_t text[], int count, SkBidiIterator::Direction) = 0; virtual std::unique_ptr makeBidiIterator (const char text[], int count, SkBidiIterator::Direction) = 0; virtual std::unique_ptr makeBreakIterator (const char locale[], BreakType breakType) = 0; virtual std::unique_ptr makeBreakIterator(BreakType type) = 0; // Methods used in SkParagraph static bool hasTabulationFlag(SkUnicode::CodeUnitFlags flags); static bool hasHardLineBreakFlag(SkUnicode::CodeUnitFlags flags); static bool hasSoftLineBreakFlag(SkUnicode::CodeUnitFlags flags); static bool hasGraphemeStartFlag(SkUnicode::CodeUnitFlags flags); static bool hasControlFlag(SkUnicode::CodeUnitFlags flags); static bool hasPartOfWhiteSpaceBreakFlag(SkUnicode::CodeUnitFlags flags); static bool extractBidi(const char utf8[], int utf8Units, TextDirection dir, std::vector* bidiRegions); virtual bool getBidiRegions(const char utf8[], int utf8Units, TextDirection dir, std::vector* results) = 0; // Returns results in utf16 virtual bool getWords(const char utf8[], int utf8Units, const char* locale, std::vector* results) = 0; virtual bool getUtf8Words(const char utf8[], int utf8Units, const char* locale, std::vector* results) = 0; virtual bool getSentences(const char utf8[], int utf8Units, const char* locale, std::vector* results) = 0; virtual bool computeCodeUnitFlags( char utf8[], int utf8Units, bool replaceTabs, skia_private::TArray* results) = 0; virtual bool computeCodeUnitFlags( char16_t utf16[], int utf16Units, bool replaceTabs, skia_private::TArray* results) = 0; static SkString convertUtf16ToUtf8(const char16_t * utf16, int utf16Units); static SkString convertUtf16ToUtf8(const std::u16string& utf16); static std::u16string convertUtf8ToUtf16(const char* utf8, int utf8Units); static std::u16string convertUtf8ToUtf16(const SkString& utf8); template static bool extractUtfConversionMapping(SkSpan utf8, Appender8&& appender8, Appender16&& appender16) { size_t size8 = 0; size_t size16 = 0; auto ptr = utf8.begin(); auto end = utf8.end(); while (ptr < end) { size_t index = SkToSizeT(ptr - utf8.begin()); SkUnichar u = SkUTF::NextUTF8(&ptr, end); // All UTF8 code units refer to the same codepoint size_t next = SkToSizeT(ptr - utf8.begin()); for (auto i = index; i < next; ++i) { //fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size()); appender16(size8); ++size16; } //SkASSERT(fUTF16IndexForUTF8Index.size() == next); SkASSERT(size16 == next); if (size16 != next) { return false; } // One or two UTF16 code units refer to the same codepoint uint16_t buffer[2]; size_t count = SkUTF::ToUTF16(u, buffer); //fUTF8IndexForUTF16Index.emplace_back(index); appender8(index); ++size8; if (count > 1) { //fUTF8IndexForUTF16Index.emplace_back(index); appender8(index); ++size8; } } //fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size()); appender16(size8); ++size16; //fUTF8IndexForUTF16Index.emplace_back(fText.size()); appender8(utf8.size()); ++size8; return true; } template void forEachCodepoint(const char* utf8, int32_t utf8Units, Callback&& callback) { const char* current = utf8; const char* end = utf8 + utf8Units; while (current < end) { auto before = current - utf8; SkUnichar unichar = SkUTF::NextUTF8(¤t, end); if (unichar < 0) unichar = 0xFFFD; auto after = current - utf8; uint16_t buffer[2]; size_t count = SkUTF::ToUTF16(unichar, buffer); callback(unichar, before, after, count); } } template void forEachCodepoint(const char16_t* utf16, int32_t utf16Units, Callback&& callback) { const char16_t* current = utf16; const char16_t* end = utf16 + utf16Units; while (current < end) { auto before = current - utf16; SkUnichar unichar = SkUTF::NextUTF16((const uint16_t**)¤t, (const uint16_t*)end); auto after = current - utf16; callback(unichar, before, after); } } template void forEachBidiRegion(const uint16_t utf16[], int utf16Units, SkBidiIterator::Direction dir, Callback&& callback) { auto iter = makeBidiIterator(utf16, utf16Units, dir); const uint16_t* start16 = utf16; const uint16_t* end16 = utf16 + utf16Units; SkBidiIterator::Level currentLevel = 0; SkBidiIterator::Position pos16 = 0; while (pos16 <= iter->getLength()) { auto level = iter->getLevelAt(pos16); if (pos16 == 0) { currentLevel = level; } else if (level != currentLevel) { callback(pos16, start16 - utf16, currentLevel); currentLevel = level; } if (start16 == end16) { break; } SkUnichar u = SkUTF::NextUTF16(&start16, end16); pos16 += SkUTF::ToUTF16(u); } } template void forEachBreak(const char16_t utf16[], int utf16Units, SkUnicode::BreakType type, Callback&& callback) { auto iter = makeBreakIterator(type); iter->setText(utf16, utf16Units); auto pos = iter->first(); do { callback(pos, iter->status()); pos = iter->next(); } while (!iter->isDone()); } virtual void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) = 0; }; namespace sknonstd { template <> struct is_bitmask_enum : std::true_type {}; } // namespace sknonstd #endif // SkUnicode_DEFINED