xref: /aosp_15_r20/external/skia/src/base/SkUTF.h (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
1 // Copyright 2018 Google LLC.
2 // Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.
3 #ifndef SkUTF_DEFINED
4 #define SkUTF_DEFINED
5 
6 #include "include/private/base/SkAPI.h"
7 
8 #include <cstddef>
9 #include <cstdint>
10 
11 typedef int32_t SkUnichar;
12 
13 namespace SkUTF {
14 
15 /** Given a sequence of UTF-8 bytes, return the number of unicode codepoints.
16     If the sequence is invalid UTF-8, return -1.
17 */
18 SK_SPI int CountUTF8(const char* utf8, size_t byteLength);
19 
20 /** Given a sequence of aligned UTF-16 characters in machine-endian form,
21     return the number of unicode codepoints.  If the sequence is invalid
22     UTF-16, return -1.
23 */
24 SK_SPI int CountUTF16(const uint16_t* utf16, size_t byteLength);
25 
26 /** Given a sequence of aligned UTF-32 characters in machine-endian form,
27     return the number of unicode codepoints.  If the sequence is invalid
28     UTF-32, return -1.
29 */
30 SK_SPI int CountUTF32(const int32_t* utf32, size_t byteLength);
31 
32 /** Given a sequence of UTF-8 bytes, return the first unicode codepoint.
33     The pointer will be incremented to point at the next codepoint's start.  If
34     invalid UTF-8 is encountered, set *ptr to end and return -1.
35 */
36 SK_SPI SkUnichar NextUTF8(const char** ptr, const char* end);
37 
38 /** Given a sequence of UTF-8 bytes, return the first unicode codepoint.
39     The pointer will be incremented to point at the next codepoint's start.  If
40     invalid UTF-8 is encountered, set *ptr to end and
41     return the replacement character (0xFFFD).
42 */
43 SK_SPI SkUnichar NextUTF8WithReplacement(const char** ptr, const char* end);
44 
45 /** Given a sequence of aligned UTF-16 characters in machine-endian form,
46     return the first unicode codepoint.  The pointer will be incremented to
47     point at the next codepoint's start.  If invalid UTF-16 is encountered,
48     set *ptr to end and return -1.
49 */
50 SK_SPI SkUnichar NextUTF16(const uint16_t** ptr, const uint16_t* end);
51 
52 /** Given a sequence of aligned UTF-32 characters in machine-endian form,
53     return the first unicode codepoint.  The pointer will be incremented to
54     point at the next codepoint's start.  If invalid UTF-32 is encountered,
55     set *ptr to end and return -1.
56 */
57 SK_SPI SkUnichar NextUTF32(const int32_t** ptr, const int32_t* end);
58 
59 constexpr unsigned kMaxBytesInUTF8Sequence = 4;
60 
61 /** Convert the unicode codepoint into UTF-8.  If `utf8` is non-null, place the
62     result in that array.  Return the number of bytes in the result.  If `utf8`
63     is null, simply return the number of bytes that would be used.  For invalid
64     unicode codepoints, return 0.
65 */
66 SK_SPI size_t ToUTF8(SkUnichar uni, char utf8[kMaxBytesInUTF8Sequence] = nullptr);
67 
68 /** Convert the unicode codepoint into UTF-16.  If `utf16` is non-null, place
69     the result in that array.  Return the number of UTF-16 code units in the
70     result (1 or 2).  If `utf16` is null, simply return the number of code
71     units that would be used.  For invalid unicode codepoints, return 0.
72 */
73 SK_SPI size_t ToUTF16(SkUnichar uni, uint16_t utf16[2] = nullptr);
74 
75 /** Returns the number of resulting UTF16 values needed to convert the src utf8 sequence.
76  *  If dst is not null, it is filled with the corresponding values up to its capacity.
77  *  If there is an error, -1 is returned and the dst[] buffer is undefined.
78  */
79 SK_SPI int UTF8ToUTF16(uint16_t dst[], int dstCapacity, const char src[], size_t srcByteLength);
80 
81 /** Returns the number of resulting UTF8 values needed to convert the src utf16 sequence.
82  *  If dst is not null, it is filled with the corresponding values up to its capacity.
83  *  If there is an error, -1 is returned and the dst[] buffer is undefined.
84  */
85 SK_SPI int UTF16ToUTF8(char dst[], int dstCapacity, const uint16_t src[], size_t srcLength);
86 
/**
 * Given a single UTF-16 code unit, returns true iff it is a leading (high)
 * surrogate, i.e. its top six bits are 110110, which places it in the range
 * 0xD800-0xDBFF.
 * https://unicode.org/faq/utf_bom.html#utf16-2
 */
static inline bool IsLeadingSurrogateUTF16(uint16_t c) {
    // Masking with 0xFC00 keeps only the top six bits of the code unit.
    return (c & 0xFC00) == 0xD800;
}
92 
/**
 * Given a single UTF-16 code unit, returns true iff it is a trailing (low)
 * surrogate, i.e. its top six bits are 110111, which places it in the range
 * 0xDC00-0xDFFF.
 * https://unicode.org/faq/utf_bom.html#utf16-2
 */
static inline bool IsTrailingSurrogateUTF16(uint16_t c) {
    // Masking with 0xFC00 keeps only the top six bits of the code unit.
    return (c & 0xFC00) == 0xDC00;
}
98 
99 
100 }  // namespace SkUTF
101 
102 #endif  // SkUTF_DEFINED
103