1 // © 2017 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 // bytesinkutil.h 5 // created: 2017sep14 Markus W. Scherer 6 7 #ifndef BYTESINKUTIL_H 8 #define BYTESINKUTIL_H 9 10 #include <type_traits> 11 12 #include "unicode/utypes.h" 13 #include "unicode/bytestream.h" 14 #include "unicode/edits.h" 15 #include "charstr.h" 16 #include "cmemory.h" 17 #include "uassert.h" 18 #include "ustr_imp.h" 19 20 U_NAMESPACE_BEGIN 21 22 class ByteSink; 23 class Edits; 24 25 class U_COMMON_API CharStringByteSink : public ByteSink { 26 public: 27 CharStringByteSink(CharString* dest); 28 ~CharStringByteSink() override; 29 30 CharStringByteSink() = delete; 31 CharStringByteSink(const CharStringByteSink&) = delete; 32 CharStringByteSink& operator=(const CharStringByteSink&) = delete; 33 34 void Append(const char* bytes, int32_t n) override; 35 36 char* GetAppendBuffer(int32_t min_capacity, 37 int32_t desired_capacity_hint, 38 char* scratch, 39 int32_t scratch_capacity, 40 int32_t* result_capacity) override; 41 42 private: 43 CharString& dest_; 44 }; 45 46 // CharString doesn't provide the public API that StringByteSink requires a 47 // string class to have so this template specialization replaces the default 48 // implementation of StringByteSink<CharString> with CharStringByteSink. 49 template<> 50 class StringByteSink<CharString> : public CharStringByteSink { 51 public: StringByteSink(CharString * dest)52 StringByteSink(CharString* dest) : CharStringByteSink(dest) { } StringByteSink(CharString * dest,int32_t)53 StringByteSink(CharString* dest, int32_t /*initialAppendCapacity*/) : CharStringByteSink(dest) { } 54 }; 55 56 class U_COMMON_API ByteSinkUtil { 57 public: 58 ByteSinkUtil() = delete; // all static 59 60 /** (length) bytes were mapped to valid (s16, s16Length). */ 61 static UBool appendChange(int32_t length, 62 const char16_t *s16, int32_t s16Length, 63 ByteSink &sink, Edits *edits, UErrorCode &errorCode); 64 65 /** The bytes at [s, limit[ were mapped to valid (s16, s16Length). */ 66 static UBool appendChange(const uint8_t *s, const uint8_t *limit, 67 const char16_t *s16, int32_t s16Length, 68 ByteSink &sink, Edits *edits, UErrorCode &errorCode); 69 70 /** (length) bytes were mapped/changed to valid code point c. */ 71 static void appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits = nullptr); 72 73 /** The few bytes at [src, nextSrc[ were mapped/changed to valid code point c. */ 74 static inline void appendCodePoint(const uint8_t *src, const uint8_t *nextSrc, UChar32 c, 75 ByteSink &sink, Edits *edits = nullptr) { 76 appendCodePoint((int32_t)(nextSrc - src), c, sink, edits); 77 } 78 79 /** Append the two-byte character (U+0080..U+07FF). */ 80 static void appendTwoBytes(UChar32 c, ByteSink &sink); 81 appendUnchanged(const uint8_t * s,int32_t length,ByteSink & sink,uint32_t options,Edits * edits,UErrorCode & errorCode)82 static UBool appendUnchanged(const uint8_t *s, int32_t length, 83 ByteSink &sink, uint32_t options, Edits *edits, 84 UErrorCode &errorCode) { 85 if (U_FAILURE(errorCode)) { return false; } 86 if (length > 0) { appendNonEmptyUnchanged(s, length, sink, options, edits); } 87 return true; 88 } 89 90 static UBool appendUnchanged(const uint8_t *s, const uint8_t *limit, 91 ByteSink &sink, uint32_t options, Edits *edits, 92 UErrorCode &errorCode); 93 94 /** 95 * Calls a lambda that writes to a ByteSink with a CheckedArrayByteSink 96 * and then returns through u_terminateChars(), in order to implement 97 * the classic ICU4C C API writing to a fix sized buffer on top of a 98 * contemporary C++ API. 99 * 100 * @param buffer receiving buffer 101 * @param capacity capacity of receiving buffer 102 * @param lambda that gets called with the sink as an argument 103 * @param status set to U_BUFFER_OVERFLOW_ERROR on overflow 104 * @return number of bytes written, or needed (in case of overflow) 105 * @internal 106 */ 107 template <typename F, 108 typename = std::enable_if_t< 109 std::is_invocable_r_v<void, F, ByteSink&, UErrorCode&>>> viaByteSinkToTerminatedChars(char * buffer,int32_t capacity,F && lambda,UErrorCode & status)110 static int32_t viaByteSinkToTerminatedChars(char* buffer, int32_t capacity, 111 F&& lambda, 112 UErrorCode& status) { 113 if (U_FAILURE(status)) { return 0; } 114 CheckedArrayByteSink sink(buffer, capacity); 115 lambda(sink, status); 116 if (U_FAILURE(status)) { return 0; } 117 118 int32_t reslen = sink.NumberOfBytesAppended(); 119 120 if (sink.Overflowed()) { 121 status = U_BUFFER_OVERFLOW_ERROR; 122 return reslen; 123 } 124 125 return u_terminateChars(buffer, capacity, reslen, &status); 126 } 127 128 /** 129 * Calls a lambda that writes to a ByteSink with a CharStringByteSink and 130 * then returns a CharString, in order to implement a contemporary C++ API 131 * on top of a C/C++ compatibility ByteSink API. 132 * 133 * @param lambda that gets called with the sink as an argument 134 * @param status to check and report 135 * @return the resulting string, or an empty string (in case of error) 136 * @internal 137 */ 138 template <typename F, 139 typename = std::enable_if_t< 140 std::is_invocable_r_v<void, F, ByteSink&, UErrorCode&>>> viaByteSinkToCharString(F && lambda,UErrorCode & status)141 static CharString viaByteSinkToCharString(F&& lambda, UErrorCode& status) { 142 if (U_FAILURE(status)) { return {}; } 143 CharString result; 144 CharStringByteSink sink(&result); 145 lambda(sink, status); 146 return result; 147 } 148 149 private: 150 static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length, 151 ByteSink &sink, uint32_t options, Edits *edits); 152 }; 153 154 U_NAMESPACE_END 155 156 #endif //BYTESINKUTIL_H 157