1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_UTIL_CHARACTER_ITERATOR_H_ 16 #define ICING_UTIL_CHARACTER_ITERATOR_H_ 17 18 #include <string> 19 #include <string_view> 20 21 #include "icing/legacy/core/icing-string-util.h" 22 #include "icing/util/i18n-utils.h" 23 #include "unicode/utypes.h" 24 25 namespace icing { 26 namespace lib { 27 28 class CharacterIterator { 29 public: CharacterIterator(std::string_view text)30 explicit CharacterIterator(std::string_view text) 31 : CharacterIterator(text, 0, 0, 0) {} 32 CharacterIterator(std::string_view text,int utf8_index,int utf16_index,int utf32_index)33 CharacterIterator(std::string_view text, int utf8_index, int utf16_index, 34 int utf32_index) 35 : text_(text), 36 cached_current_char_(i18n_utils::kInvalidUChar32), 37 utf8_index_(utf8_index), 38 utf16_index_(utf16_index), 39 utf32_index_(utf32_index) {} 40 41 // Returns the character that the iterator currently points to. 42 // i18n_utils::kInvalidUChar32 if unable to read that character. 43 UChar32 GetCurrentChar() const; 44 45 // Moves current position to desired_utf8_index. 46 // REQUIRES: 0 <= desired_utf8_index <= text_.length() 47 bool MoveToUtf8(int desired_utf8_index); 48 49 // Advances from current position to the character that includes the specified 50 // UTF-8 index. 51 // REQUIRES: desired_utf8_index <= text_.length() 52 // desired_utf8_index is allowed to point one index past the end, but no 53 // further. 54 bool AdvanceToUtf8(int desired_utf8_index); 55 56 // Rewinds from current position to the character that includes the specified 57 // UTF-8 index. 58 // REQUIRES: 0 <= desired_utf8_index 59 bool RewindToUtf8(int desired_utf8_index); 60 61 // Moves current position to desired_utf16_index. 62 // REQUIRES: 0 <= desired_utf16_index <= text_.utf16_length() 63 bool MoveToUtf16(int desired_utf16_index); 64 65 // Advances current position to desired_utf16_index. 66 // REQUIRES: desired_utf16_index <= text_.utf16_length() 67 // desired_utf16_index is allowed to point one index past the end, but no 68 // further. 69 bool AdvanceToUtf16(int desired_utf16_index); 70 71 // Rewinds current position to desired_utf16_index. 72 // REQUIRES: 0 <= desired_utf16_index 73 bool RewindToUtf16(int desired_utf16_index); 74 75 // Moves current position to desired_utf32_index. 76 // REQUIRES: 0 <= desired_utf32_index <= text_.utf32_length() 77 bool MoveToUtf32(int desired_utf32_index); 78 79 // Advances current position to desired_utf32_index. 80 // REQUIRES: desired_utf32_index <= text_.utf32_length() 81 // desired_utf32_index is allowed to point one index past the end, but no 82 // further. 83 bool AdvanceToUtf32(int desired_utf32_index); 84 85 // Rewinds current position to desired_utf32_index. 86 // REQUIRES: 0 <= desired_utf32_index 87 bool RewindToUtf32(int desired_utf32_index); 88 utf8_index()89 int utf8_index() const { return utf8_index_; } utf16_index()90 int utf16_index() const { return utf16_index_; } utf32_index()91 int utf32_index() const { return utf32_index_; } 92 93 bool operator==(const CharacterIterator& rhs) const { 94 // cached_current_char_ is just that: a cached value. As such, it's not 95 // considered for equality. 96 return text_ == rhs.text_ && utf8_index_ == rhs.utf8_index_ && 97 utf16_index_ == rhs.utf16_index_ && utf32_index_ == rhs.utf32_index_; 98 } 99 DebugString()100 std::string DebugString() const { 101 return IcingStringUtil::StringPrintf("(u8:%d,u16:%d,u32:%d)", utf8_index_, 102 utf16_index_, utf32_index_); 103 } 104 105 private: 106 // Resets the character iterator to the start of the text if any of the 107 // indices are negative. 108 void ResetToStartIfNecessary(); 109 110 std::string_view text_; 111 mutable UChar32 cached_current_char_; 112 int utf8_index_; 113 int utf16_index_; 114 int utf32_index_; 115 }; 116 117 } // namespace lib 118 } // namespace icing 119 120 #endif // ICING_UTIL_CHARACTER_ITERATOR_H_ 121