xref: /aosp_15_r20/external/icing/icing/util/character-iterator.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_UTIL_CHARACTER_ITERATOR_H_
16 #define ICING_UTIL_CHARACTER_ITERATOR_H_
17 
18 #include <string>
19 #include <string_view>
20 
21 #include "icing/legacy/core/icing-string-util.h"
22 #include "icing/util/i18n-utils.h"
23 #include "unicode/utypes.h"
24 
25 namespace icing {
26 namespace lib {
27 
28 class CharacterIterator {
29  public:
CharacterIterator(std::string_view text)30   explicit CharacterIterator(std::string_view text)
31       : CharacterIterator(text, 0, 0, 0) {}
32 
CharacterIterator(std::string_view text,int utf8_index,int utf16_index,int utf32_index)33   CharacterIterator(std::string_view text, int utf8_index, int utf16_index,
34                     int utf32_index)
35       : text_(text),
36         cached_current_char_(i18n_utils::kInvalidUChar32),
37         utf8_index_(utf8_index),
38         utf16_index_(utf16_index),
39         utf32_index_(utf32_index) {}
40 
41   // Returns the character that the iterator currently points to.
42   // i18n_utils::kInvalidUChar32 if unable to read that character.
43   UChar32 GetCurrentChar() const;
44 
45   // Moves current position to desired_utf8_index.
46   // REQUIRES: 0 <= desired_utf8_index <= text_.length()
47   bool MoveToUtf8(int desired_utf8_index);
48 
49   // Advances from current position to the character that includes the specified
50   // UTF-8 index.
51   // REQUIRES: desired_utf8_index <= text_.length()
52   // desired_utf8_index is allowed to point one index past the end, but no
53   // further.
54   bool AdvanceToUtf8(int desired_utf8_index);
55 
56   // Rewinds from current position to the character that includes the specified
57   // UTF-8 index.
58   // REQUIRES: 0 <= desired_utf8_index
59   bool RewindToUtf8(int desired_utf8_index);
60 
61   // Moves current position to desired_utf16_index.
62   // REQUIRES: 0 <= desired_utf16_index <= text_.utf16_length()
63   bool MoveToUtf16(int desired_utf16_index);
64 
65   // Advances current position to desired_utf16_index.
66   // REQUIRES: desired_utf16_index <= text_.utf16_length()
67   // desired_utf16_index is allowed to point one index past the end, but no
68   // further.
69   bool AdvanceToUtf16(int desired_utf16_index);
70 
71   // Rewinds current position to desired_utf16_index.
72   // REQUIRES: 0 <= desired_utf16_index
73   bool RewindToUtf16(int desired_utf16_index);
74 
75   // Moves current position to desired_utf32_index.
76   // REQUIRES: 0 <= desired_utf32_index <= text_.utf32_length()
77   bool MoveToUtf32(int desired_utf32_index);
78 
79   // Advances current position to desired_utf32_index.
80   // REQUIRES: desired_utf32_index <= text_.utf32_length()
81   // desired_utf32_index is allowed to point one index past the end, but no
82   // further.
83   bool AdvanceToUtf32(int desired_utf32_index);
84 
85   // Rewinds current position to desired_utf32_index.
86   // REQUIRES: 0 <= desired_utf32_index
87   bool RewindToUtf32(int desired_utf32_index);
88 
utf8_index()89   int utf8_index() const { return utf8_index_; }
utf16_index()90   int utf16_index() const { return utf16_index_; }
utf32_index()91   int utf32_index() const { return utf32_index_; }
92 
93   bool operator==(const CharacterIterator& rhs) const {
94     // cached_current_char_ is just that: a cached value. As such, it's not
95     // considered for equality.
96     return text_ == rhs.text_ && utf8_index_ == rhs.utf8_index_ &&
97            utf16_index_ == rhs.utf16_index_ && utf32_index_ == rhs.utf32_index_;
98   }
99 
DebugString()100   std::string DebugString() const {
101     return IcingStringUtil::StringPrintf("(u8:%d,u16:%d,u32:%d)", utf8_index_,
102                                          utf16_index_, utf32_index_);
103   }
104 
105  private:
106   // Resets the character iterator to the start of the text if any of the
107   // indices are negative.
108   void ResetToStartIfNecessary();
109 
110   std::string_view text_;
111   mutable UChar32 cached_current_char_;
112   int utf8_index_;
113   int utf16_index_;
114   int utf32_index_;
115 };
116 
117 }  // namespace lib
118 }  // namespace icing
119 
120 #endif  // ICING_UTIL_CHARACTER_ITERATOR_H_
121