xref: /aosp_15_r20/external/libtextclassifier/native/utils/strings/stringpiece.h (revision 993b0882672172b81d12fad7a7ac0c3e5c824a12)
1*993b0882SAndroid Build Coastguard Worker /*
2*993b0882SAndroid Build Coastguard Worker  * Copyright (C) 2018 The Android Open Source Project
3*993b0882SAndroid Build Coastguard Worker  *
4*993b0882SAndroid Build Coastguard Worker  * Licensed under the Apache License, Version 2.0 (the "License");
5*993b0882SAndroid Build Coastguard Worker  * you may not use this file except in compliance with the License.
6*993b0882SAndroid Build Coastguard Worker  * You may obtain a copy of the License at
7*993b0882SAndroid Build Coastguard Worker  *
8*993b0882SAndroid Build Coastguard Worker  *      http://www.apache.org/licenses/LICENSE-2.0
9*993b0882SAndroid Build Coastguard Worker  *
10*993b0882SAndroid Build Coastguard Worker  * Unless required by applicable law or agreed to in writing, software
11*993b0882SAndroid Build Coastguard Worker  * distributed under the License is distributed on an "AS IS" BASIS,
12*993b0882SAndroid Build Coastguard Worker  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*993b0882SAndroid Build Coastguard Worker  * See the License for the specific language governing permissions and
14*993b0882SAndroid Build Coastguard Worker  * limitations under the License.
15*993b0882SAndroid Build Coastguard Worker  */
16*993b0882SAndroid Build Coastguard Worker 
17*993b0882SAndroid Build Coastguard Worker #ifndef LIBTEXTCLASSIFIER_UTILS_STRINGS_STRINGPIECE_H_
18*993b0882SAndroid Build Coastguard Worker #define LIBTEXTCLASSIFIER_UTILS_STRINGS_STRINGPIECE_H_
19*993b0882SAndroid Build Coastguard Worker 
#include <cstddef>
#include <cstring>
#include <string>

#include "utils/base/logging.h"
24*993b0882SAndroid Build Coastguard Worker 
25*993b0882SAndroid Build Coastguard Worker namespace libtextclassifier3 {
26*993b0882SAndroid Build Coastguard Worker 
27*993b0882SAndroid Build Coastguard Worker // Read-only "view" of a piece of data.  Does not own the underlying data.
28*993b0882SAndroid Build Coastguard Worker class StringPiece {
29*993b0882SAndroid Build Coastguard Worker  public:
30*993b0882SAndroid Build Coastguard Worker   static constexpr size_t npos = static_cast<size_t>(-1);
31*993b0882SAndroid Build Coastguard Worker 
StringPiece()32*993b0882SAndroid Build Coastguard Worker   StringPiece() : StringPiece(nullptr, 0) {}
33*993b0882SAndroid Build Coastguard Worker 
StringPiece(const char * str)34*993b0882SAndroid Build Coastguard Worker   StringPiece(const char* str)  // NOLINT(runtime/explicit)
35*993b0882SAndroid Build Coastguard Worker       : start_(str), size_(str == nullptr ? 0 : strlen(str)) {}
36*993b0882SAndroid Build Coastguard Worker 
StringPiece(const char * start,size_t size)37*993b0882SAndroid Build Coastguard Worker   StringPiece(const char* start, size_t size) : start_(start), size_(size) {}
38*993b0882SAndroid Build Coastguard Worker 
39*993b0882SAndroid Build Coastguard Worker   // Intentionally no "explicit" keyword: in function calls, we want strings to
40*993b0882SAndroid Build Coastguard Worker   // be converted to StringPiece implicitly.
StringPiece(const std::string & s)41*993b0882SAndroid Build Coastguard Worker   StringPiece(const std::string& s)  // NOLINT(runtime/explicit)
42*993b0882SAndroid Build Coastguard Worker       : StringPiece(s.data(), s.size()) {}
43*993b0882SAndroid Build Coastguard Worker 
StringPiece(const std::string & s,int offset,int len)44*993b0882SAndroid Build Coastguard Worker   StringPiece(const std::string& s, int offset, int len)
45*993b0882SAndroid Build Coastguard Worker       : StringPiece(s.data() + offset, len) {}
46*993b0882SAndroid Build Coastguard Worker 
47*993b0882SAndroid Build Coastguard Worker   char operator[](size_t i) const { return start_[i]; }
48*993b0882SAndroid Build Coastguard Worker 
49*993b0882SAndroid Build Coastguard Worker   // Returns start address of underlying data.
data()50*993b0882SAndroid Build Coastguard Worker   const char* data() const { return start_; }
51*993b0882SAndroid Build Coastguard Worker 
52*993b0882SAndroid Build Coastguard Worker   // Returns number of bytes of underlying data.
size()53*993b0882SAndroid Build Coastguard Worker   size_t size() const { return size_; }
length()54*993b0882SAndroid Build Coastguard Worker   size_t length() const { return size_; }
55*993b0882SAndroid Build Coastguard Worker 
empty()56*993b0882SAndroid Build Coastguard Worker   bool empty() const { return size_ == 0; }
57*993b0882SAndroid Build Coastguard Worker 
58*993b0882SAndroid Build Coastguard Worker   // Returns a std::string containing a copy of the underlying data.
ToString()59*993b0882SAndroid Build Coastguard Worker   std::string ToString() const { return std::string(data(), size()); }
60*993b0882SAndroid Build Coastguard Worker 
61*993b0882SAndroid Build Coastguard Worker   // Returns whether string ends with a given suffix.
EndsWith(StringPiece suffix)62*993b0882SAndroid Build Coastguard Worker   bool EndsWith(StringPiece suffix) const {
63*993b0882SAndroid Build Coastguard Worker     return suffix.empty() || (size_ >= suffix.size() &&
64*993b0882SAndroid Build Coastguard Worker                               memcmp(start_ + (size_ - suffix.size()),
65*993b0882SAndroid Build Coastguard Worker                                      suffix.data(), suffix.size()) == 0);
66*993b0882SAndroid Build Coastguard Worker   }
67*993b0882SAndroid Build Coastguard Worker 
68*993b0882SAndroid Build Coastguard Worker   // Returns whether the string begins with a given prefix.
StartsWith(StringPiece prefix)69*993b0882SAndroid Build Coastguard Worker   bool StartsWith(StringPiece prefix) const {
70*993b0882SAndroid Build Coastguard Worker     return prefix.empty() ||
71*993b0882SAndroid Build Coastguard Worker            (size_ >= prefix.size() &&
72*993b0882SAndroid Build Coastguard Worker             memcmp(start_, prefix.data(), prefix.size()) == 0);
73*993b0882SAndroid Build Coastguard Worker   }
74*993b0882SAndroid Build Coastguard Worker 
Equals(StringPiece other)75*993b0882SAndroid Build Coastguard Worker   bool Equals(StringPiece other) const {
76*993b0882SAndroid Build Coastguard Worker     return size() == other.size() && memcmp(start_, other.data(), size_) == 0;
77*993b0882SAndroid Build Coastguard Worker   }
78*993b0882SAndroid Build Coastguard Worker 
79*993b0882SAndroid Build Coastguard Worker   // Removes the first `n` characters from the string piece. Note that the
80*993b0882SAndroid Build Coastguard Worker   // underlying string is not changed, only the view.
RemovePrefix(int n)81*993b0882SAndroid Build Coastguard Worker   void RemovePrefix(int n) {
82*993b0882SAndroid Build Coastguard Worker     TC3_CHECK_LE(n, size_);
83*993b0882SAndroid Build Coastguard Worker     start_ += n;
84*993b0882SAndroid Build Coastguard Worker     size_ -= n;
85*993b0882SAndroid Build Coastguard Worker   }
86*993b0882SAndroid Build Coastguard Worker 
87*993b0882SAndroid Build Coastguard Worker   // Removes the last `n` characters from the string piece. Note that the
88*993b0882SAndroid Build Coastguard Worker   // underlying string is not changed, only the view.
RemoveSuffix(int n)89*993b0882SAndroid Build Coastguard Worker   void RemoveSuffix(int n) {
90*993b0882SAndroid Build Coastguard Worker     TC3_CHECK_LE(n, size_);
91*993b0882SAndroid Build Coastguard Worker     size_ -= n;
92*993b0882SAndroid Build Coastguard Worker   }
93*993b0882SAndroid Build Coastguard Worker 
94*993b0882SAndroid Build Coastguard Worker   // Finds the first occurrence of the substring `s` within the `StringPiece`,
95*993b0882SAndroid Build Coastguard Worker   // returning the position of the first character's match, or `npos` if no
96*993b0882SAndroid Build Coastguard Worker   // match was found.
97*993b0882SAndroid Build Coastguard Worker   // Here
98*993b0882SAndroid Build Coastguard Worker   // - c is the char to search for in the StringPiece
99*993b0882SAndroid Build Coastguard Worker   // - pos is the position at which to start the search.
100*993b0882SAndroid Build Coastguard Worker   size_t find(char c, size_t pos = 0) const noexcept {
101*993b0882SAndroid Build Coastguard Worker     if (empty() || pos >= size_) {
102*993b0882SAndroid Build Coastguard Worker       return npos;
103*993b0882SAndroid Build Coastguard Worker     }
104*993b0882SAndroid Build Coastguard Worker     const char* result =
105*993b0882SAndroid Build Coastguard Worker         static_cast<const char*>(memchr(start_ + pos, c, size_ - pos));
106*993b0882SAndroid Build Coastguard Worker     return result != nullptr ? result - start_ : npos;
107*993b0882SAndroid Build Coastguard Worker   }
108*993b0882SAndroid Build Coastguard Worker 
109*993b0882SAndroid Build Coastguard Worker   size_t find(StringPiece s, size_t pos = 0) const noexcept {
110*993b0882SAndroid Build Coastguard Worker     if (empty() || pos >= size_) {
111*993b0882SAndroid Build Coastguard Worker       if (empty() && pos == 0 && s.empty()) {
112*993b0882SAndroid Build Coastguard Worker         return 0;
113*993b0882SAndroid Build Coastguard Worker       }
114*993b0882SAndroid Build Coastguard Worker       return npos;
115*993b0882SAndroid Build Coastguard Worker     }
116*993b0882SAndroid Build Coastguard Worker     const char* result = memmatch(start_ + pos, size_ - pos, s.start_, s.size_);
117*993b0882SAndroid Build Coastguard Worker     return result ? result - start_ : npos;
118*993b0882SAndroid Build Coastguard Worker   }
119*993b0882SAndroid Build Coastguard Worker 
120*993b0882SAndroid Build Coastguard Worker  private:
memmatch(const char * phaystack,size_t haylen,const char * pneedle,size_t neelen)121*993b0882SAndroid Build Coastguard Worker   const char* memmatch(const char* phaystack, size_t haylen,
122*993b0882SAndroid Build Coastguard Worker                        const char* pneedle, size_t neelen) const {
123*993b0882SAndroid Build Coastguard Worker     if (0 == neelen) {
124*993b0882SAndroid Build Coastguard Worker       return phaystack;  // Even if haylen is 0.
125*993b0882SAndroid Build Coastguard Worker     }
126*993b0882SAndroid Build Coastguard Worker     if (haylen < neelen) {
127*993b0882SAndroid Build Coastguard Worker       return nullptr;
128*993b0882SAndroid Build Coastguard Worker     }
129*993b0882SAndroid Build Coastguard Worker 
130*993b0882SAndroid Build Coastguard Worker     const char* match;
131*993b0882SAndroid Build Coastguard Worker     const char* hayend = phaystack + haylen - neelen + 1;
132*993b0882SAndroid Build Coastguard Worker     while ((match = static_cast<const char*>(
133*993b0882SAndroid Build Coastguard Worker                 memchr(phaystack, pneedle[0], hayend - phaystack)))) {
134*993b0882SAndroid Build Coastguard Worker       if (memcmp(match, pneedle, neelen) == 0) {
135*993b0882SAndroid Build Coastguard Worker         return match;
136*993b0882SAndroid Build Coastguard Worker       } else {
137*993b0882SAndroid Build Coastguard Worker         phaystack = match + 1;
138*993b0882SAndroid Build Coastguard Worker       }
139*993b0882SAndroid Build Coastguard Worker     }
140*993b0882SAndroid Build Coastguard Worker     return nullptr;
141*993b0882SAndroid Build Coastguard Worker   }
142*993b0882SAndroid Build Coastguard Worker 
143*993b0882SAndroid Build Coastguard Worker   const char* start_;  // Not owned.
144*993b0882SAndroid Build Coastguard Worker   size_t size_;
145*993b0882SAndroid Build Coastguard Worker };
146*993b0882SAndroid Build Coastguard Worker 
EndsWith(StringPiece text,StringPiece suffix)147*993b0882SAndroid Build Coastguard Worker inline bool EndsWith(StringPiece text, StringPiece suffix) {
148*993b0882SAndroid Build Coastguard Worker   return text.EndsWith(suffix);
149*993b0882SAndroid Build Coastguard Worker }
150*993b0882SAndroid Build Coastguard Worker 
StartsWith(StringPiece text,StringPiece prefix)151*993b0882SAndroid Build Coastguard Worker inline bool StartsWith(StringPiece text, StringPiece prefix) {
152*993b0882SAndroid Build Coastguard Worker   return text.StartsWith(prefix);
153*993b0882SAndroid Build Coastguard Worker }
154*993b0882SAndroid Build Coastguard Worker 
ConsumePrefix(StringPiece * text,StringPiece prefix)155*993b0882SAndroid Build Coastguard Worker inline bool ConsumePrefix(StringPiece* text, StringPiece prefix) {
156*993b0882SAndroid Build Coastguard Worker   if (!text->StartsWith(prefix)) {
157*993b0882SAndroid Build Coastguard Worker     return false;
158*993b0882SAndroid Build Coastguard Worker   }
159*993b0882SAndroid Build Coastguard Worker   text->RemovePrefix(prefix.size());
160*993b0882SAndroid Build Coastguard Worker   return true;
161*993b0882SAndroid Build Coastguard Worker }
162*993b0882SAndroid Build Coastguard Worker 
ConsumeSuffix(StringPiece * text,StringPiece suffix)163*993b0882SAndroid Build Coastguard Worker inline bool ConsumeSuffix(StringPiece* text, StringPiece suffix) {
164*993b0882SAndroid Build Coastguard Worker   if (!text->EndsWith(suffix)) {
165*993b0882SAndroid Build Coastguard Worker     return false;
166*993b0882SAndroid Build Coastguard Worker   }
167*993b0882SAndroid Build Coastguard Worker   text->RemoveSuffix(suffix.size());
168*993b0882SAndroid Build Coastguard Worker   return true;
169*993b0882SAndroid Build Coastguard Worker }
170*993b0882SAndroid Build Coastguard Worker 
171*993b0882SAndroid Build Coastguard Worker inline logging::LoggingStringStream& operator<<(
172*993b0882SAndroid Build Coastguard Worker     logging::LoggingStringStream& stream, StringPiece message) {
173*993b0882SAndroid Build Coastguard Worker   stream.message.append(message.data(), message.size());
174*993b0882SAndroid Build Coastguard Worker   return stream;
175*993b0882SAndroid Build Coastguard Worker }
176*993b0882SAndroid Build Coastguard Worker 
177*993b0882SAndroid Build Coastguard Worker }  // namespace libtextclassifier3
178*993b0882SAndroid Build Coastguard Worker 
179*993b0882SAndroid Build Coastguard Worker #endif  // LIBTEXTCLASSIFIER_UTILS_STRINGS_STRINGPIECE_H_
180