1*993b0882SAndroid Build Coastguard Worker /*
2*993b0882SAndroid Build Coastguard Worker * Copyright (C) 2018 The Android Open Source Project
3*993b0882SAndroid Build Coastguard Worker *
4*993b0882SAndroid Build Coastguard Worker * Licensed under the Apache License, Version 2.0 (the "License");
5*993b0882SAndroid Build Coastguard Worker * you may not use this file except in compliance with the License.
6*993b0882SAndroid Build Coastguard Worker * You may obtain a copy of the License at
7*993b0882SAndroid Build Coastguard Worker *
8*993b0882SAndroid Build Coastguard Worker * http://www.apache.org/licenses/LICENSE-2.0
9*993b0882SAndroid Build Coastguard Worker *
10*993b0882SAndroid Build Coastguard Worker * Unless required by applicable law or agreed to in writing, software
11*993b0882SAndroid Build Coastguard Worker * distributed under the License is distributed on an "AS IS" BASIS,
12*993b0882SAndroid Build Coastguard Worker * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*993b0882SAndroid Build Coastguard Worker * See the License for the specific language governing permissions and
14*993b0882SAndroid Build Coastguard Worker * limitations under the License.
15*993b0882SAndroid Build Coastguard Worker */
16*993b0882SAndroid Build Coastguard Worker
17*993b0882SAndroid Build Coastguard Worker #ifndef LIBTEXTCLASSIFIER_UTILS_STRINGS_STRINGPIECE_H_
18*993b0882SAndroid Build Coastguard Worker #define LIBTEXTCLASSIFIER_UTILS_STRINGS_STRINGPIECE_H_
19*993b0882SAndroid Build Coastguard Worker
#include <cstddef>
#include <cstring>
#include <string>

#include "utils/base/logging.h"
24*993b0882SAndroid Build Coastguard Worker
25*993b0882SAndroid Build Coastguard Worker namespace libtextclassifier3 {
26*993b0882SAndroid Build Coastguard Worker
// Read-only "view" of a piece of data. Does not own the underlying data, so
// the viewed buffer has to stay alive for as long as the view is used.
class StringPiece {
 public:
  // Value returned by the find() overloads when nothing was found.
  static constexpr size_t npos = static_cast<size_t>(-1);

  // Creates an empty view: null data pointer, size 0.
  StringPiece() : StringPiece(nullptr, 0) {}

  // Views a NUL-terminated C string (the terminator is not part of the view).
  // A null `str` yields an empty view.
  // Intentionally no "explicit" keyword: in function calls, we want C strings
  // to be converted to StringPiece implicitly.
  StringPiece(const char* str)  // NOLINT(runtime/explicit)
      : start_(str), size_(str == nullptr ? 0 : strlen(str)) {}

  // Views `size` bytes beginning at `start`.
  StringPiece(const char* start, size_t size) : start_(start), size_(size) {}

  // Views the whole contents of `s`.
  // Intentionally no "explicit" keyword: in function calls, we want strings to
  // be converted to StringPiece implicitly.
  StringPiece(const std::string& s)  // NOLINT(runtime/explicit)
      : StringPiece(s.data(), s.size()) {}

  // Views `len` bytes of `s` beginning at byte `offset`. Neither argument is
  // range-checked here; the caller must make sure the range lies within `s`.
  StringPiece(const std::string& s, int offset, int len)
      : StringPiece(s.data() + offset, len) {}

  // Returns the byte at index `i`. No bounds check is performed.
  char operator[](size_t i) const { return start_[i]; }

  // Returns start address of underlying data.
  const char* data() const { return start_; }

  // Returns number of bytes of underlying data.
  size_t size() const { return size_; }
  size_t length() const { return size_; }

  // Returns whether the view has zero bytes.
  bool empty() const { return size_ == 0; }

  // Returns a std::string containing a copy of the underlying data.
  std::string ToString() const { return std::string(data(), size()); }

  // Returns whether string ends with a given suffix. An empty suffix always
  // matches.
  bool EndsWith(StringPiece suffix) const {
    return suffix.empty() || (size_ >= suffix.size() &&
                              memcmp(start_ + (size_ - suffix.size()),
                                     suffix.data(), suffix.size()) == 0);
  }

  // Returns whether the string begins with a given prefix. An empty prefix
  // always matches.
  bool StartsWith(StringPiece prefix) const {
    return prefix.empty() ||
           (size_ >= prefix.size() &&
            memcmp(start_, prefix.data(), prefix.size()) == 0);
  }

  // Returns whether both views have the same size and the same bytes.
  bool Equals(StringPiece other) const {
    if (size_ != other.size_) {
      return false;
    }
    // Two empty views are equal. Answering here keeps memcmp from being
    // called with the null data pointer of a default-constructed view.
    return size_ == 0 || memcmp(start_, other.start_, size_) == 0;
  }

  // Removes the first `n` characters from the string piece. Note that the
  // underlying string is not changed, only the view.
  // `n` is checked against size() with TC3_CHECK_LE.
  void RemovePrefix(int n) {
    TC3_CHECK_LE(n, size_);
    start_ += n;
    size_ -= n;
  }

  // Removes the last `n` characters from the string piece. Note that the
  // underlying string is not changed, only the view.
  // `n` is checked against size() with TC3_CHECK_LE.
  void RemoveSuffix(int n) {
    TC3_CHECK_LE(n, size_);
    size_ -= n;
  }

  // Finds the first occurrence of the character `c` within the `StringPiece`,
  // returning its position, or `npos` if there is none.
  // Here
  // - c is the char to search for in the StringPiece
  // - pos is the position at which to start the search.
  size_t find(char c, size_t pos = 0) const noexcept {
    // Also covers the empty view, where every `pos` is >= size_.
    if (pos >= size_) {
      return npos;
    }
    const char* result =
        static_cast<const char*>(memchr(start_ + pos, c, size_ - pos));
    return result != nullptr ? static_cast<size_t>(result - start_) : npos;
  }

  // Finds the first occurrence of the substring `s` within the `StringPiece`,
  // returning the position of the first character's match, or `npos` if no
  // match was found.
  // Here
  // - s is the substring to search for in the StringPiece
  // - pos is the position at which to start the search.
  // An empty `s` matches at `pos` for every `pos` up to and including size().
  size_t find(StringPiece s, size_t pos = 0) const noexcept {
    if (pos > size_) {
      return npos;
    }
    if (s.empty()) {
      // Handled without touching start_, which is null for a
      // default-constructed view.
      return pos;
    }
    const char* result = memmatch(start_ + pos, size_ - pos, s.start_, s.size_);
    return result != nullptr ? static_cast<size_t>(result - start_) : npos;
  }

 private:
  // Returns a pointer to the first occurrence of the `neelen` bytes at
  // `pneedle` inside the `haylen` bytes at `phaystack`, or nullptr if there is
  // none. An empty needle matches at `phaystack`.
  static const char* memmatch(const char* phaystack, size_t haylen,
                              const char* pneedle, size_t neelen) {
    if (0 == neelen) {
      return phaystack;  // Even if haylen is 0.
    }
    if (haylen < neelen) {
      return nullptr;
    }

    const char* match;
    // One past the last position at which a full needle still fits.
    const char* hayend = phaystack + haylen - neelen + 1;
    // Jump to each candidate first byte, then compare the whole needle there.
    while ((match = static_cast<const char*>(
                memchr(phaystack, pneedle[0], hayend - phaystack)))) {
      if (memcmp(match, pneedle, neelen) == 0) {
        return match;
      } else {
        phaystack = match + 1;
      }
    }
    return nullptr;
  }

  const char* start_;  // Not owned.
  size_t size_;
};
146*993b0882SAndroid Build Coastguard Worker
EndsWith(StringPiece text,StringPiece suffix)147*993b0882SAndroid Build Coastguard Worker inline bool EndsWith(StringPiece text, StringPiece suffix) {
148*993b0882SAndroid Build Coastguard Worker return text.EndsWith(suffix);
149*993b0882SAndroid Build Coastguard Worker }
150*993b0882SAndroid Build Coastguard Worker
StartsWith(StringPiece text,StringPiece prefix)151*993b0882SAndroid Build Coastguard Worker inline bool StartsWith(StringPiece text, StringPiece prefix) {
152*993b0882SAndroid Build Coastguard Worker return text.StartsWith(prefix);
153*993b0882SAndroid Build Coastguard Worker }
154*993b0882SAndroid Build Coastguard Worker
ConsumePrefix(StringPiece * text,StringPiece prefix)155*993b0882SAndroid Build Coastguard Worker inline bool ConsumePrefix(StringPiece* text, StringPiece prefix) {
156*993b0882SAndroid Build Coastguard Worker if (!text->StartsWith(prefix)) {
157*993b0882SAndroid Build Coastguard Worker return false;
158*993b0882SAndroid Build Coastguard Worker }
159*993b0882SAndroid Build Coastguard Worker text->RemovePrefix(prefix.size());
160*993b0882SAndroid Build Coastguard Worker return true;
161*993b0882SAndroid Build Coastguard Worker }
162*993b0882SAndroid Build Coastguard Worker
ConsumeSuffix(StringPiece * text,StringPiece suffix)163*993b0882SAndroid Build Coastguard Worker inline bool ConsumeSuffix(StringPiece* text, StringPiece suffix) {
164*993b0882SAndroid Build Coastguard Worker if (!text->EndsWith(suffix)) {
165*993b0882SAndroid Build Coastguard Worker return false;
166*993b0882SAndroid Build Coastguard Worker }
167*993b0882SAndroid Build Coastguard Worker text->RemoveSuffix(suffix.size());
168*993b0882SAndroid Build Coastguard Worker return true;
169*993b0882SAndroid Build Coastguard Worker }
170*993b0882SAndroid Build Coastguard Worker
171*993b0882SAndroid Build Coastguard Worker inline logging::LoggingStringStream& operator<<(
172*993b0882SAndroid Build Coastguard Worker logging::LoggingStringStream& stream, StringPiece message) {
173*993b0882SAndroid Build Coastguard Worker stream.message.append(message.data(), message.size());
174*993b0882SAndroid Build Coastguard Worker return stream;
175*993b0882SAndroid Build Coastguard Worker }
176*993b0882SAndroid Build Coastguard Worker
177*993b0882SAndroid Build Coastguard Worker } // namespace libtextclassifier3
178*993b0882SAndroid Build Coastguard Worker
179*993b0882SAndroid Build Coastguard Worker #endif // LIBTEXTCLASSIFIER_UTILS_STRINGS_STRINGPIECE_H_
180