xref: /aosp_15_r20/external/abseil-cpp/absl/strings/internal/str_split_internal.h (revision 9356374a3709195abf420251b3e825997ff56c0f)
1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 
16 // This file declares INTERNAL parts of the Split API that are inline/templated
17 // or otherwise need to be available at compile time. The main abstractions
18 // defined in here are
19 //
20 //   - ConvertibleToStringView
21 //   - SplitIterator<>
22 //   - Splitter<>
23 //
24 // DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
25 // absl/strings/str_split.h.
26 //
27 // IWYU pragma: private, include "absl/strings/str_split.h"
28 
29 #ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
30 #define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
31 
32 #include <array>
33 #include <cstddef>
34 #include <initializer_list>
35 #include <iterator>
36 #include <tuple>
37 #include <type_traits>
38 #include <utility>
39 #include <vector>
40 
41 #include "absl/base/macros.h"
42 #include "absl/base/port.h"
43 #include "absl/meta/type_traits.h"
44 #include "absl/strings/string_view.h"
45 
46 #ifdef _GLIBCXX_DEBUG
47 #include "absl/strings/internal/stl_type_traits.h"
48 #endif  // _GLIBCXX_DEBUG
49 
50 namespace absl {
51 ABSL_NAMESPACE_BEGIN
52 namespace strings_internal {
53 
54 // This class is implicitly constructible from everything that absl::string_view
55 // is implicitly constructible from, except for rvalue strings.  This means it
56 // can be used as a function parameter in places where passing a temporary
57 // string might cause memory lifetime issues.
58 class ConvertibleToStringView {
59  public:
ConvertibleToStringView(const char * s)60   ConvertibleToStringView(const char* s)  // NOLINT(runtime/explicit)
61       : value_(s) {}
ConvertibleToStringView(char * s)62   ConvertibleToStringView(char* s) : value_(s) {}  // NOLINT(runtime/explicit)
ConvertibleToStringView(absl::string_view s)63   ConvertibleToStringView(absl::string_view s)     // NOLINT(runtime/explicit)
64       : value_(s) {}
ConvertibleToStringView(const std::string & s)65   ConvertibleToStringView(const std::string& s)  // NOLINT(runtime/explicit)
66       : value_(s) {}
67 
68   // Disable conversion from rvalue strings.
69   ConvertibleToStringView(std::string&& s) = delete;
70   ConvertibleToStringView(const std::string&& s) = delete;
71 
value()72   absl::string_view value() const { return value_; }
73 
74  private:
75   absl::string_view value_;
76 };
77 
78 // An iterator that enumerates the parts of a string from a Splitter. The text
79 // to be split, the Delimiter, and the Predicate are all taken from the given
80 // Splitter object. Iterators may only be compared if they refer to the same
81 // Splitter instance.
82 //
83 // This class is NOT part of the public splitting API.
84 template <typename Splitter>
85 class SplitIterator {
86  public:
87   using iterator_category = std::input_iterator_tag;
88   using value_type = absl::string_view;
89   using difference_type = ptrdiff_t;
90   using pointer = const value_type*;
91   using reference = const value_type&;
92 
93   enum State { kInitState, kLastState, kEndState };
SplitIterator(State state,const Splitter * splitter)94   SplitIterator(State state, const Splitter* splitter)
95       : pos_(0),
96         state_(state),
97         splitter_(splitter),
98         delimiter_(splitter->delimiter()),
99         predicate_(splitter->predicate()) {
100     // Hack to maintain backward compatibility. This one block makes it so an
101     // empty absl::string_view whose .data() happens to be nullptr behaves
102     // *differently* from an otherwise empty absl::string_view whose .data() is
103     // not nullptr. This is an undesirable difference in general, but this
104     // behavior is maintained to avoid breaking existing code that happens to
105     // depend on this old behavior/bug. Perhaps it will be fixed one day. The
106     // difference in behavior is as follows:
107     //   Split(absl::string_view(""), '-');  // {""}
108     //   Split(absl::string_view(), '-');    // {}
109     if (splitter_->text().data() == nullptr) {
110       state_ = kEndState;
111       pos_ = splitter_->text().size();
112       return;
113     }
114 
115     if (state_ == kEndState) {
116       pos_ = splitter_->text().size();
117     } else {
118       ++(*this);
119     }
120   }
121 
at_end()122   bool at_end() const { return state_ == kEndState; }
123 
124   reference operator*() const { return curr_; }
125   pointer operator->() const { return &curr_; }
126 
127   SplitIterator& operator++() {
128     do {
129       if (state_ == kLastState) {
130         state_ = kEndState;
131         return *this;
132       }
133       const absl::string_view text = splitter_->text();
134       const absl::string_view d = delimiter_.Find(text, pos_);
135       if (d.data() == text.data() + text.size()) state_ = kLastState;
136       curr_ = text.substr(pos_,
137                           static_cast<size_t>(d.data() - (text.data() + pos_)));
138       pos_ += curr_.size() + d.size();
139     } while (!predicate_(curr_));
140     return *this;
141   }
142 
143   SplitIterator operator++(int) {
144     SplitIterator old(*this);
145     ++(*this);
146     return old;
147   }
148 
149   friend bool operator==(const SplitIterator& a, const SplitIterator& b) {
150     return a.state_ == b.state_ && a.pos_ == b.pos_;
151   }
152 
153   friend bool operator!=(const SplitIterator& a, const SplitIterator& b) {
154     return !(a == b);
155   }
156 
157  private:
158   size_t pos_;
159   State state_;
160   absl::string_view curr_;
161   const Splitter* splitter_;
162   typename Splitter::DelimiterType delimiter_;
163   typename Splitter::PredicateType predicate_;
164 };
165 
166 // HasMappedType<T>::value is true iff there exists a type T::mapped_type.
167 template <typename T, typename = void>
168 struct HasMappedType : std::false_type {};
169 template <typename T>
170 struct HasMappedType<T, absl::void_t<typename T::mapped_type>>
171     : std::true_type {};
172 
173 // HasValueType<T>::value is true iff there exists a type T::value_type.
174 template <typename T, typename = void>
175 struct HasValueType : std::false_type {};
176 template <typename T>
177 struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type {
178 };
179 
180 // HasConstIterator<T>::value is true iff there exists a type T::const_iterator.
181 template <typename T, typename = void>
182 struct HasConstIterator : std::false_type {};
183 template <typename T>
184 struct HasConstIterator<T, absl::void_t<typename T::const_iterator>>
185     : std::true_type {};
186 
187 // HasEmplace<T>::value is true iff there exists a method T::emplace().
188 template <typename T, typename = void>
189 struct HasEmplace : std::false_type {};
190 template <typename T>
191 struct HasEmplace<T, absl::void_t<decltype(std::declval<T>().emplace())>>
192     : std::true_type {};
193 
// IsInitializerList<T>::value is true iff T is an std::initializer_list. It is
// consulted by SplitterIsConvertibleTo<>.
//
// Detection is done by overload resolution on a T*: a pointer to some
// std::initializer_list<U> selects the templated overload, and every other
// pointer type falls through to the variadic catch-all. The overloads are
// declared only; they are never called.
template <typename U>
std::true_type IsInitializerListDispatch(std::initializer_list<U>*);
std::false_type IsInitializerListDispatch(...);  // fallback: No
template <typename T>
struct IsInitializerList
    : decltype(IsInitializerListDispatch(std::declval<T*>())) {};
202 
203 // SplitterIsConvertibleTo<C>::value is true iff the specified condition
204 // is true for type 'C'.
205 //
206 // Restricts conversion to container-like types (by testing for the presence of
207 // a const_iterator member type) and also to disable conversion to an
208 // std::initializer_list (which also has a const_iterator). Otherwise, code
209 // compiled in C++11 will get an error due to ambiguous conversion paths (in
210 // C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T>
211 // or an std::initializer_list<T>).
212 
213 template <typename C, bool has_value_type, bool has_mapped_type>
214 struct SplitterIsConvertibleToImpl : std::false_type {};
215 
216 template <typename C>
217 struct SplitterIsConvertibleToImpl<C, true, false>
218     : std::is_constructible<typename C::value_type, absl::string_view> {};
219 
220 template <typename C>
221 struct SplitterIsConvertibleToImpl<C, true, true>
222     : absl::conjunction<
223           std::is_constructible<typename C::key_type, absl::string_view>,
224           std::is_constructible<typename C::mapped_type, absl::string_view>> {};
225 
226 template <typename C>
227 struct SplitterIsConvertibleTo
228     : SplitterIsConvertibleToImpl<
229           C,
230 #ifdef _GLIBCXX_DEBUG
231           !IsStrictlyBaseOfAndConvertibleToSTLContainer<C>::value &&
232 #endif  // _GLIBCXX_DEBUG
233               !IsInitializerList<
234                   typename std::remove_reference<C>::type>::value &&
235               HasValueType<C>::value && HasConstIterator<C>::value,
236           HasMappedType<C>::value> {
237 };
238 
239 template <typename StringType, typename Container, typename = void>
240 struct ShouldUseLifetimeBound : std::false_type {};
241 
242 template <typename StringType, typename Container>
243 struct ShouldUseLifetimeBound<
244     StringType, Container,
245     std::enable_if_t<
246         std::is_same<StringType, std::string>::value &&
247         std::is_same<typename Container::value_type, absl::string_view>::value>>
248     : std::true_type {};
249 
250 template <typename StringType, typename First, typename Second>
251 using ShouldUseLifetimeBoundForPair = std::integral_constant<
252     bool, std::is_same<StringType, std::string>::value &&
253               (std::is_same<First, absl::string_view>::value ||
254                std::is_same<Second, absl::string_view>::value)>;
255 
256 
257 // This class implements the range that is returned by absl::StrSplit(). This
258 // class has templated conversion operators that allow it to be implicitly
259 // converted to a variety of types that the caller may have specified on the
260 // left-hand side of an assignment.
261 //
262 // The main interface for interacting with this class is through its implicit
263 // conversion operators. However, this class may also be used like a container
264 // in that it has .begin() and .end() member functions. It may also be used
265 // within a range-for loop.
266 //
267 // Output containers can be collections of any type that is constructible from
268 // an absl::string_view.
269 //
270 // A Predicate functor may be supplied. This predicate will be used to filter
271 // the split strings: only strings for which the predicate returns true will be
272 // kept. A Predicate object is any unary functor that takes an absl::string_view
273 // and returns bool.
274 //
275 // The StringType parameter can be either string_view or string, depending on
276 // whether the Splitter refers to a string stored elsewhere, or if the string
277 // resides inside the Splitter itself.
278 template <typename Delimiter, typename Predicate, typename StringType>
279 class Splitter {
280  public:
281   using DelimiterType = Delimiter;
282   using PredicateType = Predicate;
283   using const_iterator = strings_internal::SplitIterator<Splitter>;
284   using value_type = typename std::iterator_traits<const_iterator>::value_type;
285 
286   Splitter(StringType input_text, Delimiter d, Predicate p)
287       : text_(std::move(input_text)),
288         delimiter_(std::move(d)),
289         predicate_(std::move(p)) {}
290 
291   absl::string_view text() const { return text_; }
292   const Delimiter& delimiter() const { return delimiter_; }
293   const Predicate& predicate() const { return predicate_; }
294 
295   // Range functions that iterate the split substrings as absl::string_view
296   // objects. These methods enable a Splitter to be used in a range-based for
297   // loop.
298   const_iterator begin() const { return {const_iterator::kInitState, this}; }
299   const_iterator end() const { return {const_iterator::kEndState, this}; }
300 
301   // An implicit conversion operator that is restricted to only those containers
302   // that the splitter is convertible to.
303   template <
304       typename Container,
305       std::enable_if_t<ShouldUseLifetimeBound<StringType, Container>::value &&
306                            SplitterIsConvertibleTo<Container>::value,
307                        std::nullptr_t> = nullptr>
308   // NOLINTNEXTLINE(google-explicit-constructor)
309   operator Container() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
310     return ConvertToContainer<Container, typename Container::value_type,
311                               HasMappedType<Container>::value>()(*this);
312   }
313 
314   template <
315       typename Container,
316       std::enable_if_t<!ShouldUseLifetimeBound<StringType, Container>::value &&
317                            SplitterIsConvertibleTo<Container>::value,
318                        std::nullptr_t> = nullptr>
319   // NOLINTNEXTLINE(google-explicit-constructor)
320   operator Container() const {
321     return ConvertToContainer<Container, typename Container::value_type,
322                               HasMappedType<Container>::value>()(*this);
323   }
324 
325   // Returns a pair with its .first and .second members set to the first two
326   // strings returned by the begin() iterator. Either/both of .first and .second
327   // will be constructed with empty strings if the iterator doesn't have a
328   // corresponding value.
329   template <typename First, typename Second,
330             std::enable_if_t<
331                 ShouldUseLifetimeBoundForPair<StringType, First, Second>::value,
332                 std::nullptr_t> = nullptr>
333   // NOLINTNEXTLINE(google-explicit-constructor)
334   operator std::pair<First, Second>() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
335     return ConvertToPair<First, Second>();
336   }
337 
338   template <typename First, typename Second,
339             std::enable_if_t<!ShouldUseLifetimeBoundForPair<StringType, First,
340                                                             Second>::value,
341                              std::nullptr_t> = nullptr>
342   // NOLINTNEXTLINE(google-explicit-constructor)
343   operator std::pair<First, Second>() const {
344     return ConvertToPair<First, Second>();
345   }
346 
347  private:
348   template <typename First, typename Second>
349   std::pair<First, Second> ConvertToPair() const {
350     absl::string_view first, second;
351     auto it = begin();
352     if (it != end()) {
353       first = *it;
354       if (++it != end()) {
355         second = *it;
356       }
357     }
358     return {First(first), Second(second)};
359   }
360 
361   // ConvertToContainer is a functor converting a Splitter to the requested
362   // Container of ValueType. It is specialized below to optimize splitting to
363   // certain combinations of Container and ValueType.
364   //
365   // This base template handles the generic case of storing the split results in
366   // the requested non-map-like container and converting the split substrings to
367   // the requested type.
368   template <typename Container, typename ValueType, bool is_map = false>
369   struct ConvertToContainer {
370     Container operator()(const Splitter& splitter) const {
371       Container c;
372       auto it = std::inserter(c, c.end());
373       for (const auto& sp : splitter) {
374         *it++ = ValueType(sp);
375       }
376       return c;
377     }
378   };
379 
380   // Partial specialization for a std::vector<absl::string_view>.
381   //
382   // Optimized for the common case of splitting to a
383   // std::vector<absl::string_view>. In this case we first split the results to
384   // a small array of absl::string_view on the stack, to reduce reallocations.
385   template <typename A>
386   struct ConvertToContainer<std::vector<absl::string_view, A>,
387                             absl::string_view, false> {
388     std::vector<absl::string_view, A> operator()(
389         const Splitter& splitter) const {
390       struct raw_view {
391         const char* data;
392         size_t size;
393         operator absl::string_view() const {  // NOLINT(runtime/explicit)
394           return {data, size};
395         }
396       };
397       std::vector<absl::string_view, A> v;
398       std::array<raw_view, 16> ar;
399       for (auto it = splitter.begin(); !it.at_end();) {
400         size_t index = 0;
401         do {
402           ar[index].data = it->data();
403           ar[index].size = it->size();
404           ++it;
405         } while (++index != ar.size() && !it.at_end());
406         // We static_cast index to a signed type to work around overzealous
407         // compiler warnings about signedness.
408         v.insert(v.end(), ar.begin(),
409                  ar.begin() + static_cast<ptrdiff_t>(index));
410       }
411       return v;
412     }
413   };
414 
415   // Partial specialization for a std::vector<std::string>.
416   //
417   // Optimized for the common case of splitting to a std::vector<std::string>.
418   // In this case we first split the results to a std::vector<absl::string_view>
419   // so the returned std::vector<std::string> can have space reserved to avoid
420   // std::string moves.
421   template <typename A>
422   struct ConvertToContainer<std::vector<std::string, A>, std::string, false> {
423     std::vector<std::string, A> operator()(const Splitter& splitter) const {
424       const std::vector<absl::string_view> v = splitter;
425       return std::vector<std::string, A>(v.begin(), v.end());
426     }
427   };
428 
429   // Partial specialization for containers of pairs (e.g., maps).
430   //
431   // The algorithm is to insert a new pair into the map for each even-numbered
432   // item, with the even-numbered item as the key with a default-constructed
433   // value. Each odd-numbered item will then be assigned to the last pair's
434   // value.
435   template <typename Container, typename First, typename Second>
436   struct ConvertToContainer<Container, std::pair<const First, Second>, true> {
437     using iterator = typename Container::iterator;
438 
439     Container operator()(const Splitter& splitter) const {
440       Container m;
441       iterator it;
442       bool insert = true;
443       for (const absl::string_view sv : splitter) {
444         if (insert) {
445           it = InsertOrEmplace(&m, sv);
446         } else {
447           it->second = Second(sv);
448         }
449         insert = !insert;
450       }
451       return m;
452     }
453 
454     // Inserts the key and an empty value into the map, returning an iterator to
455     // the inserted item. We use emplace() if available, otherwise insert().
456     template <typename M>
457     static absl::enable_if_t<HasEmplace<M>::value, iterator> InsertOrEmplace(
458         M* m, absl::string_view key) {
459       // Use piecewise_construct to support old versions of gcc in which pair
460       // constructor can't otherwise construct string from string_view.
461       return ToIter(m->emplace(std::piecewise_construct, std::make_tuple(key),
462                                std::tuple<>()));
463     }
464     template <typename M>
465     static absl::enable_if_t<!HasEmplace<M>::value, iterator> InsertOrEmplace(
466         M* m, absl::string_view key) {
467       return ToIter(m->insert(std::make_pair(First(key), Second(""))));
468     }
469 
470     static iterator ToIter(std::pair<iterator, bool> pair) {
471       return pair.first;
472     }
473     static iterator ToIter(iterator iter) { return iter; }
474   };
475 
476   StringType text_;
477   Delimiter delimiter_;
478   Predicate predicate_;
479 };
480 
481 }  // namespace strings_internal
482 ABSL_NAMESPACE_END
483 }  // namespace absl
484 
485 #endif  // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
486