xref: /aosp_15_r20/external/openscreen/third_party/abseil/src/absl/strings/cord.cc (revision 3f982cf4871df8771c9d4abe6e9a6f8d829b2736)
1 // Copyright 2020 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "absl/strings/cord.h"
16 
17 #include <algorithm>
18 #include <atomic>
19 #include <cstddef>
20 #include <cstdio>
21 #include <cstdlib>
22 #include <iomanip>
23 #include <iostream>
24 #include <limits>
25 #include <ostream>
26 #include <sstream>
27 #include <type_traits>
28 #include <unordered_set>
29 #include <vector>
30 
31 #include "absl/base/casts.h"
32 #include "absl/base/internal/raw_logging.h"
33 #include "absl/base/macros.h"
34 #include "absl/base/port.h"
35 #include "absl/container/fixed_array.h"
36 #include "absl/container/inlined_vector.h"
37 #include "absl/strings/escaping.h"
38 #include "absl/strings/internal/cord_internal.h"
39 #include "absl/strings/internal/resize_uninitialized.h"
40 #include "absl/strings/str_cat.h"
41 #include "absl/strings/str_format.h"
42 #include "absl/strings/str_join.h"
43 #include "absl/strings/string_view.h"
44 
45 namespace absl {
46 ABSL_NAMESPACE_BEGIN
47 
48 using ::absl::cord_internal::CordRep;
49 using ::absl::cord_internal::CordRepConcat;
50 using ::absl::cord_internal::CordRepExternal;
51 using ::absl::cord_internal::CordRepSubstring;
52 
53 using ::absl::cord_internal::CONCAT;
54 using ::absl::cord_internal::EXTERNAL;
55 using ::absl::cord_internal::FLAT;
56 using ::absl::cord_internal::SUBSTRING;
57 
namespace cord_internal {

// Checked downcast to CordRepConcat; callers must ensure (and the assert
// verifies in debug builds) that this node's tag is CONCAT.
inline CordRepConcat* CordRep::concat() {
  assert(tag == CONCAT);
  return static_cast<CordRepConcat*>(this);
}

// Const overload of the CONCAT downcast above.
inline const CordRepConcat* CordRep::concat() const {
  assert(tag == CONCAT);
  return static_cast<const CordRepConcat*>(this);
}

// Checked downcast to CordRepSubstring (tag must be SUBSTRING).
inline CordRepSubstring* CordRep::substring() {
  assert(tag == SUBSTRING);
  return static_cast<CordRepSubstring*>(this);
}

// Const overload of the SUBSTRING downcast above.
inline const CordRepSubstring* CordRep::substring() const {
  assert(tag == SUBSTRING);
  return static_cast<const CordRepSubstring*>(this);
}

// Checked downcast to CordRepExternal (tag must be EXTERNAL).
inline CordRepExternal* CordRep::external() {
  assert(tag == EXTERNAL);
  return static_cast<CordRepExternal*>(this);
}

// Const overload of the EXTERNAL downcast above.
inline const CordRepExternal* CordRep::external() const {
  assert(tag == EXTERNAL);
  return static_cast<const CordRepExternal*>(this);
}

}  // namespace cord_internal
91 
// Per-node overhead of a flat node: the header bytes that precede the
// `data` payload array inside CordRep.
static const size_t kFlatOverhead = offsetof(CordRep, data);

// Largest and smallest flat node lengths we are willing to allocate
// Flat allocation size is stored in tag, which currently can encode sizes up
// to 4K, encoded as multiple of either 8 or 32 bytes.
// If we allow for larger sizes, we need to change this to 8/64, 16/128, etc.
static constexpr size_t kMaxFlatSize = 4096;
static constexpr size_t kMaxFlatLength = kMaxFlatSize - kFlatOverhead;
static constexpr size_t kMinFlatLength = 32 - kFlatOverhead;

// Prefer copying blocks of at most this size, otherwise reference count.
static const size_t kMaxBytesToCopy = 511;
104 
// Helper functions for rounded div, and rounding to exact sizes.

// Integer division of `n` by `m`, rounding the quotient up.
static size_t DivUp(size_t n, size_t m) {
  const size_t quotient = n / m;
  return (n % m != 0) ? quotient + 1 : quotient;
}

// Rounds `n` up to the nearest multiple of `m`.
static size_t RoundUp(size_t n, size_t m) {
  const size_t groups = DivUp(n, m);
  return groups * m;
}

// Returns the size to the nearest equal or larger value that can be
// expressed exactly as a tag value (multiples of 8 up to 1K, of 32 above).
static size_t RoundUpForTag(size_t size) {
  const size_t granularity = (size <= 1024) ? 8 : 32;
  return RoundUp(size, granularity);
}
114 
// Converts the allocated size to a tag, rounding down if the size
// does not exactly match a 'tag expressible' size value. The result is
// undefined if the size exceeds the maximum size that can be encoded in
// a tag, i.e., if size is larger than TagToAllocatedSize(<max tag>).
static uint8_t AllocatedSizeToTag(size_t size) {
  size_t tag;
  if (size <= 1024) {
    // Sizes up to 1K are encoded in 8-byte units.
    tag = size / 8;
  } else {
    // Larger sizes use 32-byte units, offset so that 1024 maps to tag 128.
    tag = 128 + (size - 1024) / 32;
  }
  assert(tag <= std::numeric_limits<uint8_t>::max());
  return tag;
}
124 
// Converts the provided tag to the corresponding allocated size.
// Inverse of AllocatedSizeToTag for tag-expressible sizes.
static constexpr size_t TagToAllocatedSize(uint8_t tag) {
  return (tag > 128) ? (1024 + (tag - 128) * 32) : (tag * 8);
}
129 
130 // Converts the provided tag to the corresponding available data length
TagToLength(uint8_t tag)131 static constexpr size_t TagToLength(uint8_t tag) {
132   return TagToAllocatedSize(tag) - kFlatOverhead;
133 }
134 
135 // Enforce that kMaxFlatSize maps to a well-known exact tag value.
136 static_assert(TagToAllocatedSize(224) == kMaxFlatSize, "Bad tag logic");
137 
// Compile-time Fibonacci via the accumulator pair (a, b); written as a single
// return expression so it remains a valid C++11 constexpr function.
constexpr uint64_t Fibonacci(unsigned char n, uint64_t a = 0, uint64_t b = 1) {
  return n != 0 ? Fibonacci(n - 1, b, a + b) : a;
}

static_assert(Fibonacci(63) == 6557470319842,
              "Fibonacci values computed incorrectly");
144 
// Minimum length required for a given depth tree -- a tree is considered
// balanced if
//      length(t) >= min_length[depth(t)]
// The root node depth is allowed to become twice as large to reduce rebalancing
// for larger strings (see IsRootBalanced).
static constexpr uint64_t min_length[] = {
    Fibonacci(2),          Fibonacci(3),  Fibonacci(4),  Fibonacci(5),
    Fibonacci(6),          Fibonacci(7),  Fibonacci(8),  Fibonacci(9),
    Fibonacci(10),         Fibonacci(11), Fibonacci(12), Fibonacci(13),
    Fibonacci(14),         Fibonacci(15), Fibonacci(16), Fibonacci(17),
    Fibonacci(18),         Fibonacci(19), Fibonacci(20), Fibonacci(21),
    Fibonacci(22),         Fibonacci(23), Fibonacci(24), Fibonacci(25),
    Fibonacci(26),         Fibonacci(27), Fibonacci(28), Fibonacci(29),
    Fibonacci(30),         Fibonacci(31), Fibonacci(32), Fibonacci(33),
    Fibonacci(34),         Fibonacci(35), Fibonacci(36), Fibonacci(37),
    Fibonacci(38),         Fibonacci(39), Fibonacci(40), Fibonacci(41),
    Fibonacci(42),         Fibonacci(43), Fibonacci(44), Fibonacci(45),
    Fibonacci(46),         Fibonacci(47),
    0xffffffffffffffffull,  // Avoid overflow
};

// Number of entries in min_length[]; trees deeper than this are always
// treated as unbalanced (see IsRootBalanced).
static const int kMinLengthSize = ABSL_ARRAYSIZE(min_length);

// The inlined size to use with absl::InlinedVector.
//
// Note: The InlinedVectors in this file (and in cord.h) do not need to use
// the same value for their inlined size. The fact that they do is historical.
// It may be desirable for each to use a different inlined size optimized for
// that InlinedVector's usage.
//
// TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
// the inlined vector size (47 exists for backward compatibility).
static const int kInlinedVectorSize = 47;
178 
IsRootBalanced(CordRep * node)179 static inline bool IsRootBalanced(CordRep* node) {
180   if (node->tag != CONCAT) {
181     return true;
182   } else if (node->concat()->depth() <= 15) {
183     return true;
184   } else if (node->concat()->depth() > kMinLengthSize) {
185     return false;
186   } else {
187     // Allow depth to become twice as large as implied by fibonacci rule to
188     // reduce rebalancing for larger strings.
189     return (node->length >= min_length[node->concat()->depth() / 2]);
190   }
191 }
192 
193 static CordRep* Rebalance(CordRep* node);
194 static void DumpNode(CordRep* rep, bool include_data, std::ostream* os);
195 static bool VerifyNode(CordRep* root, CordRep* start_node,
196                        bool full_validation);
197 
VerifyTree(CordRep * node)198 static inline CordRep* VerifyTree(CordRep* node) {
199   // Verification is expensive, so only do it in debug mode.
200   // Even in debug mode we normally do only light validation.
201   // If you are debugging Cord itself, you should define the
202   // macro EXTRA_CORD_VALIDATION, e.g. by adding
203   // --copt=-DEXTRA_CORD_VALIDATION to the blaze line.
204 #ifdef EXTRA_CORD_VALIDATION
205   assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/true));
206 #else   // EXTRA_CORD_VALIDATION
207   assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/false));
208 #endif  // EXTRA_CORD_VALIDATION
209   static_cast<void>(&VerifyNode);
210 
211   return node;
212 }
213 
214 // --------------------------------------------------------------------
215 // Memory management
216 
Ref(CordRep * rep)217 inline CordRep* Ref(CordRep* rep) {
218   if (rep != nullptr) {
219     rep->refcount.Increment();
220   }
221   return rep;
222 }
223 
// This internal routine is called from the cold path of Unref below. Keeping it
// in a separate routine allows good inlining of Unref into many profitable call
// sites. However, the call to this function can be highly disruptive to the
// register pressure in those callers. To minimize the cost to callers, we use
// a special LLVM calling convention that preserves most registers. This allows
// the call to this routine in cold paths to not disrupt the caller's register
// pressure. This calling convention is not available on all platforms; we
// intentionally allow LLVM to ignore the attribute rather than attempting to
// hardcode the list of supported platforms.
#if defined(__clang__) && !defined(__i386__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wattributes"
__attribute__((preserve_most))
#pragma clang diagnostic pop
#endif
// Destroys `rep`, whose refcount has already reached zero, along with any
// descendants whose refcounts drop to zero as a result. Uses an explicit
// `pending` stack instead of recursion so deep trees cannot overflow the
// call stack.
static void UnrefInternal(CordRep* rep) {
  assert(rep != nullptr);

  // Nodes whose refcount hit zero but that have not been destroyed yet.
  absl::InlinedVector<CordRep*, kInlinedVectorSize> pending;
  while (true) {
    assert(!rep->refcount.IsImmortal());
    if (rep->tag == CONCAT) {
      CordRepConcat* rep_concat = rep->concat();
      // Queue the right child if we held its last reference.
      CordRep* right = rep_concat->right;
      if (!right->refcount.Decrement()) {
        pending.push_back(right);
      }
      // Delete the concat node itself, then continue directly with the left
      // child if its refcount also dropped to zero.
      CordRep* left = rep_concat->left;
      delete rep_concat;
      rep = nullptr;
      if (!left->refcount.Decrement()) {
        rep = left;
        continue;
      }
    } else if (rep->tag == EXTERNAL) {
      // The releaser invoker both runs the user releaser and frees the rep.
      CordRepExternal* rep_external = rep->external();
      assert(rep_external->releaser_invoker != nullptr);
      rep_external->releaser_invoker(rep_external);
      rep = nullptr;
    } else if (rep->tag == SUBSTRING) {
      CordRepSubstring* rep_substring = rep->substring();
      CordRep* child = rep_substring->child;
      delete rep_substring;
      rep = nullptr;
      if (!child->refcount.Decrement()) {
        rep = child;
        continue;
      }
    } else {
      // Flat CordReps are allocated and constructed with raw ::operator new
      // and placement new, and must be destructed and deallocated
      // accordingly.
#if defined(__cpp_sized_deallocation)
      size_t size = TagToAllocatedSize(rep->tag);
      rep->~CordRep();
      ::operator delete(rep, size);
#else
      rep->~CordRep();
      ::operator delete(rep);
#endif
      rep = nullptr;
    }

    // Pick up the next queued node, if any.
    if (!pending.empty()) {
      rep = pending.back();
      pending.pop_back();
    } else {
      break;
    }
  }
}
295 
Unref(CordRep * rep)296 inline void Unref(CordRep* rep) {
297   // Fast-path for two common, hot cases: a null rep and a shared root.
298   if (ABSL_PREDICT_TRUE(rep == nullptr ||
299                         rep->refcount.DecrementExpectHighRefcount())) {
300     return;
301   }
302 
303   UnrefInternal(rep);
304 }
305 
306 // Return the depth of a node
Depth(const CordRep * rep)307 static int Depth(const CordRep* rep) {
308   if (rep->tag == CONCAT) {
309     return rep->concat()->depth();
310   } else {
311     return 0;
312   }
313 }
314 
SetConcatChildren(CordRepConcat * concat,CordRep * left,CordRep * right)315 static void SetConcatChildren(CordRepConcat* concat, CordRep* left,
316                               CordRep* right) {
317   concat->left = left;
318   concat->right = right;
319 
320   concat->length = left->length + right->length;
321   concat->set_depth(1 + std::max(Depth(left), Depth(right)));
322 }
323 
324 // Create a concatenation of the specified nodes.
325 // Does not change the refcounts of "left" and "right".
326 // The returned node has a refcount of 1.
RawConcat(CordRep * left,CordRep * right)327 static CordRep* RawConcat(CordRep* left, CordRep* right) {
328   // Avoid making degenerate concat nodes (one child is empty)
329   if (left == nullptr || left->length == 0) {
330     Unref(left);
331     return right;
332   }
333   if (right == nullptr || right->length == 0) {
334     Unref(right);
335     return left;
336   }
337 
338   CordRepConcat* rep = new CordRepConcat();
339   rep->tag = CONCAT;
340   SetConcatChildren(rep, left, right);
341 
342   return rep;
343 }
344 
Concat(CordRep * left,CordRep * right)345 static CordRep* Concat(CordRep* left, CordRep* right) {
346   CordRep* rep = RawConcat(left, right);
347   if (rep != nullptr && !IsRootBalanced(rep)) {
348     rep = Rebalance(rep);
349   }
350   return VerifyTree(rep);
351 }
352 
353 // Make a balanced tree out of an array of leaf nodes.
MakeBalancedTree(CordRep ** reps,size_t n)354 static CordRep* MakeBalancedTree(CordRep** reps, size_t n) {
355   // Make repeated passes over the array, merging adjacent pairs
356   // until we are left with just a single node.
357   while (n > 1) {
358     size_t dst = 0;
359     for (size_t src = 0; src < n; src += 2) {
360       if (src + 1 < n) {
361         reps[dst] = Concat(reps[src], reps[src + 1]);
362       } else {
363         reps[dst] = reps[src];
364       }
365       dst++;
366     }
367     n = dst;
368   }
369 
370   return reps[0];
371 }
372 
373 // Create a new flat node.
NewFlat(size_t length_hint)374 static CordRep* NewFlat(size_t length_hint) {
375   if (length_hint <= kMinFlatLength) {
376     length_hint = kMinFlatLength;
377   } else if (length_hint > kMaxFlatLength) {
378     length_hint = kMaxFlatLength;
379   }
380 
381   // Round size up so it matches a size we can exactly express in a tag.
382   const size_t size = RoundUpForTag(length_hint + kFlatOverhead);
383   void* const raw_rep = ::operator new(size);
384   CordRep* rep = new (raw_rep) CordRep();
385   rep->tag = AllocatedSizeToTag(size);
386   return VerifyTree(rep);
387 }
388 
389 // Create a new tree out of the specified array.
390 // The returned node has a refcount of 1.
NewTree(const char * data,size_t length,size_t alloc_hint)391 static CordRep* NewTree(const char* data,
392                         size_t length,
393                         size_t alloc_hint) {
394   if (length == 0) return nullptr;
395   absl::FixedArray<CordRep*> reps((length - 1) / kMaxFlatLength + 1);
396   size_t n = 0;
397   do {
398     const size_t len = std::min(length, kMaxFlatLength);
399     CordRep* rep = NewFlat(len + alloc_hint);
400     rep->length = len;
401     memcpy(rep->data, data, len);
402     reps[n++] = VerifyTree(rep);
403     data += len;
404     length -= len;
405   } while (length != 0);
406   return MakeBalancedTree(reps.data(), n);
407 }
408 
409 namespace cord_internal {
410 
InitializeCordRepExternal(absl::string_view data,CordRepExternal * rep)411 void InitializeCordRepExternal(absl::string_view data, CordRepExternal* rep) {
412   assert(!data.empty());
413   rep->length = data.size();
414   rep->tag = EXTERNAL;
415   rep->base = data.data();
416   VerifyTree(rep);
417 }
418 
419 }  // namespace cord_internal
420 
NewSubstring(CordRep * child,size_t offset,size_t length)421 static CordRep* NewSubstring(CordRep* child, size_t offset, size_t length) {
422   // Never create empty substring nodes
423   if (length == 0) {
424     Unref(child);
425     return nullptr;
426   } else {
427     CordRepSubstring* rep = new CordRepSubstring();
428     assert((offset + length) <= child->length);
429     rep->length = length;
430     rep->tag = SUBSTRING;
431     rep->start = offset;
432     rep->child = child;
433     return VerifyTree(rep);
434   }
435 }
436 
// --------------------------------------------------------------------
// Cord::InlineRep functions

// Out-of-class definition for the static constexpr member (needed for
// ODR-use under pre-C++17 rules).
constexpr unsigned char Cord::InlineRep::kMaxInline;
441 
// Copies `n` bytes (n <= kMaxInline) into the inline buffer and records `n`
// as the inline size. If `nullify_tail` is set, SmallMemmove presumably
// zeroes the unused trailing inline bytes — confirm against its definition.
inline void Cord::InlineRep::set_data(const char* data, size_t n,
                                      bool nullify_tail) {
  static_assert(kMaxInline == 15, "set_data is hard-coded for a length of 15");

  cord_internal::SmallMemmove(data_.as_chars, data, n, nullify_tail);
  set_tagged_size(static_cast<char>(n));
}
449 
// Resets the rep, marks it as holding `n` inline bytes, and returns a
// pointer to the inline buffer for the caller to fill.
inline char* Cord::InlineRep::set_data(size_t n) {
  assert(n <= kMaxInline);
  ResetToEmpty();
  set_tagged_size(static_cast<char>(n));
  return data_.as_chars;
}
456 
force_tree(size_t extra_hint)457 inline CordRep* Cord::InlineRep::force_tree(size_t extra_hint) {
458   size_t len = tagged_size();
459   if (len > kMaxInline) {
460     return data_.as_tree.rep;
461   }
462 
463   CordRep* result = NewFlat(len + extra_hint);
464   result->length = len;
465   static_assert(kMinFlatLength >= sizeof(data_.as_chars), "");
466   memcpy(result->data, data_.as_chars, sizeof(data_.as_chars));
467   set_tree(result);
468   return result;
469 }
470 
// Shrinks an inline rep by `n` bytes (n <= current size), zeroing the
// vacated trailing bytes so unused inline storage stays deterministic.
inline void Cord::InlineRep::reduce_size(size_t n) {
  size_t tag = tagged_size();
  assert(tag <= kMaxInline);
  assert(tag >= n);
  tag -= n;
  memset(data_.as_chars + tag, 0, n);
  set_tagged_size(static_cast<char>(tag));
}
479 
// Drops the first `n` bytes of an inline rep: shifts the remaining bytes to
// the front, then shrinks (and tail-zeroes) the recorded size.
inline void Cord::InlineRep::remove_prefix(size_t n) {
  cord_internal::SmallMemmove(data_.as_chars, data_.as_chars + n,
                              tagged_size() - n);
  reduce_size(n);
}
485 
AppendTree(CordRep * tree)486 void Cord::InlineRep::AppendTree(CordRep* tree) {
487   if (tree == nullptr) return;
488   size_t len = tagged_size();
489   if (len == 0) {
490     set_tree(tree);
491   } else {
492     set_tree(Concat(force_tree(0), tree));
493   }
494 }
495 
PrependTree(CordRep * tree)496 void Cord::InlineRep::PrependTree(CordRep* tree) {
497   assert(tree != nullptr);
498   size_t len = tagged_size();
499   if (len == 0) {
500     set_tree(tree);
501   } else {
502     set_tree(Concat(tree, force_tree(0)));
503   }
504 }
505 
// Searches for a non-full flat node at the rightmost leaf of the tree. If a
// suitable leaf is found, the function will update the length field for all
// nodes to account for the size increase. The append region address will be
// written to region and the actual size increase will be written to size.
// Returns false (with *region = nullptr, *size = 0) when no exclusively
// owned, non-full FLAT leaf is available.
static inline bool PrepareAppendRegion(CordRep* root, char** region,
                                       size_t* size, size_t max_length) {
  // Search down the right-hand path for a non-full FLAT node. Only nodes we
  // own exclusively (refcount of one) may be mutated in place.
  CordRep* dst = root;
  while (dst->tag == CONCAT && dst->refcount.IsOne()) {
    dst = dst->concat()->right;
  }

  if (dst->tag < FLAT || !dst->refcount.IsOne()) {
    *region = nullptr;
    *size = 0;
    return false;
  }

  const size_t in_use = dst->length;
  const size_t capacity = TagToLength(dst->tag);
  if (in_use == capacity) {
    // Leaf is full; nothing to grow.
    *region = nullptr;
    *size = 0;
    return false;
  }

  // Grow by the leaf's spare capacity, capped at max_length.
  size_t size_increase = std::min(capacity - in_use, max_length);

  // We need to update the length fields for all nodes, including the leaf node.
  for (CordRep* rep = root; rep != dst; rep = rep->concat()->right) {
    rep->length += size_increase;
  }
  dst->length += size_increase;

  *region = dst->data + in_use;
  *size = size_increase;
  return true;
}
544 
// Hands out a writable append region of up to `max_length` bytes. On return,
// *region points at the writable bytes and *size holds their count (possibly
// 0 when max_length == 0). The cord's recorded size already includes the
// region; the caller is expected to fill it.
void Cord::InlineRep::GetAppendRegion(char** region, size_t* size,
                                      size_t max_length) {
  if (max_length == 0) {
    *region = nullptr;
    *size = 0;
    return;
  }

  // Try to fit in the inline buffer if possible.
  size_t inline_length = tagged_size();
  if (inline_length < kMaxInline && max_length <= kMaxInline - inline_length) {
    *region = data_.as_chars + inline_length;
    *size = max_length;
    set_tagged_size(static_cast<char>(inline_length + max_length));
    return;
  }

  // Otherwise work on the tree form; `max_length` doubles as an allocation
  // hint when the inline contents are promoted to a flat node.
  CordRep* root = force_tree(max_length);

  // Reuse spare capacity in the rightmost flat node when possible.
  if (PrepareAppendRegion(root, region, size, max_length)) {
    return;
  }

  // Allocate new node.
  CordRep* new_node =
      NewFlat(std::max(static_cast<size_t>(root->length), max_length));
  new_node->length =
      std::min(static_cast<size_t>(TagToLength(new_node->tag)), max_length);
  *region = new_node->data;
  *size = new_node->length;
  replace_tree(Concat(root, new_node));
}
577 
// Uncapped overload: hands out whatever writable space is conveniently
// available — the rest of the inline buffer, a flat leaf's spare capacity,
// or a freshly allocated flat node sized from the current length.
void Cord::InlineRep::GetAppendRegion(char** region, size_t* size) {
  const size_t max_length = std::numeric_limits<size_t>::max();

  // Try to fit in the inline buffer if possible.
  size_t inline_length = tagged_size();
  if (inline_length < kMaxInline) {
    *region = data_.as_chars + inline_length;
    *size = kMaxInline - inline_length;
    set_tagged_size(kMaxInline);
    return;
  }

  CordRep* root = force_tree(max_length);

  // Reuse spare capacity in the rightmost flat node when possible.
  if (PrepareAppendRegion(root, region, size, max_length)) {
    return;
  }

  // Allocate new node.
  CordRep* new_node = NewFlat(root->length);
  new_node->length = TagToLength(new_node->tag);
  *region = new_node->data;
  *size = new_node->length;
  replace_tree(Concat(root, new_node));
}
603 
// If the rep is a leaf, this will increment the value at total_mem_usage and
// will return true.
static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) {
  if (rep->tag >= FLAT) {
    // Flat leaves account for their entire allocated block.
    *total_mem_usage += TagToAllocatedSize(rep->tag);
    return true;
  }
  if (rep->tag == EXTERNAL) {
    // NOTE(review): this charges sizeof(CordRepConcat) for an EXTERNAL node
    // rather than the external rep's own header size — presumably a rough
    // estimate; confirm before relying on exact accounting.
    *total_mem_usage += sizeof(CordRepConcat) + rep->length;
    return true;
  }
  return false;
}
617 
// Slow-path copy assignment: releases any existing tree, copies `src`'s raw
// rep bytes, then takes a reference if the copied rep is a tree.
void Cord::InlineRep::AssignSlow(const Cord::InlineRep& src) {
  ClearSlow();

  data_ = src.data_;
  if (is_tree()) {
    Ref(tree());
  }
}
626 
// Slow-path clear: drops the tree reference (if any) and resets the rep to
// the empty inline state.
void Cord::InlineRep::ClearSlow() {
  if (is_tree()) {
    Unref(tree());
  }
  ResetToEmpty();
}
633 
// --------------------------------------------------------------------
// Constructors and destructors

// Copy constructor: copies the inline rep and shares any underlying tree by
// adding a reference.
Cord::Cord(const Cord& src) : contents_(src.contents_) {
  Ref(contents_.tree());  // Does nothing if contents_ has embedded data
}
640 
Cord(absl::string_view src)641 Cord::Cord(absl::string_view src) {
642   const size_t n = src.size();
643   if (n <= InlineRep::kMaxInline) {
644     contents_.set_data(src.data(), n, false);
645   } else {
646     contents_.set_tree(NewTree(src.data(), n, 0));
647   }
648 }
649 
// Constructs a Cord from a movable string-like type. Short or mostly-unused
// strings are copied (inline or into flat nodes); otherwise the string is
// moved into an external rep so its buffer is reused without copying.
template <typename T, Cord::EnableIfString<T>>
Cord::Cord(T&& src) {
  if (
      // String is short: copy data to avoid external block overhead.
      src.size() <= kMaxBytesToCopy ||
      // String is wasteful: copy data to avoid pinning too much unused memory.
      src.size() < src.capacity() / 2
  ) {
    if (src.size() <= InlineRep::kMaxInline) {
      contents_.set_data(src.data(), src.size(), false);
    } else {
      contents_.set_tree(NewTree(src.data(), src.size(), 0));
    }
  } else {
    // The moved-in string lives inside the external rep; the no-op releaser
    // leaves cleanup to the std::string destructor when the rep dies.
    struct StringReleaser {
      void operator()(absl::string_view /* data */) {}
      std::string data;
    };
    const absl::string_view original_data = src;
    auto* rep = static_cast<
        ::absl::cord_internal::CordRepExternalImpl<StringReleaser>*>(
        absl::cord_internal::NewExternalRep(
            original_data, StringReleaser{std::forward<T>(src)}));
    // Moving src may have invalidated its data pointer, so adjust it.
    rep->base = rep->template get<0>().data.data();
    contents_.set_tree(rep);
  }
}

// Explicit instantiation for std::string rvalues.
template Cord::Cord(std::string&& src);
680 
// The destruction code is separate so that the compiler can determine
// that it does not need to call the destructor on a moved-from Cord.
// Releases this cord's tree reference (verifying the tree in debug builds).
void Cord::DestroyCordSlow() {
  Unref(VerifyTree(contents_.tree()));
}
686 
// --------------------------------------------------------------------
// Mutators

// Empties the cord, releasing the tree reference (if any) returned by
// contents_.clear().
void Cord::Clear() {
  Unref(contents_.clear());
}
693 
// Assigns `src` to this cord: inline when small, reusing an exclusively
// owned FLAT node in place when possible, otherwise building a new tree.
Cord& Cord::operator=(absl::string_view src) {

  const char* data = src.data();
  size_t length = src.size();
  CordRep* tree = contents_.tree();
  if (length <= InlineRep::kMaxInline) {
    // Embed into this->contents_
    // NOTE(review): the old tree is released only after the new bytes are
    // stored — presumably so `src` may alias this cord's own data; confirm.
    contents_.set_data(data, length, true);
    Unref(tree);
    return *this;
  }
  if (tree != nullptr && tree->tag >= FLAT &&
      TagToLength(tree->tag) >= length && tree->refcount.IsOne()) {
    // Copy in place if the existing FLAT node is reusable.
    memmove(tree->data, data, length);
    tree->length = length;
    VerifyTree(tree);
    return *this;
  }
  contents_.set_tree(NewTree(data, length, 0));
  Unref(tree);
  return *this;
}
717 
718 template <typename T, Cord::EnableIfString<T>>
operator =(T && src)719 Cord& Cord::operator=(T&& src) {
720   if (src.size() <= kMaxBytesToCopy) {
721     *this = absl::string_view(src);
722   } else {
723     *this = Cord(std::forward<T>(src));
724   }
725   return *this;
726 }
727 
728 template Cord& Cord::operator=(std::string&& src);
729 
// TODO(sanjay): Move to Cord::InlineRep section of file.  For now,
// we keep it here to make diffs easier.
// Appends `src_size` bytes from `src_data`, preferring (in order): the
// inline buffer, spare capacity in the tree's rightmost flat leaf, and
// finally newly allocated flat node(s).
void Cord::InlineRep::AppendArray(const char* src_data, size_t src_size) {
  if (src_size == 0) return;  // memcpy(_, nullptr, 0) is undefined.
  // Try to fit in the inline buffer if possible.
  size_t inline_length = tagged_size();
  if (inline_length < kMaxInline && src_size <= kMaxInline - inline_length) {
    // Append new data to embedded array
    set_tagged_size(static_cast<char>(inline_length + src_size));
    memcpy(data_.as_chars + inline_length, src_data, src_size);
    return;
  }

  CordRep* root = tree();

  size_t appended = 0;
  if (root) {
    // Fill any spare capacity in the rightmost flat leaf first.
    char* region;
    if (PrepareAppendRegion(root, &region, &appended, src_size)) {
      memcpy(region, src_data, appended);
    }
  } else {
    // It is possible that src_data == data_, but when we transition from an
    // InlineRep to a tree we need to assign data_ = root via set_tree. To
    // avoid corrupting the source data before we copy it, delay calling
    // set_tree until after we've copied data.
    // We are going from an inline size to beyond inline size. Make the new size
    // either double the inlined size, or the added size + 10%.
    const size_t size1 = inline_length * 2 + src_size;
    const size_t size2 = inline_length + src_size / 10;
    root = NewFlat(std::max<size_t>(size1, size2));
    appended = std::min(src_size, TagToLength(root->tag) - inline_length);
    memcpy(root->data, data_.as_chars, inline_length);
    memcpy(root->data + inline_length, src_data, appended);
    root->length = inline_length + appended;
    set_tree(root);
  }

  src_data += appended;
  src_size -= appended;
  if (src_size == 0) {
    return;
  }

  // Use new block(s) for any remaining bytes that were not handled above.
  // Alloc extra memory only if the right child of the root of the new tree is
  // going to be a FLAT node, which will permit further inplace appends.
  size_t length = src_size;
  if (src_size < kMaxFlatLength) {
    // The new length is either
    // - old size + 10%
    // - old_size + src_size
    // This will cause a reasonable conservative step-up in size that is still
    // large enough to avoid excessive amounts of small fragments being added.
    length = std::max<size_t>(root->length / 10, src_size);
  }
  // `length - src_size` is passed as extra allocation hint for the new tree.
  set_tree(Concat(root, NewTree(src_data, src_size, length - src_size)));
}
788 
// Lvalue overload: returns this cord's tree with an added reference (or
// nullptr for inlined contents); the caller owns the returned reference.
inline CordRep* Cord::TakeRep() const& {
  return Ref(contents_.tree());
}

// Rvalue overload: steals the tree, leaving this cord empty, without
// touching the refcount.
inline CordRep* Cord::TakeRep() && {
  CordRep* rep = contents_.tree();
  contents_.clear();
  return rep;
}
798 
// Shared implementation behind Append(const Cord&) and Append(Cord&&).
// `C` deduces to a const lvalue or rvalue Cord, so the final
// `std::forward<C>(src).TakeRep()` picks the reference-adding or
// tree-stealing TakeRep overload respectively.
template <typename C>
inline void Cord::AppendImpl(C&& src) {
  if (empty()) {
    // In case of an empty destination avoid allocating a new node, do not copy
    // data.
    *this = std::forward<C>(src);
    return;
  }

  // For short cords, it is faster to copy data if there is room in dst.
  const size_t src_size = src.contents_.size();
  if (src_size <= kMaxBytesToCopy) {
    CordRep* src_tree = src.contents_.tree();
    if (src_tree == nullptr) {
      // src has embedded data.
      contents_.AppendArray(src.contents_.data(), src_size);
      return;
    }
    if (src_tree->tag >= FLAT) {
      // src tree just has one flat node.
      contents_.AppendArray(src_tree->data, src_size);
      return;
    }
    if (&src == this) {
      // ChunkIterator below assumes that src is not modified during traversal.
      Append(Cord(src));
      return;
    }
    // TODO(mec): Should we only do this if "dst" has space?
    for (absl::string_view chunk : src.Chunks()) {
      Append(chunk);
    }
    return;
  }

  // Long source: splice the source tree onto this cord's tree.
  contents_.AppendTree(std::forward<C>(src).TakeRep());
}
836 
// Appends a copy of `src`, sharing its tree nodes where profitable.
void Cord::Append(const Cord& src) { AppendImpl(src); }

// Appends `src`, stealing its tree when profitable (leaving `src` empty).
void Cord::Append(Cord&& src) { AppendImpl(std::move(src)); }
840 
841 template <typename T, Cord::EnableIfString<T>>
Append(T && src)842 void Cord::Append(T&& src) {
843   if (src.size() <= kMaxBytesToCopy) {
844     Append(absl::string_view(src));
845   } else {
846     Append(Cord(std::forward<T>(src)));
847   }
848 }
849 
850 template void Cord::Append(std::string&& src);
851 
Prepend(const Cord & src)852 void Cord::Prepend(const Cord& src) {
853   CordRep* src_tree = src.contents_.tree();
854   if (src_tree != nullptr) {
855     Ref(src_tree);
856     contents_.PrependTree(src_tree);
857     return;
858   }
859 
860   // `src` cord is inlined.
861   absl::string_view src_contents(src.contents_.data(), src.contents_.size());
862   return Prepend(src_contents);
863 }
864 
// Prepends raw bytes. Stays in the inline representation when the combined
// size still fits; otherwise promotes to (or extends) a tree.
void Cord::Prepend(absl::string_view src) {
  if (src.empty()) return;  // memcpy(_, nullptr, 0) is undefined.
  size_t cur_size = contents_.size();
  if (!contents_.is_tree() && cur_size + src.size() <= InlineRep::kMaxInline) {
    // Use embedded storage.
    // Assemble the new layout in a scratch buffer first: writing src directly
    // into contents_ would clobber the existing bytes that must follow it.
    char data[InlineRep::kMaxInline + 1] = {0};
    data[InlineRep::kMaxInline] = cur_size + src.size();  // set size
    memcpy(data, src.data(), src.size());
    memcpy(data + src.size(), contents_.data(), cur_size);
    // Install the scratch buffer (data + trailing tagged size) in one shot.
    memcpy(reinterpret_cast<void*>(&contents_), data,
           InlineRep::kMaxInline + 1);
  } else {
    contents_.PrependTree(NewTree(src.data(), src.size(), 0));
  }
}
880 
881 template <typename T, Cord::EnableIfString<T>>
Prepend(T && src)882 inline void Cord::Prepend(T&& src) {
883   if (src.size() <= kMaxBytesToCopy) {
884     Prepend(absl::string_view(src));
885   } else {
886     Prepend(Cord(std::forward<T>(src)));
887   }
888 }
889 
890 template void Cord::Prepend(std::string&& src);
891 
// Returns a tree representing `node` with its first `n` bytes removed, or
// nullptr when nothing remains. The result carries its own references; the
// caller's reference on `node` is untouched.
static CordRep* RemovePrefixFrom(CordRep* node, size_t n) {
  if (n >= node->length) return nullptr;
  if (n == 0) return Ref(node);
  // Right subtrees that survive the cut, saved while descending the spine.
  absl::InlinedVector<CordRep*, kInlinedVectorSize> rhs_stack;

  while (node->tag == CONCAT) {
    assert(n <= node->length);
    if (n < node->concat()->left->length) {
      // Push right to stack, descend left.
      rhs_stack.push_back(node->concat()->right);
      node = node->concat()->left;
    } else {
      // Drop left, descend right.
      n -= node->concat()->left->length;
      node = node->concat()->right;
    }
  }
  assert(n <= node->length);

  if (n == 0) {
    // The cut falls exactly on this leaf's boundary: share it whole.
    Ref(node);
  } else {
    size_t start = n;
    size_t len = node->length - n;
    if (node->tag == SUBSTRING) {
      // Consider in-place update of node, similar to in RemoveSuffixFrom().
      // Point at the underlying child to avoid substring-of-substring chains.
      start += node->substring()->start;
      node = node->substring()->child;
    }
    node = NewSubstring(Ref(node), start, len);
  }
  // Reassemble: concat the trimmed leaf with the saved right subtrees,
  // innermost (leftmost) first.
  while (!rhs_stack.empty()) {
    node = Concat(node, Ref(rhs_stack.back()));
    rhs_stack.pop_back();
  }
  return node;
}
929 
// RemoveSuffixFrom() is very similar to RemovePrefixFrom(), with the
// exception that removing a suffix has an optimization where a node may be
// edited in place iff that node and all its ancestors have a refcount of 1.
static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) {
  if (n >= node->length) return nullptr;
  if (n == 0) return Ref(node);
  // Left subtrees that survive the cut, saved while descending the spine.
  absl::InlinedVector<CordRep*, kInlinedVectorSize> lhs_stack;
  // True only while every node on the path so far is uniquely owned.
  bool inplace_ok = node->refcount.IsOne();

  while (node->tag == CONCAT) {
    assert(n <= node->length);
    if (n < node->concat()->right->length) {
      // Push left to stack, descend right.
      lhs_stack.push_back(node->concat()->left);
      node = node->concat()->right;
    } else {
      // Drop right, descend left.
      n -= node->concat()->right->length;
      node = node->concat()->left;
    }
    inplace_ok = inplace_ok && node->refcount.IsOne();
  }
  assert(n <= node->length);

  if (n == 0) {
    Ref(node);
  } else if (inplace_ok && node->tag != EXTERNAL) {
    // Consider making a new buffer if the current node capacity is much
    // larger than the new length.
    // Uniquely owned non-EXTERNAL leaf: just shrink its length in place.
    Ref(node);
    node->length -= n;
  } else {
    size_t start = 0;
    size_t len = node->length - n;
    if (node->tag == SUBSTRING) {
      // Point at the underlying child to avoid substring-of-substring chains.
      start = node->substring()->start;
      node = node->substring()->child;
    }
    node = NewSubstring(Ref(node), start, len);
  }
  // Reassemble: concat the saved left subtrees back on, innermost first.
  while (!lhs_stack.empty()) {
    node = Concat(Ref(lhs_stack.back()), node);
    lhs_stack.pop_back();
  }
  return node;
}
976 
RemovePrefix(size_t n)977 void Cord::RemovePrefix(size_t n) {
978   ABSL_INTERNAL_CHECK(n <= size(),
979                       absl::StrCat("Requested prefix size ", n,
980                                    " exceeds Cord's size ", size()));
981   CordRep* tree = contents_.tree();
982   if (tree == nullptr) {
983     contents_.remove_prefix(n);
984   } else {
985     CordRep* newrep = RemovePrefixFrom(tree, n);
986     Unref(tree);
987     contents_.replace_tree(VerifyTree(newrep));
988   }
989 }
990 
RemoveSuffix(size_t n)991 void Cord::RemoveSuffix(size_t n) {
992   ABSL_INTERNAL_CHECK(n <= size(),
993                       absl::StrCat("Requested suffix size ", n,
994                                    " exceeds Cord's size ", size()));
995   CordRep* tree = contents_.tree();
996   if (tree == nullptr) {
997     contents_.reduce_size(n);
998   } else {
999     CordRep* newrep = RemoveSuffixFrom(tree, n);
1000     Unref(tree);
1001     contents_.replace_tree(VerifyTree(newrep));
1002   }
1003 }
1004 
1005 // Work item for NewSubRange().
1006 struct SubRange {
SubRangeabsl::SubRange1007   SubRange(CordRep* a_node, size_t a_pos, size_t a_n)
1008       : node(a_node), pos(a_pos), n(a_n) {}
1009   CordRep* node;  // nullptr means concat last 2 results.
1010   size_t pos;
1011   size_t n;
1012 };
1013 
// Builds a new tree for the byte range [pos, pos + n) of `node`, iteratively
// via an explicit work stack (`todo`) rather than recursion. A nullptr work
// item is a marker meaning "concat the two most recent results".
static CordRep* NewSubRange(CordRep* node, size_t pos, size_t n) {
  absl::InlinedVector<CordRep*, kInlinedVectorSize> results;
  absl::InlinedVector<SubRange, kInlinedVectorSize> todo;
  todo.push_back(SubRange(node, pos, n));
  do {
    const SubRange& sr = todo.back();
    node = sr.node;
    pos = sr.pos;
    n = sr.n;
    todo.pop_back();

    if (node == nullptr) {
      // Concat marker: left half was pushed after right, so it sits below
      // right on the results stack.
      assert(results.size() >= 2);
      CordRep* right = results.back();
      results.pop_back();
      CordRep* left = results.back();
      results.pop_back();
      results.push_back(Concat(left, right));
    } else if (pos == 0 && n == node->length) {
      // Entire node is in range: share it instead of rebuilding.
      results.push_back(Ref(node));
    } else if (node->tag != CONCAT) {
      if (node->tag == SUBSTRING) {
        // Reference the underlying child to avoid substring chains.
        pos += node->substring()->start;
        node = node->substring()->child;
      }
      results.push_back(NewSubstring(Ref(node), pos, n));
    } else if (pos + n <= node->concat()->left->length) {
      // Range lies entirely within the left child.
      todo.push_back(SubRange(node->concat()->left, pos, n));
    } else if (pos >= node->concat()->left->length) {
      // Range lies entirely within the right child.
      pos -= node->concat()->left->length;
      todo.push_back(SubRange(node->concat()->right, pos, n));
    } else {
      // Range straddles both children: queue both halves plus a concat marker.
      size_t left_n = node->concat()->left->length - pos;
      todo.push_back(SubRange(nullptr, 0, 0));  // Concat()
      todo.push_back(SubRange(node->concat()->right, 0, n - left_n));
      todo.push_back(SubRange(node->concat()->left, pos, left_n));
    }
  } while (!todo.empty());
  assert(results.size() == 1);
  return results[0];
}
1055 
// Returns the subrange [pos, pos + new_size) of this cord. Out-of-range
// arguments are clamped rather than being an error.
Cord Cord::Subcord(size_t pos, size_t new_size) const {
  Cord sub_cord;
  size_t length = size();
  if (pos > length) pos = length;
  if (new_size > length - pos) new_size = length - pos;
  CordRep* tree = contents_.tree();
  if (tree == nullptr) {
    // sub_cord is newly constructed, no need to re-zero-out the tail of
    // contents_ memory.
    sub_cord.contents_.set_data(contents_.data() + pos, new_size, false);
  } else if (new_size == 0) {
    // We want to return empty subcord, so nothing to do.
  } else if (new_size <= InlineRep::kMaxInline) {
    // Small result: flatten the chunks straight into inline storage instead
    // of building a substring tree.
    Cord::ChunkIterator it = chunk_begin();
    it.AdvanceBytes(pos);
    char* dest = sub_cord.contents_.data_.as_chars;
    size_t remaining_size = new_size;
    // Copy every chunk that is consumed in full...
    while (remaining_size > it->size()) {
      cord_internal::SmallMemmove(dest, it->data(), it->size());
      remaining_size -= it->size();
      dest += it->size();
      ++it;
    }
    // ...then the partial tail chunk.
    cord_internal::SmallMemmove(dest, it->data(), remaining_size);
    sub_cord.contents_.set_tagged_size(new_size);
  } else {
    sub_cord.contents_.set_tree(NewSubRange(tree, pos, new_size));
  }
  return sub_cord;
}
1086 
1087 // --------------------------------------------------------------------
1088 // Balancing
1089 
// Rebuilds an unbalanced concat tree into a balanced one. Leaves (and
// sufficiently balanced subtrees) are binned by length into `trees_`, then
// concatenated back together, similar to merging in a binomial forest.
class CordForest {
 public:
  explicit CordForest(size_t length)
      : root_length_(length), trees_(kMinLengthSize, nullptr) {}

  // Tears `cord_root` apart, depositing each kept subtree into the forest.
  void Build(CordRep* cord_root) {
    std::vector<CordRep*> pending = {cord_root};

    while (!pending.empty()) {
      CordRep* node = pending.back();
      pending.pop_back();
      CheckNode(node);
      if (ABSL_PREDICT_FALSE(node->tag != CONCAT)) {
        AddNode(node);
        continue;
      }

      CordRepConcat* concat_node = node->concat();
      // Only decompose concats that are too deep or too light for their
      // depth; balanced-enough subtrees are kept whole.
      if (concat_node->depth() >= kMinLengthSize ||
          concat_node->length < min_length[concat_node->depth()]) {
        pending.push_back(concat_node->right);
        pending.push_back(concat_node->left);

        if (concat_node->refcount.IsOne()) {
          // Uniquely owned: recycle this concat node. Its `left` pointer is
          // repurposed as the freelist link (see MakeConcat()).
          concat_node->left = concat_freelist_;
          concat_freelist_ = concat_node;
        } else {
          // Shared: keep the children alive, then drop our ref on the concat.
          Ref(concat_node->right);
          Ref(concat_node->left);
          Unref(concat_node);
        }
      } else {
        AddNode(node);
      }
    }
  }

  // Concatenates the forest's bins (smallest first) into a single tree.
  CordRep* ConcatNodes() {
    CordRep* sum = nullptr;
    for (auto* node : trees_) {
      if (node == nullptr) continue;

      sum = PrependNode(node, sum);
      root_length_ -= node->length;
      if (root_length_ == 0) break;
    }
    ABSL_INTERNAL_CHECK(sum != nullptr, "Failed to locate sum node");
    return VerifyTree(sum);
  }

 private:
  CordRep* AppendNode(CordRep* node, CordRep* sum) {
    return (sum == nullptr) ? node : MakeConcat(sum, node);
  }

  CordRep* PrependNode(CordRep* node, CordRep* sum) {
    return (sum == nullptr) ? node : MakeConcat(node, sum);
  }

  // Inserts `node` into the forest, merging with any smaller bins it absorbs.
  void AddNode(CordRep* node) {
    CordRep* sum = nullptr;

    // Collect together everything with which we will merge with node
    int i = 0;
    for (; node->length > min_length[i + 1]; ++i) {
      auto& tree_at_i = trees_[i];

      if (tree_at_i == nullptr) continue;
      sum = PrependNode(tree_at_i, sum);
      tree_at_i = nullptr;
    }

    sum = AppendNode(node, sum);

    // Insert sum into appropriate place in the forest
    for (; sum->length >= min_length[i]; ++i) {
      auto& tree_at_i = trees_[i];
      if (tree_at_i == nullptr) continue;

      sum = MakeConcat(tree_at_i, sum);
      tree_at_i = nullptr;
    }

    // min_length[0] == 1, which means sum->length >= min_length[0]
    assert(i > 0);
    trees_[i - 1] = sum;
  }

  // Make concat node trying to reuse existing CordRepConcat nodes we
  // already collected in the concat_freelist_.
  CordRep* MakeConcat(CordRep* left, CordRep* right) {
    if (concat_freelist_ == nullptr) return RawConcat(left, right);

    CordRepConcat* rep = concat_freelist_;
    // Pop the freelist head; the link lives in the node's `left` pointer.
    if (concat_freelist_->left == nullptr) {
      concat_freelist_ = nullptr;
    } else {
      concat_freelist_ = concat_freelist_->left->concat();
    }
    SetConcatChildren(rep, left, right);

    return rep;
  }

  // Sanity-checks structural invariants of a node before processing it.
  static void CheckNode(CordRep* node) {
    ABSL_INTERNAL_CHECK(node->length != 0u, "");
    if (node->tag == CONCAT) {
      ABSL_INTERNAL_CHECK(node->concat()->left != nullptr, "");
      ABSL_INTERNAL_CHECK(node->concat()->right != nullptr, "");
      ABSL_INTERNAL_CHECK(node->length == (node->concat()->left->length +
                                           node->concat()->right->length),
                          "");
    }
  }

  // Bytes not yet accounted for by ConcatNodes().
  size_t root_length_;

  // use an inlined vector instead of a flat array to get bounds checking
  absl::InlinedVector<CordRep*, kInlinedVectorSize> trees_;

  // List of concat nodes we can re-use for Cord balancing.
  CordRepConcat* concat_freelist_ = nullptr;
};
1213 
Rebalance(CordRep * node)1214 static CordRep* Rebalance(CordRep* node) {
1215   VerifyTree(node);
1216   assert(node->tag == CONCAT);
1217 
1218   if (node->length == 0) {
1219     return nullptr;
1220   }
1221 
1222   CordForest forest(node->length);
1223   forest.Build(node);
1224   return forest.ConcatNodes();
1225 }
1226 
1227 // --------------------------------------------------------------------
1228 // Comparators
1229 
1230 namespace {
1231 
ClampResult(int memcmp_res)1232 int ClampResult(int memcmp_res) {
1233   return static_cast<int>(memcmp_res > 0) - static_cast<int>(memcmp_res < 0);
1234 }
1235 
CompareChunks(absl::string_view * lhs,absl::string_view * rhs,size_t * size_to_compare)1236 int CompareChunks(absl::string_view* lhs, absl::string_view* rhs,
1237                   size_t* size_to_compare) {
1238   size_t compared_size = std::min(lhs->size(), rhs->size());
1239   assert(*size_to_compare >= compared_size);
1240   *size_to_compare -= compared_size;
1241 
1242   int memcmp_res = ::memcmp(lhs->data(), rhs->data(), compared_size);
1243   if (memcmp_res != 0) return memcmp_res;
1244 
1245   lhs->remove_prefix(compared_size);
1246   rhs->remove_prefix(compared_size);
1247 
1248   return 0;
1249 }
1250 
1251 // This overload set computes comparison results from memcmp result. This
1252 // interface is used inside GenericCompare below. Differet implementations
1253 // are specialized for int and bool. For int we clamp result to {-1, 0, 1}
1254 // set. For bool we just interested in "value == 0".
1255 template <typename ResultType>
ComputeCompareResult(int memcmp_res)1256 ResultType ComputeCompareResult(int memcmp_res) {
1257   return ClampResult(memcmp_res);
1258 }
1259 template <>
ComputeCompareResult(int memcmp_res)1260 bool ComputeCompareResult<bool>(int memcmp_res) {
1261   return memcmp_res == 0;
1262 }
1263 
1264 }  // namespace
1265 
// Helper routine. Locates the first flat chunk of the Cord without
// initializing the iterator.
inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const {
  size_t n = tagged_size();
  if (n <= kMaxInline) {
    // Inline representation: the bytes live directly in this object.
    return absl::string_view(data_.as_chars, n);
  }

  CordRep* node = tree();
  if (node->tag >= FLAT) {
    return absl::string_view(node->data, node->length);
  }

  if (node->tag == EXTERNAL) {
    return absl::string_view(node->external()->base, node->length);
  }

  // Walk down the left branches until we hit a non-CONCAT node.
  while (node->tag == CONCAT) {
    node = node->concat()->left;
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  assert(length != 0);

  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  if (node->tag >= FLAT) {
    return absl::string_view(node->data + offset, length);
  }

  assert((node->tag == EXTERNAL) && "Expect FLAT or EXTERNAL node here");

  return absl::string_view(node->external()->base + offset, length);
}
1306 
// Continues a Cord-vs-string_view comparison after GenericCompare() has
// already compared the first `compared_size` bytes. Walks this cord's
// remaining chunks against the remainder of `rhs`.
inline int Cord::CompareSlowPath(absl::string_view rhs, size_t compared_size,
                                 size_t size_to_compare) const {
  // Refills *chunk from the iterator when drained; false means the cord is
  // exhausted.
  auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) {
    if (!chunk->empty()) return true;
    ++*it;
    if (it->bytes_remaining_ == 0) return false;
    *chunk = **it;
    return true;
  };

  Cord::ChunkIterator lhs_it = chunk_begin();

  // compared_size is inside first chunk.
  absl::string_view lhs_chunk =
      (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view();
  assert(compared_size <= lhs_chunk.size());
  assert(compared_size <= rhs.size());
  lhs_chunk.remove_prefix(compared_size);
  rhs.remove_prefix(compared_size);
  size_to_compare -= compared_size;  // skip already compared size.

  while (advance(&lhs_it, &lhs_chunk) && !rhs.empty()) {
    int comparison_result = CompareChunks(&lhs_chunk, &rhs, &size_to_compare);
    if (comparison_result != 0) return comparison_result;
    if (size_to_compare == 0) return 0;
  }

  // One side ran out first; the shorter side compares lower.
  return static_cast<int>(rhs.empty()) - static_cast<int>(lhs_chunk.empty());
}
1336 
// Continues a Cord-vs-Cord comparison after GenericCompare() has already
// compared the first `compared_size` bytes. Walks both cords' remaining
// chunks in lockstep.
inline int Cord::CompareSlowPath(const Cord& rhs, size_t compared_size,
                                 size_t size_to_compare) const {
  // Refills *chunk from the iterator when drained; false means that cord is
  // exhausted.
  auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) {
    if (!chunk->empty()) return true;
    ++*it;
    if (it->bytes_remaining_ == 0) return false;
    *chunk = **it;
    return true;
  };

  Cord::ChunkIterator lhs_it = chunk_begin();
  Cord::ChunkIterator rhs_it = rhs.chunk_begin();

  // compared_size is inside both first chunks.
  absl::string_view lhs_chunk =
      (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view();
  absl::string_view rhs_chunk =
      (rhs_it.bytes_remaining_ != 0) ? *rhs_it : absl::string_view();
  assert(compared_size <= lhs_chunk.size());
  assert(compared_size <= rhs_chunk.size());
  lhs_chunk.remove_prefix(compared_size);
  rhs_chunk.remove_prefix(compared_size);
  size_to_compare -= compared_size;  // skip already compared size.

  while (advance(&lhs_it, &lhs_chunk) && advance(&rhs_it, &rhs_chunk)) {
    int memcmp_res = CompareChunks(&lhs_chunk, &rhs_chunk, &size_to_compare);
    if (memcmp_res != 0) return memcmp_res;
    if (size_to_compare == 0) return 0;
  }

  // One side ran out first; the shorter side compares lower.
  return static_cast<int>(rhs_chunk.empty()) -
         static_cast<int>(lhs_chunk.empty());
}
1370 
GetFirstChunk(const Cord & c)1371 inline absl::string_view Cord::GetFirstChunk(const Cord& c) {
1372   return c.contents_.FindFlatStartPiece();
1373 }
GetFirstChunk(absl::string_view sv)1374 inline absl::string_view Cord::GetFirstChunk(absl::string_view sv) {
1375   return sv;
1376 }
1377 
1378 // Compares up to 'size_to_compare' bytes of 'lhs' with 'rhs'. It is assumed
1379 // that 'size_to_compare' is greater that size of smallest of first chunks.
1380 template <typename ResultType, typename RHS>
GenericCompare(const Cord & lhs,const RHS & rhs,size_t size_to_compare)1381 ResultType GenericCompare(const Cord& lhs, const RHS& rhs,
1382                           size_t size_to_compare) {
1383   absl::string_view lhs_chunk = Cord::GetFirstChunk(lhs);
1384   absl::string_view rhs_chunk = Cord::GetFirstChunk(rhs);
1385 
1386   size_t compared_size = std::min(lhs_chunk.size(), rhs_chunk.size());
1387   assert(size_to_compare >= compared_size);
1388   int memcmp_res = ::memcmp(lhs_chunk.data(), rhs_chunk.data(), compared_size);
1389   if (compared_size == size_to_compare || memcmp_res != 0) {
1390     return ComputeCompareResult<ResultType>(memcmp_res);
1391   }
1392 
1393   return ComputeCompareResult<ResultType>(
1394       lhs.CompareSlowPath(rhs, compared_size, size_to_compare));
1395 }
1396 
EqualsImpl(absl::string_view rhs,size_t size_to_compare) const1397 bool Cord::EqualsImpl(absl::string_view rhs, size_t size_to_compare) const {
1398   return GenericCompare<bool>(*this, rhs, size_to_compare);
1399 }
1400 
EqualsImpl(const Cord & rhs,size_t size_to_compare) const1401 bool Cord::EqualsImpl(const Cord& rhs, size_t size_to_compare) const {
1402   return GenericCompare<bool>(*this, rhs, size_to_compare);
1403 }
1404 
1405 template <typename RHS>
SharedCompareImpl(const Cord & lhs,const RHS & rhs)1406 inline int SharedCompareImpl(const Cord& lhs, const RHS& rhs) {
1407   size_t lhs_size = lhs.size();
1408   size_t rhs_size = rhs.size();
1409   if (lhs_size == rhs_size) {
1410     return GenericCompare<int>(lhs, rhs, lhs_size);
1411   }
1412   if (lhs_size < rhs_size) {
1413     auto data_comp_res = GenericCompare<int>(lhs, rhs, lhs_size);
1414     return data_comp_res == 0 ? -1 : data_comp_res;
1415   }
1416 
1417   auto data_comp_res = GenericCompare<int>(lhs, rhs, rhs_size);
1418   return data_comp_res == 0 ? +1 : data_comp_res;
1419 }
1420 
Compare(absl::string_view rhs) const1421 int Cord::Compare(absl::string_view rhs) const {
1422   return SharedCompareImpl(*this, rhs);
1423 }
1424 
CompareImpl(const Cord & rhs) const1425 int Cord::CompareImpl(const Cord& rhs) const {
1426   return SharedCompareImpl(*this, rhs);
1427 }
1428 
EndsWith(absl::string_view rhs) const1429 bool Cord::EndsWith(absl::string_view rhs) const {
1430   size_t my_size = size();
1431   size_t rhs_size = rhs.size();
1432 
1433   if (my_size < rhs_size) return false;
1434 
1435   Cord tmp(*this);
1436   tmp.RemovePrefix(my_size - rhs_size);
1437   return tmp.EqualsImpl(rhs, rhs_size);
1438 }
1439 
EndsWith(const Cord & rhs) const1440 bool Cord::EndsWith(const Cord& rhs) const {
1441   size_t my_size = size();
1442   size_t rhs_size = rhs.size();
1443 
1444   if (my_size < rhs_size) return false;
1445 
1446   Cord tmp(*this);
1447   tmp.RemovePrefix(my_size - rhs_size);
1448   return tmp.EqualsImpl(rhs, rhs_size);
1449 }
1450 
1451 // --------------------------------------------------------------------
1452 // Misc.
1453 
operator std::string() const1454 Cord::operator std::string() const {
1455   std::string s;
1456   absl::CopyCordToString(*this, &s);
1457   return s;
1458 }
1459 
CopyCordToString(const Cord & src,std::string * dst)1460 void CopyCordToString(const Cord& src, std::string* dst) {
1461   if (!src.contents_.is_tree()) {
1462     src.contents_.CopyTo(dst);
1463   } else {
1464     absl::strings_internal::STLStringResizeUninitialized(dst, src.size());
1465     src.CopyToArraySlowPath(&(*dst)[0]);
1466   }
1467 }
1468 
CopyToArraySlowPath(char * dst) const1469 void Cord::CopyToArraySlowPath(char* dst) const {
1470   assert(contents_.is_tree());
1471   absl::string_view fragment;
1472   if (GetFlatAux(contents_.tree(), &fragment)) {
1473     memcpy(dst, fragment.data(), fragment.size());
1474     return;
1475   }
1476   for (absl::string_view chunk : Chunks()) {
1477     memcpy(dst, chunk.data(), chunk.size());
1478     dst += chunk.size();
1479   }
1480 }
1481 
// Advances the iterator to the next leaf chunk. The stack holds the right
// children still to be visited, so traversal is leftmost-leaf order.
Cord::ChunkIterator& Cord::ChunkIterator::operator++() {
  ABSL_HARDENING_ASSERT(bytes_remaining_ > 0 &&
                        "Attempted to iterate past `end()`");
  assert(bytes_remaining_ >= current_chunk_.size());
  bytes_remaining_ -= current_chunk_.size();

  if (stack_of_right_children_.empty()) {
    assert(!current_chunk_.empty());  // Called on invalid iterator.
    // We have reached the end of the Cord.
    return *this;
  }

  // Process the next node on the stack.
  CordRep* node = stack_of_right_children_.back();
  stack_of_right_children_.pop_back();

  // Walk down the left branches until we hit a non-CONCAT node. Save the
  // right children to the stack for subsequent traversal.
  while (node->tag == CONCAT) {
    stack_of_right_children_.push_back(node->concat()->right);
    node = node->concat()->left;
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  // By now `node` must be a data leaf (FLAT or EXTERNAL).
  assert(node->tag == EXTERNAL || node->tag >= FLAT);
  assert(length != 0);
  const char* data =
      node->tag == EXTERNAL ? node->external()->base : node->data;
  current_chunk_ = absl::string_view(data + offset, length);
  current_leaf_ = node;
  return *this;
}
1521 
// Advances the iterator by n bytes and returns those bytes as a new Cord,
// sharing subtrees with the source wherever possible instead of copying.
Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) {
  ABSL_HARDENING_ASSERT(bytes_remaining_ >= n &&
                        "Attempted to iterate past `end()`");
  Cord subcord;

  if (n <= InlineRep::kMaxInline) {
    // Range to read fits in inline data. Flatten it.
    char* data = subcord.contents_.set_data(n);
    while (n > current_chunk_.size()) {
      memcpy(data, current_chunk_.data(), current_chunk_.size());
      data += current_chunk_.size();
      n -= current_chunk_.size();
      ++*this;
    }
    memcpy(data, current_chunk_.data(), n);
    if (n < current_chunk_.size()) {
      RemoveChunkPrefix(n);
    } else if (n > 0) {
      ++*this;
    }
    return subcord;
  }
  if (n < current_chunk_.size()) {
    // Range to read is a proper subrange of the current chunk.
    assert(current_leaf_ != nullptr);
    CordRep* subnode = Ref(current_leaf_);
    const char* data =
        subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data;
    // Offset of the chunk within its leaf is recovered from pointer distance.
    subnode = NewSubstring(subnode, current_chunk_.data() - data, n);
    subcord.contents_.set_tree(VerifyTree(subnode));
    RemoveChunkPrefix(n);
    return subcord;
  }

  // Range to read begins with a proper subrange of the current chunk.
  assert(!current_chunk_.empty());
  assert(current_leaf_ != nullptr);
  CordRep* subnode = Ref(current_leaf_);
  if (current_chunk_.size() < subnode->length) {
    const char* data =
        subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data;
    subnode = NewSubstring(subnode, current_chunk_.data() - data,
                           current_chunk_.size());
  }
  n -= current_chunk_.size();
  bytes_remaining_ -= current_chunk_.size();

  // Process the next node(s) on the stack, reading whole subtrees depending on
  // their length and how many bytes we are advancing.
  CordRep* node = nullptr;
  while (!stack_of_right_children_.empty()) {
    node = stack_of_right_children_.back();
    stack_of_right_children_.pop_back();
    if (node->length > n) break;
    // TODO(qrczak): This might unnecessarily recreate existing concat nodes.
    // Avoiding that would need pretty complicated logic (instead of
    // current_leaf_, keep current_subtree_ which points to the highest node
    // such that the current leaf can be found on the path of left children
    // starting from current_subtree_; delay creating subnode while node is
    // below current_subtree_; find the proper node along the path of left
    // children starting from current_subtree_ if this loop exits while staying
    // below current_subtree_; etc.; alternatively, push parents instead of
    // right children on the stack).
    subnode = Concat(subnode, Ref(node));
    n -= node->length;
    bytes_remaining_ -= node->length;
    node = nullptr;
  }

  if (node == nullptr) {
    // We have reached the end of the Cord.
    assert(bytes_remaining_ == 0);
    subcord.contents_.set_tree(VerifyTree(subnode));
    return subcord;
  }

  // Walk down the appropriate branches until we hit a non-CONCAT node. Save the
  // right children to the stack for subsequent traversal.
  while (node->tag == CONCAT) {
    if (node->concat()->left->length > n) {
      // Push right, descend left.
      stack_of_right_children_.push_back(node->concat()->right);
      node = node->concat()->left;
    } else {
      // Read left, descend right.
      subnode = Concat(subnode, Ref(node->concat()->left));
      n -= node->concat()->left->length;
      bytes_remaining_ -= node->concat()->left->length;
      node = node->concat()->right;
    }
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  // Range to read ends with a proper (possibly empty) subrange of the current
  // chunk.
  assert(node->tag == EXTERNAL || node->tag >= FLAT);
  assert(length > n);
  if (n > 0) subnode = Concat(subnode, NewSubstring(Ref(node), offset, n));
  const char* data =
      node->tag == EXTERNAL ? node->external()->base : node->data;
  current_chunk_ = absl::string_view(data + offset + n, length - n);
  current_leaf_ = node;
  bytes_remaining_ -= n;
  subcord.contents_.set_tree(VerifyTree(subnode));
  return subcord;
}
1635 
// Advances the iterator by n bytes, where n spans beyond the current chunk.
// Skips whole subtrees off the stack where possible, then descends into the
// subtree containing the target byte.
void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) {
  assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`");
  assert(n >= current_chunk_.size());  // This should only be called when
                                       // iterating to a new node.

  n -= current_chunk_.size();
  bytes_remaining_ -= current_chunk_.size();

  // Process the next node(s) on the stack, skipping whole subtrees depending on
  // their length and how many bytes we are advancing.
  CordRep* node = nullptr;
  while (!stack_of_right_children_.empty()) {
    node = stack_of_right_children_.back();
    stack_of_right_children_.pop_back();
    if (node->length > n) break;
    n -= node->length;
    bytes_remaining_ -= node->length;
    node = nullptr;
  }

  if (node == nullptr) {
    // We have reached the end of the Cord.
    assert(bytes_remaining_ == 0);
    return;
  }

  // Walk down the appropriate branches until we hit a non-CONCAT node. Save the
  // right children to the stack for subsequent traversal.
  while (node->tag == CONCAT) {
    if (node->concat()->left->length > n) {
      // Push right, descend left.
      stack_of_right_children_.push_back(node->concat()->right);
      node = node->concat()->left;
    } else {
      // Skip left, descend right.
      n -= node->concat()->left->length;
      bytes_remaining_ -= node->concat()->left->length;
      node = node->concat()->right;
    }
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  // By now `node` must be a data leaf (FLAT or EXTERNAL), and the target
  // byte lies n bytes into it.
  assert(node->tag == EXTERNAL || node->tag >= FLAT);
  assert(length > n);
  const char* data =
      node->tag == EXTERNAL ? node->external()->base : node->data;
  current_chunk_ = absl::string_view(data + offset + n, length - n);
  current_leaf_ = node;
  bytes_remaining_ -= n;
}
1693 
operator [](size_t i) const1694 char Cord::operator[](size_t i) const {
1695   ABSL_HARDENING_ASSERT(i < size());
1696   size_t offset = i;
1697   const CordRep* rep = contents_.tree();
1698   if (rep == nullptr) {
1699     return contents_.data()[i];
1700   }
1701   while (true) {
1702     assert(rep != nullptr);
1703     assert(offset < rep->length);
1704     if (rep->tag >= FLAT) {
1705       // Get the "i"th character directly from the flat array.
1706       return rep->data[offset];
1707     } else if (rep->tag == EXTERNAL) {
1708       // Get the "i"th character from the external array.
1709       return rep->external()->base[offset];
1710     } else if (rep->tag == CONCAT) {
1711       // Recursively branch to the side of the concatenation that the "i"th
1712       // character is on.
1713       size_t left_length = rep->concat()->left->length;
1714       if (offset < left_length) {
1715         rep = rep->concat()->left;
1716       } else {
1717         offset -= left_length;
1718         rep = rep->concat()->right;
1719       }
1720     } else {
1721       // This must be a substring a node, so bypass it to get to the child.
1722       assert(rep->tag == SUBSTRING);
1723       offset += rep->substring()->start;
1724       rep = rep->substring()->child;
1725     }
1726   }
1727 }
1728 
FlattenSlowPath()1729 absl::string_view Cord::FlattenSlowPath() {
1730   size_t total_size = size();
1731   CordRep* new_rep;
1732   char* new_buffer;
1733 
1734   // Try to put the contents into a new flat rep. If they won't fit in the
1735   // biggest possible flat node, use an external rep instead.
1736   if (total_size <= kMaxFlatLength) {
1737     new_rep = NewFlat(total_size);
1738     new_rep->length = total_size;
1739     new_buffer = new_rep->data;
1740     CopyToArraySlowPath(new_buffer);
1741   } else {
1742     new_buffer = std::allocator<char>().allocate(total_size);
1743     CopyToArraySlowPath(new_buffer);
1744     new_rep = absl::cord_internal::NewExternalRep(
1745         absl::string_view(new_buffer, total_size), [](absl::string_view s) {
1746           std::allocator<char>().deallocate(const_cast<char*>(s.data()),
1747                                             s.size());
1748         });
1749   }
1750   Unref(contents_.tree());
1751   contents_.set_tree(new_rep);
1752   return absl::string_view(new_buffer, total_size);
1753 }
1754 
GetFlatAux(CordRep * rep,absl::string_view * fragment)1755 /* static */ bool Cord::GetFlatAux(CordRep* rep, absl::string_view* fragment) {
1756   assert(rep != nullptr);
1757   if (rep->tag >= FLAT) {
1758     *fragment = absl::string_view(rep->data, rep->length);
1759     return true;
1760   } else if (rep->tag == EXTERNAL) {
1761     *fragment = absl::string_view(rep->external()->base, rep->length);
1762     return true;
1763   } else if (rep->tag == SUBSTRING) {
1764     CordRep* child = rep->substring()->child;
1765     if (child->tag >= FLAT) {
1766       *fragment =
1767           absl::string_view(child->data + rep->substring()->start, rep->length);
1768       return true;
1769     } else if (child->tag == EXTERNAL) {
1770       *fragment = absl::string_view(
1771           child->external()->base + rep->substring()->start, rep->length);
1772       return true;
1773     }
1774   }
1775   return false;
1776 }
1777 
ForEachChunkAux(absl::cord_internal::CordRep * rep,absl::FunctionRef<void (absl::string_view)> callback)1778 /* static */ void Cord::ForEachChunkAux(
1779     absl::cord_internal::CordRep* rep,
1780     absl::FunctionRef<void(absl::string_view)> callback) {
1781   assert(rep != nullptr);
1782   int stack_pos = 0;
1783   constexpr int stack_max = 128;
1784   // Stack of right branches for tree traversal
1785   absl::cord_internal::CordRep* stack[stack_max];
1786   absl::cord_internal::CordRep* current_node = rep;
1787   while (true) {
1788     if (current_node->tag == CONCAT) {
1789       if (stack_pos == stack_max) {
1790         // There's no more room on our stack array to add another right branch,
1791         // and the idea is to avoid allocations, so call this function
1792         // recursively to navigate this subtree further.  (This is not something
1793         // we expect to happen in practice).
1794         ForEachChunkAux(current_node, callback);
1795 
1796         // Pop the next right branch and iterate.
1797         current_node = stack[--stack_pos];
1798         continue;
1799       } else {
1800         // Save the right branch for later traversal and continue down the left
1801         // branch.
1802         stack[stack_pos++] = current_node->concat()->right;
1803         current_node = current_node->concat()->left;
1804         continue;
1805       }
1806     }
1807     // This is a leaf node, so invoke our callback.
1808     absl::string_view chunk;
1809     bool success = GetFlatAux(current_node, &chunk);
1810     assert(success);
1811     if (success) {
1812       callback(chunk);
1813     }
1814     if (stack_pos == 0) {
1815       // end of traversal
1816       return;
1817     }
1818     current_node = stack[--stack_pos];
1819   }
1820 }
1821 
// Writes a human-readable dump of the tree rooted at `rep` to `*os`, one node
// per line: refcount, length, optionally the node's address (when
// `include_data`), a balanced/unbalanced marker, and a tag-specific
// description indented by depth. With `include_data`, leaf byte contents are
// also escaped and printed. Traversal is iterative: right children and their
// indent levels are pushed onto explicit stacks while descending left.
static void DumpNode(CordRep* rep, bool include_data, std::ostream* os) {
  const int kIndentStep = 1;
  int indent = 0;
  absl::InlinedVector<CordRep*, kInlinedVectorSize> stack;
  absl::InlinedVector<int, kInlinedVectorSize> indents;
  for (;;) {
    *os << std::setw(3) << rep->refcount.Get();
    *os << " " << std::setw(7) << rep->length;
    *os << " [";
    // Note: prints the node's address here, not its contents.
    if (include_data) *os << static_cast<void*>(rep);
    *os << "]";
    *os << " " << (IsRootBalanced(rep) ? 'b' : 'u');
    *os << " " << std::setw(indent) << "";
    if (rep->tag == CONCAT) {
      *os << "CONCAT depth=" << Depth(rep) << "\n";
      indent += kIndentStep;
      // Remember the right child's indent so it is restored when we resume.
      indents.push_back(indent);
      stack.push_back(rep->concat()->right);
      rep = rep->concat()->left;
    } else if (rep->tag == SUBSTRING) {
      *os << "SUBSTRING @ " << rep->substring()->start << "\n";
      indent += kIndentStep;
      rep = rep->substring()->child;
    } else {  // Leaf
      if (rep->tag == EXTERNAL) {
        *os << "EXTERNAL [";
        if (include_data)
          *os << absl::CEscape(std::string(rep->external()->base, rep->length));
        *os << "]\n";
      } else {
        *os << "FLAT cap=" << TagToLength(rep->tag) << " [";
        if (include_data)
          *os << absl::CEscape(std::string(rep->data, rep->length));
        *os << "]\n";
      }
      // Resume at the most recently deferred right child, restoring its
      // recorded indentation; if none remain, the dump is complete.
      if (stack.empty()) break;
      rep = stack.back();
      stack.pop_back();
      indent = indents.back();
      indents.pop_back();
    }
  }
  // Every pushed indent must have been consumed alongside its node.
  ABSL_INTERNAL_CHECK(indents.empty(), "");
}
1866 
ReportError(CordRep * root,CordRep * node)1867 static std::string ReportError(CordRep* root, CordRep* node) {
1868   std::ostringstream buf;
1869   buf << "Error at node " << node << " in:";
1870   DumpNode(root, true, &buf);
1871   return buf.str();
1872 }
1873 
VerifyNode(CordRep * root,CordRep * start_node,bool full_validation)1874 static bool VerifyNode(CordRep* root, CordRep* start_node,
1875                        bool full_validation) {
1876   absl::InlinedVector<CordRep*, 2> worklist;
1877   worklist.push_back(start_node);
1878   do {
1879     CordRep* node = worklist.back();
1880     worklist.pop_back();
1881 
1882     ABSL_INTERNAL_CHECK(node != nullptr, ReportError(root, node));
1883     if (node != root) {
1884       ABSL_INTERNAL_CHECK(node->length != 0, ReportError(root, node));
1885     }
1886 
1887     if (node->tag == CONCAT) {
1888       ABSL_INTERNAL_CHECK(node->concat()->left != nullptr,
1889                           ReportError(root, node));
1890       ABSL_INTERNAL_CHECK(node->concat()->right != nullptr,
1891                           ReportError(root, node));
1892       ABSL_INTERNAL_CHECK((node->length == node->concat()->left->length +
1893                                                node->concat()->right->length),
1894                           ReportError(root, node));
1895       if (full_validation) {
1896         worklist.push_back(node->concat()->right);
1897         worklist.push_back(node->concat()->left);
1898       }
1899     } else if (node->tag >= FLAT) {
1900       ABSL_INTERNAL_CHECK(node->length <= TagToLength(node->tag),
1901                           ReportError(root, node));
1902     } else if (node->tag == EXTERNAL) {
1903       ABSL_INTERNAL_CHECK(node->external()->base != nullptr,
1904                           ReportError(root, node));
1905     } else if (node->tag == SUBSTRING) {
1906       ABSL_INTERNAL_CHECK(
1907           node->substring()->start < node->substring()->child->length,
1908           ReportError(root, node));
1909       ABSL_INTERNAL_CHECK(node->substring()->start + node->length <=
1910                               node->substring()->child->length,
1911                           ReportError(root, node));
1912     }
1913   } while (!worklist.empty());
1914   return true;
1915 }
1916 
1917 // Traverses the tree and computes the total memory allocated.
// Traverses the tree and computes the total memory allocated.
/* static */ size_t Cord::MemoryUsageAux(const CordRep* rep) {
  size_t total_mem_usage = 0;

  // Allow a quick exit for the common case that the root is a leaf.
  if (RepMemoryUsageLeaf(rep, &total_mem_usage)) {
    return total_mem_usage;
  }

  // Iterate over the tree. cur_node is never a leaf node and leaf nodes will
  // never be appended to tree_stack. This reduces overhead from manipulating
  // tree_stack.
  absl::InlinedVector<const CordRep*, kInlinedVectorSize> tree_stack;
  const CordRep* cur_node = rep;
  while (true) {
    const CordRep* next_node = nullptr;

    if (cur_node->tag == CONCAT) {
      total_mem_usage += sizeof(CordRepConcat);
      // Leaf children are accounted for immediately; interior children are
      // deferred. If both children are interior, the left one is parked on
      // tree_stack while the right one is visited next.
      const CordRep* left = cur_node->concat()->left;
      if (!RepMemoryUsageLeaf(left, &total_mem_usage)) {
        next_node = left;
      }

      const CordRep* right = cur_node->concat()->right;
      if (!RepMemoryUsageLeaf(right, &total_mem_usage)) {
        if (next_node) {
          tree_stack.push_back(next_node);
        }
        next_node = right;
      }
    } else {
      // Since cur_node is not a leaf or a concat node it must be a substring.
      assert(cur_node->tag == SUBSTRING);
      total_mem_usage += sizeof(CordRepSubstring);
      next_node = cur_node->substring()->child;
      // A leaf child is fully accounted for here; clear next_node so the
      // code below pops a deferred subtree (or finishes) instead.
      if (RepMemoryUsageLeaf(next_node, &total_mem_usage)) {
        next_node = nullptr;
      }
    }

    if (!next_node) {
      // No interior child to descend into: resume a deferred subtree, or
      // return the accumulated total once none remain.
      if (tree_stack.empty()) {
        return total_mem_usage;
      }
      next_node = tree_stack.back();
      tree_stack.pop_back();
    }
    cur_node = next_node;
  }
}
1968 
operator <<(std::ostream & out,const Cord & cord)1969 std::ostream& operator<<(std::ostream& out, const Cord& cord) {
1970   for (absl::string_view chunk : cord.Chunks()) {
1971     out.write(chunk.data(), chunk.size());
1972   }
1973   return out;
1974 }
1975 
1976 namespace strings_internal {
FlatOverhead()1977 size_t CordTestAccess::FlatOverhead() { return kFlatOverhead; }
MaxFlatLength()1978 size_t CordTestAccess::MaxFlatLength() { return kMaxFlatLength; }
FlatTagToLength(uint8_t tag)1979 size_t CordTestAccess::FlatTagToLength(uint8_t tag) {
1980   return TagToLength(tag);
1981 }
LengthToTag(size_t s)1982 uint8_t CordTestAccess::LengthToTag(size_t s) {
1983   ABSL_INTERNAL_CHECK(s <= kMaxFlatLength, absl::StrCat("Invalid length ", s));
1984   return AllocatedSizeToTag(s + kFlatOverhead);
1985 }
SizeofCordRepConcat()1986 size_t CordTestAccess::SizeofCordRepConcat() { return sizeof(CordRepConcat); }
SizeofCordRepExternal()1987 size_t CordTestAccess::SizeofCordRepExternal() {
1988   return sizeof(CordRepExternal);
1989 }
SizeofCordRepSubstring()1990 size_t CordTestAccess::SizeofCordRepSubstring() {
1991   return sizeof(CordRepSubstring);
1992 }
1993 }  // namespace strings_internal
1994 ABSL_NAMESPACE_END
1995 }  // namespace absl
1996