1 // Copyright 2020 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "absl/strings/cord.h"
16
17 #include <algorithm>
18 #include <atomic>
19 #include <cstddef>
20 #include <cstdio>
21 #include <cstdlib>
22 #include <iomanip>
23 #include <iostream>
24 #include <limits>
25 #include <ostream>
26 #include <sstream>
27 #include <type_traits>
28 #include <unordered_set>
29 #include <vector>
30
31 #include "absl/base/casts.h"
32 #include "absl/base/internal/raw_logging.h"
33 #include "absl/base/macros.h"
34 #include "absl/base/port.h"
35 #include "absl/container/fixed_array.h"
36 #include "absl/container/inlined_vector.h"
37 #include "absl/strings/escaping.h"
38 #include "absl/strings/internal/cord_internal.h"
39 #include "absl/strings/internal/resize_uninitialized.h"
40 #include "absl/strings/str_cat.h"
41 #include "absl/strings/str_format.h"
42 #include "absl/strings/str_join.h"
43 #include "absl/strings/string_view.h"
44
45 namespace absl {
46 ABSL_NAMESPACE_BEGIN
47
48 using ::absl::cord_internal::CordRep;
49 using ::absl::cord_internal::CordRepConcat;
50 using ::absl::cord_internal::CordRepExternal;
51 using ::absl::cord_internal::CordRepSubstring;
52
53 using ::absl::cord_internal::CONCAT;
54 using ::absl::cord_internal::EXTERNAL;
55 using ::absl::cord_internal::FLAT;
56 using ::absl::cord_internal::SUBSTRING;
57
namespace cord_internal {

// Checked downcast helpers: each accessor asserts that the node's tag
// matches the requested concrete representation before casting.

inline CordRepConcat* CordRep::concat() {
  assert(tag == CONCAT);
  return static_cast<CordRepConcat*>(this);
}

inline const CordRepConcat* CordRep::concat() const {
  assert(tag == CONCAT);
  return static_cast<const CordRepConcat*>(this);
}

inline CordRepSubstring* CordRep::substring() {
  assert(tag == SUBSTRING);
  return static_cast<CordRepSubstring*>(this);
}

inline const CordRepSubstring* CordRep::substring() const {
  assert(tag == SUBSTRING);
  return static_cast<const CordRepSubstring*>(this);
}

inline CordRepExternal* CordRep::external() {
  assert(tag == EXTERNAL);
  return static_cast<CordRepExternal*>(this);
}

inline const CordRepExternal* CordRep::external() const {
  assert(tag == EXTERNAL);
  return static_cast<const CordRepExternal*>(this);
}

}  // namespace cord_internal
91
// Per-node overhead of a flat CordRep: the number of bytes before the inline
// `data` payload within the allocation.
static const size_t kFlatOverhead = offsetof(CordRep, data);

// Largest and smallest flat node lengths we are willing to allocate
// Flat allocation size is stored in tag, which currently can encode sizes up
// to 4K, encoded as multiple of either 8 or 32 bytes.
// If we allow for larger sizes, we need to change this to 8/64, 16/128, etc.
static constexpr size_t kMaxFlatSize = 4096;
static constexpr size_t kMaxFlatLength = kMaxFlatSize - kFlatOverhead;
static constexpr size_t kMinFlatLength = 32 - kFlatOverhead;

// Prefer copying blocks of at most this size, otherwise reference count.
static const size_t kMaxBytesToCopy = 511;
104
// Helper functions for rounded div, and rounding to exact sizes.

// Integer division of n by m, with the quotient rounded up.
static size_t DivUp(size_t n, size_t m) {
  return (n + m - 1) / m;
}

// Smallest multiple of m that is greater than or equal to n.
static size_t RoundUp(size_t n, size_t m) {
  return DivUp(n, m) * m;
}
108
// Returns the size to the nearest equal or larger value that can be
// expressed exactly as a tag value: sizes up to 1024 bytes are encoded in
// multiples of 8, larger sizes in multiples of 32.
static size_t RoundUpForTag(size_t size) {
  const size_t multiple = (size <= 1024) ? 8 : 32;
  return ((size + multiple - 1) / multiple) * multiple;
}
114
// Converts the allocated size to a tag, rounding down if the size
// does not exactly match a 'tag expressible' size value. The result is
// undefined if the size exceeds the maximum size that can be encoded in
// a tag, i.e., if size is larger than TagToAllocatedSize(<max tag>).
static uint8_t AllocatedSizeToTag(size_t size) {
  size_t tag;
  if (size <= 1024) {
    // Small sizes: tag counts multiples of 8 bytes.
    tag = size / 8;
  } else {
    // Large sizes: tags 129+ count multiples of 32 bytes past 1024.
    tag = 128 + size / 32 - 1024 / 32;
  }
  assert(tag <= std::numeric_limits<uint8_t>::max());
  return tag;
}
124
// Converts the provided tag to the corresponding allocated size: tags up to
// 128 encode multiples of 8 bytes, larger tags encode multiples of 32 bytes
// past 1024. Exact inverse of AllocatedSizeToTag for expressible sizes.
static constexpr size_t TagToAllocatedSize(uint8_t tag) {
  return (tag > 128) ? (1024 + (tag - 128) * 32) : (tag * 8);
}
129
// Converts the provided tag to the corresponding available data length,
// i.e. the allocated size minus the CordRep header overhead.
static constexpr size_t TagToLength(uint8_t tag) {
  return TagToAllocatedSize(tag) - kFlatOverhead;
}

// Enforce that kMaxFlatSize maps to a well-known exact tag value.
static_assert(TagToAllocatedSize(224) == kMaxFlatSize, "Bad tag logic");
137
Fibonacci(unsigned char n,uint64_t a=0,uint64_t b=1)138 constexpr uint64_t Fibonacci(unsigned char n, uint64_t a = 0, uint64_t b = 1) {
139 return n == 0 ? a : Fibonacci(n - 1, b, a + b);
140 }
141
142 static_assert(Fibonacci(63) == 6557470319842,
143 "Fibonacci values computed incorrectly");
144
// Minimum length required for a given depth tree -- a tree is considered
// balanced if
//      length(t) >= min_length[depth(t)]
// The root node depth is allowed to become twice as large to reduce rebalancing
// for larger strings (see IsRootBalanced).
// Note: min_length[i] == Fibonacci(i + 2), the classic AVL-style bound for
// concatenation trees.
static constexpr uint64_t min_length[] = {
    Fibonacci(2),          Fibonacci(3),  Fibonacci(4),  Fibonacci(5),
    Fibonacci(6),          Fibonacci(7),  Fibonacci(8),  Fibonacci(9),
    Fibonacci(10),         Fibonacci(11), Fibonacci(12), Fibonacci(13),
    Fibonacci(14),         Fibonacci(15), Fibonacci(16), Fibonacci(17),
    Fibonacci(18),         Fibonacci(19), Fibonacci(20), Fibonacci(21),
    Fibonacci(22),         Fibonacci(23), Fibonacci(24), Fibonacci(25),
    Fibonacci(26),         Fibonacci(27), Fibonacci(28), Fibonacci(29),
    Fibonacci(30),         Fibonacci(31), Fibonacci(32), Fibonacci(33),
    Fibonacci(34),         Fibonacci(35), Fibonacci(36), Fibonacci(37),
    Fibonacci(38),         Fibonacci(39), Fibonacci(40), Fibonacci(41),
    Fibonacci(42),         Fibonacci(43), Fibonacci(44), Fibonacci(45),
    Fibonacci(46),         Fibonacci(47),
    0xffffffffffffffffull,  // Avoid overflow
};

// Number of entries in min_length; trees deeper than this are always
// considered unbalanced.
static const int kMinLengthSize = ABSL_ARRAYSIZE(min_length);

// The inlined size to use with absl::InlinedVector.
//
// Note: The InlinedVectors in this file (and in cord.h) do not need to use
// the same value for their inlined size. The fact that they do is historical.
// It may be desirable for each to use a different inlined size optimized for
// that InlinedVector's usage.
//
// TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
// the inlined vector size (47 exists for backward compatibility).
static const int kInlinedVectorSize = 47;
178
IsRootBalanced(CordRep * node)179 static inline bool IsRootBalanced(CordRep* node) {
180 if (node->tag != CONCAT) {
181 return true;
182 } else if (node->concat()->depth() <= 15) {
183 return true;
184 } else if (node->concat()->depth() > kMinLengthSize) {
185 return false;
186 } else {
187 // Allow depth to become twice as large as implied by fibonacci rule to
188 // reduce rebalancing for larger strings.
189 return (node->length >= min_length[node->concat()->depth() / 2]);
190 }
191 }
192
static CordRep* Rebalance(CordRep* node);
static void DumpNode(CordRep* rep, bool include_data, std::ostream* os);
static bool VerifyNode(CordRep* root, CordRep* start_node,
                       bool full_validation);

// Debug-mode tree validation. Returns `node` unchanged so it can wrap
// expressions; compiles to a pass-through in NDEBUG builds.
static inline CordRep* VerifyTree(CordRep* node) {
  // Verification is expensive, so only do it in debug mode.
  // Even in debug mode we normally do only light validation.
  // If you are debugging Cord itself, you should define the
  // macro EXTRA_CORD_VALIDATION, e.g. by adding
  // --copt=-DEXTRA_CORD_VALIDATION to the blaze line.
#ifdef EXTRA_CORD_VALIDATION
  assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/true));
#else   // EXTRA_CORD_VALIDATION
  assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/false));
#endif  // EXTRA_CORD_VALIDATION
  // Reference VerifyNode so NDEBUG builds do not warn about an unused
  // function.
  static_cast<void>(&VerifyNode);

  return node;
}
213
214 // --------------------------------------------------------------------
215 // Memory management
216
Ref(CordRep * rep)217 inline CordRep* Ref(CordRep* rep) {
218 if (rep != nullptr) {
219 rep->refcount.Increment();
220 }
221 return rep;
222 }
223
// This internal routine is called from the cold path of Unref below. Keeping it
// in a separate routine allows good inlining of Unref into many profitable call
// sites. However, the call to this function can be highly disruptive to the
// register pressure in those callers. To minimize the cost to callers, we use
// a special LLVM calling convention that preserves most registers. This allows
// the call to this routine in cold paths to not disrupt the caller's register
// pressure. This calling convention is not available on all platforms; we
// intentionally allow LLVM to ignore the attribute rather than attempting to
// hardcode the list of supported platforms.
#if defined(__clang__) && !defined(__i386__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wattributes"
__attribute__((preserve_most))
#pragma clang diagnostic pop
#endif
// Frees the tree rooted at `rep`, whose refcount has already dropped to
// zero. Traversal is iterative: right children whose refcount hits zero are
// parked on an explicit `pending` stack while the left spine is processed in
// place, avoiding unbounded recursion on deep trees.
static void UnrefInternal(CordRep* rep) {
  assert(rep != nullptr);

  absl::InlinedVector<CordRep*, kInlinedVectorSize> pending;
  while (true) {
    assert(!rep->refcount.IsImmortal());
    if (rep->tag == CONCAT) {
      CordRepConcat* rep_concat = rep->concat();
      CordRep* right = rep_concat->right;
      if (!right->refcount.Decrement()) {
        pending.push_back(right);
      }
      CordRep* left = rep_concat->left;
      delete rep_concat;
      rep = nullptr;
      if (!left->refcount.Decrement()) {
        rep = left;
        continue;
      }
    } else if (rep->tag == EXTERNAL) {
      // The releaser invoker runs the user-supplied releaser and deletes the
      // external rep itself.
      CordRepExternal* rep_external = rep->external();
      assert(rep_external->releaser_invoker != nullptr);
      rep_external->releaser_invoker(rep_external);
      rep = nullptr;
    } else if (rep->tag == SUBSTRING) {
      CordRepSubstring* rep_substring = rep->substring();
      CordRep* child = rep_substring->child;
      delete rep_substring;
      rep = nullptr;
      if (!child->refcount.Decrement()) {
        rep = child;
        continue;
      }
    } else {
      // Flat CordReps are allocated and constructed with raw ::operator new
      // and placement new, and must be destructed and deallocated
      // accordingly.
#if defined(__cpp_sized_deallocation)
      size_t size = TagToAllocatedSize(rep->tag);
      rep->~CordRep();
      ::operator delete(rep, size);
#else
      rep->~CordRep();
      ::operator delete(rep);
#endif
      rep = nullptr;
    }

    // Pick up any deferred right-children; done when the stack drains.
    if (!pending.empty()) {
      rep = pending.back();
      pending.pop_back();
    } else {
      break;
    }
  }
}
295
Unref(CordRep * rep)296 inline void Unref(CordRep* rep) {
297 // Fast-path for two common, hot cases: a null rep and a shared root.
298 if (ABSL_PREDICT_TRUE(rep == nullptr ||
299 rep->refcount.DecrementExpectHighRefcount())) {
300 return;
301 }
302
303 UnrefInternal(rep);
304 }
305
306 // Return the depth of a node
Depth(const CordRep * rep)307 static int Depth(const CordRep* rep) {
308 if (rep->tag == CONCAT) {
309 return rep->concat()->depth();
310 } else {
311 return 0;
312 }
313 }
314
SetConcatChildren(CordRepConcat * concat,CordRep * left,CordRep * right)315 static void SetConcatChildren(CordRepConcat* concat, CordRep* left,
316 CordRep* right) {
317 concat->left = left;
318 concat->right = right;
319
320 concat->length = left->length + right->length;
321 concat->set_depth(1 + std::max(Depth(left), Depth(right)));
322 }
323
// Create a concatenation of the specified nodes.
// Does not change the refcounts of "left" and "right".
// The returned node has a refcount of 1.
static CordRep* RawConcat(CordRep* left, CordRep* right) {
  // Avoid making degenerate concat nodes (one child is empty)
  if (left == nullptr || left->length == 0) {
    Unref(left);  // Unref tolerates nullptr.
    return right;
  }
  if (right == nullptr || right->length == 0) {
    Unref(right);
    return left;
  }

  CordRepConcat* rep = new CordRepConcat();
  rep->tag = CONCAT;
  SetConcatChildren(rep, left, right);

  return rep;
}
344
Concat(CordRep * left,CordRep * right)345 static CordRep* Concat(CordRep* left, CordRep* right) {
346 CordRep* rep = RawConcat(left, right);
347 if (rep != nullptr && !IsRootBalanced(rep)) {
348 rep = Rebalance(rep);
349 }
350 return VerifyTree(rep);
351 }
352
// Make a balanced tree out of an array of leaf nodes.
// Consumes the array contents; requires n >= 1.
static CordRep* MakeBalancedTree(CordRep** reps, size_t n) {
  // Make repeated passes over the array, merging adjacent pairs
  // until we are left with just a single node.
  while (n > 1) {
    size_t dst = 0;
    for (size_t src = 0; src < n; src += 2) {
      if (src + 1 < n) {
        reps[dst] = Concat(reps[src], reps[src + 1]);
      } else {
        // Odd element at the end of a pass carries over unmerged.
        reps[dst] = reps[src];
      }
      dst++;
    }
    n = dst;
  }

  return reps[0];
}
372
// Create a new flat node sized for approximately `length_hint` bytes of
// data (clamped to [kMinFlatLength, kMaxFlatLength]). The node's `length`
// field is left unset; the caller must set it.
static CordRep* NewFlat(size_t length_hint) {
  if (length_hint <= kMinFlatLength) {
    length_hint = kMinFlatLength;
  } else if (length_hint > kMaxFlatLength) {
    length_hint = kMaxFlatLength;
  }

  // Round size up so it matches a size we can exactly express in a tag.
  const size_t size = RoundUpForTag(length_hint + kFlatOverhead);
  // Flat nodes use raw ::operator new + placement new; see the matching
  // teardown in UnrefInternal.
  void* const raw_rep = ::operator new(size);
  CordRep* rep = new (raw_rep) CordRep();
  rep->tag = AllocatedSizeToTag(size);
  return VerifyTree(rep);
}
388
// Create a new tree out of the specified array.
// The returned node has a refcount of 1.
// `alloc_hint` reserves extra capacity in the last flat node for future
// in-place appends.
static CordRep* NewTree(const char* data,
                        size_t length,
                        size_t alloc_hint) {
  if (length == 0) return nullptr;
  // One flat node per kMaxFlatLength chunk.
  absl::FixedArray<CordRep*> reps((length - 1) / kMaxFlatLength + 1);
  size_t n = 0;
  do {
    const size_t len = std::min(length, kMaxFlatLength);
    CordRep* rep = NewFlat(len + alloc_hint);
    rep->length = len;
    memcpy(rep->data, data, len);
    reps[n++] = VerifyTree(rep);
    data += len;
    length -= len;
  } while (length != 0);
  return MakeBalancedTree(reps.data(), n);
}
408
namespace cord_internal {

// Fills in the length/tag/base fields of a freshly allocated external rep
// pointing at caller-owned `data`. `data` must be non-empty.
void InitializeCordRepExternal(absl::string_view data, CordRepExternal* rep) {
  assert(!data.empty());
  rep->length = data.size();
  rep->tag = EXTERNAL;
  rep->base = data.data();
  VerifyTree(rep);
}

}  // namespace cord_internal
420
NewSubstring(CordRep * child,size_t offset,size_t length)421 static CordRep* NewSubstring(CordRep* child, size_t offset, size_t length) {
422 // Never create empty substring nodes
423 if (length == 0) {
424 Unref(child);
425 return nullptr;
426 } else {
427 CordRepSubstring* rep = new CordRepSubstring();
428 assert((offset + length) <= child->length);
429 rep->length = length;
430 rep->tag = SUBSTRING;
431 rep->start = offset;
432 rep->child = child;
433 return VerifyTree(rep);
434 }
435 }
436
437 // --------------------------------------------------------------------
438 // Cord::InlineRep functions
439
440 constexpr unsigned char Cord::InlineRep::kMaxInline;
441
set_data(const char * data,size_t n,bool nullify_tail)442 inline void Cord::InlineRep::set_data(const char* data, size_t n,
443 bool nullify_tail) {
444 static_assert(kMaxInline == 15, "set_data is hard-coded for a length of 15");
445
446 cord_internal::SmallMemmove(data_.as_chars, data, n, nullify_tail);
447 set_tagged_size(static_cast<char>(n));
448 }
449
set_data(size_t n)450 inline char* Cord::InlineRep::set_data(size_t n) {
451 assert(n <= kMaxInline);
452 ResetToEmpty();
453 set_tagged_size(static_cast<char>(n));
454 return data_.as_chars;
455 }
456
force_tree(size_t extra_hint)457 inline CordRep* Cord::InlineRep::force_tree(size_t extra_hint) {
458 size_t len = tagged_size();
459 if (len > kMaxInline) {
460 return data_.as_tree.rep;
461 }
462
463 CordRep* result = NewFlat(len + extra_hint);
464 result->length = len;
465 static_assert(kMinFlatLength >= sizeof(data_.as_chars), "");
466 memcpy(result->data, data_.as_chars, sizeof(data_.as_chars));
467 set_tree(result);
468 return result;
469 }
470
reduce_size(size_t n)471 inline void Cord::InlineRep::reduce_size(size_t n) {
472 size_t tag = tagged_size();
473 assert(tag <= kMaxInline);
474 assert(tag >= n);
475 tag -= n;
476 memset(data_.as_chars + tag, 0, n);
477 set_tagged_size(static_cast<char>(tag));
478 }
479
remove_prefix(size_t n)480 inline void Cord::InlineRep::remove_prefix(size_t n) {
481 cord_internal::SmallMemmove(data_.as_chars, data_.as_chars + n,
482 tagged_size() - n);
483 reduce_size(n);
484 }
485
AppendTree(CordRep * tree)486 void Cord::InlineRep::AppendTree(CordRep* tree) {
487 if (tree == nullptr) return;
488 size_t len = tagged_size();
489 if (len == 0) {
490 set_tree(tree);
491 } else {
492 set_tree(Concat(force_tree(0), tree));
493 }
494 }
495
PrependTree(CordRep * tree)496 void Cord::InlineRep::PrependTree(CordRep* tree) {
497 assert(tree != nullptr);
498 size_t len = tagged_size();
499 if (len == 0) {
500 set_tree(tree);
501 } else {
502 set_tree(Concat(tree, force_tree(0)));
503 }
504 }
505
// Searches for a non-full flat node at the rightmost leaf of the tree. If a
// suitable leaf is found, the function will update the length field for all
// nodes to account for the size increase. The append region address will be
// written to region and the actual size increase will be written to size.
// Only nodes that are exclusively owned (refcount of one) may be edited in
// place; otherwise the function fails and returns false.
static inline bool PrepareAppendRegion(CordRep* root, char** region,
                                       size_t* size, size_t max_length) {
  // Search down the right-hand path for a non-full FLAT node.
  CordRep* dst = root;
  while (dst->tag == CONCAT && dst->refcount.IsOne()) {
    dst = dst->concat()->right;
  }

  // Tags >= FLAT identify flat nodes; anything else cannot be extended.
  if (dst->tag < FLAT || !dst->refcount.IsOne()) {
    *region = nullptr;
    *size = 0;
    return false;
  }

  const size_t in_use = dst->length;
  const size_t capacity = TagToLength(dst->tag);
  if (in_use == capacity) {
    *region = nullptr;
    *size = 0;
    return false;
  }

  size_t size_increase = std::min(capacity - in_use, max_length);

  // We need to update the length fields for all nodes, including the leaf node.
  for (CordRep* rep = root; rep != dst; rep = rep->concat()->right) {
    rep->length += size_increase;
  }
  dst->length += size_increase;

  *region = dst->data + in_use;
  *size = size_increase;
  return true;
}
544
GetAppendRegion(char ** region,size_t * size,size_t max_length)545 void Cord::InlineRep::GetAppendRegion(char** region, size_t* size,
546 size_t max_length) {
547 if (max_length == 0) {
548 *region = nullptr;
549 *size = 0;
550 return;
551 }
552
553 // Try to fit in the inline buffer if possible.
554 size_t inline_length = tagged_size();
555 if (inline_length < kMaxInline && max_length <= kMaxInline - inline_length) {
556 *region = data_.as_chars + inline_length;
557 *size = max_length;
558 set_tagged_size(static_cast<char>(inline_length + max_length));
559 return;
560 }
561
562 CordRep* root = force_tree(max_length);
563
564 if (PrepareAppendRegion(root, region, size, max_length)) {
565 return;
566 }
567
568 // Allocate new node.
569 CordRep* new_node =
570 NewFlat(std::max(static_cast<size_t>(root->length), max_length));
571 new_node->length =
572 std::min(static_cast<size_t>(TagToLength(new_node->tag)), max_length);
573 *region = new_node->data;
574 *size = new_node->length;
575 replace_tree(Concat(root, new_node));
576 }
577
GetAppendRegion(char ** region,size_t * size)578 void Cord::InlineRep::GetAppendRegion(char** region, size_t* size) {
579 const size_t max_length = std::numeric_limits<size_t>::max();
580
581 // Try to fit in the inline buffer if possible.
582 size_t inline_length = tagged_size();
583 if (inline_length < kMaxInline) {
584 *region = data_.as_chars + inline_length;
585 *size = kMaxInline - inline_length;
586 set_tagged_size(kMaxInline);
587 return;
588 }
589
590 CordRep* root = force_tree(max_length);
591
592 if (PrepareAppendRegion(root, region, size, max_length)) {
593 return;
594 }
595
596 // Allocate new node.
597 CordRep* new_node = NewFlat(root->length);
598 new_node->length = TagToLength(new_node->tag);
599 *region = new_node->data;
600 *size = new_node->length;
601 replace_tree(Concat(root, new_node));
602 }
603
// If the rep is a leaf, this will increment the value at total_mem_usage and
// will return true.
static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) {
  if (rep->tag >= FLAT) {
    // Flat node: the tag encodes the exact allocation size.
    *total_mem_usage += TagToAllocatedSize(rep->tag);
    return true;
  }
  if (rep->tag == EXTERNAL) {
    // NOTE(review): this charges sizeof(CordRepConcat) as the header cost of
    // an external node, but external reps are allocated as
    // CordRepExternalImpl<Releaser> — presumably this is a deliberate
    // approximation of the header size; confirm it is intentional.
    *total_mem_usage += sizeof(CordRepConcat) + rep->length;
    return true;
  }
  return false;
}
617
AssignSlow(const Cord::InlineRep & src)618 void Cord::InlineRep::AssignSlow(const Cord::InlineRep& src) {
619 ClearSlow();
620
621 data_ = src.data_;
622 if (is_tree()) {
623 Ref(tree());
624 }
625 }
626
ClearSlow()627 void Cord::InlineRep::ClearSlow() {
628 if (is_tree()) {
629 Unref(tree());
630 }
631 ResetToEmpty();
632 }
633
634 // --------------------------------------------------------------------
635 // Constructors and destructors
636
Cord(const Cord & src)637 Cord::Cord(const Cord& src) : contents_(src.contents_) {
638 Ref(contents_.tree()); // Does nothing if contents_ has embedded data
639 }
640
Cord(absl::string_view src)641 Cord::Cord(absl::string_view src) {
642 const size_t n = src.size();
643 if (n <= InlineRep::kMaxInline) {
644 contents_.set_data(src.data(), n, false);
645 } else {
646 contents_.set_tree(NewTree(src.data(), n, 0));
647 }
648 }
649
// Constructs a Cord from a string-like rvalue. Short or mostly-empty strings
// are copied; otherwise the string is moved into an external rep so the Cord
// adopts its buffer without copying.
template <typename T, Cord::EnableIfString<T>>
Cord::Cord(T&& src) {
  if (
      // String is short: copy data to avoid external block overhead.
      src.size() <= kMaxBytesToCopy ||
      // String is wasteful: copy data to avoid pinning too much unused memory.
      src.size() < src.capacity() / 2
  ) {
    if (src.size() <= InlineRep::kMaxInline) {
      contents_.set_data(src.data(), src.size(), false);
    } else {
      contents_.set_tree(NewTree(src.data(), src.size(), 0));
    }
  } else {
    // The releaser is a no-op; destroying the StringReleaser (and the string
    // it owns) is what actually frees the data.
    struct StringReleaser {
      void operator()(absl::string_view /* data */) {}
      std::string data;
    };
    const absl::string_view original_data = src;
    auto* rep = static_cast<
        ::absl::cord_internal::CordRepExternalImpl<StringReleaser>*>(
        absl::cord_internal::NewExternalRep(
            original_data, StringReleaser{std::forward<T>(src)}));
    // Moving src may have invalidated its data pointer, so adjust it.
    rep->base = rep->template get<0>().data.data();
    contents_.set_tree(rep);
  }
}

template Cord::Cord(std::string&& src);
680
// The destruction code is separate so that the compiler can determine
// that it does not need to call the destructor on a moved-from Cord.
void Cord::DestroyCordSlow() {
  Unref(VerifyTree(contents_.tree()));
}

// --------------------------------------------------------------------
// Mutators

// Resets the Cord to empty, releasing any tree it owned.
void Cord::Clear() {
  Unref(contents_.clear());
}
693
operator =(absl::string_view src)694 Cord& Cord::operator=(absl::string_view src) {
695
696 const char* data = src.data();
697 size_t length = src.size();
698 CordRep* tree = contents_.tree();
699 if (length <= InlineRep::kMaxInline) {
700 // Embed into this->contents_
701 contents_.set_data(data, length, true);
702 Unref(tree);
703 return *this;
704 }
705 if (tree != nullptr && tree->tag >= FLAT &&
706 TagToLength(tree->tag) >= length && tree->refcount.IsOne()) {
707 // Copy in place if the existing FLAT node is reusable.
708 memmove(tree->data, data, length);
709 tree->length = length;
710 VerifyTree(tree);
711 return *this;
712 }
713 contents_.set_tree(NewTree(data, length, 0));
714 Unref(tree);
715 return *this;
716 }
717
// Assigns a string-like rvalue: short strings are copied via the
// string_view path, longer strings are moved into a new Cord.
template <typename T, Cord::EnableIfString<T>>
Cord& Cord::operator=(T&& src) {
  if (src.size() <= kMaxBytesToCopy) {
    *this = absl::string_view(src);
  } else {
    *this = Cord(std::forward<T>(src));
  }
  return *this;
}

template Cord& Cord::operator=(std::string&& src);
729
730 // TODO(sanjay): Move to Cord::InlineRep section of file. For now,
731 // we keep it here to make diffs easier.
AppendArray(const char * src_data,size_t src_size)732 void Cord::InlineRep::AppendArray(const char* src_data, size_t src_size) {
733 if (src_size == 0) return; // memcpy(_, nullptr, 0) is undefined.
734 // Try to fit in the inline buffer if possible.
735 size_t inline_length = tagged_size();
736 if (inline_length < kMaxInline && src_size <= kMaxInline - inline_length) {
737 // Append new data to embedded array
738 set_tagged_size(static_cast<char>(inline_length + src_size));
739 memcpy(data_.as_chars + inline_length, src_data, src_size);
740 return;
741 }
742
743 CordRep* root = tree();
744
745 size_t appended = 0;
746 if (root) {
747 char* region;
748 if (PrepareAppendRegion(root, ®ion, &appended, src_size)) {
749 memcpy(region, src_data, appended);
750 }
751 } else {
752 // It is possible that src_data == data_, but when we transition from an
753 // InlineRep to a tree we need to assign data_ = root via set_tree. To
754 // avoid corrupting the source data before we copy it, delay calling
755 // set_tree until after we've copied data.
756 // We are going from an inline size to beyond inline size. Make the new size
757 // either double the inlined size, or the added size + 10%.
758 const size_t size1 = inline_length * 2 + src_size;
759 const size_t size2 = inline_length + src_size / 10;
760 root = NewFlat(std::max<size_t>(size1, size2));
761 appended = std::min(src_size, TagToLength(root->tag) - inline_length);
762 memcpy(root->data, data_.as_chars, inline_length);
763 memcpy(root->data + inline_length, src_data, appended);
764 root->length = inline_length + appended;
765 set_tree(root);
766 }
767
768 src_data += appended;
769 src_size -= appended;
770 if (src_size == 0) {
771 return;
772 }
773
774 // Use new block(s) for any remaining bytes that were not handled above.
775 // Alloc extra memory only if the right child of the root of the new tree is
776 // going to be a FLAT node, which will permit further inplace appends.
777 size_t length = src_size;
778 if (src_size < kMaxFlatLength) {
779 // The new length is either
780 // - old size + 10%
781 // - old_size + src_size
782 // This will cause a reasonable conservative step-up in size that is still
783 // large enough to avoid excessive amounts of small fragments being added.
784 length = std::max<size_t>(root->length / 10, src_size);
785 }
786 set_tree(Concat(root, NewTree(src_data, src_size, length - src_size)));
787 }
788
TakeRep() const789 inline CordRep* Cord::TakeRep() const& {
790 return Ref(contents_.tree());
791 }
792
TakeRep()793 inline CordRep* Cord::TakeRep() && {
794 CordRep* rep = contents_.tree();
795 contents_.clear();
796 return rep;
797 }
798
// Shared implementation for Append(const Cord&) and Append(Cord&&). `C` is
// deduced as an lvalue or rvalue Cord so that TakeRep() either refs or
// steals the source tree.
template <typename C>
inline void Cord::AppendImpl(C&& src) {
  if (empty()) {
    // In case of an empty destination avoid allocating a new node, do not copy
    // data.
    *this = std::forward<C>(src);
    return;
  }

  // For short cords, it is faster to copy data if there is room in dst.
  const size_t src_size = src.contents_.size();
  if (src_size <= kMaxBytesToCopy) {
    CordRep* src_tree = src.contents_.tree();
    if (src_tree == nullptr) {
      // src has embedded data.
      contents_.AppendArray(src.contents_.data(), src_size);
      return;
    }
    if (src_tree->tag >= FLAT) {
      // src tree just has one flat node.
      contents_.AppendArray(src_tree->data, src_size);
      return;
    }
    if (&src == this) {
      // ChunkIterator below assumes that src is not modified during traversal.
      Append(Cord(src));
      return;
    }
    // TODO(mec): Should we only do this if "dst" has space?
    for (absl::string_view chunk : src.Chunks()) {
      Append(chunk);
    }
    return;
  }

  // Large source: link its tree in by reference instead of copying bytes.
  contents_.AppendTree(std::forward<C>(src).TakeRep());
}
836
Append(const Cord & src)837 void Cord::Append(const Cord& src) { AppendImpl(src); }
838
Append(Cord && src)839 void Cord::Append(Cord&& src) { AppendImpl(std::move(src)); }
840
841 template <typename T, Cord::EnableIfString<T>>
Append(T && src)842 void Cord::Append(T&& src) {
843 if (src.size() <= kMaxBytesToCopy) {
844 Append(absl::string_view(src));
845 } else {
846 Append(Cord(std::forward<T>(src)));
847 }
848 }
849
850 template void Cord::Append(std::string&& src);
851
Prepend(const Cord & src)852 void Cord::Prepend(const Cord& src) {
853 CordRep* src_tree = src.contents_.tree();
854 if (src_tree != nullptr) {
855 Ref(src_tree);
856 contents_.PrependTree(src_tree);
857 return;
858 }
859
860 // `src` cord is inlined.
861 absl::string_view src_contents(src.contents_.data(), src.contents_.size());
862 return Prepend(src_contents);
863 }
864
Prepend(absl::string_view src)865 void Cord::Prepend(absl::string_view src) {
866 if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined.
867 size_t cur_size = contents_.size();
868 if (!contents_.is_tree() && cur_size + src.size() <= InlineRep::kMaxInline) {
869 // Use embedded storage.
870 char data[InlineRep::kMaxInline + 1] = {0};
871 data[InlineRep::kMaxInline] = cur_size + src.size(); // set size
872 memcpy(data, src.data(), src.size());
873 memcpy(data + src.size(), contents_.data(), cur_size);
874 memcpy(reinterpret_cast<void*>(&contents_), data,
875 InlineRep::kMaxInline + 1);
876 } else {
877 contents_.PrependTree(NewTree(src.data(), src.size(), 0));
878 }
879 }
880
// Prepends a string-like rvalue: short strings are copied, longer ones are
// wrapped in a Cord (which may adopt the string buffer as an external rep).
template <typename T, Cord::EnableIfString<T>>
inline void Cord::Prepend(T&& src) {
  if (src.size() <= kMaxBytesToCopy) {
    Prepend(absl::string_view(src));
  } else {
    Prepend(Cord(std::forward<T>(src)));
  }
}

template void Cord::Prepend(std::string&& src);
891
RemovePrefixFrom(CordRep * node,size_t n)892 static CordRep* RemovePrefixFrom(CordRep* node, size_t n) {
893 if (n >= node->length) return nullptr;
894 if (n == 0) return Ref(node);
895 absl::InlinedVector<CordRep*, kInlinedVectorSize> rhs_stack;
896
897 while (node->tag == CONCAT) {
898 assert(n <= node->length);
899 if (n < node->concat()->left->length) {
900 // Push right to stack, descend left.
901 rhs_stack.push_back(node->concat()->right);
902 node = node->concat()->left;
903 } else {
904 // Drop left, descend right.
905 n -= node->concat()->left->length;
906 node = node->concat()->right;
907 }
908 }
909 assert(n <= node->length);
910
911 if (n == 0) {
912 Ref(node);
913 } else {
914 size_t start = n;
915 size_t len = node->length - n;
916 if (node->tag == SUBSTRING) {
917 // Consider in-place update of node, similar to in RemoveSuffixFrom().
918 start += node->substring()->start;
919 node = node->substring()->child;
920 }
921 node = NewSubstring(Ref(node), start, len);
922 }
923 while (!rhs_stack.empty()) {
924 node = Concat(node, Ref(rhs_stack.back()));
925 rhs_stack.pop_back();
926 }
927 return node;
928 }
929
// RemoveSuffixFrom() is very similar to RemovePrefixFrom(), with the
// exception that removing a suffix has an optimization where a node may be
// edited in place iff that node and all its ancestors have a refcount of 1.
static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) {
  if (n >= node->length) return nullptr;
  if (n == 0) return Ref(node);
  absl::InlinedVector<CordRep*, kInlinedVectorSize> lhs_stack;
  // Remains true only while every node from the root down to the current
  // `node` is uniquely owned, which makes an in-place length edit safe.
  bool inplace_ok = node->refcount.IsOne();

  while (node->tag == CONCAT) {
    assert(n <= node->length);
    if (n < node->concat()->right->length) {
      // Push left to stack, descend right.
      lhs_stack.push_back(node->concat()->left);
      node = node->concat()->right;
    } else {
      // Drop right, descend left.
      n -= node->concat()->right->length;
      node = node->concat()->left;
    }
    inplace_ok = inplace_ok && node->refcount.IsOne();
  }
  assert(n <= node->length);

  if (n == 0) {
    Ref(node);
  } else if (inplace_ok && node->tag != EXTERNAL) {
    // Consider making a new buffer if the current node capacity is much
    // larger than the new length.
    Ref(node);
    node->length -= n;
  } else {
    size_t start = 0;
    size_t len = node->length - n;
    if (node->tag == SUBSTRING) {
      start = node->substring()->start;
      node = node->substring()->child;
    }
    node = NewSubstring(Ref(node), start, len);
  }
  // Reattach the saved left subtrees, innermost first.
  while (!lhs_stack.empty()) {
    node = Concat(Ref(lhs_stack.back()), node);
    lhs_stack.pop_back();
  }
  return node;
}
976
RemovePrefix(size_t n)977 void Cord::RemovePrefix(size_t n) {
978 ABSL_INTERNAL_CHECK(n <= size(),
979 absl::StrCat("Requested prefix size ", n,
980 " exceeds Cord's size ", size()));
981 CordRep* tree = contents_.tree();
982 if (tree == nullptr) {
983 contents_.remove_prefix(n);
984 } else {
985 CordRep* newrep = RemovePrefixFrom(tree, n);
986 Unref(tree);
987 contents_.replace_tree(VerifyTree(newrep));
988 }
989 }
990
RemoveSuffix(size_t n)991 void Cord::RemoveSuffix(size_t n) {
992 ABSL_INTERNAL_CHECK(n <= size(),
993 absl::StrCat("Requested suffix size ", n,
994 " exceeds Cord's size ", size()));
995 CordRep* tree = contents_.tree();
996 if (tree == nullptr) {
997 contents_.reduce_size(n);
998 } else {
999 CordRep* newrep = RemoveSuffixFrom(tree, n);
1000 Unref(tree);
1001 contents_.replace_tree(VerifyTree(newrep));
1002 }
1003 }
1004
// Work item for NewSubRange().
struct SubRange {
  SubRange(CordRep* a_node, size_t a_pos, size_t a_n)
      : node(a_node), pos(a_pos), n(a_n) {}
  CordRep* node;  // nullptr means concat last 2 results.
  size_t pos;     // byte offset of the subrange within `node`.
  size_t n;       // length of the subrange in bytes.
};
1013
// Returns a new tree referencing bytes [pos, pos + n) of `node`, sharing the
// existing nodes where possible. Traverses iteratively using an explicit work
// list; a SubRange with a null node is a marker meaning "concat the last two
// results".
static CordRep* NewSubRange(CordRep* node, size_t pos, size_t n) {
  absl::InlinedVector<CordRep*, kInlinedVectorSize> results;
  absl::InlinedVector<SubRange, kInlinedVectorSize> todo;
  todo.push_back(SubRange(node, pos, n));
  do {
    const SubRange& sr = todo.back();
    node = sr.node;
    pos = sr.pos;
    n = sr.n;
    todo.pop_back();

    if (node == nullptr) {
      // Marker item: combine the two most recent results.
      assert(results.size() >= 2);
      CordRep* right = results.back();
      results.pop_back();
      CordRep* left = results.back();
      results.pop_back();
      results.push_back(Concat(left, right));
    } else if (pos == 0 && n == node->length) {
      // The whole node is in range; share it by reference.
      results.push_back(Ref(node));
    } else if (node->tag != CONCAT) {
      if (node->tag == SUBSTRING) {
        // Collapse nested substrings by addressing the underlying child.
        pos += node->substring()->start;
        node = node->substring()->child;
      }
      results.push_back(NewSubstring(Ref(node), pos, n));
    } else if (pos + n <= node->concat()->left->length) {
      // Range lies entirely in the left child.
      todo.push_back(SubRange(node->concat()->left, pos, n));
    } else if (pos >= node->concat()->left->length) {
      // Range lies entirely in the right child.
      pos -= node->concat()->left->length;
      todo.push_back(SubRange(node->concat()->right, pos, n));
    } else {
      // Range straddles both children: take the tail of the left child and
      // the head of the right child, then concat the two results.
      size_t left_n = node->concat()->left->length - pos;
      todo.push_back(SubRange(nullptr, 0, 0));  // Concat()
      todo.push_back(SubRange(node->concat()->right, 0, n - left_n));
      todo.push_back(SubRange(node->concat()->left, pos, left_n));
    }
  } while (!todo.empty());
  assert(results.size() == 1);
  return results[0];
}
1055
// Returns a Cord for the byte range [pos, pos + new_size) of this cord.
// Out-of-range arguments are clamped to the cord's size rather than treated
// as an error.
Cord Cord::Subcord(size_t pos, size_t new_size) const {
  Cord sub_cord;
  size_t length = size();
  if (pos > length) pos = length;
  if (new_size > length - pos) new_size = length - pos;
  CordRep* tree = contents_.tree();
  if (tree == nullptr) {
    // sub_cord is newly constructed, no need to re-zero-out the tail of
    // contents_ memory.
    sub_cord.contents_.set_data(contents_.data() + pos, new_size, false);
  } else if (new_size == 0) {
    // We want to return empty subcord, so nothing to do.
  } else if (new_size <= InlineRep::kMaxInline) {
    // Result fits inline: copy the bytes chunk by chunk into the inline rep.
    Cord::ChunkIterator it = chunk_begin();
    it.AdvanceBytes(pos);
    char* dest = sub_cord.contents_.data_.as_chars;
    size_t remaining_size = new_size;
    while (remaining_size > it->size()) {
      cord_internal::SmallMemmove(dest, it->data(), it->size());
      remaining_size -= it->size();
      dest += it->size();
      ++it;
    }
    cord_internal::SmallMemmove(dest, it->data(), remaining_size);
    sub_cord.contents_.set_tagged_size(new_size);
  } else {
    // Result is large: build a tree that shares the existing nodes.
    sub_cord.contents_.set_tree(NewSubRange(tree, pos, new_size));
  }
  return sub_cord;
}
1086
// --------------------------------------------------------------------
// Balancing

// Helper used to rebuild an unbalanced cord tree into a balanced one. Leaves
// and already-acceptable subtrees are collected into a forest (`trees_`),
// bucketed by the min_length table; ConcatNodes() then joins the forest into
// a single tree.
class CordForest {
 public:
  explicit CordForest(size_t length)
      : root_length_(length), trees_(kMinLengthSize, nullptr) {}

  // Iteratively traverses `cord_root`, tearing apart concat nodes that are
  // too deep or too short for their depth and adding all other nodes to the
  // forest.
  void Build(CordRep* cord_root) {
    std::vector<CordRep*> pending = {cord_root};

    while (!pending.empty()) {
      CordRep* node = pending.back();
      pending.pop_back();
      CheckNode(node);
      if (ABSL_PREDICT_FALSE(node->tag != CONCAT)) {
        AddNode(node);
        continue;
      }

      CordRepConcat* concat_node = node->concat();
      if (concat_node->depth() >= kMinLengthSize ||
          concat_node->length < min_length[concat_node->depth()]) {
        pending.push_back(concat_node->right);
        pending.push_back(concat_node->left);

        if (concat_node->refcount.IsOne()) {
          // Sole owner: recycle this concat node on the freelist (its left
          // pointer doubles as the freelist link).
          concat_node->left = concat_freelist_;
          concat_freelist_ = concat_node;
        } else {
          // Shared: keep the children alive and drop our reference to the
          // concat node itself.
          Ref(concat_node->right);
          Ref(concat_node->left);
          Unref(concat_node);
        }
      } else {
        AddNode(node);
      }
    }
  }

  // Joins all remaining forest entries into the final tree, smallest buckets
  // first; stops early once all of root_length_ is accounted for.
  CordRep* ConcatNodes() {
    CordRep* sum = nullptr;
    for (auto* node : trees_) {
      if (node == nullptr) continue;

      sum = PrependNode(node, sum);
      root_length_ -= node->length;
      if (root_length_ == 0) break;
    }
    ABSL_INTERNAL_CHECK(sum != nullptr, "Failed to locate sum node");
    return VerifyTree(sum);
  }

 private:
  CordRep* AppendNode(CordRep* node, CordRep* sum) {
    return (sum == nullptr) ? node : MakeConcat(sum, node);
  }

  CordRep* PrependNode(CordRep* node, CordRep* sum) {
    return (sum == nullptr) ? node : MakeConcat(node, sum);
  }

  // Inserts `node` into the forest, merging it with any smaller pending
  // trees so the bucket invariant is maintained.
  void AddNode(CordRep* node) {
    CordRep* sum = nullptr;

    // Collect together everything with which we will merge with node
    int i = 0;
    for (; node->length > min_length[i + 1]; ++i) {
      auto& tree_at_i = trees_[i];

      if (tree_at_i == nullptr) continue;
      sum = PrependNode(tree_at_i, sum);
      tree_at_i = nullptr;
    }

    sum = AppendNode(node, sum);

    // Insert sum into appropriate place in the forest
    for (; sum->length >= min_length[i]; ++i) {
      auto& tree_at_i = trees_[i];
      if (tree_at_i == nullptr) continue;

      sum = MakeConcat(tree_at_i, sum);
      tree_at_i = nullptr;
    }

    // min_length[0] == 1, which means sum->length >= min_length[0]
    assert(i > 0);
    trees_[i - 1] = sum;
  }

  // Make concat node trying to reuse existing CordRepConcat nodes we
  // already collected in the concat_freelist_.
  CordRep* MakeConcat(CordRep* left, CordRep* right) {
    if (concat_freelist_ == nullptr) return RawConcat(left, right);

    CordRepConcat* rep = concat_freelist_;
    if (concat_freelist_->left == nullptr) {
      concat_freelist_ = nullptr;
    } else {
      concat_freelist_ = concat_freelist_->left->concat();
    }
    SetConcatChildren(rep, left, right);

    return rep;
  }

  // Sanity-checks basic invariants of `node`: nonzero length; for concat
  // nodes, both children present and lengths consistent.
  static void CheckNode(CordRep* node) {
    ABSL_INTERNAL_CHECK(node->length != 0u, "");
    if (node->tag == CONCAT) {
      ABSL_INTERNAL_CHECK(node->concat()->left != nullptr, "");
      ABSL_INTERNAL_CHECK(node->concat()->right != nullptr, "");
      ABSL_INTERNAL_CHECK(node->length == (node->concat()->left->length +
                                           node->concat()->right->length),
                          "");
    }
  }

  // Bytes of the original tree not yet consumed by ConcatNodes().
  size_t root_length_;

  // use an inlined vector instead of a flat array to get bounds checking
  absl::InlinedVector<CordRep*, kInlinedVectorSize> trees_;

  // List of concat nodes we can re-use for Cord balancing.
  CordRepConcat* concat_freelist_ = nullptr;
};
1213
Rebalance(CordRep * node)1214 static CordRep* Rebalance(CordRep* node) {
1215 VerifyTree(node);
1216 assert(node->tag == CONCAT);
1217
1218 if (node->length == 0) {
1219 return nullptr;
1220 }
1221
1222 CordForest forest(node->length);
1223 forest.Build(node);
1224 return forest.ConcatNodes();
1225 }
1226
// --------------------------------------------------------------------
// Comparators

namespace {

// Maps an arbitrary memcmp result onto {-1, 0, 1}.
int ClampResult(int memcmp_res) {
  return static_cast<int>(memcmp_res > 0) - static_cast<int>(memcmp_res < 0);
}

// Compares the common-length prefix of *lhs and *rhs. On equality, consumes
// the compared bytes from both views and from *size_to_compare; returns the
// raw memcmp result (nonzero means the chunks differ).
int CompareChunks(absl::string_view* lhs, absl::string_view* rhs,
                  size_t* size_to_compare) {
  size_t compared_size = std::min(lhs->size(), rhs->size());
  assert(*size_to_compare >= compared_size);
  *size_to_compare -= compared_size;

  int memcmp_res = ::memcmp(lhs->data(), rhs->data(), compared_size);
  if (memcmp_res != 0) return memcmp_res;

  lhs->remove_prefix(compared_size);
  rhs->remove_prefix(compared_size);

  return 0;
}

// This overload set computes comparison results from a memcmp result. This
// interface is used inside GenericCompare below. Different implementations
// are specialized for int and bool. For int we clamp the result to the
// {-1, 0, 1} set. For bool we are just interested in "value == 0".
template <typename ResultType>
ResultType ComputeCompareResult(int memcmp_res) {
  return ClampResult(memcmp_res);
}
template <>
bool ComputeCompareResult<bool>(int memcmp_res) {
  return memcmp_res == 0;
}

}  // namespace
1265
// Helper routine. Locates the first flat chunk of the Cord without
// initializing the iterator.
inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const {
  size_t n = tagged_size();
  if (n <= kMaxInline) {
    // Inline representation: the bytes live directly in this object.
    return absl::string_view(data_.as_chars, n);
  }

  CordRep* node = tree();
  if (node->tag >= FLAT) {
    return absl::string_view(node->data, node->length);
  }

  if (node->tag == EXTERNAL) {
    return absl::string_view(node->external()->base, node->length);
  }

  // Walk down the left branches until we hit a non-CONCAT node.
  while (node->tag == CONCAT) {
    node = node->concat()->left;
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  assert(length != 0);

  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  if (node->tag >= FLAT) {
    return absl::string_view(node->data + offset, length);
  }

  assert((node->tag == EXTERNAL) && "Expect FLAT or EXTERNAL node here");

  return absl::string_view(node->external()->base + offset, length);
}
1306
// Slow-path chunkwise comparison against a string_view. Called after the
// first `compared_size` bytes were already found equal; compares at most
// `size_to_compare` bytes in total, then breaks ties by which side ran out
// of bytes first.
inline int Cord::CompareSlowPath(absl::string_view rhs, size_t compared_size,
                                 size_t size_to_compare) const {
  // Refills *chunk from the iterator when it is exhausted; returns false at
  // the end of the cord.
  auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) {
    if (!chunk->empty()) return true;
    ++*it;
    if (it->bytes_remaining_ == 0) return false;
    *chunk = **it;
    return true;
  };

  Cord::ChunkIterator lhs_it = chunk_begin();

  // compared_size is inside first chunk.
  absl::string_view lhs_chunk =
      (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view();
  assert(compared_size <= lhs_chunk.size());
  assert(compared_size <= rhs.size());
  lhs_chunk.remove_prefix(compared_size);
  rhs.remove_prefix(compared_size);
  size_to_compare -= compared_size;  // skip already compared size.

  while (advance(&lhs_it, &lhs_chunk) && !rhs.empty()) {
    int comparison_result = CompareChunks(&lhs_chunk, &rhs, &size_to_compare);
    if (comparison_result != 0) return comparison_result;
    if (size_to_compare == 0) return 0;
  }

  // All compared bytes equal: the longer remaining side is "greater".
  return static_cast<int>(rhs.empty()) - static_cast<int>(lhs_chunk.empty());
}
1336
// Slow-path chunkwise comparison against another Cord. Same contract as the
// string_view overload above: the first `compared_size` bytes were already
// found equal, and at most `size_to_compare` bytes are compared in total.
inline int Cord::CompareSlowPath(const Cord& rhs, size_t compared_size,
                                 size_t size_to_compare) const {
  // Refills *chunk from the iterator when it is exhausted; returns false at
  // the end of the cord.
  auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) {
    if (!chunk->empty()) return true;
    ++*it;
    if (it->bytes_remaining_ == 0) return false;
    *chunk = **it;
    return true;
  };

  Cord::ChunkIterator lhs_it = chunk_begin();
  Cord::ChunkIterator rhs_it = rhs.chunk_begin();

  // compared_size is inside both first chunks.
  absl::string_view lhs_chunk =
      (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view();
  absl::string_view rhs_chunk =
      (rhs_it.bytes_remaining_ != 0) ? *rhs_it : absl::string_view();
  assert(compared_size <= lhs_chunk.size());
  assert(compared_size <= rhs_chunk.size());
  lhs_chunk.remove_prefix(compared_size);
  rhs_chunk.remove_prefix(compared_size);
  size_to_compare -= compared_size;  // skip already compared size.

  while (advance(&lhs_it, &lhs_chunk) && advance(&rhs_it, &rhs_chunk)) {
    int memcmp_res = CompareChunks(&lhs_chunk, &rhs_chunk, &size_to_compare);
    if (memcmp_res != 0) return memcmp_res;
    if (size_to_compare == 0) return 0;
  }

  // All compared bytes equal: the side with bytes left over is "greater".
  return static_cast<int>(rhs_chunk.empty()) -
         static_cast<int>(lhs_chunk.empty());
}
1370
// Returns the first contiguous chunk of `c` without constructing an iterator.
inline absl::string_view Cord::GetFirstChunk(const Cord& c) {
  return c.contents_.FindFlatStartPiece();
}
// A string_view is its own (single) first chunk.
inline absl::string_view Cord::GetFirstChunk(absl::string_view sv) {
  return sv;
}
1377
// Compares up to 'size_to_compare' bytes of 'lhs' with 'rhs'. It is assumed
// that 'size_to_compare' is greater than the size of the smallest of the
// first chunks.
template <typename ResultType, typename RHS>
ResultType GenericCompare(const Cord& lhs, const RHS& rhs,
                          size_t size_to_compare) {
  absl::string_view lhs_chunk = Cord::GetFirstChunk(lhs);
  absl::string_view rhs_chunk = Cord::GetFirstChunk(rhs);

  size_t compared_size = std::min(lhs_chunk.size(), rhs_chunk.size());
  assert(size_to_compare >= compared_size);
  int memcmp_res = ::memcmp(lhs_chunk.data(), rhs_chunk.data(), compared_size);
  if (compared_size == size_to_compare || memcmp_res != 0) {
    // Decided entirely by the first chunks; no iteration needed.
    return ComputeCompareResult<ResultType>(memcmp_res);
  }

  return ComputeCompareResult<ResultType>(
      lhs.CompareSlowPath(rhs, compared_size, size_to_compare));
}
1396
// Returns true iff the first `size_to_compare` bytes of this cord equal the
// corresponding bytes of `rhs`.
bool Cord::EqualsImpl(absl::string_view rhs, size_t size_to_compare) const {
  return GenericCompare<bool>(*this, rhs, size_to_compare);
}

bool Cord::EqualsImpl(const Cord& rhs, size_t size_to_compare) const {
  return GenericCompare<bool>(*this, rhs, size_to_compare);
}
1404
1405 template <typename RHS>
SharedCompareImpl(const Cord & lhs,const RHS & rhs)1406 inline int SharedCompareImpl(const Cord& lhs, const RHS& rhs) {
1407 size_t lhs_size = lhs.size();
1408 size_t rhs_size = rhs.size();
1409 if (lhs_size == rhs_size) {
1410 return GenericCompare<int>(lhs, rhs, lhs_size);
1411 }
1412 if (lhs_size < rhs_size) {
1413 auto data_comp_res = GenericCompare<int>(lhs, rhs, lhs_size);
1414 return data_comp_res == 0 ? -1 : data_comp_res;
1415 }
1416
1417 auto data_comp_res = GenericCompare<int>(lhs, rhs, rhs_size);
1418 return data_comp_res == 0 ? +1 : data_comp_res;
1419 }
1420
// Three-way comparison against a string_view; returns -1, 0, or +1.
int Cord::Compare(absl::string_view rhs) const {
  return SharedCompareImpl(*this, rhs);
}

// Three-way comparison against another Cord; returns -1, 0, or +1.
int Cord::CompareImpl(const Cord& rhs) const {
  return SharedCompareImpl(*this, rhs);
}
1428
EndsWith(absl::string_view rhs) const1429 bool Cord::EndsWith(absl::string_view rhs) const {
1430 size_t my_size = size();
1431 size_t rhs_size = rhs.size();
1432
1433 if (my_size < rhs_size) return false;
1434
1435 Cord tmp(*this);
1436 tmp.RemovePrefix(my_size - rhs_size);
1437 return tmp.EqualsImpl(rhs, rhs_size);
1438 }
1439
EndsWith(const Cord & rhs) const1440 bool Cord::EndsWith(const Cord& rhs) const {
1441 size_t my_size = size();
1442 size_t rhs_size = rhs.size();
1443
1444 if (my_size < rhs_size) return false;
1445
1446 Cord tmp(*this);
1447 tmp.RemovePrefix(my_size - rhs_size);
1448 return tmp.EqualsImpl(rhs, rhs_size);
1449 }
1450
1451 // --------------------------------------------------------------------
1452 // Misc.
1453
operator std::string() const1454 Cord::operator std::string() const {
1455 std::string s;
1456 absl::CopyCordToString(*this, &s);
1457 return s;
1458 }
1459
CopyCordToString(const Cord & src,std::string * dst)1460 void CopyCordToString(const Cord& src, std::string* dst) {
1461 if (!src.contents_.is_tree()) {
1462 src.contents_.CopyTo(dst);
1463 } else {
1464 absl::strings_internal::STLStringResizeUninitialized(dst, src.size());
1465 src.CopyToArraySlowPath(&(*dst)[0]);
1466 }
1467 }
1468
CopyToArraySlowPath(char * dst) const1469 void Cord::CopyToArraySlowPath(char* dst) const {
1470 assert(contents_.is_tree());
1471 absl::string_view fragment;
1472 if (GetFlatAux(contents_.tree(), &fragment)) {
1473 memcpy(dst, fragment.data(), fragment.size());
1474 return;
1475 }
1476 for (absl::string_view chunk : Chunks()) {
1477 memcpy(dst, chunk.data(), chunk.size());
1478 dst += chunk.size();
1479 }
1480 }
1481
// Advances to the next chunk: pops the next saved right subtree and walks
// down its left spine to the next leaf, pushing right children encountered
// along the way.
Cord::ChunkIterator& Cord::ChunkIterator::operator++() {
  ABSL_HARDENING_ASSERT(bytes_remaining_ > 0 &&
                        "Attempted to iterate past `end()`");
  assert(bytes_remaining_ >= current_chunk_.size());
  bytes_remaining_ -= current_chunk_.size();

  if (stack_of_right_children_.empty()) {
    assert(!current_chunk_.empty());  // Called on invalid iterator.
    // We have reached the end of the Cord.
    return *this;
  }

  // Process the next node on the stack.
  CordRep* node = stack_of_right_children_.back();
  stack_of_right_children_.pop_back();

  // Walk down the left branches until we hit a non-CONCAT node. Save the
  // right children to the stack for subsequent traversal.
  while (node->tag == CONCAT) {
    stack_of_right_children_.push_back(node->concat()->right);
    node = node->concat()->left;
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  assert(node->tag == EXTERNAL || node->tag >= FLAT);
  assert(length != 0);
  const char* data =
      node->tag == EXTERNAL ? node->external()->base : node->data;
  current_chunk_ = absl::string_view(data + offset, length);
  current_leaf_ = node;
  return *this;
}
1521
// Advances the iterator by `n` bytes and returns those bytes as a Cord.
// Small results are flattened into the inline representation; larger results
// are built as a tree that shares the existing leaves instead of copying.
Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) {
  ABSL_HARDENING_ASSERT(bytes_remaining_ >= n &&
                        "Attempted to iterate past `end()`");
  Cord subcord;

  if (n <= InlineRep::kMaxInline) {
    // Range to read fits in inline data. Flatten it.
    char* data = subcord.contents_.set_data(n);
    while (n > current_chunk_.size()) {
      memcpy(data, current_chunk_.data(), current_chunk_.size());
      data += current_chunk_.size();
      n -= current_chunk_.size();
      ++*this;
    }
    memcpy(data, current_chunk_.data(), n);
    if (n < current_chunk_.size()) {
      RemoveChunkPrefix(n);
    } else if (n > 0) {
      ++*this;
    }
    return subcord;
  }
  if (n < current_chunk_.size()) {
    // Range to read is a proper subrange of the current chunk.
    assert(current_leaf_ != nullptr);
    CordRep* subnode = Ref(current_leaf_);
    const char* data =
        subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data;
    subnode = NewSubstring(subnode, current_chunk_.data() - data, n);
    subcord.contents_.set_tree(VerifyTree(subnode));
    RemoveChunkPrefix(n);
    return subcord;
  }

  // Range to read begins with a proper subrange of the current chunk.
  assert(!current_chunk_.empty());
  assert(current_leaf_ != nullptr);
  CordRep* subnode = Ref(current_leaf_);
  if (current_chunk_.size() < subnode->length) {
    const char* data =
        subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data;
    subnode = NewSubstring(subnode, current_chunk_.data() - data,
                           current_chunk_.size());
  }
  n -= current_chunk_.size();
  bytes_remaining_ -= current_chunk_.size();

  // Process the next node(s) on the stack, reading whole subtrees depending on
  // their length and how many bytes we are advancing.
  CordRep* node = nullptr;
  while (!stack_of_right_children_.empty()) {
    node = stack_of_right_children_.back();
    stack_of_right_children_.pop_back();
    if (node->length > n) break;
    // TODO(qrczak): This might unnecessarily recreate existing concat nodes.
    // Avoiding that would need pretty complicated logic (instead of
    // current_leaf_, keep current_subtree_ which points to the highest node
    // such that the current leaf can be found on the path of left children
    // starting from current_subtree_; delay creating subnode while node is
    // below current_subtree_; find the proper node along the path of left
    // children starting from current_subtree_ if this loop exits while staying
    // below current_subtree_; etc.; alternatively, push parents instead of
    // right children on the stack).
    subnode = Concat(subnode, Ref(node));
    n -= node->length;
    bytes_remaining_ -= node->length;
    node = nullptr;
  }

  if (node == nullptr) {
    // We have reached the end of the Cord.
    assert(bytes_remaining_ == 0);
    subcord.contents_.set_tree(VerifyTree(subnode));
    return subcord;
  }

  // Walk down the appropriate branches until we hit a non-CONCAT node. Save the
  // right children to the stack for subsequent traversal.
  while (node->tag == CONCAT) {
    if (node->concat()->left->length > n) {
      // Push right, descend left.
      stack_of_right_children_.push_back(node->concat()->right);
      node = node->concat()->left;
    } else {
      // Read left, descend right.
      subnode = Concat(subnode, Ref(node->concat()->left));
      n -= node->concat()->left->length;
      bytes_remaining_ -= node->concat()->left->length;
      node = node->concat()->right;
    }
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  // Range to read ends with a proper (possibly empty) subrange of the current
  // chunk.
  assert(node->tag == EXTERNAL || node->tag >= FLAT);
  assert(length > n);
  if (n > 0) subnode = Concat(subnode, NewSubstring(Ref(node), offset, n));
  const char* data =
      node->tag == EXTERNAL ? node->external()->base : node->data;
  current_chunk_ = absl::string_view(data + offset + n, length - n);
  current_leaf_ = node;
  bytes_remaining_ -= n;
  subcord.contents_.set_tree(VerifyTree(subnode));
  return subcord;
}
1635
// Advances the iterator by `n` bytes, where `n` extends past the current
// chunk. Skips whole saved subtrees where possible instead of visiting every
// chunk.
void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) {
  assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`");
  assert(n >= current_chunk_.size());  // This should only be called when
                                       // iterating to a new node.

  n -= current_chunk_.size();
  bytes_remaining_ -= current_chunk_.size();

  // Process the next node(s) on the stack, skipping whole subtrees depending on
  // their length and how many bytes we are advancing.
  CordRep* node = nullptr;
  while (!stack_of_right_children_.empty()) {
    node = stack_of_right_children_.back();
    stack_of_right_children_.pop_back();
    if (node->length > n) break;
    n -= node->length;
    bytes_remaining_ -= node->length;
    node = nullptr;
  }

  if (node == nullptr) {
    // We have reached the end of the Cord.
    assert(bytes_remaining_ == 0);
    return;
  }

  // Walk down the appropriate branches until we hit a non-CONCAT node. Save the
  // right children to the stack for subsequent traversal.
  while (node->tag == CONCAT) {
    if (node->concat()->left->length > n) {
      // Push right, descend left.
      stack_of_right_children_.push_back(node->concat()->right);
      node = node->concat()->left;
    } else {
      // Skip left, descend right.
      n -= node->concat()->left->length;
      bytes_remaining_ -= node->concat()->left->length;
      node = node->concat()->right;
    }
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  if (node->tag == SUBSTRING) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  assert(node->tag == EXTERNAL || node->tag >= FLAT);
  assert(length > n);
  const char* data =
      node->tag == EXTERNAL ? node->external()->base : node->data;
  current_chunk_ = absl::string_view(data + offset + n, length - n);
  current_leaf_ = node;
  bytes_remaining_ -= n;
}
1693
operator [](size_t i) const1694 char Cord::operator[](size_t i) const {
1695 ABSL_HARDENING_ASSERT(i < size());
1696 size_t offset = i;
1697 const CordRep* rep = contents_.tree();
1698 if (rep == nullptr) {
1699 return contents_.data()[i];
1700 }
1701 while (true) {
1702 assert(rep != nullptr);
1703 assert(offset < rep->length);
1704 if (rep->tag >= FLAT) {
1705 // Get the "i"th character directly from the flat array.
1706 return rep->data[offset];
1707 } else if (rep->tag == EXTERNAL) {
1708 // Get the "i"th character from the external array.
1709 return rep->external()->base[offset];
1710 } else if (rep->tag == CONCAT) {
1711 // Recursively branch to the side of the concatenation that the "i"th
1712 // character is on.
1713 size_t left_length = rep->concat()->left->length;
1714 if (offset < left_length) {
1715 rep = rep->concat()->left;
1716 } else {
1717 offset -= left_length;
1718 rep = rep->concat()->right;
1719 }
1720 } else {
1721 // This must be a substring a node, so bypass it to get to the child.
1722 assert(rep->tag == SUBSTRING);
1723 offset += rep->substring()->start;
1724 rep = rep->substring()->child;
1725 }
1726 }
1727 }
1728
// Flattens the cord into one contiguous buffer and returns a view of it.
// Contents that fit in the largest flat node are copied there; anything
// bigger goes into a heap buffer owned by an external node whose releaser
// frees it.
absl::string_view Cord::FlattenSlowPath() {
  size_t total_size = size();
  CordRep* new_rep;
  char* new_buffer;

  // Try to put the contents into a new flat rep. If they won't fit in the
  // biggest possible flat node, use an external rep instead.
  if (total_size <= kMaxFlatLength) {
    new_rep = NewFlat(total_size);
    new_rep->length = total_size;
    new_buffer = new_rep->data;
    CopyToArraySlowPath(new_buffer);
  } else {
    new_buffer = std::allocator<char>().allocate(total_size);
    CopyToArraySlowPath(new_buffer);
    new_rep = absl::cord_internal::NewExternalRep(
        absl::string_view(new_buffer, total_size), [](absl::string_view s) {
          // Releaser: frees the heap buffer when the external node dies.
          std::allocator<char>().deallocate(const_cast<char*>(s.data()),
                                            s.size());
        });
  }
  // Swap the old tree for the flattened representation.
  Unref(contents_.tree());
  contents_.set_tree(new_rep);
  return absl::string_view(new_buffer, total_size);
}
1754
GetFlatAux(CordRep * rep,absl::string_view * fragment)1755 /* static */ bool Cord::GetFlatAux(CordRep* rep, absl::string_view* fragment) {
1756 assert(rep != nullptr);
1757 if (rep->tag >= FLAT) {
1758 *fragment = absl::string_view(rep->data, rep->length);
1759 return true;
1760 } else if (rep->tag == EXTERNAL) {
1761 *fragment = absl::string_view(rep->external()->base, rep->length);
1762 return true;
1763 } else if (rep->tag == SUBSTRING) {
1764 CordRep* child = rep->substring()->child;
1765 if (child->tag >= FLAT) {
1766 *fragment =
1767 absl::string_view(child->data + rep->substring()->start, rep->length);
1768 return true;
1769 } else if (child->tag == EXTERNAL) {
1770 *fragment = absl::string_view(
1771 child->external()->base + rep->substring()->start, rep->length);
1772 return true;
1773 }
1774 }
1775 return false;
1776 }
1777
// Invokes `callback` on every leaf chunk of `rep` in order, using a fixed
// on-stack array of pending right branches; recursion is used only in the
// unexpected case that the array overflows.
/* static */ void Cord::ForEachChunkAux(
    absl::cord_internal::CordRep* rep,
    absl::FunctionRef<void(absl::string_view)> callback) {
  assert(rep != nullptr);
  int stack_pos = 0;
  constexpr int stack_max = 128;
  // Stack of right branches for tree traversal
  absl::cord_internal::CordRep* stack[stack_max];
  absl::cord_internal::CordRep* current_node = rep;
  while (true) {
    if (current_node->tag == CONCAT) {
      if (stack_pos == stack_max) {
        // There's no more room on our stack array to add another right branch,
        // and the idea is to avoid allocations, so call this function
        // recursively to navigate this subtree further. (This is not something
        // we expect to happen in practice).
        ForEachChunkAux(current_node, callback);

        // Pop the next right branch and iterate.
        current_node = stack[--stack_pos];
        continue;
      } else {
        // Save the right branch for later traversal and continue down the left
        // branch.
        stack[stack_pos++] = current_node->concat()->right;
        current_node = current_node->concat()->left;
        continue;
      }
    }
    // This is a leaf node, so invoke our callback.
    absl::string_view chunk;
    bool success = GetFlatAux(current_node, &chunk);
    assert(success);
    if (success) {
      callback(chunk);
    }
    if (stack_pos == 0) {
      // end of traversal
      return;
    }
    current_node = stack[--stack_pos];
  }
}
1821
DumpNode(CordRep * rep,bool include_data,std::ostream * os)1822 static void DumpNode(CordRep* rep, bool include_data, std::ostream* os) {
1823 const int kIndentStep = 1;
1824 int indent = 0;
1825 absl::InlinedVector<CordRep*, kInlinedVectorSize> stack;
1826 absl::InlinedVector<int, kInlinedVectorSize> indents;
1827 for (;;) {
1828 *os << std::setw(3) << rep->refcount.Get();
1829 *os << " " << std::setw(7) << rep->length;
1830 *os << " [";
1831 if (include_data) *os << static_cast<void*>(rep);
1832 *os << "]";
1833 *os << " " << (IsRootBalanced(rep) ? 'b' : 'u');
1834 *os << " " << std::setw(indent) << "";
1835 if (rep->tag == CONCAT) {
1836 *os << "CONCAT depth=" << Depth(rep) << "\n";
1837 indent += kIndentStep;
1838 indents.push_back(indent);
1839 stack.push_back(rep->concat()->right);
1840 rep = rep->concat()->left;
1841 } else if (rep->tag == SUBSTRING) {
1842 *os << "SUBSTRING @ " << rep->substring()->start << "\n";
1843 indent += kIndentStep;
1844 rep = rep->substring()->child;
1845 } else { // Leaf
1846 if (rep->tag == EXTERNAL) {
1847 *os << "EXTERNAL [";
1848 if (include_data)
1849 *os << absl::CEscape(std::string(rep->external()->base, rep->length));
1850 *os << "]\n";
1851 } else {
1852 *os << "FLAT cap=" << TagToLength(rep->tag) << " [";
1853 if (include_data)
1854 *os << absl::CEscape(std::string(rep->data, rep->length));
1855 *os << "]\n";
1856 }
1857 if (stack.empty()) break;
1858 rep = stack.back();
1859 stack.pop_back();
1860 indent = indents.back();
1861 indents.pop_back();
1862 }
1863 }
1864 ABSL_INTERNAL_CHECK(indents.empty(), "");
1865 }
1866
ReportError(CordRep * root,CordRep * node)1867 static std::string ReportError(CordRep* root, CordRep* node) {
1868 std::ostringstream buf;
1869 buf << "Error at node " << node << " in:";
1870 DumpNode(root, true, &buf);
1871 return buf.str();
1872 }
1873
VerifyNode(CordRep * root,CordRep * start_node,bool full_validation)1874 static bool VerifyNode(CordRep* root, CordRep* start_node,
1875 bool full_validation) {
1876 absl::InlinedVector<CordRep*, 2> worklist;
1877 worklist.push_back(start_node);
1878 do {
1879 CordRep* node = worklist.back();
1880 worklist.pop_back();
1881
1882 ABSL_INTERNAL_CHECK(node != nullptr, ReportError(root, node));
1883 if (node != root) {
1884 ABSL_INTERNAL_CHECK(node->length != 0, ReportError(root, node));
1885 }
1886
1887 if (node->tag == CONCAT) {
1888 ABSL_INTERNAL_CHECK(node->concat()->left != nullptr,
1889 ReportError(root, node));
1890 ABSL_INTERNAL_CHECK(node->concat()->right != nullptr,
1891 ReportError(root, node));
1892 ABSL_INTERNAL_CHECK((node->length == node->concat()->left->length +
1893 node->concat()->right->length),
1894 ReportError(root, node));
1895 if (full_validation) {
1896 worklist.push_back(node->concat()->right);
1897 worklist.push_back(node->concat()->left);
1898 }
1899 } else if (node->tag >= FLAT) {
1900 ABSL_INTERNAL_CHECK(node->length <= TagToLength(node->tag),
1901 ReportError(root, node));
1902 } else if (node->tag == EXTERNAL) {
1903 ABSL_INTERNAL_CHECK(node->external()->base != nullptr,
1904 ReportError(root, node));
1905 } else if (node->tag == SUBSTRING) {
1906 ABSL_INTERNAL_CHECK(
1907 node->substring()->start < node->substring()->child->length,
1908 ReportError(root, node));
1909 ABSL_INTERNAL_CHECK(node->substring()->start + node->length <=
1910 node->substring()->child->length,
1911 ReportError(root, node));
1912 }
1913 } while (!worklist.empty());
1914 return true;
1915 }
1916
1917 // Traverses the tree and computes the total memory allocated.
MemoryUsageAux(const CordRep * rep)1918 /* static */ size_t Cord::MemoryUsageAux(const CordRep* rep) {
1919 size_t total_mem_usage = 0;
1920
1921 // Allow a quick exit for the common case that the root is a leaf.
1922 if (RepMemoryUsageLeaf(rep, &total_mem_usage)) {
1923 return total_mem_usage;
1924 }
1925
1926 // Iterate over the tree. cur_node is never a leaf node and leaf nodes will
1927 // never be appended to tree_stack. This reduces overhead from manipulating
1928 // tree_stack.
1929 absl::InlinedVector<const CordRep*, kInlinedVectorSize> tree_stack;
1930 const CordRep* cur_node = rep;
1931 while (true) {
1932 const CordRep* next_node = nullptr;
1933
1934 if (cur_node->tag == CONCAT) {
1935 total_mem_usage += sizeof(CordRepConcat);
1936 const CordRep* left = cur_node->concat()->left;
1937 if (!RepMemoryUsageLeaf(left, &total_mem_usage)) {
1938 next_node = left;
1939 }
1940
1941 const CordRep* right = cur_node->concat()->right;
1942 if (!RepMemoryUsageLeaf(right, &total_mem_usage)) {
1943 if (next_node) {
1944 tree_stack.push_back(next_node);
1945 }
1946 next_node = right;
1947 }
1948 } else {
1949 // Since cur_node is not a leaf or a concat node it must be a substring.
1950 assert(cur_node->tag == SUBSTRING);
1951 total_mem_usage += sizeof(CordRepSubstring);
1952 next_node = cur_node->substring()->child;
1953 if (RepMemoryUsageLeaf(next_node, &total_mem_usage)) {
1954 next_node = nullptr;
1955 }
1956 }
1957
1958 if (!next_node) {
1959 if (tree_stack.empty()) {
1960 return total_mem_usage;
1961 }
1962 next_node = tree_stack.back();
1963 tree_stack.pop_back();
1964 }
1965 cur_node = next_node;
1966 }
1967 }
1968
operator <<(std::ostream & out,const Cord & cord)1969 std::ostream& operator<<(std::ostream& out, const Cord& cord) {
1970 for (absl::string_view chunk : cord.Chunks()) {
1971 out.write(chunk.data(), chunk.size());
1972 }
1973 return out;
1974 }
1975
namespace strings_internal {
// Test-only accessors exposing internal flat-node constants and helpers so
// unit tests can check sizing behavior without reaching into internals.
size_t CordTestAccess::FlatOverhead() { return kFlatOverhead; }
size_t CordTestAccess::MaxFlatLength() { return kMaxFlatLength; }
// Returns the data capacity encoded by a flat node's tag.
size_t CordTestAccess::FlatTagToLength(uint8_t tag) {
  return TagToLength(tag);
}
// Maps a desired data length to the flat tag that would be allocated for it.
// Aborts if `s` exceeds the maximum flat length.
uint8_t CordTestAccess::LengthToTag(size_t s) {
  ABSL_INTERNAL_CHECK(s <= kMaxFlatLength, absl::StrCat("Invalid length ", s));
  // Tags are derived from the total allocation size: data plus overhead.
  return AllocatedSizeToTag(s + kFlatOverhead);
}
size_t CordTestAccess::SizeofCordRepConcat() { return sizeof(CordRepConcat); }
size_t CordTestAccess::SizeofCordRepExternal() {
  return sizeof(CordRepExternal);
}
size_t CordTestAccess::SizeofCordRepSubstring() {
  return sizeof(CordRepSubstring);
}
}  // namespace strings_internal
1994 ABSL_NAMESPACE_END
1995 } // namespace absl
1996